}
EXPORT_SYMBOL_GPL(zap_vma_ptes);
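+/*
+ * Decide whether a pte that is not writable may still be followed for
+ * a FOLL_WRITE lookup: this is allowed only when get_user_pages() has
+ * already forced the COW break (FOLL_FORCE together with FOLL_COW,
+ * which is set below in __get_user_pages()) and the pte maps a stable
+ * anonymous, non-KSM page.
+ */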
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+ unsigned int flags)
+{
+ if (pte_write(pte))
+ return true;
+
+ /*
+ * Make sure that we are really following a CoWed page. We do not
+ * need to care about exclusiveness of the page; it is enough to
+ * ensure that the once-CoWed page has not disappeared in the
+ * meantime and has not been merged into a KSM page.
+ */
+ if ((flags & FOLL_FORCE) && (flags & FOLL_COW))
+ return page && PageAnon(page) && !PageKsm(page);
+
+ return false;
+}
+
/**
* follow_page - look up a page descriptor from a user-virtual address
* @vma: vm_area_struct mapping @address
pte = *ptep;
if (!pte_present(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !pte_write(pte))
- goto unlock;
page = vm_normal_page(vma, address, pte);
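+ /*
+ * vm_normal_page() may return NULL here (e.g. for the zero page).
+ * can_follow_write_pte() then refuses the forced write-follow, since
+ * only a stable PageAnon() page proves that COW has been broken.
+ */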
+ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) {
+ pte_unmap_unlock(ptep, ptl);
+ return NULL;
+ }
+
if (unlikely(!page)) {
if ((flags & FOLL_DUMP) ||
!is_zero_pfn(pte_pfn(pte)))
unlock_page(page);
}
}
-unlock:
+
pte_unmap_unlock(ptep, ptl);
out:
return page;
}
-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
-{
- return stack_guard_page_start(vma, addr) ||
- stack_guard_page_end(vma, addr+PAGE_SIZE);
-}
-
/**
* __get_user_pages() - pin user pages in memory
* @tsk: task_struct of target task
int ret;
unsigned int fault_flags = 0;
- /* For mlock, just skip the stack guard page. */
- if (foll_flags & FOLL_MLOCK) {
- if (stack_guard_page(vma, start))
- goto next_page;
- }
if (foll_flags & FOLL_WRITE)
fault_flags |= FAULT_FLAG_WRITE;
if (nonblocking)
* The VM_FAULT_WRITE bit tells us that
* do_wp_page has broken COW when necessary,
* even if maybe_mkwrite decided not to set
- * pte_write. We can thus safely do subsequent
- * page lookups as if they were reads. But only
- * do so when looping for pte_write is futile:
- * in some cases userspace may also be wanting
- * to write to the gotten user page, which a
- * read fault here might prevent (a readonly
- * page might get reCOWed by userspace write).
+ * pte_write. We cannot simply drop FOLL_WRITE
+ * here because the COWed page might be gone by
+ * the time we do the subsequent page lookups.
*/
if ((ret & VM_FAULT_WRITE) &&
!(vma->vm_flags & VM_WRITE))
- foll_flags &= ~FOLL_WRITE;
+ foll_flags |= FOLL_COW;
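+ /*
+ * With FOLL_COW set, the follow_page() retry at the top of
+ * this loop can accept the CoWed anonymous page via
+ * can_follow_write_pte() instead of faulting once more.
+ */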
cond_resched();
}
return ret;
}
-/*
- * This is like a special single-page "expand_{down|up}wards()",
- * except we must first make sure that 'address{-|+}PAGE_SIZE'
- * doesn't hit another vma.
- */
-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
-{
- address &= PAGE_MASK;
- if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
- struct vm_area_struct *prev = vma->vm_prev;
-
- /*
- * Is there a mapping abutting this one below?
- *
- * That's only ok if it's the same stack mapping
- * that has gotten split..
- */
- if (prev && prev->vm_end == address)
- return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
-
- return expand_downwards(vma, address - PAGE_SIZE);
- }
- if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
- struct vm_area_struct *next = vma->vm_next;
-
- /* As VM_GROWSDOWN but s/below/above/ */
- if (next && next->vm_start == address + PAGE_SIZE)
- return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
-
- return expand_upwards(vma, address + PAGE_SIZE);
- }
- return 0;
-}
-
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
if (vma->vm_flags & VM_SHARED)
return VM_FAULT_SIGBUS;
- /* Check if we need to add a guard page to the stack */
- if (check_stack_guard_page(vma, address) < 0)
- return VM_FAULT_SIGSEGV;
-
/* Use the zero-page for reads */
if (!(flags & FAULT_FLAG_WRITE)) {
entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),