}
EXPORT_SYMBOL_GPL(zap_vma_ptes);
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+ unsigned int flags)
+{
+ if (pte_write(pte))
+ return true;
+
+ /*
+ * The pte is not writable, so a write lookup is refused unless the
+ * caller passed both FOLL_FORCE and FOLL_COW. In that case make sure
+ * that we are really following a COWed page. We do not have to care
+ * about exclusiveness of the page; we only need to ensure that the
+ * COWed page has not disappeared in the meantime and has not been
+ * merged into a KSM page. Hence the page must be non-NULL, anonymous
+ * and not a KSM page.
+ */
+ if ((flags & FOLL_FORCE) && (flags & FOLL_COW))
+ return page && PageAnon(page) && !PageKsm(page);
+
+ return false;
+}
+
/**
* follow_page - look up a page descriptor from a user-virtual address
* @vma: vm_area_struct mapping @address
pte = *ptep;
if (!pte_present(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !pte_write(pte))
- goto unlock;
page = vm_normal_page(vma, address, pte);
+ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) {
+ pte_unmap_unlock(ptep, ptl);
+ return NULL;
+ }
+
if (unlikely(!page)) {
if ((flags & FOLL_DUMP) ||
!is_zero_pfn(pte_pfn(pte)))
unlock_page(page);
}
}
-unlock:
+
pte_unmap_unlock(ptep, ptl);
out:
return page;
* The VM_FAULT_WRITE bit tells us that
* do_wp_page has broken COW when necessary,
* even if maybe_mkwrite decided not to set
- * pte_write. We can thus safely do subsequent
- * page lookups as if they were reads. But only
- * do so when looping for pte_write is futile:
- * in some cases userspace may also be wanting
- * to write to the gotten user page, which a
- * read fault here might prevent (a readonly
- * page might get reCOWed by userspace write).
+ * pte_write. We cannot simply drop FOLL_WRITE
+ * here because the COWed page might be gone by
+ * the time we do the subsequent page lookups.
*/
if ((ret & VM_FAULT_WRITE) &&
!(vma->vm_flags & VM_WRITE))
- foll_flags &= ~FOLL_WRITE;
+ foll_flags |= FOLL_COW;
cond_resched();
}