mm, gup: close FOLL MAP_PRIVATE race

author Michal Hocko <mhocko@suse.com>

Sun, 16 Oct 2016 09:55:00 +0000 (11:55 +0200)

committer Ben Hutchings <ben@decadent.org.uk>

Thu, 20 Oct 2016 22:41:00 +0000 (23:41 +0100)
author Michal Hocko <mhocko@suse.com>
Sun, 16 Oct 2016 09:55:00 +0000 (11:55 +0200)
committer Ben Hutchings <ben@decadent.org.uk>
Thu, 20 Oct 2016 22:41:00 +0000 (23:41 +0100)
diff --git a/include/linux/mm.h b/include/linux/mm.h

index e5ee683..16394da 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1527,6 +1527,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
  #define FOLL_MLOCK     0x40    /* mark page as mlocked */
  #define FOLL_SPLIT     0x80    /* don't return transhuge pages, split them */
  #define FOLL_HWPOISON  0x100   /* check page is hwpoisoned */
+#define FOLL_COW       0x4000  /* internal GUP flag */
  
  typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
                         void *data);
diff --git a/mm/memory.c b/mm/memory.c

index 675b211..2917e9b 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1427,6 +1427,24 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
  }
  EXPORT_SYMBOL_GPL(zap_vma_ptes);
  
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+                                       unsigned int flags)
+{
+       if (pte_write(pte))
+               return true;
+
+       /*
+        * Forced write after a COW fault: the pte may stay read-only, so make
+        * sure we are really following the COWed page. Exclusiveness of the
+        * page is irrelevant; we only need to know that the COWed page has not
+        * disappeared in the meantime and has not been merged into a KSM page.
+        */
+       if ((flags & FOLL_FORCE) && (flags & FOLL_COW))
+               return page && PageAnon(page) && !PageKsm(page);
+
+       return false;
+}
+
  /**
   * follow_page - look up a page descriptor from a user-virtual address
   * @vma: vm_area_struct mapping @address
@@ -1509,10 +1527,13 @@ split_fallthrough:
         pte = *ptep;
         if (!pte_present(pte))
                 goto no_page;
-       if ((flags & FOLL_WRITE) && !pte_write(pte))
-               goto unlock;
  
         page = vm_normal_page(vma, address, pte);
+       if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) {
+               pte_unmap_unlock(ptep, ptl);
+               return NULL;
+       }
+
         if (unlikely(!page)) {
                 if ((flags & FOLL_DUMP) ||
                     !is_zero_pfn(pte_pfn(pte)))
@@ -1555,7 +1576,7 @@ split_fallthrough:
                         unlock_page(page);
                 }
         }
-unlock:
+
         pte_unmap_unlock(ptep, ptl);
  out:
         return page;
@@ -1789,17 +1810,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                  * The VM_FAULT_WRITE bit tells us that
                                  * do_wp_page has broken COW when necessary,
                                  * even if maybe_mkwrite decided not to set
-                                * pte_write. We can thus safely do subsequent
-                                * page lookups as if they were reads. But only
-                                * do so when looping for pte_write is futile:
-                                * in some cases userspace may also be wanting
-                                * to write to the gotten user page, which a
-                                * read fault here might prevent (a readonly
-                                * page might get reCOWed by userspace write).
+                                * pte_write. We cannot simply drop FOLL_WRITE
+                                * here because the COWed page might be gone by
+                                * the time we do the subsequent page lookups.
                                  */
                                 if ((ret & VM_FAULT_WRITE) &&
                                     !(vma->vm_flags & VM_WRITE))
-                                       foll_flags &= ~FOLL_WRITE;
+                                       foll_flags |= FOLL_COW;
  
                                 cond_resched();
                         }
author	Michal Hocko <mhocko@suse.com>
	Sun, 16 Oct 2016 09:55:00 +0000 (11:55 +0200)
committer	Ben Hutchings <ben@decadent.org.uk>
	Thu, 20 Oct 2016 22:41:00 +0000 (23:41 +0100)
include/linux/mm.h		patch \| blob \| history
mm/memory.c		patch \| blob \| history