fs/seq_file: fix out-of-bounds read

[pandora-kernel.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index 759f915..2917e9b 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1427,6 +1427,24 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
  }
  EXPORT_SYMBOL_GPL(zap_vma_ptes);
  
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+                                       unsigned int flags)
+{
+       if (pte_write(pte))
+               return true;
+
+       /*
+        * A forced write (FOLL_FORCE) may follow a read-only pte only after
+        * COW has been broken (FOLL_COW). The page need not be exclusive; we
+        * only check that the COWed page is still there, is still anonymous
+        * and has not been merged into a KSM page in the meantime.
+        */
+       if ((flags & FOLL_FORCE) && (flags & FOLL_COW))
+               return page && PageAnon(page) && !PageKsm(page);
+
+       return false;
+}
+
  /**
   * follow_page - look up a page descriptor from a user-virtual address
   * @vma: vm_area_struct mapping @address
@@ -1509,10 +1527,13 @@ split_fallthrough:
         pte = *ptep;
         if (!pte_present(pte))
                 goto no_page;
-       if ((flags & FOLL_WRITE) && !pte_write(pte))
-               goto unlock;
  
         page = vm_normal_page(vma, address, pte);
+       if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, page, flags)) {
+               pte_unmap_unlock(ptep, ptl);
+               return NULL;
+       }
+
         if (unlikely(!page)) {
                 if ((flags & FOLL_DUMP) ||
                     !is_zero_pfn(pte_pfn(pte)))
@@ -1555,7 +1576,7 @@ split_fallthrough:
                         unlock_page(page);
                 }
         }
-unlock:
+
         pte_unmap_unlock(ptep, ptl);
  out:
         return page;
@@ -1767,7 +1788,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                                 else
                                                         return -EFAULT;
                                         }
-                                       if (ret & VM_FAULT_SIGBUS)
+                                       if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                                                 return i ? i : -EFAULT;
                                         BUG();
                                 }
@@ -1789,17 +1810,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                  * The VM_FAULT_WRITE bit tells us that
                                  * do_wp_page has broken COW when necessary,
                                  * even if maybe_mkwrite decided not to set
-                                * pte_write. We can thus safely do subsequent
-                                * page lookups as if they were reads. But only
-                                * do so when looping for pte_write is futile:
-                                * in some cases userspace may also be wanting
-                                * to write to the gotten user page, which a
-                                * read fault here might prevent (a readonly
-                                * page might get reCOWed by userspace write).
+                                * pte_write. We cannot simply drop FOLL_WRITE
+                                * here because the COWed page might be gone by
+                                * the time we do the subsequent page lookups.
                                  */
                                 if ((ret & VM_FAULT_WRITE) &&
                                     !(vma->vm_flags & VM_WRITE))
-                                       foll_flags &= ~FOLL_WRITE;
+                                       foll_flags |= FOLL_COW;
  
                                 cond_resched();
                         }
@@ -1871,7 +1888,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                         return -ENOMEM;
                 if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                         return -EHWPOISON;
-               if (ret & VM_FAULT_SIGBUS)
+               if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                         return -EFAULT;
                 BUG();
         }
@@ -3153,9 +3170,13 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
  
         pte_unmap(page_table);
  
+       /* File mapping without ->vm_ops ? */
+       if (vma->vm_flags & VM_SHARED)
+               return VM_FAULT_SIGBUS;
+
         /* Check if we need to add a guard page to the stack */
         if (check_stack_guard_page(vma, address) < 0)
-               return VM_FAULT_SIGBUS;
+               return VM_FAULT_SIGSEGV;
  
         /* Use the zero-page for reads */
         if (!(flags & FAULT_FLAG_WRITE)) {
@@ -3412,6 +3433,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                         - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
  
         pte_unmap(page_table);
+       /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+       if (!vma->vm_ops->fault)
+               return VM_FAULT_SIGBUS;
         return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
  }
  
@@ -3470,11 +3494,9 @@ int handle_pte_fault(struct mm_struct *mm,
         entry = *pte;
         if (!pte_present(entry)) {
                 if (pte_none(entry)) {
-                       if (vma->vm_ops) {
-                               if (likely(vma->vm_ops->fault))
-                                       return do_linear_fault(mm, vma, address,
+                       if (vma->vm_ops)
+                               return do_linear_fault(mm, vma, address,
                                                 pte, pmd, flags, entry);
-                       }
                         return do_anonymous_page(mm, vma, address,
                                                  pte, pmd, flags);
                 }
@@ -3578,8 +3600,18 @@ retry:
          */
         if (unlikely(pmd_none(*pmd)) && __pte_alloc(mm, vma, pmd, address))
                 return VM_FAULT_OOM;
-       /* if an huge pmd materialized from under us just retry later */
-       if (unlikely(pmd_trans_huge(*pmd)))
+       /*
+        * If a huge pmd materialized under us just retry later.  Use
+        * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+        * didn't become pmd_trans_huge under us and then back to pmd_none, as
+        * a result of MADV_DONTNEED running immediately after a huge pmd fault
+        * in a different thread of this mm, in turn leading to a misleading
+        * pmd_trans_huge() retval.  All we have to ensure is that it is a
+        * regular pmd that we can walk with pte_offset_map() and we can do that
+        * through an atomic read in C, which is what pmd_trans_unstable()
+        * provides.
+        */
+       if (unlikely(pmd_trans_unstable(pmd)))
                 return 0;
         /*
          * A regular pmd is established and it can't morph into a huge pmd
@@ -3831,7 +3863,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
         if (follow_phys(vma, addr, write, &prot, &phys_addr))
                 return -EINVAL;
  
-       maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+       maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
         if (write)
                 memcpy_toio(maddr + offset, buf, len);
         else