mm: allow drivers to prevent new writable mappings

[pandora-kernel.git] / mm / mmap.c
diff --git a/mm/mmap.c b/mm/mmap.c

index 3c0061f..a34afb8 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -22,7 +22,7 @@
  #include <linux/security.h>
  #include <linux/hugetlb.h>
  #include <linux/profile.h>
-#include <linux/module.h>
+#include <linux/export.h>
  #include <linux/mount.h>
  #include <linux/mempolicy.h>
  #include <linux/rmap.h>
@@ -111,7 +111,7 @@ struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
   */
  int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
  {
-       unsigned long free, allowed;
+       long free, allowed;
  
         vm_acct_memory(pages);
  
@@ -194,7 +194,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
         if (vma->vm_flags & VM_DENYWRITE)
                 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
         if (vma->vm_flags & VM_SHARED)
-               mapping->i_mmap_writable--;
+               mapping_unmap_writable(mapping);
  
         flush_dcache_mmap_lock(mapping);
         if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -245,6 +245,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
         unsigned long rlim, retval;
         unsigned long newbrk, oldbrk;
         struct mm_struct *mm = current->mm;
+       struct vm_area_struct *next;
         unsigned long min_brk;
  
         down_write(&mm->mmap_sem);
@@ -289,7 +290,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
         }
  
         /* Check against existing mmap mappings. */
-       if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+       next = find_vma(mm, oldbrk);
+       if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
                 goto out;
  
         /* Ok, looks good - let it rip. */
@@ -408,7 +410,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
                 if (vma->vm_flags & VM_DENYWRITE)
                         atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
                 if (vma->vm_flags & VM_SHARED)
-                       mapping->i_mmap_writable++;
+                       atomic_inc(&mapping->i_mmap_writable);
  
                 flush_dcache_mmap_lock(mapping);
                 if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -537,9 +539,12 @@ again:                     remove_next = 1 + (end > next->vm_end);
                  * shrinking vma had, to cover any anon pages imported.
                  */
                 if (exporter && exporter->anon_vma && !importer->anon_vma) {
-                       if (anon_vma_clone(importer, exporter))
-                               return -ENOMEM;
+                       int error;
+
                         importer->anon_vma = exporter->anon_vma;
+                       error = anon_vma_clone(importer, exporter);
+                       if (error)
+                               return error;
                 }
         }
  
@@ -796,7 +801,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                 end, prev->vm_pgoff, NULL);
                 if (err)
                         return NULL;
-               khugepaged_enter_vma_merge(prev);
+               khugepaged_enter_vma_merge(prev, vm_flags);
                 return prev;
         }
  
@@ -815,7 +820,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                 next->vm_pgoff - pglen, NULL);
                 if (err)
                         return NULL;
-               khugepaged_enter_vma_merge(area);
+               khugepaged_enter_vma_merge(area, vm_flags);
                 return area;
         }
  
@@ -1044,6 +1049,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
  
                         if (!file->f_op || !file->f_op->mmap)
                                 return -ENODEV;
+                       if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                               return -EINVAL;
                         break;
  
                 default:
@@ -1052,6 +1059,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         } else {
                 switch (flags & MAP_TYPE) {
                 case MAP_SHARED:
+                       if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                               return -EINVAL;
                         /*
                          * Ignore pgoff.
                          */
@@ -1196,11 +1205,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
  {
         struct mm_struct *mm = current->mm;
         struct vm_area_struct *vma, *prev;
-       int correct_wcount = 0;
         int error;
         struct rb_node **rb_link, *rb_parent;
         unsigned long charged = 0;
-       struct inode *inode =  file ? file->f_path.dentry->d_inode : NULL;
  
         /* Clear old maps */
         error = -ENOMEM;
@@ -1267,17 +1274,23 @@ munmap_back:
         INIT_LIST_HEAD(&vma->anon_vma_chain);
  
         if (file) {
-               error = -EINVAL;
-               if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
-                       goto free_vma;
                 if (vm_flags & VM_DENYWRITE) {
                         error = deny_write_access(file);
                         if (error)
                                 goto free_vma;
-                       correct_wcount = 1;
                 }
-               vma->vm_file = file;
-               get_file(file);
+               if (vm_flags & VM_SHARED) {
+                       error = mapping_map_writable(file->f_mapping);
+                       if (error)
+                               goto allow_write_and_free_vma;
+               }
+
+               /* ->mmap() can change vma->vm_file, but must guarantee that
+                * vma_link() below can deny write-access if VM_DENYWRITE is set
+                * and map writably if VM_SHARED is set. This usually means the
+                * new file must not have been exposed to user-space, yet.
+                */
+               vma->vm_file = get_file(file);
                 error = file->f_op->mmap(file, vma);
                 if (error)
                         goto unmap_and_free_vma;
@@ -1314,11 +1327,14 @@ munmap_back:
         }
  
         vma_link(mm, vma, prev, rb_link, rb_parent);
-       file = vma->vm_file;
-
         /* Once vma denies write, undo our temporary denial count */
-       if (correct_wcount)
-               atomic_inc(&inode->i_writecount);
+       if (file) {
+               if (vm_flags & VM_SHARED)
+                       mapping_unmap_writable(file->f_mapping);
+               if (vm_flags & VM_DENYWRITE)
+                       allow_write_access(file);
+       }
+       file = vma->vm_file;
  out:
         perf_event_mmap(vma);
  
@@ -1332,14 +1348,17 @@ out:
         return addr;
  
  unmap_and_free_vma:
-       if (correct_wcount)
-               atomic_inc(&inode->i_writecount);
         vma->vm_file = NULL;
         fput(file);
  
         /* Undo any partial mapping done by a device driver. */
         unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
         charged = 0;
+       if (vm_flags & VM_SHARED)
+               mapping_unmap_writable(file->f_mapping);
+allow_write_and_free_vma:
+       if (vm_flags & VM_DENYWRITE)
+               allow_write_access(file);
  free_vma:
         kmem_cache_free(vm_area_cachep, vma);
  unacct_error:
@@ -1365,10 +1384,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                 unsigned long len, unsigned long pgoff, unsigned long flags)
  {
         struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
-       unsigned long start_addr;
+       struct vm_area_struct *vma, *prev;
+       unsigned long start_addr, vm_start, prev_end;
  
-       if (len > TASK_SIZE)
+       if (len > TASK_SIZE - mmap_min_addr)
                 return -ENOMEM;
  
         if (flags & MAP_FIXED)
@@ -1376,9 +1395,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
  
         if (addr) {
                 addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
-               if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+               vma = find_vma_prev(mm, addr, &prev);
+               if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+                   (!vma || addr + len <= vm_start_gap(vma)) &&
+                   (!prev || addr >= vm_end_gap(prev)))
                         return addr;
         }
         if (len > mm->cached_hole_size) {
@@ -1389,7 +1409,17 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
         }
  
  full_search:
-       for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+       for (vma = find_vma_prev(mm, addr, &prev); ; prev = vma,
+                                               vma = vma->vm_next) {
+               if (prev) {
+                       prev_end = vm_end_gap(prev);
+                       if (addr < prev_end) {
+                               addr = prev_end;
+                               /* If vma already violates gap, forget it */
+                               if (vma && addr > vma->vm_start)
+                                       addr = vma->vm_start;
+                       }
+               }
                 /* At this point:  (!vma || addr < vma->vm_end). */
                 if (TASK_SIZE - len < addr) {
                         /*
@@ -1404,16 +1434,16 @@ full_search:
                         }
                         return -ENOMEM;
                 }
-               if (!vma || addr + len <= vma->vm_start) {
+               vm_start = vma ? vm_start_gap(vma) : TASK_SIZE;
+               if (addr + len <= vm_start) {
                         /*
                          * Remember the place where we stopped the search:
                          */
                         mm->free_area_cache = addr + len;
                         return addr;
                 }
-               if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
-               addr = vma->vm_end;
+               if (addr + mm->cached_hole_size < vm_start)
+                       mm->cached_hole_size = vm_start - addr;
         }
  }
  #endif 
@@ -1439,12 +1469,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                           const unsigned long len, const unsigned long pgoff,
                           const unsigned long flags)
  {
-       struct vm_area_struct *vma;
+       struct vm_area_struct *vma, *prev;
         struct mm_struct *mm = current->mm;
         unsigned long addr = addr0;
+       unsigned long vm_start, prev_end;
+       unsigned long low_limit = max(PAGE_SIZE, mmap_min_addr);
  
         /* requested length too big for entire address space */
-       if (len > TASK_SIZE)
+       if (len > TASK_SIZE - mmap_min_addr)
                 return -ENOMEM;
  
         if (flags & MAP_FIXED)
@@ -1453,9 +1485,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
         /* requesting a specific address */
         if (addr) {
                 addr = PAGE_ALIGN(addr);
-               vma = find_vma(mm, addr);
-               if (TASK_SIZE - len >= addr &&
-                               (!vma || addr + len <= vma->vm_start))
+               vma = find_vma_prev(mm, addr, &prev);
+               if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+                               (!vma || addr + len <= vm_start_gap(vma)) &&
+                               (!prev || addr >= vm_end_gap(prev)))
                         return addr;
         }
  
@@ -1469,14 +1502,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
         addr = mm->free_area_cache;
  
         /* make sure it can fit in the remaining address space */
-       if (addr > len) {
-               vma = find_vma(mm, addr-len);
-               if (!vma || addr <= vma->vm_start)
+       if (addr >= low_limit + len) {
+               vma = find_vma_prev(mm, addr-len, &prev);
+               if ((!vma || addr <= vm_start_gap(vma)) &&
+                   (!prev || addr-len >= vm_end_gap(prev)))
                         /* remember the address as a hint for next time */
                         return (mm->free_area_cache = addr-len);
         }
  
-       if (mm->mmap_base < len)
+       if (mm->mmap_base < low_limit + len)
                 goto bottomup;
  
         addr = mm->mmap_base-len;
@@ -1487,18 +1521,21 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                  * else if new region fits below vma->vm_start,
                  * return with success:
                  */
-               vma = find_vma(mm, addr);
-               if (!vma || addr+len <= vma->vm_start)
+               vma = find_vma_prev(mm, addr, &prev);
+               vm_start = vma ? vm_start_gap(vma) : mm->mmap_base;
+               prev_end = prev ? vm_end_gap(prev) : low_limit;
+
+               if (addr + len <= vm_start && addr >= prev_end)
                         /* remember the address as a hint for next time */
                         return (mm->free_area_cache = addr);
  
                 /* remember the largest hole we saw so far */
-               if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
+               if (addr + mm->cached_hole_size < vm_start)
+                       mm->cached_hole_size = vm_start - addr;
  
                 /* try just below the current vma->vm_start */
-               addr = vma->vm_start-len;
-       } while (len < vma->vm_start);
+               addr = vm_start - len;
+       } while (vm_start >= low_limit + len);
  
  bottomup:
         /*
@@ -1573,7 +1610,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
         if (mm) {
                 /* Check the cache first. */
                 /* (Cache hit rate is typically around 35%.) */
-               vma = mm->mmap_cache;
+               vma = ACCESS_ONCE(mm->mmap_cache);
                 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
                         struct rb_node * rb_node;
  
@@ -1603,39 +1640,27 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
  
  EXPORT_SYMBOL(find_vma);
  
-/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
+/* Same as find_vma, but also return the VMA preceding the result in *pprev.
+ * When find_vma() returns NULL, *pprev is the rightmost VMA in mm->mm_rb, or
+ * NULL if the tree is empty.  Note: that path dereferences mm unchecked. */
  struct vm_area_struct *
  find_vma_prev(struct mm_struct *mm, unsigned long addr,
                         struct vm_area_struct **pprev)
  {
-       struct vm_area_struct *vma = NULL, *prev = NULL;
-       struct rb_node *rb_node;
-       if (!mm)
-               goto out;
-
-       /* Guard against addr being lower than the first VMA */
-       vma = mm->mmap;
-
-       /* Go through the RB tree quickly. */
-       rb_node = mm->mm_rb.rb_node;
-
-       while (rb_node) {
-               struct vm_area_struct *vma_tmp;
-               vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+       struct vm_area_struct *vma;
  
-               if (addr < vma_tmp->vm_end) {
-                       rb_node = rb_node->rb_left;
-               } else {
-                       prev = vma_tmp;
-                       if (!prev->vm_next || (addr < prev->vm_next->vm_end))
-                               break;
+       vma = find_vma(mm, addr);
+       if (vma) {
+               *pprev = vma->vm_prev;
+       } else {
+               struct rb_node *rb_node = mm->mm_rb.rb_node;
+               *pprev = NULL;
+               while (rb_node) {
+                       *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
                         rb_node = rb_node->rb_right;
                 }
         }
-
-out:
-       *pprev = prev;
-       return prev ? prev->vm_next : vma;
+       return vma;
  }
  
  /*
@@ -1643,7 +1668,8 @@ out:
   * update accounting. This is shared with both the
   * grow-up and grow-down cases.
   */
-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
+static int acct_stack_growth(struct vm_area_struct *vma,
+                            unsigned long size, unsigned long grow)
  {
         struct mm_struct *mm = vma->vm_mm;
         struct rlimit *rlim = current->signal->rlim;
@@ -1696,32 +1722,43 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
   */
  int expand_upwards(struct vm_area_struct *vma, unsigned long address)
  {
-       int error;
+       struct vm_area_struct *next;
+       unsigned long gap_addr;
+       int error = 0;
  
         if (!(vma->vm_flags & VM_GROWSUP))
                 return -EFAULT;
  
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
+       /* Guard against exceeding limits of the address space. */
+       address &= PAGE_MASK;
+       if (address >= TASK_SIZE)
+               return -ENOMEM;
+       address += PAGE_SIZE;
+
+       /* Enforce stack_guard_gap */
+       gap_addr = address + stack_guard_gap;
+
+       /* Guard against overflow */
+       if (gap_addr < address || gap_addr > TASK_SIZE)
+               gap_addr = TASK_SIZE;
+
+       next = vma->vm_next;
+       if (next && next->vm_start < gap_addr) {
+               if (!(next->vm_flags & VM_GROWSUP))
+                       return -ENOMEM;
+               /* Check that both stack segments have the same anon_vma? */
+       }
+
+       /* We must make sure the anon_vma is allocated. */
         if (unlikely(anon_vma_prepare(vma)))
                 return -ENOMEM;
-       vma_lock_anon_vma(vma);
  
         /*
          * vma->vm_start/vm_end cannot change under us because the caller
          * is required to hold the mmap_sem in read mode.  We need the
          * anon_vma lock to serialize against concurrent expand_stacks.
-        * Also guard against wrapping around to address 0.
          */
-       if (address < PAGE_ALIGN(address+4))
-               address = PAGE_ALIGN(address+4);
-       else {
-               vma_unlock_anon_vma(vma);
-               return -ENOMEM;
-       }
-       error = 0;
+       vma_lock_anon_vma(vma);
  
         /* Somebody else might have raced and expanded it already */
         if (address > vma->vm_end) {
@@ -1740,7 +1777,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                 }
         }
         vma_unlock_anon_vma(vma);
-       khugepaged_enter_vma_merge(vma);
+       khugepaged_enter_vma_merge(vma, vma->vm_flags);
         return error;
  }
  #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -1751,27 +1788,36 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
  int expand_downwards(struct vm_area_struct *vma,
                                    unsigned long address)
  {
+       struct vm_area_struct *prev;
+       unsigned long gap_addr;
         int error;
  
-       /*
-        * We must make sure the anon_vma is allocated
-        * so that the anon_vma locking is not a noop.
-        */
-       if (unlikely(anon_vma_prepare(vma)))
-               return -ENOMEM;
-
         address &= PAGE_MASK;
         error = security_file_mmap(NULL, 0, 0, 0, address, 1);
         if (error)
                 return error;
  
-       vma_lock_anon_vma(vma);
+       /* Enforce stack_guard_gap */
+       gap_addr = address - stack_guard_gap;
+       if (gap_addr > address)
+               return -ENOMEM;
+       prev = vma->vm_prev;
+       if (prev && prev->vm_end > gap_addr) {
+               if (!(prev->vm_flags & VM_GROWSDOWN))
+                       return -ENOMEM;
+               /* Check that both stack segments have the same anon_vma? */
+       }
+
+       /* We must make sure the anon_vma is allocated. */
+       if (unlikely(anon_vma_prepare(vma)))
+               return -ENOMEM;
  
         /*
          * vma->vm_start/vm_end cannot change under us because the caller
          * is required to hold the mmap_sem in read mode.  We need the
          * anon_vma lock to serialize against concurrent expand_stacks.
          */
+       vma_lock_anon_vma(vma);
  
         /* Somebody else might have raced and expanded it already */
         if (address < vma->vm_start) {
@@ -1791,10 +1837,26 @@ int expand_downwards(struct vm_area_struct *vma,
                 }
         }
         vma_unlock_anon_vma(vma);
-       khugepaged_enter_vma_merge(vma);
+       khugepaged_enter_vma_merge(vma, vma->vm_flags);
         return error;
  }
  
+/* Enforced gap (bytes) between the expanding stack and other mappings. */
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
+static int __init cmdline_parse_stack_guard_gap(char *p)
+{
+       unsigned long val;
+       char *endptr;
+
+       val = simple_strtoul(p, &endptr, 10);   /* gap size in pages */
+       if (!*endptr)                           /* ignore on trailing junk */
+               stack_guard_gap = val << PAGE_SHIFT;
+
+       return 1;       /* handled; 0 would pass the option on to init */
+}
+__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
+
  #ifdef CONFIG_STACK_GROWSUP
  int expand_stack(struct vm_area_struct *vma, unsigned long address)
  {