mm: actually remap enough memory
[pandora-kernel.git] / mm/memory.c
index 15e686a..79c71de 100644
@@ -205,10 +205,14 @@ static int tlb_next_batch(struct mmu_gather *tlb)
                return 1;
        }
 
+       if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
+               return 0;
+
        batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
        if (!batch)
                return 0;
 
+       tlb->batch_count++;
        batch->next = NULL;
        batch->nr   = 0;
        batch->max  = MAX_GATHER_BATCH;
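
The cap added above is what keeps a huge munmap() from queueing an unbounded number of pages while the TLB flush is deferred: once batch_count reaches MAX_GATHER_BATCH_COUNT, tlb_next_batch() refuses to allocate further batches and __tlb_remove_page() starts returning 0. A minimal sketch of the caller-side contract (paraphrasing the generic tlb_remove_page() helper of this kernel generation, not code from this hunk):

#include <asm/tlb.h>

/*
 * The page is still queued when 0 is returned; 0 only means "no room left
 * for the next one, flush what has been gathered before adding more".
 */
static inline void sketch_remove_page(struct mmu_gather *tlb, struct page *page)
{
	if (!__tlb_remove_page(tlb, page))
		tlb_flush_mmu(tlb);	/* frees the gathered pages, resets batches */
}
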
@@ -235,6 +239,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
        tlb->local.nr   = 0;
        tlb->local.max  = ARRAY_SIZE(tlb->__pages);
        tlb->active     = &tlb->local;
+       tlb->batch_count = 0;
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
        tlb->batch = NULL;
@@ -865,20 +870,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                if (!pte_file(pte)) {
                        swp_entry_t entry = pte_to_swp_entry(pte);
 
-                       if (swap_duplicate(entry) < 0)
-                               return entry.val;
-
-                       /* make sure dst_mm is on swapoff's mmlist. */
-                       if (unlikely(list_empty(&dst_mm->mmlist))) {
-                               spin_lock(&mmlist_lock);
-                               if (list_empty(&dst_mm->mmlist))
-                                       list_add(&dst_mm->mmlist,
-                                                &src_mm->mmlist);
-                               spin_unlock(&mmlist_lock);
-                       }
-                       if (likely(!non_swap_entry(entry)))
+                       if (likely(!non_swap_entry(entry))) {
+                               if (swap_duplicate(entry) < 0)
+                                       return entry.val;
+
+                               /* make sure dst_mm is on swapoff's mmlist. */
+                               if (unlikely(list_empty(&dst_mm->mmlist))) {
+                                       spin_lock(&mmlist_lock);
+                                       if (list_empty(&dst_mm->mmlist))
+                                               list_add(&dst_mm->mmlist,
+                                                        &src_mm->mmlist);
+                                       spin_unlock(&mmlist_lock);
+                               }
                                rss[MM_SWAPENTS]++;
-                       else if (is_write_migration_entry(entry) &&
+                       } else if (is_write_migration_entry(entry) &&
                                        is_cow_mapping(vm_flags)) {
                                /*
                                 * COW mappings require pages in both parent
@@ -1173,8 +1178,10 @@ again:
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
                        force_flush = !__tlb_remove_page(tlb, page);
-                       if (force_flush)
+                       if (force_flush) {
+                               addr += PAGE_SIZE;
                                break;
+                       }
                        continue;
                }
                /*
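
The extra addr += PAGE_SIZE matters because the pte that made __tlb_remove_page() fail has already been cleared and queued; the surrounding code flushes and then restarts the scan from addr, so without the bump the same address would be visited twice and the partial flush would stop one page short. A hedged sketch of that restart pattern (simplified, with a hypothetical sketch_take_page() helper, not the literal zap_pte_range() body):

static struct page *sketch_take_page(unsigned long addr);	/* hypothetical */

static unsigned long sketch_zap_range(struct mmu_gather *tlb,
				      unsigned long addr, unsigned long end)
{
	int force_flush;

again:
	force_flush = 0;
	for (; addr != end; addr += PAGE_SIZE) {
		struct page *page = sketch_take_page(addr);

		if (!__tlb_remove_page(tlb, page)) {
			force_flush = 1;
			addr += PAGE_SIZE;	/* this pte is already done */
			break;
		}
	}
	if (force_flush) {
		tlb_flush_mmu(tlb);		/* drain the full batches */
		if (addr != end)
			goto again;		/* resume after the last zapped pte */
	}
	return addr;
}
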
@@ -1396,6 +1403,7 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
        tlb_finish_mmu(&tlb, address, end);
        return end;
 }
+EXPORT_SYMBOL_GPL(zap_page_range);
 
 /**
  * zap_vma_ptes - remove ptes mapping the vma
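
The export added in this tree lets GPL modules tear down user mappings of their buffers explicitly. A hedged usage sketch with a hypothetical driver vma; illustration only:

#include <linux/mm.h>

/*
 * Drop every pte in a vma the driver handed to userspace, so the next
 * touch faults back into the driver's .fault handler.
 */
static void example_unmap_user_buffer(struct vm_area_struct *vma)
{
	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
}
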
@@ -1760,7 +1768,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                                else
                                                        return -EFAULT;
                                        }
-                                       if (ret & VM_FAULT_SIGBUS)
+                                       if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
@@ -1847,19 +1855,24 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                     unsigned long address, unsigned int fault_flags)
 {
        struct vm_area_struct *vma;
+       vm_flags_t vm_flags;
        int ret;
 
        vma = find_extend_vma(mm, address);
        if (!vma || address < vma->vm_start)
                return -EFAULT;
 
+       vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+       if (!(vm_flags & vma->vm_flags))
+               return -EFAULT;
+
        ret = handle_mm_fault(mm, vma, address, fault_flags);
        if (ret & VM_FAULT_ERROR) {
                if (ret & VM_FAULT_OOM)
                        return -ENOMEM;
                if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                        return -EHWPOISON;
-               if (ret & VM_FAULT_SIGBUS)
+               if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                        return -EFAULT;
                BUG();
        }
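
The new vm_flags check makes fixup_user_fault() fail with -EFAULT up front when the vma can never satisfy the requested access (for example a write fault on a read-only mapping) instead of faulting repeatedly. A hedged sketch of a typical caller, modelled loosely on how futex fault handling retries a failed user access:

#include <linux/mm.h>
#include <linux/sched.h>

static int example_fault_in_writable(struct mm_struct *mm, unsigned long uaddr)
{
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, uaddr, FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	/* -EFAULT now also covers "vma exists but is not writable" */
	return ret;
}
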
@@ -2304,6 +2317,53 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(remap_pfn_range);
 
+/**
+ * vm_iomap_memory - remap memory to userspace
+ * @vma: user vma to map to
+ * @start: start of area
+ * @len: size of area
+ *
+ * This is a simplified io_remap_pfn_range() for common driver use. The
+ * driver just needs to give us the physical memory range to be mapped,
+ * we'll figure out the rest from the vma information.
+ *
+ * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
+ * whatever write-combining details or similar.
+ */
+int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
+{
+       unsigned long vm_len, pfn, pages;
+
+       /* Check that the physical memory area passed in looks valid */
+       if (start + len < start)
+               return -EINVAL;
+       /*
+        * You *really* shouldn't map things that aren't page-aligned,
+        * but we've historically allowed it because IO memory might
+        * just have smaller alignment.
+        */
+       len += start & ~PAGE_MASK;
+       pfn = start >> PAGE_SHIFT;
+       pages = (len + ~PAGE_MASK) >> PAGE_SHIFT;
+       if (pfn + pages < pfn)
+               return -EINVAL;
+
+       /* We start the mapping 'vm_pgoff' pages into the area */
+       if (vma->vm_pgoff > pages)
+               return -EINVAL;
+       pfn += vma->vm_pgoff;
+       pages -= vma->vm_pgoff;
+
+       /* Can we fit all of the mapping? */
+       vm_len = vma->vm_end - vma->vm_start;
+       if (vm_len >> PAGE_SHIFT > pages)
+               return -EINVAL;
+
+       /* Ok, let it rip */
+       return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_iomap_memory);
+
 static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
                                     unsigned long addr, unsigned long end,
                                     pte_fn_t fn, void *data)
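
vm_iomap_memory() is meant to collapse the usual mmap boilerplate: the driver passes the raw physical range and the helper derives the pfn, size and offset checks (including vma->vm_pgoff as a page offset into the range) before calling io_remap_pfn_range(). A hedged usage sketch; the my_dev structure and its fields are assumptions for the example, not part of this patch:

#include <linux/fs.h>
#include <linux/mm.h>

struct my_dev {				/* hypothetical device state */
	phys_addr_t phys_base;		/* start of the mappable aperture */
	unsigned long mem_size;		/* its length in bytes */
};

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct my_dev *dev = file->private_data;

	/* optional, per the NOTE above: ask for write-combining first */
	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

	return vm_iomap_memory(vma, dev->phys_base, dev->mem_size);
}
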
@@ -2602,17 +2662,24 @@ reuse:
                if (!dirty_page)
                        return ret;
 
-               /*
-                * Yes, Virginia, this is actually required to prevent a race
-                * with clear_page_dirty_for_io() from clearing the page dirty
-                * bit after it clear all dirty ptes, but before a racing
-                * do_wp_page installs a dirty pte.
-                *
-                * __do_fault is protected similarly.
-                */
                if (!page_mkwrite) {
-                       wait_on_page_locked(dirty_page);
-                       set_page_dirty_balance(dirty_page, page_mkwrite);
+                       struct address_space *mapping;
+                       int dirtied;
+
+                       lock_page(dirty_page);
+                       dirtied = set_page_dirty(dirty_page);
+                       VM_BUG_ON(PageAnon(dirty_page));
+                       mapping = dirty_page->mapping;
+                       unlock_page(dirty_page);
+
+                       if (dirtied && mapping) {
+                               /*
+                                * Some device drivers do not set page.mapping
+                                * but still dirty their pages
+                                */
+                               balance_dirty_pages_ratelimited(mapping);
+                       }
+
                }
                put_page(dirty_page);
                if (page_mkwrite) {
@@ -3058,7 +3125,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
                if (prev && prev->vm_end == address)
                        return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
 
-               expand_downwards(vma, address - PAGE_SIZE);
+               return expand_downwards(vma, address - PAGE_SIZE);
        }
        if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
                struct vm_area_struct *next = vma->vm_next;
@@ -3067,7 +3134,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
                if (next && next->vm_start == address + PAGE_SIZE)
                        return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
 
-               expand_upwards(vma, address + PAGE_SIZE);
+               return expand_upwards(vma, address + PAGE_SIZE);
        }
        return 0;
 }
@@ -3089,7 +3156,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        /* Check if we need to add a guard page to the stack */
        if (check_stack_guard_page(vma, address) < 0)
-               return VM_FAULT_SIGBUS;
+               return VM_FAULT_SIGSEGV;
 
        /* Use the zero-page for reads */
        if (!(flags & FAULT_FLAG_WRITE)) {
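
Returning VM_FAULT_SIGSEGV here, together with the now-propagated errors from expand_downwards()/expand_upwards() above, means a stack that cannot be grown is reported to userspace as SIGSEGV rather than SIGBUS, which is what C runtimes expect for stack overflow. A small hedged user-space illustration; it deliberately overflows the stack, so run it with a reduced ulimit -s in a throwaway shell:

#include <stdio.h>

/*
 * Each call burns at least a page of stack; the volatile array and the use
 * of its value keep the compiler from turning the recursion into a loop.
 */
static long recurse(long depth)
{
	volatile char pad[4096];

	pad[0] = (char)depth;
	return recurse(depth + 1) + pad[0];
}

int main(void)
{
	printf("%ld\n", recurse(0));	/* with this patch: dies with SIGSEGV */
	return 0;
}
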
@@ -3487,8 +3554,9 @@ retry:
 
                barrier();
                if (pmd_trans_huge(orig_pmd)) {
-                       if (flags & FAULT_FLAG_WRITE &&
-                           !pmd_write(orig_pmd) &&
+                       unsigned int dirty = flags & FAULT_FLAG_WRITE;
+
+                       if (dirty && !pmd_write(orig_pmd) &&
                            !pmd_trans_splitting(orig_pmd)) {
                                ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
                                                          orig_pmd);
@@ -3500,6 +3568,9 @@ retry:
                                if (unlikely(ret & VM_FAULT_OOM))
                                        goto retry;
                                return ret;
+                       } else {
+                               huge_pmd_set_accessed(mm, vma, address, pmd,
+                                                     orig_pmd, dirty);
                        }
                        return 0;
                }
@@ -3765,7 +3836,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
        if (follow_phys(vma, addr, write, &prot, &phys_addr))
                return -EINVAL;
 
-       maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+       maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
        if (write)
                memcpy_toio(maddr + offset, buf, len);
        else
@@ -3804,7 +3875,11 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                        vma = find_vma(mm, addr);
                        if (!vma || vma->vm_start > addr)
                                break;
-                       if (vma->vm_ops && vma->vm_ops->access)
+                       if ((vma->vm_flags & VM_PFNMAP) &&
+                           !(vma->vm_flags & VM_IO))
+                               ret = generic_access_phys(vma, addr, buf,
+                                                         len, write);
+                       if (ret <= 0 && vma->vm_ops && vma->vm_ops->access)
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                          len, write);
                        if (ret <= 0)
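
With this last hunk, an access that cannot be satisfied by get_user_pages() first tries generic_access_phys() for plain VM_PFNMAP mappings (those without VM_IO), and only then falls back to the vma's own ->access hook. In practice this is the path taken by debugger-style access through /proc/<pid>/mem and access_process_vm(). A hedged user-space sketch of that consumer path (requires appropriate ptrace permissions; illustration only):

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Read another process's memory via /proc/<pid>/mem; for a VM_PFNMAP
 * mapping this ends up in __access_remote_vm() on the kernel side.
 */
static ssize_t peek_remote(pid_t pid, off_t addr, void *buf, size_t len)
{
	char path[64];
	int fd;
	ssize_t ret;

	snprintf(path, sizeof(path), "/proc/%d/mem", (int)pid);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	ret = pread(fd, buf, len, addr);
	close(fd);
	return ret;
}
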