mm: actually remap enough memory
[pandora-kernel.git] / mm/memory.c
index 15e686a..79c71de 100644
@@ -205,10 +205,14 @@ static int tlb_next_batch(struct mmu_gather *tlb)
                return 1;
        }
 
+       if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
+               return 0;
+
        batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
        if (!batch)
                return 0;
 
+       tlb->batch_count++;
        batch->next = NULL;
        batch->nr   = 0;
        batch->max  = MAX_GATHER_BATCH;
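
The cap added above is what keeps a huge munmap() from queueing an unbounded number of pages while the TLB flush is deferred: once batch_count reaches MAX_GATHER_BATCH_COUNT, tlb_next_batch() refuses to allocate further batches and __tlb_remove_page() starts returning 0. A minimal sketch of the caller-side contract (paraphrasing the generic tlb_remove_page() helper of this kernel generation, not code from this hunk):

#include <asm/tlb.h>

/*
 * The page is still queued when 0 is returned; 0 only means "no room left
 * for the next one, flush what has been gathered before adding more".
 */
static inline void sketch_remove_page(struct mmu_gather *tlb, struct page *page)
{
	if (!__tlb_remove_page(tlb, page))
		tlb_flush_mmu(tlb);	/* frees the gathered pages, resets batches */
}
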
@@ -235,6 +239,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
        tlb->local.nr   = 0;
        tlb->local.max  = ARRAY_SIZE(tlb->__pages);
        tlb->active     = &tlb->local;
+       tlb->batch_count = 0;
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
        tlb->batch = NULL;
@@ -865,20 +870,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                if (!pte_file(pte)) {
                        swp_entry_t entry = pte_to_swp_entry(pte);
 
-                       if (swap_duplicate(entry) < 0)
-                               return entry.val;
-
-                       /* make sure dst_mm is on swapoff's mmlist. */
-                       if (unlikely(list_empty(&dst_mm->mmlist))) {
-                               spin_lock(&mmlist_lock);
-                               if (list_empty(&dst_mm->mmlist))
-                                       list_add(&dst_mm->mmlist,
-                                                &src_mm->mmlist);
-                               spin_unlock(&mmlist_lock);
-                       }
-                       if (likely(!non_swap_entry(entry)))
+                       if (likely(!non_swap_entry(entry))) {
+                               if (swap_duplicate(entry) < 0)
+                                       return entry.val;
+
+                               /* make sure dst_mm is on swapoff's mmlist. */
+                               if (unlikely(list_empty(&dst_mm->mmlist))) {
+                                       spin_lock(&mmlist_lock);
+                                       if (list_empty(&dst_mm->mmlist))
+                                               list_add(&dst_mm->mmlist,
+                                                        &src_mm->mmlist);
+                                       spin_unlock(&mmlist_lock);
+                               }
                                rss[MM_SWAPENTS]++;
-                       else if (is_write_migration_entry(entry) &&
+                       } else if (is_write_migration_entry(entry) &&
                                        is_cow_mapping(vm_flags)) {
                                /*
                                 * COW mappings require pages in both parent
@@ -1173,8 +1178,10 @@ again:
                        if (unlikely(page_mapcount(page) < 0))
                                print_bad_pte(vma, addr, ptent, page);
                        force_flush = !__tlb_remove_page(tlb, page);
-                       if (force_flush)
+                       if (force_flush) {
+                               addr += PAGE_SIZE;
                                break;
+                       }
                        continue;
                }
                /*
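
The extra addr += PAGE_SIZE matters because the pte that made __tlb_remove_page() fail has already been cleared and queued; the surrounding code flushes and then restarts the scan from addr, so without the bump the same address would be visited twice and the partial flush would stop one page short. A hedged sketch of that restart pattern (simplified, with a hypothetical sketch_take_page() helper, not the literal zap_pte_range() body):

static struct page *sketch_take_page(unsigned long addr);	/* hypothetical */

static unsigned long sketch_zap_range(struct mmu_gather *tlb,
				      unsigned long addr, unsigned long end)
{
	int force_flush;

again:
	force_flush = 0;
	for (; addr != end; addr += PAGE_SIZE) {
		struct page *page = sketch_take_page(addr);

		if (!__tlb_remove_page(tlb, page)) {
			force_flush = 1;
			addr += PAGE_SIZE;	/* this pte is already done */
			break;
		}
	}
	if (force_flush) {
		tlb_flush_mmu(tlb);		/* drain the full batches */
		if (addr != end)
			goto again;		/* resume after the last zapped pte */
	}
	return addr;
}
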
@@ -1396,6 +1403,7 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
        tlb_finish_mmu(&tlb, address, end);
        return end;
 }
+EXPORT_SYMBOL_GPL(zap_page_range);
 
 /**
  * zap_vma_ptes - remove ptes mapping the vma
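
The export added in this tree lets GPL modules tear down user mappings of their buffers explicitly. A hedged usage sketch with a hypothetical driver vma; illustration only:

#include <linux/mm.h>

/*
 * Drop every pte in a vma the driver handed to userspace, so the next
 * touch faults back into the driver's .fault handler.
 */
static void example_unmap_user_buffer(struct vm_area_struct *vma)
{
	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
}
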
@@ -1760,7 +1768,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                                else
                                                        return -EFAULT;
                                        }
-                                       if (ret & VM_FAULT_SIGBUS)
+                                       if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
@@ -1847,19 +1855,24 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
                     unsigned long address, unsigned int fault_flags)
 {
        struct vm_area_struct *vma;
+       vm_flags_t vm_flags;
        int ret;
 
        vma = find_extend_vma(mm, address);
        if (!vma || address < vma->vm_start)
                return -EFAULT;
 
+       vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+       if (!(vm_flags & vma->vm_flags))
+               return -EFAULT;
+
        ret = handle_mm_fault(mm, vma, address, fault_flags);
        if (ret & VM_FAULT_ERROR) {
                if (ret & VM_FAULT_OOM)
                        return -ENOMEM;
                if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
                        return -EHWPOISON;
-               if (ret & VM_FAULT_SIGBUS)
+               if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
                        return -EFAULT;
                BUG();
        }
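
The new vm_flags check makes fixup_user_fault() fail with -EFAULT up front when the vma can never satisfy the requested access (for example a write fault on a read-only mapping) instead of faulting repeatedly. A hedged sketch of a typical caller, modelled loosely on how futex fault handling retries a failed user access:

#include <linux/mm.h>
#include <linux/sched.h>

static int example_fault_in_writable(struct mm_struct *mm, unsigned long uaddr)
{
	int ret;

	down_read(&mm->mmap_sem);
	ret = fixup_user_fault(current, mm, uaddr, FAULT_FLAG_WRITE);
	up_read(&mm->mmap_sem);

	/* -EFAULT now also covers "vma exists but is not writable" */
	return ret;
}
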
@@ -2304,6 +2317,53 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(remap_pfn_range);
 
+/**
+ * vm_iomap_memory - remap memory to userspace
+ * @vma: user vma to map to
+ * @start: start of area
+ * @len: size of area
+ *
+ * This is a simplified io_remap_pfn_range() for common driver use. The
+ * driver just needs to give us the physical memory range to be mapped,
+ * we'll figure out the rest from the vma information.
+ *
+ * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
+ * whatever write-combining details or similar.
+ */
+int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
+{
+       unsigned long vm_len, pfn, pages;
+
+       /* Check that the physical memory area passed in looks valid */
+       if (start + len < start)
+               return -EINVAL;
+       /*
+        * You *really* shouldn't map things that aren't page-aligned,
+        * but we've historically allowed it because IO memory might
+        * just have smaller alignment.
+        */
+       len += start & ~PAGE_MASK;
+       pfn = start >> PAGE_SHIFT;
+       pages = (len + ~PAGE_MASK) >> PAGE_SHIFT;
+       if (pfn + pages < pfn)
+               return -EINVAL;
+
+       /* We start the mapping 'vm_pgoff' pages into the area */
+       if (vma->vm_pgoff > pages)
+               return -EINVAL;
+       pfn += vma->vm_pgoff;
+       pages -= vma->vm_pgoff;
+
+       /* Can we fit all of the mapping? */
+       vm_len = vma->vm_end - vma->vm_start;
+       if (vm_len >> PAGE_SHIFT > pages)
+               return -EINVAL;
+
+       /* Ok, let it rip */
+       return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_iomap_memory);
+
 static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
                                     unsigned long addr, unsigned long end,
                                     pte_fn_t fn, void *data)
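
vm_iomap_memory() is meant to collapse the usual mmap boilerplate: the driver passes the raw physical range and the helper derives the pfn, size and offset checks (including vma->vm_pgoff as a page offset into the range) before calling io_remap_pfn_range(). A hedged usage sketch; the my_dev structure and its fields are assumptions for the example, not part of this patch:

#include <linux/fs.h>
#include <linux/mm.h>

struct my_dev {				/* hypothetical device state */
	phys_addr_t phys_base;		/* start of the mappable aperture */
	unsigned long mem_size;		/* its length in bytes */
};

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct my_dev *dev = file->private_data;

	/* optional, per the NOTE above: ask for write-combining first */
	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

	return vm_iomap_memory(vma, dev->phys_base, dev->mem_size);
}
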
@@ -2602,17 +2662,24 @@ reuse:
                if (!dirty_page)
                        return ret;
 
-               /*
-                * Yes, Virginia, this is actually required to prevent a race
-                * with clear_page_dirty_for_io() from clearing the page dirty
-                * bit after it clear all dirty ptes, but before a racing
-                * do_wp_page installs a dirty pte.
-                *
-                * __do_fault is protected similarly.
-                */
                if (!page_mkwrite) {
-                       wait_on_page_locked(dirty_page);
-                       set_page_dirty_balance(dirty_page, page_mkwrite);
+                       struct address_space *mapping;
+                       int dirtied;
+
+                       lock_page(dirty_page);
+                       dirtied = set_page_dirty(dirty_page);
+                       VM_BUG_ON(PageAnon(dirty_page));
+                       mapping = dirty_page->mapping;
+                       unlock_page(dirty_page);
+
+                       if (dirtied && mapping) {
+                               /*
+                                * Some device drivers do not set page.mapping
+                                * but still dirty their pages
+                                */
+                               balance_dirty_pages_ratelimited(mapping);
+                       }
+
                }
                put_page(dirty_page);
                if (page_mkwrite) {
@@ -3058,7 +3125,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
                if (prev && prev->vm_end == address)
                        return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
 
-               expand_downwards(vma, address - PAGE_SIZE);
+               return expand_downwards(vma, address - PAGE_SIZE);
        }
        if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
                struct vm_area_struct *next = vma->vm_next;
@@ -3067,7 +3134,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
                if (next && next->vm_start == address + PAGE_SIZE)
                        return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
 
-               expand_upwards(vma, address + PAGE_SIZE);
+               return expand_upwards(vma, address + PAGE_SIZE);
        }
        return 0;
 }
@@ -3089,7 +3156,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
        /* Check if we need to add a guard page to the stack */
        if (check_stack_guard_page(vma, address) < 0)
-               return VM_FAULT_SIGBUS;
+               return VM_FAULT_SIGSEGV;
 
        /* Use the zero-page for reads */
        if (!(flags & FAULT_FLAG_WRITE)) {
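
Returning VM_FAULT_SIGSEGV here, together with the now-propagated errors from expand_downwards()/expand_upwards() above, means a stack that cannot be grown is reported to userspace as SIGSEGV rather than SIGBUS, which is what C runtimes expect for stack overflow. A small hedged user-space illustration; it deliberately overflows the stack, so run it with a reduced ulimit -s in a throwaway shell:

#include <stdio.h>

/*
 * Each call burns at least a page of stack; the volatile array and the use
 * of its value keep the compiler from turning the recursion into a loop.
 */
static long recurse(long depth)
{
	volatile char pad[4096];

	pad[0] = (char)depth;
	return recurse(depth + 1) + pad[0];
}

int main(void)
{
	printf("%ld\n", recurse(0));	/* with this patch: dies with SIGSEGV */
	return 0;
}
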
@@ -3487,8 +3554,9 @@ retry:
 
                barrier();
                if (pmd_trans_huge(orig_pmd)) {
-                       if (flags & FAULT_FLAG_WRITE &&
-                           !pmd_write(orig_pmd) &&
+                       unsigned int dirty = flags & FAULT_FLAG_WRITE;
+
+                       if (dirty && !pmd_write(orig_pmd) &&
                            !pmd_trans_splitting(orig_pmd)) {
                                ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
                                                          orig_pmd);
@@ -3500,6 +3568,9 @@ retry:
                                if (unlikely(ret & VM_FAULT_OOM))
                                        goto retry;
                                return ret;
+                       } else {
+                               huge_pmd_set_accessed(mm, vma, address, pmd,
+                                                     orig_pmd, dirty);
                        }
                        return 0;
                }
@@ -3765,7 +3836,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
        if (follow_phys(vma, addr, write, &prot, &phys_addr))
                return -EINVAL;
 
-       maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+       maddr = ioremap_prot(phys_addr, PAGE_ALIGN(len + offset), prot);
        if (write)
                memcpy_toio(maddr + offset, buf, len);
        else
@@ -3804,7 +3875,11 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                        vma = find_vma(mm, addr);
                        if (!vma || vma->vm_start > addr)
                                break;
-                       if (vma->vm_ops && vma->vm_ops->access)
+                       if ((vma->vm_flags & VM_PFNMAP) &&
+                           !(vma->vm_flags & VM_IO))
+                               ret = generic_access_phys(vma, addr, buf,
+                                                         len, write);
+                       if (ret <= 0 && vma->vm_ops && vma->vm_ops->access)
                                ret = vma->vm_ops->access(vma, addr, buf,
                                                          len, write);
                        if (ret <= 0)
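
With this last hunk, an access that cannot be satisfied by get_user_pages() first tries generic_access_phys() for plain VM_PFNMAP mappings (those without VM_IO), and only then falls back to the vma's own ->access hook. In practice this is the path taken by debugger-style access through /proc/<pid>/mem and access_process_vm(). A hedged user-space sketch of that consumer path (requires appropriate ptrace permissions; illustration only):

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Read another process's memory via /proc/<pid>/mem; for a VM_PFNMAP
 * mapping this ends up in __access_remote_vm() on the kernel side.
 */
static ssize_t peek_remote(pid_t pid, off_t addr, void *buf, size_t len)
{
	char path[64];
	int fd;
	ssize_t ret;

	snprintf(path, sizeof(path), "/proc/%d/mem", (int)pid);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	ret = pread(fd, buf, len, addr);
	close(fd);
	return ret;
}
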