Merge tag 'char-misc-3.15-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregk...
[pandora-kernel.git] / mm / memory.c
index 82c1e4c..d0f0bef 100644 (file)
@@ -60,6 +60,7 @@
 #include <linux/migrate.h>
 #include <linux/string.h>
 #include <linux/dma-debug.h>
+#include <linux/debugfs.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -1320,9 +1321,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
                         * It is undesirable to test vma->vm_file as it
                         * should be non-null for valid hugetlb area.
                         * However, vm_file will be NULL in the error
-                        * cleanup path of do_mmap_pgoff. When
+                        * cleanup path of mmap_region. When
                         * hugetlbfs ->mmap method fails,
-                        * do_mmap_pgoff() nullifies vma->vm_file
+                        * mmap_region() nullifies vma->vm_file
                         * before calling this function to clean up.
                         * Since no pte has actually been setup, it is
                         * safe to do nothing in this case.
@@ -2781,7 +2782,7 @@ reuse:
                 */
                if (!page_mkwrite) {
                        wait_on_page_locked(dirty_page);
-                       set_page_dirty_balance(dirty_page, page_mkwrite);
+                       set_page_dirty_balance(dirty_page);
                        /* file_update_time outside page_lock */
                        if (vma->vm_file)
                                file_update_time(vma->vm_file);
@@ -2827,7 +2828,7 @@ gotten:
        }
        __SetPageUptodate(new_page);
 
-       if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
+       if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
                goto oom_free_new;
 
        mmun_start  = address & PAGE_MASK;
@@ -3280,7 +3281,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         */
        __SetPageUptodate(page);
 
-       if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
+       if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
                goto oom_free_page;
 
        entry = mk_pte(page, vma->vm_page_prot);
@@ -3342,7 +3343,22 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
        return ret;
 }
 
-static void do_set_pte(struct vm_area_struct *vma, unsigned long address,
+/**
+ * do_set_pte - setup new PTE entry for given page and add reverse page mapping.
+ *
+ * @vma: virtual memory area
+ * @address: user virtual address
+ * @page: page to map
+ * @pte: pointer to target page table entry
+ * @write: true, if new entry is writable
+ * @anon: true, if it's anonymous page
+ *
+ * Caller must hold page table lock relevant for @pte.
+ *
+ * Target users are page handler itself and implementations of
+ * vm_ops->map_pages.
+ */
+void do_set_pte(struct vm_area_struct *vma, unsigned long address,
                struct page *page, pte_t *pte, bool write, bool anon)
 {
        pte_t entry;
@@ -3366,6 +3382,105 @@ static void do_set_pte(struct vm_area_struct *vma, unsigned long address,
        update_mmu_cache(vma, address, pte);
 }
 
+#define FAULT_AROUND_ORDER 4
+
+#ifdef CONFIG_DEBUG_FS
+static unsigned int fault_around_order = FAULT_AROUND_ORDER;
+
+static int fault_around_order_get(void *data, u64 *val)
+{
+       *val = fault_around_order;
+       return 0;
+}
+
+static int fault_around_order_set(void *data, u64 val)
+{
+       BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE);
+       if (1UL << val > PTRS_PER_PTE)
+               return -EINVAL;
+       fault_around_order = val;
+       return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops,
+               fault_around_order_get, fault_around_order_set, "%llu\n");
+
+static int __init fault_around_debugfs(void)
+{
+       void *ret;
+
+       ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL,
+                       &fault_around_order_fops);
+       if (!ret)
+               pr_warn("Failed to create fault_around_order in debugfs");
+       return 0;
+}
+late_initcall(fault_around_debugfs);
+
+static inline unsigned long fault_around_pages(void)
+{
+       return 1UL << fault_around_order;
+}
+
+static inline unsigned long fault_around_mask(void)
+{
+       return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1);
+}
+#else
+static inline unsigned long fault_around_pages(void)
+{
+       unsigned long nr_pages;
+
+       nr_pages = 1UL << FAULT_AROUND_ORDER;
+       BUILD_BUG_ON(nr_pages > PTRS_PER_PTE);
+       return nr_pages;
+}
+
+static inline unsigned long fault_around_mask(void)
+{
+       return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1);
+}
+#endif
+
+static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
+               pte_t *pte, pgoff_t pgoff, unsigned int flags)
+{
+       unsigned long start_addr;
+       pgoff_t max_pgoff;
+       struct vm_fault vmf;
+       int off;
+
+       start_addr = max(address & fault_around_mask(), vma->vm_start);
+       off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+       pte -= off;
+       pgoff -= off;
+
+       /*
+        *  max_pgoff is either end of page table or end of vma
+        *  or fault_around_pages() from pgoff, depending what is neast.
+        */
+       max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
+               PTRS_PER_PTE - 1;
+       max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
+                       pgoff + fault_around_pages() - 1);
+
+       /* Check if it makes any sense to call ->map_pages */
+       while (!pte_none(*pte)) {
+               if (++pgoff > max_pgoff)
+                       return;
+               start_addr += PAGE_SIZE;
+               if (start_addr >= vma->vm_end)
+                       return;
+               pte++;
+       }
+
+       vmf.virtual_address = (void __user *) start_addr;
+       vmf.pte = pte;
+       vmf.pgoff = pgoff;
+       vmf.max_pgoff = max_pgoff;
+       vmf.flags = flags;
+       vma->vm_ops->map_pages(vma, &vmf);
+}
+
 static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long address, pmd_t *pmd,
                pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
@@ -3373,7 +3488,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *fault_page;
        spinlock_t *ptl;
        pte_t *pte;
-       int ret;
+       int ret = 0;
+
+       /*
+        * Let's call ->map_pages() first and use ->fault() as fallback
+        * if page by the offset is not ready to be mapped (cold cache or
+        * something).
+        */
+       if (vma->vm_ops->map_pages) {
+               pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+               do_fault_around(vma, address, pte, pgoff, flags);
+               if (!pte_same(*pte, orig_pte))
+                       goto unlock_out;
+               pte_unmap_unlock(pte, ptl);
+       }
 
        ret = __do_fault(vma, address, pgoff, flags, &fault_page);
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@@ -3387,8 +3515,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                return ret;
        }
        do_set_pte(vma, address, fault_page, pte, false, false);
-       pte_unmap_unlock(pte, ptl);
        unlock_page(fault_page);
+unlock_out:
+       pte_unmap_unlock(pte, ptl);
        return ret;
 }
 
@@ -3408,7 +3537,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (!new_page)
                return VM_FAULT_OOM;
 
-       if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) {
+       if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
                page_cache_release(new_page);
                return VM_FAULT_OOM;
        }