[pandora-kernel.git] / mm/huge_memory.c
index b43721f..1b04121 100644
@@ -65,6 +65,17 @@ static void khugepaged_slab_free(void);
 static struct hlist_head *mm_slots_hash __read_mostly;
 static struct kmem_cache *mm_slot_cache __read_mostly;
 
+#ifdef CONFIG_FB
+extern const struct file_operations fb_fops;
+
+#define is_fb_vma(vma) \
+       ((vma)->vm_file && (vma)->vm_file->f_op == &fb_fops)
+#else
+#define is_fb_vma(vma) 0
+#endif
+
+static void split_fb_pmd(struct vm_area_struct *vma, pmd_t *pmd);
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
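
is_fb_vma() identifies framebuffer mappings by comparing the backing file's f_op with fbdev's file_operations. In stock kernels fb_fops is static to drivers/video/fbmem.c, so the extern declaration above presumably relies on a companion change elsewhere in this patch set (not shown in this diff), roughly:

    /* drivers/video/fbmem.c -- hypothetical companion change: drop the
     * static qualifier so the extern declaration in huge_memory.c resolves */
    -static const struct file_operations fb_fops = {
    +const struct file_operations fb_fops = {
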
@@ -682,7 +693,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
                if (unlikely(anon_vma_prepare(vma)))
                        return VM_FAULT_OOM;
-               if (unlikely(khugepaged_enter(vma)))
+               if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
                        return VM_FAULT_OOM;
                page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
                                          vma, haddr, numa_node_id(), 0);
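
khugepaged_enter() now takes the flags to test as an explicit argument, in the spirit of the upstream fix "mm, thp: fix collapsing of hugepages on madvise": callers pass the flags that are about to take effect rather than a possibly stale vma->vm_flags. A sketch of the matching inline helper assumed in include/linux/khugepaged.h:

    static inline int khugepaged_enter(struct vm_area_struct *vma,
                                       unsigned long vm_flags)
    {
            if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
                    if ((khugepaged_always() ||
                         (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
                        !(vm_flags & VM_NOHUGEPAGE))
                            if (__khugepaged_enter(vma->vm_mm))
                                    return -ENOMEM;
            return 0;
    }
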
@@ -986,6 +997,18 @@ out:
        return ret;
 }
 
+/*
+ * FOLL_FORCE can write to even unwritable PMDs, but only after a COW
+ * cycle has made them dirty; PageAnon() substitutes for pmd_dirty() here.
+ */
+static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
+                                       unsigned int flags)
+{
+       return pmd_write(pmd) ||
+               ((flags & FOLL_FORCE) && (flags & FOLL_COW) &&
+                page && PageAnon(page));
+}
+
 struct page *follow_trans_huge_pmd(struct mm_struct *mm,
                                   unsigned long addr,
                                   pmd_t *pmd,
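
can_follow_write_pmd() is the huge-pmd analogue of the pte-level check from the FOLL_FORCE/FOLL_COW fix ("mm: remove gup_flags FOLL_WRITE games from __get_user_pages()"); this kernel has no pmd_dirty(), so a present anonymous page serves as the after-COW marker instead. For comparison, the upstream pte-level counterpart, which can rely on pte_dirty():

    static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
    {
            return pte_write(pte) ||
                    ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
    }
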
@@ -995,11 +1018,12 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm,
 
        assert_spin_locked(&mm->page_table_lock);
 
-       if (flags & FOLL_WRITE && !pmd_write(*pmd))
-               goto out;
-
        page = pmd_page(*pmd);
        VM_BUG_ON(!PageHead(page));
+
+       if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, page, flags))
+               return NULL;
+
        if (flags & FOLL_TOUCH) {
                pmd_t _pmd;
                /*
@@ -1018,7 +1042,6 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm,
        if (flags & FOLL_GET)
                get_page_foll(page);
 
-out:
        return page;
 }
 
@@ -1029,6 +1052,13 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
        spin_lock(&tlb->mm->page_table_lock);
        if (likely(pmd_trans_huge(*pmd))) {
+               if (is_fb_vma(vma)) {
+                       split_fb_pmd(vma, pmd);
+                       /* must not return with page_table_lock held */
+                       spin_unlock(&tlb->mm->page_table_lock);
+                       return 0;
+               }
+
                if (unlikely(pmd_trans_splitting(*pmd))) {
                        spin_unlock(&tlb->mm->page_table_lock);
                        wait_split_huge_page(vma->anon_vma,
@@ -1495,6 +1523,157 @@ out:
        return ret;
 }
 
+/* callers must hold mmap_sem (madvise() does) */
+static int collapse_fb_pmd(struct mm_struct *mm, pmd_t *pmd,
+       unsigned long addr, struct vm_area_struct *vma)
+{
+       unsigned long _addr;
+       struct page *page;
+       pgtable_t pgtable;
+       pte_t *pte, *_pte;
+       pmd_t _pmd;
+       u32 pa;
+
+       pte = pte_offset_map(pmd, addr);
+       page = pte_page(*pte);
+       pa = __pfn_to_phys(page_to_pfn(page));
+
+       if ((addr | pa) & ~HPAGE_PMD_MASK) {
+               printk(KERN_ERR "collapse_fb: bad alignment: %08lx->%08x\n",
+                       addr, pa);
+               pte_unmap(pte);
+               return -EINVAL;
+       }
+       _pmd = pmdp_clear_flush_notify(vma, addr, pmd);
+
+       for (_pte = pte, _addr = addr; _pte < pte + HPAGE_PMD_NR; _pte++) {
+               pte_t pteval = *_pte;
+               struct page *src_page;
+
+               if (!pte_none(pteval)) {
+                       src_page = pte_page(pteval);
+
+                       pte_clear(vma->vm_mm, _addr, _pte);
+                       if (pte_present(pteval))
+                               page_remove_rmap(src_page);
+               }
+
+               _addr += PAGE_SIZE;
+       }
+
+       pte_unmap(pte);
+       pgtable = pmd_pgtable(_pmd);
+       VM_BUG_ON(page_count(pgtable) != 1);
+       VM_BUG_ON(page_mapcount(pgtable) != 0);
+
+       _pmd = mk_pmd(page, vma->vm_page_prot);
+       _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
+       _pmd = pmd_mkhuge(_pmd);
+
+       smp_wmb(); /* order the pte teardown before set_pmd_at() below */
+
+       spin_lock(&mm->page_table_lock);
+       BUG_ON(!pmd_none(*pmd));
+       set_pmd_at(mm, addr, pmd, _pmd);
+       update_mmu_cache(vma, addr, pmd);
+       prepare_pmd_huge_pte(pgtable, mm);
+       spin_unlock(&mm->page_table_lock);
+
+       return 0;
+}
+
+static int try_collapse_fb(struct vm_area_struct *vma)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long hstart, hend, addr;
+       int ret = 0;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
+       hend = vma->vm_end & HPAGE_PMD_MASK;
+       if (hstart >= hend)
+               return -EINVAL;
+
+       for (addr = hstart; addr < hend; addr += HPAGE_PMD_SIZE) {
+               pgd = pgd_offset(mm, addr);
+               if (!pgd_present(*pgd))
+                       return -EINVAL;
+
+               pud = pud_offset(pgd, addr);
+               if (!pud_present(*pud))
+                       return -EINVAL;
+
+               pmd = pmd_offset(pud, addr);
+               if (!pmd_present(*pmd))
+                       return -EINVAL;
+               if (pmd_trans_huge(*pmd))
+                       continue;
+
+               ret = collapse_fb_pmd(mm, pmd, addr, vma);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+/* undo collapse_fb_pmd(), restoring the pages so that the mm subsystem
+ * can release them; the caller must hold page_table_lock */
+static void split_fb_pmd(struct vm_area_struct *vma, pmd_t *pmd)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long addr, haddr, pfn;
+       struct page *page;
+       pgtable_t pgtable;
+       pmd_t _pmd;
+       int i;
+
+       page = pmd_page(*pmd);
+       pgtable = get_pmd_huge_pte(mm);
+       pfn = page_to_pfn(page);
+       addr = pfn << PAGE_SHIFT; /* XXX: phys used as virt, assumes VA == PA */
+
+       pmd_populate(mm, &_pmd, pgtable);
+
+       for (i = 0, haddr = addr; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+               pte_t *pte, entry;
+               BUG_ON(PageCompound(page + i));
+               entry = mk_pte(page + i, vma->vm_page_prot);
+               entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+               if (!pmd_young(*pmd))
+                       entry = pte_mkold(entry);
+               atomic_set(&page[i]._mapcount, 0); /* hack? 0 means mapped once */
+               pte = pte_offset_map(&_pmd, haddr);
+               BUG_ON(!pte_none(*pte));
+               set_pte_at(mm, haddr, pte, entry);
+               pte_unmap(pte);
+       }
+
+       set_pmd_at(mm, addr, pmd, pmd_mknotpresent(*pmd));
+       flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
+       pmd_populate(mm, pmd, pgtable);
+}
+
+#ifndef __arm__
+#error arm only..
+#endif
+static u32 pmd_to_va(struct mm_struct *mm, pmd_t *pmd)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd0;
+       u32 ret;
+
+       pgd = pgd_offset(mm, 0);
+       pud = pud_offset(pgd, 0);
+       pmd0 = pmd_offset(pud, 0);
+
+       ret = (pmd - pmd0) << SECTION_SHIFT; /* one 1MB section per pmd_t */
+       return ret;
+}
+
 #define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
                   VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
 
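
The collapse path is driven from userspace: mapping the framebuffer and calling madvise(MADV_HUGEPAGE) reaches try_collapse_fb() through the hugepage_madvise() hook below, with madvise() providing the mmap_sem that collapse_fb_pmd() requires. A minimal usage sketch; the device path and length are assumptions, and the mapping must fully cover at least one 2MB-aligned, 2MB-sized range:

    #include <fcntl.h>
    #include <stddef.h>
    #include <sys/mman.h>

    static int collapse_fb_mapping(const char *dev, size_t len)
    {
            int fd = open(dev, O_RDWR);     /* e.g. "/dev/fb0" */
            void *fb;

            if (fd < 0)
                    return -1;
            fb = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            if (fb == MAP_FAILED)
                    return -1;
            /* MADV_HUGEPAGE -> hugepage_madvise() -> try_collapse_fb() */
            return madvise(fb, len, MADV_HUGEPAGE);
    }
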
@@ -1503,6 +1682,9 @@ int hugepage_madvise(struct vm_area_struct *vma,
 {
        switch (advice) {
        case MADV_HUGEPAGE:
+               if (is_fb_vma(vma))
+                       return try_collapse_fb(vma);
+
                /*
                 * Be somewhat over-protective like KSM for now!
                 */
@@ -1515,7 +1697,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
                 * register it here without waiting a page fault that
                 * may not happen any time soon.
                 */
-               if (unlikely(khugepaged_enter_vma_merge(vma)))
+               if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
                        return -ENOMEM;
                break;
        case MADV_NOHUGEPAGE:
@@ -1647,7 +1829,8 @@ int __khugepaged_enter(struct mm_struct *mm)
        return 0;
 }
 
-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+                              unsigned long vm_flags)
 {
        unsigned long hstart, hend;
        if (!vma->anon_vma)
@@ -1656,18 +1839,18 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
                 * page fault if needed.
                 */
                return 0;
-       if (vma->vm_ops)
+       if (vma->vm_ops || (vm_flags & VM_NO_THP))
                /* khugepaged not yet working on file or special mappings */
                return 0;
        /*
         * If is_pfn_mapping() is true, is_linear_pfn_mapping() must be
         * true too, verify it here.
         */
-       VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+       VM_BUG_ON(is_linear_pfn_mapping(vma));
        hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
        hend = vma->vm_end & HPAGE_PMD_MASK;
        if (hstart < hend)
-               return khugepaged_enter(vma);
+               return khugepaged_enter(vma, vm_flags);
        return 0;
 }
 
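
The hstart/hend computation rounds the VMA inward to huge-page boundaries, so only fully covered, aligned ranges are considered. A worked example, assuming HPAGE_PMD_SIZE == 2MB (so ~HPAGE_PMD_MASK == 0x1fffff):

    vm_start = 0x40123000, vm_end = 0x40901000
    hstart = (0x40123000 + 0x1fffff) & ~0x1fffff = 0x40200000
    hend   =  0x40901000 & ~0x1fffff             = 0x40800000
    /* the three full 2MB ranges in [0x40200000, 0x40800000) qualify */
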
@@ -1838,6 +2021,24 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
        }
 }
 
+static bool hugepage_vma_check(struct vm_area_struct *vma)
+{
+       if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
+           (vma->vm_flags & VM_NOHUGEPAGE))
+               return false;
+
+       if (!vma->anon_vma || vma->vm_ops)
+               return false;
+       if (is_vma_temporary_stack(vma))
+               return false;
+       /*
+        * If is_pfn_mapping() is true, is_linear_pfn_mapping() must be
+        * true too, verify it here.
+        */
+       VM_BUG_ON(is_linear_pfn_mapping(vma));
+       return !(vma->vm_flags & VM_NO_THP);
+}
+
 static void collapse_huge_page(struct mm_struct *mm,
                               unsigned long address,
                               struct page **hpage,
@@ -1910,21 +2111,8 @@ static void collapse_huge_page(struct mm_struct *mm,
        hend = vma->vm_end & HPAGE_PMD_MASK;
        if (address < hstart || address + HPAGE_PMD_SIZE > hend)
                goto out;
-
-       if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-           (vma->vm_flags & VM_NOHUGEPAGE))
-               goto out;
-
-       if (!vma->anon_vma || vma->vm_ops)
-               goto out;
-       if (is_vma_temporary_stack(vma))
+       if (!hugepage_vma_check(vma))
                goto out;
-       /*
-        * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
-        * true too, verify it here.
-        */
-       VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
-
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
                goto out;
@@ -2154,25 +2342,11 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
                        progress++;
                        break;
                }
-
-               if ((!(vma->vm_flags & VM_HUGEPAGE) &&
-                    !khugepaged_always()) ||
-                   (vma->vm_flags & VM_NOHUGEPAGE)) {
-               skip:
+               if (!hugepage_vma_check(vma)) {
+skip:
                        progress++;
                        continue;
                }
-               if (!vma->anon_vma || vma->vm_ops)
-                       goto skip;
-               if (is_vma_temporary_stack(vma))
-                       goto skip;
-               /*
-                * If is_pfn_mapping() is true, is_linear_pfn_mapping()
-                * must be true too, verify it here.
-                */
-               VM_BUG_ON(is_linear_pfn_mapping(vma) ||
-                         vma->vm_flags & VM_NO_THP);
-
                hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
                hend = vma->vm_end & HPAGE_PMD_MASK;
                if (hstart >= hend)
@@ -2389,6 +2563,7 @@ static int khugepaged(void *none)
 
 void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
 {
+       struct vm_area_struct *vma;
        struct page *page;
 
        spin_lock(&mm->page_table_lock);
@@ -2396,6 +2571,12 @@ void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd)
                spin_unlock(&mm->page_table_lock);
                return;
        }
+       vma = find_vma(mm, pmd_to_va(mm, pmd));
+       if (vma && is_fb_vma(vma)) {
+               split_fb_pmd(vma, pmd);
+               spin_unlock(&mm->page_table_lock);
+               return;
+       }
        page = pmd_page(*pmd);
        VM_BUG_ON(!page_count(page));
        get_page(page);
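
__split_huge_page_pmd() takes no VMA in this kernel, so the fb check has to recover one: pmd_to_va() converts the pmd pointer back into a virtual address by indexing from the pmd that maps VA 0 (one 1MB ARM section per pmd_t entry, hence the arm-only #error guard), and find_vma() then yields the mapping for is_fb_vma(). A worked example under that assumption:

    /* SECTION_SHIFT == 20 on ARM (non-LPAE): */
    pmd - pmd0 == 0x480   =>   va == 0x480 << 20 == 0x48000000
    /* find_vma(mm, 0x48000000) then returns the VMA covering that address */
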