#include <linux/mm_inline.h>
#include <linux/kthread.h>
#include <linux/khugepaged.h>
+#include <linux/freezer.h>
+#include <linux/mman.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
goto out;
}
+ /*
+ * By default, disable transparent hugepages on smaller systems,
+ * where the extra memory used is likely to hurt more than the
+ * reduced TLB overhead helps. The admin can still enable it
+ * through /sys.
+ */
+ if (totalram_pages < (512 << (20 - PAGE_SHIFT)))
+ transparent_hugepage_flags = 0;
+
start_khugepaged();
set_recommended_min_free_kbytes();
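For reference (not part of the patch): with 4KB pages, PAGE_SHIFT is 12, so 512 << (20 - PAGE_SHIFT) evaluates to 131072 pages, i.e. exactly 512MB of RAM. A minimal userspace sketch of the same arithmetic, assuming that page size:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	long page_shift = 12;				/* assumed 4KB pages */
	long pages = sysconf(_SC_PHYS_PAGES);		/* ~ totalram_pages */
	long threshold = 512L << (20 - page_shift);	/* pages in 512MB */

	printf("THP default: %s (%ld of %ld pages)\n",
	       pages < threshold ? "off" : "on", pages, threshold);
	return 0;
}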
int i;
unsigned long head_index = page->index;
struct zone *zone = page_zone(page);
+ int zonestat;
/* prevent PageLRU from going away under us, and freeze the LRU stats */
spin_lock_irq(&zone->lru_lock);
BUG_ON(!PageDirty(page_tail));
BUG_ON(!PageSwapBacked(page_tail));
+ mem_cgroup_split_huge_fixup(page, page_tail);
+
lru_add_page_tail(zone, page, page_tail);
}
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
+ /*
+ * A hugepage counts as HPAGE_PMD_NR pages in the LRU statistics,
+ * so adjust those accordingly if this page is on the LRU.
+ */
+ if (PageLRU(page)) {
+ zonestat = NR_LRU_BASE + page_lru(page);
+ __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
+ }
+
ClearPageCompound(page);
compound_unlock(page);
spin_unlock_irq(&zone->lru_lock);
return ret;
}
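The arithmetic behind the fixup, worked for x86-64 defaults (assumed values, not part of the patch): the head page went onto the LRU accounted as HPAGE_PMD_NR pages, but after the split it stands for a single base page while lru_add_page_tail() accounts each tail separately, so the head's LRU counter must shrink by HPAGE_PMD_NR - 1:

/* PAGE_SHIFT = 12, PMD_SHIFT = 21 (x86-64, assumed)           */
/* HPAGE_PMD_NR = 1 << (PMD_SHIFT - PAGE_SHIFT) = 512          */
/* before split: head accounted as 512 pages on its LRU list   */
/* after split:  head should count 1, the 511 tails add 511    */
/* fixup:        __mod_zone_page_state(zone, zonestat, -511)   */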
-int hugepage_madvise(unsigned long *vm_flags)
+int hugepage_madvise(struct vm_area_struct *vma,
+ unsigned long *vm_flags, int advice)
{
- /*
- * Be somewhat over-protective like KSM for now!
- */
- if (*vm_flags & (VM_HUGEPAGE | VM_SHARED | VM_MAYSHARE |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
- VM_MIXEDMAP | VM_SAO))
- return -EINVAL;
-
- *vm_flags |= VM_HUGEPAGE;
+ switch (advice) {
+ case MADV_HUGEPAGE:
+ /*
+ * Be somewhat over-protective like KSM for now!
+ */
+ if (*vm_flags & (VM_HUGEPAGE |
+ VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+ VM_MIXEDMAP | VM_SAO))
+ return -EINVAL;
+ *vm_flags &= ~VM_NOHUGEPAGE;
+ *vm_flags |= VM_HUGEPAGE;
+ /*
+ * If the vma becomes suitable for khugepaged to scan,
+ * register it here without waiting for a page fault that
+ * may not happen any time soon.
+ */
+ if (unlikely(khugepaged_enter_vma_merge(vma)))
+ return -ENOMEM;
+ break;
+ case MADV_NOHUGEPAGE:
+ /*
+ * Be somewhat over-protective like KSM for now!
+ */
+ if (*vm_flags & (VM_NOHUGEPAGE |
+ VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+ VM_MIXEDMAP | VM_SAO))
+ return -EINVAL;
+ *vm_flags &= ~VM_HUGEPAGE;
+ *vm_flags |= VM_NOHUGEPAGE;
+ /*
+ * Setting VM_NOHUGEPAGE will prevent khugepaged from scanning
+ * this vma, even if the mm stays registered in khugepaged
+ * (it may have been registered before VM_NOHUGEPAGE was set).
+ */
+ break;
+ }
return 0;
}
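Not part of the patch: a minimal userspace sketch of the new madvise interface. The MADV_HUGEPAGE/MADV_NOHUGEPAGE values (14 and 15) come from the patched asm-generic/mman-common.h; older libc headers may not define them yet:

#include <stdio.h>
#include <sys/mman.h>

#ifndef MADV_HUGEPAGE
#define MADV_HUGEPAGE	14	/* value from the patched kernel headers */
#endif
#ifndef MADV_NOHUGEPAGE
#define MADV_NOHUGEPAGE	15
#endif

int main(void)
{
	size_t len = 4UL << 20;		/* 4MB: room for two 2MB hugepages */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	/* opt this range in: khugepaged may now collapse it */
	if (madvise(p, len, MADV_HUGEPAGE))
		perror("madvise(MADV_HUGEPAGE)");
	/* ... use the memory ... */
	/* opt the range back out again */
	if (madvise(p, len, MADV_NOHUGEPAGE))
		perror("madvise(MADV_NOHUGEPAGE)");
	munmap(p, len);
	return 0;
}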
if (address < hstart || address + HPAGE_PMD_SIZE > hend)
goto out;
- if (!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always())
+ if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
+ (vma->vm_flags & VM_NOHUGEPAGE))
goto out;
/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
spin_lock(ptl);
isolated = __collapse_huge_page_isolate(vma, address, pte);
spin_unlock(ptl);
- pte_unmap(pte);
if (unlikely(!isolated)) {
+ pte_unmap(pte);
spin_lock(&mm->page_table_lock);
BUG_ON(!pmd_none(*pmd));
set_pmd_at(mm, address, pmd, _pmd);
anon_vma_unlock(vma->anon_vma);
__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
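+ /* the copy above reads the old pages through pte, so unmap only now */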
+ pte_unmap(pte);
__SetPageUptodate(new_page);
pgtable = pmd_pgtable(_pmd);
VM_BUG_ON(page_count(pgtable) != 1);
break;
}
- if (!(vma->vm_flags & VM_HUGEPAGE) &&
- !khugepaged_always()) {
+ if ((!(vma->vm_flags & VM_HUGEPAGE) &&
+ !khugepaged_always()) ||
+ (vma->vm_flags & VM_NOHUGEPAGE)) {
progress++;
continue;
}
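The same VM_HUGEPAGE/VM_NOHUGEPAGE eligibility test now appears both in the page-fault path above and in khugepaged's scan loop here; a hypothetical helper (not in this patch) could keep the two sites in sync:

/* Sketch only: consolidate the duplicated eligibility test. */
static inline int vma_thp_eligible(struct vm_area_struct *vma)
{
	if (vma->vm_flags & VM_NOHUGEPAGE)
		return 0;
	return (vma->vm_flags & VM_HUGEPAGE) || khugepaged_always();
}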
break;
#endif
+ if (unlikely(kthread_should_stop() || freezing(current)))
+ break;
+
spin_lock(&khugepaged_mm_lock);
if (!khugepaged_scan.mm_slot)
pass_through_head++;
if (hpage)
put_page(hpage);
#endif
+ try_to_freeze();
+ if (unlikely(kthread_should_stop()))
+ break;
if (khugepaged_has_work()) {
DEFINE_WAIT(wait);
if (!khugepaged_scan_sleep_millisecs)
khugepaged_scan_sleep_millisecs));
remove_wait_queue(&khugepaged_wait, &wait);
} else if (khugepaged_enabled())
- wait_event_interruptible(khugepaged_wait,
- khugepaged_wait_event());
+ wait_event_freezable(khugepaged_wait,
+ khugepaged_wait_event());
}
}
{
struct mm_slot *mm_slot;
+ set_freezable();
set_user_nice(current, 19);
/* serialize with start_khugepaged() */
mutex_lock(&khugepaged_mutex);
if (!khugepaged_enabled())
break;
+ if (unlikely(kthread_should_stop()))
+ break;
}
spin_lock(&khugepaged_mm_lock);
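For context: set_freezable(), the freezing()/try_to_freeze() calls and wait_event_freezable() together form the standard freezable-kthread pattern this patch applies to khugepaged. A standalone sketch of that pattern, with hypothetical names:

#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wait);
static int demo_has_work;			/* hypothetical work flag */

static int demo_thread(void *unused)
{
	set_freezable();			/* opt in to the freezer */
	while (!kthread_should_stop()) {
		try_to_freeze();		/* park here across suspend */
		if (demo_has_work) {
			/* ... process one batch of work ... */
			demo_has_work = 0;
		}
		/* sleep until woken, without blocking a system freeze */
		wait_event_freezable(demo_wait,
				     demo_has_work ||
				     kthread_should_stop());
	}
	return 0;
}

Such a thread would be started with kthread_run(demo_thread, NULL, "demo") and stopped with kthread_stop(), which is why the loop re-checks kthread_should_stop() after every sleep, just as khugepaged does above.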