*/
unsigned long transparent_hugepage_flags __read_mostly =
(1<<TRANSPARENT_HUGEPAGE_FLAG)|
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)|
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG);
/* default scan 8*512 pte (or vmas) every 30 second */
.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
};
+
+/*
+ * Raise min_free_kbytes to the amount recommended for transparent
+ * hugepages, then call setup_per_zone_wmarks().
+ *
+ * Nothing is done unless TRANSPARENT_HUGEPAGE_FLAG or
+ * TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG is set in transparent_hugepage_flags.
+ * min_free_kbytes is only ever increased here, never lowered.
+ * Always returns 0.
+ */
+static int set_recommended_min_free_kbytes(void)
+{
+	extern int min_free_kbytes;
+	unsigned long wanted, lowmem_cap;
+	struct zone *zone;
+	int populated = 0;
+
+	if (!(test_bit(TRANSPARENT_HUGEPAGE_FLAG,
+		       &transparent_hugepage_flags) ||
+	      test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
+		       &transparent_hugepage_flags)))
+		return 0;
+
+	for_each_populated_zone(zone)
+		populated++;
+
+	/* Two free hugepages per populated zone for MIGRATE_RESERVE. */
+	wanted = pageblock_nr_pages * populated * 2;
+
+	/*
+	 * On top of that, keep on average at least two pageblocks almost
+	 * free of another type: one for a migratetype to fall back to and
+	 * a second to avoid subsequent fallbacks of other types.  There
+	 * are 3 MIGRATE_TYPES we care about.
+	 */
+	wanted += pageblock_nr_pages * populated *
+		  MIGRATE_PCPTYPES * MIGRATE_PCPTYPES;
+
+	/* Never reserve more than 5% of the lowmem. */
+	lowmem_cap = (unsigned long) nr_free_buffer_pages() / 20;
+	if (lowmem_cap < wanted)
+		wanted = lowmem_cap;
+
+	/* Shift by PAGE_SHIFT - 10: presumably pages -> kbytes, TODO confirm. */
+	wanted <<= (PAGE_SHIFT - 10);
+
+	if (wanted > min_free_kbytes)
+		min_free_kbytes = wanted;
+	setup_per_zone_wmarks();
+
+	return 0;
+}
+late_initcall(set_recommended_min_free_kbytes);
+
static int start_khugepaged(void)
{
int err = 0;
mutex_unlock(&khugepaged_mutex);
if (wakeup)
wake_up_interruptible(&khugepaged_wait);
+
+ set_recommended_min_free_kbytes();
} else
/* wakeup to exit */
wake_up_interruptible(&khugepaged_wait);
ret = err;
}
+ if (ret > 0 &&
+ (test_bit(TRANSPARENT_HUGEPAGE_FLAG,
+ &transparent_hugepage_flags) ||
+ test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
+ &transparent_hugepage_flags)))
+ set_recommended_min_free_kbytes();
+
return ret;
}
static struct kobj_attribute enabled_attr =
start_khugepaged();
+ set_recommended_min_free_kbytes();
+
out:
return err;
}
return ret;
}
+/*
+ * Build the gfp mask for a hugepage allocation: GFP_TRANSHUGE, with
+ * __GFP_WAIT masked out when @defrag is zero.
+ */
+static inline gfp_t alloc_hugepage_gfpmask(int defrag)
+{
+	gfp_t gfp_mask = GFP_TRANSHUGE;
+
+	if (!defrag)
+		gfp_mask &= ~__GFP_WAIT;
+
+	return gfp_mask;
+}
+
+/*
+ * Request one HPAGE_PMD_ORDER page from alloc_pages_vma() for @vma at
+ * @haddr, using the mask built by alloc_hugepage_gfpmask(@defrag).
+ * Returns whatever alloc_pages_vma() returns.
+ */
+static inline struct page *alloc_hugepage_vma(int defrag,
+					      struct vm_area_struct *vma,
+					      unsigned long haddr)
+{
+	gfp_t gfp_mask = alloc_hugepage_gfpmask(defrag);
+
+	return alloc_pages_vma(gfp_mask, HPAGE_PMD_ORDER, vma, haddr);
+}
+/*
+ * Non-NUMA allocation helper: returns the result of alloc_pages() for one
+ * HPAGE_PMD_ORDER page, using the gfp mask from
+ * alloc_hugepage_gfpmask(@defrag).  Only compiled when CONFIG_NUMA is
+ * not defined.
+ */
+#ifndef CONFIG_NUMA
static inline struct page *alloc_hugepage(int defrag)
{
- return alloc_pages(GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT),
+ return alloc_pages(alloc_hugepage_gfpmask(defrag),
HPAGE_PMD_ORDER);
}
+#endif /* !CONFIG_NUMA */
int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma)))
return VM_FAULT_OOM;
- page = alloc_hugepage(transparent_hugepage_defrag(vma));
+ page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+ vma, haddr);
if (unlikely(!page))
goto out;
if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow())
- new_page = alloc_hugepage(transparent_hugepage_defrag(vma));
+ new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+ vma, haddr);
else
new_page = NULL;
return ret;
}
+/*
+ * Fill @vec for a range covered by a transparent huge pmd.
+ *
+ * *pmd is inspected with the mm's page_table_lock held.  If it is a huge
+ * pmd that is not being split, (end - addr) >> PAGE_SHIFT bytes of @vec
+ * are set to 1 and 1 is returned.  If it is a huge pmd that is splitting,
+ * wait_split_huge_page() is called after the lock is dropped and 0 is
+ * returned.  If it is not a huge pmd, 0 is returned.  @vec is only
+ * written in the first case.
+ */
+int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+		     unsigned long addr, unsigned long end,
+		     unsigned char *vec)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int stable;
+
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_trans_huge(*pmd))) {
+		spin_unlock(&mm->page_table_lock);
+		return 0;
+	}
+	stable = !pmd_trans_splitting(*pmd);
+	spin_unlock(&mm->page_table_lock);
+
+	if (unlikely(!stable)) {
+		wait_split_huge_page(vma->anon_vma, pmd);
+		return 0;
+	}
+
+	/* Backed by a huge page: every logical page in the range is present. */
+	memset(vec, 1, (end - addr) >> PAGE_SHIFT);
+	return 1;
+}
+/*
+ * Apply @newprot to a transparent huge pmd.
+ *
+ * *pmd is inspected with mm->page_table_lock held:
+ *  - huge and not splitting: the entry is cleared, run through
+ *    pmd_modify() with @newprot and written back; the lock is dropped,
+ *    the TLB is flushed for [addr, addr + HPAGE_PMD_SIZE) and 1 is
+ *    returned;
+ *  - huge but splitting: the lock is dropped, wait_split_huge_page() is
+ *    called and 0 is returned;
+ *  - not huge: the lock is dropped and 0 is returned.
+ */
+int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, pgprot_t newprot)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ int ret = 0;
+
+ spin_lock(&mm->page_table_lock);
+ if (likely(pmd_trans_huge(*pmd))) {
+ if (unlikely(pmd_trans_splitting(*pmd))) {
+ spin_unlock(&mm->page_table_lock);
+ wait_split_huge_page(vma->anon_vma, pmd);
+ } else {
+ pmd_t entry;
+ /* Clear, modify and reinstall the entry while still holding the lock. */
+ entry = pmdp_get_and_clear(mm, addr, pmd);
+ entry = pmd_modify(entry, newprot);
+ set_pmd_at(mm, addr, pmd, entry);
+ spin_unlock(&vma->vm_mm->page_table_lock); /* same lock: mm == vma->vm_mm */
+ flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
+ ret = 1;
+ }
+ } else
+ spin_unlock(&vma->vm_mm->page_table_lock); /* same lock: mm == vma->vm_mm */
+
+ return ret;
+}
+
pmd_t *page_check_address_pmd(struct page *page,
struct mm_struct *mm,
unsigned long address,
unsigned long hstart, hend;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+#ifndef CONFIG_NUMA
VM_BUG_ON(!*hpage);
+#else
+ VM_BUG_ON(*hpage);
+#endif
/*
* Prevent all access to pagetables with the exception of
if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
goto out;
+#ifndef CONFIG_NUMA
new_page = *hpage;
- if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+#else
+ new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+ if (unlikely(!new_page)) {
+ *hpage = ERR_PTR(-ENOMEM);
goto out;
+ }
+#endif
+ if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+ goto out_put_page;
anon_vma_lock(vma->anon_vma);
spin_unlock(&mm->page_table_lock);
anon_vma_unlock(vma->anon_vma);
mem_cgroup_uncharge_page(new_page);
- goto out;
+ goto out_put_page;
}
/*
mm->nr_ptes--;
spin_unlock(&mm->page_table_lock);
+#ifndef CONFIG_NUMA
*hpage = NULL;
+#endif
khugepaged_pages_collapsed++;
out:
up_write(&mm->mmap_sem);
+ return;
+
+out_put_page:
+#ifdef CONFIG_NUMA
+ put_page(new_page);
+#endif
+ goto out;
}
static int khugepaged_scan_pmd(struct mm_struct *mm,
while (progress < pages) {
cond_resched();
+#ifndef CONFIG_NUMA
if (!*hpage) {
*hpage = alloc_hugepage(khugepaged_defrag());
if (unlikely(!*hpage))
break;
}
+#else
+ if (IS_ERR(*hpage))
+ break;
+#endif
spin_lock(&khugepaged_mm_lock);
if (!khugepaged_scan.mm_slot)
}
}
+/*
+ * Queue the caller on khugepaged_wait and sleep interruptibly for up to
+ * khugepaged_alloc_sleep_millisecs milliseconds, then take it off the
+ * wait queue again.
+ */
+static void khugepaged_alloc_sleep(void)
+{
+	DEFINE_WAIT(wait);
+	unsigned long timeout;
+
+	add_wait_queue(&khugepaged_wait, &wait);
+	timeout = msecs_to_jiffies(khugepaged_alloc_sleep_millisecs);
+	schedule_timeout_interruptible(timeout);
+	remove_wait_queue(&khugepaged_wait, &wait);
+}
+/*
+ * Non-NUMA only: keep calling alloc_hugepage(khugepaged_defrag()) until it
+ * returns a page or khugepaged_enabled() turns false, calling
+ * khugepaged_alloc_sleep() after every failed attempt.  Returns the last
+ * alloc_hugepage() result, which is NULL only when the loop stopped
+ * because khugepaged_enabled() became false.
+ */
+#ifndef CONFIG_NUMA
static struct page *khugepaged_alloc_hugepage(void)
{
struct page *hpage;
do {
hpage = alloc_hugepage(khugepaged_defrag());
- if (!hpage) {
- DEFINE_WAIT(wait);
- add_wait_queue(&khugepaged_wait, &wait);
- schedule_timeout_interruptible(
- msecs_to_jiffies(
- khugepaged_alloc_sleep_millisecs));
- remove_wait_queue(&khugepaged_wait, &wait);
- }
+ if (!hpage)
+ khugepaged_alloc_sleep();
} while (unlikely(!hpage) &&
likely(khugepaged_enabled()));
return hpage;
}
+#endif /* !CONFIG_NUMA */
static void khugepaged_loop(void)
{
struct page *hpage;
+#ifdef CONFIG_NUMA
+ hpage = NULL;
+#endif
while (likely(khugepaged_enabled())) {
+#ifndef CONFIG_NUMA
hpage = khugepaged_alloc_hugepage();
if (unlikely(!hpage))
break;
+#else
+ if (IS_ERR(hpage)) {
+ khugepaged_alloc_sleep();
+ hpage = NULL;
+ }
+#endif
khugepaged_do_scan(&hpage);
+#ifndef CONFIG_NUMA
if (hpage)
put_page(hpage);
+#endif
if (khugepaged_has_work()) {
DEFINE_WAIT(wait);
if (!khugepaged_scan_sleep_millisecs)