Merge tag 'pm+acpi-3.16-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafae...
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dd30f22..226910c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -544,7 +544,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 /* Movability of hugepages depends on migration support. */
 static inline gfp_t htlb_alloc_mask(struct hstate *h)
 {
-       if (hugepages_treat_as_movable || hugepage_migration_support(h))
+       if (hugepages_treat_as_movable || hugepage_migration_supported(h))
                return GFP_HIGHUSER_MOVABLE;
        else
                return GFP_HIGHUSER;
@@ -607,25 +607,242 @@ err:
        return NULL;
 }
 
+/*
+ * common helper functions for hstate_next_node_to_{alloc|free}.
+ * We may have allocated or freed a huge page based on a different
+ * nodes_allowed previously, so h->next_node_to_{alloc|free} might
+ * be outside of *nodes_allowed.  Ensure that we use an allowed
+ * node for alloc or free.
+ */
+static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+       nid = next_node(nid, *nodes_allowed);
+       if (nid == MAX_NUMNODES)
+               nid = first_node(*nodes_allowed);
+       VM_BUG_ON(nid >= MAX_NUMNODES);
+
+       return nid;
+}
+
+static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+       if (!node_isset(nid, *nodes_allowed))
+               nid = next_node_allowed(nid, nodes_allowed);
+       return nid;
+}
+
+/*
+ * returns the previously saved node ["this node"] from which to
+ * allocate a persistent huge page for the pool and advance the
+ * next node from which to allocate, handling wrap at end of node
+ * mask.
+ */
+static int hstate_next_node_to_alloc(struct hstate *h,
+                                       nodemask_t *nodes_allowed)
+{
+       int nid;
+
+       VM_BUG_ON(!nodes_allowed);
+
+       nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
+       h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
+
+       return nid;
+}
+
+/*
+ * helper for free_pool_huge_page() - return the previously saved
+ * node ["this node"] from which to free a huge page.  Advance the
+ * next node id whether or not we find a free huge page to free so
+ * that the next attempt to free addresses the next node.
+ */
+static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
+{
+       int nid;
+
+       VM_BUG_ON(!nodes_allowed);
+
+       nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
+       h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
+
+       return nid;
+}
+
+#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask)          \
+       for (nr_nodes = nodes_weight(*mask);                            \
+               nr_nodes > 0 &&                                         \
+               ((node = hstate_next_node_to_alloc(hs, mask)) || 1);    \
+               nr_nodes--)
+
+#define for_each_node_mask_to_free(hs, nr_nodes, node, mask)           \
+       for (nr_nodes = nodes_weight(*mask);                            \
+               nr_nodes > 0 &&                                         \
+               ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
+               nr_nodes--)
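+
+/*
+ * Both iterators visit each node in *mask at most once per walk,
+ * starting from the hstate's saved next node.  The "|| 1" keeps the
+ * loop condition true even when the helper returns node id 0, so
+ * termination is driven solely by nr_nodes.  See
+ * alloc_fresh_gigantic_page() below for a typical caller.
+ */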
+
+#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
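+/*
+ * Undo prep_compound_gigantic_page(): clear the head/tail markers and
+ * restore each tail page's reference count so the range can be handed
+ * back to the allocator as individual pages.
+ */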
+static void destroy_compound_gigantic_page(struct page *page,
+                                       unsigned long order)
+{
+       int i;
+       int nr_pages = 1 << order;
+       struct page *p = page + 1;
+
+       for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+               __ClearPageTail(p);
+               set_page_refcounted(p);
+               p->first_page = NULL;
+       }
+
+       set_compound_order(page, 0);
+       __ClearPageHead(page);
+}
+
+static void free_gigantic_page(struct page *page, unsigned order)
+{
+       free_contig_range(page_to_pfn(page), 1 << order);
+}
+
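+/*
+ * Returns 0 on success and a negative errno on failure, mirroring
+ * alloc_contig_range().
+ */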
+static int __alloc_gigantic_page(unsigned long start_pfn,
+                               unsigned long nr_pages)
+{
+       unsigned long end_pfn = start_pfn + nr_pages;
+       return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+}
+
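+/*
+ * Called with zone->lock held: a conservative pre-check that the range
+ * is backed by valid, unreserved, free, non-hugetlb pages.  The lock is
+ * dropped before alloc_contig_range(), which still does the
+ * authoritative isolation and may fail if the state changes meanwhile.
+ */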
+static bool pfn_range_valid_gigantic(unsigned long start_pfn,
+                               unsigned long nr_pages)
+{
+       unsigned long i, end_pfn = start_pfn + nr_pages;
+       struct page *page;
+
+       for (i = start_pfn; i < end_pfn; i++) {
+               if (!pfn_valid(i))
+                       return false;
+
+               page = pfn_to_page(i);
+
+               if (PageReserved(page))
+                       return false;
+
+               if (page_count(page) > 0)
+                       return false;
+
+               if (PageHuge(page))
+                       return false;
+       }
+
+       return true;
+}
+
+static bool zone_spans_last_pfn(const struct zone *zone,
+                       unsigned long start_pfn, unsigned long nr_pages)
+{
+       unsigned long last_pfn = start_pfn + nr_pages - 1;
+       return zone_spans_pfn(zone, last_pfn);
+}
+
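+/*
+ * Walk every zone of @nid, scanning naturally aligned ranges of
+ * 1 << order pages, and try to claim the first suitable one with
+ * alloc_contig_range().
+ */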
+static struct page *alloc_gigantic_page(int nid, unsigned order)
+{
+       unsigned long nr_pages = 1 << order;
+       unsigned long ret, pfn, flags;
+       struct zone *z;
+
+       z = NODE_DATA(nid)->node_zones;
+       for (; z - NODE_DATA(nid)->node_zones < MAX_NR_ZONES; z++) {
+               spin_lock_irqsave(&z->lock, flags);
+
+               pfn = ALIGN(z->zone_start_pfn, nr_pages);
+               while (zone_spans_last_pfn(z, pfn, nr_pages)) {
+                       if (pfn_range_valid_gigantic(pfn, nr_pages)) {
+                               /*
+                                * We release the zone lock here because
+                                * alloc_contig_range() will also lock the zone
+                                * at some point. If there's an allocation
+                                * spinning on this lock, it may win the race
+                                * and cause alloc_contig_range() to fail...
+                                */
+                               spin_unlock_irqrestore(&z->lock, flags);
+                               ret = __alloc_gigantic_page(pfn, nr_pages);
+                               if (!ret)
+                                       return pfn_to_page(pfn);
+                               spin_lock_irqsave(&z->lock, flags);
+                       }
+                       pfn += nr_pages;
+               }
+
+               spin_unlock_irqrestore(&z->lock, flags);
+       }
+
+       return NULL;
+}
+
+static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
+static void prep_compound_gigantic_page(struct page *page, unsigned long order);
+
+static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
+{
+       struct page *page;
+
+       page = alloc_gigantic_page(nid, huge_page_order(h));
+       if (page) {
+               prep_compound_gigantic_page(page, huge_page_order(h));
+               prep_new_huge_page(h, page, nid);
+       }
+
+       return page;
+}
+
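+/* Returns 1 if a gigantic page was allocated and prepped, 0 otherwise. */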
+static int alloc_fresh_gigantic_page(struct hstate *h,
+                               nodemask_t *nodes_allowed)
+{
+       struct page *page = NULL;
+       int nr_nodes, node;
+
+       for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+               page = alloc_fresh_gigantic_page_node(h, node);
+               if (page)
+                       return 1;
+       }
+
+       return 0;
+}
+
+static inline bool gigantic_page_supported(void) { return true; }
+#else
+static inline bool gigantic_page_supported(void) { return false; }
+static inline void free_gigantic_page(struct page *page, unsigned order) { }
+static inline void destroy_compound_gigantic_page(struct page *page,
+                                               unsigned long order) { }
+static inline int alloc_fresh_gigantic_page(struct hstate *h,
+                                       nodemask_t *nodes_allowed) { return 0; }
+#endif
+
 static void update_and_free_page(struct hstate *h, struct page *page)
 {
        int i;
 
-       VM_BUG_ON(h->order >= MAX_ORDER);
+       if (hstate_is_gigantic(h) && !gigantic_page_supported())
+               return;
 
        h->nr_huge_pages--;
        h->nr_huge_pages_node[page_to_nid(page)]--;
        for (i = 0; i < pages_per_huge_page(h); i++) {
                page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
                                1 << PG_referenced | 1 << PG_dirty |
-                               1 << PG_active | 1 << PG_reserved |
-                               1 << PG_private | 1 << PG_writeback);
+                               1 << PG_active | 1 << PG_private |
+                               1 << PG_writeback);
        }
        VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
        set_compound_page_dtor(page, NULL);
        set_page_refcounted(page);
-       arch_release_hugepage(page);
-       __free_pages(page, huge_page_order(h));
+       if (hstate_is_gigantic(h)) {
+               destroy_compound_gigantic_page(page, huge_page_order(h));
+               free_gigantic_page(page, huge_page_order(h));
+       } else {
+               arch_release_hugepage(page);
+               __free_pages(page, huge_page_order(h));
+       }
 }
 
 struct hstate *size_to_hstate(unsigned long size)
@@ -664,7 +881,7 @@ static void free_huge_page(struct page *page)
        if (restore_reserve)
                h->resv_huge_pages++;
 
-       if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
+       if (h->surplus_huge_pages_node[nid]) {
                /* remove the page from active list */
                list_del(&page->lru);
                update_and_free_page(h, page);
@@ -690,8 +907,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
        put_page(page); /* free it into the hugepage allocator */
 }
 
-static void __init prep_compound_gigantic_page(struct page *page,
-                                              unsigned long order)
+static void prep_compound_gigantic_page(struct page *page, unsigned long order)
 {
        int i;
        int nr_pages = 1 << order;
@@ -769,9 +985,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
        struct page *page;
 
-       if (h->order >= MAX_ORDER)
-               return NULL;
-
        page = alloc_pages_exact_node(nid,
                htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
                                                __GFP_REPEAT|__GFP_NOWARN,
@@ -787,79 +1000,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
        return page;
 }
 
-/*
- * common helper functions for hstate_next_node_to_{alloc|free}.
- * We may have allocated or freed a huge page based on a different
- * nodes_allowed previously, so h->next_node_to_{alloc|free} might
- * be outside of *nodes_allowed.  Ensure that we use an allowed
- * node for alloc or free.
- */
-static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
-{
-       nid = next_node(nid, *nodes_allowed);
-       if (nid == MAX_NUMNODES)
-               nid = first_node(*nodes_allowed);
-       VM_BUG_ON(nid >= MAX_NUMNODES);
-
-       return nid;
-}
-
-static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
-{
-       if (!node_isset(nid, *nodes_allowed))
-               nid = next_node_allowed(nid, nodes_allowed);
-       return nid;
-}
-
-/*
- * returns the previously saved node ["this node"] from which to
- * allocate a persistent huge page for the pool and advance the
- * next node from which to allocate, handling wrap at end of node
- * mask.
- */
-static int hstate_next_node_to_alloc(struct hstate *h,
-                                       nodemask_t *nodes_allowed)
-{
-       int nid;
-
-       VM_BUG_ON(!nodes_allowed);
-
-       nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
-       h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
-
-       return nid;
-}
-
-/*
- * helper for free_pool_huge_page() - return the previously saved
- * node ["this node"] from which to free a huge page.  Advance the
- * next node id whether or not we find a free huge page to free so
- * that the next attempt to free addresses the next node.
- */
-static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
-{
-       int nid;
-
-       VM_BUG_ON(!nodes_allowed);
-
-       nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
-       h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
-
-       return nid;
-}
-
-#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask)          \
-       for (nr_nodes = nodes_weight(*mask);                            \
-               nr_nodes > 0 &&                                         \
-               ((node = hstate_next_node_to_alloc(hs, mask)) || 1);    \
-               nr_nodes--)
-
-#define for_each_node_mask_to_free(hs, nr_nodes, node, mask)           \
-       for (nr_nodes = nodes_weight(*mask);                            \
-               nr_nodes > 0 &&                                         \
-               ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
-               nr_nodes--)
-
 static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 {
        struct page *page;
@@ -963,7 +1103,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
        struct page *page;
        unsigned int r_nid;
 
-       if (h->order >= MAX_ORDER)
+       if (hstate_is_gigantic(h))
                return NULL;
 
        /*
@@ -1156,7 +1296,7 @@ static void return_unused_surplus_pages(struct hstate *h,
        h->resv_huge_pages -= unused_resv_pages;
 
        /* Cannot return gigantic pages currently */
-       if (h->order >= MAX_ORDER)
+       if (hstate_is_gigantic(h))
                return;
 
        nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
@@ -1172,6 +1312,7 @@ static void return_unused_surplus_pages(struct hstate *h,
        while (nr_pages--) {
                if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
                        break;
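+               /*
+                * Don't hold hugetlb_lock across the whole walk when a
+                * large number of surplus pages are being freed.
+                */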
+               cond_resched_lock(&hugetlb_lock);
        }
 }
 
@@ -1245,24 +1386,17 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
                        return ERR_PTR(-ENOSPC);
 
        ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
-       if (ret) {
-               if (chg || avoid_reserve)
-                       hugepage_subpool_put_pages(spool, 1);
-               return ERR_PTR(-ENOSPC);
-       }
+       if (ret)
+               goto out_subpool_put;
+
        spin_lock(&hugetlb_lock);
        page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
        if (!page) {
                spin_unlock(&hugetlb_lock);
                page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
-               if (!page) {
-                       hugetlb_cgroup_uncharge_cgroup(idx,
-                                                      pages_per_huge_page(h),
-                                                      h_cg);
-                       if (chg || avoid_reserve)
-                               hugepage_subpool_put_pages(spool, 1);
-                       return ERR_PTR(-ENOSPC);
-               }
+               if (!page)
+                       goto out_uncharge_cgroup;
+
                spin_lock(&hugetlb_lock);
                list_move(&page->lru, &h->hugepage_activelist);
                /* Fall through */
@@ -1274,6 +1408,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 
        vma_commit_reservation(h, vma, addr);
        return page;
+
+out_uncharge_cgroup:
+       hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
+out_subpool_put:
+       if (chg || avoid_reserve)
+               hugepage_subpool_put_pages(spool, 1);
+       return ERR_PTR(-ENOSPC);
 }
 
 /*
@@ -1355,7 +1496,7 @@ static void __init gather_bootmem_prealloc(void)
                 * fix confusing memory reports from free(1) and another
                 * side-effects, like CommitLimit going negative.
                 */
-               if (h->order > (MAX_ORDER - 1))
+               if (hstate_is_gigantic(h))
                        adjust_managed_page_count(page, 1 << h->order);
        }
 }
@@ -1365,7 +1506,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
        unsigned long i;
 
        for (i = 0; i < h->max_huge_pages; ++i) {
-               if (h->order >= MAX_ORDER) {
+               if (hstate_is_gigantic(h)) {
                        if (!alloc_bootmem_huge_page(h))
                                break;
                } else if (!alloc_fresh_huge_page(h,
@@ -1381,7 +1522,7 @@ static void __init hugetlb_init_hstates(void)
 
        for_each_hstate(h) {
                /* oversize hugepages were init'ed in early boot */
-               if (h->order < MAX_ORDER)
+               if (!hstate_is_gigantic(h))
                        hugetlb_hstate_alloc_pages(h);
        }
 }
@@ -1415,7 +1556,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
 {
        int i;
 
-       if (h->order >= MAX_ORDER)
+       if (hstate_is_gigantic(h))
                return;
 
        for_each_node_mask(i, *nodes_allowed) {
@@ -1478,7 +1619,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 {
        unsigned long min_count, ret;
 
-       if (h->order >= MAX_ORDER)
+       if (hstate_is_gigantic(h) && !gigantic_page_supported())
                return h->max_huge_pages;
 
        /*
@@ -1505,7 +1646,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
                 * and reducing the surplus.
                 */
                spin_unlock(&hugetlb_lock);
-               ret = alloc_fresh_huge_page(h, nodes_allowed);
+               if (hstate_is_gigantic(h))
+                       ret = alloc_fresh_gigantic_page(h, nodes_allowed);
+               else
+                       ret = alloc_fresh_huge_page(h, nodes_allowed);
                spin_lock(&hugetlb_lock);
                if (!ret)
                        goto out;
@@ -1605,7 +1749,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
                goto out;
 
        h = kobj_to_hstate(kobj, &nid);
-       if (h->order >= MAX_ORDER) {
+       if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
                err = -EINVAL;
                goto out;
        }
@@ -1688,7 +1832,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
        unsigned long input;
        struct hstate *h = kobj_to_hstate(kobj, NULL);
 
-       if (h->order >= MAX_ORDER)
+       if (hstate_is_gigantic(h))
                return -EINVAL;
 
        err = kstrtoul(buf, 10, &input);
@@ -1980,11 +2124,7 @@ static int __init hugetlb_init(void)
 {
        int i;
 
-       /* Some platform decide whether they support huge pages at boot
-        * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
-        * there is no such support
-        */
-       if (HPAGE_SHIFT == 0)
+       if (!hugepages_supported())
                return 0;
 
        if (!size_to_hstate(default_hstate_size)) {
@@ -2111,9 +2251,12 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
        unsigned long tmp;
        int ret;
 
+       if (!hugepages_supported())
+               return -ENOTSUPP;
+
        tmp = h->max_huge_pages;
 
-       if (write && h->order >= MAX_ORDER)
+       if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
                return -EINVAL;
 
        table->data = &tmp;
@@ -2164,9 +2307,12 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
        unsigned long tmp;
        int ret;
 
+       if (!hugepages_supported())
+               return -ENOTSUPP;
+
        tmp = h->nr_overcommit_huge_pages;
 
-       if (write && h->order >= MAX_ORDER)
+       if (write && hstate_is_gigantic(h))
                return -EINVAL;
 
        table->data = &tmp;
@@ -2189,6 +2335,8 @@ out:
 void hugetlb_report_meminfo(struct seq_file *m)
 {
        struct hstate *h = &default_hstate;
+       if (!hugepages_supported())
+               return;
        seq_printf(m,
                        "HugePages_Total:   %5lu\n"
                        "HugePages_Free:    %5lu\n"
@@ -2205,6 +2353,8 @@ void hugetlb_report_meminfo(struct seq_file *m)
 int hugetlb_report_node_meminfo(int nid, char *buf)
 {
        struct hstate *h = &default_hstate;
+       if (!hugepages_supported())
+               return 0;
        return sprintf(buf,
                "Node %d HugePages_Total: %5u\n"
                "Node %d HugePages_Free:  %5u\n"
@@ -2219,6 +2369,9 @@ void hugetlb_show_meminfo(void)
        struct hstate *h;
        int nid;
 
+       if (!hugepages_supported())
+               return;
+
        for_each_node_state(nid, N_MEMORY)
                for_each_hstate(h)
                        pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",