diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df2022f..8fcced7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -58,6 +58,7 @@
 #include <linux/prefetch.h>
 #include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
+#include <linux/sched/rt.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -239,15 +240,20 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
        int ret = 0;
        unsigned seq;
        unsigned long pfn = page_to_pfn(page);
+       unsigned long sp, start_pfn;
 
        do {
                seq = zone_span_seqbegin(zone);
-               if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
-                       ret = 1;
-               else if (pfn < zone->zone_start_pfn)
+               start_pfn = zone->zone_start_pfn;
+               sp = zone->spanned_pages;
+               if (!zone_spans_pfn(zone, pfn))
                        ret = 1;
        } while (zone_span_seqretry(zone, seq));
 
+       if (ret)
+               pr_err("page %lu outside zone [ %lu - %lu ]\n",
+                       pfn, start_pfn, start_pfn + sp);
+
        return ret;
 }
 
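
The open-coded boundary checks above are replaced by zone_spans_pfn()/zone_end_pfn(). A minimal sketch of these helpers, assuming they are introduced in include/linux/mmzone.h by the same series (not part of this diff):

        /* First pfn past the end of the zone's span. */
        static inline unsigned long zone_end_pfn(const struct zone *zone)
        {
                return zone->zone_start_pfn + zone->spanned_pages;
        }

        /* True if pfn falls within [zone_start_pfn, zone_end_pfn). */
        static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
        {
                return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
        }
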
@@ -287,7 +293,7 @@ static void bad_page(struct page *page)
 
        /* Don't complain about poisoned pages */
        if (PageHWPoison(page)) {
-               reset_page_mapcount(page); /* remove PageBuddy */
+               page_mapcount_reset(page); /* remove PageBuddy */
                return;
        }
 
@@ -319,8 +325,8 @@ static void bad_page(struct page *page)
        dump_stack();
 out:
        /* Leave bad fields for debug, except PageBuddy could make trouble */
-       reset_page_mapcount(page); /* remove PageBuddy */
-       add_taint(TAINT_BAD_PAGE);
+       page_mapcount_reset(page); /* remove PageBuddy */
+       add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 /*
@@ -532,6 +538,8 @@ static inline void __free_one_page(struct page *page,
        unsigned long uninitialized_var(buddy_idx);
        struct page *buddy;
 
+       VM_BUG_ON(!zone_is_initialized(zone));
+
        if (unlikely(PageCompound(page)))
                if (unlikely(destroy_compound_page(page, order)))
                        return;
@@ -605,7 +613,7 @@ static inline int free_pages_check(struct page *page)
                bad_page(page);
                return 1;
        }
-       reset_page_last_nid(page);
+       page_nid_reset_last(page);
        if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
                page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
        return 0;
@@ -665,7 +673,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                        /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
                        __free_one_page(page, zone, 0, mt);
                        trace_mm_page_pcpu_drain(page, 0, mt);
-                       if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
+                       if (likely(!is_migrate_isolate_page(page))) {
                                __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
                                if (is_migrate_cma(mt))
                                        __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
@@ -683,7 +691,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
        zone->pages_scanned = 0;
 
        __free_one_page(page, zone, order, migratetype);
-       if (unlikely(migratetype != MIGRATE_ISOLATE))
+       if (unlikely(!is_migrate_isolate(migratetype)))
                __mod_zone_freepage_state(zone, 1 << order, migratetype);
        spin_unlock(&zone->lock);
 }
@@ -773,6 +781,10 @@ void __init init_cma_reserved_pageblock(struct page *page)
        set_pageblock_migratetype(page, MIGRATE_CMA);
        __free_pages(page, pageblock_order);
        totalram_pages += pageblock_nr_pages;
+#ifdef CONFIG_HIGHMEM
+       if (PageHighMem(page))
+               totalhigh_pages += pageblock_nr_pages;
+#endif
 }
 #endif
 
@@ -911,7 +923,9 @@ static int fallbacks[MIGRATE_TYPES][4] = {
        [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,   MIGRATE_RESERVE },
 #endif
        [MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
+#ifdef CONFIG_MEMORY_ISOLATION
        [MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
+#endif
 };
 
 /*
@@ -976,9 +990,9 @@ int move_freepages_block(struct zone *zone, struct page *page,
        end_pfn = start_pfn + pageblock_nr_pages - 1;
 
        /* Do not cross zone boundaries */
-       if (start_pfn < zone->zone_start_pfn)
+       if (!zone_spans_pfn(zone, start_pfn))
                start_page = page;
-       if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
+       if (!zone_spans_pfn(zone, end_pfn))
                return 0;
 
        return move_freepages(zone, start_page, end_page, migratetype);
@@ -1137,7 +1151,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                        list_add_tail(&page->lru, list);
                if (IS_ENABLED(CONFIG_CMA)) {
                        mt = get_pageblock_migratetype(page);
-                       if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
+                       if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
                                mt = migratetype;
                }
                set_freepage_migratetype(page, mt);
@@ -1272,7 +1286,7 @@ void mark_free_pages(struct zone *zone)
 
        spin_lock_irqsave(&zone->lock, flags);
 
-       max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+       max_zone_pfn = zone_end_pfn(zone);
        for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                if (pfn_valid(pfn)) {
                        struct page *page = pfn_to_page(pfn);
@@ -1321,7 +1335,7 @@ void free_hot_cold_page(struct page *page, int cold)
         * excessively into the page allocator
         */
        if (migratetype >= MIGRATE_PCPTYPES) {
-               if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+               if (unlikely(is_migrate_isolate(migratetype))) {
                        free_one_page(zone, page, 0, migratetype);
                        goto out;
                }
@@ -1395,7 +1409,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
        zone = page_zone(page);
        mt = get_pageblock_migratetype(page);
 
-       if (mt != MIGRATE_ISOLATE) {
+       if (!is_migrate_isolate(mt)) {
                /* Obey watermarks as if the page was being allocated */
                watermark = low_wmark_pages(zone) + (1 << order);
                if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
@@ -1414,7 +1428,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
                struct page *endpage = page + (1 << order) - 1;
                for (; page < endpage; page += pageblock_nr_pages) {
                        int mt = get_pageblock_migratetype(page);
-                       if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt))
+                       if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
                                set_pageblock_migratetype(page,
                                                          MIGRATE_MOVABLE);
                }
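
The direct MIGRATE_ISOLATE comparisons in the hunks above are replaced by is_migrate_isolate()/is_migrate_isolate_page(). A sketch of these wrappers, assumed to live in include/linux/page-isolation.h so that the checks compile away when CONFIG_MEMORY_ISOLATION is not set:

        #ifdef CONFIG_MEMORY_ISOLATION
        static inline bool is_migrate_isolate_page(struct page *page)
        {
                return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
        }
        static inline bool is_migrate_isolate(int migratetype)
        {
                return migratetype == MIGRATE_ISOLATE;
        }
        #else
        static inline bool is_migrate_isolate_page(struct page *page)
        {
                return false;
        }
        static inline bool is_migrate_isolate(int migratetype)
        {
                return false;
        }
        #endif
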
@@ -2610,10 +2624,17 @@ retry_cpuset:
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, alloc_flags,
                        preferred_zone, migratetype);
-       if (unlikely(!page))
+       if (unlikely(!page)) {
+               /*
+                * Runtime PM, block IO and its error handling path
+                * can deadlock because I/O on the device might not
+                * complete.
+                */
+               gfp_mask = memalloc_noio_flags(gfp_mask);
                page = __alloc_pages_slowpath(gfp_mask, order,
                                zonelist, high_zoneidx, nodemask,
                                preferred_zone, migratetype);
+       }
 
        trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 
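
memalloc_noio_flags(), called above before entering the slow path, masks off the I/O-related GFP flags when the current task has marked itself as unable to complete I/O (e.g. a device suspended by runtime PM). A sketch of the helper, assuming a PF_MEMALLOC_NOIO task flag set via memalloc_noio_save():

        static inline gfp_t memalloc_noio_flags(gfp_t flags)
        {
                /* Drop __GFP_IO/__GFP_FS so reclaim cannot issue I/O here. */
                if (unlikely(current->flags & PF_MEMALLOC_NOIO))
                        flags &= ~(__GFP_IO | __GFP_FS);
                return flags;
        }
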
@@ -2785,18 +2806,27 @@ void free_pages_exact(void *virt, size_t size)
 }
 EXPORT_SYMBOL(free_pages_exact);
 
-static unsigned int nr_free_zone_pages(int offset)
+/**
+ * nr_free_zone_pages - count number of pages beyond high watermark
+ * @offset: The zone index of the highest zone
+ *
+ * nr_free_zone_pages() counts the number of pages which are beyond the
+ * high watermark within all zones at or below a given zone index.  For each
+ * zone, the number of pages is calculated as:
+ *     managed_pages - high_pages
+ */
+static unsigned long nr_free_zone_pages(int offset)
 {
        struct zoneref *z;
        struct zone *zone;
 
        /* Just pick one node, since fallback list is circular */
-       unsigned int sum = 0;
+       unsigned long sum = 0;
 
        struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
 
        for_each_zone_zonelist(zone, z, zonelist, offset) {
-               unsigned long size = zone->present_pages;
+               unsigned long size = zone->managed_pages;
                unsigned long high = high_wmark_pages(zone);
                if (size > high)
                        sum += size - high;
@@ -2805,19 +2835,25 @@ static unsigned int nr_free_zone_pages(int offset)
        return sum;
 }
 
-/*
- * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
+/**
+ * nr_free_buffer_pages - count number of pages beyond high watermark
+ *
+ * nr_free_buffer_pages() counts the number of pages which are beyond the high
+ * watermark within ZONE_DMA and ZONE_NORMAL.
  */
-unsigned int nr_free_buffer_pages(void)
+unsigned long nr_free_buffer_pages(void)
 {
        return nr_free_zone_pages(gfp_zone(GFP_USER));
 }
 EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
 
-/*
- * Amount of free RAM allocatable within all zones
+/**
+ * nr_free_pagecache_pages - count number of pages beyond high watermark
+ *
+ * nr_free_pagecache_pages() counts the number of pages which are beyond the
+ * high watermark within all zones.
  */
-unsigned int nr_free_pagecache_pages(void)
+unsigned long nr_free_pagecache_pages(void)
 {
        return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
 }
@@ -2849,7 +2885,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
        val->totalram = pgdat->node_present_pages;
        val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
-       val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
+       val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
        val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
                        NR_FREE_PAGES);
 #else
@@ -2892,7 +2928,9 @@ static void show_migration_types(unsigned char type)
 #ifdef CONFIG_CMA
                [MIGRATE_CMA]           = 'C',
 #endif
+#ifdef CONFIG_MEMORY_ISOLATION
                [MIGRATE_ISOLATE]       = 'I',
+#endif
        };
        char tmp[MIGRATE_TYPES + 1];
        char *p = tmp;
@@ -3231,7 +3269,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 {
        int n, val;
        int min_val = INT_MAX;
-       int best_node = -1;
+       int best_node = NUMA_NO_NODE;
        const struct cpumask *tmp = cpumask_of_node(0);
 
        /* Use the local node if we haven't already */
@@ -3775,7 +3813,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
         * the block.
         */
        start_pfn = zone->zone_start_pfn;
-       end_pfn = start_pfn + zone->spanned_pages;
+       end_pfn = zone_end_pfn(zone);
        start_pfn = roundup(start_pfn, pageblock_nr_pages);
        reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
                                                        pageblock_order;
@@ -3871,8 +3909,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                set_page_links(page, zone, nid, pfn);
                mminit_verify_page_links(page, zone, nid, pfn);
                init_page_count(page);
-               reset_page_mapcount(page);
-               reset_page_last_nid(page);
+               page_mapcount_reset(page);
+               page_nid_reset_last(page);
                SetPageReserved(page);
                /*
                 * Mark the block movable so that blocks are reserved for
@@ -3889,7 +3927,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 * pfn out of zone.
                 */
                if ((z->zone_start_pfn <= pfn)
-                   && (pfn < z->zone_start_pfn + z->spanned_pages)
+                   && (pfn < zone_end_pfn(z))
                    && !(pfn & (pageblock_nr_pages - 1)))
                        set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 
@@ -3927,7 +3965,7 @@ static int __meminit zone_batchsize(struct zone *zone)
         *
         * OK, so we don't know how big the cache is.  So guess.
         */
-       batch = zone->present_pages / 1024;
+       batch = zone->managed_pages / 1024;
        if (batch * PAGE_SIZE > 512 * 1024)
                batch = (512 * 1024) / PAGE_SIZE;
        batch /= 4;             /* We effectively *= 4 below */
@@ -4011,7 +4049,7 @@ static void __meminit setup_zone_pageset(struct zone *zone)
 
                if (percpu_pagelist_fraction)
                        setup_pagelist_highmark(pcp,
-                               (zone->present_pages /
+                               (zone->managed_pages /
                                        percpu_pagelist_fraction));
        }
 }
@@ -4416,10 +4454,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
  * round what is now in bits to nearest long in bits, then return it in
  * bytes.
  */
-static unsigned long __init usemap_size(unsigned long zonesize)
+static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
 {
        unsigned long usemapsize;
 
+       zonesize += zone_start_pfn & (pageblock_nr_pages-1);
        usemapsize = roundup(zonesize, pageblock_nr_pages);
        usemapsize = usemapsize >> pageblock_order;
        usemapsize *= NR_PAGEBLOCK_BITS;
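
A worked example of why usemap_size() now takes zone_start_pfn (numbers assumed for illustration): with pageblock_nr_pages = 512, a zone starting at pfn 448 and spanning 1024 pages covers pfns 448..1471, i.e. three pageblocks (0..511, 512..1023, 1024..1535). The old roundup(1024, 512) sized the bitmap for only two pageblocks; adding zone_start_pfn & (pageblock_nr_pages-1) = 448 first gives roundup(1472, 512) = 1536, i.e. the three pageblocks actually spanned.
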
@@ -4429,17 +4468,19 @@ static unsigned long __init usemap_size(unsigned long zonesize)
 }
 
 static void __init setup_usemap(struct pglist_data *pgdat,
-                               struct zone *zone, unsigned long zonesize)
+                               struct zone *zone,
+                               unsigned long zone_start_pfn,
+                               unsigned long zonesize)
 {
-       unsigned long usemapsize = usemap_size(zonesize);
+       unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
        zone->pageblock_flags = NULL;
        if (usemapsize)
                zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
                                                                   usemapsize);
 }
 #else
-static inline void setup_usemap(struct pglist_data *pgdat,
-                               struct zone *zone, unsigned long zonesize) {}
+static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
+                               unsigned long zone_start_pfn, unsigned long zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -4565,7 +4606,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                nr_all_pages += freesize;
 
                zone->spanned_pages = size;
-               zone->present_pages = freesize;
+               zone->present_pages = realsize;
                /*
                 * Set an approximate value for lowmem here, it will be adjusted
                 * when the bootmem allocator frees pages into the buddy system.
@@ -4590,7 +4631,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                        continue;
 
                set_pageblock_order();
-               setup_usemap(pgdat, zone, size);
+               setup_usemap(pgdat, zone, zone_start_pfn, size);
                ret = init_currently_empty_zone(zone, zone_start_pfn,
                                                size, MEMMAP_EARLY);
                BUG_ON(ret);
@@ -4617,7 +4658,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
                 * for the buddy allocator to function correctly.
                 */
                start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
-               end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+               end = pgdat_end_pfn(pgdat);
                end = ALIGN(end, MAX_ORDER_NR_PAGES);
                size =  (end - start) * sizeof(struct page);
                map = alloc_remap(pgdat->node_id, size);
@@ -5152,8 +5193,8 @@ static void calculate_totalreserve_pages(void)
                        /* we treat the high watermark as reserved pages. */
                        max += high_wmark_pages(zone);
 
-                       if (max > zone->present_pages)
-                               max = zone->present_pages;
+                       if (max > zone->managed_pages)
+                               max = zone->managed_pages;
                        reserve_pages += max;
                        /*
                         * Lowmem reserves are not available to
@@ -5185,7 +5226,7 @@ static void setup_per_zone_lowmem_reserve(void)
        for_each_online_pgdat(pgdat) {
                for (j = 0; j < MAX_NR_ZONES; j++) {
                        struct zone *zone = pgdat->node_zones + j;
-                       unsigned long present_pages = zone->present_pages;
+                       unsigned long managed_pages = zone->managed_pages;
 
                        zone->lowmem_reserve[j] = 0;
 
@@ -5199,9 +5240,9 @@ static void setup_per_zone_lowmem_reserve(void)
                                        sysctl_lowmem_reserve_ratio[idx] = 1;
 
                                lower_zone = pgdat->node_zones + idx;
-                               lower_zone->lowmem_reserve[j] = present_pages /
+                               lower_zone->lowmem_reserve[j] = managed_pages /
                                        sysctl_lowmem_reserve_ratio[idx];
-                               present_pages += lower_zone->present_pages;
+                               managed_pages += lower_zone->managed_pages;
                        }
                }
        }
@@ -5220,14 +5261,14 @@ static void __setup_per_zone_wmarks(void)
        /* Calculate total number of !ZONE_HIGHMEM pages */
        for_each_zone(zone) {
                if (!is_highmem(zone))
-                       lowmem_pages += zone->present_pages;
+                       lowmem_pages += zone->managed_pages;
        }
 
        for_each_zone(zone) {
                u64 tmp;
 
                spin_lock_irqsave(&zone->lock, flags);
-               tmp = (u64)pages_min * zone->present_pages;
+               tmp = (u64)pages_min * zone->managed_pages;
                do_div(tmp, lowmem_pages);
                if (is_highmem(zone)) {
                        /*
@@ -5239,13 +5280,10 @@ static void __setup_per_zone_wmarks(void)
                         * deltas controls asynch page reclaim, and so should
                         * not be capped for highmem.
                         */
-                       int min_pages;
+                       unsigned long min_pages;
 
-                       min_pages = zone->present_pages / 1024;
-                       if (min_pages < SWAP_CLUSTER_MAX)
-                               min_pages = SWAP_CLUSTER_MAX;
-                       if (min_pages > 128)
-                               min_pages = 128;
+                       min_pages = zone->managed_pages / 1024;
+                       min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
                        zone->watermark[WMARK_MIN] = min_pages;
                } else {
                        /*
@@ -5306,7 +5344,7 @@ static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
        unsigned int gb, ratio;
 
        /* Zone size in gigabytes */
-       gb = zone->present_pages >> (30 - PAGE_SHIFT);
+       gb = zone->managed_pages >> (30 - PAGE_SHIFT);
        if (gb)
                ratio = int_sqrt(10 * gb);
        else
@@ -5392,7 +5430,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
                return rc;
 
        for_each_zone(zone)
-               zone->min_unmapped_pages = (zone->present_pages *
+               zone->min_unmapped_pages = (zone->managed_pages *
                                sysctl_min_unmapped_ratio) / 100;
        return 0;
 }
@@ -5408,7 +5446,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
                return rc;
 
        for_each_zone(zone)
-               zone->min_slab_pages = (zone->present_pages *
+               zone->min_slab_pages = (zone->managed_pages *
                                sysctl_min_slab_ratio) / 100;
        return 0;
 }
@@ -5450,7 +5488,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
        for_each_populated_zone(zone) {
                for_each_possible_cpu(cpu) {
                        unsigned long  high;
-                       high = zone->present_pages / percpu_pagelist_fraction;
+                       high = zone->managed_pages / percpu_pagelist_fraction;
                        setup_pagelist_highmark(
                                per_cpu_ptr(zone->pageset, cpu), high);
                }
@@ -5637,8 +5675,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
        pfn = page_to_pfn(page);
        bitmap = get_pageblock_bitmap(zone, pfn);
        bitidx = pfn_to_bitidx(zone, pfn);
-       VM_BUG_ON(pfn < zone->zone_start_pfn);
-       VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);
+       VM_BUG_ON(!zone_spans_pfn(zone, pfn));
 
        for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
                if (flags & value)
@@ -5736,8 +5773,7 @@ bool is_pageblock_removable_nolock(struct page *page)
 
        zone = page_zone(page);
        pfn = page_to_pfn(page);
-       if (zone->zone_start_pfn > pfn ||
-                       zone->zone_start_pfn + zone->spanned_pages <= pfn)
+       if (!zone_spans_pfn(zone, pfn))
                return false;
 
        return !has_unmovable_pages(zone, page, 0, true);
@@ -5793,14 +5829,14 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
                                                        &cc->migratepages);
                cc->nr_migratepages -= nr_reclaimed;
 
-               ret = migrate_pages(&cc->migratepages,
-                                   alloc_migrate_target,
-                                   0, false, MIGRATE_SYNC,
-                                   MR_CMA);
+               ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
+                                   0, MIGRATE_SYNC, MR_CMA);
        }
-
-       putback_movable_pages(&cc->migratepages);
-       return ret > 0 ? 0 : ret;
+       if (ret < 0) {
+               putback_movable_pages(&cc->migratepages);
+               return ret;
+       }
+       return 0;
 }
 
 /**