[PATCH] zone_reclaim: dynamic slab reclaim
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2410a3c..cf913bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -70,8 +70,12 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  */
 int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
         256,
+#ifdef CONFIG_ZONE_DMA32
         256,
+#endif
+#ifdef CONFIG_HIGHMEM
         32
+#endif
 };
 
 EXPORT_SYMBOL(totalram_pages);
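For context: the zones themselves became conditional in this series, so MAX_NR_ZONES now shrinks with the configuration, and the #ifdefs above keep the ratio table's entries lined up with the zone enum. A sketch of the corresponding definition (roughly as in include/linux/mmzone.h of this era):

enum zone_type {
	ZONE_DMA,
#ifdef CONFIG_ZONE_DMA32
	ZONE_DMA32,
#endif
	ZONE_NORMAL,
#ifdef CONFIG_HIGHMEM
	ZONE_HIGHMEM,
#endif
	MAX_NR_ZONES
};

sysctl_lowmem_reserve_ratio[] has MAX_NR_ZONES-1 entries because the highest zone never needs a reserve against allocations from a still higher zone.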
@@ -85,9 +89,13 @@ EXPORT_SYMBOL(zone_table);
 
 static char *zone_names[MAX_NR_ZONES] = {
         "DMA",
+#ifdef CONFIG_ZONE_DMA32
         "DMA32",
+#endif
         "Normal",
+#ifdef CONFIG_HIGHMEM
         "HighMem"
+#endif
 };
 
 int min_free_kbytes = 1024;
@@ -440,9 +448,11 @@ static void free_pages_bulk(struct zone *zone, int count,
 
 static void free_one_page(struct zone *zone, struct page *page, int order)
 {
-       LIST_HEAD(list);
-       list_add(&page->lru, &list);
-       free_pages_bulk(zone, 1, &list, order);
+       spin_lock(&zone->lock);
+       zone->all_unreclaimable = 0;
+       zone->pages_scanned = 0;
+       __free_one_page(page, zone, order);
+       spin_unlock(&zone->lock);
 }
 
 static void __free_pages_ok(struct page *page, unsigned int order)
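free_one_page() now open-codes the single-page case instead of building a one-entry list for free_pages_bulk(). Clearing all_unreclaimable and pages_scanned tells reclaim the zone is making progress again, which mirrors what the bulk path already does under zone->lock; for comparison, the bulk path in the same file reads roughly:

static void free_pages_bulk(struct zone *zone, int count,
					struct list_head *list, int order)
{
	spin_lock(&zone->lock);
	zone->all_unreclaimable = 0;
	zone->pages_scanned = 0;
	while (count--) {
		struct page *page;

		page = list_entry(list->prev, struct page, lru);
		/* must delete as __free_one_page list manipulates */
		list_del(&page->lru);
		__free_one_page(page, zone, order);
	}
	spin_unlock(&zone->lock);
}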
@@ -623,19 +633,23 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 #ifdef CONFIG_NUMA
 /*
  * Called from the slab reaper to drain pagesets on a particular node that
- * belong to the currently executing processor.
+ * belongs to the currently executing processor.
  * Note that this function must be called with the thread pinned to
  * a single processor.
  */
 void drain_node_pages(int nodeid)
 {
-       int i, z;
+       int i;
+       enum zone_type z;
        unsigned long flags;
 
        for (z = 0; z < MAX_NR_ZONES; z++) {
                struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
                struct per_cpu_pageset *pset;
 
+               if (!populated_zone(zone))
+                       continue;
+
                pset = zone_pcp(zone, smp_processor_id());
                for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
                        struct per_cpu_pages *pcp;
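With zones now optional, some node_zones[] slots can be empty placeholders, so the drain loop has to skip them. populated_zone() is just a test on present_pages, roughly (include/linux/mmzone.h):

static inline int populated_zone(struct zone *zone)
{
	return (!!zone->present_pages);
}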
@@ -878,32 +892,37 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
        struct zone **z = zonelist->zones;
        struct page *page = NULL;
        int classzone_idx = zone_idx(*z);
+       struct zone *zone;
 
        /*
         * Go through the zonelist once, looking for a zone with enough free.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
        do {
+               zone = *z;
+               if (unlikely((gfp_mask & __GFP_THISNODE) &&
+                       zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
+                               break;
                if ((alloc_flags & ALLOC_CPUSET) &&
-                               !cpuset_zone_allowed(*z, gfp_mask))
+                               !cpuset_zone_allowed(zone, gfp_mask))
                        continue;
 
                if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
                        unsigned long mark;
                        if (alloc_flags & ALLOC_WMARK_MIN)
-                               mark = (*z)->pages_min;
+                               mark = zone->pages_min;
                        else if (alloc_flags & ALLOC_WMARK_LOW)
-                               mark = (*z)->pages_low;
+                               mark = zone->pages_low;
                        else
-                               mark = (*z)->pages_high;
-                       if (!zone_watermark_ok(*z, order, mark,
+                               mark = zone->pages_high;
+                       if (!zone_watermark_ok(zone, order, mark,
                                    classzone_idx, alloc_flags))
                                if (!zone_reclaim_mode ||
-                                   !zone_reclaim(*z, gfp_mask, order))
+                                   !zone_reclaim(zone, gfp_mask, order))
                                        continue;
                }
 
-               page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
+               page = buffered_rmqueue(zonelist, zone, order, gfp_mask);
                if (page) {
                        break;
                }
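The new check implements __GFP_THISNODE: zonelist->zones[0] is always a zone on the node the zonelist was built for, so as soon as the walk reaches a zone with a different zone_pgdat it has fallen over to a remote node and can give up. A hypothetical caller (grab_local_page() is made up for illustration):

static struct page *grab_local_page(int nid)
{
	/* take a page from node nid or fail; never fall back
	 * silently to another node's zones */
	return alloc_pages_node(nid, GFP_KERNEL | __GFP_THISNODE, 0);
}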
@@ -1150,7 +1169,8 @@ EXPORT_SYMBOL(nr_free_pages);
 #ifdef CONFIG_NUMA
 unsigned int nr_free_pages_pgdat(pg_data_t *pgdat)
 {
-       unsigned int i, sum = 0;
+       unsigned int sum = 0;
+       enum zone_type i;
 
        for (i = 0; i < MAX_NR_ZONES; i++)
                sum += pgdat->node_zones[i].free_pages;
@@ -1284,7 +1304,8 @@ void show_free_areas(void)
                global_page_state(NR_WRITEBACK),
                global_page_state(NR_UNSTABLE_NFS),
                nr_free_pages(),
-               global_page_state(NR_SLAB),
+               global_page_state(NR_SLAB_RECLAIMABLE) +
+                       global_page_state(NR_SLAB_UNRECLAIMABLE),
                global_page_state(NR_FILE_MAPPED),
                global_page_state(NR_PAGETABLE));
 
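NR_SLAB is split in two because dynamic slab reclaim only makes sense against slab memory that can actually be shrunk; show_free_areas() keeps printing the sum. Caches opt into the reclaimable pool with SLAB_RECLAIM_ACCOUNT, along these lines (the cache and struct names are made up):

struct example_obj {
	int payload;
};

static struct kmem_cache *example_cache;

static int __init example_init(void)
{
	/* SLAB_RECLAIM_ACCOUNT counts this cache's pages as
	 * NR_SLAB_RECLAIMABLE; caches created without it land in
	 * NR_SLAB_UNRECLAIMABLE and zone_reclaim() leaves them alone. */
	example_cache = kmem_cache_create("example_cache",
			sizeof(struct example_obj), 0,
			SLAB_RECLAIM_ACCOUNT, NULL, NULL);
	return example_cache ? 0 : -ENOMEM;
}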
@@ -1350,36 +1371,25 @@ void show_free_areas(void)
  * Add all populated zones of a node to the zonelist.
  */
 static int __meminit build_zonelists_node(pg_data_t *pgdat,
-                       struct zonelist *zonelist, int nr_zones, int zone_type)
+                       struct zonelist *zonelist, int nr_zones, enum zone_type zone_type)
 {
        struct zone *zone;
 
        BUG_ON(zone_type >= MAX_NR_ZONES);
+       zone_type++;
 
        do {
+               zone_type--;
                zone = pgdat->node_zones + zone_type;
                if (populated_zone(zone)) {
                        zonelist->zones[nr_zones++] = zone;
                        check_highest_zone(zone_type);
                }
-               zone_type--;
 
-       } while (zone_type >= 0);
+       } while (zone_type);
        return nr_zones;
 }
 
-static inline int highest_zone(int zone_bits)
-{
-       int res = ZONE_NORMAL;
-       if (zone_bits & (__force int)__GFP_HIGHMEM)
-               res = ZONE_HIGHMEM;
-       if (zone_bits & (__force int)__GFP_DMA32)
-               res = ZONE_DMA32;
-       if (zone_bits & (__force int)__GFP_DMA)
-               res = ZONE_DMA;
-       return res;
-}
-
 #ifdef CONFIG_NUMA
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __meminitdata node_load[MAX_NUMNODES];
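The build_zonelists_node() rewrite is about types, not behaviour: zone_type is now an enum, which may be unsigned, so the old "zone_type--; ... while (zone_type >= 0)" countdown would either never terminate or draw a compiler warning. Incrementing once before the loop and decrementing at the top is the usual unsigned-safe way to visit n..0 inclusive; the same idiom reappears in setup_per_zone_lowmem_reserve() further down. In isolation (walk_down() and visit() are placeholders):

extern void visit(unsigned int i);

static void walk_down(unsigned int n)
{
	unsigned int i = n + 1;

	do {
		i--;
		visit(i);	/* visits n, n-1, ..., 0 */
	} while (i);
}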
@@ -1445,13 +1455,14 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask)
 
 static void __meminit build_zonelists(pg_data_t *pgdat)
 {
-       int i, j, k, node, local_node;
+       int j, node, local_node;
+       enum zone_type i;
        int prev_node, load;
        struct zonelist *zonelist;
        nodemask_t used_mask;
 
        /* initialize zonelists */
-       for (i = 0; i < GFP_ZONETYPES; i++) {
+       for (i = 0; i < MAX_NR_ZONES; i++) {
                zonelist = pgdat->node_zonelists + i;
                zonelist->zones[0] = NULL;
        }
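Zonelists are now indexed by zone type rather than by GFP_ZONETYPES (one list per combination of zone modifier bits), which is also why highest_zone() could be removed above: the allocator picks the list as node_zonelists + gfp_zone(gfp_mask), where gfp_zone() reads roughly (include/linux/gfp.h of this era):

static inline enum zone_type gfp_zone(gfp_t flags)
{
	if (flags & __GFP_DMA)
		return ZONE_DMA;
#ifdef CONFIG_ZONE_DMA32
	if (flags & __GFP_DMA32)
		return ZONE_DMA32;
#endif
#ifdef CONFIG_HIGHMEM
	if (flags & __GFP_HIGHMEM)
		return ZONE_HIGHMEM;
#endif
	return ZONE_NORMAL;
}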
@@ -1481,13 +1492,11 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
                        node_load[node] += load;
                prev_node = node;
                load--;
-               for (i = 0; i < GFP_ZONETYPES; i++) {
+               for (i = 0; i < MAX_NR_ZONES; i++) {
                        zonelist = pgdat->node_zonelists + i;
                        for (j = 0; zonelist->zones[j] != NULL; j++);
 
-                       k = highest_zone(i);
-
-                       j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+                       j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
                        zonelist->zones[j] = NULL;
                }
        }
@@ -1497,19 +1506,16 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
 
 static void __meminit build_zonelists(pg_data_t *pgdat)
 {
-       int i, node, local_node;
-       enum zone_type k;
-       enum zone_type j;
+       int node, local_node;
+       enum zone_type i, j;
 
        local_node = pgdat->node_id;
-       for (i = 0; i < GFP_ZONETYPES; i++) {
+       for (i = 0; i < MAX_NR_ZONES; i++) {
                struct zonelist *zonelist;
 
                zonelist = pgdat->node_zonelists + i;
 
-               j = 0;
-               k = highest_zone(i);
-               j = build_zonelists_node(pgdat, zonelist, j, k);
+               j = build_zonelists_node(pgdat, zonelist, 0, i);
                /*
                 * Now we build the zonelist so that it contains the zones
                 * of all the other nodes.
@@ -1521,12 +1527,12 @@ static void __meminit build_zonelists(pg_data_t *pgdat)
                for (node = local_node + 1; node < MAX_NUMNODES; node++) {
                        if (!node_online(node))
                                continue;
-                       j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+                       j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
                }
                for (node = 0; node < local_node; node++) {
                        if (!node_online(node))
                                continue;
-                       j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+                       j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
                }
 
                zonelist->zones[j] = NULL;
@@ -1632,7 +1638,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
                unsigned long *zones_size, unsigned long *zholes_size)
 {
        unsigned long realtotalpages, totalpages = 0;
-       int i;
+       enum zone_type i;
 
        for (i = 0; i < MAX_NR_ZONES; i++)
                totalpages += zones_size[i];
@@ -1997,8 +2003,9 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
                zone->spanned_pages = size;
                zone->present_pages = realsize;
 #ifdef CONFIG_NUMA
-               zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
+               zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
                                                / 100;
+               zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
 #endif
                zone->name = zone_names[j];
                spin_lock_init(&zone->lock);
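min_unmapped_pages and min_slab_pages pre-compute the sysctl ratios as page counts at boot so the reclaim path only has to compare. They are consumed by zone_reclaim() in mm/vmscan.c (the other half of this patch); a sketch of that check:

	/* only bother reclaiming when the zone holds enough unmapped
	 * pagecache or reclaimable slab to be worth the effort */
	if (zone_page_state(zone, NR_FILE_PAGES) -
	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
	    && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
			<= zone->min_slab_pages)
		return 0;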
@@ -2120,7 +2127,7 @@ static void calculate_totalreserve_pages(void)
 {
        struct pglist_data *pgdat;
        unsigned long reserve_pages = 0;
-       int i, j;
+       enum zone_type i, j;
 
        for_each_online_pgdat(pgdat) {
                for (i = 0; i < MAX_NR_ZONES; i++) {
@@ -2153,7 +2160,7 @@ static void calculate_totalreserve_pages(void)
 static void setup_per_zone_lowmem_reserve(void)
 {
        struct pglist_data *pgdat;
-       int j, idx;
+       enum zone_type j, idx;
 
        for_each_online_pgdat(pgdat) {
                for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -2162,9 +2169,12 @@ static void setup_per_zone_lowmem_reserve(void)
 
                        zone->lowmem_reserve[j] = 0;
 
-                       for (idx = j-1; idx >= 0; idx--) {
+                       idx = j;
+                       while (idx) {
                                struct zone *lower_zone;
 
+                               idx--;
+
                                if (sysctl_lowmem_reserve_ratio[idx] < 1)
                                        sysctl_lowmem_reserve_ratio[idx] = 1;
 
@@ -2305,10 +2315,26 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
                return rc;
 
        for_each_zone(zone)
-               zone->min_unmapped_ratio = (zone->present_pages *
+               zone->min_unmapped_pages = (zone->present_pages *
                                sysctl_min_unmapped_ratio) / 100;
        return 0;
 }
+
+int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+       struct zone *zone;
+       int rc;
+
+       rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       if (rc)
+               return rc;
+
+       for_each_zone(zone)
+               zone->min_slab_pages = (zone->present_pages *
+                               sysctl_min_slab_ratio) / 100;
+       return 0;
+}
 #endif
 
 /*
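The new handler also needs a matching table entry in kernel/sysctl.c (added elsewhere in this patch) so it is reachable as /proc/sys/vm/min_slab_ratio; sketched from the usual pattern, with the ratio clamped to 0..100 (the VM_MIN_SLAB ctl_name and the zero/one_hundred statics are assumed here):

	{
		.ctl_name	= VM_MIN_SLAB,
		.procname	= "min_slab_ratio",
		.data		= &sysctl_min_slab_ratio,
		.maxlen		= sizeof(sysctl_min_slab_ratio),
		.mode		= 0644,
		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
		.strategy	= &sysctl_intvec,
		.extra1		= &zero,
		.extra2		= &one_hundred,
	},

A write such as "echo 10 > /proc/sys/vm/min_slab_ratio" then recomputes min_slab_pages for every zone via the loop above.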