[PATCH] slab: Node rotor for freeing alien caches and remote per cpu pages.

author Christoph Lameter <clameter@engr.sgi.com>

Fri, 10 Mar 2006 01:33:54 +0000 (17:33 -0800)

committer Linus Torvalds <torvalds@g5.osdl.org>

Fri, 10 Mar 2006 03:47:38 +0000 (19:47 -0800)
author Christoph Lameter <clameter@engr.sgi.com>
Fri, 10 Mar 2006 01:33:54 +0000 (17:33 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Fri, 10 Mar 2006 03:47:38 +0000 (19:47 -0800)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h

index 20f9148..7851e6b 100644 (file)
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -157,9 +157,9 @@ extern void FASTCALL(free_cold_page(struct page *page));
  
  void page_alloc_init(void);
  #ifdef CONFIG_NUMA
-void drain_remote_pages(void);
+void drain_node_pages(int node);
  #else
-static inline void drain_remote_pages(void) { };
+static inline void drain_node_pages(int node) { }  /* no-op without CONFIG_NUMA */
  #endif
  
  #endif /* __LINUX_GFP_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 791690d..234bd48 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -590,21 +590,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
  }
  
  #ifdef CONFIG_NUMA
-/* Called from the slab reaper to drain remote pagesets */
-void drain_remote_pages(void)
+/*
+ * Called from the slab reaper to drain pagesets on a particular node that
+ * belong to the currently executing processor.
+ */
+void drain_node_pages(int nodeid)
  {
-       struct zone *zone;
-       int i;
+       int i, z;
         unsigned long flags;
  
         local_irq_save(flags);
-       for_each_zone(zone) {
+       for (z = 0; z < MAX_NR_ZONES; z++) {
+               struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
                 struct per_cpu_pageset *pset;
  
-               /* Do not drain local pagesets */
-               if (zone->zone_pgdat->node_id == numa_node_id())
-                       continue;
-
                 pset = zone_pcp(zone, smp_processor_id());
                 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
                         struct per_cpu_pages *pcp;
diff --git a/mm/slab.c b/mm/slab.c

index 61800b8..d0bd7f0 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char *
         dump_stack();
  }
  
+#ifdef CONFIG_NUMA
+/*
+ * Special reaping functions for NUMA systems called from cache_reap().
+ * These take care of doing round robin flushing of alien caches (containing
+ * objects freed on different nodes from which they were allocated) and the
+ * flushing of remote pcps by calling drain_node_pages.
+ */
+static DEFINE_PER_CPU(unsigned long, reap_node);
+
+/* Start @cpu's rotor at the online node after its own; @cpu may be remote. */
+static void init_reap_node(int cpu)
+{
+       int node = next_node(cpu_to_node(cpu), node_online_map);
+
+       if (node == MAX_NUMNODES)       /* ran off the end: wrap around */
+               node = first_node(node_online_map);
+       /* Not __get_cpu_var(): the caller need not be running on @cpu. */
+       per_cpu(reap_node, cpu) = node;
+}
+
+/* Flush this CPU's pcps on the current rotor node, then advance the rotor. */
+static void next_reap_node(void)
+{
+       int cur = __get_cpu_var(reap_node);
+       int next;
+
+       /* The local node is skipped: only remote pagesets are drained here. */
+       if (cur != numa_node_id())
+               drain_node_pages(cur);
+
+       next = next_node(cur, node_online_map);
+       if (unlikely(next >= MAX_NUMNODES))
+               next = first_node(node_online_map);     /* wrap around */
+       __get_cpu_var(reap_node) = next;
+}
+
+#else
+#define init_reap_node(cpu) do { } while (0)
+#define next_reap_node(void) do { } while (0)
+#endif
+
  /*
   * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
   * via the workqueue/eventd.
@@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu)
          * at that time.
          */
         if (keventd_up() && reap_work->func == NULL) {
+               init_reap_node(cpu);
                 INIT_WORK(reap_work, cache_reap, NULL);
                 schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
         }
@@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
         }
  }
  
+/*
+ * Called from cache_reap(): drain the alien cache picked by this CPU's rotor.
+ */
+static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
+{
+       int target = __get_cpu_var(reap_node);
+       struct array_cache *alien_ac = l3->alien ? l3->alien[target] : NULL;
+
+       /* Unlocked peek; only take the lock when something is queued. */
+       if (!alien_ac || !alien_ac->avail)
+               return;
+
+       spin_lock_irq(&alien_ac->lock);
+       __drain_alien_cache(cachep, alien_ac, target);
+       spin_unlock_irq(&alien_ac->lock);
+}
+
  static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
  {
         int i = 0;
@@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al
  #else
  
  #define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
  
  static inline struct array_cache **alloc_alien_cache(int node, int limit)
  {
@@ -3497,8 +3557,7 @@ static void cache_reap(void *unused)
                 check_irq_on();
  
                 l3 = searchp->nodelists[numa_node_id()];
-               if (l3->alien)
-                       drain_alien_cache(searchp, l3->alien);
+               reap_alien(searchp, l3);
                 spin_lock_irq(&l3->list_lock);
  
                 drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3548,7 +3607,7 @@ static void cache_reap(void *unused)
         }
         check_irq_on();
         mutex_unlock(&cache_chain_mutex);
-       drain_remote_pages();
+       next_reap_node();
         /* Setup the next iteration */
         schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
  }
author	Christoph Lameter <clameter@engr.sgi.com>
	Fri, 10 Mar 2006 01:33:54 +0000 (17:33 -0800)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Fri, 10 Mar 2006 03:47:38 +0000 (19:47 -0800)
include/linux/gfp.h		patch | blob | history
mm/page_alloc.c		patch | blob | history
mm/slab.c		patch | blob | history