x86-64: Combine SRAT regions when possible
author Jan Beulich <JBeulich@novell.com>
Wed, 21 Apr 2010 15:13:20 +0000 (16:13 +0100)
committer H. Peter Anvin <hpa@zytor.com>
Thu, 29 Apr 2010 00:14:11 +0000 (17:14 -0700)
... i.e. when the hole between two regions isn't occupied by memory on
another node. This reduces the memory->node table size, thus reducing
the cache footprint of lookups, which had increased significantly some
time ago; on the systems I looked at, things go back to how they were
before that change.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
LKML-Reference: <4BCF3230020000780003B3CA@vpn.id2.novell.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
arch/x86/mm/srat_64.c

index 28c6876..3ebe651 100644 (file)
@@ -363,6 +363,54 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
        for (i = 0; i < MAX_NUMNODES; i++)
                cutoff_node(i, start, end);
 
+       /*
+        * Join together blocks on the same node, holes between
+        * which don't overlap with memory on other nodes.
+        */
+       for (i = 0; i < num_node_memblks; ++i) {
+               int j, k;
+
+               for (j = i + 1; j < num_node_memblks; ++j) {
+                       unsigned long start, end;
+
+                       if (memblk_nodeid[i] != memblk_nodeid[j])
+                               continue;
+                       start = min(node_memblk_range[i].end,
+                                   node_memblk_range[j].end);
+                       end = max(node_memblk_range[i].start,
+                                 node_memblk_range[j].start);
+                       for (k = 0; k < num_node_memblks; ++k) {
+                               if (memblk_nodeid[i] == memblk_nodeid[k])
+                                       continue;
+                               if (start < node_memblk_range[k].end &&
+                                   end > node_memblk_range[k].start)
+                                       break;
+                       }
+                       if (k < num_node_memblks)
+                               continue;
+                       start = min(node_memblk_range[i].start,
+                                   node_memblk_range[j].start);
+                       end = max(node_memblk_range[i].end,
+                                 node_memblk_range[j].end);
+                       printk(KERN_INFO "SRAT: Node %d "
+                              "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
+                              memblk_nodeid[i],
+                              node_memblk_range[i].start,
+                              node_memblk_range[i].end,
+                              node_memblk_range[j].start,
+                              node_memblk_range[j].end,
+                              start, end);
+                       node_memblk_range[i].start = start;
+                       node_memblk_range[i].end = end;
+                       k = --num_node_memblks - j;
+                       memmove(memblk_nodeid + j, memblk_nodeid + j+1,
+                               k * sizeof(*memblk_nodeid));
+                       memmove(node_memblk_range + j, node_memblk_range + j+1,
+                               k * sizeof(*node_memblk_range));
+                       --j;
+               }
+       }
+
        memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
                                           memblk_nodeid);
        if (memnode_shift < 0) {