Merge branch 'linus' into x86/bootmem
author Ingo Molnar <mingo@elte.hu>
Mon, 14 Feb 2011 10:55:18 +0000 (11:55 +0100)
committer Ingo Molnar <mingo@elte.hu>
Mon, 14 Feb 2011 10:55:18 +0000 (11:55 +0100)
Conflicts:
arch/x86/mm/numa_64.c

Merge reason: fix the conflict, update to latest -rc and pick up this
              dependent fix from Yinghai:

  e6d2e2b2b1e1: memblock: don't adjust size in memblock_find_base()

Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/include/asm/page_types.h
arch/x86/kernel/aperture_64.c
arch/x86/kernel/setup.c
arch/x86/mm/amdtopology_64.c
arch/x86/mm/init.c
arch/x86/mm/init_64.c
arch/x86/mm/numa_64.c
arch/x86/mm/srat_64.c
mm/page_alloc.c

diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 1df6621..731d211 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_PAGE_DEFS_H
 
 #include <linux/const.h>
+#include <linux/types.h>
 
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT     12
@@ -45,9 +46,16 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+static inline phys_addr_t get_max_mapped(void)
+{
+       return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
+}
+
 extern unsigned long init_memory_mapping(unsigned long start,
                                         unsigned long end);
 
+void init_memory_mapping_high(void);
+
 extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
                                int acpi, int k8);
 extern void free_initmem(void);
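
The new get_max_mapped() helper simply converts the highest direct-mapped PFN into a byte address. A minimal usage sketch, taken from the call sites later in this merge (setup_arch() and allocate_cachealigned_memnodemap()); nothing here beyond those two uses:

	/* Cap early memblock allocations at what is currently mapped. */
	memblock.current_limit = get_max_mapped();

	/* Or bound an explicit early range search by the mapped limit. */
	nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
					      nodemap_size, L1_CACHE_BYTES);
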
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5955a78..7b1e8e1 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -13,7 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
 static u32 __init allocate_aperture(void)
 {
        u32 aper_size;
-       void *p;
+       unsigned long addr;
 
        /* aper_size should <= 1G */
        if (fallback_aper_order > 5)
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void)
         * so don't use 512M below as gart iommu, leave the space for kernel
         * code for safe
         */
-       p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+       addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
+       if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
+               printk(KERN_ERR
+                       "Cannot allocate aperture memory hole (%lx,%uK)\n",
+                               addr, aper_size>>10);
+               return 0;
+       }
+       memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
        /*
         * Kmemleak should not scan this block as it may not be mapped via the
         * kernel direct mapping.
         */
-       kmemleak_ignore(p);
-       if (!p || __pa(p)+aper_size > 0xffffffff) {
-               printk(KERN_ERR
-                       "Cannot allocate aperture memory hole (%p,%uK)\n",
-                               p, aper_size>>10);
-               if (p)
-                       free_bootmem(__pa(p), aper_size);
-               return 0;
-       }
+       kmemleak_ignore(phys_to_virt(addr));
        printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
-                       aper_size >> 10, __pa(p));
-       insert_aperture_resource((u32)__pa(p), aper_size);
-       register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
-                               (u32)__pa(p+aper_size) >> PAGE_SHIFT);
+                       aper_size >> 10, addr);
+       insert_aperture_resource((u32)addr, aper_size);
+       register_nosave_region(addr >> PAGE_SHIFT,
+                              (addr+aper_size) >> PAGE_SHIFT);
 
-       return (u32)__pa(p);
+       return (u32)addr;
 }
 
 
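For reference, the bootmem-to-memblock conversion above reduces to a find-then-reserve idiom. A minimal sketch assuming the 2.6.38-era memblock_find_in_range(start, end, size, align) signature; allocate_aperture_sketch() is a hypothetical name used only for illustration, not kernel code:

	static u32 __init allocate_aperture_sketch(u32 aper_size)
	{
		unsigned long addr;

		/* Find a 512M-aligned hole of aper_size bytes below 4G. */
		addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
		if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff)
			return 0;		/* nothing suitable below 4G */

		/* Reserve it so later early allocations cannot claim it. */
		memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
		return (u32)addr;
	}
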
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3cfe26..6b286d8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -293,10 +293,32 @@ static void __init init_gbpages(void)
        else
                direct_gbpages = 0;
 }
+
+static void __init cleanup_highmap_brk_end(void)
+{
+       pud_t *pud;
+       pmd_t *pmd;
+
+       mmu_cr4_features = read_cr4();
+
+       /*
+        * _brk_end cannot change anymore, but it and _end may be
+        * located on different 2M pages. cleanup_highmap(), however,
+        * can only consider _end when it runs, so destroy any
+        * mappings beyond _brk_end here.
+        */
+       pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+       pmd = pmd_offset(pud, _brk_end - 1);
+       while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
+               pmd_clear(pmd);
+}
 #else
 static inline void init_gbpages(void)
 {
 }
+static inline void cleanup_highmap_brk_end(void)
+{
+}
 #endif
 
 static void __init reserve_brk(void)
@@ -307,6 +329,8 @@ static void __init reserve_brk(void)
        /* Mark brk area as locked down and no longer taking any
           new allocations */
        _brk_start = 0;
+
+       cleanup_highmap_brk_end();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -680,15 +704,6 @@ static int __init parse_reservelow(char *p)
 
 early_param("reservelow", parse_reservelow);
 
-static u64 __init get_max_mapped(void)
-{
-       u64 end = max_pfn_mapped;
-
-       end <<= PAGE_SHIFT;
-
-       return end;
-}
-
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -950,14 +965,6 @@ void __init setup_arch(char **cmdline_p)
        max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
        max_pfn_mapped = max_low_pfn_mapped;
 
-#ifdef CONFIG_X86_64
-       if (max_pfn > max_low_pfn) {
-               max_pfn_mapped = init_memory_mapping(1UL<<32,
-                                                    max_pfn<<PAGE_SHIFT);
-               /* can we preseve max_low_pfn ?*/
-               max_low_pfn = max_pfn;
-       }
-#endif
        memblock.current_limit = get_max_mapped();
 
        /*
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c..49b334c 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -278,12 +278,14 @@ int __init amd_scan_nodes(void)
                apicid_base = boot_cpu_physical_apicid;
        }
 
-       for_each_node_mask(i, node_possible_map) {
-               int j;
-
+       for_each_node_mask(i, node_possible_map)
                memblock_x86_register_active_regions(i,
                                nodes[i].start >> PAGE_SHIFT,
                                nodes[i].end >> PAGE_SHIFT);
+       init_memory_mapping_high();
+       for_each_node_mask(i, node_possible_map) {
+               int j;
+
                for (j = apicid_base; j < cores + apicid_base; j++)
                        apicid_to_node[(i << bits) + j] = i;
                setup_node_bootmem(i, nodes[i].start, nodes[i].end);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 947f42a..b8054e0 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -33,7 +33,7 @@ int direct_gbpages
 static void __init find_early_table_space(unsigned long end, int use_pse,
                                          int use_gbpages)
 {
-       unsigned long puds, pmds, ptes, tables, start;
+       unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
        phys_addr_t base;
 
        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -65,20 +65,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 #ifdef CONFIG_X86_32
        /* for fixmap */
        tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
 
-       /*
-        * RED-PEN putting page tables only on node 0 could
-        * cause a hotspot and fill up ZONE_DMA. The page tables
-        * need roughly 0.5KB per GB.
-        */
-#ifdef CONFIG_X86_32
-       start = 0x7000;
-#else
-       start = 0x8000;
+       good_end = max_pfn_mapped << PAGE_SHIFT;
 #endif
-       base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT,
-                                       tables, PAGE_SIZE);
+
+       base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
        if (base == MEMBLOCK_ERROR)
                panic("Cannot find space for the kernel page tables");
 
@@ -279,25 +270,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
        load_cr3(swapper_pg_dir);
 #endif
 
-#ifdef CONFIG_X86_64
-       if (!after_bootmem && !start) {
-               pud_t *pud;
-               pmd_t *pmd;
-
-               mmu_cr4_features = read_cr4();
-
-               /*
-                * _brk_end cannot change anymore, but it and _end may be
-                * located on different 2M pages. cleanup_highmap(), however,
-                * can only consider _end when it runs, so destroy any
-                * mappings beyond _brk_end here.
-                */
-               pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
-               pmd = pmd_offset(pud, _brk_end - 1);
-               while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
-                       pmd_clear(pmd);
-       }
-#endif
        __flush_tlb_all();
 
        if (!after_bootmem && e820_table_end > e820_table_start)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a5929..194f273 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -333,12 +333,28 @@ static __ref void *alloc_low_page(unsigned long *phys)
        return adr;
 }
 
+static __ref void *map_low_page(void *virt)
+{
+       void *adr;
+       unsigned long phys, left;
+
+       if (after_bootmem)
+               return virt;
+
+       phys = __pa(virt);
+       left = phys & (PAGE_SIZE - 1);
+       adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
+       adr = (void *)(((unsigned long)adr) | left);
+
+       return adr;
+}
+
 static __ref void unmap_low_page(void *adr)
 {
        if (after_bootmem)
                return;
 
-       early_iounmap(adr, PAGE_SIZE);
+       early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
 }
 
 static unsigned long __meminit
@@ -385,15 +401,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
        return last_map_addr;
 }
 
-static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
-               pgprot_t prot)
-{
-       pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-
-       return phys_pte_init(pte, address, end, prot);
-}
-
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
              unsigned long page_size_mask, pgprot_t prot)
@@ -420,8 +427,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
                if (pmd_val(*pmd)) {
                        if (!pmd_large(*pmd)) {
                                spin_lock(&init_mm.page_table_lock);
-                               last_map_addr = phys_pte_update(pmd, address,
+                               pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+                               last_map_addr = phys_pte_init(pte, address,
                                                                end, prot);
+                               unmap_low_page(pte);
                                spin_unlock(&init_mm.page_table_lock);
                                continue;
                        }
@@ -467,18 +476,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
        return last_map_addr;
 }
 
-static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
-               unsigned long page_size_mask, pgprot_t prot)
-{
-       pmd_t *pmd = pmd_offset(pud, 0);
-       unsigned long last_map_addr;
-
-       last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
-       __flush_tlb_all();
-       return last_map_addr;
-}
-
 static unsigned long __meminit
 phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
                         unsigned long page_size_mask)
@@ -504,8 +501,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 
                if (pud_val(*pud)) {
                        if (!pud_large(*pud)) {
-                               last_map_addr = phys_pmd_update(pud, addr, end,
+                               pmd = map_low_page(pmd_offset(pud, 0));
+                               last_map_addr = phys_pmd_init(pmd, addr, end,
                                                         page_size_mask, prot);
+                               unmap_low_page(pmd);
+                               __flush_tlb_all();
                                continue;
                        }
                        /*
@@ -553,17 +553,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
        return last_map_addr;
 }
 
-static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
-                unsigned long page_size_mask)
-{
-       pud_t *pud;
-
-       pud = (pud_t *)pgd_page_vaddr(*pgd);
-
-       return phys_pud_init(pud, addr, end, page_size_mask);
-}
-
 unsigned long __meminit
 kernel_physical_mapping_init(unsigned long start,
                             unsigned long end,
@@ -587,8 +576,10 @@ kernel_physical_mapping_init(unsigned long start,
                        next = end;
 
                if (pgd_val(*pgd)) {
-                       last_map_addr = phys_pud_update(pgd, __pa(start),
+                       pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+                       last_map_addr = phys_pud_init(pud, __pa(start),
                                                 __pa(end), page_size_mask);
+                       unmap_low_page(pud);
                        continue;
                }
 
@@ -616,9 +607,63 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
                                int acpi, int k8)
 {
        memblock_x86_register_active_regions(0, start_pfn, end_pfn);
+       init_memory_mapping_high();
 }
 #endif
 
+struct mapping_work_data {
+       unsigned long start;
+       unsigned long end;
+       unsigned long pfn_mapped;
+};
+
+static int __init_refok
+mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax)
+{
+       struct mapping_work_data *data = datax;
+       unsigned long pfn_mapped;
+       unsigned long final_start, final_end;
+
+       final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start);
+       final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end);
+
+       if (final_end <= final_start)
+               return 0;
+
+       pfn_mapped = init_memory_mapping(final_start, final_end);
+
+       if (pfn_mapped > data->pfn_mapped)
+               data->pfn_mapped = pfn_mapped;
+
+       return 0;
+}
+
+static unsigned long __init_refok
+init_memory_mapping_active_regions(unsigned long start, unsigned long end)
+{
+       struct mapping_work_data data;
+
+       data.start = start;
+       data.end = end;
+       data.pfn_mapped = 0;
+
+       work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data);
+
+       return data.pfn_mapped;
+}
+
+void __init_refok init_memory_mapping_high(void)
+{
+       if (max_pfn > max_low_pfn) {
+               max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32,
+                                                        max_pfn<<PAGE_SHIFT);
+               /* can we preserve max_low_pfn ? */
+               max_low_pfn = max_pfn;
+
+               memblock.current_limit = get_max_mapped();
+       }
+}
+
 void __init paging_init(void)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES];
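
The net effect of init_memory_mapping_high() above: RAM above 4G is now mapped per registered active region instead of as one flat 4G..max_pfn block, so each NUMA setup path must register its regions before requesting the high mapping. A condensed ordering sketch using only names from the hunks in this merge (it mirrors amd_scan_nodes() and numa_emulation(); not a literal excerpt):

	/* 1. Tell the core which PFN ranges each node owns. */
	for_each_node_mask(i, node_possible_map)
		memblock_x86_register_active_regions(i,
				nodes[i].start >> PAGE_SHIFT,
				nodes[i].end >> PAGE_SHIFT);

	/* 2. Map everything above 4G, region by region; this also lifts
	 *    memblock.current_limit via get_max_mapped().
	 */
	init_memory_mapping_high();

	/* 3. Only now do per-node bootmem setup, which may allocate high. */
	for_each_node_mask(i, node_possible_map)
		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
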
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea155..62cb634 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -86,7 +86,7 @@ static int __init allocate_cachealigned_memnodemap(void)
 
        addr = 0x8000;
        nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
-       nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT,
+       nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
                                      nodemap_size, L1_CACHE_BYTES);
        if (nodemap_addr == MEMBLOCK_ERROR) {
                printk(KERN_ERR
@@ -598,11 +598,12 @@ static int __init numa_emulation(unsigned long start_pfn,
         * the e820 memory map.
         */
        remove_all_active_ranges();
-       for_each_node_mask(i, node_possible_map) {
+       for_each_node_mask(i, node_possible_map)
                memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
                                                nodes[i].end >> PAGE_SHIFT);
+       init_memory_mapping_high();
+       for_each_node_mask(i, node_possible_map)
                setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-       }
        setup_physnodes(addr, max_addr, acpi, amd);
        fake_physnodes(acpi, amd, num_nodes);
        numa_init_array();
@@ -658,6 +659,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
        for (i = 0; i < nr_cpu_ids; i++)
                numa_set_node(i, 0);
        memblock_x86_register_active_regions(0, start_pfn, last_pfn);
+       init_memory_mapping_high();
        setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
 }
 
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285..4c03e13 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -444,6 +444,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
                return -1;
        }
 
+       init_memory_mapping_high();
+
        /* Account for nodes with cpus and no memory */
        nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a873e61..887ce3b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3616,6 +3616,34 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
        return -1;
 }
 
+/*
+ * Basic iterator support. Return the last range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns last region regardless of node
+ */
+static int __meminit last_active_region_index_in_nid(int nid)
+{
+       int i;
+
+       for (i = nr_nodemap_entries - 1; i >= 0; i--)
+               if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+                       return i;
+
+       return -1;
+}
+
+/*
+ * Basic iterator support. Return the previous active range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns next region regardless of node
+ */
+static int __meminit previous_active_region_index_in_nid(int index, int nid)
+{
+       for (index = index - 1; index >= 0; index--)
+               if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+                       return index;
+
+       return -1;
+}
+
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
 /*
  * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3667,6 +3695,10 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
        for (i = first_active_region_index_in_nid(nid); i != -1; \
                                i = next_active_region_index_in_nid(i, nid))
 
+#define for_each_active_range_index_in_nid_reverse(i, nid) \
+       for (i = last_active_region_index_in_nid(nid); i != -1; \
+                               i = previous_active_region_index_in_nid(i, nid))
+
 /**
  * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3705,7 +3737,7 @@ u64 __init find_memory_core_early(int nid, u64 size, u64 align,
        int i;
 
        /* Need to go over early_node_map to find out good range for node */
-       for_each_active_range_index_in_nid(i, nid) {
+       for_each_active_range_index_in_nid_reverse(i, nid) {
                u64 addr;
                u64 ei_start, ei_last;
                u64 final_start, final_end;
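
Finally, a minimal sketch of how the new reverse iterator is meant to be walked, mirroring find_memory_core_early() above; the loop body is illustrative only:

	int i;

	/* Visit a node's early ranges from the last entry backwards, so an
	 * early allocation is attempted from high addresses first.
	 */
	for_each_active_range_index_in_nid_reverse(i, nid) {
		u64 ei_start = (u64)early_node_map[i].start_pfn << PAGE_SHIFT;
		u64 ei_last  = (u64)early_node_map[i].end_pfn << PAGE_SHIFT;

		/* ... try to place the allocation inside [ei_start, ei_last) ... */
	}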