x86: introduce max_low_pfn_mapped for 64-bit
author Yinghai Lu <yhlu.kernel@gmail.com>
Fri, 11 Jul 2008 03:38:26 +0000 (20:38 -0700)
committer Ingo Molnar <mingo@elte.hu>
Fri, 11 Jul 2008 08:24:04 +0000 (10:24 +0200)
When more than 4 GB of memory is installed, don't map the big hole below 4 GB.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
12 files changed:
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/cpu/amd_64.c
arch/x86/kernel/e820.c
arch/x86/kernel/efi.c
arch/x86/kernel/setup.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/pageattr.c
arch/x86/mm/pat.c
arch/x86/pci/i386.c
include/asm-x86/e820.h
include/asm-x86/page.h

index a31a579..9c981c4 100644 (file)
@@ -130,7 +130,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
        if (!phys || !size)
                return NULL;
 
-       if (phys+size <= (max_pfn_mapped << PAGE_SHIFT))
+       if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
                return __va(phys);
 
        offset = phys & (PAGE_SIZE - 1);
index 958526d..bd182b7 100644 (file)
@@ -199,10 +199,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
                 * Don't do it for gbpages because there seems very little
                 * benefit in doing so.
                 */
-               if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
-                   (tseg >> PMD_SHIFT) <
-                       (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+               if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+                   if ((tseg>>PMD_SHIFT) <
+                               (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
+                       ((tseg>>PMD_SHIFT) <
+                               (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
+                        (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
                        set_memory_4k((unsigned long)__va(tseg), 1);
+               }
        }
 }
 
index 3451e0b..9f5002e 100644 (file)
@@ -1056,7 +1056,7 @@ unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
 /*
  * Find the highest page frame number we have available
  */
-unsigned long __init e820_end(void)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
        int i;
        unsigned long last_pfn = 0;
@@ -1064,12 +1064,21 @@ unsigned long __init e820_end(void)
 
        for (i = 0; i < e820.nr_map; i++) {
                struct e820entry *ei = &e820.map[i];
+               unsigned long start_pfn;
                unsigned long end_pfn;
 
-               if (ei->type != E820_RAM)
+               if (ei->type != type)
                        continue;
 
+               start_pfn = ei->addr >> PAGE_SHIFT;
                end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
+
+               if (start_pfn >= limit_pfn)
+                       continue;
+               if (end_pfn > limit_pfn) {
+                       last_pfn = limit_pfn;
+                       break;
+               }
                if (end_pfn > last_pfn)
                        last_pfn = end_pfn;
        }
@@ -1083,7 +1092,15 @@ unsigned long __init e820_end(void)
                         last_pfn, max_arch_pfn);
        return last_pfn;
 }
+unsigned long __init e820_end_of_ram_pfn(void)
+{
+       return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+}
 
+unsigned long __init e820_end_of_low_ram_pfn(void)
+{
+       return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+}
 /*
  * Finds an active region in the address range from start_pfn to last_pfn and
  * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
@@ -1206,7 +1223,7 @@ static int __init parse_memmap_opt(char *p)
                 * the real mem size before original memory map is
                 * reset.
                 */
-               saved_max_pfn = e820_end();
+               saved_max_pfn = e820_end_of_ram_pfn();
 #endif
                e820.nr_map = 0;
                userdef = 1;
index 94382fa..06cc8d4 100644 (file)
@@ -473,7 +473,7 @@ void __init efi_enter_virtual_mode(void)
                size = md->num_pages << EFI_PAGE_SHIFT;
                end = md->phys_addr + size;
 
-               if (PFN_UP(end) <= max_pfn_mapped)
+               if (PFN_UP(end) <= max_low_pfn_mapped)
                        va = __va(md->phys_addr);
                else
                        va = efi_ioremap(md->phys_addr, size);
index a7c3471..86fc2d6 100644 (file)
@@ -713,14 +713,14 @@ void __init setup_arch(char **cmdline_p)
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-       max_pfn = e820_end();
+       max_pfn = e820_end_of_ram_pfn();
 
        /* preallocate 4k for mptable mpc */
        early_reserve_e820_mpc_new();
        /* update e820 for memory not covered by WB MTRRs */
        mtrr_bp_init();
        if (mtrr_trim_uncached_memory(max_pfn))
-               max_pfn = e820_end();
+               max_pfn = e820_end_of_ram_pfn();
 
 #ifdef CONFIG_X86_32
        /* max_low_pfn get updated here */
@@ -732,12 +732,26 @@ void __init setup_arch(char **cmdline_p)
 
        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
-       max_low_pfn = max_pfn;
+       if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
+               max_low_pfn = e820_end_of_low_ram_pfn();
+       else
+               max_low_pfn = max_pfn;
+
        high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
        /* max_pfn_mapped is updated here */
-       max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+       max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+       max_pfn_mapped = max_low_pfn_mapped;
+
+#ifdef CONFIG_X86_64
+       if (max_pfn > max_low_pfn) {
+               max_pfn_mapped = init_memory_mapping(1UL<<32,
+                                                    max_pfn<<PAGE_SHIFT);
+               /* can we preserve max_low_pfn? */
+               max_low_pfn = max_pfn;
+       }
+#endif
 
        /*
         * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
index b5a0fd5..029e8cf 100644 (file)
@@ -50,6 +50,7 @@
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
index 48548ef..122bcef 100644 (file)
@@ -53,6 +53,7 @@
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
  */
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 static unsigned long dma_reserve __initdata;
index afd4005..0389cb8 100644 (file)
@@ -536,8 +536,14 @@ static int split_large_page(pte_t *kpte, unsigned long address)
                set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
        if (address >= (unsigned long)__va(0) &&
+               address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
+               split_page_count(level);
+
+#ifdef CONFIG_X86_64
+       if (address >= (unsigned long)__va(1UL<<32) &&
                address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
                split_page_count(level);
+#endif
 
        /*
         * Install the new, split up pagetable. Important details here:
@@ -655,12 +661,21 @@ static int cpa_process_alias(struct cpa_data *cpa)
        if (cpa->pfn > max_pfn_mapped)
                return 0;
 
+#ifdef CONFIG_X86_64
+       if (cpa->pfn > max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+               return 0;
+#endif
        /*
         * No need to redo, when the primary call touched the direct
         * mapping already:
         */
-       if (!within(cpa->vaddr, PAGE_OFFSET,
-                   PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+       if (!(within(cpa->vaddr, PAGE_OFFSET,
+                   PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
+#ifdef CONFIG_X86_64
+               || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+                   PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
+#endif
+       )) {
 
                alias_cpa = *cpa;
                alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
index a885a10..749766c 100644 (file)
@@ -449,7 +449,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
        if (retval < 0)
                return 0;
 
-       if (pfn <= max_pfn_mapped &&
+       if (((pfn <= max_low_pfn_mapped) ||
+            (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
            ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
                free_memtype(offset, offset + size);
                printk(KERN_INFO
index 6ccd7a1..5281e34 100644 (file)
@@ -334,7 +334,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
                flags = new_flags;
        }
 
-       if (vma->vm_pgoff <= max_pfn_mapped &&
+       if (((vma->vm_pgoff <= max_low_pfn_mapped) ||
+            (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
+             vma->vm_pgoff <= max_pfn_mapped)) &&
            ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
                free_memtype(addr, addr + len);
                return -EINVAL;
index 78c03d7..33e793e 100644 (file)
@@ -99,7 +99,8 @@ extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
-extern unsigned long e820_end(void);
+extern unsigned long e820_end_of_ram_pfn(void);
+extern unsigned long e820_end_of_low_ram_pfn(void);
 extern int e820_find_active_region(const struct e820entry *ei,
                                  unsigned long start_pfn,
                                  unsigned long last_pfn,
index b52ed85..28d7b45 100644 (file)
@@ -61,6 +61,7 @@ extern void map_devmem(unsigned long pfn, unsigned long size,
 extern void unmap_devmem(unsigned long pfn, unsigned long size,
                         pgprot_t vma_prot);
 
+extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
 struct page;