/*
 *  linux/arch/x86-64/kernel/setup.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Nov 2001 Dave Jones <davej@suse.de>
 *  Forked from i386 setup code.
 *
 *  This file handles the architecture-dependent parts of initialization
 */
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/config.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/highmem.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <asm/processor.h>
#include <linux/console.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/root_dev.h>
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/kallsyms.h>
#include <linux/edd.h>
#include <linux/mmzone.h>
#include <linux/kexec.h>
#include <linux/cpufreq.h>
#include <linux/dmi.h>
#include <linux/dma-mapping.h>
#include <linux/ctype.h>
#include <linux/suspend.h>

#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <video/edid.h>
#include <asm/e820.h>
#include <asm/dma.h>
#include <asm/mpspec.h>
#include <asm/mmu_context.h>
#include <asm/bootsetup.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/mach_apic.h>
#include <asm/numa.h>
#include <asm/swiotlb.h>
#include <asm/sections.h>
#include <asm/gart-mapping.h>
/*
 * Machine setup..
 */

struct cpuinfo_x86 boot_cpu_data __read_mostly;

unsigned long mmu_cr4_features;

int acpi_disabled;
EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_ACPI
extern int __initdata acpi_ht;
extern acpi_interrupt_flags acpi_sci_flags;
int __initdata acpi_force = 0;
#endif

int acpi_numa __initdata;

/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;

unsigned long saved_video_mode;

/*
 * Early DMI memory
 */
int dmi_alloc_index;
char dmi_alloc_data[DMI_MAX_DATA];

/*
 * Setup options
 */
struct screen_info screen_info;
struct sys_desc_table_struct {
        unsigned short length;
        unsigned char table[0];
};

struct edid_info edid_info;

extern int root_mountflags;

char command_line[COMMAND_LINE_SIZE];
struct resource standard_io_resources[] = {
        { .name = "dma1", .start = 0x00, .end = 0x1f,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "pic1", .start = 0x20, .end = 0x21,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "timer0", .start = 0x40, .end = 0x43,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "timer1", .start = 0x50, .end = 0x53,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "keyboard", .start = 0x60, .end = 0x6f,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "dma page reg", .start = 0x80, .end = 0x8f,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "pic2", .start = 0xa0, .end = 0xa1,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "dma2", .start = 0xc0, .end = 0xdf,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO },
        { .name = "fpu", .start = 0xf0, .end = 0xff,
                .flags = IORESOURCE_BUSY | IORESOURCE_IO }
};

#define STANDARD_IO_RESOURCES \
        (sizeof standard_io_resources / sizeof standard_io_resources[0])
#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)

struct resource data_resource = {
        .name = "Kernel data",
        .start = 0,
        .end = 0,
        .flags = IORESOURCE_RAM,
};
struct resource code_resource = {
        .name = "Kernel code",
        .start = 0,
        .end = 0,
        .flags = IORESOURCE_RAM,
};
#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)

static struct resource system_rom_resource = {
        .name = "System ROM",
        .start = 0xf0000,
        .end = 0xfffff,
        .flags = IORESOURCE_ROM,
};

static struct resource extension_rom_resource = {
        .name = "Extension ROM",
        .start = 0xe0000,
        .end = 0xeffff,
        .flags = IORESOURCE_ROM,
};
static struct resource adapter_rom_resources[] = {
        { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
                .flags = IORESOURCE_ROM },
        { .name = "Adapter ROM", .start = 0, .end = 0,
                .flags = IORESOURCE_ROM },
        { .name = "Adapter ROM", .start = 0, .end = 0,
                .flags = IORESOURCE_ROM },
        { .name = "Adapter ROM", .start = 0, .end = 0,
                .flags = IORESOURCE_ROM },
        { .name = "Adapter ROM", .start = 0, .end = 0,
                .flags = IORESOURCE_ROM },
        { .name = "Adapter ROM", .start = 0, .end = 0,
                .flags = IORESOURCE_ROM }
};

#define ADAPTER_ROM_RESOURCES \
        (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
static struct resource video_rom_resource = {
        .name = "Video ROM",
        .start = 0xc0000,
        .end = 0xc7fff,
        .flags = IORESOURCE_ROM,
};

static struct resource video_ram_resource = {
        .name = "Video RAM area",
        .start = 0xa0000,
        .end = 0xbffff,
        .flags = IORESOURCE_RAM,
};
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)

static int __init romchecksum(unsigned char *rom, unsigned long length)
{
        unsigned char *p, sum = 0;

        for (p = rom; p < rom + length; p++)
                sum += *p;
        return sum == 0;
}
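/*
 * Layout of a legacy BIOS expansion ROM header, for reference: bytes 0-1
 * hold the 0x55 0xAA signature (read above as the little-endian word
 * 0xaa55), byte 2 holds the image length in 512-byte units, and the
 * bytes of a valid image sum to 0 modulo 256 -- hence romchecksum()
 * just adds every byte into an 8-bit accumulator and compares with 0.
 * A 16K video ROM, for instance, has rom[2] == 0x20 (0x20 * 512 == 16384).
 */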
static void __init probe_roms(void)
{
        unsigned long start, length, upper;
        unsigned char *rom;
        int i;

        /* video rom */
        upper = adapter_rom_resources[0].start;
        for (start = video_rom_resource.start; start < upper; start += 2048) {
                rom = isa_bus_to_virt(start);
                if (!romsignature(rom))
                        continue;

                video_rom_resource.start = start;

                /* 0 < length <= 0x7f * 512, historically */
                length = rom[2] * 512;

                /* if checksum okay, trust length byte */
                if (length && romchecksum(rom, length))
                        video_rom_resource.end = start + length - 1;

                request_resource(&iomem_resource, &video_rom_resource);
                break;
        }

        start = (video_rom_resource.end + 1 + 2047) & ~2047UL;

        /* system rom */
        request_resource(&iomem_resource, &system_rom_resource);
        upper = system_rom_resource.start;

        /* check for extension rom (ignore length byte!) */
        rom = isa_bus_to_virt(extension_rom_resource.start);
        if (romsignature(rom)) {
                length = extension_rom_resource.end - extension_rom_resource.start + 1;
                if (romchecksum(rom, length)) {
                        request_resource(&iomem_resource, &extension_rom_resource);
                        upper = extension_rom_resource.start;
                }
        }

        /* check for adapter roms on 2k boundaries */
        for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
                rom = isa_bus_to_virt(start);
                if (!romsignature(rom))
                        continue;

                /* 0 < length <= 0x7f * 512, historically */
                length = rom[2] * 512;

                /* but accept any length that fits if checksum okay */
                if (!length || start + length > upper || !romchecksum(rom, length))
                        continue;

                adapter_rom_resources[i].start = start;
                adapter_rom_resources[i].end = start + length - 1;
                request_resource(&iomem_resource, &adapter_rom_resources[i]);

                start = adapter_rom_resources[i++].end & ~2047UL;
        }
}
/* Check for full argument with no trailing characters */
static int fullarg(char *p, char *arg)
{
        int l = strlen(arg);
        return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
}
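/*
 * fullarg() matches only a complete option word.  Given the command
 * line "acpi=offended", for example, fullarg(from, "acpi=off") is false
 * because the match is followed by 'e' rather than a NUL or whitespace,
 * whereas a bare memcmp() would have accepted it.
 */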
static __init void parse_cmdline_early (char ** cmdline_p)
{
        char c = ' ', *to = command_line, *from = COMMAND_LINE;
        int len = 0;
        int userdef = 0;

        for (;;) {
                if (c != ' ')
                        goto next_char;

#ifdef  CONFIG_SMP
                /*
                 * If the BIOS enumerates physical processors before logical,
                 * maxcpus=N at enumeration-time can be used to disable HT.
                 */
                else if (!memcmp(from, "maxcpus=", 8)) {
                        extern unsigned int maxcpus;

                        maxcpus = simple_strtoul(from + 8, NULL, 0);
                }
#endif
#ifdef CONFIG_ACPI
                /* "acpi=off" disables both ACPI table parsing and interpreter init */
                if (fullarg(from,"acpi=off"))
                        disable_acpi();

                if (fullarg(from, "acpi=force")) {
                        /* add later when we do DMI horrors: */
                        acpi_force = 1;
                        acpi_disabled = 0;
                }

                /* acpi=ht just means: do ACPI MADT parsing
                   at bootup, but don't enable the full ACPI interpreter */
                if (fullarg(from, "acpi=ht")) {
                        if (!acpi_force)
                                disable_acpi();
                        acpi_ht = 1;
                }
                else if (fullarg(from, "pci=noacpi"))
                        acpi_disable_pci();
                else if (fullarg(from, "acpi=noirq"))
                        acpi_noirq_set();

                else if (fullarg(from, "acpi_sci=edge"))
                        acpi_sci_flags.trigger = 1;
                else if (fullarg(from, "acpi_sci=level"))
                        acpi_sci_flags.trigger = 3;
                else if (fullarg(from, "acpi_sci=high"))
                        acpi_sci_flags.polarity = 1;
                else if (fullarg(from, "acpi_sci=low"))
                        acpi_sci_flags.polarity = 3;

                /* acpi=strict disables out-of-spec workarounds */
                else if (fullarg(from, "acpi=strict")) {
                        acpi_strict = 1;
                }
#ifdef CONFIG_X86_IO_APIC
                else if (fullarg(from, "acpi_skip_timer_override"))
                        acpi_skip_timer_override = 1;
#endif
#endif

                if (fullarg(from, "disable_timer_pin_1"))
                        disable_timer_pin_1 = 1;
                if (fullarg(from, "enable_timer_pin_1"))
                        disable_timer_pin_1 = -1;

                if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
                        disable_apic = 1;
                        clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
                }

                if (fullarg(from, "noapic"))
                        skip_ioapic_setup = 1;

                if (fullarg(from,"apic")) {
                        skip_ioapic_setup = 0;
                        ioapic_force = 1;
                }

                if (!memcmp(from, "mem=", 4))
                        parse_memopt(from+4, &from);

                if (!memcmp(from, "memmap=", 7)) {
                        /* exactmap option is for user-defined memory */
                        if (!memcmp(from+7, "exactmap", 8)) {
#ifdef CONFIG_CRASH_DUMP
                                /* If we are doing a crash dump, we
                                 * still need to know the real memory
                                 * size before the original memory map
                                 * is reset.
                                 */
                                saved_max_pfn = e820_end_of_ram();
#endif
                                from += 8+7;
                                end_pfn_map = 0;
                                e820.nr_map = 0;
                                userdef = 1;
                        }
                        else {
                                parse_memmapopt(from+7, &from);
                                userdef = 1;
                        }
                }

#ifdef CONFIG_NUMA
                if (!memcmp(from, "numa=", 5))
                        numa_setup(from+5);
#endif

                if (!memcmp(from,"iommu=",6)) {
                        iommu_setup(from+6);
                }

                if (fullarg(from,"oops=panic"))
                        panic_on_oops = 1;

                if (!memcmp(from, "noexec=", 7))
                        nonx_setup(from + 7);

#ifdef CONFIG_KEXEC
                /* crashkernel=size@addr specifies the location to reserve for
                 * a crash kernel.  By reserving this memory we guarantee
                 * that linux never sets it up as a DMA target.
                 * Useful for holding code to do something appropriate
                 * after a kernel panic.
                 */
                else if (!memcmp(from, "crashkernel=", 12)) {
                        unsigned long size, base;
                        size = memparse(from+12, &from);
                        if (*from == '@') {
                                base = memparse(from+1, &from);
                                /* FIXME: Do I want a sanity check
                                 * to validate the memory range?
                                 */
                                crashk_res.start = base;
                                crashk_res.end   = base + size - 1;
                        }
                }
#endif

#ifdef CONFIG_PROC_VMCORE
                /* elfcorehdr= specifies the location of elf core header
                 * stored by the crashed kernel. This option will be passed
                 * by kexec loader to the capture kernel.
                 */
                else if(!memcmp(from, "elfcorehdr=", 11))
                        elfcorehdr_addr = memparse(from+11, &from);
#endif

#ifdef CONFIG_HOTPLUG_CPU
                else if (!memcmp(from, "additional_cpus=", 16))
                        setup_additional_cpus(from+16);
#endif

        next_char:
                c = *(from++);
                if (!c)
                        break;
                if (COMMAND_LINE_SIZE <= ++len)
                        break;
                *(to++) = c;
        }
        if (userdef) {
                printk(KERN_INFO "user-defined physical RAM map:\n");
                e820_print_map("user");
        }
        *to = '\0';
        *cmdline_p = command_line;
}
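/*
 * Worked example of the scanner above: given the boot line
 * "mem=512M crashkernel=64M@16M", the loop matches "mem=" and has
 * parse_memopt() clamp RAM at 512MB, then memparse() reads "64M" into
 * size, sees the '@', and reads "16M" into base, so crashk_res spans
 * 0x1000000-0x4ffffff.  The whole line, recognized or not, is also
 * copied through into command_line for the later option parsers.
 */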
#ifndef CONFIG_NUMA
static void __init
contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long bootmap_size, bootmap;

        bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
        bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
        if (bootmap == -1L)
                panic("Cannot find bootmem map of size %ld\n",bootmap_size);
        bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
        e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
        reserve_bootmem(bootmap, bootmap_size);
}
#endif
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.data\nk8nops: "
    K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
    K8_NOP7 K8_NOP8);

extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
        NULL,
        k8nops,
        k8nops + 1,
        k8nops + 1 + 2,
        k8nops + 1 + 2 + 3,
        k8nops + 1 + 2 + 3 + 4,
        k8nops + 1 + 2 + 3 + 4 + 5,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
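/*
 * The table above indexes the packed nop sequence by length: the nops
 * are emitted back to back in .data in increasing size, so the k-byte
 * nop starts at offset 1 + 2 + ... + (k-1) from k8nops.  k8_nops[3],
 * for instance, skips past the 1- and 2-byte nops (offset 3) and points
 * at the 3-byte encoding.
 */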
extern char __vsyscall_0;

/* Replace instructions with better alternatives for this CPU type.

   This runs before SMP is initialized to avoid SMP problems with
   self modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   In this case boot with "noreplacement". */
void apply_alternatives(void *start, void *end)
{
        struct alt_instr *a;
        int diff, i, k;

        for (a = start; (void *)a < end; a++) {
                u8 *instr;

                if (!boot_cpu_has(a->cpuid))
                        continue;

                BUG_ON(a->replacementlen > a->instrlen);
                instr = a->instr;
                /* vsyscall code is not mapped yet. resolve it manually. */
                if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
                        instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
                __inline_memcpy(instr, a->replacement, a->replacementlen);
                diff = a->instrlen - a->replacementlen;

                /* Pad the rest with nops */
                for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
                        k = diff;
                        if (k > ASM_NOP_MAX)
                                k = ASM_NOP_MAX;
                        __inline_memcpy(instr + i, k8_nops[k], k);
                }
        }
}
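/*
 * Padding example: if a 2-byte replacement lands in a 7-byte slot, diff
 * is 5, so the loop above copies a single 5-byte nop and terminates; a
 * 12-byte gap would take an 8-byte nop (ASM_NOP_MAX) followed by a
 * 4-byte nop.  Using the longest nops available keeps the patched-out
 * bytes cheap to execute.
 */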
static int no_replacement __initdata = 0;

void __init alternative_instructions(void)
{
        extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
        if (no_replacement)
                return;
        apply_alternatives(__alt_instructions, __alt_instructions_end);
}

static int __init noreplacement_setup(char *s)
{
        no_replacement = 1;
        return 1;
}

__setup("noreplacement", noreplacement_setup);
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
{
        memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
        memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
        edd.mbr_signature_nr = EDD_MBR_SIG_NR;
        edd.edd_info_nr = EDD_NR;
}
#else
static inline void copy_edd(void)
{
}
#endif
#define EBDA_ADDR_POINTER 0x40E

unsigned __initdata ebda_addr;
unsigned __initdata ebda_size;

static void discover_ebda(void)
{
        /*
         * there is a real-mode segmented pointer pointing to the
         * 4K EBDA area at 0x40E
         */
        ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
        ebda_addr <<= 4;

        ebda_size = *(unsigned short *)(unsigned long)ebda_addr;

        /* Round EBDA up to pages */
        if (ebda_size == 0)
                ebda_size = 1;
        ebda_size <<= 10;
        ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
        if (ebda_size > 64*1024)
                ebda_size = 64*1024;
}
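/*
 * Example of the real-mode pointer conversion above: if the BDA word at
 * 0x40E holds the segment 0x9FC0, shifting left by 4 yields the linear
 * address 0x9FC00.  The word at the start of the EBDA holds its size in
 * KiB, hence the "<<= 10" to get bytes before rounding to whole pages.
 */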
#ifdef CONFIG_SOFTWARE_SUSPEND
static void __init mark_nosave_page_range(unsigned long start, unsigned long end)
{
        struct page *page;
        while (start <= end) {
                page = pfn_to_page(start);
                SetPageNosave(page);
                start++;
        }
}
static void __init e820_nosave_reserved_pages(void)
{
        int i;
        unsigned long r_start = 0, r_end = 0;

        /* Assume e820 map is sorted */
        for (i = 0; i < e820.nr_map; i++) {
                struct e820entry *ei = &e820.map[i];
                unsigned long start, end;

                start = round_down(ei->addr, PAGE_SIZE);
                end = round_up(ei->addr + ei->size, PAGE_SIZE);
                if (start >= end)
                        continue;
                if (ei->type == E820_RESERVED)
                        continue;
                r_end = start>>PAGE_SHIFT;
                /* swsusp ignores invalid pfn, ignore these pages here */
                if (r_end > end_pfn)
                        r_end = end_pfn;
                if (r_end > r_start)
                        mark_nosave_page_range(r_start, r_end-1);
                if (r_end >= end_pfn)
                        break;
                r_start = end>>PAGE_SHIFT;
        }
}
static void __init e820_save_acpi_pages(void)
{
        int i;

        /* Assume e820 map is sorted */
        for (i = 0; i < e820.nr_map; i++) {
                struct e820entry *ei = &e820.map[i];
                unsigned long start, end;

                start = round_up(ei->addr, PAGE_SIZE);
                end = ei->addr + ei->size;
                if (start >= end)
                        continue;
                if (ei->type != E820_ACPI && ei->type != E820_NVS)
                        continue;
                /*
                 * If the region is below end_pfn, it will be
                 * saved/restored by swsusp along with the 'RAM' type.
                 */
                if (start < (end_pfn << PAGE_SHIFT))
                        start = end_pfn << PAGE_SHIFT;
                if (start < end)
                        swsusp_add_arch_pages(start, end);
        }
}
extern char __start_rodata, __end_rodata;
/*
 * BIOS reserved region/hole - no save/restore
 * ACPI NVS - save/restore
 * ACPI Data - this is a little tricky: the memory could be used by the OS
 * after the OS has read the tables from the region, but saving/restoring
 * it has no side effect either way, and Linux runtime module load/unload
 * might use it.
 * kernel rodata - no save/restore (kernel rodata isn't changed)
 */
static int __init mark_nosave_pages(void)
{
        unsigned long pfn_start, pfn_end;

        /* BIOS reserved regions & holes */
        e820_nosave_reserved_pages();

        /* kernel rodata */
        pfn_start = round_up(__pa_symbol(&__start_rodata), PAGE_SIZE) >> PAGE_SHIFT;
        pfn_end = round_down(__pa_symbol(&__end_rodata), PAGE_SIZE) >> PAGE_SHIFT;
        mark_nosave_page_range(pfn_start, pfn_end-1);

        /* record ACPI Data/NVS as saveable */
        e820_save_acpi_pages();

        return 0;
}
core_initcall(mark_nosave_pages);
#endif
void __init setup_arch(char **cmdline_p)
{
        unsigned long kernel_end;

        ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
        screen_info = SCREEN_INFO;
        edid_info = EDID_INFO;
        saved_video_mode = SAVED_VIDEO_MODE;
        bootloader_type = LOADER_TYPE;

#ifdef CONFIG_BLK_DEV_RAM
        rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
        rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
        rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
        setup_memory_region();
        copy_edd();

        if (!MOUNT_ROOT_RDONLY)
                root_mountflags &= ~MS_RDONLY;
        init_mm.start_code = (unsigned long) &_text;
        init_mm.end_code = (unsigned long) &_etext;
        init_mm.end_data = (unsigned long) &_edata;
        init_mm.brk = (unsigned long) &_end;

        code_resource.start = virt_to_phys(&_text);
        code_resource.end = virt_to_phys(&_etext)-1;
        data_resource.start = virt_to_phys(&_etext);
        data_resource.end = virt_to_phys(&_edata)-1;

        parse_cmdline_early(cmdline_p);

        early_identify_cpu(&boot_cpu_data);

        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
        end_pfn = e820_end_of_ram();
        num_physpages = end_pfn;                /* for pfn_valid */

        check_efer();

        discover_ebda();

        init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));

        dmi_scan_machine();

        zap_low_mappings(0);

#ifdef CONFIG_ACPI
        /*
         * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
         * Call this early for SRAT node setup.
         */
        acpi_boot_table_init();
#endif

#ifdef CONFIG_ACPI_NUMA
        /*
         * Parse SRAT to discover nodes.
         */
        acpi_numa_init();
#endif

#ifdef CONFIG_NUMA
        numa_initmem_init(0, end_pfn);
#else
        contig_initmem_init(0, end_pfn);
#endif

        /* Reserve direct mapping */
        reserve_bootmem_generic(table_start << PAGE_SHIFT,
                                (table_end - table_start) << PAGE_SHIFT);

        /* reserve kernel */
        kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
        reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);

        /*
         * reserve physical page 0 - it's a special BIOS page on many boxes,
         * enabling clean reboots, SMP operation, laptop functions.
         */
        reserve_bootmem_generic(0, PAGE_SIZE);

        /* reserve ebda region */
        if (ebda_addr)
                reserve_bootmem_generic(ebda_addr, ebda_size);

#ifdef CONFIG_SMP
        /*
         * But first pinch a few for the stack/trampoline stuff
         * FIXME: Don't need the extra page at 4K, but need to fix
         * trampoline before removing it. (see the GDT stuff)
         */
        reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);

        /* Reserve SMP trampoline */
        reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
#endif

#ifdef CONFIG_ACPI_SLEEP
        /*
         * Reserve low memory region for sleep support.
         */
        acpi_reserve_bootmem();
#endif
#ifdef CONFIG_X86_LOCAL_APIC
        /*
         * Find and reserve possible boot-time SMP configuration:
         */
        find_smp_config();
#endif
#ifdef CONFIG_BLK_DEV_INITRD
        if (LOADER_TYPE && INITRD_START) {
                if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
                        reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
                        initrd_start =
                                INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
                        initrd_end = initrd_start+INITRD_SIZE;
                }
                else {
                        printk(KERN_ERR "initrd extends beyond end of memory "
                            "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
                            (unsigned long)(INITRD_START + INITRD_SIZE),
                            (unsigned long)(end_pfn << PAGE_SHIFT));
                        initrd_start = 0;
                }
        }
#endif
#ifdef CONFIG_KEXEC
        if (crashk_res.start != crashk_res.end) {
                reserve_bootmem_generic(crashk_res.start,
                        crashk_res.end - crashk_res.start + 1);
        }
#endif

        paging_init();

        check_ioapic();

        /*
         * set this early, so we don't allocate cpu0
         * if the MADT list doesn't list the BSP first.
         * mpparse.c/MP_processor_info() allocates logical cpu numbers.
         */
        cpu_set(0, cpu_present_map);
#ifdef CONFIG_ACPI
        /*
         * Read APIC and some other early information from ACPI tables.
         */
        acpi_boot_init();
#endif

        init_cpu_to_node();

#ifdef CONFIG_X86_LOCAL_APIC
        /*
         * get boot-time SMP configuration:
         */
        if (smp_found_config)
                get_smp_config();
        init_apic_mappings();
#endif

        /*
         * Request address space for all standard RAM and ROM resources
         * and also for regions reported as reserved by the e820.
         */
        probe_roms();
        e820_reserve_resources();

        request_resource(&iomem_resource, &video_ram_resource);

        {
                unsigned i;
                /* request I/O space for devices used on all i[345]86 PCs */
                for (i = 0; i < STANDARD_IO_RESOURCES; i++)
                        request_resource(&ioport_resource, &standard_io_resources[i]);
        }

        e820_setup_gap();

#ifdef CONFIG_GART_IOMMU
        iommu_hole_init();
#endif

#if defined(CONFIG_VGA_CONSOLE)
        conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
        conswitchp = &dummy_con;
#endif
}
static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
        unsigned int *v;

        if (c->extended_cpuid_level < 0x80000004)
                return 0;

        v = (unsigned int *) c->x86_model_id;
        cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
        cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
        cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
        c->x86_model_id[48] = 0;
        return 1;
}
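/*
 * CPUID leaves 0x80000002-0x80000004 each return 16 bytes of the
 * 48-byte processor brand string in EAX:EBX:ECX:EDX, which is why
 * x86_model_id is filled as twelve consecutive 32-bit words above and
 * then NUL-terminated at offset 48.
 */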
static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
        unsigned int n, dummy, eax, ebx, ecx, edx;

        n = c->extended_cpuid_level;

        if (n >= 0x80000005) {
                cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
                printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
                        edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
                c->x86_cache_size=(ecx>>24)+(edx>>24);
                /* On K8 L1 TLB is inclusive, so don't count it */
                c->x86_tlbsize = 0;
        }

        if (n >= 0x80000006) {
                cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
                ecx = cpuid_ecx(0x80000006);
                c->x86_cache_size = ecx >> 16;
                c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);

                printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
                        c->x86_cache_size, ecx & 0xFF);
        }

        if (n >= 0x80000007)
                cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
        if (n >= 0x80000008) {
                cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
                c->x86_virt_bits = (eax >> 8) & 0xff;
                c->x86_phys_bits = eax & 0xff;
        }
}
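/*
 * Decoding example for the AMD cache leaves above: an ECX of 0x40020140
 * from CPUID 0x80000005 reports a 64K L1 D-cache (bits 31:24 = 0x40)
 * with 64-byte lines (bits 7:0 = 0x40); for leaf 0x80000006 the L2 size
 * in ECX bits 31:16 is already given in KB, hence the plain ">> 16".
 */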
#ifdef CONFIG_NUMA
static int nearby_node(int apicid)
{
        int i;
        for (i = apicid - 1; i >= 0; i--) {
                int node = apicid_to_node[i];
                if (node != NUMA_NO_NODE && node_online(node))
                        return node;
        }
        for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
                int node = apicid_to_node[i];
                if (node != NUMA_NO_NODE && node_online(node))
                        return node;
        }
        return first_node(node_online_map); /* Shouldn't happen */
}
#endif
/*
 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
 * Assumes number of cores is a power of two.
 */
static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
        unsigned bits;
        int cpu = smp_processor_id();
#ifdef CONFIG_NUMA
        int node = 0;
        unsigned apicid = hard_smp_processor_id();
#endif

        bits = 0;
        while ((1 << bits) < c->x86_max_cores)
                bits++;

        /* Low order bits define the core id (index of core in socket) */
        cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
        /* Convert the APIC ID into the socket ID */
        phys_proc_id[cpu] = phys_pkg_id(bits);

#ifdef CONFIG_NUMA
        node = phys_proc_id[cpu];
        if (apicid_to_node[apicid] != NUMA_NO_NODE)
                node = apicid_to_node[apicid];
        if (!node_online(node)) {
                /* Two possibilities here:
                   - The CPU is missing memory and no node was created.
                     In that case try picking one from a nearby CPU
                   - The APIC IDs differ from the HyperTransport node IDs
                     which the K8 northbridge parsing fills in.
                     Assume they are all increased by a constant offset,
                     but in the same order as the HT nodeids.
                     If that doesn't result in a usable node fall back to the
                     path for the previous case. */
                int ht_nodeid = apicid - (phys_proc_id[0] << bits);
                if (ht_nodeid >= 0 &&
                    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
                        node = apicid_to_node[ht_nodeid];
                /* Pick a nearby node */
                if (!node_online(node))
                        node = nearby_node(apicid);
        }
        numa_set_node(cpu, node);

        printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n",
               cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
#endif
#endif
}
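/*
 * Example, assuming the default (flat) APIC mode where phys_pkg_id(n)
 * is the APIC id shifted right by n: on a dual-core part x86_max_cores
 * is 2, so bits ends up 1; APIC id 5 (0b101) then yields core id
 * 5 & 1 = 1 and socket id 5 >> 1 = 2.
 */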
static int __init init_amd(struct cpuinfo_x86 *c)
{
        unsigned level;
        int r;

#ifdef CONFIG_SMP
        unsigned long value;

        /*
         * Disable TLB flush filter by setting HWCR.FFDIS on K8
         * bit 6 of msr C001_0015
         *
         * Errata 63 for SH-B3 steppings
         * Errata 122 for all steppings (F+ have it disabled by default)
         */
        if (c->x86 == 15) {
                rdmsrl(MSR_K8_HWCR, value);
                value |= 1 << 6;
                wrmsrl(MSR_K8_HWCR, value);
        }
#endif

        /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
           3DNow is ID'd by bit 31 in extended CPUID (1*32+31) anyway */
        clear_bit(0*32+31, &c->x86_capability);

        /* On C+ stepping K8 rep microcode works well for copy/memset */
        level = cpuid_eax(1);
        if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
                set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);

        /* Enable workaround for FXSAVE leak */
        if (c->x86 >= 6)
                set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);

        r = get_model_name(c);
        if (!r) {
                switch (c->x86) {
                case 15:
                        /* Should distinguish Models here, but this is only
                           a fallback anyways. */
                        strcpy(c->x86_model_id, "Hammer");
                        break;
                }
        }
        display_cacheinfo(c);

        /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
        if (c->x86_power & (1<<8))
                set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);

        /* Multi core CPU? */
        if (c->extended_cpuid_level >= 0x80000008) {
                c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
                amd_detect_cmp(c);
        }

        return r;
}
static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
        u32 eax, ebx, ecx, edx;
        int index_msb, core_bits;
        int cpu = smp_processor_id();

        cpuid(1, &eax, &ebx, &ecx, &edx);

        if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
                return;

        smp_num_siblings = (ebx & 0xff0000) >> 16;

        if (smp_num_siblings == 1) {
                printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
        } else if (smp_num_siblings > 1) {

                if (smp_num_siblings > NR_CPUS) {
                        printk(KERN_WARNING "CPU: Unsupported number of siblings %d", smp_num_siblings);
                        smp_num_siblings = 1;
                        return;
                }

                index_msb = get_count_order(smp_num_siblings);
                phys_proc_id[cpu] = phys_pkg_id(index_msb);

                printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
                       phys_proc_id[cpu]);

                smp_num_siblings = smp_num_siblings / c->x86_max_cores;

                index_msb = get_count_order(smp_num_siblings);

                core_bits = get_count_order(c->x86_max_cores);

                cpu_core_id[cpu] = phys_pkg_id(index_msb) &
                        ((1 << core_bits) - 1);

                if (c->x86_max_cores > 1)
                        printk(KERN_INFO "CPU: Processor Core ID: %d\n",
                               cpu_core_id[cpu]);
        }
#endif
}
/*
 * find out the number of processor cores on the die
 */
static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
        unsigned int eax, t;

        if (c->cpuid_level < 4)
                return 1;

        cpuid_count(4, 0, &eax, &t, &t, &t);

        if (eax & 0x1f)
                return ((eax >> 26) + 1);
        else
                return 1;
}
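/*
 * CPUID leaf 4 (deterministic cache parameters) encodes, in EAX bits
 * 31:26 of subleaf 0, the maximum number of core IDs per package minus
 * one; EAX bits 4:0 are the cache type, which reads as 0 when the leaf
 * is unsupported -- hence the (eax & 0x1f) validity check above.
 */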
static void srat_detect_node(void)
{
#ifdef CONFIG_NUMA
        unsigned node;
        int cpu = smp_processor_id();

        /* Don't do the funky fallback heuristics the AMD version employs
           for now. */
        node = apicid_to_node[hard_smp_processor_id()];
        if (node == NUMA_NO_NODE)
                node = first_node(node_online_map);
        numa_set_node(cpu, node);

        if (acpi_numa > 0)
                printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
#endif
}
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
        /* Cache sizes */
        unsigned n;

        init_intel_cacheinfo(c);
        n = c->extended_cpuid_level;
        if (n >= 0x80000008) {
                unsigned eax = cpuid_eax(0x80000008);
                c->x86_virt_bits = (eax >> 8) & 0xff;
                c->x86_phys_bits = eax & 0xff;
                /* CPUID workaround for Intel 0F34 CPU */
                if (c->x86_vendor == X86_VENDOR_INTEL &&
                    c->x86 == 0xF && c->x86_model == 0x3 &&
                    c->x86_mask == 0x4)
                        c->x86_phys_bits = 36;
        }

        if (c->x86 == 15)
                c->x86_cache_alignment = c->x86_clflush_size * 2;
        if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
            (c->x86 == 0x6 && c->x86_model >= 0x0e))
                set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
        set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
        c->x86_max_cores = intel_num_cpu_cores(c);

        srat_detect_node();
}
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
{
        char *v = c->x86_vendor_id;

        if (!strcmp(v, "AuthenticAMD"))
                c->x86_vendor = X86_VENDOR_AMD;
        else if (!strcmp(v, "GenuineIntel"))
                c->x86_vendor = X86_VENDOR_INTEL;
        else
                c->x86_vendor = X86_VENDOR_UNKNOWN;
}

struct cpu_model_info {
        int vendor;
        int family;
        char *model_names[16];
};
/* Do some early cpuid on the boot CPU to get some parameters that are
   needed before check_bugs. Everything advanced is in identify_cpu
   below. */
void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
{
        u32 tfms;

        c->loops_per_jiffy = loops_per_jiffy;
        c->x86_cache_size = -1;
        c->x86_vendor = X86_VENDOR_UNKNOWN;
        c->x86_model = c->x86_mask = 0; /* So far unknown... */
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_clflush_size = 64;
        c->x86_cache_alignment = c->x86_clflush_size;
        c->x86_max_cores = 1;
        c->extended_cpuid_level = 0;
        memset(&c->x86_capability, 0, sizeof c->x86_capability);

        /* Get vendor name */
        cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
              (unsigned int *)&c->x86_vendor_id[0],
              (unsigned int *)&c->x86_vendor_id[8],
              (unsigned int *)&c->x86_vendor_id[4]);

        get_cpu_vendor(c);

        /* Initialize the standard set of capabilities */
        /* Note that the vendor-specific code below might override */

        /* Intel-defined flags: level 0x00000001 */
        if (c->cpuid_level >= 0x00000001) {
                __u32 misc;
                cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
                      &c->x86_capability[0]);
                c->x86 = (tfms >> 8) & 0xf;
                c->x86_model = (tfms >> 4) & 0xf;
                c->x86_mask = tfms & 0xf;
                if (c->x86 == 0xf)
                        c->x86 += (tfms >> 20) & 0xff;
                if (c->x86 >= 0x6)
                        c->x86_model += ((tfms >> 16) & 0xF) << 4;
                if (c->x86_capability[0] & (1<<19))
                        c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
        } else {
                /* Have CPUID level 0 only - unheard of */
                c->x86 = 4;
        }

#ifdef CONFIG_SMP
        phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
#endif
}
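/*
 * Worked example of the family/model decode above: a CPUID(1) EAX
 * (tfms) of 0x00000f34 gives base family (tfms >> 8) & 0xf = 0xf, so
 * the extended family bits (tfms >> 20) & 0xff (here 0) are added;
 * model (tfms >> 4) & 0xf = 3 is extended by bits 19:16 (also 0), and
 * stepping tfms & 0xf = 4 -- exactly the "Intel 0F34" part handled by
 * the physical-address-bits workaround in init_intel().
 */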
/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
        int i;
        u32 xlvl;

        early_identify_cpu(c);

        /* AMD-defined flags: level 0x80000001 */
        xlvl = cpuid_eax(0x80000000);
        c->extended_cpuid_level = xlvl;
        if ((xlvl & 0xffff0000) == 0x80000000) {
                if (xlvl >= 0x80000001) {
                        c->x86_capability[1] = cpuid_edx(0x80000001);
                        c->x86_capability[6] = cpuid_ecx(0x80000001);
                }
                if (xlvl >= 0x80000004)
                        get_model_name(c); /* Default name */
        }

        /* Transmeta-defined flags: level 0x80860001 */
        xlvl = cpuid_eax(0x80860000);
        if ((xlvl & 0xffff0000) == 0x80860000) {
                /* Don't set x86_cpuid_level here for now to not confuse. */
                if (xlvl >= 0x80860001)
                        c->x86_capability[2] = cpuid_edx(0x80860001);
        }

        c->apicid = phys_pkg_id(0);

        /*
         * Vendor-specific initialization.  In this section we
         * canonicalize the feature flags, meaning if there are
         * features a certain CPU supports which CPUID doesn't
         * tell us, CPUID claiming incorrect flags, or other bugs,
         * we handle them here.
         *
         * At the end of this section, c->x86_capability better
         * indicate the features this CPU genuinely supports!
         */
        switch (c->x86_vendor) {
        case X86_VENDOR_AMD:
                init_amd(c);
                break;

        case X86_VENDOR_INTEL:
                init_intel(c);
                break;

        case X86_VENDOR_UNKNOWN:
        default:
                display_cacheinfo(c);
                break;
        }

        select_idle_routine(c);
        detect_ht(c);

        /*
         * On SMP, boot_cpu_data holds the common feature set between
         * all CPUs; so make sure that we indicate which features are
         * common between the CPUs.  The first time this routine gets
         * executed, c == &boot_cpu_data.
         */
        if (c != &boot_cpu_data) {
                /* AND the already accumulated flags with these */
                for (i = 0 ; i < NCAPINTS ; i++)
                        boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
        }

#ifdef CONFIG_X86_MCE
        mcheck_init(c);
#endif
        if (c == &boot_cpu_data)
                mtrr_bp_init();
        else
                mtrr_ap_init();
#ifdef CONFIG_NUMA
        numa_add_cpu(smp_processor_id());
#endif
}
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
        if (c->x86_model_id[0])
                printk("%s", c->x86_model_id);

        if (c->x86_mask || c->cpuid_level >= 0)
                printk(" stepping %02x\n", c->x86_mask);
        else
                printk("\n");
}
/*
 * Get CPU information for use by the procfs.
 */

static int show_cpuinfo(struct seq_file *m, void *v)
{
        struct cpuinfo_x86 *c = v;

        /*
         * These flag bits must match the definitions in <asm/cpufeature.h>.
         * NULL means this bit is undefined or reserved; either way it doesn't
         * have meaning as far as Linux is concerned.  Note that it's important
         * to realize there is a difference between this table and CPUID -- if
         * applications want to get the raw CPUID data, they should access
         * /dev/cpu/<cpu_nr>/cpuid instead.
         */
        static char *x86_cap_flags[] = {
                /* Intel-defined */
                "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
                "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
                "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
                "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,

                /* AMD-defined */
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
                NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",

                /* Transmeta-defined */
                "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

                /* Other (Linux-defined) */
                "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
                "constant_tsc", NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

                /* Intel-defined (#2) */
                "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
                "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

                /* VIA/Cyrix/Centaur-defined */
                NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

                /* AMD-defined (#2) */
                "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
        };
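        /*
         * The flat index into x86_cap_flags is (word * 32 + bit) over
         * the x86_capability array, matching the X86_FEATURE_*
         * constants: "nx", for instance, sits at index 1*32 + 20
         * because it is bit 20 of word 1 (the AMD-defined CPUID
         * 0x80000001 EDX block).
         */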
        static char *x86_power_flags[] = {
                "ts",   /* temperature sensor */
                "fid",  /* frequency id control */
                "vid",  /* voltage id control */
                "ttp",  /* thermal trip */
                "tm",
                "stc",
                NULL,
                /* nothing */   /* constant_tsc - moved to flags */
        };

#ifdef CONFIG_SMP
        if (!cpu_online(c-cpu_data))
                return 0;
#endif

        seq_printf(m,"processor\t: %u\n"
                   "vendor_id\t: %s\n"
                   "cpu family\t: %d\n"
                   "model\t\t: %d\n"
                   "model name\t: %s\n",
                   (unsigned)(c-cpu_data),
                   c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
                   c->x86,
                   (int)c->x86_model,
                   c->x86_model_id[0] ? c->x86_model_id : "unknown");

        if (c->x86_mask || c->cpuid_level >= 0)
                seq_printf(m, "stepping\t: %d\n", c->x86_mask);
        else
                seq_printf(m, "stepping\t: unknown\n");

        if (cpu_has(c,X86_FEATURE_TSC)) {
                unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
                if (!freq)
                        freq = cpu_khz;
                seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
                           freq / 1000, (freq % 1000));
        }

        /* Cache size */
        if (c->x86_cache_size >= 0)
                seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);

#ifdef CONFIG_SMP
        if (smp_num_siblings * c->x86_max_cores > 1) {
                int cpu = c - cpu_data;
                seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
                seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
                seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
                seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
        }
#endif

        seq_printf(m,
                   "fpu\t\t: yes\n"
                   "fpu_exception\t: yes\n"
                   "cpuid level\t: %d\n"
                   "wp\t\t: yes\n"
                   "flags\t\t:",
                   c->cpuid_level);

        {
                int i;
                for ( i = 0 ; i < 32*NCAPINTS ; i++ )
                        if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
                                seq_printf(m, " %s", x86_cap_flags[i]);
        }

        seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
                   c->loops_per_jiffy/(500000/HZ),
                   (c->loops_per_jiffy/(5000/HZ)) % 100);

        if (c->x86_tlbsize > 0)
                seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
        seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
        seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);

        seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
                   c->x86_phys_bits, c->x86_virt_bits);

        seq_printf(m, "power management:");
        {
                unsigned i;
                for (i = 0; i < 32; i++)
                        if (c->x86_power & (1 << i)) {
                                if (i < ARRAY_SIZE(x86_power_flags) &&
                                    x86_power_flags[i])
                                        seq_printf(m, "%s%s",
                                                   x86_power_flags[i][0]?" ":"",
                                                   x86_power_flags[i]);
                                else
                                        seq_printf(m, " [%d]", i);
                        }
        }

        seq_printf(m, "\n\n");

        return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
        return *pos < NR_CPUS ? cpu_data + *pos : NULL;
}

static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
        ++*pos;
        return c_start(m, pos);
}

static void c_stop(struct seq_file *m, void *v)
{
}

struct seq_operations cpuinfo_op = {
        .start = c_start,
        .next = c_next,
        .stop = c_stop,
        .show = show_cpuinfo,
};
#ifdef CONFIG_INPUT_PCSPKR
#include <linux/platform_device.h>
static __init int add_pcspkr(void)
{
        struct platform_device *pd;
        int ret;

        pd = platform_device_alloc("pcspkr", -1);
        if (!pd)
                return -ENOMEM;

        ret = platform_device_add(pd);
        if (ret)
                platform_device_put(pd);

        return ret;
}
device_initcall(add_pcspkr);
#endif
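/*
 * Note the refcount discipline in add_pcspkr(): platform_device_alloc()
 * returns a device holding one reference, platform_device_add()
 * registers it, and only on registration failure is
 * platform_device_put() called to drop that reference and free the
 * device.
 */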