2 * linux/arch/x86-64/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Nov 2001 Dave Jones <davej@suse.de>
7 * Forked from i386 setup code.
13 * This file handles the architecture-dependent parts of initialization
16 #include <linux/errno.h>
17 #include <linux/sched.h>
18 #include <linux/kernel.h>
20 #include <linux/stddef.h>
21 #include <linux/unistd.h>
22 #include <linux/ptrace.h>
23 #include <linux/slab.h>
24 #include <linux/user.h>
25 #include <linux/a.out.h>
26 #include <linux/tty.h>
27 #include <linux/ioport.h>
28 #include <linux/delay.h>
29 #include <linux/config.h>
30 #include <linux/init.h>
31 #include <linux/initrd.h>
32 #include <linux/highmem.h>
33 #include <linux/bootmem.h>
34 #include <linux/module.h>
35 #include <asm/processor.h>
36 #include <linux/console.h>
37 #include <linux/seq_file.h>
38 #include <linux/crash_dump.h>
39 #include <linux/root_dev.h>
40 #include <linux/pci.h>
41 #include <linux/acpi.h>
42 #include <linux/kallsyms.h>
43 #include <linux/edd.h>
44 #include <linux/mmzone.h>
45 #include <linux/kexec.h>
46 #include <linux/cpufreq.h>
47 #include <linux/dmi.h>
50 #include <asm/uaccess.h>
51 #include <asm/system.h>
56 #include <video/edid.h>
59 #include <asm/mpspec.h>
60 #include <asm/mmu_context.h>
61 #include <asm/bootsetup.h>
62 #include <asm/proto.h>
63 #include <asm/setup.h>
64 #include <asm/mach_apic.h>
66 #include <asm/sections.h>
72 struct cpuinfo_x86 boot_cpu_data __read_mostly;
74 unsigned long mmu_cr4_features;
77 EXPORT_SYMBOL(acpi_disabled);
79 extern int __initdata acpi_ht;
80 extern acpi_interrupt_flags acpi_sci_flags;
81 int __initdata acpi_force = 0;
84 int acpi_numa __initdata;
86 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
89 unsigned long saved_video_mode;
93 EXPORT_SYMBOL(swiotlb);
99 struct drive_info_struct { char dummy[32]; } drive_info;
100 struct screen_info screen_info;
101 struct sys_desc_table_struct {
102 unsigned short length;
103 unsigned char table[0];
106 struct edid_info edid_info;
109 extern int root_mountflags;
111 char command_line[COMMAND_LINE_SIZE];
113 struct resource standard_io_resources[] = {
114 { .name = "dma1", .start = 0x00, .end = 0x1f,
115 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
116 { .name = "pic1", .start = 0x20, .end = 0x21,
117 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
118 { .name = "timer0", .start = 0x40, .end = 0x43,
119 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
120 { .name = "timer1", .start = 0x50, .end = 0x53,
121 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
122 { .name = "keyboard", .start = 0x60, .end = 0x6f,
123 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
124 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
125 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
126 { .name = "pic2", .start = 0xa0, .end = 0xa1,
127 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
128 { .name = "dma2", .start = 0xc0, .end = 0xdf,
129 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
130 { .name = "fpu", .start = 0xf0, .end = 0xff,
131 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
/* Element count of the standard_io_resources[] table (total size / size of one entry). */
134 #define STANDARD_IO_RESOURCES \
135 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* Flag set for the RAM-type resources declared in this file: a busy memory region. */
137 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
139 struct resource data_resource = {
140 .name = "Kernel data",
143 .flags = IORESOURCE_RAM,
145 struct resource code_resource = {
146 .name = "Kernel code",
149 .flags = IORESOURCE_RAM,
/* Flag set for the ROM-type resources declared in this file: a busy, read-only memory region. */
152 #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
154 static struct resource system_rom_resource = {
155 .name = "System ROM",
158 .flags = IORESOURCE_ROM,
161 static struct resource extension_rom_resource = {
162 .name = "Extension ROM",
165 .flags = IORESOURCE_ROM,
168 static struct resource adapter_rom_resources[] = {
169 { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
170 .flags = IORESOURCE_ROM },
171 { .name = "Adapter ROM", .start = 0, .end = 0,
172 .flags = IORESOURCE_ROM },
173 { .name = "Adapter ROM", .start = 0, .end = 0,
174 .flags = IORESOURCE_ROM },
175 { .name = "Adapter ROM", .start = 0, .end = 0,
176 .flags = IORESOURCE_ROM },
177 { .name = "Adapter ROM", .start = 0, .end = 0,
178 .flags = IORESOURCE_ROM },
179 { .name = "Adapter ROM", .start = 0, .end = 0,
180 .flags = IORESOURCE_ROM }
/* Element count of the adapter_rom_resources[] table (total size / size of one entry). */
183 #define ADAPTER_ROM_RESOURCES \
184 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
186 static struct resource video_rom_resource = {
190 .flags = IORESOURCE_ROM,
193 static struct resource video_ram_resource = {
194 .name = "Video RAM area",
197 .flags = IORESOURCE_RAM,
/* Nonzero when the unsigned short read through pointer x equals 0xaa55. */
200 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
202 static int __init romchecksum(unsigned char *rom, unsigned long length)
204 unsigned char *p, sum = 0;
206 for (p = rom; p < rom + length; p++)
211 static void __init probe_roms(void)
213 unsigned long start, length, upper;
218 upper = adapter_rom_resources[0].start;
219 for (start = video_rom_resource.start; start < upper; start += 2048) {
220 rom = isa_bus_to_virt(start);
221 if (!romsignature(rom))
224 video_rom_resource.start = start;
226 /* 0 < length <= 0x7f * 512, historically */
227 length = rom[2] * 512;
229 /* if checksum okay, trust length byte */
230 if (length && romchecksum(rom, length))
231 video_rom_resource.end = start + length - 1;
233 request_resource(&iomem_resource, &video_rom_resource);
237 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
242 request_resource(&iomem_resource, &system_rom_resource);
243 upper = system_rom_resource.start;
245 /* check for extension rom (ignore length byte!) */
246 rom = isa_bus_to_virt(extension_rom_resource.start);
247 if (romsignature(rom)) {
248 length = extension_rom_resource.end - extension_rom_resource.start + 1;
249 if (romchecksum(rom, length)) {
250 request_resource(&iomem_resource, &extension_rom_resource);
251 upper = extension_rom_resource.start;
255 /* check for adapter roms on 2k boundaries */
256 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
257 rom = isa_bus_to_virt(start);
258 if (!romsignature(rom))
261 /* 0 < length <= 0x7f * 512, historically */
262 length = rom[2] * 512;
264 /* but accept any length that fits if checksum okay */
265 if (!length || start + length > upper || !romchecksum(rom, length))
268 adapter_rom_resources[i].start = start;
269 adapter_rom_resources[i].end = start + length - 1;
270 request_resource(&iomem_resource, &adapter_rom_resources[i]);
272 start = adapter_rom_resources[i++].end & ~2047UL;
276 static __init void parse_cmdline_early (char ** cmdline_p)
278 char c = ' ', *to = command_line, *from = COMMAND_LINE;
282 /* Save unparsed command line copy for /proc/cmdline */
283 memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
284 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
292 * If the BIOS enumerates physical processors before logical,
293 * maxcpus=N at enumeration-time can be used to disable HT.
295 else if (!memcmp(from, "maxcpus=", 8)) {
296 extern unsigned int maxcpus;
298 maxcpus = simple_strtoul(from + 8, NULL, 0);
302 /* "acpi=off" disables both ACPI table parsing and interpreter init */
303 if (!memcmp(from, "acpi=off", 8))
306 if (!memcmp(from, "acpi=force", 10)) {
307 /* add later when we do DMI horrors: */
312 /* acpi=ht just means: do ACPI MADT parsing
313 at bootup, but don't enable the full ACPI interpreter */
314 if (!memcmp(from, "acpi=ht", 7)) {
319 else if (!memcmp(from, "pci=noacpi", 10))
321 else if (!memcmp(from, "acpi=noirq", 10))
324 else if (!memcmp(from, "acpi_sci=edge", 13))
325 acpi_sci_flags.trigger = 1;
326 else if (!memcmp(from, "acpi_sci=level", 14))
327 acpi_sci_flags.trigger = 3;
328 else if (!memcmp(from, "acpi_sci=high", 13))
329 acpi_sci_flags.polarity = 1;
330 else if (!memcmp(from, "acpi_sci=low", 12))
331 acpi_sci_flags.polarity = 3;
333 /* acpi=strict disables out-of-spec workarounds */
334 else if (!memcmp(from, "acpi=strict", 11)) {
337 #ifdef CONFIG_X86_IO_APIC
338 else if (!memcmp(from, "acpi_skip_timer_override", 24))
339 acpi_skip_timer_override = 1;
343 if (!memcmp(from, "disable_timer_pin_1", 19))
344 disable_timer_pin_1 = 1;
345 if (!memcmp(from, "enable_timer_pin_1", 18))
346 disable_timer_pin_1 = -1;
348 if (!memcmp(from, "nolapic", 7) ||
349 !memcmp(from, "disableapic", 11))
352 if (!memcmp(from, "noapic", 6))
353 skip_ioapic_setup = 1;
355 /* Make sure to not confuse with apic= */
356 if (!memcmp(from, "apic", 4) &&
357 (from[4] == ' ' || from[4] == 0)) {
358 skip_ioapic_setup = 0;
362 if (!memcmp(from, "mem=", 4))
363 parse_memopt(from+4, &from);
365 if (!memcmp(from, "memmap=", 7)) {
366 /* exactmap option is for user defined memory */
367 if (!memcmp(from+7, "exactmap", 8)) {
368 #ifdef CONFIG_CRASH_DUMP
369 /* If we are doing a crash dump, we
370 * still need to know the real mem
371 * size before original memory map is
374 saved_max_pfn = e820_end_of_ram();
382 parse_memmapopt(from+7, &from);
388 if (!memcmp(from, "numa=", 5))
392 #ifdef CONFIG_GART_IOMMU
393 if (!memcmp(from,"iommu=",6)) {
398 if (!memcmp(from,"oops=panic", 10))
401 if (!memcmp(from, "noexec=", 7))
402 nonx_setup(from + 7);
405 /* crashkernel=size@addr specifies the location to reserve for
406 * a crash kernel. By reserving this memory we guarantee
407 * that linux never sets it up as a DMA target.
408 * Useful for holding code to do something appropriate
409 * after a kernel panic.
411 else if (!memcmp(from, "crashkernel=", 12)) {
412 unsigned long size, base;
413 size = memparse(from+12, &from);
415 base = memparse(from+1, &from);
416 /* FIXME: Do I want a sanity check
417 * to validate the memory range?
419 crashk_res.start = base;
420 crashk_res.end = base + size - 1;
425 #ifdef CONFIG_PROC_VMCORE
426 /* elfcorehdr= specifies the location of elf core header
427 * stored by the crashed kernel. This option will be passed
428 * by kexec loader to the capture kernel.
430 else if(!memcmp(from, "elfcorehdr=", 11))
431 elfcorehdr_addr = memparse(from+11, &from);
437 if (COMMAND_LINE_SIZE <= ++len)
442 printk(KERN_INFO "user-defined physical RAM map:\n");
443 e820_print_map("user");
446 *cmdline_p = command_line;
451 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
453 unsigned long bootmap_size, bootmap;
455 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
456 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
458 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
459 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
460 e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
461 reserve_bootmem(bootmap, bootmap_size);
465 /* Use inline assembly to define this because the nops are defined
466 as inline assembly strings in the include files and we cannot
467 get them easily into strings. */
468 asm("\t.data\nk8nops: "
469 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
472 extern unsigned char k8nops[];
473 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
479 k8nops + 1 + 2 + 3 + 4,
480 k8nops + 1 + 2 + 3 + 4 + 5,
481 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
482 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
485 /* Replace instructions with better alternatives for this CPU type.
487 This runs before SMP is initialized to avoid SMP problems with
488 self modifying code. This implies that asymmetric systems where
489 APs have fewer capabilities than the boot processor are not handled.
490 In this case boot with "noreplacement". */
491 void apply_alternatives(void *start, void *end)
495 for (a = start; (void *)a < end; a++) {
496 if (!boot_cpu_has(a->cpuid))
499 BUG_ON(a->replacementlen > a->instrlen);
500 __inline_memcpy(a->instr, a->replacement, a->replacementlen);
501 diff = a->instrlen - a->replacementlen;
503 /* Pad the rest with nops */
504 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
508 __inline_memcpy(a->instr + i, k8_nops[k], k);
513 static int no_replacement __initdata = 0;
515 void __init alternative_instructions(void)
517 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
520 apply_alternatives(__alt_instructions, __alt_instructions_end);
523 static int __init noreplacement_setup(char *s)
529 __setup("noreplacement", noreplacement_setup);
531 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
533 #ifdef CONFIG_EDD_MODULE
537 * copy_edd() - Copy the BIOS EDD information
538 * from boot_params into a safe place.
541 static inline void copy_edd(void)
543 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
544 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
545 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
546 edd.edd_info_nr = EDD_NR;
549 static inline void copy_edd(void)
554 #define EBDA_ADDR_POINTER 0x40E
555 static void __init reserve_ebda_region(void)
559 * there is a real-mode segmented pointer pointing to the
560 * 4K EBDA area at 0x40E
562 addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
565 reserve_bootmem_generic(addr, PAGE_SIZE);
568 void __init setup_arch(char **cmdline_p)
570 unsigned long kernel_end;
572 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
573 drive_info = DRIVE_INFO;
574 screen_info = SCREEN_INFO;
575 edid_info = EDID_INFO;
576 saved_video_mode = SAVED_VIDEO_MODE;
577 bootloader_type = LOADER_TYPE;
579 #ifdef CONFIG_BLK_DEV_RAM
580 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
581 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
582 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
584 setup_memory_region();
587 if (!MOUNT_ROOT_RDONLY)
588 root_mountflags &= ~MS_RDONLY;
589 init_mm.start_code = (unsigned long) &_text;
590 init_mm.end_code = (unsigned long) &_etext;
591 init_mm.end_data = (unsigned long) &_edata;
592 init_mm.brk = (unsigned long) &_end;
594 code_resource.start = virt_to_phys(&_text);
595 code_resource.end = virt_to_phys(&_etext)-1;
596 data_resource.start = virt_to_phys(&_etext);
597 data_resource.end = virt_to_phys(&_edata)-1;
599 parse_cmdline_early(cmdline_p);
601 early_identify_cpu(&boot_cpu_data);
604 * partially used pages are not usable - thus
605 * we are rounding upwards:
607 end_pfn = e820_end_of_ram();
611 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
617 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
618 * Call this early for SRAT node setup.
620 acpi_boot_table_init();
623 #ifdef CONFIG_ACPI_NUMA
625 * Parse SRAT to discover nodes.
631 numa_initmem_init(0, end_pfn);
633 contig_initmem_init(0, end_pfn);
636 /* Reserve direct mapping */
637 reserve_bootmem_generic(table_start << PAGE_SHIFT,
638 (table_end - table_start) << PAGE_SHIFT);
641 kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
642 reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
645 * reserve physical page 0 - it's a special BIOS page on many boxes,
646 * enabling clean reboots, SMP operation, laptop functions.
648 reserve_bootmem_generic(0, PAGE_SIZE);
650 /* reserve ebda region */
651 reserve_ebda_region();
655 * But first pinch a few for the stack/trampoline stuff
656 * FIXME: Don't need the extra page at 4K, but need to fix
657 * trampoline before removing it. (see the GDT stuff)
659 reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
661 /* Reserve SMP trampoline */
662 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
665 #ifdef CONFIG_ACPI_SLEEP
667 * Reserve low memory region for sleep support.
669 acpi_reserve_bootmem();
671 #ifdef CONFIG_X86_LOCAL_APIC
673 * Find and reserve possible boot-time SMP configuration:
677 #ifdef CONFIG_BLK_DEV_INITRD
678 if (LOADER_TYPE && INITRD_START) {
679 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
680 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
682 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
683 initrd_end = initrd_start+INITRD_SIZE;
686 printk(KERN_ERR "initrd extends beyond end of memory "
687 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
688 (unsigned long)(INITRD_START + INITRD_SIZE),
689 (unsigned long)(end_pfn << PAGE_SHIFT));
695 if (crashk_res.start != crashk_res.end) {
696 reserve_bootmem(crashk_res.start,
697 crashk_res.end - crashk_res.start + 1);
707 * Read APIC and some other early information from ACPI tables.
712 #ifdef CONFIG_X86_LOCAL_APIC
714 * get boot-time SMP configuration:
716 if (smp_found_config)
718 init_apic_mappings();
722 * Request address space for all standard RAM and ROM resources
723 * and also for regions reported as reserved by the e820.
726 e820_reserve_resources();
728 request_resource(&iomem_resource, &video_ram_resource);
732 /* request I/O space for devices used on all i[345]86 PCs */
733 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
734 request_resource(&ioport_resource, &standard_io_resources[i]);
739 #ifdef CONFIG_GART_IOMMU
744 #if defined(CONFIG_VGA_CONSOLE)
745 conswitchp = &vga_con;
746 #elif defined(CONFIG_DUMMY_CONSOLE)
747 conswitchp = &dummy_con;
752 static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
756 if (c->extended_cpuid_level < 0x80000004)
759 v = (unsigned int *) c->x86_model_id;
760 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
761 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
762 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
763 c->x86_model_id[48] = 0;
768 static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
770 unsigned int n, dummy, eax, ebx, ecx, edx;
772 n = c->extended_cpuid_level;
774 if (n >= 0x80000005) {
775 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
776 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
777 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
778 c->x86_cache_size=(ecx>>24)+(edx>>24);
779 /* On K8 L1 TLB is inclusive, so don't count it */
783 if (n >= 0x80000006) {
784 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
785 ecx = cpuid_ecx(0x80000006);
786 c->x86_cache_size = ecx >> 16;
787 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
789 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
790 c->x86_cache_size, ecx & 0xFF);
794 cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
795 if (n >= 0x80000008) {
796 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
797 c->x86_virt_bits = (eax >> 8) & 0xff;
798 c->x86_phys_bits = eax & 0xff;
803 static int nearby_node(int apicid)
806 for (i = apicid - 1; i >= 0; i--) {
807 int node = apicid_to_node[i];
808 if (node != NUMA_NO_NODE && node_online(node))
811 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
812 int node = apicid_to_node[i];
813 if (node != NUMA_NO_NODE && node_online(node))
816 return first_node(node_online_map); /* Shouldn't happen */
821 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
822 * Assumes number of cores is a power of two.
824 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
827 int cpu = smp_processor_id();
831 unsigned apicid = phys_proc_id[cpu];
835 while ((1 << bits) < c->x86_max_cores)
838 /* Low order bits define the core id (index of core in socket) */
839 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
840 /* Convert the APIC ID into the socket ID */
841 phys_proc_id[cpu] >>= bits;
844 node = phys_proc_id[cpu];
845 if (apicid_to_node[apicid] != NUMA_NO_NODE)
846 node = apicid_to_node[apicid];
847 if (!node_online(node)) {
848 /* Two possibilities here:
849 - The CPU is missing memory and no node was created.
850 In that case try picking one from a nearby CPU
851 - The APIC IDs differ from the HyperTransport node IDs
852 which the K8 northbridge parsing fills in.
853 Assume they are all increased by a constant offset,
854 but in the same order as the HT nodeids.
855 If that doesn't result in a usable node fall back to the
856 path for the previous case. */
857 int ht_nodeid = apicid - (phys_proc_id[0] << bits);
858 if (ht_nodeid >= 0 &&
859 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
860 node = apicid_to_node[ht_nodeid];
861 /* Pick a nearby node */
862 if (!node_online(node))
863 node = nearby_node(apicid);
865 numa_set_node(cpu, node);
867 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
868 cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
873 static int __init init_amd(struct cpuinfo_x86 *c)
882 * Disable TLB flush filter by setting HWCR.FFDIS on K8
883 * bit 6 of msr C001_0015
885 * Errata 63 for SH-B3 steppings
886 * Errata 122 for all steppings (F+ have it disabled by default)
889 rdmsrl(MSR_K8_HWCR, value);
891 wrmsrl(MSR_K8_HWCR, value);
895 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
896 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
897 clear_bit(0*32+31, &c->x86_capability);
900 level = cpuid_eax(1);
901 if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
902 set_bit(X86_FEATURE_K8_C, &c->x86_capability);
904 r = get_model_name(c);
908 /* Should distinguish Models here, but this is only
909 a fallback anyways. */
910 strcpy(c->x86_model_id, "Hammer");
914 display_cacheinfo(c);
916 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
917 if (c->x86_power & (1<<8))
918 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
920 if (c->extended_cpuid_level >= 0x80000008) {
921 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
922 if (c->x86_max_cores & (c->x86_max_cores - 1))
923 c->x86_max_cores = 1;
931 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
934 u32 eax, ebx, ecx, edx;
935 int index_msb, core_bits;
936 int cpu = smp_processor_id();
938 cpuid(1, &eax, &ebx, &ecx, &edx);
940 c->apicid = phys_pkg_id(0);
942 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
945 smp_num_siblings = (ebx & 0xff0000) >> 16;
947 if (smp_num_siblings == 1) {
948 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
949 } else if (smp_num_siblings > 1 ) {
951 if (smp_num_siblings > NR_CPUS) {
952 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
953 smp_num_siblings = 1;
957 index_msb = get_count_order(smp_num_siblings);
958 phys_proc_id[cpu] = phys_pkg_id(index_msb);
960 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
963 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
965 index_msb = get_count_order(smp_num_siblings) ;
967 core_bits = get_count_order(c->x86_max_cores);
969 cpu_core_id[cpu] = phys_pkg_id(index_msb) &
970 ((1 << core_bits) - 1);
972 if (c->x86_max_cores > 1)
973 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
980 * find out the number of processor cores on the die
982 static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
986 if (c->cpuid_level < 4)
995 return ((eax >> 26) + 1);
1000 static void srat_detect_node(void)
1004 int cpu = smp_processor_id();
1006 /* Don't do the funky fallback heuristics the AMD version employs
1008 node = apicid_to_node[hard_smp_processor_id()];
1009 if (node == NUMA_NO_NODE)
1011 numa_set_node(cpu, node);
1014 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
1018 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1023 init_intel_cacheinfo(c);
1024 n = c->extended_cpuid_level;
1025 if (n >= 0x80000008) {
1026 unsigned eax = cpuid_eax(0x80000008);
1027 c->x86_virt_bits = (eax >> 8) & 0xff;
1028 c->x86_phys_bits = eax & 0xff;
1029 /* CPUID workaround for Intel 0F34 CPU */
1030 if (c->x86_vendor == X86_VENDOR_INTEL &&
1031 c->x86 == 0xF && c->x86_model == 0x3 &&
1033 c->x86_phys_bits = 36;
1037 c->x86_cache_alignment = c->x86_clflush_size * 2;
1038 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
1039 (c->x86 == 0x6 && c->x86_model >= 0x0e))
1040 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1041 c->x86_max_cores = intel_num_cpu_cores(c);
1046 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
1048 char *v = c->x86_vendor_id;
1050 if (!strcmp(v, "AuthenticAMD"))
1051 c->x86_vendor = X86_VENDOR_AMD;
1052 else if (!strcmp(v, "GenuineIntel"))
1053 c->x86_vendor = X86_VENDOR_INTEL;
1055 c->x86_vendor = X86_VENDOR_UNKNOWN;
1058 struct cpu_model_info {
1061 char *model_names[16];
1064 /* Do some early cpuid on the boot CPU to get some parameters that are
1065 needed before check_bugs. Everything advanced is in identify_cpu
1067 void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1071 c->loops_per_jiffy = loops_per_jiffy;
1072 c->x86_cache_size = -1;
1073 c->x86_vendor = X86_VENDOR_UNKNOWN;
1074 c->x86_model = c->x86_mask = 0; /* So far unknown... */
1075 c->x86_vendor_id[0] = '\0'; /* Unset */
1076 c->x86_model_id[0] = '\0'; /* Unset */
1077 c->x86_clflush_size = 64;
1078 c->x86_cache_alignment = c->x86_clflush_size;
1079 c->x86_max_cores = 1;
1080 c->extended_cpuid_level = 0;
1081 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1083 /* Get vendor name */
1084 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
1085 (unsigned int *)&c->x86_vendor_id[0],
1086 (unsigned int *)&c->x86_vendor_id[8],
1087 (unsigned int *)&c->x86_vendor_id[4]);
1091 /* Initialize the standard set of capabilities */
1092 /* Note that the vendor-specific code below might override */
1094 /* Intel-defined flags: level 0x00000001 */
1095 if (c->cpuid_level >= 0x00000001) {
1097 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
1098 &c->x86_capability[0]);
1099 c->x86 = (tfms >> 8) & 0xf;
1100 c->x86_model = (tfms >> 4) & 0xf;
1101 c->x86_mask = tfms & 0xf;
1103 c->x86 += (tfms >> 20) & 0xff;
1105 c->x86_model += ((tfms >> 16) & 0xF) << 4;
1106 if (c->x86_capability[0] & (1<<19))
1107 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
1109 /* Have CPUID level 0 only - unheard of */
1114 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
1119 * This does the hard work of actually picking apart the CPU stuff...
1121 void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1126 early_identify_cpu(c);
1128 /* AMD-defined flags: level 0x80000001 */
1129 xlvl = cpuid_eax(0x80000000);
1130 c->extended_cpuid_level = xlvl;
1131 if ((xlvl & 0xffff0000) == 0x80000000) {
1132 if (xlvl >= 0x80000001) {
1133 c->x86_capability[1] = cpuid_edx(0x80000001);
1134 c->x86_capability[6] = cpuid_ecx(0x80000001);
1136 if (xlvl >= 0x80000004)
1137 get_model_name(c); /* Default name */
1140 /* Transmeta-defined flags: level 0x80860001 */
1141 xlvl = cpuid_eax(0x80860000);
1142 if ((xlvl & 0xffff0000) == 0x80860000) {
1143 /* Don't set x86_cpuid_level here for now to not confuse. */
1144 if (xlvl >= 0x80860001)
1145 c->x86_capability[2] = cpuid_edx(0x80860001);
1149 * Vendor-specific initialization. In this section we
1150 * canonicalize the feature flags, meaning if there are
1151 * features a certain CPU supports which CPUID doesn't
1152 * tell us, CPUID claiming incorrect flags, or other bugs,
1153 * we handle them here.
1155 * At the end of this section, c->x86_capability better
1156 * indicate the features this CPU genuinely supports!
1158 switch (c->x86_vendor) {
1159 case X86_VENDOR_AMD:
1163 case X86_VENDOR_INTEL:
1167 case X86_VENDOR_UNKNOWN:
1169 display_cacheinfo(c);
1173 select_idle_routine(c);
1177 * On SMP, boot_cpu_data holds the common feature set between
1178 * all CPUs; so make sure that we indicate which features are
1179 * common between the CPUs. The first time this routine gets
1180 * executed, c == &boot_cpu_data.
1182 if (c != &boot_cpu_data) {
1183 /* AND the already accumulated flags with these */
1184 for (i = 0 ; i < NCAPINTS ; i++)
1185 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
1188 #ifdef CONFIG_X86_MCE
1191 if (c == &boot_cpu_data)
1196 numa_add_cpu(smp_processor_id());
1201 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1203 if (c->x86_model_id[0])
1204 printk("%s", c->x86_model_id);
1206 if (c->x86_mask || c->cpuid_level >= 0)
1207 printk(" stepping %02x\n", c->x86_mask);
1213 * Get CPU information for use by the procfs.
1216 static int show_cpuinfo(struct seq_file *m, void *v)
1218 struct cpuinfo_x86 *c = v;
1221 * These flag bits must match the definitions in <asm/cpufeature.h>.
1222 * NULL means this bit is undefined or reserved; either way it doesn't
1223 * have meaning as far as Linux is concerned. Note that it's important
1224 * to realize there is a difference between this table and CPUID -- if
1225 * applications want to get the raw CPUID data, they should access
1226 * /dev/cpu/<cpu_nr>/cpuid instead.
1228 static char *x86_cap_flags[] = {
1230 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
1231 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
1232 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
1233 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
1236 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1237 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1238 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1239 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
1241 /* Transmeta-defined */
1242 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
1243 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1244 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1245 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1247 /* Other (Linux-defined) */
1248 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1249 "constant_tsc", NULL, NULL,
1250 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1251 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1252 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1254 /* Intel-defined (#2) */
1255 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
1256 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
1257 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1258 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1260 /* VIA/Cyrix/Centaur-defined */
1261 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
1262 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1263 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1264 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1266 /* AMD-defined (#2) */
1267 "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
1268 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1269 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1270 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1272 static char *x86_power_flags[] = {
1273 "ts", /* temperature sensor */
1274 "fid", /* frequency id control */
1275 "vid", /* voltage id control */
1276 "ttp", /* thermal trip */
1280 /* nothing */ /* constant_tsc - moved to flags */
1285 if (!cpu_online(c-cpu_data))
1289 seq_printf(m,"processor\t: %u\n"
1291 "cpu family\t: %d\n"
1293 "model name\t: %s\n",
1294 (unsigned)(c-cpu_data),
1295 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
1298 c->x86_model_id[0] ? c->x86_model_id : "unknown");
1300 if (c->x86_mask || c->cpuid_level >= 0)
1301 seq_printf(m, "stepping\t: %d\n", c->x86_mask);
1303 seq_printf(m, "stepping\t: unknown\n");
1305 if (cpu_has(c,X86_FEATURE_TSC)) {
1306 unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
1309 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
1310 freq / 1000, (freq % 1000));
1314 if (c->x86_cache_size >= 0)
1315 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
1318 if (smp_num_siblings * c->x86_max_cores > 1) {
1319 int cpu = c - cpu_data;
1320 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
1321 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1322 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
1323 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1329 "fpu_exception\t: yes\n"
1330 "cpuid level\t: %d\n"
1337 for ( i = 0 ; i < 32*NCAPINTS ; i++ )
1338 if ( test_bit(i, &c->x86_capability) &&
1339 x86_cap_flags[i] != NULL )
1340 seq_printf(m, " %s", x86_cap_flags[i]);
1343 seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
1344 c->loops_per_jiffy/(500000/HZ),
1345 (c->loops_per_jiffy/(5000/HZ)) % 100);
1347 if (c->x86_tlbsize > 0)
1348 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
1349 seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
1350 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
1352 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
1353 c->x86_phys_bits, c->x86_virt_bits);
1355 seq_printf(m, "power management:");
1358 for (i = 0; i < 32; i++)
1359 if (c->x86_power & (1 << i)) {
1360 if (i < ARRAY_SIZE(x86_power_flags) &&
1362 seq_printf(m, "%s%s",
1363 x86_power_flags[i][0]?" ":"",
1364 x86_power_flags[i]);
1366 seq_printf(m, " [%d]", i);
1370 seq_printf(m, "\n\n");
1375 static void *c_start(struct seq_file *m, loff_t *pos)
1377 return *pos < NR_CPUS ? cpu_data + *pos : NULL;
1380 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
1383 return c_start(m, pos);
1386 static void c_stop(struct seq_file *m, void *v)
1390 struct seq_operations cpuinfo_op = {
1394 .show = show_cpuinfo,
1397 static int __init run_dmi_scan(void)
1402 core_initcall(run_dmi_scan);