# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
-CFLAGS_REMOVE_paravirt.o = -pg
+CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
endif
#
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
- obj-y += i387.o
+ obj-y += i387.o xsave.o
obj-y += ptrace.o
obj-y += ds.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
+ obj-$(CONFIG_X86_ES7000) += es7000_32.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
obj-y += vsmp_64.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
obj-y += bios_uv.o
+ obj-y += genx2apic_cluster.o
+ obj-y += genx2apic_phys.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
#include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/sched.h>
#include <linux/string.h>
+ #include <linux/bootmem.h>
+ #include <linux/bitops.h>
+ #include <linux/module.h>
+ #include <linux/kgdb.h>
+ #include <linux/topology.h>
#include <linux/delay.h>
#include <linux/smp.h>
- #include <linux/module.h>
#include <linux/percpu.h>
- #include <linux/bootmem.h>
- #include <asm/processor.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/io.h>
+ #include <asm/linkage.h>
#include <asm/mmu_context.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
#include <asm/asm.h>
+ #include <asm/numa.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <mach_apic.h>
+ #include <asm/genapic.h>
#endif
+ #include <asm/pda.h>
+ #include <asm/pgtable.h>
+ #include <asm/processor.h>
+ #include <asm/desc.h>
+ #include <asm/atomic.h>
+ #include <asm/proto.h>
+ #include <asm/sections.h>
+ #include <asm/setup.h>
+
#include "cpu.h"
+ static struct cpu_dev *this_cpu __cpuinitdata;
+
+ #ifdef CONFIG_X86_64
+ /* We need valid kernel segments for data and code in long mode too
+ * IRET will check the segment types kkeil 2000/10/28
+ * Also sysret mandates a special GDT layout
+ */
+ /* The TLS descriptors are currently at a different place compared to i386.
+ Hopefully nobody expects them at a fixed place (Wine?) */
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
+ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
+ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
+ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
+ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
+ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
+ } };
+ #else
+ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
} };
+ #endif
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
- __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
-
+ #ifdef CONFIG_X86_32
static int cachesize_override __cpuinitdata = -1;
static int disable_x86_serial_nr __cpuinitdata = 1;
- struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
+ static int __init cachesize_setup(char *str)
+ {
+ get_option(&str, &cachesize_override);
+ return 1;
+ }
+ __setup("cachesize=", cachesize_setup);
+
+ static int __init x86_fxsr_setup(char *s)
+ {
+ setup_clear_cpu_cap(X86_FEATURE_FXSR);
+ setup_clear_cpu_cap(X86_FEATURE_XMM);
+ return 1;
+ }
+ __setup("nofxsr", x86_fxsr_setup);
+
+ static int __init x86_sep_setup(char *s)
+ {
+ setup_clear_cpu_cap(X86_FEATURE_SEP);
+ return 1;
+ }
+ __setup("nosep", x86_sep_setup);
+
+ /* Standard macro to see if a specific flag is changeable */
+ static inline int flag_is_changeable_p(u32 flag)
+ {
+ u32 f1, f2;
+
+ asm("pushfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl %2,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "popfl\n\t"
+ : "=&r" (f1), "=&r" (f2)
+ : "ir" (flag));
+
+ return ((f1^f2) & flag) != 0;
+ }
+
+ /* Probe for the CPUID instruction */
+ static int __cpuinit have_cpuid_p(void)
+ {
+ return flag_is_changeable_p(X86_EFLAGS_ID);
+ }
+
+ static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
+ /* Disable processor serial number */
+ unsigned long lo, hi;
+ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+ lo |= 0x200000;
+ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+ printk(KERN_NOTICE "CPU serial number disabled.\n");
+ clear_cpu_cap(c, X86_FEATURE_PN);
+
+ /* Disabling the serial number may affect the cpuid level */
+ c->cpuid_level = cpuid_eax(0);
+ }
+ }
+
+ static int __init x86_serial_nr_setup(char *s)
+ {
+ disable_x86_serial_nr = 0;
+ return 1;
+ }
+ __setup("serialnumber", x86_serial_nr_setup);
+ #else
+ static inline int flag_is_changeable_p(u32 flag)
+ {
+ return 1;
+ }
+ /* Probe for the CPUID instruction */
+ static inline int have_cpuid_p(void)
+ {
+ return 1;
+ }
+ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+ {
+ }
+ #endif
+
+ /*
+ * Naming convention should be: <Name> [(<Codename>)]
+ * This table only is used unless init_<vendor>() below doesn't set it;
+ * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
+ *
+ */
+
+ /* Look up CPU names by table lookup. */
+ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+ {
+ struct cpu_model_info *info;
+
+ if (c->x86_model >= 16)
+ return NULL; /* Range check */
+
+ if (!this_cpu)
+ return NULL;
+
+ info = this_cpu->c_models;
+
+ while (info && info->family) {
+ if (info->family == c->x86)
+ return info->model_names[c->x86_model];
+ info++;
+ }
+ return NULL; /* Not found */
+ }
+
+ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+
+ /* Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one. */
+ void switch_to_new_gdt(void)
+ {
+ struct desc_ptr gdt_descr;
+
+ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+ #ifdef CONFIG_X86_32
+ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
+ #endif
+ }
+
+ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
+ #ifdef CONFIG_X86_64
+ display_cacheinfo(c);
+ #else
/* Not much we can do here... */
/* Check if at least it has cpuid */
if (c->cpuid_level == -1) {
else if (c->x86 == 3)
strcpy(c->x86_model_id, "386");
}
+ #endif
}
static struct cpu_dev __cpuinitdata default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
+ .c_x86_vendor = X86_VENDOR_UNKNOWN,
};
- static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
- static int __init cachesize_setup(char *str)
- {
- get_option(&str, &cachesize_override);
- return 1;
- }
- __setup("cachesize=", cachesize_setup);
-
- int __cpuinit get_model_name(struct cpuinfo_x86 *c)
+ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
- if (cpuid_eax(0x80000000) < 0x80000004)
- return 0;
+ if (c->extended_cpuid_level < 0x80000004)
+ return;
v = (unsigned int *) c->x86_model_id;
cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
while (q <= &c->x86_model_id[48])
*q++ = '\0'; /* Zero-pad the rest */
}
-
- return 1;
}
-
void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
- unsigned int n, dummy, ecx, edx, l2size;
+ unsigned int n, dummy, ebx, ecx, edx, l2size;
- n = cpuid_eax(0x80000000);
+ n = c->extended_cpuid_level;
if (n >= 0x80000005) {
- cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
- c->x86_cache_size = (ecx>>24)+(edx>>24);
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+ c->x86_cache_size = (ecx>>24) + (edx>>24);
+ #ifdef CONFIG_X86_64
+ /* On K8 L1 TLB is inclusive, so don't count it */
+ c->x86_tlbsize = 0;
+ #endif
}
if (n < 0x80000006) /* Some chips just has a large L1. */
return;
- ecx = cpuid_ecx(0x80000006);
+ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
l2size = ecx >> 16;
+ #ifdef CONFIG_X86_64
+ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
+ #else
/* do processor-specific cache resizing */
if (this_cpu->c_size_cache)
l2size = this_cpu->c_size_cache(c, l2size);
if (l2size == 0)
return; /* Again, no L2 cache is possible */
+ #endif
c->x86_cache_size = l2size;
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- l2size, ecx & 0xFF);
+ l2size, ecx & 0xFF);
}
- /*
- * Naming convention should be: <Name> [(<Codename>)]
- * This table only is used unless init_<vendor>() below doesn't set it;
- * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
- *
- */
-
- /* Look up CPU names by table lookup. */
- static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
- struct cpu_model_info *info;
+ #ifdef CONFIG_X86_HT
+ u32 eax, ebx, ecx, edx;
+ int index_msb, core_bits;
- if (c->x86_model >= 16)
- return NULL; /* Range check */
+ if (!cpu_has(c, X86_FEATURE_HT))
+ return;
- if (!this_cpu)
- return NULL;
+ if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+ goto out;
- info = this_cpu->c_models;
+ if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
+ return;
- while (info && info->family) {
- if (info->family == c->x86)
- return info->model_names[c->x86_model];
- info++;
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+
+ smp_num_siblings = (ebx & 0xff0000) >> 16;
+
+ if (smp_num_siblings == 1) {
+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+ } else if (smp_num_siblings > 1) {
+
+ if (smp_num_siblings > NR_CPUS) {
+ printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
+ smp_num_siblings);
+ smp_num_siblings = 1;
+ return;
+ }
+
+ index_msb = get_count_order(smp_num_siblings);
+ #ifdef CONFIG_X86_64
+ c->phys_proc_id = phys_pkg_id(index_msb);
+ #else
+ c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
+ #endif
+
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+
+ index_msb = get_count_order(smp_num_siblings);
+
+ core_bits = get_count_order(c->x86_max_cores);
+
+ #ifdef CONFIG_X86_64
+ c->cpu_core_id = phys_pkg_id(index_msb) &
+ ((1 << core_bits) - 1);
+ #else
+ c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
+ ((1 << core_bits) - 1);
+ #endif
}
- return NULL; /* Not found */
- }
+ out:
+ if ((c->x86_max_cores * smp_num_siblings) > 1) {
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
+ }
+ #endif
+ }
- static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
+ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
int i;
static int printed;
for (i = 0; i < X86_VENDOR_NUM; i++) {
- if (cpu_devs[i]) {
- if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
- (cpu_devs[i]->c_ident[1] &&
- !strcmp(v, cpu_devs[i]->c_ident[1]))) {
- c->x86_vendor = i;
- if (!early)
- this_cpu = cpu_devs[i];
- return;
- }
+ if (!cpu_devs[i])
+ break;
+
+ if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
+ (cpu_devs[i]->c_ident[1] &&
+ !strcmp(v, cpu_devs[i]->c_ident[1]))) {
+ this_cpu = cpu_devs[i];
+ c->x86_vendor = this_cpu->c_x86_vendor;
+ return;
}
}
+
if (!printed) {
printed++;
- printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
+ printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
printk(KERN_ERR "CPU: Your system may be unstable.\n");
}
+
c->x86_vendor = X86_VENDOR_UNKNOWN;
this_cpu = &default_cpu;
}
-
- static int __init x86_fxsr_setup(char *s)
- {
- setup_clear_cpu_cap(X86_FEATURE_FXSR);
- setup_clear_cpu_cap(X86_FEATURE_XMM);
- return 1;
- }
- __setup("nofxsr", x86_fxsr_setup);
-
-
- static int __init x86_sep_setup(char *s)
- {
- setup_clear_cpu_cap(X86_FEATURE_SEP);
- return 1;
- }
- __setup("nosep", x86_sep_setup);
-
-
- /* Standard macro to see if a specific flag is changeable */
- static inline int flag_is_changeable_p(u32 flag)
- {
- u32 f1, f2;
-
- asm("pushfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "movl %0,%1\n\t"
- "xorl %2,%0\n\t"
- "pushl %0\n\t"
- "popfl\n\t"
- "pushfl\n\t"
- "popl %0\n\t"
- "popfl\n\t"
- : "=&r" (f1), "=&r" (f2)
- : "ir" (flag));
-
- return ((f1^f2) & flag) != 0;
- }
-
-
- /* Probe for the CPUID instruction */
- static int __cpuinit have_cpuid_p(void)
- {
- return flag_is_changeable_p(X86_EFLAGS_ID);
- }
-
- void __init cpu_detect(struct cpuinfo_x86 *c)
+ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
{
/* Get vendor name */
cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
(unsigned int *)&c->x86_vendor_id[4]);
c->x86 = 4;
+ /* Intel-defined flags: level 0x00000001 */
if (c->cpuid_level >= 0x00000001) {
u32 junk, tfms, cap0, misc;
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
- c->x86 = (tfms >> 8) & 15;
- c->x86_model = (tfms >> 4) & 15;
+ c->x86 = (tfms >> 8) & 0xf;
+ c->x86_model = (tfms >> 4) & 0xf;
+ c->x86_mask = tfms & 0xf;
if (c->x86 == 0xf)
c->x86 += (tfms >> 20) & 0xff;
if (c->x86 >= 0x6)
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
- c->x86_mask = tfms & 15;
+ c->x86_model += ((tfms >> 16) & 0xf) << 4;
if (cap0 & (1<<19)) {
- c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+ c->x86_cache_alignment = c->x86_clflush_size;
}
}
}
- static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
+
+ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
- unsigned int ebx;
+ u32 ebx;
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
- if (have_cpuid_p()) {
- /* Intel-defined flags: level 0x00000001 */
- if (c->cpuid_level >= 0x00000001) {
- u32 capability, excap;
- cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
- c->x86_capability[0] = capability;
- c->x86_capability[4] = excap;
- }
+ /* Intel-defined flags: level 0x00000001 */
+ if (c->cpuid_level >= 0x00000001) {
+ u32 capability, excap;
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+ c->x86_capability[0] = capability;
+ c->x86_capability[4] = excap;
+ }
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- if ((xlvl & 0xffff0000) == 0x80000000) {
- if (xlvl >= 0x80000001) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
- }
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+ c->extended_cpuid_level = xlvl;
+ if ((xlvl & 0xffff0000) == 0x80000000) {
+ if (xlvl >= 0x80000001) {
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
}
+ }
+ #ifdef CONFIG_X86_64
+ if (c->extended_cpuid_level >= 0x80000008) {
+ u32 eax = cpuid_eax(0x80000008);
+
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
}
+ #endif
+
+ if (c->extended_cpuid_level >= 0x80000007)
+ c->x86_power = cpuid_edx(0x80000007);
}
+ static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+ {
+ #ifdef CONFIG_X86_32
+ int i;
+
+ /*
+ * First of all, decide if this is a 486 or higher
+ * It's a 486 if we can modify the AC flag
+ */
+ if (flag_is_changeable_p(X86_EFLAGS_AC))
+ c->x86 = 4;
+ else
+ c->x86 = 3;
+
+ for (i = 0; i < X86_VENDOR_NUM; i++)
+ if (cpu_devs[i] && cpu_devs[i]->c_identify) {
+ c->x86_vendor_id[0] = 0;
+ cpu_devs[i]->c_identify(c);
+ if (c->x86_vendor_id[0]) {
+ get_cpu_vendor(c);
+ break;
+ }
+ }
+ #endif
+ }
+
/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
* WARNING: this function is only called on the BP. Don't add code here
* that is supposed to run on all CPUs.
*/
- static void __init early_cpu_detect(void)
+ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- c->x86_cache_alignment = 32;
+ #ifdef CONFIG_X86_64
+ c->x86_clflush_size = 64;
+ #else
c->x86_clflush_size = 32;
+ #endif
+ c->x86_cache_alignment = c->x86_clflush_size;
+
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+ c->extended_cpuid_level = 0;
+ if (!have_cpuid_p())
+ identify_cpu_without_cpuid(c);
+
+ /* cyrix could have cpuid enabled via c_identify()*/
if (!have_cpuid_p())
return;
cpu_detect(c);
- get_cpu_vendor(c, 1);
+ get_cpu_vendor(c);
- early_get_cap(c);
+ get_cpu_cap(c);
- if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
- cpu_devs[c->x86_vendor]->c_early_init)
- cpu_devs[c->x86_vendor]->c_early_init(c);
+ if (this_cpu->c_early_init)
+ this_cpu->c_early_init(c);
+
+ validate_pat_support(c);
+ }
+
+ void __init early_cpu_init(void)
+ {
+ struct cpu_dev **cdev;
+ int count = 0;
+
+ printk("KERNEL supported cpus:\n");
+ for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
+ struct cpu_dev *cpudev = *cdev;
+ unsigned int j;
+
+ if (count >= X86_VENDOR_NUM)
+ break;
+ cpu_devs[count] = cpudev;
+ count++;
+
+ for (j = 0; j < 2; j++) {
+ if (!cpudev->c_ident[j])
+ continue;
+ printk(" %s %s\n", cpudev->c_vendor,
+ cpudev->c_ident[j]);
+ }
+ }
+
+ early_identify_cpu(&boot_cpu_data);
}
/*
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
{
- u32 tfms, xlvl;
- unsigned int ebx;
-
- if (have_cpuid_p()) {
- /* Get vendor name */
- cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
- (unsigned int *)&c->x86_vendor_id[0],
- (unsigned int *)&c->x86_vendor_id[8],
- (unsigned int *)&c->x86_vendor_id[4]);
-
- get_cpu_vendor(c, 0);
- /* Initialize the standard set of capabilities */
- /* Note that the vendor-specific code below might override */
- /* Intel-defined flags: level 0x00000001 */
- if (c->cpuid_level >= 0x00000001) {
- u32 capability, excap;
- cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
- c->x86_capability[0] = capability;
- c->x86_capability[4] = excap;
- c->x86 = (tfms >> 8) & 15;
- c->x86_model = (tfms >> 4) & 15;
- if (c->x86 == 0xf)
- c->x86 += (tfms >> 20) & 0xff;
- if (c->x86 >= 0x6)
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
- c->x86_mask = tfms & 15;
- c->initial_apicid = (ebx >> 24) & 0xFF;
- #ifdef CONFIG_X86_HT
- c->apicid = phys_pkg_id(c->initial_apicid, 0);
- c->phys_proc_id = c->initial_apicid;
- #else
- c->apicid = c->initial_apicid;
- #endif
- if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
- c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
- } else {
- /* Have CPUID level 0 only - unheard of */
- c->x86 = 4;
- }
+ c->extended_cpuid_level = 0;
- /* AMD-defined flags: level 0x80000001 */
- xlvl = cpuid_eax(0x80000000);
- if ((xlvl & 0xffff0000) == 0x80000000) {
- if (xlvl >= 0x80000001) {
- c->x86_capability[1] = cpuid_edx(0x80000001);
- c->x86_capability[6] = cpuid_ecx(0x80000001);
- }
- if (xlvl >= 0x80000004)
- get_model_name(c); /* Default name */
- }
+ if (!have_cpuid_p())
+ identify_cpu_without_cpuid(c);
- init_scattered_cpuid_features(c);
- detect_nopl(c);
- }
- }
+ /* cyrix could have cpuid enabled via c_identify()*/
+ if (!have_cpuid_p())
+ return;
- static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
- {
- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
- /* Disable processor serial number */
- unsigned long lo, hi;
- rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
- lo |= 0x200000;
- wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
- printk(KERN_NOTICE "CPU serial number disabled.\n");
- clear_cpu_cap(c, X86_FEATURE_PN);
+ cpu_detect(c);
- /* Disabling the serial number may affect the cpuid level */
- c->cpuid_level = cpuid_eax(0);
- }
- }
+ get_cpu_vendor(c);
- static int __init x86_serial_nr_setup(char *s)
- {
- disable_x86_serial_nr = 0;
- return 1;
- }
- __setup("serialnumber", x86_serial_nr_setup);
+ get_cpu_cap(c);
+ if (c->cpuid_level >= 0x00000001) {
+ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
+ #ifdef CONFIG_X86_32
+ # ifdef CONFIG_X86_HT
+ c->apicid = phys_pkg_id(c->initial_apicid, 0);
+ # else
+ c->apicid = c->initial_apicid;
+ # endif
+ #endif
+ #ifdef CONFIG_X86_HT
+ c->phys_proc_id = c->initial_apicid;
+ #endif
+ }
+
+ get_model_name(c); /* Default name */
+
+ init_scattered_cpuid_features(c);
+ detect_nopl(c);
+ }
/*
* This does the hard work of actually picking apart the CPU stuff...
c->loops_per_jiffy = loops_per_jiffy;
c->x86_cache_size = -1;
c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->cpuid_level = -1; /* CPUID not detected */
c->x86_model = c->x86_mask = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
+ c->x86_coreid_bits = 0;
+ #ifdef CONFIG_X86_64
+ c->x86_clflush_size = 64;
+ #else
+ c->cpuid_level = -1; /* CPUID not detected */
c->x86_clflush_size = 32;
+ #endif
+ c->x86_cache_alignment = c->x86_clflush_size;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
- if (!have_cpuid_p()) {
- /*
- * First of all, decide if this is a 486 or higher
- * It's a 486 if we can modify the AC flag
- */
- if (flag_is_changeable_p(X86_EFLAGS_AC))
- c->x86 = 4;
- else
- c->x86 = 3;
- }
-
generic_identify(c);
if (this_cpu->c_identify)
this_cpu->c_identify(c);
+ #ifdef CONFIG_X86_64
+ c->apicid = phys_pkg_id(0);
+ #endif
+
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
c->x86, c->x86_model);
}
+ #ifdef CONFIG_X86_64
+ detect_ht(c);
+ #endif
+
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
*/
if (c != &boot_cpu_data) {
/* AND the already accumulated flags with these */
- for (i = 0 ; i < NCAPINTS ; i++)
+ for (i = 0; i < NCAPINTS; i++)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
for (i = 0; i < NCAPINTS; i++)
c->x86_capability[i] &= ~cleared_cpu_caps[i];
+ #ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
mcheck_init(c);
+ #endif
select_idle_routine(c);
+
+ #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+ numa_add_cpu(smp_processor_id());
+ #endif
}
void __init identify_boot_cpu(void)
{
identify_cpu(&boot_cpu_data);
+ #ifdef CONFIG_X86_32
sysenter_setup();
enable_sep_cpu();
+ #endif
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
{
BUG_ON(c == &boot_cpu_data);
identify_cpu(c);
+ #ifdef CONFIG_X86_32
enable_sep_cpu();
+ #endif
mtrr_ap_init();
}
- #ifdef CONFIG_X86_HT
- void __cpuinit detect_ht(struct cpuinfo_x86 *c)
- {
- u32 eax, ebx, ecx, edx;
- int index_msb, core_bits;
-
- cpuid(1, &eax, &ebx, &ecx, &edx);
-
- if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
- return;
-
- smp_num_siblings = (ebx & 0xff0000) >> 16;
+ struct msr_range {
+ unsigned min;
+ unsigned max;
+ };
- if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1) {
+ static struct msr_range msr_range_array[] __cpuinitdata = {
+ { 0x00000000, 0x00000418},
+ { 0xc0000000, 0xc000040b},
+ { 0xc0010000, 0xc0010142},
+ { 0xc0011000, 0xc001103b},
+ };
- if (smp_num_siblings > NR_CPUS) {
- printk(KERN_WARNING "CPU: Unsupported number of the "
- "siblings %d", smp_num_siblings);
- smp_num_siblings = 1;
- return;
+ static void __cpuinit print_cpu_msr(void)
+ {
+ unsigned index;
+ u64 val;
+ int i;
+ unsigned index_min, index_max;
+
+ for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
+ index_min = msr_range_array[i].min;
+ index_max = msr_range_array[i].max;
+ for (index = index_min; index < index_max; index++) {
+ if (rdmsrl_amd_safe(index, &val))
+ continue;
+ printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
}
+ }
+ }
- index_msb = get_count_order(smp_num_siblings);
- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
-
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
-
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
-
- index_msb = get_count_order(smp_num_siblings) ;
+ static int show_msr __cpuinitdata;
+ static __init int setup_show_msr(char *arg)
+ {
+ int num;
- core_bits = get_count_order(c->x86_max_cores);
+ get_option(&arg, &num);
- c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
- ((1 << core_bits) - 1);
-
- if (c->x86_max_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
- }
+ if (num > 0)
+ show_msr = num;
+ return 1;
}
- #endif
+ __setup("show_msr=", setup_show_msr);
static __init int setup_noclflush(char *arg)
{
vendor = c->x86_vendor_id;
if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
- printk("%s ", vendor);
+ printk(KERN_CONT "%s ", vendor);
- if (!c->x86_model_id[0])
- printk("%d86", c->x86);
+ if (c->x86_model_id[0])
+ printk(KERN_CONT "%s", c->x86_model_id);
else
- printk("%s", c->x86_model_id);
+ printk(KERN_CONT "%d86", c->x86);
if (c->x86_mask || c->cpuid_level >= 0)
- printk(" stepping %02x\n", c->x86_mask);
+ printk(KERN_CONT " stepping %02x\n", c->x86_mask);
else
- printk("\n");
+ printk(KERN_CONT "\n");
+
+ #ifdef CONFIG_SMP
+ if (c->cpu_index < show_msr)
+ print_cpu_msr();
+ #else
+ if (show_msr)
+ print_cpu_msr();
+ #endif
}
static __init int setup_disablecpuid(char *arg)
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
- void __init early_cpu_init(void)
+ #ifdef CONFIG_X86_64
+ struct x8664_pda **_cpu_pda __read_mostly;
+ EXPORT_SYMBOL(_cpu_pda);
+
+ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
+
+ char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+
+ void __cpuinit pda_init(int cpu)
+ {
+ struct x8664_pda *pda = cpu_pda(cpu);
+
+ /* Setup up data that may be needed in __get_free_pages early */
+ loadsegment(fs, 0);
+ loadsegment(gs, 0);
+ /* Memory clobbers used to order PDA accessed */
+ mb();
+ wrmsrl(MSR_GS_BASE, pda);
+ mb();
+
+ pda->cpunumber = cpu;
+ pda->irqcount = -1;
+ pda->kernelstack = (unsigned long)stack_thread_info() -
+ PDA_STACKOFFSET + THREAD_SIZE;
+ pda->active_mm = &init_mm;
+ pda->mmu_state = 0;
+
+ if (cpu == 0) {
+ /* others are initialized in smpboot.c */
+ pda->pcurrent = &init_task;
+ pda->irqstackptr = boot_cpu_stack;
+ pda->irqstackptr += IRQSTACKSIZE - 64;
+ } else {
+ if (!pda->irqstackptr) {
+ pda->irqstackptr = (char *)
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+ if (!pda->irqstackptr)
+ panic("cannot allocate irqstack for cpu %d",
+ cpu);
+ pda->irqstackptr += IRQSTACKSIZE - 64;
+ }
+
+ if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
+ pda->nodenumber = cpu_to_node(cpu);
+ }
+ }
+
+ char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
+ DEBUG_STKSZ] __page_aligned_bss;
+
+ extern asmlinkage void ignore_sysret(void);
+
+ /* May not be marked __init: used by software suspend */
+ void syscall_init(void)
{
- struct cpu_vendor_dev *cvdev;
+ /*
+ * LSTAR and STAR live in a bit strange symbiosis.
+ * They both write to the same internal register. STAR allows to
+ * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
+ */
+ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
+ wrmsrl(MSR_LSTAR, system_call);
+ wrmsrl(MSR_CSTAR, ignore_sysret);
- for (cvdev = __x86cpuvendor_start ;
- cvdev < __x86cpuvendor_end ;
- cvdev++)
- cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
+ #ifdef CONFIG_IA32_EMULATION
+ syscall32_cpu_init();
+ #endif
- early_cpu_detect();
- validate_pat_support(&boot_cpu_data);
+ /* Flags to clear on syscall */
+ wrmsrl(MSR_SYSCALL_MASK,
+ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
}
+ unsigned long kernel_eflags;
+
+ /*
+ * Copies of the original ist values from the tss are only accessed during
+ * debugging, no special alignment required.
+ */
+ DEFINE_PER_CPU(struct orig_ist, orig_ist);
+
+ #else
+
/* Make sure %fs is initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
regs->fs = __KERNEL_PERCPU;
return regs;
}
-
- /* Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one. */
- void switch_to_new_gdt(void)
- {
- struct desc_ptr gdt_descr;
-
- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
- gdt_descr.size = GDT_SIZE - 1;
- load_gdt(&gdt_descr);
- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
- }
+ #endif
/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
+ * A lot of state is already set up in PDA init for 64 bit
*/
+ #ifdef CONFIG_X86_64
+ void __cpuinit cpu_init(void)
+ {
+ int cpu = stack_smp_processor_id();
+ struct tss_struct *t = &per_cpu(init_tss, cpu);
+ struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
+ unsigned long v;
+ char *estacks = NULL;
+ struct task_struct *me;
+ int i;
+
+ /* CPU 0 is initialised in head64.c */
+ if (cpu != 0)
+ pda_init(cpu);
+ else
+ estacks = boot_exception_stacks;
+
+ me = current;
+
+ if (cpu_test_and_set(cpu, cpu_initialized))
+ panic("CPU#%d already initialized!\n", cpu);
+
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+
+ /*
+ * Initialize the per-CPU GDT with the boot GDT,
+ * and set up the GDT descriptor:
+ */
+
+ switch_to_new_gdt();
+ load_idt((const struct desc_ptr *)&idt_descr);
+
+ memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
+ syscall_init();
+
+ wrmsrl(MSR_FS_BASE, 0);
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
+ barrier();
+
+ check_efer();
+ if (cpu != 0 && x2apic)
+ enable_x2apic();
+
+ /*
+ * set up and load the per-CPU TSS
+ */
+ if (!orig_ist->ist[0]) {
+ static const unsigned int order[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+ };
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+ if (cpu) {
+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
+ if (!estacks)
+ panic("Cannot allocate exception "
+ "stack %ld %d\n", v, cpu);
+ }
+ estacks += PAGE_SIZE << order[v];
+ orig_ist->ist[v] = t->x86_tss.ist[v] =
+ (unsigned long)estacks;
+ }
+ }
+
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ /*
+ * <= is required because the CPU will access up to
+ * 8 bits beyond the end of the IO permission bitmap.
+ */
+ for (i = 0; i <= IO_BITMAP_LONGS; i++)
+ t->io_bitmap[i] = ~0UL;
+
+ atomic_inc(&init_mm.mm_count);
+ me->active_mm = &init_mm;
+ if (me->mm)
+ BUG();
+ enter_lazy_tlb(&init_mm, me);
+
+ load_sp0(t, ¤t->thread);
+ set_tss_desc(cpu, t);
+ load_TR_desc();
+ load_LDT(&init_mm.context);
+
+ #ifdef CONFIG_KGDB
+ /*
+ * If the kgdb is connected no debug regs should be altered. This
+ * is only applicable when KGDB and a KGDB I/O module are built
+ * into the kernel and you are using early debugging with
+ * kgdbwait. KGDB will control the kernel HW breakpoint registers.
+ */
+ if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
+ arch_kgdb_ops.correct_hw_break();
+ else {
+ #endif
+ /*
+ * Clear all 6 debug registers:
+ */
+
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+ set_debugreg(0UL, 6);
+ set_debugreg(0UL, 7);
+ #ifdef CONFIG_KGDB
+ /* If the kgdb is connected no debug regs should be altered. */
+ }
+ #endif
+
+ fpu_init();
+
+ raw_local_save_flags(kernel_eflags);
+
+ if (is_uv_system())
+ uv_cpu_init();
+ }
+
+ #else
+
void __cpuinit cpu_init(void)
{
int cpu = smp_processor_id();
/*
* Force FPU initialization:
*/
- current_thread_info()->status = 0;
+ if (cpu_has_xsave)
+ current_thread_info()->status = TS_XSAVE;
+ else
+ current_thread_info()->status = 0;
clear_used_math();
mxcsr_feature_mask_init();
+
+ /*
+ * Boot processor to setup the FP and extended state context info.
+ */
+ if (!smp_processor_id())
+ init_thread_xstate();
+
+ xsave_init();
}
-#ifdef CONFIG_HOTPLUG_CPU
-void __cpuinit cpu_uninit(void)
-{
- int cpu = raw_smp_processor_id();
- cpu_clear(cpu, cpu_initialized);
-
- /* lazy TLB state */
- per_cpu(cpu_tlbstate, cpu).state = 0;
- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
-#endif
+
+ #endif
#include <asm/ldt.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
+ #include <asm/syscalls.h>
#ifdef CONFIG_SMP
static void flush_ldt(void *current_mm)
memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
(mincount - oldsize) * LDT_ENTRY_SIZE);
+ paravirt_alloc_ldt(newldt, mincount);
+
#ifdef CONFIG_X86_64
/* CHECKME: Do we really need this ? */
wmb();
#endif
}
if (oldsize) {
+ paravirt_free_ldt(oldldt, oldsize);
if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
int err = alloc_ldt(new, old->size, 0);
+ int i;
if (err < 0)
return err;
- memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
+
+ for(i = 0; i < old->size; i++)
+ write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
return 0;
}
if (mm == current->active_mm)
clear_LDT();
#endif
+ paravirt_free_ldt(mm->context.ldt, mm->context.size);
if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
return __get_cpu_var(paravirt_lazy_mode);
}
-void __init paravirt_use_bytelocks(void)
-{
-#ifdef CONFIG_SMP
- pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
- pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
- pv_lock_ops.spin_lock = __byte_spin_lock;
- pv_lock_ops.spin_trylock = __byte_spin_trylock;
- pv_lock_ops.spin_unlock = __byte_spin_unlock;
-#endif
-}
-
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
#endif
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
+ .read_msr_amd = native_read_msr_amd_safe,
.write_msr = native_write_msr_safe,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.write_ldt_entry = native_write_ldt_entry,
.write_gdt_entry = native_write_gdt_entry,
.write_idt_entry = native_write_idt_entry,
+
+ .alloc_ldt = paravirt_nop,
+ .free_ldt = paravirt_nop,
+
.load_sp0 = native_load_sp0,
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
- .apic_write = native_apic_write,
- .apic_read = native_apic_read,
.setup_boot_clock = setup_boot_APIC_clock,
.setup_secondary_clock = setup_secondary_APIC_clock,
.startup_ipi_hook = paravirt_nop,
.set_fixmap = native_set_fixmap,
};
-struct pv_lock_ops pv_lock_ops = {
-#ifdef CONFIG_SMP
- .spin_is_locked = __ticket_spin_is_locked,
- .spin_is_contended = __ticket_spin_is_contended,
-
- .spin_lock = __ticket_spin_lock,
- .spin_trylock = __ticket_spin_trylock,
- .spin_unlock = __ticket_spin_unlock,
-#endif
-};
-EXPORT_SYMBOL(pv_lock_ops);
-
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
+ #include <linux/dmi.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/cpu.h>
#include <asm/kdebug.h>
+ #include <asm/idle.h>
+ #include <asm/syscalls.h>
+ #include <asm/smp.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
return ((unsigned long *)tsk->thread.sp)[3];
}
-#ifdef CONFIG_HOTPLUG_CPU
-#include <asm/nmi.h>
-
-static void cpu_exit_clear(void)
-{
- int cpu = raw_smp_processor_id();
-
- idle_task_exit();
-
- cpu_uninit();
- irq_ctx_exit(cpu);
-
- cpu_clear(cpu, cpu_callout_map);
- cpu_clear(cpu, cpu_callin_map);
-
- numa_remove_cpu(cpu);
- c1e_remove_cpu(cpu);
-}
-
-/* We don't actually take CPU down, just spin without interrupts. */
-static inline void play_dead(void)
-{
- /* This must be done before dead CPU ack */
- cpu_exit_clear();
- mb();
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- /*
- * With physical CPU hotplug, we should halt the cpu
- */
- local_irq_disable();
- /* mask all interrupts, flush any and all caches, and halt */
- wbinvd_halt();
-}
-#else
+#ifndef CONFIG_SMP
static inline void play_dead(void)
{
BUG();
}
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif
/*
* The idle thread. There's no useful work to be
unsigned long d0, d1, d2, d3, d6, d7;
unsigned long sp;
unsigned short ss, gs;
+ const char *board;
if (user_mode_vm(regs)) {
sp = regs->sp;
}
printk("\n");
- printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+
+ board = dmi_get_system_info(DMI_PRODUCT_NAME);
+ if (!board)
+ board = "";
+ printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
task_pid_nr(current), current->comm,
print_tainted(), init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
+ init_utsname()->version, board);
printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
(u16)regs->cs, regs->ip, regs->flags,
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
+ #ifdef CONFIG_X86_DS
+ /* Free any DS contexts that have not been properly released. */
+ if (unlikely(current->thread.ds_ctx)) {
+ /* we clear debugctl to make sure DS is not used. */
+ update_debugctlmsr(0);
+ ds_free(current->thread.ds_ctx);
+ }
+ #endif /* CONFIG_X86_DS */
}
void flush_thread(void)
return 0;
}
+ #ifdef CONFIG_X86_DS
+ static int update_debugctl(struct thread_struct *prev,
+ struct thread_struct *next, unsigned long debugctl)
+ {
+ unsigned long ds_prev = 0;
+ unsigned long ds_next = 0;
+
+ if (prev->ds_ctx)
+ ds_prev = (unsigned long)prev->ds_ctx->ds;
+ if (next->ds_ctx)
+ ds_next = (unsigned long)next->ds_ctx->ds;
+
+ if (ds_next != ds_prev) {
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+ update_debugctlmsr(0);
+ wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
+ }
+ return debugctl;
+ }
+ #else
+ static int update_debugctl(struct thread_struct *prev,
+ struct thread_struct *next, unsigned long debugctl)
+ {
+ return debugctl;
+ }
+ #endif /* CONFIG_X86_DS */
+
static noinline void
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
prev = &prev_p->thread;
next = &next_p->thread;
- debugctl = prev->debugctlmsr;
- if (next->ds_area_msr != prev->ds_area_msr) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
- }
+ debugctl = update_debugctl(prev, next, prev->debugctlmsr);
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
hard_enable_TSC();
}
- #ifdef X86_BTS
+ #ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
- #endif
+ #endif /* CONFIG_X86_PTRACE_BTS */
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
+ #include <linux/uaccess.h>
+ #include <linux/io.h>
- #include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
- #include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
+ #include <asm/syscalls.h>
asmlinkage extern void ret_from_fork(void);
__exit_idle();
}
-#ifdef CONFIG_HOTPLUG_CPU
-DECLARE_PER_CPU(int, cpu_state);
-
-#include <linux/nmi.h>
-/* We halt the CPU with physical CPU hotplug */
-static inline void play_dead(void)
-{
- idle_task_exit();
- c1e_remove_cpu(raw_smp_processor_id());
-
- mb();
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- local_irq_disable();
- /* mask all interrupts, flush any and all caches, and halt */
- wbinvd_halt();
-}
-#else
+#ifndef CONFIG_SMP
static inline void play_dead(void)
{
BUG();
}
-#endif /* CONFIG_HOTPLUG_CPU */
+#endif
/*
* The idle thread. There's no useful work to be
}
/* Prints also some state that isn't saved in the pt_regs */
- void __show_regs(struct pt_regs * regs)
+ void __show_regs(struct pt_regs *regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
printk("\n");
print_modules();
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+ printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
printk_address(regs->ip, 1);
- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
- regs->flags);
- printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+ printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
+ regs->sp, regs->flags);
+ printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->ax, regs->bx, regs->cx);
- printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+ printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
regs->dx, regs->si, regs->di);
- printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+ printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
regs->bp, regs->r8, regs->r9);
- printk("R10: %016lx R11: %016lx R12: %016lx\n",
- regs->r10, regs->r11, regs->r12);
- printk("R13: %016lx R14: %016lx R15: %016lx\n",
- regs->r13, regs->r14, regs->r15);
-
- asm("movl %%ds,%0" : "=r" (ds));
- asm("movl %%cs,%0" : "=r" (cs));
- asm("movl %%es,%0" : "=r" (es));
+ printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+ regs->r10, regs->r11, regs->r12);
+ printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+ regs->r13, regs->r14, regs->r15);
+
+ asm("movl %%ds,%0" : "=r" (ds));
+ asm("movl %%cs,%0" : "=r" (cs));
+ asm("movl %%es,%0" : "=r" (es));
asm("movl %%fs,%0" : "=r" (fsindex));
asm("movl %%gs,%0" : "=r" (gsindex));
rdmsrl(MSR_FS_BASE, fs);
- rdmsrl(MSR_GS_BASE, gs);
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ rdmsrl(MSR_GS_BASE, gs);
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4();
- printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
- fs,fsindex,gs,gsindex,shadowgs);
- printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
- printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+ printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+ fs, fsindex, gs, gsindex, shadowgs);
+ printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+ es, cr0);
+ printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+ cr4);
get_debugreg(d0, 0);
get_debugreg(d1, 1);
get_debugreg(d2, 2);
- printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+ printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
get_debugreg(d3, 3);
get_debugreg(d6, 6);
get_debugreg(d7, 7);
- printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+ printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
void show_regs(struct pt_regs *regs)
{
- printk("CPU %d:", smp_processor_id());
+ printk(KERN_INFO "CPU %d:", smp_processor_id());
__show_regs(regs);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
t->io_bitmap_max = 0;
put_cpu();
}
+ #ifdef CONFIG_X86_DS
+ /* Free any DS contexts that have not been properly released. */
+ if (unlikely(t->ds_ctx)) {
+ /* we clear debugctl to make sure DS is not used. */
+ update_debugctlmsr(0);
+ ds_free(t->ds_ctx);
+ }
+ #endif /* CONFIG_X86_DS */
}
void flush_thread(void)
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
unsigned long unused,
- struct task_struct * p, struct pt_regs * regs)
+ struct task_struct *p, struct pt_regs *regs)
{
int err;
- struct pt_regs * childregs;
+ struct pt_regs *childregs;
struct task_struct *me = current;
childregs = ((struct pt_regs *)
if (test_thread_flag(TIF_IA32))
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
- else
- #endif
- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
- if (err)
+ else
+ #endif
+ err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
+ if (err)
goto out;
}
err = 0;
next = &next_p->thread;
debugctl = prev->debugctlmsr;
- if (next->ds_area_msr != prev->ds_area_msr) {
- /* we clear debugctl to make sure DS
- * is not in use when we change it */
- debugctl = 0;
- update_debugctlmsr(0);
- wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+
+ #ifdef CONFIG_X86_DS
+ {
+ unsigned long ds_prev = 0, ds_next = 0;
+
+ if (prev->ds_ctx)
+ ds_prev = (unsigned long)prev->ds_ctx->ds;
+ if (next->ds_ctx)
+ ds_next = (unsigned long)next->ds_ctx->ds;
+
+ if (ds_next != ds_prev) {
+ /*
+ * We clear debugctl to make sure DS
+ * is not in use when we change it:
+ */
+ debugctl = 0;
+ update_debugctlmsr(0);
+ wrmsrl(MSR_IA32_DS_AREA, ds_next);
+ }
}
+ #endif /* CONFIG_X86_DS */
if (next->debugctlmsr != debugctl)
update_debugctlmsr(next->debugctlmsr);
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
- #ifdef X86_BTS
+ #ifdef CONFIG_X86_PTRACE_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
- #endif
+ #endif /* CONFIG_X86_PTRACE_BTS */
}
/*
unsigned fsindex, gsindex;
/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter>5)
+ if (next_p->fpu_counter > 5)
prefetch(next->xstate);
/*
*/
load_sp0(tss, next);
- /*
+ /*
* Switch DS and ES.
* This won't pick up thread selector changes, but I guess that is ok.
*/
savesegment(es, prev->es);
if (unlikely(next->es | prev->es))
- loadsegment(es, next->es);
+ loadsegment(es, next->es);
savesegment(ds, prev->ds);
if (unlikely(next->ds | prev->ds))
*/
arch_leave_lazy_cpu_mode();
- /*
+ /*
* Switch FS and GS.
*
* Segment register != 0 always requires a reload. Also
*/
if (unlikely(fsindex | next->fsindex | prev->fs)) {
loadsegment(fs, next->fsindex);
- /*
+ /*
* Check if the user used a selector != 0; if yes
* clear 64bit base, since overloaded base is always
* mapped to the Null selector
*/
if (fsindex)
- prev->fs = 0;
+ prev->fs = 0;
}
/* when next process has a 64bit base use it */
if (next->fs)
if (unlikely(gsindex | next->gsindex | prev->gs)) {
load_gs_index(next->gsindex);
if (gsindex)
- prev->gs = 0;
+ prev->gs = 0;
}
if (next->gs)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
/* Must be after DS reload */
unlazy_fpu(prev_p);
- /*
+ /*
* Switch the PDA and FPU contexts.
*/
prev->usersp = read_pda(oldrsp);
write_pda(oldrsp, next->usersp);
- write_pda(pcurrent, next_p);
+ write_pda(pcurrent, next_p);
write_pda(kernelstack,
(unsigned long)task_stack_page(next_p) +
char __user * __user *envp, struct pt_regs *regs)
{
long error;
- char * filename;
+ char *filename;
filename = getname(name);
error = PTR_ERR(filename);
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
- u64 fp,ip;
+ u64 fp, ip;
int count = 0;
- if (!p || p == current || p->state==TASK_RUNNING)
- return 0;
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
stack = (unsigned long)task_stack_page(p);
if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
return 0;
fp = *(u64 *)(p->thread.sp);
- do {
+ do {
if (fp < (unsigned long)stack ||
fp > (unsigned long)stack+THREAD_SIZE)
- return 0;
+ return 0;
ip = *(u64 *)(fp+8);
if (!in_sched_functions(ip))
return ip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
+ fp = *(u64 *)fp;
+ } while (count++ < 16);
return 0;
}
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
- {
- int ret = 0;
+ {
+ int ret = 0;
int doit = task == current;
int cpu;
- switch (code) {
+ switch (code) {
case ARCH_SET_GS:
if (addr >= TASK_SIZE_OF(task))
- return -EPERM;
+ return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
+ /* handle small bases via the GDT because that's faster to
switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
+ if (addr <= 0xffffffff) {
+ set_32bit_tls(task, GS_TLS, addr);
+ if (doit) {
load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
+ load_gs_index(GS_TLS_SEL);
}
- task->thread.gsindex = GS_TLS_SEL;
+ task->thread.gsindex = GS_TLS_SEL;
task->thread.gs = 0;
- } else {
+ } else {
task->thread.gsindex = 0;
task->thread.gs = addr;
if (doit) {
load_gs_index(0);
ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
- }
+ }
}
put_cpu();
break;
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gs;
- }
- else
+ } else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
break;
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
+#include <asm/idle.h>
#include <asm/smp.h>
#include <asm/trampoline.h>
#include <asm/cpu.h>
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
#else
- struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+ static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
#endif
static atomic_t init_deasserted;
- static int boot_cpu_logical_apicid;
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
/* Set if we find a B stepping CPU */
- int __cpuinitdata smp_b_stepping;
+ static int __cpuinitdata smp_b_stepping;
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
#endif
#ifdef CONFIG_X86_32
+ static int boot_cpu_logical_apicid;
+
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = BAD_APICID };
/*
* (This works even if the APIC is not enabled.)
*/
- phys_id = GET_APIC_ID(read_apic_id());
+ phys_id = read_apic_id();
cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
end_local_APIC_setup();
map_cpu_to_logical_apicid();
+ notify_cpu_starting(cpuid);
/*
* Get our bogomips.
*
printk(KERN_CONT
"a previous APIC delivery may have failed\n");
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
+ apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
timeout = 0;
do {
int maxlvt;
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
/* Boot on the stack */
/* Kick the second */
- apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+ apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
/*
* Turn INIT on target chip
*/
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/*
* Send IPI
*/
- apic_write(APIC_ICR,
- APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+ phys_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
pr_debug("Deasserting INIT.\n");
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/* Send IPI */
- apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
*/
/* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
/* Boot on the stack */
/* Kick the second */
- apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
+ apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+ phys_apicid);
/*
* Give the other CPU some time to accept the IPI.
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
+ #ifdef CONFIG_X86_32
boot_cpu_logical_apicid = logical_smp_processor_id();
+ #endif
current_thread_info()->cpu = 0; /* needed? */
set_cpu_sibling_map(0);
+ #ifdef CONFIG_X86_64
+ enable_IR_x2apic();
+ setup_apic_routing();
+ #endif
+
if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n");
disable_smp();
}
preempt_disable();
- if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+ if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+ read_apic_id(), boot_cpu_physical_apicid);
/* Or can we switch back to PIC here? */
}
preempt_enable();
if (!num_processors)
num_processors = 1;
- #ifdef CONFIG_HOTPLUG_CPU
if (additional_cpus == -1) {
if (disabled_cpus > 0)
additional_cpus = disabled_cpus;
else
additional_cpus = 0;
}
- #else
- additional_cpus = 0;
- #endif
+
possible = num_processors + additional_cpus;
if (possible > NR_CPUS)
possible = NR_CPUS;
numa_remove_cpu(cpu);
}
-int __cpu_disable(void)
+void cpu_disable_common(void)
{
int cpu = smp_processor_id();
-
- /*
- * Perhaps use cpufreq to drop frequency, but that could go
- * into generic code.
- *
- * We won't take down the boot processor on i386 due to some
- * interrupts only being able to be serviced by the BSP.
- * Especially so if we're not using an IOAPIC -zwane
- */
- if (cpu == 0)
- return -EBUSY;
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- stop_apic_nmi_watchdog(NULL);
- clear_local_APIC();
-
/*
* HACK:
* Allow any queued timer interrupts to get serviced
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs(cpu_online_map);
+}
+
+int native_cpu_disable(void)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Perhaps use cpufreq to drop frequency, but that could go
+ * into generic code.
+ *
+ * We won't take down the boot processor on i386 due to some
+ * interrupts only being able to be serviced by the BSP.
+ * Especially so if we're not using an IOAPIC -zwane
+ */
+ if (cpu == 0)
+ return -EBUSY;
+
+ if (nmi_watchdog == NMI_LOCAL_APIC)
+ stop_apic_nmi_watchdog(NULL);
+ clear_local_APIC();
+
+ cpu_disable_common();
return 0;
}
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
unsigned int i;
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
+
+void play_dead_common(void)
+{
+ idle_task_exit();
+ reset_lazy_tlbstate();
+ irq_ctx_exit(raw_smp_processor_id());
+ c1e_remove_cpu(raw_smp_processor_id());
+
+ mb();
+ /* Ack it */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
+
+ /*
+ * With physical CPU hotplug, we should halt the cpu
+ */
+ local_irq_disable();
+}
+
+void native_play_dead(void)
+{
+ play_dead_common();
+ wbinvd_halt();
+}
+
#else /* ... !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
+int native_cpu_disable(void)
{
return -ENOSYS;
}
-void __cpu_die(unsigned int cpu)
+void native_cpu_die(unsigned int cpu)
{
/* We said "no" in __cpu_disable */
BUG();
}
+
+void native_play_dead(void)
+{
+ BUG();
+}
+
#endif
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>
+ #include <asm/traps.h>
/*
* Page fault error code bits
return 0;
}
- void do_invalid_op(struct pt_regs *, unsigned long);
-
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
void vmalloc_sync_all(void)
{
-#ifdef CONFIG_X86_32
- unsigned long start = VMALLOC_START & PGDIR_MASK;
unsigned long address;
+#ifdef CONFIG_X86_32
if (SHARED_KERNEL_PMD)
return;
- BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
- for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
+ for (address = VMALLOC_START & PMD_MASK;
+ address >= TASK_SIZE && address < FIXADDR_TOP;
+ address += PMD_SIZE) {
unsigned long flags;
struct page *page;
spin_unlock_irqrestore(&pgd_lock, flags);
}
#else /* CONFIG_X86_64 */
- unsigned long start = VMALLOC_START & PGDIR_MASK;
- unsigned long address;
-
- for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
+ address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
unsigned long flags;
struct page *page;
#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
-#include <xen/interface/sched.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
#include <asm/paravirt.h>
+ #include <asm/apic.h>
#include <asm/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
+enum xen_domain_type xen_domain_type = XEN_NATIVE;
+EXPORT_SYMBOL_GPL(xen_domain_type);
+
/*
* Identity map, in addition to plain kernel map. This needs to be
* large enough to allocate page table pages to allocate the rest.
*
* 0: not available, 1: available
*/
-static int have_vcpu_info_placement = 1;
+static int have_vcpu_info_placement =
+#ifdef CONFIG_X86_32
+ 1
+#else
+ 0
+#endif
+ ;
+
static void xen_vcpu_setup(int cpu)
{
return HYPERVISOR_get_debugreg(reg);
}
-static unsigned long xen_save_fl(void)
+static void xen_leave_lazy(void)
{
- struct vcpu_info *vcpu;
- unsigned long flags;
-
- vcpu = x86_read_percpu(xen_vcpu);
-
- /* flag has opposite sense of mask */
- flags = !vcpu->evtchn_upcall_mask;
-
- /* convert to IF type flag
- -0 -> 0x00000000
- -1 -> 0xffffffff
- */
- return (-flags) & X86_EFLAGS_IF;
+ paravirt_leave_lazy(paravirt_get_lazy_mode());
+ xen_mc_flush();
}
-static void xen_restore_fl(unsigned long flags)
+static unsigned long xen_store_tr(void)
{
- struct vcpu_info *vcpu;
-
- /* convert from IF type flag */
- flags = !(flags & X86_EFLAGS_IF);
-
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- vcpu = x86_read_percpu(xen_vcpu);
- vcpu->evtchn_upcall_mask = flags;
- preempt_enable_no_resched();
-
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
-
- if (flags == 0) {
- preempt_check_resched();
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- force_evtchn_callback();
- }
+ return 0;
}
-static void xen_irq_disable(void)
+/*
+ * Set the page permissions for a particular virtual address. If the
+ * address is a vmalloc mapping (or other non-linear mapping), then
+ * find the linear mapping of the page and also set its protections to
+ * match.
+ */
+static void set_aliased_prot(void *v, pgprot_t prot)
{
- /* There's a one instruction preempt window here. We need to
- make sure we're don't switch CPUs between getting the vcpu
- pointer and updating the mask. */
- preempt_disable();
- x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
- preempt_enable_no_resched();
-}
+ int level;
+ pte_t *ptep;
+ pte_t pte;
+ unsigned long pfn;
+ struct page *page;
-static void xen_irq_enable(void)
-{
- struct vcpu_info *vcpu;
+ ptep = lookup_address((unsigned long)v, &level);
+ BUG_ON(ptep == NULL);
- /* We don't need to worry about being preempted here, since
- either a) interrupts are disabled, so no preemption, or b)
- the caller is confused and is trying to re-enable interrupts
- on an indeterminate processor. */
+ pfn = pte_pfn(*ptep);
+ page = pfn_to_page(pfn);
- vcpu = x86_read_percpu(xen_vcpu);
- vcpu->evtchn_upcall_mask = 0;
+ pte = pfn_pte(pfn, prot);
- /* Doesn't matter if we get preempted here, because any
- pending event will get dealt with anyway. */
+ if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
+ BUG();
- barrier(); /* unmask then check (avoid races) */
- if (unlikely(vcpu->evtchn_upcall_pending))
- force_evtchn_callback();
-}
+ if (!PageHighMem(page)) {
+ void *av = __va(PFN_PHYS(pfn));
-static void xen_safe_halt(void)
-{
- /* Blocking includes an implicit local_irq_enable(). */
- if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
- BUG();
+ if (av != v)
+ if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
+ BUG();
+ } else
+ kmap_flush_unused();
}
-static void xen_halt(void)
+static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
- if (irqs_disabled())
- HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
- else
- xen_safe_halt();
-}
+ const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
+ int i;
-static void xen_leave_lazy(void)
-{
- paravirt_leave_lazy(paravirt_get_lazy_mode());
- xen_mc_flush();
+ for(i = 0; i < entries; i += entries_per_page)
+ set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
}
-static unsigned long xen_store_tr(void)
+static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
{
- return 0;
+ const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
+ int i;
+
+ for(i = 0; i < entries; i += entries_per_page)
+ set_aliased_prot(ldt + i, PAGE_KERNEL);
}
static void xen_set_ldt(const void *addr, unsigned entries)
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
const void *ptr)
{
- unsigned long lp = (unsigned long)&dt[entrynum];
- xmaddr_t mach_lp = virt_to_machine(lp);
+ xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
u64 entry = *(u64 *)ptr;
preempt_disable();
}
static void xen_load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+ struct thread_struct *thread)
{
struct multicall_space mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
}
#ifdef CONFIG_X86_LOCAL_APIC
- static u32 xen_apic_read(unsigned long reg)
+ static u32 xen_apic_read(u32 reg)
{
return 0;
}
- static void xen_apic_write(unsigned long reg, u32 val)
+ static void xen_apic_write(u32 reg, u32 val)
{
/* Warn to see if there's any stray references */
WARN_ON(1);
}
+
+ static u64 xen_apic_icr_read(void)
+ {
+ return 0;
+ }
+
+ static void xen_apic_icr_write(u32 low, u32 id)
+ {
+ /* Warn to see if there's any stray references */
+ WARN_ON(1);
+ }
+
+ static void xen_apic_wait_icr_idle(void)
+ {
+ return;
+ }
+
+ static u32 xen_safe_apic_wait_icr_idle(void)
+ {
+ return 0;
+ }
+
+ static struct apic_ops xen_basic_apic_ops = {
+ .read = xen_apic_read,
+ .write = xen_apic_write,
+ .icr_read = xen_apic_icr_read,
+ .icr_write = xen_apic_icr_write,
+ .wait_icr_idle = xen_apic_wait_icr_idle,
+ .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
+ };
+
#endif
static void xen_flush_tlb(void)
ret = -EFAULT;
break;
#endif
+
+ case MSR_STAR:
+ case MSR_CSTAR:
+ case MSR_LSTAR:
+ case MSR_SYSCALL_MASK:
+ case MSR_IA32_SYSENTER_CS:
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_IA32_SYSENTER_EIP:
+ /* Fast syscall setup is all done in hypercalls, so
+ these are all ignored. Stub them out here to stop
+ Xen console noise. */
+ break;
+
default:
ret = native_write_msr_safe(msr, low, high);
}
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
- static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
+ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
{
#ifdef CONFIG_FLATMEM
BUG_ON(mem_map); /* should only be used early */
/* Early release_pte assumes that all pts are pinned, since there's
only init_mm and anything attached to that is pinned. */
- static void xen_release_pte_init(u32 pfn)
+ static void xen_release_pte_init(unsigned long pfn)
{
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
/* This needs to make sure the new pte page is pinned iff its being
attached to a pinned pagetable. */
- static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
+ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
SetPagePinned(page);
if (!PageHighMem(page)) {
- make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
- if (level == PT_PTE)
+ make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
+ if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
} else
/* make sure there are no stray mappings of
}
}
- static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
+ static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PTE);
}
- static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
+ static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PMD);
}
}
/* This should never happen until we're OK to use struct page */
- static void xen_release_ptpage(u32 pfn, unsigned level)
+ static void xen_release_ptpage(unsigned long pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
if (PagePinned(page)) {
if (!PageHighMem(page)) {
- if (level == PT_PTE)
+ if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
}
}
- static void xen_release_pte(u32 pfn)
+ static void xen_release_pte(unsigned long pfn)
{
xen_release_ptpage(pfn, PT_PTE);
}
- static void xen_release_pmd(u32 pfn)
+ static void xen_release_pmd(unsigned long pfn)
{
xen_release_ptpage(pfn, PT_PMD);
}
#if PAGETABLE_LEVELS == 4
- static void xen_alloc_pud(struct mm_struct *mm, u32 pfn)
+ static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PUD);
}
- static void xen_release_pud(u32 pfn)
+ static void xen_release_pud(unsigned long pfn)
{
xen_release_ptpage(pfn, PT_PUD);
}
}
#endif
+#ifdef CONFIG_X86_32
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
{
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
xen_set_pte(ptep, pte);
}
+#endif
static __init void xen_pagetable_setup_start(pgd_t *base)
{
/* xen_vcpu_setup managed to place the vcpu_info within the
percpu area for all cpus, so make use of it */
-#ifdef CONFIG_X86_32
if (have_vcpu_info_placement) {
printk(KERN_INFO "Xen: using vcpu_info placement\n");
pv_irq_ops.irq_enable = xen_irq_enable_direct;
pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
}
-#endif
}
static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
goto patch_site
switch (type) {
-#ifdef CONFIG_X86_32
SITE(pv_irq_ops, irq_enable);
SITE(pv_irq_ops, irq_disable);
SITE(pv_irq_ops, save_fl);
SITE(pv_irq_ops, restore_fl);
-#endif /* CONFIG_X86_32 */
#undef SITE
patch_site:
.load_gs_index = xen_load_gs_index,
#endif
+ .alloc_ldt = xen_alloc_ldt,
+ .free_ldt = xen_free_ldt,
+
.store_gdt = native_store_gdt,
.store_idt = native_store_idt,
.store_tr = xen_store_tr,
},
};
-static void __init __xen_init_IRQ(void)
-{
-#ifdef CONFIG_X86_64
- int i;
-
- /* Create identity vector->irq map */
- for(i = 0; i < NR_VECTORS; i++) {
- int cpu;
-
- for_each_possible_cpu(cpu)
- per_cpu(vector_irq, cpu)[i] = i;
- }
-#endif /* CONFIG_X86_64 */
-
- xen_init_IRQ();
-}
-
-static const struct pv_irq_ops xen_irq_ops __initdata = {
- .init_IRQ = __xen_init_IRQ,
- .save_fl = xen_save_fl,
- .restore_fl = xen_restore_fl,
- .irq_disable = xen_irq_disable,
- .irq_enable = xen_irq_enable,
- .safe_halt = xen_safe_halt,
- .halt = xen_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = xen_adjust_exception_frame,
-#endif
-};
-
static const struct pv_apic_ops xen_apic_ops __initdata = {
#ifdef CONFIG_X86_LOCAL_APIC
- .apic_write = xen_apic_write,
- .apic_read = xen_apic_read,
.setup_boot_clock = paravirt_nop,
.setup_secondary_clock = paravirt_nop,
.startup_ipi_hook = paravirt_nop,
if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
top = pp.virt_start;
- reserve_top_address(-top + 2 * PAGE_SIZE);
+ reserve_top_address(-top);
#endif /* CONFIG_X86_32 */
}
return __ka(m2p(maddr));
}
-#ifdef CONFIG_X86_64
-static void walk(pgd_t *pgd, unsigned long addr)
-{
- unsigned l4idx = pgd_index(addr);
- unsigned l3idx = pud_index(addr);
- unsigned l2idx = pmd_index(addr);
- unsigned l1idx = pte_index(addr);
- pgd_t l4;
- pud_t l3;
- pmd_t l2;
- pte_t l1;
-
- xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
- pgd, addr, l4idx, l3idx, l2idx, l1idx);
-
- l4 = pgd[l4idx];
- xen_raw_printk(" l4: %016lx\n", l4.pgd);
- xen_raw_printk(" %016lx\n", pgd_val(l4));
-
- l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
- xen_raw_printk(" l3: %016lx\n", l3.pud);
- xen_raw_printk(" %016lx\n", pud_val(l3));
-
- l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
- xen_raw_printk(" l2: %016lx\n", l2.pmd);
- xen_raw_printk(" %016lx\n", pmd_val(l2));
-
- l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
- xen_raw_printk(" l1: %016lx\n", l1.pte);
- xen_raw_printk(" %016lx\n", pte_val(l1));
-}
-#endif
-
static void set_page_prot(void *addr, pgprot_t prot)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
- xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
- addr, pfn, get_phys_to_machine(pfn),
- pgprot_val(prot), pte.pte);
-
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
BUG();
}
if (!xen_start_info)
return;
+ xen_domain_type = XEN_PV_DOMAIN;
+
BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
xen_setup_features();
pv_init_ops = xen_init_ops;
pv_time_ops = xen_time_ops;
pv_cpu_ops = xen_cpu_ops;
- pv_irq_ops = xen_irq_ops;
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
+ xen_init_irq_ops();
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * set up the basic apic ops.
+ */
+ apic_ops = &xen_basic_apic_ops;
+ #endif
+
if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
/* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
- if (!is_initial_xendomain())
+ if (!xen_initial_domain())
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
/* Don't do the full vcpu_info placement stuff until we have a
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
- if (!is_initial_xendomain()) {
+ if (!xen_initial_domain()) {
add_preferred_console("xenboot", 0, NULL);
add_preferred_console("tty", 0, NULL);
add_preferred_console("hvc", 0, NULL);
xen_raw_console_write("about to get started...\n");
-#if 0
- xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
- &boot_params, __pa_symbol(&boot_params),
- __va(__pa_symbol(&boot_params)));
-
- walk(pgd, &boot_params);
- walk(pgd, __va(__pa(&boot_params)));
-#endif
-
/* Start the world */
#ifdef CONFIG_X86_32
i386_start_kernel();
#define GRANT_INVALID_REF 0
#define PARTS_PER_DISK 16
+ #define PARTS_PER_EXT_DISK 256
#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)
- #define DEV_NAME "xvd" /* name in /dev */
+ #define EXT_SHIFT 28
+ #define EXTENDED (1<<EXT_SHIFT)
+ #define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
+ #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
- /* Information about our VBDs. */
- #define MAX_VBDS 64
- static LIST_HEAD(vbds_list);
+ #define DEV_NAME "xvd" /* name in /dev */
static int get_id_from_freelist(struct blkfront_info *info)
{
}
- static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
- int vdevice, u16 vdisk_info, u16 sector_size,
- struct blkfront_info *info)
+ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ struct blkfront_info *info,
+ u16 vdisk_info, u16 sector_size)
{
struct gendisk *gd;
int nr_minors = 1;
int err = -ENODEV;
+ unsigned int offset;
+ int minor;
+ int nr_parts;
BUG_ON(info->gd != NULL);
BUG_ON(info->rq != NULL);
- if ((minor % PARTS_PER_DISK) == 0)
- nr_minors = PARTS_PER_DISK;
+ if ((info->vdevice>>EXT_SHIFT) > 1) {
+ /* this is above the extended range; something is wrong */
+ printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
+ return -ENODEV;
+ }
+
+ if (!VDEV_IS_EXTENDED(info->vdevice)) {
+ minor = BLKIF_MINOR(info->vdevice);
+ nr_parts = PARTS_PER_DISK;
+ } else {
+ minor = BLKIF_MINOR_EXT(info->vdevice);
+ nr_parts = PARTS_PER_EXT_DISK;
+ }
+
+ if ((minor % nr_parts) == 0)
+ nr_minors = nr_parts;
gd = alloc_disk(nr_minors);
if (gd == NULL)
goto out;
- if (nr_minors > 1)
- sprintf(gd->disk_name, "%s%c", DEV_NAME,
- 'a' + minor / PARTS_PER_DISK);
- else
- sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
- 'a' + minor / PARTS_PER_DISK,
- minor % PARTS_PER_DISK);
+ offset = minor / nr_parts;
+
+ if (nr_minors > 1) {
+ if (offset < 26)
+ sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
+ else
+ sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
+ 'a' + ((offset / 26)-1), 'a' + (offset % 26));
+ } else {
+ if (offset < 26)
+ sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
+ 'a' + offset,
+ minor & (nr_parts - 1));
+ else
+ sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
+ 'a' + ((offset / 26) - 1),
+ 'a' + (offset % 26),
+ minor & (nr_parts - 1));
+ }
gd->major = XENVBD_MAJOR;
gd->first_minor = minor;
err = xenbus_scanf(XBT_NIL, dev->nodename,
"virtual-device", "%i", &vdevice);
if (err != 1) {
- xenbus_dev_fatal(dev, err, "reading virtual-device");
- return err;
+ /* go looking in the extended area instead */
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
+ "%i", &vdevice);
+ if (err != 1) {
+ xenbus_dev_fatal(dev, err, "reading virtual-device");
+ return err;
+ }
}
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (err)
info->feature_barrier = 0;
- err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
- sectors, info->vdevice,
- binfo, sector_size, info);
+ err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
info->xbdev->otherend);
static int __init xlblk_init(void)
{
- if (!is_running_on_xen())
+ if (!xen_domain())
return -ENODEV;
if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
- #ifndef _ASM_DESC_H_
- #define _ASM_DESC_H_
+ #ifndef ASM_X86__DESC_H
+ #define ASM_X86__DESC_H
#ifndef __ASSEMBLY__
#include <asm/desc_defs.h>
desc->d = info->seg_32bit;
desc->g = info->limit_in_pages;
desc->base2 = (info->base_addr & 0xff000000) >> 24;
+ /*
+ * Don't allow setting of the lm bit. It is useless anyway
+ * because 64bit system calls require __USER_CS:
+ */
+ desc->l = 0;
}
extern struct desc_ptr idt_descr;
native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g) \
native_write_idt_entry(dt, entry, g)
-#endif
+
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+}
+
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+}
+#endif /* CONFIG_PARAVIRT */
static inline void native_write_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)
#endif /* __ASSEMBLY__ */
- #endif
+ #endif /* ASM_X86__DESC_H */
- #ifndef __ASM_PARAVIRT_H
- #define __ASM_PARAVIRT_H
+ #ifndef ASM_X86__PARAVIRT_H
+ #define ASM_X86__PARAVIRT_H
/* Various instructions on x86 need to be replaced for
* para-virtualization: those hooks are defined here. */
int entrynum, const void *desc, int size);
void (*write_idt_entry)(gate_desc *,
int entrynum, const gate_desc *gate);
+ void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
+ void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
+
void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
void (*set_iopl_mask)(unsigned mask);
/* MSR, PMC and TSR operations.
err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
+ u64 (*read_msr_amd)(unsigned int msr, int *err);
u64 (*read_msr)(unsigned int msr, int *err);
int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
struct pv_apic_ops {
#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Direct APIC operations, principally for VMI. Ideally
- * these shouldn't be in this interface.
- */
- void (*apic_write)(unsigned long reg, u32 v);
- u32 (*apic_read)(unsigned long reg);
void (*setup_boot_clock)(void);
void (*setup_secondary_clock)(void);
* Hooks for allocating/releasing pagetable pages when they're
* attached to a pagetable
*/
- void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
- void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
- void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
- void (*alloc_pud)(struct mm_struct *mm, u32 pfn);
- void (*release_pte)(u32 pfn);
- void (*release_pmd)(u32 pfn);
- void (*release_pud)(u32 pfn);
+ void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
+ void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
+ void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
+ void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
+ void (*release_pte)(unsigned long pfn);
+ void (*release_pmd)(unsigned long pfn);
+ void (*release_pud)(unsigned long pfn);
/* Pagetable manipulation functions */
void (*set_pte)(pte_t *ptep, pte_t pteval);
int (*spin_is_locked)(struct raw_spinlock *lock);
int (*spin_is_contended)(struct raw_spinlock *lock);
void (*spin_lock)(struct raw_spinlock *lock);
+ void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
int (*spin_trylock)(struct raw_spinlock *lock);
void (*spin_unlock)(struct raw_spinlock *lock);
};
{
return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}
+ static inline u64 paravirt_read_msr_amd(unsigned msr, int *err)
+ {
+ return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err);
+ }
static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
*p = paravirt_read_msr(msr, &err);
return err;
}
+ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+ {
+ int err;
+
+ *p = paravirt_read_msr_amd(msr, &err);
+ return err;
+ }
static inline u64 paravirt_read_tsc(void)
{
(aux) = __aux; \
} while (0)
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+ PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
+}
+
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+ PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
+}
+
static inline void load_TR_desc(void)
{
PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
}
#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Basic functions accessing APICs.
- */
- static inline void apic_write(unsigned long reg, u32 v)
- {
- PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
- }
-
- static inline u32 apic_read(unsigned long reg)
- {
- return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
- }
-
static inline void setup_boot_clock(void)
{
PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
}
- static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
+ static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)
{
PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
}
- static inline void paravirt_release_pte(unsigned pfn)
+ static inline void paravirt_release_pte(unsigned long pfn)
{
PVOP_VCALL1(pv_mmu_ops.release_pte, pfn);
}
- static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn)
+ static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
{
PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn);
}
- static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn,
- unsigned start, unsigned count)
+ static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
+ unsigned long start, unsigned long count)
{
PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count);
}
- static inline void paravirt_release_pmd(unsigned pfn)
+ static inline void paravirt_release_pmd(unsigned long pfn)
{
PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn);
}
- static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn)
+ static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn);
}
- static inline void paravirt_release_pud(unsigned pfn)
+ static inline void paravirt_release_pud(unsigned long pfn)
{
PVOP_VCALL1(pv_mmu_ops.release_pud, pfn);
}
PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
}
+static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
+ unsigned long flags)
+{
+ PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
+}
+
static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
{
return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
- #endif /* __ASM_PARAVIRT_H */
+ #endif /* ASM_X86__PARAVIRT_H */
- #ifndef _ASM_X86_SMP_H_
- #define _ASM_X86_SMP_H_
+ #ifndef ASM_X86__SMP_H
+ #define ASM_X86__SMP_H
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
#include <linux/init.h>
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_t, cpu_core_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
+ #ifdef CONFIG_X86_32
+ DECLARE_PER_CPU(int, cpu_number);
+ #endif
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
struct smp_ops {
void (*smp_prepare_boot_cpu)(void);
void (*smp_prepare_cpus)(unsigned max_cpus);
- int (*cpu_up)(unsigned cpu);
void (*smp_cpus_done)(unsigned max_cpus);
void (*smp_send_stop)(void);
void (*smp_send_reschedule)(int cpu);
+ int (*cpu_up)(unsigned cpu);
+ int (*cpu_disable)(void);
+ void (*cpu_die)(unsigned int cpu);
+ void (*play_dead)(void);
+
void (*send_call_func_ipi)(cpumask_t mask);
void (*send_call_func_single_ipi)(int cpu);
};
return smp_ops.cpu_up(cpu);
}
+static inline int __cpu_disable(void)
+{
+ return smp_ops.cpu_disable();
+}
+
+static inline void __cpu_die(unsigned int cpu)
+{
+ smp_ops.cpu_die(cpu);
+}
+
+static inline void play_dead(void)
+{
+ smp_ops.play_dead();
+}
+
static inline void smp_send_reschedule(int cpu)
{
smp_ops.smp_send_reschedule(cpu);
smp_ops.send_call_func_ipi(mask);
}
+void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
int native_cpu_up(unsigned int cpunum);
+int native_cpu_disable(void);
+void native_cpu_die(unsigned int cpu);
+void native_play_dead(void);
+void play_dead_common(void);
+
void native_send_call_func_ipi(cpumask_t mask);
void native_send_call_func_single_ipi(int cpu);
-extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
-
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
- DECLARE_PER_CPU(int, cpu_number);
#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
extern int safe_smp_processor_id(void);
#ifdef CONFIG_X86_LOCAL_APIC
+ #ifndef CONFIG_X86_64
static inline int logical_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
}
- #ifndef CONFIG_X86_64
+ #include <mach_apicdef.h>
static inline unsigned int read_apic_id(void)
{
- return *(u32 *)(APIC_BASE + APIC_ID);
+ unsigned int reg;
+
+ reg = *(u32 *)(APIC_BASE + APIC_ID);
+
+ return GET_APIC_ID(reg);
}
- #else
- extern unsigned int read_apic_id(void);
#endif
- # ifdef APIC_DEFINITION
+ # if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
extern int hard_smp_processor_id(void);
# else
- # include <mach_apicdef.h>
+ #include <mach_apicdef.h>
static inline int hard_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_ID(read_apic_id());
+ return read_apic_id();
}
# endif /* APIC_DEFINITION */
#endif /* CONFIG_X86_LOCAL_APIC */
-#ifdef CONFIG_HOTPLUG_CPU
-extern void cpu_uninit(void);
-#endif
-
#endif /* __ASSEMBLY__ */
- #endif
+ #endif /* ASM_X86__SMP_H */
- #ifndef _X86_SPINLOCK_H_
- #define _X86_SPINLOCK_H_
+ #ifndef ASM_X86__SPINLOCK_H
+ #define ASM_X86__SPINLOCK_H
#include <asm/atomic.h>
#include <asm/rwlock.h>
"jne 1f\n\t"
"movw %w0,%w1\n\t"
"incb %h1\n\t"
- "lock ; cmpxchgw %w1,%2\n\t"
+ LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
"1:"
"sete %b1\n\t"
"movzbl %b1,%0\n\t"
int inc = 0x00010000;
int tmp;
- asm volatile("lock ; xaddl %0, %1\n"
+ asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0,%1\n\t"
"jne 1f\n\t"
"addl $0x00010000, %1\n\t"
- "lock ; cmpxchgl %1,%2\n\t"
+ LOCK_PREFIX "cmpxchgl %1,%2\n\t"
"1:"
"sete %b1\n\t"
"movzbl %b1,%0\n\t"
}
#endif
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
-
#ifdef CONFIG_PARAVIRT
/*
* Define virtualization-friendly old-style lock byte lock, for use in
{
__ticket_spin_unlock(lock);
}
+
+static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+ unsigned long flags)
+{
+ __raw_spin_lock(lock);
+}
+
#endif /* CONFIG_PARAVIRT */
static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
#define _raw_read_relax(lock) cpu_relax()
#define _raw_write_relax(lock) cpu_relax()
- #endif
+ #endif /* ASM_X86__SPINLOCK_H */
- #ifndef _ASM_X86_TLBFLUSH_H
- #define _ASM_X86_TLBFLUSH_H
+ #ifndef ASM_X86__TLBFLUSH_H
+ #define ASM_X86__TLBFLUSH_H
#include <linux/mm.h>
#include <linux/sched.h>
{
}
+static inline void reset_lazy_tlbstate(void)
+{
+}
+
#else /* SMP */
#include <asm/smp.h>
char __cacheline_padding[L1_CACHE_BYTES-8];
};
DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+
+void reset_lazy_tlbstate(void);
+#else
+static inline void reset_lazy_tlbstate(void)
+{
+}
#endif
#endif /* SMP */
flush_tlb_all();
}
- #endif /* _ASM_X86_TLBFLUSH_H */
+ #endif /* ASM_X86__TLBFLUSH_H */
* IN THE SOFTWARE.
*/
- #ifndef __HYPERVISOR_H__
- #define __HYPERVISOR_H__
+ #ifndef ASM_X86__XEN__HYPERVISOR_H
+ #define ASM_X86__XEN__HYPERVISOR_H
#include <linux/types.h>
#include <linux/kernel.h>
/* arch/i386/kernel/setup.c */
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
-#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
/* arch/i386/mach-xen/evtchn.c */
/* Force a proper event-channel callback from Xen. */
#define MULTI_UVMFLAGS_INDEX 3
#define MULTI_UVMDOMID_INDEX 4
-#define is_running_on_xen() (xen_start_info ? 1 : 0)
+enum xen_domain_type {
+ XEN_NATIVE,
+ XEN_PV_DOMAIN,
+ XEN_HVM_DOMAIN,
+};
+
+extern enum xen_domain_type xen_domain_type;
+
+#define xen_domain() (xen_domain_type != XEN_NATIVE)
+#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN)
+#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
+#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN)
- #endif /* __HYPERVISOR_H__ */
+ #endif /* ASM_X86__XEN__HYPERVISOR_H */
extern void arch_unmap_area(struct mm_struct *, unsigned long);
extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
/*
* The mm counters are not protected by its page_table_lock,
* so must be incremented atomically.
#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
-#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#else /* !USE_SPLIT_PTLOCKS */
/*
* The mm counters are protected by its page_table_lock,
* so can be incremented directly.
#define inc_mm_counter(mm, member) (mm)->_##member++
#define dec_mm_counter(mm, member) (mm)->_##member--
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* !USE_SPLIT_PTLOCKS */
#define get_mm_rss(mm) \
(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
* - everyone except group_exit_task is stopped during signal delivery
* of fatal signals, group_exit_task processes the signal.
*/
- struct task_struct *group_exit_task;
int notify_count;
+ struct task_struct *group_exit_task;
/* thread group stop support, overloads group_exit_code too */
int group_stop_count;
unsigned int ttwu_move_affine;
unsigned int ttwu_move_balance;
#endif
+ #ifdef CONFIG_SCHED_DEBUG
+ char *name;
+ #endif
};
extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
void (*yield_task) (struct rq *rq);
int (*select_task_rq)(struct task_struct *p, int sync);
- void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+ void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
struct task_struct * (*pick_next_task) (struct rq *rq);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
struct sched_rt_entity {
struct list_head run_list;
- unsigned int time_slice;
unsigned long timeout;
+ unsigned int time_slice;
int nr_cpus_allowed;
struct sched_rt_entity *back;