* 'x86-alternatives-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, suspend: Avoid unnecessary smp alternatives switch during suspend/resume
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86-64, asm: Use fxsaveq/fxrestorq in more places
* 'x86-hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, hwmon: Add core threshold notification to therm_throt.c
* 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, paravirt: Use native_halt on a halt, not native_safe_halt
* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
locking, lockdep: Convert sprintf_symbol to %pS
* 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
irq: Better struct irqaction layout
extern void alternatives_smp_module_del(struct module *mod);
extern void alternatives_smp_switch(int smp);
extern int alternatives_text_reserved(void *start, void *end);
+ +++++extern bool skip_smp_alternatives;
#else
static inline void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
* On the local CPU you need to be protected against NMI or MCE handlers seeing an
* inconsistent instruction while you patch.
*/
++++++struct text_poke_param {
++++++ void *addr;
++++++ const void *opcode;
++++++ size_t len;
++++++};
++++++
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
++++++extern void text_poke_smp_batch(struct text_poke_param *params, int n);
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#define IDEAL_NOP_SIZE_5 5
#define MSR_AMD64_IBSDCLINAD 0xc0011038
#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
#define MSR_AMD64_IBSCTL 0xc001103a
+ #define MSR_AMD64_IBSBRTARGET 0xc001103b
+
++++++/* Fam 15h MSRs */
++++++#define MSR_F15H_PERF_CTL 0xc0010200
++++++#define MSR_F15H_PERF_CTR 0xc0010201
+ ++++
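The Fam15h core PMU interleaves its control and counter registers, so these two base addresses are enough to reach every counter at a stride of 2. A minimal sketch of the indexing, assuming the usual six core counters (the helper names are illustrative, not part of the patch):

	/* Fam15h core counters: CTL/CTR pairs interleaved from the base MSRs */
	static inline u32 f15h_perf_ctl(int idx)
	{
		return MSR_F15H_PERF_CTL + 2 * idx;	/* event select for counter idx */
	}

	static inline u32 f15h_perf_ctr(int idx)
	{
		return MSR_F15H_PERF_CTR + 2 * idx;	/* count register for counter idx */
	}
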
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
#define FAM10H_MMIO_CONF_ENABLE (1<<0)
#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
- --#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff
+ ++#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
#define MSR_FAM10H_NODE_ID 0xc001100c
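The ULL suffix on FAM10H_MMIO_CONF_BASE_MASK matters because the mask is shifted left by FAM10H_MMIO_CONF_BASE_SHIFT (20) before being applied to a 64-bit MSR value; as a plain 32-bit int, the shifted mask would drop the upper bits of the base address. A sketch of the extraction this enables (the helper is illustrative):

	/* Illustrative: recover the MMIO config-space base, bits 47:20 */
	static u64 fam10h_mmconf_base(void)
	{
		u64 val;

		rdmsrl(MSR_FAM10H_MMIO_CONF_BASE, val);
		/* the ULL suffix keeps the shifted mask 64 bits wide */
		return val & (FAM10H_MMIO_CONF_BASE_MASK << FAM10H_MMIO_CONF_BASE_SHIFT);
	}
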
#define MSR_IA32_TSC 0x00000010
#define MSR_IA32_PLATFORM_ID 0x00000017
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
+ #define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define FEATURE_CONTROL_LOCKED (1<<0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+++ +++/* Thermal Thresholds Support */
+++ +++#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
+++ +++#define THERM_SHIFT_THRESHOLD0 8
+++ +++#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
+++ +++#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
+++ +++#define THERM_SHIFT_THRESHOLD1 16
+++ +++#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
+++ +++#define THERM_STATUS_THRESHOLD0 (1 << 6)
+++ +++#define THERM_LOG_THRESHOLD0 (1 << 7)
+++ +++#define THERM_STATUS_THRESHOLD1 (1 << 8)
+++ +++#define THERM_LOG_THRESHOLD1 (1 << 9)
+++ +++
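These threshold bits live in MSR_IA32_THERM_INTERRUPT (the enable bits and the two 7-bit threshold values) and MSR_IA32_THERM_STATUS (the status/log bits the notification handler checks). A hedged sketch of arming threshold 0, assuming the caller already knows the 7-bit threshold value it wants:

	/* Illustrative only: arm thermal threshold 0 at 'thresh' (7-bit value) */
	static void set_therm_threshold0(unsigned int thresh)
	{
		u32 l, h;

		rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
		l &= ~THERM_MASK_THRESHOLD0;
		l |= (thresh << THERM_SHIFT_THRESHOLD0) & THERM_MASK_THRESHOLD0;
		l |= THERM_INT_THRESHOLD0_ENABLE;	/* interrupt on crossing */
		wrmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	}
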
/* MISC_ENABLE bits: architectural */
#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0)
#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1)
mutex_unlock(&smp_alt);
}
+ +++++bool skip_smp_alternatives;
void alternatives_smp_switch(int smp)
{
struct smp_alt_module *mod;
printk("lockdep: fixing up alternatives.\n");
#endif
- ----- if (noreplace_smp || smp_alt_once)
+ +++++ if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
static int wrote_text;
struct text_poke_params {
------ void *addr;
------ const void *opcode;
------ size_t len;
++++++ struct text_poke_param *params;
++++++ int nparams;
};
static int __kprobes stop_machine_text_poke(void *data)
{
struct text_poke_params *tpp = data;
++++++ struct text_poke_param *p;
++++++ int i;
if (atomic_dec_and_test(&stop_machine_first)) {
------ text_poke(tpp->addr, tpp->opcode, tpp->len);
++++++ for (i = 0; i < tpp->nparams; i++) {
++++++ p = &tpp->params[i];
++++++ text_poke(p->addr, p->opcode, p->len);
++++++ }
smp_wmb(); /* Make sure other cpus see that this has run */
wrote_text = 1;
} else {
smp_mb(); /* Load wrote_text before following execution */
}
------ flush_icache_range((unsigned long)tpp->addr,
------ (unsigned long)tpp->addr + tpp->len);
++++++ for (i = 0; i < tpp->nparams; i++) {
++++++ p = &tpp->params[i];
++++++ flush_icache_range((unsigned long)p->addr,
++++++ (unsigned long)p->addr + p->len);
++++++ }
++++++
return 0;
}
void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
{
struct text_poke_params tpp;
++++++ struct text_poke_param p;
------ tpp.addr = addr;
------ tpp.opcode = opcode;
------ tpp.len = len;
++++++ p.addr = addr;
++++++ p.opcode = opcode;
++++++ p.len = len;
++++++ tpp.params = &p;
++++++ tpp.nparams = 1;
atomic_set(&stop_machine_first, 1);
wrote_text = 0;
/* Use __stop_machine() because the caller already got online_cpus. */
- __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+ __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
return addr;
}
++++++/**
++++++ * text_poke_smp_batch - Update instructions on a live kernel on SMP
++++++ * @params: an array of text_poke parameters
++++++ * @n: the number of elements in params.
++++++ *
++++++ * Modify multi-byte instructions by using stop_machine() on SMP. Since
++++++ * stop_machine() is a heavy operation, it is better to aggregate text_poke
++++++ * requests and do them all at once if possible.
++++++ *
++++++ * Note: Must be called under get_online_cpus() and text_mutex.
++++++ */
++++++void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
++++++{
++++++ struct text_poke_params tpp = {.params = params, .nparams = n};
++++++
++++++ atomic_set(&stop_machine_first, 1);
++++++ wrote_text = 0;
++++++ stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
++++++}
++++++
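A batch caller pays for a single stop_machine() no matter how many sites it patches. A minimal usage sketch, honoring the locking rule from the comment above; site1/site2 and insn1/insn2 are hypothetical placeholders:

	/* Hypothetical caller: patch two 5-byte sites in one stop_machine() */
	struct text_poke_param params[] = {
		{ .addr = site1, .opcode = insn1, .len = 5 },
		{ .addr = site2, .opcode = insn2, .len = 5 },
	};

	get_online_cpus();
	mutex_lock(&text_mutex);
	text_poke_smp_batch(params, ARRAY_SIZE(params));
	mutex_unlock(&text_mutex);
	put_online_cpus();
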
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
- unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+ #ifdef CONFIG_X86_64
+ unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
+ #else
+ unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+ #endif
void __init arch_init_ideal_nop5(void)
{
- extern const unsigned char ftrace_test_p6nop[];
- extern const unsigned char ftrace_test_nop5[];
- extern const unsigned char ftrace_test_jmp[];
- int faulted = 0;
-
/*
- * There is no good nop for all x86 archs.
- * We will default to using the P6_NOP5, but first we
- * will test to make sure that the nop will actually
- * work on this CPU. If it faults, we will then
- * go to a lesser efficient 5 byte nop. If that fails
- * we then just use a jmp as our nop. This isn't the most
- * efficient nop, but we can not use a multi part nop
- * since we would then risk being preempted in the middle
- * of that nop, and if we enabled tracing then, it might
- * cause a system crash.
+ * There is no good nop for all x86 archs. This selection
+ * algorithm should be unified with the one in find_nop_table(),
+ * but this should be good enough for now.
*
- * TODO: check the cpuid to determine the best nop.
+ * For cases other than the ones below, use the safe (as in
+ * always functional) defaults above.
*/
- asm volatile (
- "ftrace_test_jmp:"
- "jmp ftrace_test_p6nop\n"
- "nop\n"
- "nop\n"
- "nop\n" /* 2 byte jmp + 3 bytes */
- "ftrace_test_p6nop:"
- P6_NOP5
- "jmp 1f\n"
- "ftrace_test_nop5:"
- ".byte 0x66,0x66,0x66,0x66,0x90\n"
- "1:"
- ".section .fixup, \"ax\"\n"
- "2: movl $1, %0\n"
- " jmp ftrace_test_nop5\n"
- "3: movl $2, %0\n"
- " jmp 1b\n"
- ".previous\n"
- _ASM_EXTABLE(ftrace_test_p6nop, 2b)
- _ASM_EXTABLE(ftrace_test_nop5, 3b)
- : "=r"(faulted) : "0" (faulted));
-
- switch (faulted) {
- case 0:
- pr_info("converting mcount calls to 0f 1f 44 00 00\n");
- memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
- break;
- case 1:
- pr_info("converting mcount calls to 66 66 66 66 90\n");
- memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
- break;
- case 2:
- pr_info("converting mcount calls to jmp . + 5\n");
- memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
- break;
- }
-
+ #ifdef CONFIG_X86_64
+ /* Don't use these on 32 bits due to broken virtualizers */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ memcpy(ideal_nop5, p6_nops[5], 5);
+ #endif
}
#endif
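Both static defaults are atomic 5-byte nops: 0x66 0x66 0x66 0x66 0x90 is a nop carrying four operand-size prefixes, and 0x3e 0x8d 0x74 0x26 0x00 is a ds-prefixed lea of %esi into itself that is safe on every 32-bit part; on 64-bit Intel the init routine upgrades to the P6 nop (0f 1f 44 00 00). A sketch of the consumer side, nopping out a 5-byte patch site (call_site is hypothetical):

	/* Hypothetical: disable a patched call site by writing the ideal nop */
	get_online_cpus();
	mutex_lock(&text_mutex);
	text_poke_smp(call_site, ideal_nop5, 5);
	mutex_unlock(&text_mutex);
	put_online_cpus();
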
*/
smp_store_cpu_info(cpuid);
++++++ /*
++++++ * This must be done before setting cpu_online_mask
++++++ * or calling notify_cpu_starting.
++++++ */
++++++ set_cpu_sibling_map(raw_smp_processor_id());
++++++ wmb();
++++++
notify_cpu_starting(cpuid);
/*
* fragile that we want to limit the things done here to the
* most necessary things.
*/
+ cpu_init();
+ preempt_disable();
+ smp_callin();
#ifdef CONFIG_X86_32
- /*
- * Switch away from the trampoline page-table
- *
- * Do this before cpu_init() because it needs to access per-cpu
- * data which may not be mapped in the trampoline page-table.
- */
+ /* switch away from the initial page table */
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
- cpu_init();
- preempt_disable();
- smp_callin();
-
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
*/
check_tsc_sync_target();
------ if (nmi_watchdog == NMI_IO_APIC) {
------ legacy_pic->mask(0);
------ enable_NMI_through_LVT0();
------ legacy_pic->unmask(0);
------ }
------
------ /* This must be done before setting cpu_online_mask */
------ set_cpu_sibling_map(raw_smp_processor_id());
------ wmb();
------
/*
* We need to hold call_lock, so there is no inconsistency
* between the time smp_call_function() determines number of
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
+ INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
- initial_page_table = __pa(&trampoline_pg_dir);
#else
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
err = do_boot_cpu(apicid, cpu);
-
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
printk(KERN_INFO "SMP mode deactivated.\n");
smpboot_clear_io_apic();
------ localise_nmi_watchdog();
------
connect_bsp_APIC();
setup_local_APIC();
end_local_APIC_setup();
preempt_enable();
}
+ +++++void arch_disable_nonboot_cpus_begin(void)
+ +++++{
+ +++++ /*
+ +++++ * Avoid the SMP alternatives switch during disable_nonboot_cpus().
+ +++++ * In the suspend path, we will be back in SMP mode shortly anyway.
+ +++++ */
+ +++++ skip_smp_alternatives = true;
+ +++++}
+ +++++
+ +++++void arch_disable_nonboot_cpus_end(void)
+ +++++{
+ +++++ skip_smp_alternatives = false;
+ +++++}
+ +++++
void arch_enable_nonboot_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
#endif
------ check_nmi_watchdog();
mtrr_aps_init();
}
if (cpu == 0)
return -EBUSY;
------ if (nmi_watchdog == NMI_LOCAL_APIC)
------ stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
cpu_disable_common();
{
idle_task_exit();
reset_lazy_tlbstate();
- irq_ctx_exit(raw_smp_processor_id());
c1e_remove_cpu(raw_smp_processor_id());
mb();
}
struct take_cpu_down_param {
------ struct task_struct *caller;
unsigned long mod;
void *hcpu;
};
static int __ref take_cpu_down(void *_param)
{
struct take_cpu_down_param *param = _param;
------ unsigned int cpu = (unsigned long)param->hcpu;
int err;
/* Ensure this CPU doesn't handle any more interrupts. */
cpu_notify(CPU_DYING | param->mod, param->hcpu);
------ if (task_cpu(param->caller) == cpu)
------ move_task_off_dead_cpu(cpu, param->caller);
------ /* Force idle task to run as soon as we yield: it should
------ immediately notice cpu is offline and die quickly. */
------ sched_idle_next();
return 0;
}
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
------ .caller = current,
.mod = mod,
.hcpu = hcpu,
};
}
BUG_ON(cpu_online(cpu));
------ /* Wait for it to sleep (leaving idle task). */
++++++ /*
++++++ * The migration_call() CPU_DYING callback will have removed all
++++++ * runnable tasks from the cpu, there's only the idle task left now
++++++ * that the migration thread is done doing the stop_machine thing.
++++++ *
++++++ * Wait for the stop thread to go away.
++++++ */
while (!idle_cpu(cpu))
------ yield();
++++++ cpu_relax();
/* This actually kills the CPU. */
__cpu_die(cpu);
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
+ +++++void __weak arch_disable_nonboot_cpus_begin(void)
+ +++++{
+ +++++}
+ +++++
+ +++++void __weak arch_disable_nonboot_cpus_end(void)
+ +++++{
+ +++++}
+ +++++
int disable_nonboot_cpus(void)
{
int cpu, first_cpu, error = 0;
* with the userspace trying to use the CPU hotplug at the same time
*/
cpumask_clear(frozen_cpus);
+ +++++ arch_disable_nonboot_cpus_begin();
printk("Disabling non-boot CPUs ...\n");
for_each_online_cpu(cpu) {
}
}
+ +++++ arch_disable_nonboot_cpus_end();
+ +++++
if (!error) {
BUG_ON(num_online_cpus() > 1);
/* Make sure the CPUs won't be enabled by someone else */