x86/smp: Don't ever patch back to UP if we unplug cpus
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c638228..ead630a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
 
 #define MAX_PATCH_LEN (255-1)
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int smp_alt_once;
-
-static int __init bootonly(char *str)
-{
-       smp_alt_once = 1;
-       return 1;
-}
-__setup("smp-alt-boot", bootonly);
-#else
-#define smp_alt_once 1
-#endif
-
 static int __initdata_or_module debug_alternative;
 
 static int __init debug_alt(char *str)
@@ -63,8 +50,26 @@ static int __init setup_noreplace_paravirt(char *str)
 __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #endif
 
-#define DPRINTK(fmt, args...) if (debug_alternative) \
-       printk(KERN_DEBUG fmt, args)
+#define DPRINTK(fmt, args...)                                          \
+do {                                                                   \
+       if (debug_alternative)                                          \
+               printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args);   \
+} while (0)
+
+#define DUMP_BYTES(buf, len, fmt, args...)                             \
+do {                                                                   \
+       if (unlikely(debug_alternative)) {                              \
+               int j;                                                  \
+                                                                       \
+               if (!(len))                                             \
+                       break;                                          \
+                                                                       \
+               printk(KERN_DEBUG fmt, ##args);                         \
+               for (j = 0; j < (len) - 1; j++)                         \
+                       printk(KERN_CONT "%02hhx ", buf[j]);            \
+               printk(KERN_CONT "%02hhx\n", buf[j]);                   \
+       }                                                               \
+} while (0)
 
 /*
  * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
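For illustration, here is the output shape the new DUMP_BYTES macro produces. A minimal userspace sketch, with printk/KERN_CONT swapped for plain printf and a made-up buffer (this mimics only the formatting, not the kernel's log handling; the GNU-style named variadic macro matches kernel usage and needs gcc):

    #include <stdio.h>

    /* userspace stand-in for DUMP_BYTES above: print a prefix, then the
     * buffer as space-separated hex bytes on a single line */
    #define DUMP_BYTES(buf, len, fmt, args...)                      \
    do {                                                            \
            int j;                                                  \
                                                                    \
            if (!(len))                                             \
                    break;                                          \
                                                                    \
            printf(fmt, ##args);                                    \
            for (j = 0; j < (len) - 1; j++)                         \
                    printf("%02hhx ", (buf)[j]);                    \
            printf("%02hhx\n", (buf)[j]);                           \
    } while (0)

    int main(void)
    {
            unsigned char insn[] = { 0xe9, 0x40, 0x10, 0x00, 0x00 };

            /* prints e.g. "0x7ffc...: final_insn: e9 40 10 00 00" */
            DUMP_BYTES(insn, (int)sizeof(insn), "%p: final_insn: ",
                       (void *)insn);
            return 0;
    }
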
@@ -160,7 +165,7 @@ static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
 #endif
 
 #ifdef P6_NOP1
-static const unsigned char  __initconst_or_module p6nops[] =
+static const unsigned char p6nops[] =
 {
        P6_NOP1,
        P6_NOP2,
@@ -219,7 +224,7 @@ void __init arch_init_ideal_nops(void)
                        ideal_nops = intel_nops;
 #endif
                }
-
+               break;
        default:
 #ifdef CONFIG_X86_64
                ideal_nops = k8_nops;
@@ -251,12 +256,86 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
-/* Replace instructions with better alternatives for this CPU type.
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that asymmetric systems where
-   APs have less capabilities than the boot processor are not handled.
-   Tough. Make sure you disable such features by hand. */
+/*
+ * Are we looking at a near JMP with a 1- or 4-byte displacement?
+ */
+static inline bool is_jmp(const u8 opcode)
+{
+       return opcode == 0xeb || opcode == 0xe9;
+}
+
+static void __init_or_module
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+{
+       u8 *next_rip, *tgt_rip;
+       s32 n_dspl, o_dspl;
+       int repl_len;
+
+       if (a->replacementlen != 5)
+               return;
+
+       o_dspl = *(s32 *)(insnbuf + 1);
+
+       /* next_rip of the replacement JMP */
+       next_rip = repl_insn + a->replacementlen;
+       /* target rip of the replacement JMP */
+       tgt_rip  = next_rip + o_dspl;
+       n_dspl = tgt_rip - orig_insn;
+
+       DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+
+       if (tgt_rip - orig_insn >= 0) {
+               if (n_dspl - 2 <= 127)
+                       goto two_byte_jmp;
+               else
+                       goto five_byte_jmp;
+       /* negative offset */
+       } else {
+               if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
+                       goto two_byte_jmp;
+               else
+                       goto five_byte_jmp;
+       }
+
+two_byte_jmp:
+       n_dspl -= 2;
+
+       insnbuf[0] = 0xeb;
+       insnbuf[1] = (s8)n_dspl;
+       add_nops(insnbuf + 2, 3);
+
+       repl_len = 2;
+       goto done;
+
+five_byte_jmp:
+       n_dspl -= 5;
+
+       insnbuf[0] = 0xe9;
+       *(s32 *)&insnbuf[1] = n_dspl;
+
+       repl_len = 5;
+
+done:
+
+       DPRINTK("final displ: 0x%08x, JMP 0x%lx",
+               n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
+}
+
+static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+{
+       add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+
+       DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+                  instr, a->instrlen - a->padlen, a->instrlen);
+}
 
+/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+ * This implies that asymmetric systems where APs have less capabilities than
+ * the boot processor are not handled. Tough. Make sure you disable such
+ * features by hand.
+ */
 void __init_or_module apply_alternatives(struct alt_instr *start,
                                         struct alt_instr *end)
 {
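A minimal userspace sketch of the displacement arithmetic in recompute_jump() above. The addresses and the original displacement are made-up values, and the rel8 range test is written as a plain range check rather than the kernel's masking trick:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uintptr_t orig_insn = 0x1000;   /* patch site in .text */
            uintptr_t repl_insn = 0x2000;   /* copy in the replacement section */
            int32_t o_dspl = 0x40;          /* rel32 of the replacement JMP */

            /* a JMP displacement is relative to the *next* instruction, so
             * it must be recomputed when the JMP is copied from repl_insn
             * back to orig_insn */
            uintptr_t next_rip = repl_insn + 5;     /* RIP after a 5-byte JMP */
            uintptr_t tgt_rip  = next_rip + o_dspl; /* absolute target */
            int32_t n_dspl = (int32_t)(tgt_rip - orig_insn);

            if (n_dspl - 2 >= -128 && n_dspl - 2 <= 127)    /* fits rel8? */
                    printf("JMP rel8:  eb %02x\n", (uint8_t)(n_dspl - 2));
            else                                            /* needs rel32 */
                    printf("JMP rel32: e9 %08x\n", n_dspl - 5);
            return 0;
    }

Here tgt_rip lands at 0x2045, so the target sits 0x1045 bytes past the patch site: the 5-byte form is kept, with displacement 0x1040 (the target distance minus the 5 bytes of the JMP itself).
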
@@ -264,10 +343,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
        u8 *instr, *replacement;
        u8 insnbuf[MAX_PATCH_LEN];
 
-       DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+       DPRINTK("alt table %p -> %p", start, end);
        /*
         * The scan order should be from start to end. A later scanned
-        * alternative code can overwrite a previous scanned alternative code.
+        * alternative code can overwrite previously scanned alternative code.
         * Some kernel functions (e.g. memcpy, memset, etc) use this order to
         * patch code.
         *
@@ -275,29 +354,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
         * order.
         */
        for (a = start; a < end; a++) {
+               int insnbuf_sz = 0;
+
                instr = (u8 *)&a->instr_offset + a->instr_offset;
                replacement = (u8 *)&a->repl_offset + a->repl_offset;
-               BUG_ON(a->replacementlen > a->instrlen);
                BUG_ON(a->instrlen > sizeof(insnbuf));
                BUG_ON(a->cpuid >= NCAPINTS*32);
-               if (!boot_cpu_has(a->cpuid))
+               if (!boot_cpu_has(a->cpuid)) {
+                       if (a->padlen > 1)
+                               optimize_nops(a, instr);
+
                        continue;
+               }
+
+               DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d)",
+                       a->cpuid >> 5,
+                       a->cpuid & 0x1f,
+                       instr, a->instrlen,
+                       replacement, a->replacementlen);
+
+               DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
+               DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
 
                memcpy(insnbuf, replacement, a->replacementlen);
+               insnbuf_sz = a->replacementlen;
 
                /* 0xe8 is a relative jump; fix the offset. */
-               if (*insnbuf == 0xe8 && a->replacementlen == 5)
-                   *(s32 *)(insnbuf + 1) += replacement - instr;
+               if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+                       *(s32 *)(insnbuf + 1) += replacement - instr;
+                       DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
+                               *(s32 *)(insnbuf + 1),
+                               (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+               }
+
+               if (a->replacementlen && is_jmp(replacement[0]))
+                       recompute_jump(a, instr, replacement, insnbuf);
 
-               add_nops(insnbuf + a->replacementlen,
-                        a->instrlen - a->replacementlen);
+               if (a->instrlen > a->replacementlen) {
+                       add_nops(insnbuf + a->replacementlen,
+                                a->instrlen - a->replacementlen);
+                       insnbuf_sz += a->instrlen - a->replacementlen;
+               }
+               DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
 
-               text_poke_early(instr, insnbuf, a->instrlen);
+               text_poke_early(instr, insnbuf, insnbuf_sz);
        }
 }
 
 #ifdef CONFIG_SMP
-
 static void alternatives_smp_lock(const s32 *start, const s32 *end,
                                  u8 *text, u8 *text_end)
 {
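The instr/replacement resolution in the loop above works because each table entry stores s32 displacements relative to the offset fields themselves, which keeps the table position-independent. A minimal sketch under a hypothetical two-field layout (the real struct alt_instr also carries cpuid, instrlen, replacementlen and padlen, and the field-relative pointer arithmetic mirrors the kernel idiom rather than strict ISO C):

    #include <stdio.h>
    #include <stdint.h>

    /* hypothetical mirror of the two offset fields of struct alt_instr */
    struct alt_entry {
            int32_t instr_offset;   /* patch site, relative to this field */
            int32_t repl_offset;    /* replacement, relative to this field */
    };

    static uint8_t text[64];        /* stand-in for kernel .text */
    static uint8_t repl[16];        /* stand-in for .altinstr_replacement */
    static struct alt_entry a;

    int main(void)
    {
            /* store "text + 16" and "repl" as self-relative displacements */
            a.instr_offset = (int32_t)((uintptr_t)(text + 16) -
                                       (uintptr_t)&a.instr_offset);
            a.repl_offset  = (int32_t)((uintptr_t)repl -
                                       (uintptr_t)&a.repl_offset);

            /* resolve them back, the same way apply_alternatives() does */
            uint8_t *instr       = (uint8_t *)&a.instr_offset + a.instr_offset;
            uint8_t *replacement = (uint8_t *)&a.repl_offset + a.repl_offset;

            printf("instr ok: %d, repl ok: %d\n",
                   instr == text + 16, replacement == repl);
            return 0;
    }
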
@@ -321,9 +425,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 {
        const s32 *poff;
 
-       if (noreplace_smp)
-               return;
-
        mutex_lock(&text_mutex);
        for (poff = start; poff < end; poff++) {
                u8 *ptr = (u8 *)poff + *poff;
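Each .smp_locks entry is likewise an s32 self-relative pointer, aiming at the 0xf0 LOCK prefix byte of one instruction; locking writes 0xf0 back, while unlocking (in this kernel's unlock path, not shown in the hunk) replaces it with the harmless 0x3e DS segment-override prefix. A minimal userspace sketch of that toggle, with a made-up buffer and a direct store standing in for text_poke():

    #include <stdio.h>
    #include <stdint.h>

    static uint8_t text[8] = { 0xf0, 0x0f, 0xb1, 0x17 }; /* lock cmpxchg %edx,(%rdi) */
    static int32_t smp_locks[1];    /* stand-in for the .smp_locks section */

    int main(void)
    {
            /* entry = displacement from the entry itself to the LOCK byte */
            smp_locks[0] = (int32_t)((uintptr_t)text -
                                     (uintptr_t)&smp_locks[0]);

            uint8_t *ptr = (uint8_t *)&smp_locks[0] + smp_locks[0];

            if (*ptr == 0xf0)       /* UP: LOCK prefix -> DS override */
                    *ptr = 0x3e;
            printf("after unlock: %02x\n", text[0]);

            if (*ptr == 0x3e)       /* SMP again: restore the LOCK prefix */
                    *ptr = 0xf0;
            printf("after lock:   %02x\n", text[0]);
            return 0;
    }
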
@@ -354,7 +455,7 @@ struct smp_alt_module {
 };
 static LIST_HEAD(smp_alt_modules);
 static DEFINE_MUTEX(smp_alt);
-static int smp_mode = 1;       /* protected by smp_alt */
+static bool uniproc_patched = false;   /* protected by smp_alt */
 
 void __init_or_module alternatives_smp_module_add(struct module *mod,
                                                  char *name,
@@ -363,19 +464,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 {
        struct smp_alt_module *smp;
 
-       if (noreplace_smp)
-               return;
+       mutex_lock(&smp_alt);
+       if (!uniproc_patched)
+               goto unlock;
 
-       if (smp_alt_once) {
-               if (boot_cpu_has(X86_FEATURE_UP))
-                       alternatives_smp_unlock(locks, locks_end,
-                                               text, text_end);
-               return;
-       }
+       if (num_possible_cpus() == 1)
+               /* Don't bother remembering, we'll never have to undo it. */
+               goto smp_unlock;
 
        smp = kzalloc(sizeof(*smp), GFP_KERNEL);
        if (NULL == smp)
-               return; /* we'll run the (safe but slow) SMP code then ... */
+               /* we'll run the (safe but slow) SMP code then ... */
+               goto unlock;
 
        smp->mod        = mod;
        smp->name       = name;
@@ -383,15 +483,14 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
        smp->locks_end  = locks_end;
        smp->text       = text;
        smp->text_end   = text_end;
-       DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
-               __func__, smp->locks, smp->locks_end,
+       DPRINTK("locks %p -> %p, text %p -> %p, name %s",
+               smp->locks, smp->locks_end,
                smp->text, smp->text_end, smp->name);
 
-       mutex_lock(&smp_alt);
        list_add_tail(&smp->next, &smp_alt_modules);
-       if (boot_cpu_has(X86_FEATURE_UP))
-               alternatives_smp_unlock(smp->locks, smp->locks_end,
-                                       smp->text, smp->text_end);
+smp_unlock:
+       alternatives_smp_unlock(locks, locks_end, text, text_end);
+unlock:
        mutex_unlock(&smp_alt);
 }
 
@@ -399,24 +498,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
 {
        struct smp_alt_module *item;
 
-       if (smp_alt_once || noreplace_smp)
-               return;
-
        mutex_lock(&smp_alt);
        list_for_each_entry(item, &smp_alt_modules, next) {
                if (mod != item->mod)
                        continue;
                list_del(&item->next);
-               mutex_unlock(&smp_alt);
-               DPRINTK("%s: %s\n", __func__, item->name);
                kfree(item);
-               return;
+               break;
        }
        mutex_unlock(&smp_alt);
 }
 
-bool skip_smp_alternatives;
-void alternatives_smp_switch(int smp)
+void alternatives_enable_smp(void)
 {
        struct smp_alt_module *mod;
 
@@ -431,34 +524,21 @@ void alternatives_smp_switch(int smp)
        printk("lockdep: fixing up alternatives.\n");
 #endif
 
-       if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
-               return;
-       BUG_ON(!smp && (num_online_cpus() > 1));
+       /* Why bother if there are no other CPUs? */
+       BUG_ON(num_possible_cpus() == 1);
 
        mutex_lock(&smp_alt);
 
-       /*
-        * Avoid unnecessary switches because it forces JIT based VMs to
-        * throw away all cached translations, which can be quite costly.
-        */
-       if (smp == smp_mode) {
-               /* nothing */
-       } else if (smp) {
+       if (uniproc_patched) {
                printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
+               BUG_ON(num_online_cpus() != 1);
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_lock(mod->locks, mod->locks_end,
                                              mod->text, mod->text_end);
-       } else {
-               printk(KERN_INFO "SMP alternatives: switching to UP code\n");
-               set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-               set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-               list_for_each_entry(mod, &smp_alt_modules, next)
-                       alternatives_smp_unlock(mod->locks, mod->locks_end,
-                                               mod->text, mod->text_end);
+               uniproc_patched = false;
        }
-       smp_mode = smp;
        mutex_unlock(&smp_alt);
 }
 
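The rename from alternatives_smp_switch(int smp) to alternatives_enable_smp() is the heart of the change: the transition is now one-way, so unplugging CPUs later never patches the kernel back to UP code. A toy model of the new policy, purely illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    static bool uniproc_patched = true;     /* booted UP-patched */

    static void alternatives_enable_smp_model(void)
    {
            if (uniproc_patched) {
                    puts("switching to SMP code");
                    uniproc_patched = false;    /* one-way, never set again */
            }
    }

    int main(void)
    {
            alternatives_enable_smp_model();    /* a second CPU comes up */
            alternatives_enable_smp_model();    /* unplug + replug: no repatch */
            return 0;
    }
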
@@ -483,7 +563,7 @@ int alternatives_text_reserved(void *start, void *end)
 
        return 0;
 }
-#endif
+#endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PARAVIRT
 void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
@@ -535,40 +615,22 @@ void __init alternative_instructions(void)
 
        apply_alternatives(__alt_instructions, __alt_instructions_end);
 
-       /* switch to patch-once-at-boottime-only mode and free the
-        * tables in case we know the number of CPUs will never ever
-        * change */
-#ifdef CONFIG_HOTPLUG_CPU
-       if (num_possible_cpus() < 2)
-               smp_alt_once = 1;
-#endif
-
 #ifdef CONFIG_SMP
-       if (smp_alt_once) {
-               if (1 == num_possible_cpus()) {
-                       printk(KERN_INFO "SMP alternatives: switching to UP code\n");
-                       set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-                       set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-
-                       alternatives_smp_unlock(__smp_locks, __smp_locks_end,
-                                               _text, _etext);
-               }
-       } else {
+       /* Patch to UP if other CPUs are not imminent. */
+       if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
+               uniproc_patched = true;
                alternatives_smp_module_add(NULL, "core kernel",
                                            __smp_locks, __smp_locks_end,
                                            _text, _etext);
-
-               /* Only switch to UP mode if we don't immediately boot others */
-               if (num_present_cpus() == 1 || setup_max_cpus <= 1)
-                       alternatives_smp_switch(0);
        }
-#endif
-       apply_paravirt(__parainstructions, __parainstructions_end);
 
-       if (smp_alt_once)
+       if (!uniproc_patched || num_possible_cpus() == 1)
                free_init_pages("SMP alternatives",
                                (unsigned long)__smp_locks,
                                (unsigned long)__smp_locks_end);
+#endif
+
+       apply_paravirt(__parainstructions, __parainstructions_end);
 
        restart_nmi();
 }
@@ -738,5 +800,5 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
 
        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
-       __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+       __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
 }