x86: A fast way to check capabilities of the current cpu
author     Christoph Lameter <cl@linux.com>
           Sat, 12 Mar 2011 11:50:10 +0000 (12:50 +0100)
committer  Tejun Heo <tj@kernel.org>
           Tue, 29 Mar 2011 08:18:30 +0000 (10:18 +0200)
Add this_cpu_has(), which determines whether the current cpu has a
certain capability, using a segment prefix and a bit test operation.
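
Roughly, the mechanism is the following (a sketch only, with a made-up
helper name; the real helpers are the ones added to percpu.h below):

  /*
   * Sketch: cpu_info is a per-cpu variable, so the current cpu's
   * feature word can be fetched with a single segment-prefixed load
   * (%gs on 64-bit, %fs on 32-bit) and the bit tested in a register,
   * without first forming a cpuinfo_x86 pointer.
   */
  static inline int this_cpu_has_sketch(unsigned int bit)
  {
          u32 cap;

          /* percpu_read() compiles to one segment-prefixed mov. */
          cap = percpu_read(cpu_info.x86_capability[bit / 32]);

          return (cap >> (bit % 32)) & 1;
  }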

For that we need to add bit operations to x86's percpu.h.

Many uses of cpu_has() pass a pointer to the current cpu's cpuinfo_x86
just to determine its feature flags.  That indirection is no longer
necessary after this patch; the flag can be tested directly, as in the
example below.
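
For example, the setup_APIC_timer() case converted in the apic.c hunk
below changes from

  if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT))
          lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;

to

  if (this_cpu_has(X86_FEATURE_ARAT))
          lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;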

However, this patch only converts the straightforward cases where
cpu_has() is used with __this_cpu_ptr().  The rest is work for later.

-tj: Rolled up patch to add x86_ prefix and use percpu_read() instead
     of percpu_read_stable().

Signed-off-by: Christoph Lameter <cl@linux.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/percpu.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/process.c
arch/x86/kernel/smpboot.c

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 91f3e08..50c0d30 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -207,8 +207,7 @@ extern const char * const x86_power_flags[32];
 #define test_cpu_cap(c, bit)                                           \
         test_bit(bit, (unsigned long *)((c)->x86_capability))
 
-#define cpu_has(c, bit)                                                        \
-       (__builtin_constant_p(bit) &&                                   \
+#define REQUIRED_MASK_BIT_SET(bit)                                     \
         ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||     \
           (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) ||     \
           (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) ||     \
@@ -218,10 +217,16 @@ extern const char * const x86_power_flags[32];
           (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||     \
           (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ||     \
           (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) ||     \
-          (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )      \
-         ? 1 :                                                         \
+          (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
+
+#define cpu_has(c, bit)                                                        \
+       (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :  \
         test_cpu_cap(c, bit))
 
+#define this_cpu_has(bit)                                              \
+       (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :  \
+        x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+
 #define boot_cpu_has(bit)      cpu_has(&boot_cpu_data, bit)
 
 #define set_cpu_cap(c, bit)    set_bit(bit, (unsigned long *)((c)->x86_capability))
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d475b43..76042d9 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -542,6 +542,33 @@ do {                                                                       \
        old__;                                                          \
 })
 
+static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
+                        const unsigned long __percpu *addr)
+{
+       unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+
+       return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
+}
+
+static inline int x86_this_cpu_variable_test_bit(int nr,
+                        const unsigned long __percpu *addr)
+{
+       int oldbit;
+
+       asm volatile("bt "__percpu_arg(2)",%1\n\t"
+                       "sbb %0,%0"
+                       : "=r" (oldbit)
+                       : "m" (*(unsigned long *)addr), "Ir" (nr));
+
+       return oldbit;
+}
+
+#define x86_this_cpu_test_bit(nr, addr)                        \
+       (__builtin_constant_p((nr))                     \
+        ? x86_this_cpu_constant_test_bit((nr), (addr)) \
+        : x86_this_cpu_variable_test_bit((nr), (addr)))
+
+
 #include <asm-generic/percpu.h>
 
 /* We can use this directly for local CPU (faster). */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fabf01e..2bc503b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void)
 {
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-       if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
+       if (this_cpu_has(X86_FEATURE_ARAT)) {
                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
                /* Make LAPIC timer preferrable over percpu HPET */
                lapic_clockevent.rating = 150;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d46cbe4..88a90a9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
        if (!need_resched()) {
-               if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+               if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
 
                __monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -465,7 +465,7 @@ static void mwait_idle(void)
        if (!need_resched()) {
                trace_power_start(POWER_CSTATE, 1, smp_processor_id());
                trace_cpu_idle(1, smp_processor_id());
-               if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+               if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
 
                __monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2871d3..a3c430b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void)
        void *mwait_ptr;
        struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
 
-       if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)))
+       if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
                return;
-       if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
+       if (!this_cpu_has(X86_FEATURE_CLFLSH))
                return;
        if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
                return;