Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[pandora-kernel.git] / arch / x86 / include / asm / percpu.h
index d475b43..3470c9d 100644 (file)
@@ -388,12 +388,9 @@ do {                                                                       \
 #define __this_cpu_xor_1(pcp, val)     percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xor_2(pcp, val)     percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xor_4(pcp, val)     percpu_to_op("xor", (pcp), val)
-/*
- * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
- * faster than an xchg with forced lock semantics.
- */
-#define __this_cpu_xchg_8(pcp, nval)   percpu_xchg_op(pcp, nval)
-#define __this_cpu_cmpxchg_8(pcp, oval, nval)  percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_xchg_1(pcp, val)    percpu_xchg_op(pcp, val)
+#define __this_cpu_xchg_2(pcp, val)    percpu_xchg_op(pcp, val)
+#define __this_cpu_xchg_4(pcp, val)    percpu_xchg_op(pcp, val)
 
 #define this_cpu_read_1(pcp)           percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_read_2(pcp)           percpu_from_op("mov", (pcp), "m"(pcp))
@@ -485,6 +482,8 @@ do {                                                                        \
 #define __this_cpu_or_8(pcp, val)      percpu_to_op("or", (pcp), val)
 #define __this_cpu_xor_8(pcp, val)     percpu_to_op("xor", (pcp), val)
 #define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
+#define __this_cpu_xchg_8(pcp, nval)   percpu_xchg_op(pcp, nval)
+#define __this_cpu_cmpxchg_8(pcp, oval, nval)  percpu_cmpxchg_op(pcp, oval, nval)
 
 #define this_cpu_read_8(pcp)           percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_write_8(pcp, val)     percpu_to_op("mov", (pcp), val)
@@ -509,6 +508,11 @@ do {                                                                       \
  * it in software.  The address used in the cmpxchg16 instruction must be
  * aligned to a 16 byte boundary.
  */
+#ifdef CONFIG_SMP
+#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
+#else
+#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
+#endif
 #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)                 \
 ({                                                                     \
        char __ret;                                                     \
@@ -517,7 +521,7 @@ do {                                                                        \
        typeof(o2) __o2 = o2;                                           \
        typeof(o2) __n2 = n2;                                           \
        typeof(o2) __dummy;                                             \
-       alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,      \
+       alternative_io(CMPXCHG16B_EMU_CALL,                             \
                       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",  \
                       X86_FEATURE_CX16,                                \
                       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),         \
@@ -542,6 +546,33 @@ do {                                                                       \
        old__;                                                          \
 })
 
+static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
+                        const unsigned long __percpu *addr)
+{
+       unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+
+       return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
+}
+
+static inline int x86_this_cpu_variable_test_bit(int nr,
+                        const unsigned long __percpu *addr)
+{
+       int oldbit;
+
+       asm volatile("bt "__percpu_arg(2)",%1\n\t"
+                       "sbb %0,%0"
+                       : "=r" (oldbit)
+                       : "m" (*(unsigned long *)addr), "Ir" (nr));
+
+       return oldbit;
+}
+
+#define x86_this_cpu_test_bit(nr, addr)                        \
+       (__builtin_constant_p((nr))                     \
+        ? x86_this_cpu_constant_test_bit((nr), (addr)) \
+        : x86_this_cpu_variable_test_bit((nr), (addr)))
+
+
 #include <asm-generic/percpu.h>
 
 /* We can use this directly for local CPU (faster). */