#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
-/*
- * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
- * faster than an xchg with forced lock semantics.
- */
-#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
+#define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
+#define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
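
The comment deleted above recommended the generic fallbacks because a real xchg instruction asserts LOCK implicitly whenever it operates on memory; the sized definitions become the faster choice once percpu_xchg_op is built from an unlocked cmpxchg loop instead, which appears to be the motivation here. A minimal user-space sketch of the two code shapes (illustrative only; xchg_locked and xchg_cmpxchg_loop are invented names, not the kernel macro):

#include <stdio.h>

/* Invented helpers for illustration; not the kernel's percpu_xchg_op. */

static unsigned long xchg_locked(unsigned long *p, unsigned long val)
{
	/* xchg against memory asserts LOCK even without a lock prefix */
	asm volatile("xchg %0,%1"
		     : "+r" (val), "+m" (*p)
		     : : "memory");
	return val;
}

static unsigned long xchg_cmpxchg_loop(unsigned long *p, unsigned long val)
{
	unsigned long old;

	/* unlocked cmpxchg retry loop: the shape percpu_xchg_op generates */
	asm volatile("mov %1,%0\n"
		     "1:\tcmpxchg %2,%1\n\t"
		     "jnz 1b"
		     : "=&a" (old), "+m" (*p)
		     : "r" (val)
		     : "memory");
	return old;
}

int main(void)
{
	unsigned long v = 1;
	unsigned long old = xchg_cmpxchg_loop(&v, 2);

	printf("old=%lu now=%lu\n", old, v);	/* old=1 now=2 */
	old = xchg_locked(&v, 3);
	printf("old=%lu now=%lu\n", old, v);	/* old=2 now=3 */
	return 0;
}

The loop is safe without LOCK precisely because per-cpu data is only ever touched by the local CPU; nothing can race with the compare-and-swap.
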
#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
+#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
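
Both this_cpu_read_8 and this_cpu_write_8 expand to a single %gs-relative mov, which is why the this_cpu_* forms need no preempt_disable() around them: one instruction cannot be preempted halfway, and the %gs base always tracks whichever CPU the task is running on. A user-space cousin of the same addressing trick is TLS, where gcc emits %fs-relative accesses (illustrative sketch, not kernel code):

#include <stdio.h>

/* Each access typically compiles to a single %fs-relative instruction,
 * e.g. "addq $3, %fs:counter@tpoff" -- the same pattern the kernel
 * uses with %gs for per-cpu data. */
static __thread unsigned long counter;

int main(void)
{
	counter += 3;
	printf("counter=%lu\n", counter);
	return 0;
}
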
 * it in software. The address used in the cmpxchg16b instruction must be
 * aligned to a 16-byte boundary.
*/
+#ifdef CONFIG_SMP
+#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
+#else
+#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
+#endif
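
The NOPs pad the original sequence out to the length of the replacement so that the alternatives machinery can patch cmpxchg16b plus setz over the call in place. By my count: call rel32 is 5 bytes, cmpxchg16b (%rsi) is 4 bytes plus 1 byte for the %gs segment override that __percpu_prefix adds under CONFIG_SMP, and setz %0 is 3 bytes. That gives 5 + 3 = 8 bytes on SMP (hence ASM_NOP3 after the 5-byte call) but 4 + 3 = 7 bytes on UP (hence ASM_NOP2); worth double-checking against objdump output.
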
#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
({ \
char __ret; \
typeof(o1) __o1 = o1; \
typeof(o1) __n1 = n1; \
typeof(o2) __o2 = o2; \
typeof(o2) __n2 = n2; \
typeof(o2) __dummy; \
- alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \
+ alternative_io(CMPXCHG16B_EMU_CALL, \
"cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \
X86_FEATURE_CX16, \
ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \
"S" (&pcp1), "b"(__n1), "c"(__n2), \
"a"(__o1), "d"(__o2)); \
__ret; \
})
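
For reference, a stand-alone user-space sketch of what cmpxchg16b provides: a single compare-and-swap over an adjacent, 16-byte-aligned pair of words, matching the alignment requirement in the comment above. The names (struct pair, pair_cmpxchg) are invented for illustration; with gcc -mcx16 the __sync builtin below compiles to cmpxchg16b, though newer compilers may route 16-byte __atomic operations through libatomic instead:

#include <stdio.h>
#include <stdbool.h>

/* cmpxchg16b faults unless its memory operand is 16-byte aligned */
struct pair {
	unsigned long lo;
	unsigned long hi;
} __attribute__((aligned(16)));

static bool pair_cmpxchg(struct pair *p,
			 unsigned long o_lo, unsigned long o_hi,
			 unsigned long n_lo, unsigned long n_hi)
{
	/* x86-64 is little endian: lo occupies the low 64 bits */
	__int128 old = ((__int128)o_hi << 64) | o_lo;
	__int128 new = ((__int128)n_hi << 64) | n_lo;

	return __sync_bool_compare_and_swap((__int128 *)p, old, new);
}

int main(void)
{
	struct pair p = { .lo = 1, .hi = 2 };
	bool ok = pair_cmpxchg(&p, 1, 2, 3, 4);

	printf("ok=%d lo=%lu hi=%lu\n", ok, p.lo, p.hi);
	return 0;
}
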
+static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
+ const unsigned long __percpu *addr)
+{
+ unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+
+ return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
+}
+
+static inline int x86_this_cpu_variable_test_bit(int nr,
+ const unsigned long __percpu *addr)
+{
+ int oldbit;
+
+ asm volatile("bt "__percpu_arg(2)",%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr));
+
+ return oldbit;
+}
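
In the variable case, bt copies bit nr of the per-cpu word into CF, and sbb %0,%0 (x - x - CF) then broadcasts CF through oldbit, so the function returns 0 when the bit is clear and -1 when it is set; callers are expected to test the result only for truth.
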
+
+#define x86_this_cpu_test_bit(nr, addr) \
+ (__builtin_constant_p((nr)) \
+ ? x86_this_cpu_constant_test_bit((nr), (addr)) \
+ : x86_this_cpu_variable_test_bit((nr), (addr)))
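
The constant/variable split is a common bitops pattern: __builtin_constant_p lets a literal bit number fold down to a load, shift, and mask at compile time, while runtime values go through bt. A self-contained user-space rendition of the same dispatch (plain memory rather than %gs-based per-cpu data; all names invented for illustration):

#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))

/* constant nr: the compiler folds this to a mask test */
static inline int constant_test_bit(unsigned int nr, const unsigned long *addr)
{
	return (addr[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

/* variable nr: bt selects the bit, sbb turns CF into 0 or -1 */
static inline int variable_test_bit(int nr, const unsigned long *addr)
{
	int oldbit;

	asm volatile("bt %2,%1\n\t"
		     "sbb %0,%0"
		     : "=r" (oldbit)
		     : "m" (*addr), "Ir" (nr));
	return oldbit != 0;
}

#define test_bit(nr, addr)				\
	(__builtin_constant_p(nr)			\
	 ? constant_test_bit((nr), (addr))		\
	 : variable_test_bit((nr), (addr)))

int main(void)
{
	unsigned long bits[1] = { 0x5 };	/* bits 0 and 2 set */
	int nr = 2;

	printf("bit0=%d bit1=%d bit2=%d\n",
	       test_bit(0, bits), test_bit(1, bits), test_bit(nr, bits));
	return 0;
}
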
+
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */