percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support
author    Christoph Lameter <cl@linux.com>
          Mon, 28 Feb 2011 10:02:24 +0000 (11:02 +0100)
committer Tejun Heo <tj@kernel.org>
          Mon, 28 Feb 2011 10:20:49 +0000 (11:20 +0100)
Support this_cpu_cmpxchg_double() using the cmpxchg16b and cmpxchg8b
instructions.

-tj: s/percpu_cmpxchg16b/percpu_cmpxchg16b_double/ for consistency and
     other cosmetic changes.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
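
For orientation: the interface this patch backs with real instructions takes two adjacent per-cpu words plus their expected and replacement values, and returns non-zero only when both words matched and were swapped. A minimal caller might look like the sketch below; the demo_pcp structure and demo_update() helper are hypothetical (not part of this patch), and the two fields must be adjacent and aligned suitably for cmpxchg8b/cmpxchg16b:

#include <linux/percpu.h>

/* Hypothetical example, not from this patch. */
struct demo_pcp {
	unsigned long word1;	/* low word, passed as pcp1 */
	unsigned long word2;	/* must immediately follow word1 (pcp2) */
};
/* Cacheline alignment also satisfies cmpxchg16b's 16-byte requirement. */
static DEFINE_PER_CPU_ALIGNED(struct demo_pcp, demo);

static int demo_update(unsigned long o1, unsigned long o2,
		       unsigned long n1, unsigned long n2)
{
	/* 1 only if both words still held o1/o2 and were replaced by n1/n2. */
	return this_cpu_cmpxchg_double(demo.word1, demo.word2, o1, o2, n1, n2);
}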
arch/x86/include/asm/percpu.h
arch/x86/lib/Makefile
arch/x86/lib/cmpxchg16b_emu.S [new file with mode: 0644]

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3788f46..260ac7a 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,6 +451,26 @@ do {                                                                       \
 #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
 #endif /* !CONFIG_M386 */
 
+#ifdef CONFIG_X86_CMPXCHG64
+#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)                  \
+({                                                                     \
+       char __ret;                                                     \
+       typeof(o1) __o1 = o1;                                           \
+       typeof(o1) __n1 = n1;                                           \
+       typeof(o2) __o2 = o2;                                           \
+       typeof(o2) __n2 = n2;                                           \
+       typeof(o2) __dummy = n2;                                        \
+       asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"       \
+                   : "=a"(__ret), "=m" (pcp1), "=d"(__dummy)           \
+                   :  "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2));     \
+       __ret;                                                          \
+})
+
+#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)                percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)          percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)       percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#endif /* CONFIG_X86_CMPXCHG64 */
+
 /*
  * Per cpu atomic 64 bit operations are only available under 64 bit.
  * 32 bit must fall back to generic operations.
@@ -480,6 +500,34 @@ do {                                                                       \
 #define irqsafe_cpu_xor_8(pcp, val)    percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xchg_8(pcp, nval)  percpu_xchg_op(pcp, nval)
 #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
+/*
+ * Pretty complex macro to generate the cmpxchg16b instruction.  The
+ * instruction is not supported on early AMD64 processors so we must be
+ * able to emulate it in software.  The address used in the cmpxchg16b
+ * instruction must be aligned to a 16 byte boundary.
+ */
+#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)                 \
+({                                                                     \
+       char __ret;                                                     \
+       typeof(o1) __o1 = o1;                                           \
+       typeof(o1) __n1 = n1;                                           \
+       typeof(o2) __o2 = o2;                                           \
+       typeof(o2) __n2 = n2;                                           \
+       typeof(o2) __dummy;                                             \
+       alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,      \
+                      "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t",        \
+                      X86_FEATURE_CX16,                                \
+                      ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),         \
+                      "S" (&pcp1), "b"(__n1), "c"(__n2),               \
+                      "a"(__o1), "d"(__o2));                           \
+       __ret;                                                          \
+})
+
+#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)                percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)          percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)       percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
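
All six macros added above implement the same contract: compare two adjacent per-cpu words against (o1, o2) and, only if both match, replace them with (n1, n2) as one step that nothing else on the same CPU can interleave with. Spelled out as plain C purely for illustration (the name is hypothetical and this version is deliberately not atomic):

static int demo_cmpxchg_double(unsigned long *p1, unsigned long *p2,
			       unsigned long o1, unsigned long o2,
			       unsigned long n1, unsigned long n2)
{
	if (*p1 != o1 || *p2 != o2)
		return 0;	/* mismatch: neither word is written */
	*p1 = n1;		/* match: both words are updated together */
	*p2 = n2;
	return 1;
}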
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index e10cf07..f2479f1 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -42,4 +42,5 @@ else
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
        lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
+       lib-y += cmpxchg16b_emu.o
 endif
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
new file mode 100644
index 0000000..3e8b08a
--- /dev/null
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -0,0 +1,59 @@
+/*
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; version 2
+ *     of the License.
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/frame.h>
+#include <asm/dwarf2.h>
+
+.text
+
+/*
+ * Inputs:
+ * %rsi : memory location to compare
+ * %rax : low 64 bits of old value
+ * %rdx : high 64 bits of old value
+ * %rbx : low 64 bits of new value
+ * %rcx : high 64 bits of new value
+ * %al  : Operation successful
+ */
+ENTRY(this_cpu_cmpxchg16b_emu)
+CFI_STARTPROC
+
+#
+# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
+# via the ZF.  Caller will access %al to get result.
+#
+# Note that this is only useful for a cpuops operation.  Meaning that we
+# do *not* have a fully atomic operation but just an operation that is
+# *atomic* on a single cpu (as provided by the this_cpu_xx class of
+# macros).
+#
+this_cpu_cmpxchg16b_emu:
+       pushf
+       cli
+
+       cmpq %gs:(%rsi), %rax
+       jne not_same
+       cmpq %gs:8(%rsi), %rdx
+       jne not_same
+
+       movq %rbx, %gs:(%rsi)
+       movq %rcx, %gs:8(%rsi)
+
+       popf
+       mov $1, %al
+       ret
+
+ not_same:
+       popf
+       xor %al,%al
+       ret
+
+CFI_ENDPROC
+
+ENDPROC(this_cpu_cmpxchg16b_emu)
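
For readers who prefer C, the routine above amounts to the following hypothetical helper: with interrupts disabled, the compare-and-store pair cannot be interrupted on the local CPU, which is all the this_cpu_*() operations require; it is not atomic against other CPUs, which is why the real code only ever operates on a %gs-relative per-cpu address:

#include <linux/irqflags.h>

/* Hypothetical C rendering of this_cpu_cmpxchg16b_emu; illustration only. */
static int demo_cmpxchg16b_emu(unsigned long *pcp, unsigned long o_lo,
			       unsigned long o_hi, unsigned long n_lo,
			       unsigned long n_hi)
{
	unsigned long flags;
	int ret = 0;

	local_irq_save(flags);			/* pushf; cli */
	if (pcp[0] == o_lo && pcp[1] == o_hi) {
		pcp[0] = n_lo;			/* both quadwords matched: */
		pcp[1] = n_hi;			/* store the new pair */
		ret = 1;
	}
	local_irq_restore(flags);		/* popf */
	return ret;				/* the asm returns this in %al */
}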