+
+/*
+ * Pretty complex macro to generate cmpxchg16 instruction. The instruction
+ * is not supported on early AMD64 processors so we must be able to emulate
+ * it in software. The address used in the cmpxchg16 instruction must be
+ * aligned to a 16 byte boundary.
+ */
+#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
+({ \
+ char __ret; \
+ typeof(o1) __o1 = o1; \
+ typeof(o1) __n1 = n1; \
+ typeof(o2) __o2 = o2; \
+ typeof(o2) __n2 = n2; \
+ typeof(o2) __dummy; \
+ alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \
+ "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \
+ X86_FEATURE_CX16, \
+ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \
+ "S" (&pcp1), "b"(__n1), "c"(__n2), \
+ "a"(__o1), "d"(__o2)); \
+ __ret; \
+})
+
+#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+