Merge branch 'x86-spinlocks-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Oct 2011 12:43:29 +0000 (05:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Oct 2011 12:43:29 +0000 (05:43 -0700)
* 'x86-spinlocks-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, ticketlock: remove obsolete comment
  x86, cmpxchg: Use __compiletime_error() to make usage messages a bit nicer
  x86, ticketlock: Make __ticket_spin_trylock common
  x86, ticketlock: Convert __ticket_spin_lock to use xadd()
  x86, ticketlock: Convert spin loop to C
  x86, ticketlock: Clean up types and accessors
  x86: Use xadd helper more widely
  x86: Add xadd helper macro
  x86, cmpxchg: Unify cmpxchg into cmpxchg.h
  x86, cmpxchg: Move 64-bit set64_bit() to match 32-bit
  x86, cmpxchg: Move 32-bit __cmpxchg_wrong_size to match 64 bit.
  x86, cmpxchg: <linux/alternative.h> has LOCK_PREFIX

arch/x86/include/asm/atomic.h
arch/x86/include/asm/atomic64_64.h
arch/x86/include/asm/cmpxchg.h
arch/x86/include/asm/cmpxchg_32.h
arch/x86/include/asm/cmpxchg_64.h
arch/x86/include/asm/rwsem.h
arch/x86/include/asm/spinlock.h
arch/x86/include/asm/spinlock_types.h
arch/x86/include/asm/uv/uv_bau.h

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 10572e3..58cb6d4 100644
@@ -172,18 +172,14 @@ static inline int atomic_add_negative(int i, atomic_t *v)
  */
 static inline int atomic_add_return(int i, atomic_t *v)
 {
-       int __i;
 #ifdef CONFIG_M386
+       int __i;
        unsigned long flags;
        if (unlikely(boot_cpu_data.x86 <= 3))
                goto no_xadd;
 #endif
        /* Modern 486+ processor */
-       __i = i;
-       asm volatile(LOCK_PREFIX "xaddl %0, %1"
-                    : "+r" (i), "+m" (v->counter)
-                    : : "memory");
-       return i + __i;
+       return i + xadd(&v->counter, i);
 
 #ifdef CONFIG_M386
 no_xadd: /* Legacy 386 processor */
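
The rewrite above relies on xadd's return convention: the instruction
atomically adds the increment to memory and hands back the value the
location held *before* the addition, so atomic_add_return() simply
re-adds "i" to xadd()'s result instead of stashing the increment in a
__i temporary. A minimal user-space sketch of the same idiom, using
GCC's __atomic_fetch_add builtin as a stand-in for the kernel's xadd()
macro (illustrative only, not the kernel code):

	#include <stdio.h>

	static int my_atomic_add_return(int i, int *counter)
	{
		/* old value + increment == new value, with no racy reload */
		return i + __atomic_fetch_add(counter, i, __ATOMIC_SEQ_CST);
	}

	int main(void)
	{
		int counter = 40;
		printf("%d\n", my_atomic_add_return(2, &counter)); /* prints 42 */
		return 0;
	}
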
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 017594d..0e1cbfc 100644
@@ -170,11 +170,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v)
  */
 static inline long atomic64_add_return(long i, atomic64_t *v)
 {
-       long __i = i;
-       asm volatile(LOCK_PREFIX "xaddq %0, %1;"
-                    : "+r" (i), "+m" (v->counter)
-                    : : "memory");
-       return i + __i;
+       return i + xadd(&v->counter, i);
 }
 
 static inline long atomic64_sub_return(long i, atomic64_t *v)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index a460fa0..5d3acdf 100644
@@ -1,5 +1,210 @@
+#ifndef ASM_X86_CMPXCHG_H
+#define ASM_X86_CMPXCHG_H
+
+#include <linux/compiler.h>
+#include <asm/alternative.h> /* Provides LOCK_PREFIX */
+
+/*
+ * Non-existent functions to indicate usage errors at link time
+ * (or at compile time, if the compiler implements __compiletime_error()).
+ */
+extern void __xchg_wrong_size(void)
+       __compiletime_error("Bad argument size for xchg");
+extern void __cmpxchg_wrong_size(void)
+       __compiletime_error("Bad argument size for cmpxchg");
+extern void __xadd_wrong_size(void)
+       __compiletime_error("Bad argument size for xadd");
+
+/*
+ * Constants for operation sizes. On 32-bit, the 64-bit size is set to
+ * -1 because sizeof will never return -1, thereby making those switch
+ * case statements guaranteed dead code which the compiler will
+ * eliminate, and allowing the "missing symbol in the default case" to
+ * indicate a usage error.
+ */
+#define __X86_CASE_B   1
+#define __X86_CASE_W   2
+#define __X86_CASE_L   4
+#ifdef CONFIG_64BIT
+#define __X86_CASE_Q   8
+#else
+#define __X86_CASE_Q   -1              /* sizeof will never return -1 */
+#endif
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
+ * Since this is generally used to protect other memory information, we
+ * use "asm volatile" and "memory" clobbers to prevent gcc from moving
+ * information around.
+ */
+#define __xchg(x, ptr, size)                                           \
+({                                                                     \
+       __typeof(*(ptr)) __x = (x);                                     \
+       switch (size) {                                                 \
+       case __X86_CASE_B:                                              \
+       {                                                               \
+               volatile u8 *__ptr = (volatile u8 *)(ptr);              \
+               asm volatile("xchgb %0,%1"                              \
+                            : "=q" (__x), "+m" (*__ptr)                \
+                            : "0" (__x)                                \
+                            : "memory");                               \
+               break;                                                  \
+       }                                                               \
+       case __X86_CASE_W:                                              \
+       {                                                               \
+               volatile u16 *__ptr = (volatile u16 *)(ptr);            \
+               asm volatile("xchgw %0,%1"                              \
+                            : "=r" (__x), "+m" (*__ptr)                \
+                            : "0" (__x)                                \
+                            : "memory");                               \
+               break;                                                  \
+       }                                                               \
+       case __X86_CASE_L:                                              \
+       {                                                               \
+               volatile u32 *__ptr = (volatile u32 *)(ptr);            \
+               asm volatile("xchgl %0,%1"                              \
+                            : "=r" (__x), "+m" (*__ptr)                \
+                            : "0" (__x)                                \
+                            : "memory");                               \
+               break;                                                  \
+       }                                                               \
+       case __X86_CASE_Q:                                              \
+       {                                                               \
+               volatile u64 *__ptr = (volatile u64 *)(ptr);            \
+               asm volatile("xchgq %0,%1"                              \
+                            : "=r" (__x), "+m" (*__ptr)                \
+                            : "0" (__x)                                \
+                            : "memory");                               \
+               break;                                                  \
+       }                                                               \
+       default:                                                        \
+               __xchg_wrong_size();                                    \
+       }                                                               \
+       __x;                                                            \
+})
+
+#define xchg(ptr, v)                                                   \
+       __xchg((v), (ptr), sizeof(*ptr))
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+#define __raw_cmpxchg(ptr, old, new, size, lock)                       \
+({                                                                     \
+       __typeof__(*(ptr)) __ret;                                       \
+       __typeof__(*(ptr)) __old = (old);                               \
+       __typeof__(*(ptr)) __new = (new);                               \
+       switch (size) {                                                 \
+       case __X86_CASE_B:                                              \
+       {                                                               \
+               volatile u8 *__ptr = (volatile u8 *)(ptr);              \
+               asm volatile(lock "cmpxchgb %2,%1"                      \
+                            : "=a" (__ret), "+m" (*__ptr)              \
+                            : "q" (__new), "0" (__old)                 \
+                            : "memory");                               \
+               break;                                                  \
+       }                                                               \
+       case __X86_CASE_W:                                              \
+       {                                                               \
+               volatile u16 *__ptr = (volatile u16 *)(ptr);            \
+               asm volatile(lock "cmpxchgw %2,%1"                      \
+                            : "=a" (__ret), "+m" (*__ptr)              \
+                            : "r" (__new), "0" (__old)                 \
+                            : "memory");                               \
+               break;                                                  \
+       }                                                               \
+       case __X86_CASE_L:                                              \
+       {                                                               \
+               volatile u32 *__ptr = (volatile u32 *)(ptr);            \
+               asm volatile(lock "cmpxchgl %2,%1"                      \
+                            : "=a" (__ret), "+m" (*__ptr)              \
+                            : "r" (__new), "0" (__old)                 \
+                            : "memory");                               \
+               break;                                                  \
+       }                                                               \
+       case __X86_CASE_Q:                                              \
+       {                                                               \
+               volatile u64 *__ptr = (volatile u64 *)(ptr);            \
+               asm volatile(lock "cmpxchgq %2,%1"                      \
+                            : "=a" (__ret), "+m" (*__ptr)              \
+                            : "r" (__new), "0" (__old)                 \
+                            : "memory");                               \
+               break;                                                  \
+       }                                                               \
+       default:                                                        \
+               __cmpxchg_wrong_size();                                 \
+       }                                                               \
+       __ret;                                                          \
+})
+
+#define __cmpxchg(ptr, old, new, size)                                 \
+       __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
+
+#define __sync_cmpxchg(ptr, old, new, size)                            \
+       __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
+
+#define __cmpxchg_local(ptr, old, new, size)                           \
+       __raw_cmpxchg((ptr), (old), (new), (size), "")
+
 #ifdef CONFIG_X86_32
 # include "cmpxchg_32.h"
 #else
 # include "cmpxchg_64.h"
 #endif
+
+#ifdef __HAVE_ARCH_CMPXCHG
+#define cmpxchg(ptr, old, new)                                         \
+       __cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define sync_cmpxchg(ptr, old, new)                                    \
+       __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define cmpxchg_local(ptr, old, new)                                   \
+       __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
+#endif
+
+#define __xadd(ptr, inc, lock)                                         \
+       ({                                                              \
+               __typeof__ (*(ptr)) __ret = (inc);                      \
+               switch (sizeof(*(ptr))) {                               \
+               case __X86_CASE_B:                                      \
+                       asm volatile (lock "xaddb %b0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_W:                                      \
+                       asm volatile (lock "xaddw %w0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_L:                                      \
+                       asm volatile (lock "xaddl %0, %1\n"             \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_Q:                                      \
+                       asm volatile (lock "xaddq %q0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               default:                                                \
+                       __xadd_wrong_size();                            \
+               }                                                       \
+               __ret;                                                  \
+       })
+
+/*
+ * xadd() adds "inc" to "*ptr" and atomically returns the previous
+ * value of "*ptr".
+ *
+ * xadd() is locked when multiple CPUs are online
+ * xadd_sync() is always locked
+ * xadd_local() is never locked
+ */
+#define xadd(ptr, inc)         __xadd((ptr), (inc), LOCK_PREFIX)
+#define xadd_sync(ptr, inc)    __xadd((ptr), (inc), "lock; ")
+#define xadd_local(ptr, inc)   __xadd((ptr), (inc), "")
+
+#endif /* ASM_X86_CMPXCHG_H */
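
A note on the default cases above: __xchg_wrong_size() and friends are
declared but never defined. Because sizeof(*(ptr)) is a compile-time
constant, every supported operand size resolves the switch statically
and the default branch is eliminated as dead code, so no reference to
the missing symbol is ever emitted; only a bad size leaves the call in
place and fails the link (or the compile, where __compiletime_error()
is available). A self-contained sketch of that trap (all names here are
illustrative; build with optimization, e.g. cc -O2, as the kernel
always does, so dead branches really are dropped):

	extern void __bad_size(void);	/* declared, defined nowhere */

	#define load_once(ptr)						\
	({								\
		unsigned long __ret = 0;				\
		switch (sizeof(*(ptr))) {				\
		case 4: __ret = *(volatile unsigned int *)(ptr); break;	\
		case 8: __ret = *(volatile unsigned long *)(ptr); break; \
		default: __bad_size();	/* dead for valid sizes */	\
		}							\
		__ret;							\
	})

	int main(void)
	{
		unsigned int ok = 42;
		return (int)load_once(&ok) - 42;  /* links: case 4 taken */
		/* short bad; load_once(&bad);  -> undefined __bad_size */
	}
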
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 3deb725..fbebb07 100644
@@ -1,61 +1,11 @@
 #ifndef _ASM_X86_CMPXCHG_32_H
 #define _ASM_X86_CMPXCHG_32_H
 
-#include <linux/bitops.h> /* for LOCK_PREFIX */
-
 /*
  * Note: if you use set64_bit(), __cmpxchg64(), or their variants,
  *       you need to test for the feature in boot_cpu_data.
  */
 
-extern void __xchg_wrong_size(void);
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
- * Since this is generally used to protect other memory information, we
- * use "asm volatile" and "memory" clobbers to prevent gcc from moving
- * information around.
- */
-#define __xchg(x, ptr, size)                                           \
-({                                                                     \
-       __typeof(*(ptr)) __x = (x);                                     \
-       switch (size) {                                                 \
-       case 1:                                                         \
-       {                                                               \
-               volatile u8 *__ptr = (volatile u8 *)(ptr);              \
-               asm volatile("xchgb %0,%1"                              \
-                            : "=q" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 2:                                                         \
-       {                                                               \
-               volatile u16 *__ptr = (volatile u16 *)(ptr);            \
-               asm volatile("xchgw %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 4:                                                         \
-       {                                                               \
-               volatile u32 *__ptr = (volatile u32 *)(ptr);            \
-               asm volatile("xchgl %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       default:                                                        \
-               __xchg_wrong_size();                                    \
-       }                                                               \
-       __x;                                                            \
-})
-
-#define xchg(ptr, v)                                                   \
-       __xchg((v), (ptr), sizeof(*ptr))
-
 /*
  * CMPXCHG8B only writes to the target if we had the previous
  * value in registers, otherwise it acts as a read and gives us the
@@ -84,72 +34,8 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
                     : "memory");
 }
 
-extern void __cmpxchg_wrong_size(void);
-
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-#define __raw_cmpxchg(ptr, old, new, size, lock)                       \
-({                                                                     \
-       __typeof__(*(ptr)) __ret;                                       \
-       __typeof__(*(ptr)) __old = (old);                               \
-       __typeof__(*(ptr)) __new = (new);                               \
-       switch (size) {                                                 \
-       case 1:                                                         \
-       {                                                               \
-               volatile u8 *__ptr = (volatile u8 *)(ptr);              \
-               asm volatile(lock "cmpxchgb %2,%1"                      \
-                            : "=a" (__ret), "+m" (*__ptr)              \
-                            : "q" (__new), "0" (__old)                 \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 2:                                                         \
-       {                                                               \
-               volatile u16 *__ptr = (volatile u16 *)(ptr);            \
-               asm volatile(lock "cmpxchgw %2,%1"                      \
-                            : "=a" (__ret), "+m" (*__ptr)              \
-                            : "r" (__new), "0" (__old)                 \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 4:                                                         \
-       {                                                               \
-               volatile u32 *__ptr = (volatile u32 *)(ptr);            \
-               asm volatile(lock "cmpxchgl %2,%1"                      \
-                            : "=a" (__ret), "+m" (*__ptr)              \
-                            : "r" (__new), "0" (__old)                 \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       default:                                                        \
-               __cmpxchg_wrong_size();                                 \
-       }                                                               \
-       __ret;                                                          \
-})
-
-#define __cmpxchg(ptr, old, new, size)                                 \
-       __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
-
-#define __sync_cmpxchg(ptr, old, new, size)                            \
-       __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
-
-#define __cmpxchg_local(ptr, old, new, size)                           \
-       __raw_cmpxchg((ptr), (old), (new), (size), "")
-
 #ifdef CONFIG_X86_CMPXCHG
 #define __HAVE_ARCH_CMPXCHG 1
-
-#define cmpxchg(ptr, old, new)                                         \
-       __cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define sync_cmpxchg(ptr, old, new)                                    \
-       __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define cmpxchg_local(ptr, old, new)                                   \
-       __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
 #endif
 
 #ifdef CONFIG_X86_CMPXCHG64
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 7cf5c0a..285da02 100644
 #ifndef _ASM_X86_CMPXCHG_64_H
 #define _ASM_X86_CMPXCHG_64_H
 
-#include <asm/alternative.h> /* Provides LOCK_PREFIX */
-
 static inline void set_64bit(volatile u64 *ptr, u64 val)
 {
        *ptr = val;
 }
 
-extern void __xchg_wrong_size(void);
-extern void __cmpxchg_wrong_size(void);
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
- * Since this is generally used to protect other memory information, we
- * use "asm volatile" and "memory" clobbers to prevent gcc from moving
- * information around.
- */
-#define __xchg(x, ptr, size)                                           \
-({                                                                     \
-       __typeof(*(ptr)) __x = (x);                                     \
-       switch (size) {                                                 \
-       case 1:                                                         \
-       {                                                               \
-               volatile u8 *__ptr = (volatile u8 *)(ptr);              \
-               asm volatile("xchgb %0,%1"                              \
-                            : "=q" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 2:                                                         \
-       {                                                               \
-               volatile u16 *__ptr = (volatile u16 *)(ptr);            \
-               asm volatile("xchgw %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 4:                                                         \
-       {                                                               \
-               volatile u32 *__ptr = (volatile u32 *)(ptr);            \
-               asm volatile("xchgl %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 8:                                                         \
-       {                                                               \
-               volatile u64 *__ptr = (volatile u64 *)(ptr);            \
-               asm volatile("xchgq %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       default:                                                        \
-               __xchg_wrong_size();                                    \
-       }                                                               \
-       __x;                                                            \
-})
-
-#define xchg(ptr, v)                                                   \
-       __xchg((v), (ptr), sizeof(*ptr))
-
 #define __HAVE_ARCH_CMPXCHG 1
 
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-#define __raw_cmpxchg(ptr, old, new, size, lock)                       \
-({                                                                     \
-       __typeof__(*(ptr)) __ret;                                       \
-       __typeof__(*(ptr)) __old = (old);                               \
-       __typeof__(*(ptr)) __new = (new);                               \
-       switch (size) {                                                 \
-       case 1:                                                         \
-       {                                                               \
-               volatile u8 *__ptr = (volatile u8 *)(ptr);              \
-               asm volatile(lock "cmpxchgb %2,%1"                      \
-                            : "=a" (__ret), "+m" (*__ptr)              \
-                            : "q" (__new), "0" (__old)                 \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 2:                                                         \
-       {                                                               \
-               volatile u16 *__ptr = (volatile u16 *)(ptr);            \
-               asm volatile(lock "cmpxchgw %2,%1"                      \
-                            : "=a" (__ret), "+m" (*__ptr)              \
-                            : "r" (__new), "0" (__old)                 \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 4:                                                         \
-       {                                                               \
-               volatile u32 *__ptr = (volatile u32 *)(ptr);            \
-               asm volatile(lock "cmpxchgl %2,%1"                      \
-                            : "=a" (__ret), "+m" (*__ptr)              \
-                            : "r" (__new), "0" (__old)                 \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case 8:                                                         \
-       {                                                               \
-               volatile u64 *__ptr = (volatile u64 *)(ptr);            \
-               asm volatile(lock "cmpxchgq %2,%1"                      \
-                            : "=a" (__ret), "+m" (*__ptr)              \
-                            : "r" (__new), "0" (__old)                 \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       default:                                                        \
-               __cmpxchg_wrong_size();                                 \
-       }                                                               \
-       __ret;                                                          \
-})
-
-#define __cmpxchg(ptr, old, new, size)                                 \
-       __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
-
-#define __sync_cmpxchg(ptr, old, new, size)                            \
-       __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
-
-#define __cmpxchg_local(ptr, old, new, size)                           \
-       __raw_cmpxchg((ptr), (old), (new), (size), "")
-
-#define cmpxchg(ptr, old, new)                                         \
-       __cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define sync_cmpxchg(ptr, old, new)                                    \
-       __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define cmpxchg_local(ptr, old, new)                                   \
-       __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
-
 #define cmpxchg64(ptr, o, n)                                           \
 ({                                                                     \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index df4cd32..2dbe4a7 100644
@@ -204,13 +204,7 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
  */
 static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 {
-       long tmp = delta;
-
-       asm volatile(LOCK_PREFIX "xadd %0,%1"
-                    : "+r" (tmp), "+m" (sem->count)
-                    : : "memory");
-
-       return tmp + delta;
+       return delta + xadd(&sem->count, delta);
 }
 
 #endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index ee67edf..972c260 100644
  * issues and should be optimal for the uncontended case. Note the tail must be
  * in the high part, because a wide xadd increment of the low part would carry
  * up and contaminate the high part.
- *
- * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
- * save some instructions and make the code more elegant. There really isn't
- * much between them in performance though, especially as locks are out of line.
  */
-#if (NR_CPUS < 256)
-#define TICKET_SHIFT 8
-
 static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
-       short inc = 0x0100;
-
-       asm volatile (
-               LOCK_PREFIX "xaddw %w0, %1\n"
-               "1:\t"
-               "cmpb %h0, %b0\n\t"
-               "je 2f\n\t"
-               "rep ; nop\n\t"
-               "movb %1, %b0\n\t"
-               /* don't need lfence here, because loads are in-order */
-               "jmp 1b\n"
-               "2:"
-               : "+Q" (inc), "+m" (lock->slock)
-               :
-               : "memory", "cc");
+       register struct __raw_tickets inc = { .tail = 1 };
+
+       inc = xadd(&lock->tickets, inc);
+
+       for (;;) {
+               if (inc.head == inc.tail)
+                       break;
+               cpu_relax();
+               inc.head = ACCESS_ONCE(lock->tickets.head);
+       }
+       barrier();              /* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
-       int tmp, new;
-
-       asm volatile("movzwl %2, %0\n\t"
-                    "cmpb %h0,%b0\n\t"
-                    "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
-                    "jne 1f\n\t"
-                    LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
-                    "1:"
-                    "sete %b1\n\t"
-                    "movzbl %b1,%0\n\t"
-                    : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
-                    :
-                    : "memory", "cc");
+       arch_spinlock_t old, new;
+
+       old.tickets = ACCESS_ONCE(lock->tickets);
+       if (old.tickets.head != old.tickets.tail)
+               return 0;
+
+       new.head_tail = old.head_tail + (1 << TICKET_SHIFT);
 
-       return tmp;
+       /* cmpxchg is a full barrier, so nothing can move before it */
+       return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
+#if (NR_CPUS < 256)
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
        asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
-                    : "+m" (lock->slock)
+                    : "+m" (lock->head_tail)
                     :
                     : "memory", "cc");
 }
 #else
-#define TICKET_SHIFT 16
-
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
-{
-       int inc = 0x00010000;
-       int tmp;
-
-       asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
-                    "movzwl %w0, %2\n\t"
-                    "shrl $16, %0\n\t"
-                    "1:\t"
-                    "cmpl %0, %2\n\t"
-                    "je 2f\n\t"
-                    "rep ; nop\n\t"
-                    "movzwl %1, %2\n\t"
-                    /* don't need lfence here, because loads are in-order */
-                    "jmp 1b\n"
-                    "2:"
-                    : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
-                    :
-                    : "memory", "cc");
-}
-
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
-{
-       int tmp;
-       int new;
-
-       asm volatile("movl %2,%0\n\t"
-                    "movl %0,%1\n\t"
-                    "roll $16, %0\n\t"
-                    "cmpl %0,%1\n\t"
-                    "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
-                    "jne 1f\n\t"
-                    LOCK_PREFIX "cmpxchgl %1,%2\n\t"
-                    "1:"
-                    "sete %b1\n\t"
-                    "movzbl %b1,%0\n\t"
-                    : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
-                    :
-                    : "memory", "cc");
-
-       return tmp;
-}
-
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
        asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
-                    : "+m" (lock->slock)
+                    : "+m" (lock->head_tail)
                     :
                     : "memory", "cc");
 }
@@ -159,16 +99,16 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
-       int tmp = ACCESS_ONCE(lock->slock);
+       struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-       return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+       return !!(tmp.tail ^ tmp.head);
 }
 
 static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
-       int tmp = ACCESS_ONCE(lock->slock);
+       struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-       return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+       return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
 }
 
 #ifndef CONFIG_PARAVIRT_SPINLOCKS
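
Taken together, the conversions above spell out the ticket-lock
protocol in C: xadd() hands each locker a unique ticket by bumping the
tail half of the lock word, lockers spin until head reaches their
ticket, unlock advances head, and trylock uses a single cmpxchg over
the combined head_tail word so the "is it free?" check and the ticket
grab happen atomically or not at all. A user-space rendering with GCC
atomic builtins (illustrative only; like the kernel code it assumes
x86's little-endian layout, where head is the low half of head_tail):

	#include <stdbool.h>
	#include <stdint.h>

	#define TICKET_SHIFT 16

	typedef union {
		uint32_t head_tail;
		struct { uint16_t head, tail; } tickets;
	} ticketlock_t;

	static void ticket_lock(ticketlock_t *lock)
	{
		/* take a ticket: bump tail, keep the pre-increment value */
		uint16_t me = __atomic_fetch_add(&lock->tickets.tail, 1,
						 __ATOMIC_SEQ_CST);

		/* spin until our number is being served */
		while (__atomic_load_n(&lock->tickets.head,
				       __ATOMIC_ACQUIRE) != me)
			;	/* the kernel calls cpu_relax() here */
	}

	static bool ticket_trylock(ticketlock_t *lock)
	{
		ticketlock_t old, new;

		old.head_tail = __atomic_load_n(&lock->head_tail,
						__ATOMIC_RELAXED);
		if (old.tickets.head != old.tickets.tail)
			return false;	/* visibly held, don't bother */

		/* bump only the tail (high) half of the combined word */
		new.head_tail = old.head_tail + (1 << TICKET_SHIFT);

		/* one cmpxchg covers both halves: either we take the lock
		 * and a ticket together, or someone beat us to it */
		return __atomic_compare_exchange_n(&lock->head_tail,
						   &old.head_tail,
						   new.head_tail, false,
						   __ATOMIC_SEQ_CST,
						   __ATOMIC_SEQ_CST);
	}

	static void ticket_unlock(ticketlock_t *lock)
	{
		/* serve the next ticket holder */
		__atomic_fetch_add(&lock->tickets.head, 1, __ATOMIC_RELEASE);
	}
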
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 7c7a486..8ebd5df 100644
@@ -5,11 +5,29 @@
 # error "please don't include this file directly"
 #endif
 
+#include <linux/types.h>
+
+#if (CONFIG_NR_CPUS < 256)
+typedef u8  __ticket_t;
+typedef u16 __ticketpair_t;
+#else
+typedef u16 __ticket_t;
+typedef u32 __ticketpair_t;
+#endif
+
+#define TICKET_SHIFT   (sizeof(__ticket_t) * 8)
+#define TICKET_MASK    ((__ticket_t)((1 << TICKET_SHIFT) - 1))
+
 typedef struct arch_spinlock {
-       unsigned int slock;
+       union {
+               __ticketpair_t head_tail;
+               struct __raw_tickets {
+                       __ticket_t head, tail;
+               } tickets;
+       };
 } arch_spinlock_t;
 
-#define __ARCH_SPIN_LOCK_UNLOCKED      { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED      { { 0 } }
 
 #include <asm/rwlock.h>
 
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 37d3698..c568ccc 100644
@@ -656,11 +656,7 @@ static inline int atomic_read_short(const struct atomic_short *v)
  */
 static inline int atom_asr(short i, struct atomic_short *v)
 {
-       short __i = i;
-       asm volatile(LOCK_PREFIX "xaddw %0, %1"
-                       : "+r" (i), "+m" (v->counter)
-                       : : "memory");
-       return i + __i;
+       return i + xadd(&v->counter, i);
 }
 
 /*