* issues and should be optimal for the uncontended case. Note the tail must be
* in the high part, because a wide xadd increment of the low part would carry
* up and contaminate the high part.
- *
- * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
- * save some instructions and make the code more elegant. There really isn't
- * much between them in performance though, especially as locks are out of line.
*/
static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
barrier(); /* make sure nothing creeps before the lock is taken */
}
-#if (NR_CPUS < 256)
static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
- unsigned int tmp, new;
-
- asm volatile("movzwl %2, %0\n\t"
- "cmpb %h0,%b0\n\t"
- "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
- "jne 1f\n\t"
- LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
- "1:"
- "sete %b1\n\t"
- "movzbl %b1,%0\n\t"
- : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
- :
- : "memory", "cc");
+ arch_spinlock_t old, new;
- return tmp;
+ old.tickets = ACCESS_ONCE(lock->tickets);
+ if (old.tickets.head != old.tickets.tail)
+ return 0;
+
+ new.head_tail = old.head_tail + (1 << TICKET_SHIFT);
+
+ /* cmpxchg is a full barrier, so nothing can move before it */
+ return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
}
+#if (NR_CPUS < 256)
static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
- : "+m" (lock->slock)
+ : "+m" (lock->head_tail)
:
: "memory", "cc");
}
#else
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
-{
- unsigned tmp;
- unsigned new;
-
- asm volatile("movl %2,%0\n\t"
- "movl %0,%1\n\t"
- "roll $16, %0\n\t"
- "cmpl %0,%1\n\t"
- "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
- "jne 1f\n\t"
- LOCK_PREFIX "cmpxchgl %1,%2\n\t"
- "1:"
- "sete %b1\n\t"
- "movzbl %b1,%0\n\t"
- : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
- :
- : "memory", "cc");
-
- return tmp;
-}
-
static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
- : "+m" (lock->slock)
+ : "+m" (lock->head_tail)
:
: "memory", "cc");
}