arm64: atomics: prefetch the destination word for write prior to stxr
author	Will Deacon <will.deacon@arm.com>
Fri, 29 May 2015 12:31:10 +0000 (13:31 +0100)
committer	Will Deacon <will.deacon@arm.com>
Mon, 27 Jul 2015 14:28:53 +0000 (15:28 +0100)
The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch makes use of prfm to prefetch cachelines for write prior to
ldxr/stxr loops when using the ll/sc atomic routines.
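
As a rough illustration (not part of the patch), each ll/sc routine gains a
single PRFM with the PSTL1STRM hint (prefetch for store, into L1, streaming)
on the operand before the exclusive load, so the cacheline is requested in a
writable state up front and the first stxr is less likely to fail. A minimal
sketch of the pattern, using a hypothetical standalone my_atomic_add() helper
rather than the kernel's ATOMIC_OP() macro machinery:

	/*
	 * Sketch of the prefetch-before-ll/sc pattern; the real routines are
	 * generated from the macros in atomic_ll_sc.h.
	 */
	static inline void my_atomic_add(int i, int *v)
	{
		unsigned long tmp;
		int result;

		asm volatile(
		/* Ask for the line in a writable state before ldxr. */
		"	prfm	pstl1strm, %2\n"
		"1:	ldxr	%w0, %2\n"	/* exclusive load of *v */
		"	add	%w0, %w0, %w3\n"
		"	stxr	%w1, %w0, %2\n"	/* %w1 == 0 on success */
		"	cbnz	%w1, 1b\n"	/* lost the exclusive: retry */
		: "=&r" (result), "=&r" (tmp), "+Q" (*v)
		: "Ir" (i));
	}

The extra nop added to each LSE block in cmpxchg.h keeps the LSE alternative
the same length as the LL/SC sequence, which grows by one instruction.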

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/cmpxchg.h
arch/arm64/include/asm/futex.h
arch/arm64/lib/bitops.S

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 5a9fb37..50d6abd 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -45,6 +45,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))                               \
        int result;                                                     \
                                                                        \
        asm volatile("// atomic_" #op "\n"                              \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w0, %2\n"                                              \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
 "      stxr    %w1, %w0, %2\n"                                         \
@@ -62,6 +63,7 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v))              \
        int result;                                                     \
                                                                        \
        asm volatile("// atomic_" #op "_return\n"                       \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w0, %2\n"                                              \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
 "      stlxr   %w1, %w0, %2\n"                                         \
@@ -98,6 +100,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new))
        int oldval;
 
        asm volatile("// atomic_cmpxchg\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %w1, %2\n"
 "      eor     %w0, %w1, %w3\n"
 "      cbnz    %w0, 2f\n"
@@ -121,6 +124,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v))                        \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "\n"                            \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %0, %2\n"                                               \
 "      " #asm_op "     %0, %0, %3\n"                                   \
 "      stxr    %w1, %0, %2\n"                                          \
@@ -138,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v))               \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "_return\n"                     \
+"      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %0, %2\n"                                               \
 "      " #asm_op "     %0, %0, %3\n"                                   \
 "      stlxr   %w1, %0, %2\n"                                          \
@@ -174,6 +179,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new))
        unsigned long res;
 
        asm volatile("// atomic64_cmpxchg\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %1, %2\n"
 "      eor     %0, %1, %3\n"
 "      cbnz    %w0, 2f\n"
@@ -196,6 +202,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
        unsigned long tmp;
 
        asm volatile("// atomic64_dec_if_positive\n"
+"      prfm    pstl1strm, %2\n"
 "1:    ldxr    %0, %2\n"
 "      subs    %0, %0, #1\n"
 "      b.mi    2f\n"
@@ -220,6 +227,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,            \
        unsigned long tmp, oldval;                                      \
                                                                        \
        asm volatile(                                                   \
+       "       prfm    pstl1strm, %2\n"                                \
        "1:     ldxr" #sz "\t%" #w "[oldval], %[v]\n"                   \
        "       eor     %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"  \
        "       cbnz    %" #w "[tmp], 2f\n"                             \
@@ -259,6 +267,7 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1,           \
        unsigned long tmp, ret;                                         \
                                                                        \
        asm volatile("// __cmpxchg_double" #name "\n"                   \
+       "       prfm    pstl1strm, %2\n"                                \
        "1:     ldxp    %0, %1, %2\n"                                   \
        "       eor     %0, %0, %3\n"                                   \
        "       eor     %1, %1, %4\n"                                   \
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index f702126..7bfda09 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -33,12 +33,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
        case 1:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxrb   %w0, %2\n"
                "       stlxrb  %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpalb  %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
@@ -49,12 +51,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
        case 2:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxrh   %w0, %2\n"
                "       stlxrh  %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpalh  %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
@@ -65,12 +69,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
        case 4:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxr    %w0, %2\n"
                "       stlxr   %w1, %w3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpal   %w3, %w0, %2\n"
                "       nop\n"
                "       nop")
@@ -81,12 +87,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
        case 8:
                asm volatile(ARM64_LSE_ATOMIC_INSN(
                /* LL/SC */
+               "       prfm    pstl1strm, %2\n"
                "1:     ldxr    %0, %2\n"
                "       stlxr   %w1, %3, %2\n"
                "       cbnz    %w1, 1b\n"
                "       dmb     ish",
                /* LSE atomics */
                "       nop\n"
+               "       nop\n"
                "       swpal   %3, %0, %2\n"
                "       nop\n"
                "       nop")