arm64: update 32-bit kuser helpers to ARMv8
author Robin Murphy <Robin.Murphy@arm.com>
Mon, 7 Oct 2013 17:30:34 +0000 (18:30 +0100)
committer Catalin Marinas <catalin.marinas@arm.com>
Mon, 28 Oct 2013 10:40:28 +0000 (10:40 +0000)
This patch updates the barrier semantics in the kuser helper functions
to take advantage of the ARMv8 additions to AArch32, which are
guaranteed to be available in situations where these functions will be
called.

Note that this slightly changes the cmpxchg functions in that they are
no longer necessarily full barriers if they return 1. However, the
documentation only states they include their own barriers "as needed",
not that they are obligated to act as a full barrier for the caller.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
CC: Matthew Leach <matthew.leach@arm.com>
CC: Dave Martin <dave.martin@arm.com>
CC: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
arch/arm64/kernel/kuser32.S

index 1e4905d..63c48ff 100644 (file)
@@ -38,33 +38,30 @@ __kuser_cmpxchg64:                  // 0xffff0f60
        .inst   0xe92d00f0              //      push            {r4, r5, r6, r7}
        .inst   0xe1c040d0              //      ldrd            r4, r5, [r0]
        .inst   0xe1c160d0              //      ldrd            r6, r7, [r1]
-       .inst   0xf57ff05f              //      dmb             sy
-       .inst   0xe1b20f9f              // 1:   ldrexd          r0, r1, [r2]
+       .inst   0xe1b20e9f              // 1:   ldaexd          r0, r1, [r2]
        .inst   0xe0303004              //      eors            r3, r0, r4
        .inst   0x00313005              //      eoreqs          r3, r1, r5
-       .inst   0x01a23f96              //      strexdeq        r3, r6, [r2]
+       .inst   0x01a23e96              //      stlexdeq        r3, r6, [r2]
        .inst   0x03330001              //      teqeq           r3, #1
        .inst   0x0afffff9              //      beq             1b
-       .inst   0xf57ff05f              //      dmb             sy
        .inst   0xe2730000              //      rsbs            r0, r3, #0
        .inst   0xe8bd00f0              //      pop             {r4, r5, r6, r7}
        .inst   0xe12fff1e              //      bx              lr
 
        .align  5
 __kuser_memory_barrier:                        // 0xffff0fa0
-       .inst   0xf57ff05f              //      dmb             sy
+       .inst   0xf57ff05b              //      dmb             ish
        .inst   0xe12fff1e              //      bx              lr
 
        .align  5
 __kuser_cmpxchg:                       // 0xffff0fc0
-       .inst   0xf57ff05f              //      dmb             sy
-       .inst   0xe1923f9f              // 1:   ldrex           r3, [r2]
+       .inst   0xe1923e9f              // 1:   ldaex           r3, [r2]
        .inst   0xe0533000              //      subs            r3, r3, r0
-       .inst   0x01823f91              //      strexeq r3, r1, [r2]
+       .inst   0x01823e91              //      stlexeq         r3, r1, [r2]
        .inst   0x03330001              //      teqeq           r3, #1
        .inst   0x0afffffa              //      beq             1b
        .inst   0xe2730000              //      rsbs            r0, r3, #0
-       .inst   0xeaffffef              //      b               <__kuser_memory_barrier>
+       .inst   0xe12fff1e              //      bx              lr
 
        .align  5
 __kuser_get_tls:                       // 0xffff0fe0