--- /dev/null
+ @ Tell the assembler that NEON (Advanced SIMD) instructions such as
+ @ vld1/vst1 are permitted in this file.
+ .fpu neon
+ @ Everything that follows is emitted into the code section.
+ .text
+
+ @ -----------------------------------------------------------------------
+ @ void *memcpy_neon(void *dst, const void *src, size_t n)
+ @
+ @ In:    r0 = dst, r1 = src, r2 = n (byte count, treated as unsigned)
+ @ Out:   r0 = dst (never modified; r3 is used as the write cursor)
+ @ Clobb: r1-r3, r12, d0-d7, d16-d19, flags
+ @        (r4-r11 are used as copy buffers and saved/restored on the stack)
+ @
+ @ Fast path: when both pointers are 16-byte aligned, data moves in
+ @ 128-byte blocks (96 bytes through NEON d-registers plus 32 bytes
+ @ through r4-r11). Buffers that are not 16-byte aligned, and the final
+ @ n % 128 bytes of an aligned copy, are moved one byte at a time, so
+ @ exactly n bytes are read and written (nothing at all when n == 0).
+ @ -----------------------------------------------------------------------
+ .global memcpy_neon
+ .func memcpy_neon
+memcpy_neon:
+ push {r4-r11}
+ mov r3, r0                        @ r3 = write cursor; keep r0 = dst for the return value
+
+ @ The block loop uses :128 alignment qualifiers and ldm/stm, which fault
+ @ on unaligned addresses, so only enter it when both pointers qualify.
+ orr r12, r0, r1
+ tst r12, #15
+ bne .Lmemcpy_neon_bytes           @ misaligned: r2 still holds n, copy it all bytewise
+
+ @ Bias the counter by one block: from here until .Lmemcpy_neon_tail,
+ @ r2 = (bytes remaining) - 128, and a borrow means "less than a block left".
+ subs r2, r2, #128
+ blo .Lmemcpy_neon_tail
+
+.Lmemcpy_neon_block:
+ pld [r1, #192]                    @ prefetch ahead of the read cursor
+ pld [r1, #256]
+ vld1.64 {d0-d3}, [r1,:128]!
+ vld1.64 {d4-d7}, [r1,:128]!
+ vld1.64 {d16-d19}, [r1,:128]!
+ ldm r1!, {r4-r11}
+ subs r2, r2, #128                 @ flags survive the stores below (none of them set flags)
+ vst1.64 {d0-d3}, [r3,:128]!
+ vst1.64 {d4-d7}, [r3,:128]!
+ vst1.64 {d16-d19}, [r3,:128]!
+ stm r3!, {r4-r11}
+ bhs .Lmemcpy_neon_block           @ unsigned: another full block is still available
+
+.Lmemcpy_neon_tail:
+ add r2, r2, #128                  @ undo the bias: r2 = 0..127 bytes left
+
+.Lmemcpy_neon_bytes:
+ cmp r2, #0
+ beq .Lmemcpy_neon_done
+
+.Lmemcpy_neon_byte_loop:
+ ldrb r12, [r1], #1
+ subs r2, r2, #1
+ strb r12, [r3], #1                @ strb leaves the flags from subs intact
+ bne .Lmemcpy_neon_byte_loop
+
+.Lmemcpy_neon_done:
+ pop {r4-r11}
+ bx lr
+ .endfunc