/*
 * Copy routine defined outside this translation unit. It takes the same
 * parameter list and returns the same type as memcpy, and is the routine
 * passed to do_test under the "ASM SIMD" label in main.
 * NOTE(review): presumably an ARM NEON assembly implementation - confirm
 * against the object/assembly file that provides the symbol.
 */
8 extern void *memcpy_neon(void *dst, const void *src, size_t size);
/* Size in bytes (8 MiB) of each buffer allocated and copied in main. */
10 #define BUFSIZE (8*1024*1024)
/*
 * Elapsed time from *tv1 to *tv2 in microseconds: the difference of the
 * tv_sec fields scaled by 1,000,000 plus the difference of the tv_usec
 * fields.
 * NOTE(review): the return-type line and the braces of this function are not
 * visible in this excerpt - confirm the declared return type is wide enough
 * to hold the product computed below.
 */
14 tv_diff(struct timeval *tv1, struct timeval *tv2)
16 return (tv2->tv_sec - tv1->tv_sec) * 1000000 +
17 (tv2->tv_usec - tv1->tv_usec);
/*
 * Time a loop of ITER iterations and print the resulting throughput.
 *
 * name  label printed in the first output column (left-justified, minimum
 *       width 8).
 * p1    first buffer pointer.
 * p2    second buffer pointer.
 * size  byte count used in the throughput calculation.
 * cpy   copy routine under test, with a memcpy-compatible signature.
 *
 * NOTE(review): the statement inside the timing loop is not visible in this
 * excerpt - presumably it calls cpy on p1/p2 with size; confirm which pointer
 * is the destination and which is the source.
 */
20 static void do_test(const char *name, void *p1, void *p2, size_t size,
21 void *(*cpy)(void *, const void *, size_t))
23 struct timeval t1, t2;
/* Timestamp taken immediately before the loop. */
26 gettimeofday(&t1, NULL);
27 for (i = 0; i < ITER; i++)
/* Timestamp taken immediately after the loop. */
29 gettimeofday(&t2, NULL);
/*
 * Throughput in bytes per second: size * ITER bytes, scaled by 1,000,000
 * because tv_diff reports the elapsed time in microseconds.
 * NOTE(review): "%llu" expects unsigned long long, while the argument is
 * built from a uint64_t cast; on targets where uint64_t is unsigned long the
 * specifier does not match - PRIu64 from <inttypes.h> would. Also, if both
 * timestamps are equal the divisor is zero.
 */
31 printf("%-8s %llu B/s\n", name,
32 (uint64_t)size * ITER * 1000000 / tv_diff(&t1, &t2));
/*
 * Copy routine with a memcpy-compatible signature that views the source as
 * an array of 32-bit words. main passes it to do_test under the "INT32"
 * label.
 * NOTE(review): the lines between the pointer setup and the loop, and the
 * loop body itself, are missing from this excerpt. Confirm that size is
 * converted from a byte count to a word count before the loop; if it is not,
 * the bound `i < size` would index four times past a size-byte buffer.
 */
35 static void *int32_cpy(void *dst, const void *src, size_t size)
/* Source viewed as 32-bit words. */
37 const uint32_t *s = src;
43 for (i = 0; i < size; i++)
/*
 * Copy routine with a memcpy-compatible signature; main passes it to do_test
 * under the "C SIMD" label.
 * NOTE(review): apart from the typedef below, the body is not visible in this
 * excerpt.
 */
49 static void *vec_cpy(void *dst, const void *src, size_t size)
/*
 * v4si: compiler vector-extension type, 16 bytes wide with int elements
 * (four lanes when int is 32 bits).
 */
51 typedef int v4si __attribute__ ((vector_size(16)));
/*
 * Benchmark driver: allocates two BUFSIZE-byte buffers and runs do_test once
 * per copy implementation, passing the same buffers and size each time.
 * NOTE(review): local declarations, braces and the return statement are not
 * visible in this excerpt.
 */
63 int main(int argc, char **argv)
/*
 * Both buffers are requested with 64-byte alignment.
 * NOTE(review): no NULL check on either allocation is visible here - confirm
 * one exists, since the buffers are written and passed on below.
 */
67 buf1 = memalign(64, BUFSIZE);
68 buf2 = memalign(64, BUFSIZE);
/*
 * Zero-fill buf2 before the timed runs.
 * NOTE(review): no matching initialisation of buf1 is visible in this
 * excerpt.
 */
70 memset(buf2, 0, BUFSIZE);
/* One timed run per implementation; the first argument is the printed label. */
72 do_test("memcpy", buf1, buf2, BUFSIZE, memcpy);
73 do_test("INT32", buf1, buf2, BUFSIZE, int32_cpy);
74 do_test("C SIMD", buf1, buf2, BUFSIZE, vec_cpy);
75 do_test("ASM SIMD", buf1, buf2, BUFSIZE, memcpy_neon);