7 #include <inc_libc64_mini.h>
/*
 * Block-fetch routines exercised by the read_fbt_* tests. Only their
 * prototypes appear here; the tests call them as
 * (256, dummy2, mem + i), i.e. byte count, scratch destination, source.
 */
9 void aligned_fetch_fbmem_to_scratch_neon(int numbytes, void *scratch, const void *fbmem);
10 void aligned_fetch_fbmem_to_scratch_vfp (int numbytes, void *scratch, const void *fbmem);
/* Byte count handed to every test function by run() (1024 * 1024 bytes). */
12 #define TEST_SIZE (1024 * 1024)
/*
 * Static, 64-byte aligned work buffers of TEST_SIZE bytes each:
 * dummy is the buffer benchmarked under the ".bss" label, dummy2 is the
 * scratch destination passed to the aligned_fetch_fbmem_to_scratch_* calls.
 */
15 static char dummy [TEST_SIZE] __attribute__((aligned(64)));
16 static char dummy2[TEST_SIZE] __attribute__((aligned(64)));
/*
 * Sets up the CPU cycle counter through coprocessor 15.
 *
 * Read-modify-writes the register at c9, c12, 0 (PMCR in the ARMv7-A
 * performance monitor encoding): sets bits 0 and 2, clears bit 3. Then
 * writes bit 31 to the register at c9, c12, 1 to switch the cycle counter on.
 * Takes no arguments and returns nothing.
 */
18 static inline void pcnt_init(void)
/* Fetch the current control value into v. */
21 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(v));
22 v |= 5; // master enable, ccnt reset
23 v &= ~8; // ccnt divider 0
/* Write the modified control value back. */
24 asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(v));
25 // enable cycle counter
/* NOTE(review): 1<<31 shifts into the sign bit of int; 1u<<31 would avoid that. */
26 asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(1<<31));
/*
 * Samples the counter at CP15 c9, c13, 0 (the cycle counter that pcnt_init()
 * enables). run() subtracts two samples to get an elapsed cycle count.
 * Presumably returns the value read by the MRC below - TODO confirm.
 */
29 static inline unsigned int pcnt_get(void)
32 __asm__ volatile("mrc p15, 0, %0, c9, c13, 0"
/*
 * make_rd_test(name, type): generates a static test function
 *     static int name(void *mem_, size_t size)
 * that views the buffer as an array of const `type` and converts `size`
 * from a byte count into an element count of that type.
 */
37 #define make_rd_test(name, type) \
38 static int name(void *mem_, size_t size) \
40 const type *mem = mem_; \
43 size /= sizeof(*mem); \
50 make_rd_test(read_c_8, int8_t)   /* read test over 8-bit elements */
51 make_rd_test(read_c_16, int16_t) /* read test over 16-bit elements */
52 make_rd_test(read_c_32, int32_t) /* read test over 32-bit elements */
53 make_rd_test(read_c_64, int64_t) /* read test over 64-bit elements */
/*
 * Read test written in inline assembly around the LDRD instruction.
 * mem  - start of the buffer to read
 * size - byte count supplied by run()
 */
55 static int read_ldrd(void *mem, size_t size)
/* Label 0: load 8 bytes from [mem] into r2/r3, then advance mem by 8 (post-index). */
59 "0: ldrd r2, r3, [%0], #8\n"
/* mem and size are declared as early-clobber register outputs of the asm. */
62 : "=&r"(mem), "=&r"(size)
/* Clobbers: the two load targets, the condition flags, and memory. */
64 : "r2", "r3", "cc", "memory");
/*
 * Second LDRD-based read test with the same (mem, size) signature as
 * read_ldrd. The name suggests a PLD prefetch is added to the loop -
 * TODO confirm.
 */
68 static int read_ldrd_pld(void *mem, size_t size)
/* Label 0: load 8 bytes from [mem] into r2/r3, then advance mem by 8 (post-index). */
72 "0: ldrd r2, r3, [%0], #8\n"
/* mem and size are declared as early-clobber register outputs of the asm. */
76 : "=&r"(mem), "=&r"(size)
/* Clobbers: the two load targets, the condition flags, and memory. */
78 : "r2", "r3", "cc", "memory");
/*
 * Strided read test over int elements. The stride comes from g_skip,
 * which is defined outside this function.
 * mem_ - start of the buffer
 * size - loop bound; compared directly against the int index i
 */
83 static int read_c_32_skip(void *mem_, size_t size)
85 const int *mem = mem_;
/* g_skip / 4 turns g_skip into a step counted in 4-byte units
 * (presumably g_skip is a byte distance - TODO confirm). */
86 int skip = g_skip / 4;
/* Visit every skip-th index. */
91 for (i = 0; i < size; i += skip)
/*
 * Read test that walks the buffer in 256-byte steps and hands each chunk to
 * aligned_fetch_fbmem_to_scratch_neon(), always using dummy2 as the scratch
 * destination.
 */
97 static int read_fbt_neon(void *mem, size_t size)
101 for (i = 0; i < size; i += 256)
/* mem + i on a void * relies on the GNU C extension that treats it as byte arithmetic. */
102 aligned_fetch_fbmem_to_scratch_neon(256, dummy2, mem + i);
/*
 * Read test that walks the buffer in 256-byte steps and hands each chunk to
 * aligned_fetch_fbmem_to_scratch_vfp(), always using dummy2 as the scratch
 * destination.
 */
107 static int read_fbt_vfp(void *mem, size_t size)
111 for (i = 0; i < size; i += 256)
/* mem + i on a void * relies on the GNU C extension that treats it as byte arithmetic. */
112 aligned_fetch_fbmem_to_scratch_vfp(256, dummy2, mem + i);
/*
 * Byte-wide write test in inline assembly.
 * mem  - start of the buffer to write
 * size - byte count supplied by run(); its register also supplies the stored value
 */
117 static int write8(void *mem, size_t size)
/* Label 0: store the low byte of operand 1 (size) at [mem], then advance mem by 1. */
120 "0: strb %1, [%0], #1\n"
/* mem and size are early-clobber register outputs ... */
123 : "=&r"(mem), "=&r"(size)
/* ... and the matching constraints feed their initial values in through the same registers. */
124 : "0"(mem), "1"(size)
/*
 * Halfword-wide write test in inline assembly.
 * mem  - start of the buffer to write
 * size - byte count supplied by run(); its register also supplies the stored value
 */
129 static int write16(void *mem, size_t size)
/* Label 0: store the low halfword of operand 1 (size) at [mem], then advance mem by 2. */
132 "0: strh %1, [%0], #2\n"
/* mem and size are early-clobber register outputs ... */
135 : "=&r"(mem), "=&r"(size)
/* ... and the matching constraints feed their initial values in through the same registers. */
136 : "0"(mem), "1"(size)
/*
 * Word-wide write test in inline assembly.
 * mem  - start of the buffer to write
 * size - byte count supplied by run(); its register also supplies the stored value
 */
141 static int write32(void *mem, size_t size)
/* Label 0: store operand 1 (size) as a 32-bit word at [mem], then advance mem by 4. */
144 "0: str %1, [%0], #4\n"
/* mem and size are early-clobber register outputs ... */
147 : "=&r"(mem), "=&r"(size)
/* ... and the matching constraints feed their initial values in through the same registers. */
148 : "0"(mem), "1"(size)
/*
 * Doubleword-wide write test in inline assembly.
 * mem  - start of the buffer to write
 * size - byte count supplied by run()
 */
153 static int write64(void *mem, size_t size)
/* Label 0: store the current contents of r12 and r13 (r13 is the stack pointer)
 * as 8 bytes at [mem], then advance mem by 8. The data written is whatever
 * those registers happen to hold. */
156 "0: strd r12, r13, [%0], #8\n"
/* mem and size are early-clobber register outputs ... */
159 : "=&r"(mem), "=&r"(size)
/* ... and the matching constraints feed their initial values in through the same registers. */
160 : "0"(mem), "1"(size)
/*
 * Times one test function over TRIES attempts and prints one result line.
 *
 * name     - label printed in the first column
 * inv      - optional callback with the (mem, size) signature; callers pass
 *            NULL when none is wanted
 * test     - test function, invoked as test(mem, TEST_SIZE)
 * mem      - buffer handed to the test
 * baseline - reference cycle count, used as the divisor of the percentage column
 *
 * Returns unsigned int; run_all() stores it per test (presumably the smallest
 * cycle count measured - TODO confirm).
 */
165 static unsigned int run(const char *name,
166 void (*inv)(void *mem, size_t size),
167 int (*test)(void *mem, size_t size),
168 void *mem, unsigned int baseline)
/* smallest starts at the largest unsigned value so any measurement compares lower. */
170 unsigned int i, cycles, smallest = ~0;
172 for (i = 0; i < TRIES; i++) {
176 test(mem, TEST_SIZE);
/* Elapsed cycles: current counter minus the value previously held in cycles. */
177 cycles = pcnt_get() - cycles;
/* Track the best (lowest) attempt. */
179 if (cycles < smallest)
/* Label left-justified in 10 columns, then the best time in thousands of cycles. */
183 printf("%-10s %6uk", name, smallest / 1000);
/* Best time as a percentage of baseline; baseline must be non-zero here.
 * NOTE(review): the argument is unsigned long long (100ull), so %llu would
 * match it; %lld is the signed conversion. */
185 printf(" %5lld%%", smallest * 100ull / baseline);
/*
 * Runs the whole suite of read and write tests on one buffer and prints the
 * results under the heading `name`.
 *
 * name - heading printed before the per-test lines
 * inv  - callback forwarded to run() for the read tests; the write tests
 *        always pass NULL instead
 *
 * Each result goes into r[t], and the stored b[t] is passed to run() as that
 * test's baseline.
 */
191 static void run_all(const char *name,
192 void (*inv)(void *mem, size_t size),
/* static: baseline values persist from one run_all() call to the next. */
195 static unsigned int b[16];
199 printf("%s\n", name);
/* Plain C reads at 8/16/32/64-bit element width. */
200 r[t] = run(" 8", inv, read_c_8, mem, b[t]); t++;
201 r[t] = run(" 16", inv, read_c_16, mem, b[t]); t++;
202 r[t] = run(" 32", inv, read_c_32, mem, b[t]); t++;
203 r[t] = run(" 64", inv, read_c_64, mem, b[t]); t++;
/* Same strided test twice under different labels; presumably g_skip is
 * changed between the two runs - TODO confirm. */
205 r[t] = run(" 32_s32", inv, read_c_32_skip, mem, b[t]); t++;
207 r[t] = run(" 32_s64", inv, read_c_32_skip, mem, b[t]); t++;
/* Assembly and external block-fetch read tests. */
208 r[t] = run(" ldrd", inv, read_ldrd, mem, b[t]); t++;
209 r[t] = run(" ldrd pld", inv, read_ldrd_pld, mem, b[t]); t++;
210 r[t] = run(" fbt neon", inv, read_fbt_neon, mem, b[t]); t++;
211 r[t] = run(" fbt vfp", inv, read_fbt_vfp, mem, b[t]); t++;
/* Write tests: no inv callback is passed. */
212 r[t] = run(" w08", NULL, write8, mem, b[t]); t++;
213 r[t] = run(" w16", NULL, write16, mem, b[t]); t++;
214 r[t] = run(" w32", NULL, write32, mem, b[t]); t++;
215 r[t] = run(" w64", NULL, write64, mem, b[t]); t++;
/* Copy this call's results into the persistent baseline array; the copy
 * length is sizeof(b), i.e. all 16 slots. NOTE(review): may be guarded by a
 * condition - confirm. */
218 memcpy(b, r, sizeof(b));
/*
 * Adapter with the (mem, size) callback signature expected by run() and
 * run_all(); forwards both arguments to dsp_cache_inv_virt().
 */
221 static void shm_inv(void *mem, size_t size)
223 dsp_cache_inv_virt(mem, size);
/*
 * Runs the full test suite on a TEST_SIZE-byte region obtained from
 * dsp_shm_alloc().
 *
 * name    - heading forwarded to run_all()
 * ct      - cache type passed to dsp_shm_alloc()
 * use_inv - non-zero: pass shm_inv as the inv callback; zero: pass NULL
 */
226 static void run_shm(const char *name, dsp_cache_t ct, int use_inv)
228 dsp_mem_region_t region;
231 region = dsp_shm_alloc(ct, TEST_SIZE);
/* Treat a short region or a zero virtual address as allocation failure. */
232 if (region.size < TEST_SIZE || region.virt_addr == 0) {
233 fprintf(stderr, "dsp_shm_alloc failed\n");
/* The tests operate on the region's virtual address. */
236 mem = (void *)region.virt_addr;
237 // printf("mapped %d %p\n", ct, mem);
239 run_all(name, use_inv ? shm_inv : NULL, mem);
/* Release the region once the suite has run. */
241 dsp_shm_free(region);
/*
 * Top-level benchmark sequence (presumably the body of main() - TODO
 * confirm): fill both static buffers, benchmark the static buffer, then
 * benchmark DSP shared-memory regions of several cache types.
 */
/* Fill both static buffers with the byte value 1. */
249 memset(dummy, 1, sizeof(dummy));
250 memset(dummy2, 1, sizeof(dummy2));
251 printf("pid: %d dummy: %p\n", (int)getpid(), dummy);
/* Static buffer run, with no inv callback. */
255 run_all(".bss", NULL, dummy);
/* Reports the dsp_open result code on stderr (presumably only on failure - TODO confirm). */
259 fprintf(stderr, "dsp_open %d\n", ret);
/* Shared-memory runs; the third argument selects whether shm_inv is used. */
263 run_shm("shm wb", DSP_CACHE_RW, 0);
264 //run_shm("shm wt", DSP_CACHE_R, 0);
265 run_shm("shm nc", DSP_CACHE_W, 0);
266 //run_shm("shm wb inv", DSP_CACHE_RW, 1);
267 run_shm("shm wt inv", DSP_CACHE_R, 1);