X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=arch%2Fmicroblaze%2Flib%2Fuaccess_old.S;h=f037266cdaf3e358676c0428e83393c16dd410cb;hb=cf0223503e6198292cdcc864e01eeb5fe7490752;hp=5810cec54a7a36e356d5c48b4ad29923ee0829e4;hpb=85252b6ae5f7e8bba570309d1945eba63c53ad3e;p=pandora-kernel.git diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S index 5810cec54a7a..f037266cdaf3 100644 --- a/arch/microblaze/lib/uaccess_old.S +++ b/arch/microblaze/lib/uaccess_old.S @@ -10,6 +10,7 @@ #include #include +#include /* * int __strncpy_user(char *to, char *from, int len); @@ -33,8 +34,8 @@ __strncpy_user: * r3 - temp count * r4 - temp val */ + beqid r7,3f addik r3,r7,0 /* temp_count = len */ - beqi r3,3f 1: lbu r4,r6,r0 sb r4,r5,r0 @@ -76,8 +77,8 @@ __strncpy_user: .type __strnlen_user, @function .align 4; __strnlen_user: + beqid r6,3f addik r3,r6,0 - beqi r3,3f 1: lbu r4,r5,r0 beqid r4,2f /* break on NUL */ @@ -102,6 +103,49 @@ __strnlen_user: .section __ex_table,"a" .word 1b,4b +/* Loop unrolling for __copy_tofrom_user: each COPY(offset) moves 32 bytes from r6 to r5 at the given offset via r4 and r19-r25, and registers every load and store in __ex_table */ +#define COPY(offset) \ +1: lwi r4 , r6, 0x0000 + offset; \ +2: lwi r19, r6, 0x0004 + offset; \ +3: lwi r20, r6, 0x0008 + offset; \ +4: lwi r21, r6, 0x000C + offset; \ +5: lwi r22, r6, 0x0010 + offset; \ +6: lwi r23, r6, 0x0014 + offset; \ +7: lwi r24, r6, 0x0018 + offset; \ +8: lwi r25, r6, 0x001C + offset; \ +9: swi r4 , r5, 0x0000 + offset; \ +10: swi r19, r5, 0x0004 + offset; \ +11: swi r20, r5, 0x0008 + offset; \ +12: swi r21, r5, 0x000C + offset; \ +13: swi r22, r5, 0x0010 + offset; \ +14: swi r23, r5, 0x0014 + offset; \ +15: swi r24, r5, 0x0018 + offset; \ +16: swi r25, r5, 0x001C + offset; \ + .section __ex_table,"a"; \ + .word 1b, 0f; \ + .word 2b, 0f; \ + .word 3b, 0f; \ + .word 4b, 0f; \ + .word 5b, 0f; \ + .word 6b, 0f; \ + .word 7b, 0f; \ + .word 8b, 0f; \ + .word 9b, 0f; \ + .word 10b, 0f; \ + .word 11b, 0f; \ + .word 12b, 0f; \ + .word 13b, 0f; \ + .word 14b, 0f; \ + .word 15b, 0f; \ + .word 
16b, 0f; \ + .text + +#define COPY_80(offset) \ + COPY(0x00 + offset);\ + COPY(0x20 + offset);\ + COPY(0x40 + offset);\ + COPY(0x60 + offset); + /* * int __copy_tofrom_user(char *to, char *from, int len) * Return: @@ -119,34 +163,79 @@ __copy_tofrom_user: * r7, r3 - count * r4 - tempval */ - beqid r7, 3f /* zero size is not likely */ - andi r3, r7, 0x3 /* filter add count */ - bneid r3, 4f /* if is odd value then byte copying */ + beqid r7, 0f /* zero size is not likely */ or r3, r5, r6 /* find if is any to/from unaligned */ - andi r3, r3, 0x3 /* mask unaligned */ - bneid r3, 1f /* it is unaligned -> then jump */ + or r3, r3, r7 /* find if count is unaligned */ + andi r3, r3, 0x3 /* keep the low 2 bits (0x3) */ + bneid r3, bu1 /* if r3 is not zero then byte copying */ + or r3, r0, r0 + + rsubi r3, r7, PAGE_SIZE /* r3 is zero only when len == PAGE_SIZE */ + beqid r3, page; or r3, r0, r0 -/* at least one 4 byte copy */ -5: lw r4, r6, r3 -6: sw r4, r5, r3 +w1: lw r4, r6, r3 /* at least one 4 byte copy */ +w2: sw r4, r5, r3 addik r7, r7, -4 - bneid r7, 5b + bneid r7, w1 addik r3, r3, 4 addik r3, r7, 0 rtsd r15, 8 nop -4: or r3, r0, r0 -1: lbu r4,r6,r3 -2: sb r4,r5,r3 + + .section __ex_table,"a" + .word w1, 0f; + .word w2, 0f; + .text + +.align 4 /* Alignment is important to keep icache happy */ +page: /* Create room on stack and save registers for storing values. NOTE(review): the COPY() __ex_table fixups target the shared 0: exit, which returns without reloading r19-r25 or undoing the r1 adjustment made here, so a fault inside this loop appears to leave the caller with a corrupted stack pointer and clobbered registers. Confirm and add a fault path that unwinds this frame. */ + addik r1, r1, -32 + swi r19, r1, 4 + swi r20, r1, 8 + swi r21, r1, 12 + swi r22, r1, 16 + swi r23, r1, 20 + swi r24, r1, 24 + swi r25, r1, 28 +loop: /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */ + /* Loop unrolling to get performance boost */ + COPY_80(0x000); + COPY_80(0x080); + COPY_80(0x100); + COPY_80(0x180); + /* copy loop: each pass moves 0x200 bytes */ + addik r6, r6, 0x200 + addik r7, r7, -0x200 + bneid r7, loop + addik r5, r5, 0x200 + /* Restore register content */ + lwi r19, r1, 4 + lwi r20, r1, 8 + lwi r21, r1, 12 + lwi r22, r1, 16 + lwi r23, r1, 20 + lwi r24, r1, 24 + lwi r25, r1, 28 + addik r1, r1, 32 + /* return back */ + 
addik r3, r7, 0 + rtsd r15, 8 + nop + +.align 4 /* Alignment is important to keep icache happy */ +bu1: lbu r4,r6,r3 +bu2: sb r4,r5,r3 addik r7,r7,-1 - bneid r7,1b + bneid r7,bu1 addik r3,r3,1 /* delay slot */ -3: +0: addik r3,r7,0 rtsd r15,8 nop .size __copy_tofrom_user, . - __copy_tofrom_user .section __ex_table,"a" - .word 1b,3b,2b,3b,5b,3b,6b,3b + .word bu1, 0b; + .word bu2, 0b; + .text