Merge branch 'sh-latest' of git://github.com/pmundt/linux-sh
[pandora-kernel.git] / arch / microblaze / lib / uaccess_old.S
index 5810cec..f037266 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/errno.h>
 #include <linux/linkage.h>
+#include <asm/page.h>
 
 /*
  * int __strncpy_user(char *to, char *from, int len);
@@ -33,8 +34,8 @@ __strncpy_user:
         * r3 - temp count
         * r4 - temp val
         */
+       beqid   r7,3f
        addik   r3,r7,0         /* temp_count = len */
-       beqi    r3,3f
 1:
        lbu     r4,r6,r0
        sb      r4,r5,r0
@@ -76,8 +77,8 @@ __strncpy_user:
 .type  __strnlen_user, @function
 .align 4;
 __strnlen_user:
+       beqid   r6,3f
        addik   r3,r6,0
-       beqi    r3,3f
 1:
        lbu     r4,r5,r0
        beqid   r4,2f           /* break on NUL */
@@ -102,6 +103,49 @@ __strnlen_user:
        .section        __ex_table,"a"
        .word   1b,4b
 
+/*
+ * Loop unrolling for __copy_tofrom_user.
+ *
+ * COPY(offset) moves 0x20 bytes: eight word loads from r6 + offset into
+ * r4 and r19-r25, followed by eight word stores from those registers to
+ * r5 + offset. Each of the sixteen accesses gets an __ex_table entry
+ * whose fixup address is "0f", i.e. the next "0:" label after the place
+ * where the macro is expanded, so the user of the macro has to provide
+ * that label. The macro switches back to .text before it ends.
+ */
+#define COPY(offset)   \
+1:     lwi     r4 , r6, 0x0000 + offset;       \
+2:     lwi     r19, r6, 0x0004 + offset;       \
+3:     lwi     r20, r6, 0x0008 + offset;       \
+4:     lwi     r21, r6, 0x000C + offset;       \
+5:     lwi     r22, r6, 0x0010 + offset;       \
+6:     lwi     r23, r6, 0x0014 + offset;       \
+7:     lwi     r24, r6, 0x0018 + offset;       \
+8:     lwi     r25, r6, 0x001C + offset;       \
+9:     swi     r4 , r5, 0x0000 + offset;       \
+10:    swi     r19, r5, 0x0004 + offset;       \
+11:    swi     r20, r5, 0x0008 + offset;       \
+12:    swi     r21, r5, 0x000C + offset;       \
+13:    swi     r22, r5, 0x0010 + offset;       \
+14:    swi     r23, r5, 0x0014 + offset;       \
+15:    swi     r24, r5, 0x0018 + offset;       \
+16:    swi     r25, r5, 0x001C + offset;       \
+       .section __ex_table,"a";                \
+       .word   1b, 0f;                         \
+       .word   2b, 0f;                         \
+       .word   3b, 0f;                         \
+       .word   4b, 0f;                         \
+       .word   5b, 0f;                         \
+       .word   6b, 0f;                         \
+       .word   7b, 0f;                         \
+       .word   8b, 0f;                         \
+       .word   9b, 0f;                         \
+       .word   10b, 0f;                        \
+       .word   11b, 0f;                        \
+       .word   12b, 0f;                        \
+       .word   13b, 0f;                        \
+       .word   14b, 0f;                        \
+       .word   15b, 0f;                        \
+       .word   16b, 0f;                        \
+       .text
+
+#define COPY_80(offset)        /* four COPY() groups: 0x80 bytes starting at offset */ \
+       COPY(0x00 + offset);\
+       COPY(0x20 + offset);\
+       COPY(0x40 + offset);\
+       COPY(0x60 + offset);
+
 /*
  * int __copy_tofrom_user(char *to, char *from, int len)
  * Return:
@@ -119,34 +163,91 @@ __copy_tofrom_user:
         * r7, r3 - count
         * r4 - tempval
         */
-       beqid   r7, 3f /* zero size is not likely */
-       andi    r3, r7, 0x3 /* filter add count */
-       bneid   r3, 4f /* if is odd value then byte copying */
+       /*
+        * Strategy: byte loop unless to, from and len are all 4-byte
+        * aligned; unrolled loop when len == PAGE_SIZE; word loop otherwise.
+        * Every path hands the remaining count (r7) back in r3.
+        */
+       beqid   r7, 0f /* zero size is not likely */
        or      r3, r5, r6 /* find if is any to/from unaligned */
-       andi    r3, r3, 0x3 /* mask unaligned */
-       bneid   r3, 1f /* it is unaligned -> then jump */
+       or      r3, r3, r7 /* find if count is unaligned */
+       andi    r3, r3, 0x3 /* mask low 2 bits */
+       bneid   r3, bu1 /* if r3 is not zero then byte copying */
+       or      r3, r0, r0 /* delay slot: start byte index at 0 */
+
+       rsubi   r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+       beqid   r3, page;
        or      r3, r0, r0
 
-/* at least one 4 byte copy */
-5:     lw      r4, r6, r3
-6:     sw      r4, r5, r3
+w1:    lw      r4, r6, r3 /* at least one 4 byte copy */
+w2:    sw      r4, r5, r3
        addik   r7, r7, -4
-       bneid   r7, 5b
+       bneid   r7, w1
        addik   r3, r3, 4
+0:     /* exit for zero length, word loop done and word loop faults */
        addik   r3, r7, 0
        rtsd    r15, 8
        nop
-4:     or      r3, r0, r0
-1:     lbu     r4,r6,r3
-2:     sb      r4,r5,r3
+
+       .section        __ex_table,"a"
+       /* 0b: faults in the word loop leave through the exit just above */
+       .word   w1, 0b;
+       .word   w2, 0b;
+       .text
+
+.align 4 /* Alignment is important to keep icache happy */
+page:  /* Create room on stack and save registers for storing values */
+       addik   r1, r1, -32
+       swi     r19, r1, 4
+       swi     r20, r1, 8
+       swi     r21, r1, 12
+       swi     r22, r1, 16
+       swi     r23, r1, 20
+       swi     r24, r1, 24
+       swi     r25, r1, 28
+loop:  /* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+       /* Loop unrolling to get performance boost */
+       COPY_80(0x000);
+       COPY_80(0x080);
+       COPY_80(0x100);
+       COPY_80(0x180);
+       /* copy loop */
+       addik   r6, r6, 0x200
+       addik   r7, r7, -0x200
+       bneid   r7, loop
+       addik   r5, r5, 0x200
+0:     /*
+        * Restore register content. COPY() sends its fault fixups to the
+        * next 0: label, i.e. here, so a faulting page copy also restores
+        * r19-r25 and r1 before returning. r7 is only updated once per
+        * 0x200-byte pass, so after a fault it still counts the whole pass.
+        */
+       lwi     r19, r1, 4
+       lwi     r20, r1, 8
+       lwi     r21, r1, 12
+       lwi     r22, r1, 16
+       lwi     r23, r1, 20
+       lwi     r24, r1, 24
+       lwi     r25, r1, 28
+       addik   r1, r1, 32
+       /* return back */
+       addik   r3, r7, 0
+       rtsd    r15, 8
+       nop
+
+.align 4 /* Alignment is important to keep icache happy */
+bu1:   lbu     r4,r6,r3
+bu2:   sb      r4,r5,r3
        addik   r7,r7,-1
-       bneid   r7,1b
+       bneid   r7,bu1
        addik   r3,r3,1         /* delay slot */
-3:
+0:
        addik   r3,r7,0
        rtsd    r15,8
        nop
        .size   __copy_tofrom_user, . - __copy_tofrom_user
 
        .section        __ex_table,"a"
-       .word   1b,3b,2b,3b,5b,3b,6b,3b
+       .word   bu1, 0b;
+       .word   bu2, 0b;
+       .text