/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

        .align  7
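/*
 * __copy_tofrom_user(to, from, n):
 *      r3 = destination, r4 = source, r5 = byte count.
 * Returns 0 in r3 on success, or the number of bytes not copied
 * if a load or store faults.
 */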
_GLOBAL(__copy_tofrom_user)
        /* first check for a whole page copy on a page boundary */
        cmpldi  cr1,r5,16
        cmpdi   cr6,r5,4096
        or      r0,r3,r4
        neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
        andi.   r0,r0,4095
        std     r3,-24(r1)
        crand   cr0*4+2,cr0*4+2,cr6*4+2
        std     r4,-16(r1)
        std     r5,-8(r1)
        dcbt    0,r4
        beq     .Lcopy_page_4K
        andi.   r6,r6,7
        PPC_MTOCRF      0x01,r5
        blt     cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of
 * feature bits is POWER6.
 */
BEGIN_FTR_SECTION
        nop
FTR_SECTION_ELSE
        bne     .Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
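/*
 * Here either the destination is 8-byte aligned or the CPU handles
 * unaligned stores well enough that we did not bother aligning it.
 * The main loop copies 32 bytes per iteration, software-pipelined;
 * .Ldo_tail finishes the remaining 0-15 bytes.
 */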
.Ldst_aligned:
        addi    r3,r3,-16
BEGIN_FTR_SECTION
        andi.   r0,r4,7
        bne     .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
        srdi    r0,r5,5
        cmpdi   cr1,r0,0
20:     ld      r7,0(r4)
220:    ld      r6,8(r4)
        addi    r4,r4,16
        mtctr   r0
        andi.   r0,r5,0x10
        beq     22f
        addi    r3,r3,16
        addi    r4,r4,-16
        mr      r9,r7
        mr      r8,r6
        beq     cr1,72f
21:     ld      r7,16(r4)
221:    ld      r6,24(r4)
        addi    r4,r4,32
70:     std     r9,0(r3)
270:    std     r8,8(r3)
22:     ld      r9,0(r4)
222:    ld      r8,8(r4)
71:     std     r7,16(r3)
271:    std     r6,24(r3)
        addi    r3,r3,32
        bdnz    21b
72:     std     r9,0(r3)
272:    std     r8,8(r3)
        andi.   r5,r5,0xf
        beq+    3f
        addi    r4,r4,16
.Ldo_tail:
        addi    r3,r3,16
        bf      cr7*4+0,246f
244:    ld      r9,0(r4)
        addi    r4,r4,8
245:    std     r9,0(r3)
        addi    r3,r3,8
246:    bf      cr7*4+1,1f
23:     lwz     r9,0(r4)
        addi    r4,r4,4
73:     stw     r9,0(r3)
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
44:     lhz     r9,0(r4)
        addi    r4,r4,2
74:     sth     r9,0(r3)
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
45:     lbz     r9,0(r4)
75:     stb     r9,0(r3)
3:      li      r3,0
        blr

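/*
 * Source is not 8-byte aligned: load aligned doublewords and merge
 * adjacent pairs with sld/srd (shift counts in r10/r11) so that the
 * stores to the aligned destination stay full doublewords.
 */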
.Lsrc_unaligned:
        srdi    r6,r5,3
        addi    r5,r5,-16
        subf    r4,r0,r4
        srdi    r7,r5,4
        sldi    r10,r0,3
        cmpldi  cr6,r6,3
        andi.   r5,r5,7
        mtctr   r7
        subfic  r11,r10,64
        add     r5,r5,r0
        bt      cr7*4+0,28f

24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
25:     ld      r0,8(r4)
        sld     r6,r9,r10
26:     ldu     r9,16(r4)
        srd     r7,r0,r11
        sld     r8,r0,r10
        or      r7,r7,r6
        blt     cr6,79f
27:     ld      r0,8(r4)
        b       2f

28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
29:     ldu     r9,8(r4)
        sld     r8,r0,r10
        addi    r3,r3,-8
        blt     cr6,5f
30:     ld      r0,8(r4)
        srd     r12,r9,r11
        sld     r6,r9,r10
31:     ldu     r9,16(r4)
        or      r12,r8,r12
        srd     r7,r0,r11
        sld     r8,r0,r10
        addi    r3,r3,16
        beq     cr6,78f

1:      or      r7,r7,r6
32:     ld      r0,8(r4)
76:     std     r12,8(r3)
2:      srd     r12,r9,r11
        sld     r6,r9,r10
33:     ldu     r9,16(r4)
        or      r12,r8,r12
77:     stdu    r7,16(r3)
        srd     r7,r0,r11
        sld     r8,r0,r10
        bdnz    1b

78:     std     r12,8(r3)
        or      r7,r7,r6
79:     std     r7,16(r3)
5:      srd     r12,r9,r11
        or      r12,r8,r12
80:     std     r12,24(r3)
        bne     6f
        li      r3,0
        blr
6:      cmpwi   cr1,r5,8
        addi    r3,r3,32
        sld     r9,r9,r10
        ble     cr1,7f
34:     ld      r0,8(r4)
        srd     r7,r0,r11
        or      r9,r7,r9
7:
        bf      cr7*4+1,1f
        rotldi  r9,r9,32
94:     stw     r9,0(r3)
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
        rotldi  r9,r9,16
95:     sth     r9,0(r3)
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
        rotldi  r9,r9,8
96:     stb     r9,0(r3)
3:      li      r3,0
        blr

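/*
 * Destination is not 8-byte aligned: copy 1, 2 and/or 4 bytes (as
 * selected by r6, the distance to the next 8-byte boundary) to align
 * it, then rejoin .Ldst_aligned.
 */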
.Ldst_unaligned:
        PPC_MTOCRF      0x01,r6         /* put #bytes to 8B bdry into cr7 */
        subf    r5,r6,r5
        li      r7,0
        cmpldi  cr1,r5,16
        bf      cr7*4+3,1f
35:     lbz     r0,0(r4)
81:     stb     r0,0(r3)
        addi    r7,r7,1
1:      bf      cr7*4+2,2f
36:     lhzx    r0,r7,r4
82:     sthx    r0,r7,r3
        addi    r7,r7,2
2:      bf      cr7*4+1,3f
37:     lwzx    r0,r7,r4
83:     stwx    r0,r7,r3
3:      PPC_MTOCRF      0x01,r5
        add     r4,r6,r4
        add     r3,r6,r3
        b       .Ldst_aligned

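/*
 * Copies of fewer than 16 bytes: move 8, 4, 2 and/or 1 bytes
 * according to the low bits of the count (already in cr7).
 */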
.Lshort_copy:
        bf      cr7*4+0,1f
38:     lwz     r0,0(r4)
39:     lwz     r9,4(r4)
        addi    r4,r4,8
84:     stw     r0,0(r3)
85:     stw     r9,4(r3)
        addi    r3,r3,8
1:      bf      cr7*4+1,2f
40:     lwz     r0,0(r4)
        addi    r4,r4,4
86:     stw     r0,0(r3)
        addi    r3,r3,4
2:      bf      cr7*4+2,3f
41:     lhz     r0,0(r4)
        addi    r4,r4,2
87:     sth     r0,0(r3)
        addi    r3,r3,2
3:      bf      cr7*4+3,4f
42:     lbz     r0,0(r4)
88:     stb     r0,0(r3)
4:      li      r3,0
        blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */
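/*
 * In C terms the caller's contract is roughly (illustrative only):
 *
 *      left = __copy_tofrom_user(to, from, n);
 *      if (left == 0)
 *              all n bytes were copied;
 *      else
 *              the last 'left' bytes were not copied, and on a load
 *              fault the uncopied part of the destination is zeroed.
 */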

136:
137:
        add     r3,r3,r7
        b       1f
130:
131:
        addi    r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
        addi    r3,r3,8
132:
        addi    r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:      ld      r6,-24(r1)
        ld      r4,-16(r1)
        ld      r5,-8(r1)
        subf    r6,r6,r3
        add     r4,r4,r6
        subf    r5,r6,r5        /* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
        mtctr   r5
43:     lbz     r0,0(r4)
        addi    r4,r4,1
89:     stb     r0,0(r3)
        addi    r3,r3,1
        bdnz    43b
        li      r3,0            /* huh? all copied successfully this time? */
        blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:    mfctr   r5
        li      r0,0
        mr      r4,r3
        mr      r3,r5           /* return the number of bytes not copied */
1:      andi.   r9,r4,7
        beq     3f
90:     stb     r0,0(r4)
        addic.  r5,r5,-1
        addi    r4,r4,1
        bne     1b
        blr
3:      cmpldi  cr1,r5,8
        srdi    r9,r5,3
        andi.   r5,r5,7
        blt     cr1,93f
        mtctr   r9
91:     std     r0,0(r4)
        addi    r4,r4,8
        bdnz    91b
93:     beqlr
        mtctr   r5
92:     stb     r0,0(r4)
        addi    r4,r4,1
        bdnz    92b
        blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
        add     r3,r3,r7
        b       1f
371:
180:
        addi    r3,r3,8
171:
177:
        addi    r3,r3,8
370:
372:
176:
178:
        addi    r3,r3,4
185:
        addi    r3,r3,4
170:
172:
345:
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
        ld      r6,-24(r1)
        ld      r5,-8(r1)
        add     r6,r6,r5
        subf    r3,r3,r6        /* #bytes not copied */
190:
191:
192:
        blr                     /* #bytes not copied in r3 */

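/*
 * Each exception table entry pairs the address of an instruction that
 * can fault (the numeric labels on the loads and stores above) with
 * the fixup code that works out how many bytes were not copied.
 */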
        .section __ex_table,"a"
        .align  3
        .llong  20b,120b
        .llong  220b,320b
        .llong  21b,121b
        .llong  221b,321b
        .llong  70b,170b
        .llong  270b,370b
        .llong  22b,122b
        .llong  222b,322b
        .llong  71b,171b
        .llong  271b,371b
        .llong  72b,172b
        .llong  272b,372b
        .llong  244b,344b
        .llong  245b,345b
        .llong  23b,123b
        .llong  73b,173b
        .llong  44b,144b
        .llong  74b,174b
        .llong  45b,145b
        .llong  75b,175b
        .llong  24b,124b
        .llong  25b,125b
        .llong  26b,126b
        .llong  27b,127b
        .llong  28b,128b
        .llong  29b,129b
        .llong  30b,130b
        .llong  31b,131b
        .llong  32b,132b
        .llong  76b,176b
        .llong  33b,133b
        .llong  77b,177b
        .llong  78b,178b
        .llong  79b,179b
        .llong  80b,180b
        .llong  34b,134b
        .llong  94b,194b
        .llong  95b,195b
        .llong  96b,196b
        .llong  35b,135b
        .llong  81b,181b
        .llong  36b,136b
        .llong  82b,182b
        .llong  37b,137b
        .llong  83b,183b
        .llong  38b,138b
        .llong  39b,139b
        .llong  84b,184b
        .llong  85b,185b
        .llong  40b,140b
        .llong  86b,186b
        .llong  41b,141b
        .llong  87b,187b
        .llong  42b,142b
        .llong  88b,188b
        .llong  43b,143b
        .llong  89b,189b
        .llong  90b,190b
        .llong  91b,191b
        .llong  92b,192b

        .text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
.Lcopy_page_4K:
        std     r31,-32(1)
        std     r30,-40(1)
        std     r29,-48(1)
        std     r28,-56(1)
        std     r27,-64(1)
        std     r26,-72(1)
        std     r25,-80(1)
        std     r24,-88(1)
        std     r23,-96(1)
        std     r22,-104(1)
        std     r21,-112(1)
        std     r20,-120(1)
        li      r5,4096/32 - 1
        addi    r3,r3,-8
        li      r0,5
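/*
 * The unrolled loop below copies six streams spaced 128 bytes apart
 * (offsets 0, 128, ..., 640 from the running pointers), moving 24
 * bytes per stream per inner iteration so that the accesses are
 * spread across different cache lines.
 */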
0:      addi    r5,r5,-24
        mtctr   r0
20:     ld      r22,640(4)
21:     ld      r21,512(4)
22:     ld      r20,384(4)
23:     ld      r11,256(4)
24:     ld      r9,128(4)
25:     ld      r7,0(4)
26:     ld      r25,648(4)
27:     ld      r24,520(4)
28:     ld      r23,392(4)
29:     ld      r10,264(4)
30:     ld      r8,136(4)
31:     ldu     r6,8(4)
        cmpwi   r5,24
1:
32:     std     r22,648(3)
33:     std     r21,520(3)
34:     std     r20,392(3)
35:     std     r11,264(3)
36:     std     r9,136(3)
37:     std     r7,8(3)
38:     ld      r28,648(4)
39:     ld      r27,520(4)
40:     ld      r26,392(4)
41:     ld      r31,264(4)
42:     ld      r30,136(4)
43:     ld      r29,8(4)
44:     std     r25,656(3)
45:     std     r24,528(3)
46:     std     r23,400(3)
47:     std     r10,272(3)
48:     std     r8,144(3)
49:     std     r6,16(3)
50:     ld      r22,656(4)
51:     ld      r21,528(4)
52:     ld      r20,400(4)
53:     ld      r11,272(4)
54:     ld      r9,144(4)
55:     ld      r7,16(4)
56:     std     r28,664(3)
57:     std     r27,536(3)
58:     std     r26,408(3)
59:     std     r31,280(3)
60:     std     r30,152(3)
61:     stdu    r29,24(3)
62:     ld      r25,664(4)
63:     ld      r24,536(4)
64:     ld      r23,408(4)
65:     ld      r10,280(4)
66:     ld      r8,152(4)
67:     ldu     r6,24(4)
        bdnz    1b
68:     std     r22,648(3)
69:     std     r21,520(3)
70:     std     r20,392(3)
71:     std     r11,264(3)
72:     std     r9,136(3)
73:     std     r7,8(3)
74:     addi    r4,r4,640
75:     addi    r3,r3,648
        bge     0b
        mtctr   r5
76:     ld      r7,0(4)
77:     ld      r8,8(4)
78:     ldu     r9,16(4)
3:
79:     ld      r10,8(4)
80:     std     r7,8(3)
81:     ld      r7,16(4)
82:     std     r8,16(3)
83:     ld      r8,24(4)
84:     std     r9,24(3)
85:     ldu     r9,32(4)
86:     stdu    r10,32(3)
        bdnz    3b
4:
87:     ld      r10,8(4)
88:     std     r7,8(3)
89:     std     r8,16(3)
90:     std     r9,24(3)
91:     std     r10,32(3)
9:      ld      r20,-120(1)
        ld      r21,-112(1)
        ld      r22,-104(1)
        ld      r23,-96(1)
        ld      r24,-88(1)
        ld      r25,-80(1)
        ld      r26,-72(1)
        ld      r27,-64(1)
        ld      r28,-56(1)
        ld      r29,-48(1)
        ld      r30,-40(1)
        ld      r31,-32(1)
        li      r3,0
        blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:    ld      r20,-120(1)
        ld      r21,-112(1)
        ld      r22,-104(1)
        ld      r23,-96(1)
        ld      r24,-88(1)
        ld      r25,-80(1)
        ld      r26,-72(1)
        ld      r27,-64(1)
        ld      r28,-56(1)
        ld      r29,-48(1)
        ld      r30,-40(1)
        ld      r31,-32(1)
        ld      r3,-24(r1)
        ld      r4,-16(r1)
        li      r5,4096
        b       .Ldst_aligned

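/*
 * Every load and store in the page-copy loop shares the single fixup
 * at label 100 above, which restores the saved registers and retries
 * the whole copy through the standard, byte-accurate path.
 */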
        .section __ex_table,"a"
        .align  3
        .llong  20b,100b
        .llong  21b,100b
        .llong  22b,100b
        .llong  23b,100b
        .llong  24b,100b
        .llong  25b,100b
        .llong  26b,100b
        .llong  27b,100b
        .llong  28b,100b
        .llong  29b,100b
        .llong  30b,100b
        .llong  31b,100b
        .llong  32b,100b
        .llong  33b,100b
        .llong  34b,100b
        .llong  35b,100b
        .llong  36b,100b
        .llong  37b,100b
        .llong  38b,100b
        .llong  39b,100b
        .llong  40b,100b
        .llong  41b,100b
        .llong  42b,100b
        .llong  43b,100b
        .llong  44b,100b
        .llong  45b,100b
        .llong  46b,100b
        .llong  47b,100b
        .llong  48b,100b
        .llong  49b,100b
        .llong  50b,100b
        .llong  51b,100b
        .llong  52b,100b
        .llong  53b,100b
        .llong  54b,100b
        .llong  55b,100b
        .llong  56b,100b
        .llong  57b,100b
        .llong  58b,100b
        .llong  59b,100b
        .llong  60b,100b
        .llong  61b,100b
        .llong  62b,100b
        .llong  63b,100b
        .llong  64b,100b
        .llong  65b,100b
        .llong  66b,100b
        .llong  67b,100b
        .llong  68b,100b
        .llong  69b,100b
        .llong  70b,100b
        .llong  71b,100b
        .llong  72b,100b
        .llong  73b,100b
        .llong  74b,100b
        .llong  75b,100b
        .llong  76b,100b
        .llong  77b,100b
        .llong  78b,100b
        .llong  79b,100b
        .llong  80b,100b
        .llong  81b,100b
        .llong  82b,100b
        .llong  83b,100b
        .llong  84b,100b
        .llong  85b,100b
        .llong  86b,100b
        .llong  87b,100b
        .llong  88b,100b
        .llong  89b,100b
        .llong  90b,100b
        .llong  91b,100b