Pull release into acpica branch
[pandora-kernel.git] / arch / arm / lib / csumpartialcopygeneric.S
1 /*
2  *  linux/arch/arm/lib/csumpartialcopygeneric.S
3  *
4  *  Copyright (C) 1995-2001 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * unsigned int
13  * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
14  *  r0 = src, r1 = dst, r2 = len, r3 = sum
15  *  Returns : r0 = checksum
16  *
17  * Note that 'tst' and 'teq' preserve the carry flag.
18  */
19
/*
 * Symbolic names for the four argument registers (see the register
 * assignment in the header comment: r0 = src, r1 = dst, r2 = len,
 * r3 = sum).  All code below refers to the arguments by these names.
 */
20 src     .req    r0              @ source pointer
21 dst     .req    r1              @ destination pointer, post-incremented by the stores
22 len     .req    r2              @ remaining byte count
23 sum     .req    r3              @ running checksum accumulator
24
/*
 * Zero-length exit, reached from .Lless8 when len == 0.  The incoming
 * sum is handed back unchanged in r0 (no carry fold, no rotation).
 * load_regs is a macro that is not defined in this file; presumably it
 * restores the registers stored by save_regs and returns to the
 * caller -- TODO confirm against its definition.
 */
25 .Lzero:         mov     r0, sum
26                 load_regs       ea
27
28                 /*
29                  * Align an unaligned destination pointer.  We know that
30                  * we have >= 8 bytes here, so we don't need to check
31                  * the length.  Note that the source pointer hasn't been
32                  * aligned yet.
                    *
                    * Subroutine: entered with bl from the entry code when
                    * (dst & 3) != 0 and left through lr.  It copies 1, 2 or
                    * 3 bytes (for dst & 3 == 3, 2 or 1 respectively),
                    * decrements len accordingly and accumulates the bytes
                    * into sum.
                    *
                    * The carry flag is live on entry and on exit.  Only the
                    * adcs instructions modify it here: sub and strb do not
                    * set flags, and tst preserves C (see the file header).
                    *
                    * A byte stored at an odd dst address is accumulated
                    * with put_byte_1, a byte at an even address with
                    * put_byte_0; .Ldone rotates the final result when the
                    * original dst was odd.
                    *
                    * load1b, load2b, put_byte_0 and put_byte_1 are defined
                    * outside this file.  Presumably load1b/load2b fetch one
                    * or two bytes from src with post-increment and the
                    * put_byte_N operands shift a byte into byte lane N --
                    * TODO confirm against their definitions.
33                  */
34 .Ldst_unaligned:
35                 tst     dst, #1                 @ is dst at an odd address?
36                 beq     .Ldst_16bit             @ no: dst & 3 == 2, copy two bytes
37
38                 load1b  ip
39                 sub     len, len, #1            @ no S suffix, so C is left alone
40                 adcs    sum, sum, ip, put_byte_1        @ update checksum
41                 strb    ip, [dst], #1
42                 tst     dst, #2                 @ word aligned after this one byte?
43                 moveq   pc, lr                  @ dst is now 32bit aligned
44
45 .Ldst_16bit:    load2b  r8, ip
46                 sub     len, len, #2            @ no S suffix, so C is left alone
47                 adcs    sum, sum, r8, put_byte_0
48                 strb    r8, [dst], #1
49                 adcs    sum, sum, ip, put_byte_1
50                 strb    ip, [dst], #1
51                 mov     pc, lr                  @ dst is now 32bit aligned
52
53                 /*
54                  * Handle 0 to 7 bytes, with any alignment of source and
55                  * destination pointers.  Note that when we get here, C = 0
                    * (the entry code arrives here via blo after
                    * cmp len, #8, and blo is only taken with C clear).
                    *
                    * Everything is copied bytewise:
                    *   - len == 0 leaves through .Lzero;
                    *   - if dst is odd, one byte is copied first and
                    *     accumulated with put_byte_1;
                    *   - byte pairs are copied while (len & 6) != 0;
                    *   - a final single byte is copied if (len & 1) != 0.
                    * All non-empty paths end at .Ldone.
                    *
                    * load1b/load2b and put_byte_0/put_byte_1 are defined
                    * outside this file; presumably byte loads from src with
                    * post-increment and byte-lane shifts -- TODO confirm.
56                  */
57 .Lless8:        teq     len, #0                 @ check for zero count
58                 beq     .Lzero
59
60                 /* we must have at least one byte. */
61                 tst     dst, #1                 @ dst 16-bit aligned
62                 beq     .Lless8_aligned
63
64                 /* Align dst */
65                 load1b  ip
66                 sub     len, len, #1            @ no S suffix, so C is left alone
67                 adcs    sum, sum, ip, put_byte_1        @ update checksum
68                 strb    ip, [dst], #1
69                 tst     len, #6                 @ any byte pairs left?
70                 beq     .Lless8_byteonly        @ no: at most one more byte
71
72 1:              load2b  r8, ip
73                 sub     len, len, #2            @ no S suffix, so C is left alone
74                 adcs    sum, sum, r8, put_byte_0
75                 strb    r8, [dst], #1
76                 adcs    sum, sum, ip, put_byte_1
77                 strb    ip, [dst], #1
78 .Lless8_aligned:
79                 tst     len, #6                 @ loop while bits 1-2 of len are set
80                 bne     1b
81 .Lless8_byteonly:
82                 tst     len, #1                 @ odd byte remaining?
83                 beq     .Ldone
84                 load1b  r8
85                 adcs    sum, sum, r8, put_byte_0        @ update checksum
86                 strb    r8, [dst], #1
87                 b       .Ldone
88
/*
 * Function entry and the fast path for word-aligned src and dst.
 *
 * Flow:
 *   1. len < 8            -> .Lless8 (bytewise copy).
 *   2. Clear C, then bring dst to a word boundary with
 *      .Ldst_unaligned if (dst & 3) != 0.
 *   3. (src & 3) != 0     -> .Lsrc_not_aligned.
 *   4. Otherwise copy 16 bytes per iteration (label 1), then an
 *      optional 8 and 4 bytes (labels 2 and 3), then 1-3 trailing
 *      bytes (label 4 and .Lexit), and fall through into .Ldone.
 *
 * The end-around carry is kept in the C flag between adcs
 * instructions, so the code relies on every instruction placed between
 * two adcs leaving C as it was.
 *
 * .Lexit is shared with the unaligned-source variants.  On arrival r5
 * holds the last byte to copy and bit 0 of len says whether that byte
 * exists.
 *
 * Not defined in this file: FN_ENTRY, save_regs, load1l, load2l,
 * load4l, push, get_byte_0/1/2, put_byte_0.  Presumably FN_ENTRY emits
 * the function label, save_regs stores the registers that load_regs
 * later restores, loadNl read N words from src with post-increment,
 * get_byte_N extracts the N-th byte in memory order, and "push #16"
 * keeps only the first two bytes of the word -- TODO confirm against
 * their definitions.
 */
89 FN_ENTRY
90                 mov     ip, sp                  @ remember the sp value at entry
91                 save_regs
92                 sub     fp, ip, #4              @ fp = entry sp - 4
93
94                 cmp     len, #8                 @ Ensure that we have at least
95                 blo     .Lless8                 @ 8 bytes to copy.
96
97                 adds    sum, sum, #0            @ C = 0
98                 tst     dst, #3                 @ Test destination alignment
99                 blne    .Ldst_unaligned         @ align destination, return here
100
101                 /*
102                  * Ok, the dst pointer is now 32bit aligned, and we know
103                  * that we must have more than 4 bytes to copy.  Note
104                  * that C contains the carry from the dst alignment above.
105                  */
106
107                 tst     src, #3                 @ Test source alignment
108                 bne     .Lsrc_not_aligned
109
110                 /* Routine for src & dst aligned */
111
112                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
113                 beq     2f                      @ fewer than 16 bytes left
114
115 1:              load4l  r4, r5, r6, r7
116                 stmia   dst!, {r4, r5, r6, r7}
117                 adcs    sum, sum, r4
118                 adcs    sum, sum, r5
119                 adcs    sum, sum, r6
120                 adcs    sum, sum, r7
121                 sub     ip, ip, #16             @ no S suffix, so C is left alone
122                 teq     ip, #0
123                 bne     1b
124
125 2:              ands    ip, len, #12            @ ip = whole words left (8 and/or 4 bytes)
126                 beq     4f
127                 tst     ip, #8
128                 beq     3f                      @ only the single word remains
129                 load2l  r4, r5
130                 stmia   dst!, {r4, r5}
131                 adcs    sum, sum, r4
132                 adcs    sum, sum, r5
133                 tst     ip, #4
134                 beq     4f
135
136 3:              load1l  r4
137                 str     r4, [dst], #4
138                 adcs    sum, sum, r4
139
140 4:              ands    len, len, #3            @ len = number of trailing bytes (0-3)
141                 beq     .Ldone
142                 load1l  r4                      @ r4 = word holding the trailing bytes
143                 tst     len, #2
144                 mov     r5, r4, get_byte_0      @ r5 = first trailing byte (flags untouched)
145                 beq     .Lexit                  @ len == 1: only that byte is left
146                 adcs    sum, sum, r4, push #16
147                 strb    r5, [dst], #1
148                 mov     r5, r4, get_byte_1
149                 strb    r5, [dst], #1
150                 mov     r5, r4, get_byte_2      @ r5 = third byte, used below if len == 3
151 .Lexit:         tst     len, #1                 @ one last byte to copy?
152                 strneb  r5, [dst], #1
153                 andne   r5, r5, #255            @ keep only the byte that was stored
154                 adcnes  sum, sum, r5, put_byte_0
155
156                 /*
157                  * If the dst pointer was not 16-bit aligned, we
158                  * need to rotate the checksum here to get around
159                  * the inefficient byte manipulations in the
160                  * architecture independent code.
                     *
                     * Common exit for every non-empty copy.  The pending
                     * carry is folded into the result, then the original
                     * dst value is reloaded from the stack into the sum
                     * register (free to reuse, its value is already in r0).
                     * The slot at [sp, #0] is presumably where save_regs
                     * stored the incoming r1 -- TODO confirm against the
                     * save_regs definition, which is not in this file.
                     * load_regs is likewise external; presumably it restores
                     * the saved registers and returns with r0 as the result.
161                  */
162 .Ldone:         adc     r0, sum, #0             @ r0 = sum + final carry
163                 ldr     sum, [sp, #0]           @ dst
164                 tst     sum, #1                 @ was the original dst odd?
165                 movne   r0, r0, ror #8          @ yes: rotate the result by one byte
166                 load_regs       ea
167
/*
 * Source pointer not word aligned (dst already is).  src is rounded
 * down to a word boundary and every output word is assembled from two
 * consecutive source words.  The code from here up to .Lsrc2_aligned
 * handles (src & 3) == 1; offsets 2 and 3 branch to .Lsrc2_aligned and
 * .Lsrc3_aligned, which have the same structure with different shift
 * amounts.
 *
 * Invariant: r4 holds the bytes of the previous source word that have
 * not been stored yet (three of them for this offset).
 *
 * The carry left by the dst alignment is folded into sum first,
 * because the cmp below overwrites C.  For offset 1 the cmp ip, #2
 * borrows, so C is clear when the adcs chain starts; the mov that
 * follows has no S suffix.
 *
 * push, pull, get_byte_0/1/2 and load1l/load2l/load4l are defined
 * outside this file.  Presumably pull and push are logical shifts in
 * opposite directions, chosen per endianness, so that "pull #8" drops
 * the byte already consumed and "push #24" appends the first byte of
 * the next word -- TODO confirm against their definitions.
 */
168 .Lsrc_not_aligned:
169                 adc     sum, sum, #0            @ include C from dst alignment
170                 and     ip, src, #3             @ ip = byte offset of src within its word
171                 bic     src, src, #3            @ round src down to a word boundary
172                 load1l  r5                      @ r5 = word containing the first wanted bytes
173                 cmp     ip, #2
174                 beq     .Lsrc2_aligned          @ offset 2
175                 bhi     .Lsrc3_aligned          @ offset 3
176                 mov     r4, r5, pull #8         @ C = 0
177                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
178                 beq     2f
179 1:              load4l  r5, r6, r7, r8
180                 orr     r4, r4, r5, push #24
181                 mov     r5, r5, pull #8
182                 orr     r5, r5, r6, push #24
183                 mov     r6, r6, pull #8
184                 orr     r6, r6, r7, push #24
185                 mov     r7, r7, pull #8
186                 orr     r7, r7, r8, push #24
187                 stmia   dst!, {r4, r5, r6, r7}
188                 adcs    sum, sum, r4
189                 adcs    sum, sum, r5
190                 adcs    sum, sum, r6
191                 adcs    sum, sum, r7
192                 mov     r4, r8, pull #8         @ leftover bytes for the next output word
193                 sub     ip, ip, #16             @ no S suffix, so C is left alone
194                 teq     ip, #0
195                 bne     1b
196 2:              ands    ip, len, #12            @ ip = whole words left (8 and/or 4 bytes)
197                 beq     4f
198                 tst     ip, #8
199                 beq     3f
200                 load2l  r5, r6
201                 orr     r4, r4, r5, push #24
202                 mov     r5, r5, pull #8
203                 orr     r5, r5, r6, push #24
204                 stmia   dst!, {r4, r5}
205                 adcs    sum, sum, r4
206                 adcs    sum, sum, r5
207                 mov     r4, r6, pull #8
208                 tst     ip, #4
209                 beq     4f
210 3:              load1l  r5
211                 orr     r4, r4, r5, push #24
212                 str     r4, [dst], #4
213                 adcs    sum, sum, r4
214                 mov     r4, r5, pull #8
215 4:              ands    len, len, #3            @ len = number of trailing bytes (0-3)
216                 beq     .Ldone
217                 mov     r5, r4, get_byte_0      @ r5 = first leftover byte
218                 tst     len, #2
219                 beq     .Lexit                  @ len == 1: .Lexit stores and sums r5
220                 adcs    sum, sum, r4, push #16
221                 strb    r5, [dst], #1
222                 mov     r5, r4, get_byte_1
223                 strb    r5, [dst], #1
224                 mov     r5, r4, get_byte_2      @ third leftover byte for .Lexit
225                 b       .Lexit
226
/*
 * Unaligned-source copy for (src & 3) == 2.  Entered from
 * .Lsrc_not_aligned with src already rounded down and r5 holding the
 * first source word.  Same structure as the offset-1 code, with 16-bit
 * shifts: r4 carries the two not-yet-stored bytes of the previous
 * source word.
 *
 * The cmp ip, #2 that selected this path compared equal and therefore
 * left C set.  The adds of zero below cannot carry out, so it clears C
 * before the adcs chain starts.
 *
 * Tail: r4 only holds two bytes, so a third trailing byte has to be
 * fetched from src with load1b before jumping to .Lexit.
 *
 * push, pull, get_byte_0/1, load1b and load1l/load2l/load4l are
 * defined outside this file; presumably endian-dependent shifts, byte
 * extraction and post-incrementing loads from src -- TODO confirm.
 */
227 .Lsrc2_aligned: mov     r4, r5, pull #16
228                 adds    sum, sum, #0            @ clear C (adding zero never carries)
229                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
230                 beq     2f
231 1:              load4l  r5, r6, r7, r8
232                 orr     r4, r4, r5, push #16
233                 mov     r5, r5, pull #16
234                 orr     r5, r5, r6, push #16
235                 mov     r6, r6, pull #16
236                 orr     r6, r6, r7, push #16
237                 mov     r7, r7, pull #16
238                 orr     r7, r7, r8, push #16
239                 stmia   dst!, {r4, r5, r6, r7}
240                 adcs    sum, sum, r4
241                 adcs    sum, sum, r5
242                 adcs    sum, sum, r6
243                 adcs    sum, sum, r7
244                 mov     r4, r8, pull #16        @ leftover bytes for the next output word
245                 sub     ip, ip, #16             @ no S suffix, so C is left alone
246                 teq     ip, #0
247                 bne     1b
248 2:              ands    ip, len, #12            @ ip = whole words left (8 and/or 4 bytes)
249                 beq     4f
250                 tst     ip, #8
251                 beq     3f
252                 load2l  r5, r6
253                 orr     r4, r4, r5, push #16
254                 mov     r5, r5, pull #16
255                 orr     r5, r5, r6, push #16
256                 stmia   dst!, {r4, r5}
257                 adcs    sum, sum, r4
258                 adcs    sum, sum, r5
259                 mov     r4, r6, pull #16
260                 tst     ip, #4
261                 beq     4f
262 3:              load1l  r5
263                 orr     r4, r4, r5, push #16
264                 str     r4, [dst], #4
265                 adcs    sum, sum, r4
266                 mov     r4, r5, pull #16
267 4:              ands    len, len, #3            @ len = number of trailing bytes (0-3)
268                 beq     .Ldone
269                 mov     r5, r4, get_byte_0      @ r5 = first leftover byte
270                 tst     len, #2
271                 beq     .Lexit                  @ len == 1: .Lexit stores and sums r5
272                 adcs    sum, sum, r4            @ sum both leftover bytes at once
273                 strb    r5, [dst], #1
274                 mov     r5, r4, get_byte_1
275                 strb    r5, [dst], #1
276                 tst     len, #1                 @ len == 3?
277                 beq     .Ldone                  @ no: the two bytes were everything
278                 load1b  r5                      @ third byte is not in r4, fetch it
279                 b       .Lexit
280
/*
 * Unaligned-source copy for (src & 3) == 3.  Entered from
 * .Lsrc_not_aligned with src already rounded down and r5 holding the
 * first source word.  Same structure as the offset-1 code, with the
 * shift amounts swapped: r4 carries the single not-yet-stored byte of
 * the previous source word.
 *
 * The cmp ip, #2 that selected this path (bhi) left C set.  The adds
 * of zero below cannot carry out, so it clears C before the adcs chain
 * starts.
 *
 * Tail: r4 only holds one byte.  For two or three trailing bytes that
 * byte is stored and summed, then a further word is loaded to supply
 * the second byte and, via r5 and .Lexit, the optional third one.
 *
 * push, pull, get_byte_0/1 and load1l/load2l/load4l are defined
 * outside this file; presumably endian-dependent shifts, byte
 * extraction and post-incrementing word loads from src, with
 * "push #24" keeping only the first byte of the word -- TODO confirm.
 */
281 .Lsrc3_aligned: mov     r4, r5, pull #24
282                 adds    sum, sum, #0            @ clear C (adding zero never carries)
283                 bics    ip, len, #15            @ ip = len rounded down to a multiple of 16
284                 beq     2f
285 1:              load4l  r5, r6, r7, r8
286                 orr     r4, r4, r5, push #8
287                 mov     r5, r5, pull #24
288                 orr     r5, r5, r6, push #8
289                 mov     r6, r6, pull #24
290                 orr     r6, r6, r7, push #8
291                 mov     r7, r7, pull #24
292                 orr     r7, r7, r8, push #8
293                 stmia   dst!, {r4, r5, r6, r7}
294                 adcs    sum, sum, r4
295                 adcs    sum, sum, r5
296                 adcs    sum, sum, r6
297                 adcs    sum, sum, r7
298                 mov     r4, r8, pull #24        @ leftover byte for the next output word
299                 sub     ip, ip, #16             @ no S suffix, so C is left alone
300                 teq     ip, #0
301                 bne     1b
302 2:              ands    ip, len, #12            @ ip = whole words left (8 and/or 4 bytes)
303                 beq     4f
304                 tst     ip, #8
305                 beq     3f
306                 load2l  r5, r6
307                 orr     r4, r4, r5, push #8
308                 mov     r5, r5, pull #24
309                 orr     r5, r5, r6, push #8
310                 stmia   dst!, {r4, r5}
311                 adcs    sum, sum, r4
312                 adcs    sum, sum, r5
313                 mov     r4, r6, pull #24
314                 tst     ip, #4
315                 beq     4f
316 3:              load1l  r5
317                 orr     r4, r4, r5, push #8
318                 str     r4, [dst], #4
319                 adcs    sum, sum, r4
320                 mov     r4, r5, pull #24
321 4:              ands    len, len, #3            @ len = number of trailing bytes (0-3)
322                 beq     .Ldone
323                 mov     r5, r4, get_byte_0      @ r5 = the one leftover byte
324                 tst     len, #2
325                 beq     .Lexit                  @ len == 1: .Lexit stores and sums r5
326                 strb    r5, [dst], #1
327                 adcs    sum, sum, r4            @ sum the leftover byte just stored
328                 load1l  r4                      @ next word supplies the remaining bytes
329                 mov     r5, r4, get_byte_0
330                 strb    r5, [dst], #1
331                 adcs    sum, sum, r4, push #24
332                 mov     r5, r4, get_byte_1      @ possible third byte for .Lexit
333                 b       .Lexit