arch/sh/lib/checksum.S

   1 /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
   2  *
   3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   4  *              operating system.  INET is implemented using the  BSD Socket
   5  *              interface as the means of communication with the user level.
   6  *
   7  *              IP/TCP/UDP checksumming routines
   8  *
   9  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
  10  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  11  *              Tom May, <ftom@netcom.com>
  12  *              Pentium Pro/II routines:
  13  *              Alexander Kjeldaas <astor@guardian.no>
  14  *              Finn Arne Gangstad <finnag@guardian.no>
  15  *              Lots of code moved from tcp.c and ip.c; see those files
  16  *              for more names.
  17  *
  18  * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  19  *                           handling.
  20  *              Andi Kleen,  add zeroing on error
  21  *                   converted to pure assembler
  22  *
  23  * SuperH version:  Copyright (C) 1999  Niibe Yutaka
  24  *
  25  *              This program is free software; you can redistribute it and/or
  26  *              modify it under the terms of the GNU General Public License
  27  *              as published by the Free Software Foundation; either version
  28  *              2 of the License, or (at your option) any later version.
  29  */
  30
  31 #include <asm/errno.h>
  32 #include <linux/linkage.h>
  33
  34 /*
  35  * computes a partial checksum, e.g. for TCP/UDP fragments
  36  */
  37
  38 /*
  39  * unsigned int csum_partial(const unsigned char *buf, int len,
  40  *                           unsigned int sum);
  41  */
  42
  43 .text
  44 ENTRY(csum_partial)
  45           /*
  46            * Experiments with Ethernet and SLIP connections show that buff
  47            * is aligned on either a 2-byte or 4-byte boundary.  We get at
  48            * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  49            * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  50            * alignment for the unrolled loop.
  51            */
  52         mov     r5, r1
  53         mov     r4, r0
  54         tst     #2, r0          ! Check alignment.
  55         bt      2f              ! Jump if alignment is ok.
  56         !
  57         add     #-2, r5         ! Alignment uses up two bytes.
  58         cmp/pz  r5              !
  59         bt/s    1f              ! Jump if we had at least two bytes.
  60          clrt
  61         bra     6f
  62          add    #2, r5          ! r5 was < 2.  Deal with it.
  63 1:
  64         mov     r5, r1          ! Save new len for later use.
  65         mov.w   @r4+, r0
  66         extu.w  r0, r0
  67         addc    r0, r6
  68         bf      2f
  69         add     #1, r6
  70 2:
  71         mov     #-5, r0
  72         shld    r0, r5
  73         tst     r5, r5
  74         bt/s    4f              ! if it's =0, go to 4f
  75          clrt
  76         .align  2
  77 3:
  78         mov.l   @r4+, r0
  79         mov.l   @r4+, r2
  80         mov.l   @r4+, r3
  81         addc    r0, r6
  82         mov.l   @r4+, r0
  83         addc    r2, r6
  84         mov.l   @r4+, r2
  85         addc    r3, r6
  86         mov.l   @r4+, r3
  87         addc    r0, r6
  88         mov.l   @r4+, r0
  89         addc    r2, r6
  90         mov.l   @r4+, r2
  91         addc    r3, r6
  92         addc    r0, r6
  93         addc    r2, r6
  94         movt    r0
  95         dt      r5
  96         bf/s    3b
  97          cmp/eq #1, r0
  98         ! here, we know r5==0
  99         addc    r5, r6                  ! add carry to r6
 100 4:
 101         mov     r1, r0
 102         and     #0x1c, r0
 103         tst     r0, r0
 104         bt/s    6f
 105          mov    r0, r5
 106         shlr2   r5
 107         mov     #0, r2
 108 5:
 109         addc    r2, r6
 110         mov.l   @r4+, r2
 111         movt    r0
 112         dt      r5
 113         bf/s    5b
 114          cmp/eq #1, r0
 115         addc    r2, r6
 116         addc    r5, r6          ! r5==0 here, so it means add carry-bit
 117 6:
 118         mov     r1, r5
 119         mov     #3, r0
 120         and     r0, r5
 121         tst     r5, r5
 122         bt      9f              ! if it's =0 go to 9f
 123         mov     #2, r1
 124         cmp/hs  r1, r5
 125         bf      7f
 126         mov.w   @r4+, r0
 127         extu.w  r0, r0
 128         cmp/eq  r1, r5
 129         bt/s    8f
 130          clrt
 131         shll16  r0
 132         addc    r0, r6
 133 7:
 134         mov.b   @r4+, r0
 135         extu.b  r0, r0
 136 #ifndef __LITTLE_ENDIAN__
 137         shll8   r0
 138 #endif
 139 8:
 140         addc    r0, r6
 141         mov     #0, r0
 142         addc    r0, r6
 143 9:
 144         rts
 145          mov    r6, r0
 146
 147 /*
 148 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
 149                                         int sum, int *src_err_ptr, int *dst_err_ptr)
 150  */
 151
 152 /*
  153  * Copy from src to dst while checksumming; otherwise like csum_partial
 154  *
 155  * The macros SRC and DST specify the type of access for the instruction.
 156  * thus we can call a custom exception handler for all access types.
 157  *
 158  * FIXME: could someone double-check whether I haven't mixed up some SRC and
 159  *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 160  *        them all but there's no guarantee.
 161  */
 162
 163 #define SRC(...)                        \
 164         9999: __VA_ARGS__ ;             \
 165         .section __ex_table, "a";       \
 166         .long 9999b, 6001f      ;       \
 167         .previous
 168
 169 #define DST(...)                        \
 170         9999: __VA_ARGS__ ;             \
 171         .section __ex_table, "a";       \
 172         .long 9999b, 6002f      ;       \
 173         .previous
 174
 175 !
 176 ! r4:   const char *SRC
 177 ! r5:   char *DST
 178 ! r6:   int LEN
 179 ! r7:   int SUM
 180 !
 181 ! on stack:
 182 ! int *SRC_ERR_PTR
 183 ! int *DST_ERR_PTR
 184 !
 185 ENTRY(csum_partial_copy_generic)
 186         mov.l   r5,@-r15
 187         mov.l   r6,@-r15
 188
 189         mov     #3,r0           ! Check src and dest are equally aligned
 190         mov     r4,r1
 191         and     r0,r1
 192         and     r5,r0
 193         cmp/eq  r1,r0
 194         bf      3f              ! Different alignments, use slow version
 195         tst     #1,r0           ! Check dest word aligned
 196         bf      3f              ! If not, do it the slow way
 197
 198         mov     #2,r0
 199         tst     r0,r5           ! Check dest alignment.
 200         bt      2f              ! Jump if alignment is ok.
 201         add     #-2,r6          ! Alignment uses up two bytes.
 202         cmp/pz  r6              ! Jump if we had at least two bytes.
 203         bt/s    1f
 204          clrt
 205         bra     4f
 206          add    #2,r6           ! r6 was < 2.   Deal with it.
 207
 208 3:      ! Handle different src and dest alignments.
 209         ! This is not common, so simple byte by byte copy will do.
 210         mov     r6,r2
 211         shlr    r6
 212         tst     r6,r6
 213         bt      4f
 214         clrt
 215         .align  2
 216 5:
 217 SRC(    mov.b   @r4+,r1         )
 218 SRC(    mov.b   @r4+,r0         )
 219         extu.b  r1,r1
 220 DST(    mov.b   r1,@r5          )
 221 DST(    mov.b   r0,@(1,r5)      )
 222         extu.b  r0,r0
 223         add     #2,r5
 224
 225 #ifdef  __LITTLE_ENDIAN__
 226         shll8   r0
 227 #else
 228         shll8   r1
 229 #endif
 230         or      r1,r0
 231
 232         addc    r0,r7
 233         movt    r0
 234         dt      r6
 235         bf/s    5b
 236          cmp/eq #1,r0
 237         mov     #0,r0
 238         addc    r0, r7
 239
 240         mov     r2, r0
 241         tst     #1, r0
 242         bt      7f
 243         bra     5f
 244          clrt
 245
 246         ! src and dest equally aligned, but to a two byte boundary.
 247         ! Handle first two bytes as a special case
 248         .align  2
 249 1:
 250 SRC(    mov.w   @r4+,r0         )
 251 DST(    mov.w   r0,@r5          )
 252         add     #2,r5
 253         extu.w  r0,r0
 254         addc    r0,r7
 255         mov     #0,r0
 256         addc    r0,r7
 257 2:
 258         mov     r6,r2
 259         mov     #-5,r0
 260         shld    r0,r6
 261         tst     r6,r6
 262         bt/s    2f
 263          clrt
 264         .align  2
 265 1:
 266 SRC(    mov.l   @r4+,r0         )
 267 SRC(    mov.l   @r4+,r1         )
 268         addc    r0,r7
 269 DST(    mov.l   r0,@r5          )
 270 DST(    mov.l   r1,@(4,r5)      )
 271         addc    r1,r7
 272
 273 SRC(    mov.l   @r4+,r0         )
 274 SRC(    mov.l   @r4+,r1         )
 275         addc    r0,r7
 276 DST(    mov.l   r0,@(8,r5)      )
 277 DST(    mov.l   r1,@(12,r5)     )
 278         addc    r1,r7
 279
 280 SRC(    mov.l   @r4+,r0         )
 281 SRC(    mov.l   @r4+,r1         )
 282         addc    r0,r7
 283 DST(    mov.l   r0,@(16,r5)     )
 284 DST(    mov.l   r1,@(20,r5)     )
 285         addc    r1,r7
 286
 287 SRC(    mov.l   @r4+,r0         )
 288 SRC(    mov.l   @r4+,r1         )
 289         addc    r0,r7
 290 DST(    mov.l   r0,@(24,r5)     )
 291 DST(    mov.l   r1,@(28,r5)     )
 292         addc    r1,r7
 293         add     #32,r5
 294         movt    r0
 295         dt      r6
 296         bf/s    1b
 297          cmp/eq #1,r0
 298         mov     #0,r0
 299         addc    r0,r7
 300
 301 2:      mov     r2,r6
 302         mov     #0x1c,r0
 303         and     r0,r6
 304         cmp/pl  r6
 305         bf/s    4f
 306          clrt
 307         shlr2   r6
 308 3:
 309 SRC(    mov.l   @r4+,r0 )
 310         addc    r0,r7
 311 DST(    mov.l   r0,@r5  )
 312         add     #4,r5
 313         movt    r0
 314         dt      r6
 315         bf/s    3b
 316          cmp/eq #1,r0
 317         mov     #0,r0
 318         addc    r0,r7
 319 4:      mov     r2,r6
 320         mov     #3,r0
 321         and     r0,r6
 322         cmp/pl  r6
 323         bf      7f
 324         mov     #2,r1
 325         cmp/hs  r1,r6
 326         bf      5f
 327 SRC(    mov.w   @r4+,r0 )
 328 DST(    mov.w   r0,@r5  )
 329         extu.w  r0,r0
 330         add     #2,r5
 331         cmp/eq  r1,r6
 332         bt/s    6f
 333          clrt
 334         shll16  r0
 335         addc    r0,r7
 336 5:
 337 SRC(    mov.b   @r4+,r0 )
 338 DST(    mov.b   r0,@r5  )
 339         extu.b  r0,r0
 340 #ifndef __LITTLE_ENDIAN__
 341         shll8   r0
 342 #endif
 343 6:      addc    r0,r7
 344         mov     #0,r0
 345         addc    r0,r7
 346 7:
 347 5000:
 348
 349 # Exception handler:
 350 .section .fixup, "ax"
 351
 352 6001:
 353         mov.l   @(8,r15),r0                     ! src_err_ptr
 354         mov     #-EFAULT,r1
 355         mov.l   r1,@r0
 356
 357         ! zero the complete destination - computing the rest
 358         ! is too much work
 359         mov.l   @(4,r15),r5             ! dst
 360         mov.l   @r15,r6                 ! len
 361         mov     #0,r7
 362 1:      mov.b   r7,@r5
 363         dt      r6
 364         bf/s    1b
 365          add    #1,r5
 366         mov.l   8000f,r0
 367         jmp     @r0
 368          nop
 369         .align  2
 370 8000:   .long   5000b
 371
 372 6002:
 373         mov.l   @(12,r15),r0                    ! dst_err_ptr
 374         mov     #-EFAULT,r1
 375         mov.l   r1,@r0
 376         mov.l   8001f,r0
 377         jmp     @r0
 378          nop
 379         .align  2
 380 8001:   .long   5000b
 381
 382 .previous
 383         add     #8,r15
 384         rts
 385          mov    r7,r0