arch/sh/lib/checksum.S

   1 /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
   2  *
   3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   4  *              operating system.  INET is implemented using the  BSD Socket
   5  *              interface as the means of communication with the user level.
   6  *
   7  *              IP/TCP/UDP checksumming routines
   8  *
   9  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
  10  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  11  *              Tom May, <ftom@netcom.com>
  12  *              Pentium Pro/II routines:
  13  *              Alexander Kjeldaas <astor@guardian.no>
  14  *              Finn Arne Gangstad <finnag@guardian.no>
  15  *              Lots of code moved from tcp.c and ip.c; see those files
  16  *              for more names.
  17  *
  18  * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  19  *                           handling.
  20  *              Andi Kleen,  add zeroing on error
  21  *                   converted to pure assembler
  22  *
  23  * SuperH version:  Copyright (C) 1999  Niibe Yutaka
  24  *
  25  *              This program is free software; you can redistribute it and/or
  26  *              modify it under the terms of the GNU General Public License
  27  *              as published by the Free Software Foundation; either version
  28  *              2 of the License, or (at your option) any later version.
  29  */
  30
  31 #include <asm/errno.h>
  32 #include <linux/linkage.h>
  33
  34 /*
  35  * computes a partial checksum, e.g. for TCP/UDP fragments
  36  */
  37
  38 /*
  39  * unsigned int csum_partial(const unsigned char *buf, int len,
  40  *                           unsigned int sum);
  41  */
  42
  43 .text
  44 ENTRY(csum_partial)
  45           /*
  46            * Experiments with Ethernet and SLIP connections show that buff
  47            * is aligned on either a 2-byte or 4-byte boundary.  We get at
  48            * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  49            * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  50            * alignment for the unrolled loop.
  51            */
  52         mov     r5, r1
  53         mov     r4, r0
  54         tst     #2, r0          ! Check alignment.
  55         bt      2f              ! Jump if alignment is ok.
  56         !
  57         add     #-2, r5         ! Alignment uses up two bytes.
  58         cmp/pz  r5              !
  59         bt/s    1f              ! Jump if we had at least two bytes.
  60          clrt
  61         bra     6f
  62          add    #2, r5          ! r5 was < 2.  Deal with it.
  63 1:
  64         mov     r5, r1          ! Save new len for later use.
  65         mov.w   @r4+, r0
  66         extu.w  r0, r0
  67         addc    r0, r6
  68         bf      2f
  69         add     #1, r6
  70 2:
  71         mov     #-5, r0
  72         shld    r0, r5
  73         tst     r5, r5
  74         bt/s    4f              ! if it's =0, go to 4f
  75          clrt
  76         .align  2
  77 3:
  78         mov.l   @r4+, r0
  79         mov.l   @r4+, r2
  80         mov.l   @r4+, r3
  81         addc    r0, r6
  82         mov.l   @r4+, r0
  83         addc    r2, r6
  84         mov.l   @r4+, r2
  85         addc    r3, r6
  86         mov.l   @r4+, r3
  87         addc    r0, r6
  88         mov.l   @r4+, r0
  89         addc    r2, r6
  90         mov.l   @r4+, r2
  91         addc    r3, r6
  92         addc    r0, r6
  93         addc    r2, r6
  94         movt    r0
  95         dt      r5
  96         bf/s    3b
  97          cmp/eq #1, r0
  98         ! here, we know r5==0
  99         addc    r5, r6                  ! add carry to r6
 100 4:
 101         mov     r1, r0
 102         and     #0x1c, r0
 103         tst     r0, r0
 104         bt/s    6f
 105          mov    r0, r5
 106         shlr2   r5
 107         mov     #0, r2
 108 5:
 109         addc    r2, r6
 110         mov.l   @r4+, r2
 111         movt    r0
 112         dt      r5
 113         bf/s    5b
 114          cmp/eq #1, r0
 115         addc    r2, r6
 116         addc    r5, r6          ! r5==0 here, so it means add carry-bit
 117 6:
 118         mov     r1, r5
 119         mov     #3, r0
 120         and     r0, r5
 121         tst     r5, r5
 122         bt      9f              ! if it's =0 go to 9f
 123         mov     #2, r1
 124         cmp/hs  r1, r5
 125         bf      7f
 126         mov.w   @r4+, r0
 127         extu.w  r0, r0
 128         cmp/eq  r1, r5
 129         bt/s    8f
 130          clrt
 131         shll16  r0
 132         addc    r0, r6
 133 7:
 134         mov.b   @r4+, r0
 135         extu.b  r0, r0
 136 #ifndef __LITTLE_ENDIAN__
 137         shll8   r0
 138 #endif
 139 8:
 140         addc    r0, r6
 141         mov     #0, r0
 142         addc    r0, r6
 143 9:
 144         rts
 145          mov    r6, r0
 146
 147 /*
 148 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
 149                                         int sum, int *src_err_ptr, int *dst_err_ptr)
 150  */
 151
 152 /*
  153  * Copy from src to dst while checksumming, otherwise like csum_partial
 154  *
 155  * The macros SRC and DST specify the type of access for the instruction.
 156  * thus we can call a custom exception handler for all access types.
 157  *
 158  * FIXME: could someone double-check whether I haven't mixed up some SRC and
 159  *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 160  *        them all but there's no guarantee.
 161  */
 162
  /*
   * SRC(insn) and DST(insn) emit insn at local label 9999 and add one entry
   * to the __ex_table section: the address of that instruction followed by
   * the address of a fixup label.  SRC pairs the instruction with label 6001
   * and DST with label 6002.  Both are referenced as forward labels
   * (6001f / 6002f), so they must be defined after the last use of the macro.
   * .previous switches back to the section that was active before the entry
   * was emitted.
   */
  163 #define SRC(...)                        \
  164         9999: __VA_ARGS__ ;             \
  165         .section __ex_table, "a";       \
  166         .long 9999b, 6001f      ;       \
  167         .previous
  168
  169 #define DST(...)                        \
  170         9999: __VA_ARGS__ ;             \
  171         .section __ex_table, "a";       \
  172         .long 9999b, 6002f      ;       \
  173         .previous
 174
 175 !
 176 ! r4:   const char *SRC
 177 ! r5:   char *DST
 178 ! r6:   int LEN
 179 ! r7:   int SUM
 180 !
 181 ! on stack:
 182 ! int *SRC_ERR_PTR
 183 ! int *DST_ERR_PTR
 184 !
  185 ENTRY(csum_partial_copy_generic)
              /*
               * Copies r6 bytes from r4 to r5 and adds them into the running
               * sum in r7 with end-around carry.  The sum is returned in r0.
               * Every load from the source is wrapped in SRC() and every store
               * to the destination in DST(), so each of those instructions has
               * an __ex_table entry pointing at 6001 or 6002 in .fixup below.
               *
               * The two pushes below save dst and len.  From then until the
               * final "add #8,r15" the fixup code reads the stack as:
               *   @r15      = len
               *   @(4,r15)  = dst
               *   @(8,r15)  = src_err_ptr
               *   @(12,r15) = dst_err_ptr
               */
  186         mov.l   r5,@-r15
  187         mov.l   r6,@-r15
  188
  189         mov     #3,r0           ! Check src and dest are equally aligned
  190         mov     r4,r1
  191         and     r0,r1
  192         and     r5,r0
  193         cmp/eq  r1,r0
  194         bf      3f              ! Different alignments, use slow version
  195         tst     #1,r0           ! Check dest word aligned
  196         bf      3f              ! If not, do it the slow way
  197
              /*
               * Here src and dst have the same low two address bits and bit 0
               * is clear.  If bit 1 of dst is set, one halfword is copied
               * first (label 1) to reach a 4-byte boundary.
               */
  198         mov     #2,r0
  199         tst     r0,r5           ! Check dest alignment.
  200         bt      2f              ! Jump if alignment is ok.
  201         add     #-2,r6          ! Alignment uses up two bytes.
  202         cmp/pz  r6              ! Jump if we had at least two bytes.
  203         bt/s    1f
  204          clrt
  205         add     #2,r6           ! r6 was < 2.   Deal with it.
  206         bra     4f
  207          mov    r6,r2
  208
  209 3:      ! Handle different src and dest alignments.
  210         ! This is not common, so simple byte by byte copy will do.
              /*
               * r2 keeps the full length (its bit 0 is tested after the loop).
               * r6 = len / 2 is the number of byte pairs copied by the loop.
               */
  211         mov     r6,r2
  212         shlr    r6
  213         tst     r6,r6
  214         bt      4f
  215         clrt
  216         .align  2
  217 5:
  218 SRC(    mov.b   @r4+,r1         )
  219 SRC(    mov.b   @r4+,r0         )
  220         extu.b  r1,r1
  221 DST(    mov.b   r1,@r5          )
  222 DST(    mov.b   r0,@(1,r5)      )
  223         extu.b  r0,r0
  224         add     #2,r5
  225
              /*
               * Combine the two bytes into one 16-bit value: the byte read
               * second (r0) is shifted up on little-endian, the byte read
               * first (r1) is shifted up on big-endian.
               */
  226 #ifdef  __LITTLE_ENDIAN__
  227         shll8   r0
  228 #else
  229         shll8   r1
  230 #endif
  231         or      r1,r0
  232
              /*
               * dt overwrites T, so the carry produced by addc is parked in r0
               * with movt and put back into T by cmp/eq #1 in the delay slot of
               * the loop branch.  The same idiom is used by the loops below.
               */
  233         addc    r0,r7
  234         movt    r0
  235         dt      r6
  236         bf/s    5b
  237          cmp/eq #1,r0
  238         mov     #0,r0
  239         addc    r0, r7
  240
              /*
               * Even length: finished, go to 7.  Odd length: one byte is left,
               * copy it with the single-byte tail at the next label 5.
               */
  241         mov     r2, r0
  242         tst     #1, r0
  243         bt      7f
  244         bra     5f
  245          clrt
  246
  247         ! src and dest equally aligned, but to a two byte boundary.
  248         ! Handle first two bytes as a special case
  249         .align  2
  250 1:
  251 SRC(    mov.w   @r4+,r0         )
  252 DST(    mov.w   r0,@r5          )
  253         add     #2,r5
  254         extu.w  r0,r0
  255         addc    r0,r7
  256         mov     #0,r0
  257         addc    r0,r7
  258 2:
              /*
               * r2 = bytes still to copy, r6 = r2 >> 5 = number of 32-byte
               * blocks.  Each block is copied as four pairs of longwords; the
               * stores use displacements from r5, which is advanced once per
               * block.
               */
  259         mov     r6,r2
  260         mov     #-5,r0
  261         shld    r0,r6
  262         tst     r6,r6
  263         bt/s    2f
  264          clrt
  265         .align  2
  266 1:
  267 SRC(    mov.l   @r4+,r0         )
  268 SRC(    mov.l   @r4+,r1         )
  269         addc    r0,r7
  270 DST(    mov.l   r0,@r5          )
  271 DST(    mov.l   r1,@(4,r5)      )
  272         addc    r1,r7
  273
  274 SRC(    mov.l   @r4+,r0         )
  275 SRC(    mov.l   @r4+,r1         )
  276         addc    r0,r7
  277 DST(    mov.l   r0,@(8,r5)      )
  278 DST(    mov.l   r1,@(12,r5)     )
  279         addc    r1,r7
  280
  281 SRC(    mov.l   @r4+,r0         )
  282 SRC(    mov.l   @r4+,r1         )
  283         addc    r0,r7
  284 DST(    mov.l   r0,@(16,r5)     )
  285 DST(    mov.l   r1,@(20,r5)     )
  286         addc    r1,r7
  287
  288 SRC(    mov.l   @r4+,r0         )
  289 SRC(    mov.l   @r4+,r1         )
  290         addc    r0,r7
  291 DST(    mov.l   r0,@(24,r5)     )
  292 DST(    mov.l   r1,@(28,r5)     )
  293         addc    r1,r7
  294         add     #32,r5
  295         movt    r0
  296         dt      r6
  297         bf/s    1b
  298          cmp/eq #1,r0
  299         mov     #0,r0
  300         addc    r0,r7
  301
              /*
               * r2 & 0x1c is the number of bytes left in whole longwords;
               * shifted right by two it becomes the longword count for loop 3.
               */
  302 2:      mov     r2,r6
  303         mov     #0x1c,r0
  304         and     r0,r6
  305         cmp/pl  r6
  306         bf/s    4f
  307          clrt
  308         shlr2   r6
  309 3:
  310 SRC(    mov.l   @r4+,r0 )
  311         addc    r0,r7
  312 DST(    mov.l   r0,@r5  )
  313         add     #4,r5
  314         movt    r0
  315         dt      r6
  316         bf/s    3b
  317          cmp/eq #1,r0
  318         mov     #0,r0
  319         addc    r0,r7
              /*
               * Tail: r2 & 3 bytes remain.
               *   0: nothing to do, go to 7.
               *   1: copy one byte at label 5.
               *   2: copy one halfword and add it at label 6.
               *   3: copy one halfword, add it shifted into the upper 16 bits,
               *      then fall into label 5 for the last byte.
               */
  320 4:      mov     r2,r6
  321         mov     #3,r0
  322         and     r0,r6
  323         cmp/pl  r6
  324         bf      7f
  325         mov     #2,r1
  326         cmp/hs  r1,r6
  327         bf      5f
  328 SRC(    mov.w   @r4+,r0 )
  329 DST(    mov.w   r0,@r5  )
  330         extu.w  r0,r0
  331         add     #2,r5
  332         cmp/eq  r1,r6
  333         bt/s    6f
  334          clrt
  335         shll16  r0
  336         addc    r0,r7
  337 5:
  338 SRC(    mov.b   @r4+,r0 )
  339 DST(    mov.b   r0,@r5  )
  340         extu.b  r0,r0
  341 #ifndef __LITTLE_ENDIAN__
  342         shll8   r0
  343 #endif
  344 6:      addc    r0,r7
  345         mov     #0,r0
  346         addc    r0,r7
              /*
               * 7 and 5000 mark the common exit.  The .fixup code that follows
               * is assembled into its own section, so in this section label
               * 5000 is immediately followed by the epilogue after .previous.
               * Both fault handlers jump back to 5000.
               */
  347 7:
  348 5000:
  349
  350 # Exception handler:
  351 .section .fixup, "ax"
  352
              /*
               * 6001: fault on a load wrapped in SRC().  Stores -EFAULT through
               * src_err_ptr, writes zero to all len bytes of dst using the dst
               * and len saved on the stack at entry, and leaves r7 = 0, so the
               * function returns 0.
               * NOTE(review): the zeroing loop decrements before it tests, so
               * it assumes the saved len is non-zero.  Presumably that holds
               * because a fault requires at least one access; confirm.
               * NOTE(review): the zeroing stores are not wrapped in DST() and
               * therefore have no __ex_table entry of their own.
               */
  353 6001:
  354         mov.l   @(8,r15),r0                     ! src_err_ptr
  355         mov     #-EFAULT,r1
  356         mov.l   r1,@r0
  357
  358         ! zero the complete destination - computing the rest
  359         ! is too much work
  360         mov.l   @(4,r15),r5             ! dst
  361         mov.l   @r15,r6                 ! len
  362         mov     #0,r7
  363 1:      mov.b   r7,@r5
  364         dt      r6
  365         bf/s    1b
  366          add    #1,r5
  367         mov.l   8000f,r0
  368         jmp     @r0
  369          nop
  370         .align  2
  371 8000:   .long   5000b
  372
              /*
               * 6002: fault on a store wrapped in DST().  Stores -EFAULT
               * through dst_err_ptr and jumps to the common exit.  r7 is not
               * modified here, so the sum accumulated up to the fault is what
               * gets returned.
               */
  373 6002:
  374         mov.l   @(12,r15),r0                    ! dst_err_ptr
  375         mov     #-EFAULT,r1
  376         mov.l   r1,@r0
  377         mov.l   8001f,r0
  378         jmp     @r0
  379          nop
  380         .align  2
  381 8001:   .long   5000b
  382
  383 .previous
              /*
               * Epilogue: drop the two words pushed at entry and return the sum
               * in r0 (the move sits in the delay slot of rts).
               */
  384         add     #8,r15
  385         rts
  386          mov    r7,r0