arch/mn10300/lib/do_csum.S

   1 /* Optimised simple memory checksum
   2  *
   3  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
   4  * Written by David Howells (dhowells@redhat.com)
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public Licence
   8  * as published by the Free Software Foundation; either version
   9  * 2 of the Licence, or (at your option) any later version.
  10  */
  11 #include <asm/cache.h>
  12
  13         .section .text
  14         .balign L1_CACHE_BYTES                  # align to L1_CACHE_BYTES (expected to come from <asm/cache.h>)
  15
  16 ###############################################################################
  17 # Accumulate the buffer into a 32-bit sum with end-around carry, then fold it to 16 bits.
  18 # unsigned int do_csum(const unsigned char *buff, int len)
  19 # In: d0 = buff, d1 = len.  Out: d0 = folded 16-bit sum.  d2/d3 are saved and restored; a0, a1, d1, e0, e1, e3 are overwritten.
  20 ###############################################################################
  21         .globl  do_csum
  22         .type   do_csum,@function
  23 do_csum:
  24         movm    [d2,d3],(sp)                    # save d2/d3; the ret at the end restores them
  25         mov     d1,d2                           # d2 = len: count of bytes still to be summed
  26         mov     d0,a0                           # a0 = buff: read pointer
  27         mov     a0,a1                           # a1 = original buff, kept for the odd-address test at the end
  28         clr     d1                              # d1 = 32-bit running sum (accumulator)
  29
  30         cmp     +0,d2
  31         ble     do_csum_done                    # len <= 0: go fold the still-zero accumulator and return
  32
  33         # 4-byte align the buffer pointer, summing up to 3 leading bytes on the way
  34         btst    +3,a0                           # either of the low two address bits set?
  35         beq     do_csum_now_4b_aligned          # no: already 4-byte aligned
  36
  37         btst    +1,a0                           # odd address?
  38         beq     do_csum_addr_not_odd
  39         movbu   (a0),d0                         # yes: fetch the single leading byte
  40         inc     a0
  41         asl     +8,d0                           # place it in bits 15:8; the swaph at the end compensates
  42         add     d0,d1
  43         add     -1,d2                           # one byte consumed
  44
  45 do_csum_addr_not_odd:
  46         cmp     +2,d2
  47         bcs     do_csum_fewer_than_4            # fewer than 2 bytes left: go straight to the tail
  48         btst    +2,a0                           # address bit 1 set (2-byte but not 4-byte aligned)?
  49         beq     do_csum_now_4b_aligned          # no: pointer is 4-byte aligned
  50         movhu   (a0+),d0                        # yes: consume one halfword, advancing a0
  51         add     d0,d1
  52         add     -2,d2                           # two bytes consumed
  53         cmp     +4,d2
  54         bcs     do_csum_fewer_than_4            # fewer than 4 bytes left: no whole word to sum
  55
  56 do_csum_now_4b_aligned:
  57         # we want to checksum as much as we can in chunks of 32 bytes
  58         cmp     +31,d2
  59         bls     do_csum_remainder               # 31 bytes or fewer: only the 4-byte aligned remainder is left
  60
  61         add     -32,d2                          # bias the count by -32 so the sub below tests for another full chunk
  62         mov     +32,d3                          # d3 = chunk size in bytes
  63
  64 do_csum_loop:
  65         mov     (a0+),d0                        # load four words before adding any of them
  66         mov     (a0+),e0
  67         mov     (a0+),e1
  68         mov     (a0+),e3
  69         add     d0,d1                           # plain add: starts a fresh carry chain
  70         addc    e0,d1                           # each addc also adds the carry left by the previous add
  71         addc    e1,d1
  72         addc    e3,d1
  73         mov     (a0+),d0                        # next four words; the pending carry is picked up by the addc below
  74         mov     (a0+),e0
  75         mov     (a0+),e1
  76         mov     (a0+),e3
  77         addc    d0,d1                           # continues the carry chain from the first four words
  78         addc    e0,d1
  79         addc    e1,d1
  80         addc    e3,d1
  81         addc    +0,d1                           # fold the last carry back into the sum (end-around carry)
  82
  83         sub     d3,d2                           # count -= 32; borrows when fewer than 32 bytes remain
  84         bcc     do_csum_loop                    # no borrow: another full chunk is available
  85
  86         add     d3,d2                           # undo the bias: d2 = bytes really left (0-31)
  87         beq     do_csum_done                    # nothing left to sum
  88
  89 do_csum_remainder:
  90         # cut 16-31 bytes down to 0-15
  91         cmp     +16,d2
  92         bcs     do_csum_fewer_than_16           # fewer than 16 bytes left: skip this step
  93         mov     (a0+),d0                        # same load-then-add pattern as the loop, for four words
  94         mov     (a0+),e0
  95         mov     (a0+),e1
  96         mov     (a0+),e3
  97         add     d0,d1
  98         addc    e0,d1
  99         addc    e1,d1
 100         addc    e3,d1
 101         addc    +0,d1                           # fold the last carry back into the sum
 102         add     -16,d2                          # sixteen bytes consumed
 103         beq     do_csum_done                    # nothing left to sum
 104
 105 do_csum_fewer_than_16:
 106         # sum the remaining whole words (nothing is copied); d2 is not decremented on this path
 107         cmp     +4,d2
 108         bcs     do_csum_fewer_than_4            # 0-3 bytes: no whole word
 109         cmp     +8,d2
 110         bcs     do_csum_one_word                # 4-7 bytes: one word
 111         cmp     +12,d2
 112         bcs     do_csum_two_words               # 8-11 bytes: two words
 113         mov     (a0+),d0                        # 12-15 bytes: three words, falling through the labels below
 114         add     d0,d1
 115         addc    +0,d1                           # fold the carry after every word
 116 do_csum_two_words:
 117         mov     (a0+),d0
 118         add     d0,d1
 119         addc    +0,d1
 120 do_csum_one_word:
 121         mov     (a0+),d0
 122         add     d0,d1
 123         addc    +0,d1
 124
 125 do_csum_fewer_than_4:
 126         and     +3,d2                           # d2 = trailing bytes (0-3); masks off the whole words summed above
 127         beq     do_csum_done                    # no trailing bytes
 128         xor_cmp d0,d0,+2,d2                     # one instruction: clear d0 (xor d0,d0) and compare d2 with 2
 129         bcs     do_csum_fewer_than_2            # exactly one byte left
 130         movhu   (a0+),d0                        # two or three left: take a halfword
 131         and     +1,d2                           # is there a third byte?
 132         beq     do_csum_add_last_bit            # no: d0 already holds the whole tail
 133 do_csum_fewer_than_2:
 134         movbu   (a0),d3                         # final single byte; d3 is only scratch from here on
 135         add     d3,d0                           # added to whatever is already in d0 (zero or the halfword)
 136 do_csum_add_last_bit:
 137         add     d0,d1                           # add the assembled tail value to the sum
 138         addc    +0,d1                           # fold the carry back into the sum
 139
 140 do_csum_done:
 141         # compress the checksum down to 16 bits
 142         mov     +0xffff0000,d0
 143         and     d1,d0                           # d0 = upper half of the sum, still in bits 31:16
 144         asl     +16,d1                          # d1 = lower half of the sum moved up to bits 31:16
 145         add     d1,d0                           # bits 31:16 = upper + lower; carry flag = overflow of that add
 146         addc    +0xffff,d0                      # bits 15:0 are zero here, so 0xffff + carry reaches bits 31:16 only if carry is set
 147         lsr     +16,d0                          # bring the 16-bit result down to bits 15:0
 148
 149         # flip the halves of the word result if the buffer was oddly aligned
 150         and     +1,a1                           # was the original buffer address odd? (a1 is overwritten)
 151         beq     do_csum_not_oddly_aligned       # no: result is already in the right byte order
 152         swaph   d0,d0                           # exchange bits 15:8 with 7:0
 153
 154 do_csum_not_oddly_aligned:
 155         ret     [d2,d3],8                       # restore d2/d3, release their 8 bytes of stack and return; result is in d0
 156
 157         .size   do_csum, .-do_csum