arch/x86_64/lib/memcpy.S

   1 /* Copyright 2002 Andi Kleen */
   2
   3 #include <linux/config.h>
   4 #include <linux/linkage.h>
   5 #include <asm/dwarf2.h>
   6 #include <asm/cpufeature.h>
   7
   8 /*
   9  * memcpy - Copy a memory block.
  10  *
  11  * Input:
  12  * rdi destination
  13  * rsi source
  14  * rdx count
  15  *
  16  * Output:
  17  * rax original destination
  18  */
  19
  20         ALIGN                   /* no cld/std here: copy direction follows DF as set by the caller */
  21 memcpy_c:                       /* rep-movs variant: rdi=dst, rsi=src, rdx=byte count; returns dst in rax */
  22         CFI_STARTPROC
  23         movq %rdi,%rax          /* return value: original destination */
  24         movq %rdx,%rcx          /* take the full 64-bit count; a movl would drop bits 32-63 */
  25         shrq $3,%rcx            /* rcx = number of whole 8-byte words */
  26         andl $7,%edx            /* edx = 0..7 leftover bytes */
  27         rep movsq               /* copy the words; advances rsi/rdi and counts rcx down to 0 */
  28         movl %edx,%ecx          /* 32-bit write zero-extends, so rcx = leftover byte count */
  29         rep movsb               /* copy the leftover bytes */
  30         ret
  31         CFI_ENDPROC
  32 ENDPROC(memcpy_c)
  33
  34 ENTRY(__memcpy)
  35 ENTRY(memcpy)
  36         CFI_STARTPROC
  37         /* Copies in 64-byte blocks, then 8-byte words, then single bytes.
  38          * Only rax, rcx, r8-r11, rsi, rdi and the flags are written, so no
  39          * register is saved and the stack is left untouched. */
  40         movq %rdi,%rax          /* return value: original destination */
  41
  42         movq %rdx,%rcx          /* full 64-bit count; a movl would drop bits 32-63 */
  43         shrq $6,%rcx            /* rcx = number of 64-byte blocks */
  44         jz .Lhandle_tail
  45
  46         .p2align 4
  47 .Lloop_64:
  48         decq %rcx               /* sets ZF for the jnz at the bottom; movq/leaq leave flags alone */
  49
  50         movq (%rsi),%r11
  51         movq 8(%rsi),%r8
  52
  53         movq %r11,(%rdi)
  54         movq %r8,1*8(%rdi)
  55
  56         movq 2*8(%rsi),%r9
  57         movq 3*8(%rsi),%r10
  58
  59         movq %r9,2*8(%rdi)
  60         movq %r10,3*8(%rdi)
  61
  62         movq 4*8(%rsi),%r11
  63         movq 5*8(%rsi),%r8
  64
  65         movq %r11,4*8(%rdi)
  66         movq %r8,5*8(%rdi)
  67
  68         movq 6*8(%rsi),%r9
  69         movq 7*8(%rsi),%r10
  70
  71         movq %r9,6*8(%rdi)
  72         movq %r10,7*8(%rdi)
  73
  74         leaq 64(%rsi),%rsi      /* leaq rather than addq: ZF from decq must survive */
  75         leaq 64(%rdi),%rdi
  76         jnz  .Lloop_64
  77
  78 .Lhandle_tail:
  79         movl %edx,%ecx          /* only the low 6 bits matter from here on, 32-bit ops suffice */
  80         andl $63,%ecx           /* bytes left after the 64-byte blocks */
  81         shrl $3,%ecx            /* ecx = whole 8-byte words among them */
  82         jz   .Lhandle_7
  83         .p2align 4
  84 .Lloop_8:
  85         decl %ecx               /* ZF consumed by the jnz below */
  86         movq (%rsi),%r8
  87         movq %r8,(%rdi)
  88         leaq 8(%rdi),%rdi
  89         leaq 8(%rsi),%rsi
  90         jnz  .Lloop_8
  91
  92 .Lhandle_7:
  93         movl %edx,%ecx
  94         andl $7,%ecx            /* ecx = 0..7 trailing bytes */
  95         jz .Lende
  96         .p2align 4
  97 .Lloop_1:
  98         movb (%rsi),%r8b
  99         movb %r8b,(%rdi)
 100         incq %rdi
 101         incq %rsi
 102         decl %ecx
 103         jnz .Lloop_1
 104
 105 .Lende:
 106         ret                     /* rax = original destination */
 107         /* .Lfinal marks the end of the code.  The .altinstructions entry
 108          * in this file stores .Lfinal - memcpy in a single .byte, so this
 109          * function must stay shorter than 256 bytes. */
 110 .Lfinal:
 111         CFI_ENDPROC
 112 ENDPROC(memcpy)
 113 ENDPROC(__memcpy)
 114
 115         /* Where the string copy instructions are the faster choice, send
 116            memcpy straight to memcpy_c, which is also the simpler routine. */
 117
 118         .section .altinstr_replacement,"ax"
 119 .Lmemcpy_jmp:                                   /* jmp <disp8>: opcode 0xeb, then the offset counted from the end of the jump placed at memcpy */
 120         .byte 0xeb, (memcpy_c - memcpy) - (.Lmemcpy_jmp_end - .Lmemcpy_jmp)
 121 .Lmemcpy_jmp_end:
 122         .previous
 123         .section .altinstructions,"a"
 124         .align 8
 125         .quad memcpy                            /* address of the code the jump is written over */
 126         .quad .Lmemcpy_jmp                      /* address of the replacement bytes */
 127         .byte X86_FEATURE_REP_GOOD              /* feature this replacement is tied to */
 128         .byte .Lfinal - memcpy                  /* byte length of memcpy up to .Lfinal */
 129         .byte .Lmemcpy_jmp_end - .Lmemcpy_jmp   /* byte length of the replacement */
 130         .previous