arch/x86_64/lib/copy_user.S

   1 /* Copyright 2002 Andi Kleen, SuSE Labs.
   2  * Subject to the GNU Public License v2.
   3  *
   4  * Functions to copy from and to user space.
   5  */
   6
   7 #define FIX_ALIGNMENT 1
   8
   9         #include <asm/current.h>
  10         #include <asm/asm-offsets.h>
  11         #include <asm/thread_info.h>
  12         #include <asm/cpufeature.h>
  13
  14 /*
      * copy_to_user - copy to user space with segment limit checking.
      *
      * Uses the copy_user_generic register convention: rdi destination,
      * rsi source, rdx count.  When rdi + rdx wraps around or is not below
      * the limit read via threadinfo_addr_limit, control goes to bad_to_user,
      * which returns the low 32 bits of the count in eax.  Otherwise a 5 byte
      * jump enters the unrolled copy at .Lcug; the .altinstructions record
      * below pairs that jump with a replacement jump to copy_user_generic_c
      * for X86_FEATURE_REP_GOOD.
      */
  15         .globl copy_to_user
  16         .p2align 4
  17 copy_to_user:
  18         GET_THREAD_INFO(%rax)   /* macro defined elsewhere; presumably rax = current thread_info */
  19         movq %rdi,%rcx
  20         addq %rdx,%rcx          /* rcx = destination + count */
  21         jc  bad_to_user         /* the addition wrapped around */
  22         cmpq threadinfo_addr_limit(%rax),%rcx
  23         jae bad_to_user         /* end of the range is at or above the limit */
  24 2:
  25         .byte 0xe9      /* 32bit jump */
  26         .long .Lcug-1f  /* displacement counted from the end of the jump (label 1) */
  27 1:
  28
  29         .section .altinstr_replacement,"ax"
  30 3:      .byte 0xe9                      /* replacement jmp with 32 bit displacement */
  31         .long copy_user_generic_c-1b    /* offset, relative to the end of the jump at 2 that it replaces */
  32         .previous
  33         .section .altinstructions,"a"
  34         .align 8
  35         .quad  2b       /* the jump emitted above */
  36         .quad  3b       /* its replacement */
  37         .byte  X86_FEATURE_REP_GOOD
  38         .byte  5        /* presumably the original length (0xe9 + 4 byte offset); record layout is defined elsewhere */
  39         .byte  5        /* presumably the replacement length */
  40         .previous
  41
  42 /*
      * copy_from_user - copy from user space with segment limit checking.
      *
      * rdi destination, rsi source, rdx count.  The source range is checked:
      * when rsi + rdx wraps around or is not below the limit read via
      * threadinfo_addr_limit, bad_from_user zeroes the destination and
      * returns the count.  Otherwise execution falls through into
      * copy_user_generic.
      */
  43         .globl copy_from_user
  44         .p2align 4
  45 copy_from_user:
  46         GET_THREAD_INFO(%rax)   /* macro defined elsewhere; presumably rax = current thread_info */
  47         movq %rsi,%rcx
  48         addq %rdx,%rcx          /* rcx = source + count */
  49         jc  bad_from_user       /* the addition wrapped around */
  50         cmpq threadinfo_addr_limit(%rax),%rcx
  51         jae  bad_from_user      /* end of the range is at or above the limit */
  52         /* FALL THROUGH to copy_user_generic */
  53
  54         .section .fixup,"ax"
  55         /* must zero dest */
  56 bad_from_user:
  57         movl %edx,%ecx          /* only the low 32 bits of the count are used */
  58         xorl %eax,%eax
  59         rep
  60         stosb                   /* store ecx zero bytes at rdi */
  61 bad_to_user:
  62         movl    %edx,%eax       /* nothing was copied: return the count (low 32 bits) */
  63         ret
  64         .previous
  65
  66
  67 /*
  68  * copy_user_generic - memory copy with exception handling.
  69  *
  70  * Input:
  71  * rdi destination
  72  * rsi source
  73  * rdx count
  74  *
  75  * Output:
  76  * eax uncopied bytes or 0 if successful.
      *
      * rbx is pushed at .Lcug and popped again at .Lende.  rcx, rdx, rsi, rdi
      * and r8-r11 are overwritten.  Every load and store that touches the
      * buffers carries a .LsN / .LdN label so that it can be listed in
      * __ex_table.
  77  */
  78         .globl copy_user_generic
  79         .p2align 4
  80 copy_user_generic:
  81         .byte 0x66,0x66,0x90    /* 5 byte nop for replacement jump */
  82         .byte 0x66,0x90
  83 1:
  84         .section .altinstr_replacement,"ax"
  85 2:      .byte 0xe9                   /* near jump with 32bit immediate */
  86         .long copy_user_generic_c-1b /* offset */
  87         .previous
  88         .section .altinstructions,"a"
  89         .align 8
  90         .quad  copy_user_generic     /* the 5 byte nop above */
  91         .quad  2b                    /* the jump that may replace it */
  92         .byte  X86_FEATURE_REP_GOOD
  93         .byte  5
  94         .byte  5
  95         .previous
  96 .Lcug:
  97         pushq %rbx
  98         xorl %eax,%eax          /*zero for the exception handler */
  99
 100 #ifdef FIX_ALIGNMENT
 101         /* check for bad alignment of destination */
 102         movl %edi,%ecx
 103         andl $7,%ecx            /* ecx = destination address modulo 8 */
 104         jnz  .Lbad_alignment
 105 .Lafter_bad_alignment:
 106 #endif
 107
 108         movq %rdx,%rcx          /* rcx keeps the byte count for the tail code */
 109
 110         movl $64,%ebx           /* block size; the fixup code after .Ls8e adds rbx to rdi */
 111         shrq $6,%rdx
 112         decq %rdx               /* rdx = number of 64 byte blocks, minus one */
 113         js   .Lhandle_tail      /* less than one full block */
 114
 115         .p2align 4
 116 .Lloop:
 117 .Ls1:   movq (%rsi),%r11        /* first 32 bytes: four loads, then four stores */
 118 .Ls2:   movq 1*8(%rsi),%r8
 119 .Ls3:   movq 2*8(%rsi),%r9
 120 .Ls4:   movq 3*8(%rsi),%r10
 121 .Ld1:   movq %r11,(%rdi)
 122 .Ld2:   movq %r8,1*8(%rdi)
 123 .Ld3:   movq %r9,2*8(%rdi)
 124 .Ld4:   movq %r10,3*8(%rdi)
 125
 126 .Ls5:   movq 4*8(%rsi),%r11     /* second 32 bytes, same pattern */
 127 .Ls6:   movq 5*8(%rsi),%r8
 128 .Ls7:   movq 6*8(%rsi),%r9
 129 .Ls8:   movq 7*8(%rsi),%r10
 130 .Ld5:   movq %r11,4*8(%rdi)
 131 .Ld6:   movq %r8,5*8(%rdi)
 132 .Ld7:   movq %r9,6*8(%rdi)
 133 .Ld8:   movq %r10,7*8(%rdi)
 134
 135         decq %rdx
 136
 137         leaq 64(%rsi),%rsi      /* lea leaves the flags from decq untouched */
 138         leaq 64(%rdi),%rdi
 139
 140         jns  .Lloop             /* loop while the block counter is not negative */
 141
 142         .p2align 4
 143 .Lhandle_tail:
 144         movl %ecx,%edx          /* edx = byte count (low 32 bits), kept for .Lhandle_7 */
 145         andl $63,%ecx
 146         shrl $3,%ecx            /* ecx = whole quadwords left after the 64 byte blocks */
 147         jz   .Lhandle_7
 148         movl $8,%ebx
 149         .p2align 4
 150 .Lloop_8:
 151 .Ls9:   movq (%rsi),%r8
 152 .Ld9:   movq %r8,(%rdi)
 153         decl %ecx
 154         leaq 8(%rdi),%rdi       /* flags from decl survive the two lea instructions */
 155         leaq 8(%rsi),%rsi
 156         jnz .Lloop_8
 157
 158 .Lhandle_7:
 159         movl %edx,%ecx
 160         andl $7,%ecx            /* ecx = trailing bytes, 0-7 */
 161         jz   .Lende
 162         .p2align 4
 163 .Lloop_1:
 164 .Ls10:  movb (%rsi),%bl
 165 .Ld10:  movb %bl,(%rdi)
 166         incq %rdi
 167         incq %rsi
 168         decl %ecx
 169         jnz .Lloop_1
 170
 171 .Lende:
 172         popq %rbx               /* eax holds the return value: still 0 unless a fixup path set it */
 173         ret
 174
 175 #ifdef FIX_ALIGNMENT
 176         /* align destination: entered with ecx = rdi & 7 (non-zero), rdx = count */
 177         .p2align 4
 178 .Lbad_alignment:
 179         movl $8,%r9d
 180         subl %ecx,%r9d          /* r9 = 8 - (rdi & 7): bytes up to the next 8 byte boundary */
 181         movl %r9d,%ecx          /* ecx = loop counter for .Lalign_1 */
 182         cmpq %r9,%rdx
 183         jz   .Lhandle_7         /* count == r9: the byte loop copies everything */
 184         js   .Lhandle_7         /* count - r9 has its sign bit set: count is shorter than the run */
 185 .Lalign_1:
 186 .Ls11:  movb (%rsi),%bl         /* .Ls11 / .Ld11 are listed in __ex_table */
 187 .Ld11:  movb %bl,(%rdi)
 188         incq %rsi
 189         incq %rdi
 190         decl %ecx
 191         jnz .Lalign_1
 192         subq %r9,%rdx           /* account for the r9 bytes just copied */
 193         jmp .Lafter_bad_alignment
 194 #endif
 195
 196         /* table sorted by exception address */
             /*
              * Each entry pairs a faulting instruction with its fixup.  In the
              * main loop every group of four loads runs before the matching four
              * stores, so a faulting load has stored nothing from its group:
              * .Ls1-.Ls4 all resume at .Ls1e (64 bytes of the block uncopied) and
              * .Ls5-.Ls8 at .Ls5e (32 bytes uncopied).  A faulting store .LdN has
              * stored the N-1 quadwords before it and resumes at .LsNe.
              */
 197         .section __ex_table,"a"
 198         .align 8
 199         .quad .Ls1,.Ls1e        /* .Ls1-.Ls4: 0 bytes of this block stored */
 200         .quad .Ls2,.Ls1e
 201         .quad .Ls3,.Ls1e
 202         .quad .Ls4,.Ls1e
 203         .quad .Ld1,.Ls1e        /* .Ld1-.Ld4: 0, 8, 16, 24 bytes stored */
 204         .quad .Ld2,.Ls2e
 205         .quad .Ld3,.Ls3e
 206         .quad .Ld4,.Ls4e
 207         .quad .Ls5,.Ls5e        /* .Ls5-.Ls8: 32 bytes stored */
 208         .quad .Ls6,.Ls5e
 209         .quad .Ls7,.Ls5e
 210         .quad .Ls8,.Ls5e
 211         .quad .Ld5,.Ls5e        /* .Ld5-.Ld8: 32, 40, 48, 56 bytes stored */
 212         .quad .Ld6,.Ls6e
 213         .quad .Ld7,.Ls7e
 214         .quad .Ld8,.Ls8e
 215         .quad .Ls9,.Le_quad
 216         .quad .Ld9,.Le_quad
 217         .quad .Ls10,.Le_byte
 218         .quad .Ld10,.Le_byte
 219 #ifdef FIX_ALIGNMENT
 220         .quad .Ls11,.Le_align
 221         .quad .Ld11,.Le_align
 222 #endif
 223         .quad .Le5,.Le_zero
 224         .previous
 225
 226         /* compute 64-offset for main loop. 8 bytes accuracy with error on the
 227            pessimistic side. this is gross. it would be better to fix the
 228            interface. */
 229         /* eax: zero, ebx: 64 */
             /* entering at .LsNe leaves eax = 8 * (9 - N): bytes of the block not copied */
 230 .Ls1e:  addl $8,%eax
 231 .Ls2e:  addl $8,%eax
 232 .Ls3e:  addl $8,%eax
 233 .Ls4e:  addl $8,%eax
 234 .Ls5e:  addl $8,%eax
 235 .Ls6e:  addl $8,%eax
 236 .Ls7e:  addl $8,%eax
 237 .Ls8e:  addl $8,%eax
 238         addq %rbx,%rdi  /* +64 */
 239         subq %rax,%rdi  /* correct destination with computed offset */
 240
 241         shlq $6,%rdx    /* loop counter * 64 (stride length) */
 242         addq %rax,%rdx  /* add offset to loopcnt */
 243         andl $63,%ecx   /* remaining bytes */
 244         addq %rcx,%rdx  /* add them */
 245         jmp .Lzero_rest
 246
     #ifdef FIX_ALIGNMENT
             /* exception in the destination alignment loop */
             /* r9: length of the alignment run, ecx: bytes of it not yet copied,
                rdx: full count, rdi: correct.  rdi has already advanced by
                r9 - ecx bytes, so that much must come off the count; zeroing the
                full count from here would run past the end of the destination. */
     .Le_align:
             subq %r9,%rdx
             addq %rcx,%rdx  /* rdx = count - bytes already copied */
             jmp .Lzero_rest

     #endif
 247         /* exception on quad word loop in tail handling */
 248         /* ecx: loopcnt/8, %edx: length, rdi: correct */
 249 .Le_quad:
 250         shll $3,%ecx    /* quadwords left -> bytes */
 251         andl $7,%edx    /* plus the trailing bytes */
 252         addl %ecx,%edx
 253         /* edx: bytes to zero, rdi: dest, eax:zero */
 254 .Lzero_rest:
 255         movq %rdx,%rcx
             /* byte loop faults enter here with ecx = bytes left; rdx is not
                adjusted on that path, so the length held in rdx is returned */
 256 .Le_byte:
 257         xorl %eax,%eax
 258 .Le5:   rep
 259         stosb
 260         /* when there is another exception while zeroing the rest just return */
 261 .Le_zero:
 262         movq %rdx,%rax
 263         jmp .Lende
 264
 265         /* Some CPUs run faster using the string copy instructions.
 266            This is also a lot simpler. Use them when possible.
 267            Patch in jmps to this code instead of copying it fully
 268            to avoid unwanted aliasing in the exception tables. */
 269
 270  /* rdi destination
 271   * rsi source
 272   * rdx count
 273   *
 274   * Output:
 275   * eax uncopied bytes or 0 if successful.
       *
       * When a copy instruction faults, the part of the destination that was
       * not written is zeroed before returning, matching the unrolled
       * copy_user_generic fixup path.
 276   *
 277   * Only 4GB of copy is supported. This shouldn't be a problem
 278   * because the kernel normally only writes from/to page sized chunks
 279   * even if user space passed a longer buffer.
 280   * And more would be dangerous because both Intel and AMD have
 281   * errata with rep movsq > 4GB. If someone feels the need to fix
 282   * this please consider this.
 283    */
 284 copy_user_generic_c:
 285         movl %edx,%ecx
 286         shrl $3,%ecx            /* ecx = whole quadwords */
 287         andl $7,%edx            /* edx = trailing bytes */
 288 1:      rep
 289         movsq
 290         movl %edx,%ecx
 291 2:      rep
 292         movsb
 293         movl %ecx,%eax          /* both copies ran to completion: ecx is 0 */
 294         ret
             /* fault in the quadword copy: rcx quadwords plus edx bytes are left */
 295 3:      lea (%rdx,%rcx,8),%rcx
             /* fault in the byte copy enters here with ecx = bytes left */
     4:      movl %ecx,%edx          /* keep the residue for the return value */
             xorl %eax,%eax
     5:      rep
             stosb                   /* zero the unwritten rest of the destination */
             /* a fault while zeroing resumes here and just returns the residue */
     6:      movl %edx,%eax
 296         ret
 297
 298         .section __ex_table,"a"
 299         .quad 1b,3b
 300         .quad 2b,4b
             .quad 5b,6b
 301         .previous
 301         .previous