arch/x86_64/lib/copy_user.S

   1 /* Copyright 2002 Andi Kleen, SuSE Labs.
   2  * Subject to the GNU Public License v2.
   3  *
   4  * Functions to copy from and to user space.
   5  */
   6
   7 #include <linux/linkage.h>
   8 #include <asm/dwarf2.h>
   9
  10 #define FIX_ALIGNMENT 1
  11
  12         #include <asm/current.h>
  13         #include <asm/asm-offsets.h>
  14         #include <asm/thread_info.h>
  15         #include <asm/cpufeature.h>
  16
  17 /* Standard copy_to_user with segment limit checking (rdi = dest, rsi = source, rdx = count) */
  18 ENTRY(copy_to_user)
  19         CFI_STARTPROC
  20         GET_THREAD_INFO(%rax)   /* rax is the base for the addr_limit lookup below */
  21         movq %rdi,%rcx          /* rcx = start of destination */
  22         addq %rdx,%rcx          /* rcx = start + count */
  23         jc  bad_to_user         /* the addition wrapped around */
  24         cmpq threadinfo_addr_limit(%rax),%rcx
  25         jae bad_to_user         /* end of range is at or above the limit */
  26 2:                              /* patch site named in .altinstructions below */
  27         .byte 0xe9      /* 32bit jump */
  28         .long .Lcug-1f          /* displacement to .Lcug, measured from the end of the jump */
  29 1:
  30         CFI_ENDPROC
  31 ENDPROC(copy_to_user)
  32
  33         .section .altinstr_replacement,"ax"
  34 3:      .byte 0xe9                      /* replacement jmp with 32 bit immediate */
  35         .long copy_user_generic_c-1b    /* offset, measured from 1b in the original code */
  36         .previous
  37         .section .altinstructions,"a"
  38         .align 8
  39         .quad  2b                       /* original jump */
  40         .quad  3b                       /* replacement jump */
  41         .byte  X86_FEATURE_REP_GOOD     /* CPU feature tied to this entry */
  42         .byte  5                        /* presumably the original length in bytes - confirm against struct alt_instr */
  43         .byte  5                        /* presumably the replacement length in bytes - confirm against struct alt_instr */
  44         .previous
  45
  46 /* Standard copy_from_user with segment limit checking (rdi = dest, rsi = source, rdx = count) */
  47 ENTRY(copy_from_user)
  48         CFI_STARTPROC
  49         GET_THREAD_INFO(%rax)   /* rax is the base for the addr_limit lookup below */
  50         movq %rsi,%rcx          /* rcx = start of source */
  51         addq %rdx,%rcx          /* rcx = start + count */
  52         jc  bad_from_user       /* the addition wrapped around */
  53         cmpq threadinfo_addr_limit(%rax),%rcx
  54         jae  bad_from_user      /* end of range is at or above the limit */
  55         /* FALL THROUGH to copy_user_generic (the .fixup code that follows is assembled into another section) */
  56         CFI_ENDPROC
  57 ENDPROC(copy_from_user)
  58
  59         .section .fixup,"ax"
  60         /* must zero dest: the source range was rejected, so the whole destination is cleared */
  61 bad_from_user:
  62         CFI_STARTPROC
  63         movl %edx,%ecx          /* ecx = count (low 32 bits of rdx) */
  64         xorl %eax,%eax          /* al = 0, the fill byte */
  65         rep
  66         stosb                   /* store zero bytes at (%rdi), rcx times */
  67 bad_to_user:
  68         movl    %edx,%eax       /* return the count: nothing was copied */
  69         ret
  70         CFI_ENDPROC
  71 END(bad_from_user)
  72         .previous
  73
  74
  75 /*
  76  * copy_user_generic - memory copy with exception handling.
  77  * On a fault the not yet copied part of the destination is zeroed.
  78  * Input:
  79  * rdi destination
  80  * rsi source
  81  * rdx count
  82  * Clobbers rcx, rdx, rsi, rdi, r8-r11 and flags; rbx is saved and restored.
  83  * Output:
  84  * eax uncopied bytes or 0 if successful.
  85  */
  86 ENTRY(copy_user_generic)
  87         CFI_STARTPROC
  88         .byte 0x66,0x66,0x90    /* 5 byte nop for replacement jump */
  89         .byte 0x66,0x90
  90 1:
  91         .section .altinstr_replacement,"ax"
  92 2:      .byte 0xe9                   /* near jump with 32bit immediate */
  93         .long copy_user_generic_c-1b /* offset, measured from the end of the nop */
  94         .previous
  95         .section .altinstructions,"a"
  96         .align 8
  97         .quad  copy_user_generic     /* original code: the 5 byte nop */
  98         .quad  2b                    /* replacement jump */
  99         .byte  X86_FEATURE_REP_GOOD
 100         .byte  5
 101         .byte  5
 102         .previous
 103 .Lcug:
 104         pushq %rbx
 105         CFI_ADJUST_CFA_OFFSET 8
 106         CFI_REL_OFFSET rbx, 0
 107         xorl %eax,%eax          /* zero for the exception handler */
 108
 109 #ifdef FIX_ALIGNMENT
 110         /* check for bad alignment of destination */
 111         movl %edi,%ecx
 112         andl $7,%ecx            /* ecx = destination offset within its quadword */
 113         jnz  .Lbad_alignment
 114 .Lafter_bad_alignment:
 115 #endif
 116
 117         movq %rdx,%rcx          /* rcx keeps the byte count for the tail handling */
 118
 119         movl $64,%ebx           /* stride, read by the .Ls1e-.Ls8e fixup */
 120         shrq $6,%rdx            /* rdx = number of 64-byte blocks */
 121         decq %rdx
 122         js   .Lhandle_tail      /* fewer than 64 bytes: no full block */
 123
 124         .p2align 4
 125 .Lloop:
 126 .Ls1:   movq (%rsi),%r11
 127 .Ls2:   movq 1*8(%rsi),%r8
 128 .Ls3:   movq 2*8(%rsi),%r9
 129 .Ls4:   movq 3*8(%rsi),%r10
 130 .Ld1:   movq %r11,(%rdi)
 131 .Ld2:   movq %r8,1*8(%rdi)
 132 .Ld3:   movq %r9,2*8(%rdi)
 133 .Ld4:   movq %r10,3*8(%rdi)
 134
 135 .Ls5:   movq 4*8(%rsi),%r11
 136 .Ls6:   movq 5*8(%rsi),%r8
 137 .Ls7:   movq 6*8(%rsi),%r9
 138 .Ls8:   movq 7*8(%rsi),%r10
 139 .Ld5:   movq %r11,4*8(%rdi)
 140 .Ld6:   movq %r8,5*8(%rdi)
 141 .Ld7:   movq %r9,6*8(%rdi)
 142 .Ld8:   movq %r10,7*8(%rdi)
 143
 144         decq %rdx               /* sets the sign flag tested by jns below */
 145
 146         leaq 64(%rsi),%rsi      /* lea does not touch the flags */
 147         leaq 64(%rdi),%rdi
 148
 149         jns  .Lloop
 150
 151         .p2align 4
 152 .Lhandle_tail:
 153         movl %ecx,%edx          /* edx = byte count (low 32 bits) */
 154         andl $63,%ecx
 155         shrl $3,%ecx            /* ecx = quadwords left after the 64-byte blocks */
 156         jz   .Lhandle_7
 157         movl $8,%ebx
 158         .p2align 4
 159 .Lloop_8:
 160 .Ls9:   movq (%rsi),%r8
 161 .Ld9:   movq %r8,(%rdi)
 162         decl %ecx               /* sets the zero flag tested by jnz below */
 163         leaq 8(%rdi),%rdi
 164         leaq 8(%rsi),%rsi
 165         jnz .Lloop_8
 166
 167 .Lhandle_7:
 168         movl %edx,%ecx
 169         andl $7,%ecx            /* ecx = trailing bytes */
 170         jz   .Lende
 171         .p2align 4
 172 .Lloop_1:
 173 .Ls10:  movb (%rsi),%bl
 174 .Ld10:  movb %bl,(%rdi)
 175         incq %rdi
 176         incq %rsi
 177         decl %ecx
 178         jnz .Lloop_1
 179
 180         CFI_REMEMBER_STATE
 181 .Lende:
 182         popq %rbx
 183         CFI_ADJUST_CFA_OFFSET -8
 184         CFI_RESTORE rbx
 185         ret
 186         CFI_RESTORE_STATE
 187
 188 #ifdef FIX_ALIGNMENT
 189         /* align destination */
 190         .p2align 4
 191 .Lbad_alignment:
 192         movl $8,%r9d
 193         subl %ecx,%r9d          /* r9 = bytes needed to reach 8-byte alignment */
 194         movl %r9d,%ecx
 195         cmpq %r9,%rdx
 196         jz   .Lhandle_7         /* count <= r9: plain byte copy of everything */
 197         js   .Lhandle_7
 198 .Lalign_1:
 199 .Ls11:  movb (%rsi),%bl
 200 .Ld11:  movb %bl,(%rdi)
 201         incq %rsi
 202         incq %rdi
 203         decq %rdx               /* rdx tracks bytes left, so a fault zeroes only the remainder and stays inside dest */
 204         decl %ecx               /* must stay directly before jnz: it supplies the zero flag */
 205         jnz .Lalign_1
 206         jmp .Lafter_bad_alignment
 207 #endif
 208
 209         /* table sorted by exception address */
 210         .section __ex_table,"a"
 211         .align 8
 212         .quad .Ls1,.Ls1e
 213         .quad .Ls2,.Ls2e
 214         .quad .Ls3,.Ls3e
 215         .quad .Ls4,.Ls4e
 216         .quad .Ld1,.Ls1e
 217         .quad .Ld2,.Ls2e
 218         .quad .Ld3,.Ls3e
 219         .quad .Ld4,.Ls4e
 220         .quad .Ls5,.Ls5e
 221         .quad .Ls6,.Ls6e
 222         .quad .Ls7,.Ls7e
 223         .quad .Ls8,.Ls8e
 224         .quad .Ld5,.Ls5e
 225         .quad .Ld6,.Ls6e
 226         .quad .Ld7,.Ls7e
 227         .quad .Ld8,.Ls8e
 228         .quad .Ls9,.Le_quad
 229         .quad .Ld9,.Le_quad
 230         .quad .Ls10,.Le_byte
 231         .quad .Ld10,.Le_byte
 232 #ifdef FIX_ALIGNMENT
 233         .quad .Ls11,.Lzero_rest
 234         .quad .Ld11,.Lzero_rest
 235 #endif
 236         .quad .Le5,.Le_zero
 237         .previous
 238
 239         /* compute 64-offset for main loop. 8 bytes accuracy with error on the
 240            pessimistic side. this is gross. it would be better to fix the
 241            interface. */
 242         /* eax: zero, ebx: 64 */
 243 .Ls1e:  addl $8,%eax
 244 .Ls2e:  addl $8,%eax
 245 .Ls3e:  addl $8,%eax
 246 .Ls4e:  addl $8,%eax
 247 .Ls5e:  addl $8,%eax
 248 .Ls6e:  addl $8,%eax
 249 .Ls7e:  addl $8,%eax
 250 .Ls8e:  addl $8,%eax    /* eax = bytes of this block counted as uncopied */
 251         addq %rbx,%rdi  /* +64 */
 252         subq %rax,%rdi  /* correct destination with computed offset */
 253
 254         shlq $6,%rdx    /* loop counter * 64 (stride length) */
 255         addq %rax,%rdx  /* add offset to loopcnt */
 256         andl $63,%ecx   /* remaining bytes */
 257         addq %rcx,%rdx  /* add them */
 258         jmp .Lzero_rest
 259
 260         /* exception on quad word loop in tail handling */
 261         /* ecx: loopcnt/8, %edx: length, rdi: correct */
 262 .Le_quad:
 263         shll $3,%ecx    /* quadwords left -> bytes */
 264         andl $7,%edx    /* trailing bytes */
 265         addl %ecx,%edx
 266         /* edx: bytes to zero, rdi: dest, eax:zero */
 267 .Lzero_rest:
 268         movq %rdx,%rcx
 269 .Le_byte: movq %rcx,%rdx /* byte loop fault: report the rcx bytes still uncopied; no-op on fall through */
 270         xorl %eax,%eax
 271 .Le5:   rep
 272         stosb
 273         /* when there is another exception while zeroing the rest just return */
 274 .Le_zero:
 275         movq %rdx,%rax  /* return value: uncopied bytes */
 276         jmp .Lende
 277         CFI_ENDPROC
 278 ENDPROC(copy_user_generic)
 279
 280
 281         /* Some CPUs run faster using the string copy instructions.
 282            This is also a lot simpler. Use them when possible.
 283            Patch in jmps to this code instead of copying it fully
 284            to avoid unwanted aliasing in the exception tables. */
 285
 286  /* rdi destination
 287   * rsi source
 288   * rdx count
 289   *
 290   * Output:
 291   * eax uncopied bytes or 0 if successful.
 292   *
 293   * Only 4GB of copy is supported. This shouldn't be a problem
 294   * because the kernel normally only writes from/to page sized chunks
 295   * even if user space passed a longer buffer.
 296   * And more would be dangerous because both Intel and AMD have
 297   * errata with rep movsq > 4GB. If someone feels the need to fix
 298   * this please consider this.
 299    */
 300 copy_user_generic_c:
 301         CFI_STARTPROC
 302         movl %edx,%ecx          /* only the low 32 bits of the count are used */
 303         shrl $3,%ecx            /* ecx = number of quadwords */
 304         andl $7,%edx            /* edx = trailing bytes */
 305 1:      rep
 306         movsq
 307         movl %edx,%ecx
 308 2:      rep
 309         movsb
 310 4:      movl %ecx,%eax          /* also the fixup for 2b: ecx = bytes movsb left uncopied */
 311         ret
 312 3:      lea (%rdx,%rcx,8),%rax  /* fixup for 1b: trailing bytes + 8 * quadwords left */
 313         ret
 314         CFI_ENDPROC
 315 END(copy_user_generic_c)
 316
 317         .section __ex_table,"a"
 318         .quad 1b,3b             /* fault in rep movsq -> 3 */
 319         .quad 2b,4b             /* fault in rep movsb -> 4 */
 320         .previous