arch/x86/lib/copy_user_64.S

   1 /*
   2  * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
   3  * Copyright 2002 Andi Kleen, SuSE Labs.
   4  * Subject to the GNU Public License v2.
   5  *
   6  * Functions to copy from and to user space.
   7  */
   8
   9 #include <linux/linkage.h>
  10 #include <asm/current.h>
  11 #include <asm/asm-offsets.h>
  12 #include <asm/thread_info.h>
  13 #include <asm/cpufeatures.h>
  14 #include <asm/alternative-asm.h>
  15 #include <asm/asm.h>
  16 #include <asm/smap.h>
  17 #include <asm/export.h>
  18
  19 /*
  20  * copy_user_generic_unrolled - memory copy with exception handling.
  21  * This version is for CPUs like P4 that don't have efficient micro
  22  * code for rep movsq
  23  *
  24  * Input:
  25  * rdi destination
  26  * rsi source
  27  * rdx count
  28  *
  29  * Output:
  30  * eax uncopied bytes or 0 if successful.
      *
      * Notes:
      * - The code visible here only ever reads the low 32 bits of the
      *   count (edx).
      * - The copy runs in three stages: a 64-byte unrolled loop, an
      *   8-byte loop, then a byte loop.  rcx, r8-r11 and al are scratch.
      * - Every numbered load/store has an exception-table entry.  On a
      *   fault the fixup code recomputes in edx the number of bytes from
      *   the current rsi/rdi to the end of the request and tail-jumps to
      *   copy_user_handle_tail.  That routine is not defined in this
      *   view; presumably it produces the "uncopied bytes" value
      *   returned in eax -- confirm there.
      * - .L_copy_short_string is also entered from
      *   copy_user_enhanced_fast_string for counts below 64.
  31  */
  32 ENTRY(copy_user_generic_unrolled)
  33         ASM_STAC                /* macro not defined in this view; presumably opens the user-access window */
  34         cmpl $8,%edx
  35         jb 20f          /* less than 8 bytes, go to byte copy loop */
  36         ALIGN_DESTINATION       /* macro not defined in this view; presumably byte-copies until rdi is 8-byte aligned and adjusts edx -- confirm */
  37         movl %edx,%ecx
  38         andl $63,%edx           /* edx = bytes left over after the 64-byte chunks */
  39         shrl $6,%ecx            /* ecx = number of 64-byte chunks */
  40         jz .L_copy_short_string /* no full 64-byte chunk */
  41 1:      movq (%rsi),%r8         /* 1-4: load the first 32 bytes of the chunk */
  42 2:      movq 1*8(%rsi),%r9
  43 3:      movq 2*8(%rsi),%r10
  44 4:      movq 3*8(%rsi),%r11
  45 5:      movq %r8,(%rdi)         /* 5-8: store the first 32 bytes */
  46 6:      movq %r9,1*8(%rdi)
  47 7:      movq %r10,2*8(%rdi)
  48 8:      movq %r11,3*8(%rdi)
  49 9:      movq 4*8(%rsi),%r8      /* 9-12: load the second 32 bytes */
  50 10:     movq 5*8(%rsi),%r9
  51 11:     movq 6*8(%rsi),%r10
  52 12:     movq 7*8(%rsi),%r11
  53 13:     movq %r8,4*8(%rdi)      /* 13-16: store the second 32 bytes */
  54 14:     movq %r9,5*8(%rdi)
  55 15:     movq %r10,6*8(%rdi)
  56 16:     movq %r11,7*8(%rdi)
  57         leaq 64(%rsi),%rsi      /* pointers advance only after the whole chunk is stored */
  58         leaq 64(%rdi),%rdi
  59         decl %ecx
  60         jnz 1b
  61 .L_copy_short_string:           /* expects edx = remaining byte count */
  62         movl %edx,%ecx
  63         andl $7,%edx            /* edx = trailing bytes (0..7) */
  64         shrl $3,%ecx            /* ecx = number of whole qwords */
  65         jz 20f
  66 18:     movq (%rsi),%r8
  67 19:     movq %r8,(%rdi)
  68         leaq 8(%rsi),%rsi
  69         leaq 8(%rdi),%rdi
  70         decl %ecx
  71         jnz 18b
  72 20:     andl %edx,%edx          /* sets ZF when no bytes remain */
  73         jz 23f
  74         movl %edx,%ecx          /* ecx = byte loop counter */
  75 21:     movb (%rsi),%al
  76 22:     movb %al,(%rdi)
  77         incq %rsi
  78         incq %rdi
  79         decl %ecx
  80         jnz 21b
  81 23:     xor %eax,%eax           /* no fault: return 0 */
  82         ASM_CLAC                /* macro not defined in this view; presumably the counterpart of ASM_STAC */
  83         ret
  84
  85         .section .fixup,"ax"
  86 30:     shll $6,%ecx            /* fault in 64-byte loop: ecx = unfinished chunks (current one included) * 64 */
  87         addl %ecx,%edx          /* edx = unfinished chunk bytes + leftover */
  88         jmp 60f
  89 40:     leal (%rdx,%rcx,8),%edx /* fault in 8-byte loop: edx = trailing bytes + unfinished qwords * 8 */
  90         jmp 60f
  91 50:     movl %ecx,%edx          /* fault in byte loop: edx = bytes not yet copied */
  92 60:     jmp copy_user_handle_tail /* ecx is zerorest also */ /* NOTE(review): callee not visible here; confirm it still consumes ecx */
  93         .previous
  94
  95         _ASM_EXTABLE(1b,30b)    /* labels 1-16 (64-byte loop) -> fixup 30 */
  96         _ASM_EXTABLE(2b,30b)
  97         _ASM_EXTABLE(3b,30b)
  98         _ASM_EXTABLE(4b,30b)
  99         _ASM_EXTABLE(5b,30b)
 100         _ASM_EXTABLE(6b,30b)
 101         _ASM_EXTABLE(7b,30b)
 102         _ASM_EXTABLE(8b,30b)
 103         _ASM_EXTABLE(9b,30b)
 104         _ASM_EXTABLE(10b,30b)
 105         _ASM_EXTABLE(11b,30b)
 106         _ASM_EXTABLE(12b,30b)
 107         _ASM_EXTABLE(13b,30b)
 108         _ASM_EXTABLE(14b,30b)
 109         _ASM_EXTABLE(15b,30b)
 110         _ASM_EXTABLE(16b,30b)
 111         _ASM_EXTABLE(18b,40b)   /* labels 18-19 (8-byte loop) -> fixup 40 */
 112         _ASM_EXTABLE(19b,40b)
 113         _ASM_EXTABLE(21b,50b)   /* labels 21-22 (byte loop) -> fixup 50 */
 114         _ASM_EXTABLE(22b,50b)
 115 ENDPROC(copy_user_generic_unrolled)
 116 EXPORT_SYMBOL(copy_user_generic_unrolled)
 117
 118 /* Some CPUs run faster using the string copy instructions.
 119  * This is also a lot simpler. Use them when possible.
 120  *
 121  * Only 4GB of copy is supported. This shouldn't be a problem
 122  * because the kernel normally only writes from/to page sized chunks
 123  * even if user space passed a longer buffer.
 124  * And more would be dangerous because both Intel and AMD have
 125  * errata with rep movsq > 4GB. If someone feels the need to fix
 126  * this please consider this.
 127  *
 128  * Input:
 129  * rdi destination
 130  * rsi source
 131  * rdx count
 132  *
 133  * Output:
 134  * eax uncopied bytes or 0 if successful.
      *
      * Notes:
      * - The code visible here only reads the low 32 bits of the count
      *   (edx), which is where the 4GB limit above comes from.
      * - The bulk of the copy is one "rep movsq" (count / 8 qwords),
      *   followed by one "rep movsb" for the trailing count % 8 bytes.
      * - A fault in either string instruction is routed through the
      *   exception table to the fixup code, which puts the number of
      *   bytes still to be copied in edx and tail-jumps to
      *   copy_user_handle_tail.  That routine is not defined in this
      *   view; presumably it produces the "uncopied bytes" value
      *   returned in eax -- confirm there.
 135  */
 136 ENTRY(copy_user_generic_string)
 137         ASM_STAC                /* macro not defined in this view; presumably opens the user-access window */
 138         cmpl $8,%edx
 139         jb 2f           /* less than 8 bytes, go straight to the rep movsb tail */
 140         ALIGN_DESTINATION       /* macro not defined in this view; presumably byte-copies until rdi is 8-byte aligned and adjusts edx -- confirm */
 141         movl %edx,%ecx
 142         shrl $3,%ecx            /* ecx = number of whole qwords */
 143         andl $7,%edx            /* edx = trailing bytes (0..7) */
 144 1:      rep                     /* label sits on the prefix so the extable entry covers the whole rep movsq */
 145         movsq
 146 2:      movl %edx,%ecx          /* ecx = trailing bytes, or the whole count when it was below 8 */
 147 3:      rep
 148         movsb
 149         xorl %eax,%eax          /* no fault: return 0 */
 150         ASM_CLAC                /* macro not defined in this view; presumably the counterpart of ASM_STAC */
 151         ret
 152
 153         .section .fixup,"ax"
 154 11:     leal (%rdx,%rcx,8),%ecx /* fault in rep movsq: ecx = trailing bytes + unfinished qwords * 8; falls through to 12 */
 155 12:     movl %ecx,%edx          /* edx = bytes not yet copied; ecx is zerorest also (NOTE(review): callee not visible here; confirm it still consumes ecx) */
 156         jmp copy_user_handle_tail
 157         .previous
 158
 159         _ASM_EXTABLE(1b,11b)    /* rep movsq -> fixup 11 */
 160         _ASM_EXTABLE(3b,12b)    /* rep movsb -> fixup 12 */
 161 ENDPROC(copy_user_generic_string)
 162 EXPORT_SYMBOL(copy_user_generic_string)
 163
 164 /*
 165  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 166  * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 167  *
 168  * Input:
 169  * rdi destination
 170  * rsi source
 171  * rdx count
 172  *
 173  * Output:
 174  * eax uncopied bytes or 0 if successful.
      *
      * Notes:
      * - The code visible here only reads the low 32 bits of the count
      *   (edx).
      * - Counts below 64 do not use "rep movsb" at all: control jumps to
      *   .L_copy_short_string, a label inside copy_user_generic_unrolled
      *   in this file.  That code performs the copy and the return, and
      *   faults taken there are handled by that function's
      *   exception-table entries, not by the one below.
      * - A fault in "rep movsb" goes to fixup 12, which passes the
      *   remaining byte count in edx to copy_user_handle_tail.  That
      *   routine is not defined in this view; presumably it produces the
      *   "uncopied bytes" value returned in eax -- confirm there.
 175  */
 176 ENTRY(copy_user_enhanced_fast_string)
 177         ASM_STAC                /* macro not defined in this view; presumably opens the user-access window */
 178         cmpl $64,%edx
 179         jb .L_copy_short_string /* less than 64 bytes, avoid the costly 'rep' */
 180         movl %edx,%ecx          /* ecx = byte count for rep movsb */
 181 1:      rep                     /* label sits on the prefix so the extable entry covers the whole rep movsb */
 182         movsb
 183         xorl %eax,%eax          /* no fault: return 0 */
 184         ASM_CLAC                /* macro not defined in this view; presumably the counterpart of ASM_STAC */
 185         ret
 186
 187         .section .fixup,"ax"
 188 12:     movl %ecx,%edx          /* edx = bytes not yet copied; ecx is zerorest also (NOTE(review): callee not visible here; confirm it still consumes ecx) */
 189         jmp copy_user_handle_tail
 190         .previous
 191
 192         _ASM_EXTABLE(1b,12b)    /* rep movsb -> fixup 12 */
 193 ENDPROC(copy_user_enhanced_fast_string)
 194 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 195
 196 /*
 197  * __copy_user_nocache - Uncached memory copy with exception handling
 198  * This will force destination out of cache for more performance.
 199  *
 200  * Note: Cached memory copy is used when destination or size is not
 201  * naturally aligned. That is:
 202  *  - Require 8-byte alignment when size is 8 bytes or larger.
 203  *  - Require 4-byte alignment when size is 4 bytes.
      *
      * Input:
      * rdi destination
      * rsi source
      * rdx count (the code visible here only reads the low 32 bits, edx)
      *
      * Output:
      * eax 0 on the non-faulting path.  On a fault the fixup code puts
      * the number of bytes still to be copied in edx, issues sfence and
      * tail-jumps to copy_user_handle_tail; that routine is not defined
      * in this view, so the value returned in that case is decided
      * there -- confirm.
      *
      * Stages: 64-byte loop (movnti), 8-byte loop (movnti), at most one
      * 4-byte movnti, then an ordinary byte loop.  rcx, r8-r11 and al
      * are scratch.
 204  */
 205 ENTRY(__copy_user_nocache)
 206         ASM_STAC                /* macro not defined in this view; presumably opens the user-access window */
 207
 208         /* If size is less than 8 bytes, go to 4-byte copy */
 209         cmpl $8,%edx
 210         jb .L_4b_nocache_copy_entry
 211
 212         /* If destination is not 8-byte aligned, "cache" copy to align it */
 213         ALIGN_DESTINATION       /* macro not defined in this view */
 214
 215         /* Set 64-byte (2 x 4x8-byte) chunk count and remainder */
 216         movl %edx,%ecx
 217         andl $63,%edx           /* edx = bytes left over after the 64-byte chunks */
 218         shrl $6,%ecx            /* ecx = number of 64-byte chunks */
 219         jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
 220
 221         /* Perform 4x8-byte nocache loop-copy, twice per 64-byte iteration */
 222 .L_4x8b_nocache_copy_loop:
 223 1:      movq (%rsi),%r8         /* 1-4: load the first 32 bytes of the chunk */
 224 2:      movq 1*8(%rsi),%r9
 225 3:      movq 2*8(%rsi),%r10
 226 4:      movq 3*8(%rsi),%r11
 227 5:      movnti %r8,(%rdi)       /* 5-8: non-temporal store of the first 32 bytes */
 228 6:      movnti %r9,1*8(%rdi)
 229 7:      movnti %r10,2*8(%rdi)
 230 8:      movnti %r11,3*8(%rdi)
 231 9:      movq 4*8(%rsi),%r8      /* 9-12: load the second 32 bytes */
 232 10:     movq 5*8(%rsi),%r9
 233 11:     movq 6*8(%rsi),%r10
 234 12:     movq 7*8(%rsi),%r11
 235 13:     movnti %r8,4*8(%rdi)    /* 13-16: non-temporal store of the second 32 bytes */
 236 14:     movnti %r9,5*8(%rdi)
 237 15:     movnti %r10,6*8(%rdi)
 238 16:     movnti %r11,7*8(%rdi)
 239         leaq 64(%rsi),%rsi      /* pointers advance only after the whole chunk is stored */
 240         leaq 64(%rdi),%rdi
 241         decl %ecx
 242         jnz .L_4x8b_nocache_copy_loop
 243
 244         /* Set 8-byte copy count and remainder */
 245 .L_8b_nocache_copy_entry:
 246         movl %edx,%ecx
 247         andl $7,%edx            /* edx = trailing bytes (0..7) */
 248         shrl $3,%ecx            /* ecx = number of whole qwords */
 249         jz .L_4b_nocache_copy_entry     /* jump if count is 0 */
 250
 251         /* Perform 8-byte nocache loop-copy */
 252 .L_8b_nocache_copy_loop:
 253 20:     movq (%rsi),%r8
 254 21:     movnti %r8,(%rdi)
 255         leaq 8(%rsi),%rsi
 256         leaq 8(%rdi),%rdi
 257         decl %ecx
 258         jnz .L_8b_nocache_copy_loop
 259
 260         /* If no byte left, we're done */
 261 .L_4b_nocache_copy_entry:
 262         andl %edx,%edx          /* sets ZF when no bytes remain */
 263         jz .L_finish_copy
 264
 265         /* If destination is not 4-byte aligned, go to byte copy: */
 266         movl %edi,%ecx
 267         andl $3,%ecx
 268         jnz .L_1b_cache_copy_entry
 269
 270         /* Set 4-byte copy count (1 or 0) and remainder */
 271         movl %edx,%ecx
 272         andl $3,%edx            /* edx = trailing bytes (0..3) */
 273         shrl $2,%ecx            /* ecx = 1 if at least 4 bytes remain, else 0 */
 274         jz .L_1b_cache_copy_entry       /* jump if count is 0 */
 275
 276         /* Perform 4-byte nocache copy: */
 277 30:     movl (%rsi),%r8d
 278 31:     movnti %r8d,(%rdi)
 279         leaq 4(%rsi),%rsi
 280         leaq 4(%rdi),%rdi
 281
 282         /* If no bytes left, we're done: */
 283         andl %edx,%edx
 284         jz .L_finish_copy
 285
 286         /* Perform byte "cache" loop-copy for the remainder */
 287 .L_1b_cache_copy_entry:
 288         movl %edx,%ecx          /* ecx = byte loop counter */
 289 .L_1b_cache_copy_loop:
 290 40:     movb (%rsi),%al
 291 41:     movb %al,(%rdi)
 292         incq %rsi
 293         incq %rdi
 294         decl %ecx
 295         jnz .L_1b_cache_copy_loop
 296
 297         /* Finished copying; fence the prior stores */
 298 .L_finish_copy:
 299         xorl %eax,%eax          /* no fault: return 0 */
 300         ASM_CLAC                /* macro not defined in this view; presumably the counterpart of ASM_STAC */
 301         sfence
 302         ret
 303
 304         .section .fixup,"ax"
 305 .L_fixup_4x8b_copy:             /* fault in the 64-byte loop */
 306         shll $6,%ecx            /* ecx = unfinished chunks (current one included) * 64 */
 307         addl %ecx,%edx          /* edx = unfinished chunk bytes + leftover */
 308         jmp .L_fixup_handle_tail
 309 .L_fixup_8b_copy:               /* fault in the 8-byte loop */
 310         lea (%rdx,%rcx,8),%rdx  /* rdx = trailing bytes + unfinished qwords * 8 */
 311         jmp .L_fixup_handle_tail
 312 .L_fixup_4b_copy:               /* fault in the single 4-byte copy */
 313         lea (%rdx,%rcx,4),%rdx  /* rdx = trailing bytes + ecx * 4 */
 314         jmp .L_fixup_handle_tail
 315 .L_fixup_1b_copy:               /* fault in the byte loop */
 316         movl %ecx,%edx          /* edx = bytes not yet copied */
 317 .L_fixup_handle_tail:
 318         sfence                  /* fence the non-temporal stores already issued before leaving */
 319         jmp copy_user_handle_tail
 320         .previous
 321
 322         _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)     /* labels 1-16: 64-byte loop */
 323         _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
 324         _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
 325         _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
 326         _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
 327         _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
 328         _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
 329         _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
 330         _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
 331         _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
 332         _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
 333         _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
 334         _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
 335         _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
 336         _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
 337         _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
 338         _ASM_EXTABLE(20b,.L_fixup_8b_copy)      /* labels 20-21: 8-byte loop */
 339         _ASM_EXTABLE(21b,.L_fixup_8b_copy)
 340         _ASM_EXTABLE(30b,.L_fixup_4b_copy)      /* labels 30-31: 4-byte copy */
 341         _ASM_EXTABLE(31b,.L_fixup_4b_copy)
 342         _ASM_EXTABLE(40b,.L_fixup_1b_copy)      /* labels 40-41: byte loop */
 343         _ASM_EXTABLE(41b,.L_fixup_1b_copy)
 344 ENDPROC(__copy_user_nocache)
 345 EXPORT_SYMBOL(__copy_user_nocache)