/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f	/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret
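
	/*
	 * Exception fixups: compute the number of bytes still left to copy
	 * into %edx and hand off to copy_user_handle_tail, which copies the
	 * remainder bytewise and returns the uncopied count in %eax.
	 *  - 30: fault in the 64-byte loop; %ecx 64-byte blocks plus the
	 *        %edx tail remain, so remaining = %ecx*64 + %edx.
	 *  - 40: fault in the 8-byte loop; remaining = %edx + %ecx*8.
	 *  - 50: fault in the byte loop; remaining = %ecx.
	 * A partially copied block counts as uncopied; re-copying those
	 * bytes in the tail handler is harmless.
	 */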
	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to lift
 * this limit, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f	/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs provide enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force the destination out of the cache for better performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret
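
	/*
	 * Exception fixups: as above, compute the number of bytes left to
	 * copy into %edx before branching to copy_user_handle_tail:
	 *  - .L_fixup_4x8b_copy: remaining = %ecx*64 + %edx
	 *  - .L_fixup_8b_copy:   remaining = %rdx + %rcx*8
	 *  - .L_fixup_4b_copy:   remaining = %rdx + %rcx*4
	 *  - .L_fixup_1b_copy:   remaining = %ecx
	 * The sfence in .L_fixup_handle_tail makes the non-temporal stores
	 * issued so far globally visible before the tail is handled.
	 */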
	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)