/* SPDX-License-Identifier: GPL-2.0 */
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

/*
 * Syntax: GNU as, AT&T operand order (source, destination), run through cpp.
 *
 * Register roles, as used by both routines in this file:
 *   %rsi  read pointer  (advanced as data is consumed)
 *   %rdi  write pointer (advanced as data is produced)
 *   %rcx  iteration counter
 */

/* Geometry of the copy. */
#define CP_PAGE_BYTES		4096	/* total bytes copied per call */
#define CP_CHUNK_BYTES		64	/* bytes moved per unrolled iteration */
#define CP_PREFETCH_CHUNKS	5	/* prefetch distance, in chunks */

/*
 * Load one chunk (eight quadwords) from (%rsi) into
 * %rax, %rbx, %rdx, %r8, %r9, %r10, %r11, %r12. Does not touch flags.
 */
.macro cp_load_chunk
	movq	0*8(%rsi), %rax
	movq	1*8(%rsi), %rbx
	movq	2*8(%rsi), %rdx
	movq	3*8(%rsi), %r8
	movq	4*8(%rsi), %r9
	movq	5*8(%rsi), %r10
	movq	6*8(%rsi), %r11
	movq	7*8(%rsi), %r12
.endm

/*
 * Store the eight quadwords held in
 * %rax, %rbx, %rdx, %r8, %r9, %r10, %r11, %r12 to (%rdi). Does not touch flags.
 */
.macro cp_store_chunk
	movq	%rax, 0*8(%rdi)
	movq	%rbx, 1*8(%rdi)
	movq	%rdx, 2*8(%rdi)
	movq	%r8,  3*8(%rdi)
	movq	%r9,  4*8(%rdi)
	movq	%r10, 5*8(%rdi)
	movq	%r11, 6*8(%rdi)
	movq	%r12, 7*8(%rdi)
.endm

/*
 * copy_page: copy CP_PAGE_BYTES bytes from (%rsi) to (%rdi).
 *
 * Some CPUs run faster using the string copy instructions (sane microcode),
 * and that form is also a lot simpler, so use it when possible. But don't
 * use the string copy unless the CPU indicates X86_FEATURE_REP_GOOD; without
 * that feature the entry jumps to the unrolled register loop in
 * copy_page_regs instead. The prefetch distance could be varied based on
 * SMP/UP.
 *
 * Clobbers on the string-copy path: %rcx, %rsi, %rdi.
 */
	ALIGN
SYM_FUNC_START(copy_page)
	/* The jmp is the default; it is replaced by nothing on REP_GOOD CPUs. */
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$(CP_PAGE_BYTES/8), %ecx	/* count of quadwords */
	rep	movsq
	ret
SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)

/*
 * copy_page_regs: same copy, done with plain loads and stores.
 *
 * Moves one 64-byte chunk per iteration through eight registers. Two of
 * those (%rbx, %r12) are spilled to the stack on entry and reloaded before
 * returning.
 *
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags.
 * Stack:    16 bytes, released before ret.
 */
SYM_FUNC_START_LOCAL(copy_page_regs)
	subq	$2*8, %rsp
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	/*
	 * Phase 1: all chunks except the final CP_PREFETCH_CHUNKS, prefetching
	 * CP_PREFETCH_CHUNKS chunks ahead of the read pointer. Stopping early
	 * keeps the prefetch address inside the CP_PAGE_BYTES being read.
	 */
	movl	$(CP_PAGE_BYTES/CP_CHUNK_BYTES)-CP_PREFETCH_CHUNKS, %ecx
	.p2align 4
.Lcopy_prefetch:
	/*
	 * The decrement comes first: the instructions between here and the
	 * jnz (mov, prefetch, lea) leave ZF untouched, so the branch at the
	 * bottom still sees the result of this decq.
	 */
	decq	%rcx

	cp_load_chunk

	prefetcht0 CP_PREFETCH_CHUNKS*CP_CHUNK_BYTES(%rsi)

	cp_store_chunk

	/* lea rather than add, so that ZF survives until the branch. */
	leaq	CP_CHUNK_BYTES(%rsi), %rsi
	leaq	CP_CHUNK_BYTES(%rdi), %rdi

	jnz	.Lcopy_prefetch

	/* Phase 2: the remaining CP_PREFETCH_CHUNKS chunks, without prefetch. */
	movl	$CP_PREFETCH_CHUNKS, %ecx
	.p2align 4
.Lcopy_tail:
	decl	%ecx			/* ZF consumed by the jnz below */

	cp_load_chunk

	cp_store_chunk

	leaq	CP_CHUNK_BYTES(%rsi), %rsi
	leaq	CP_CHUNK_BYTES(%rdi), %rdi
	jnz	.Lcopy_tail

	/* Reload the two spilled registers and release the stack slots. */
	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret
SYM_FUNC_END(copy_page_regs)