/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi	source
 * rsi	destination
 * edx	len (32bit)
 * ecx	sum (32bit)
 * r8	src_err_ptr (int)
 * r9	dst_err_ptr (int)
 *
 * Output
 * eax	64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

	.macro source
10:
	.section __ex_table, "a"
	.align 8
	.quad 10b, .Lbad_source
	.previous
	.endm

	.macro dest
20:
	.section __ex_table, "a"
	.align 8
	.quad 20b, .Lbad_dest
	.previous
	.endm

	.macro ignore L=.Lignore
30:
	.section __ex_table, "a"
	.align 8
	.quad 30b, \L
	.previous
	.endm


ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	cmpl	$3*64, %edx
	jle	.Lignore

.Lignore:
	subq	$7*8, %rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq	%rbx, 2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq	%r12, 3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq	%r14, 4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq	%r13, 5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq	%rbp, 6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	movq	%r8, (%rsp)
	movq	%r9, 1*8(%rsp)

	movl	%ecx, %eax
	movl	%edx, %ecx

	xorl	%r9d, %r9d
	movq	%rcx, %r12

	shrq	$6, %r12
	jz	.Lhandle_tail		/* < 64 */

	clc

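	/*
	 * The loop below accumulates eight quadwords per iteration with a
	 * chained adcq into %rax. decl and leaq leave CF untouched, so the
	 * carry survives across iterations; any leftover carry is added
	 * back in via adcq %r9 (%r9 is zero) and the fold at .Lfold.
	 */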
	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi), %rbx
	source
	movq	8(%rdi), %r8
	source
	movq	16(%rdi), %r11
	source
	movq	24(%rdi), %rdx

	source
	movq	32(%rdi), %r10
	source
	movq	40(%rdi), %rbp
	source
	movq	48(%rdi), %r14
	source
	movq	56(%rdi), %r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq	%rbx, %rax
	adcq	%r8, %rax
	adcq	%r11, %rax
	adcq	%rdx, %rax
	adcq	%r10, %rax
	adcq	%rbp, %rax
	adcq	%r14, %rax
	adcq	%r13, %rax

	decl	%r12d

	dest
	movq	%rbx, (%rsi)
	dest
	movq	%r8, 8(%rsi)
	dest
	movq	%r11, 16(%rsi)
	dest
	movq	%rdx, 24(%rsi)

	dest
	movq	%r10, 32(%rsi)
	dest
	movq	%rbp, 40(%rsi)
	dest
	movq	%r14, 48(%rsi)
	dest
	movq	%r13, 56(%rsi)

3:

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi

	jnz	.Lloop

	adcq	%r9, %rax

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl	%ecx, %r10d
	andl	$63, %ecx
	shrl	$3, %ecx
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi), %rbx
	adcq	%rbx, %rax
	decl	%ecx
	dest
	movq	%rbx, (%rsi)
	leaq	8(%rsi), %rsi		/* preserve carry */
	leaq	8(%rdi), %rdi
	jnz	.Lloop_8
	adcq	%r9, %rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax, %ebx
	shrq	$32, %rax
	addl	%ebx, %eax
	adcl	%r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d, %ecx
	andl	$7, %ecx
	shrl	$1, %ecx
	jz	.Lhandle_1
	movl	$2, %edx
	xorl	%ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi), %bx
	adcl	%ebx, %eax
	decl	%ecx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	leaq	2(%rsi), %rsi
	jnz	.Lloop_1
	adcl	%r9d, %eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl	$1, %r10d
	jz	.Lende
	xorl	%ebx, %ebx
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	addl	%ebx, %eax
	adcl	%r9d, %eax		/* carry */

	CFI_REMEMBER_STATE
.Lende:
	movq	2*8(%rsp), %rbx
	CFI_RESTORE rbx
	movq	3*8(%rsp), %r12
	CFI_RESTORE r12
	movq	4*8(%rsp), %r14
	CFI_RESTORE r14
	movq	5*8(%rsp), %r13
	CFI_RESTORE r13
	movq	6*8(%rsp), %rbp
	CFI_RESTORE rbp
	addq	$7*8, %rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq	(%rsp), %rax
	testq	%rax, %rax
	jz	.Lende
	movl	$-EFAULT, (%rax)
	jmp	.Lende

.Lbad_dest:
	movq	8(%rsp), %rax
	testq	%rax, %rax
	jz	.Lende
	movl	$-EFAULT, (%rax)
	jmp	.Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
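
/*
 * The header above notes that wrappers must take care of the valid
 * exception sum and of zeroing the destination. As a rough, simplified
 * sketch of that contract (loosely modeled on the kernel's
 * csum-wrappers.c; the real wrappers also validate the user pointer and
 * are more careful about how much of the destination to clear), a
 * from-user wrapper might look like:
 *
 *	unsigned int
 *	csum_partial_copy_from_user(const void __user *src, void *dst,
 *				    int len, unsigned int isum, int *errp)
 *	{
 *		*errp = 0;
 *		isum = csum_partial_copy_generic((__force void *)src, dst,
 *						 len, isum, errp, NULL);
 *		if (unlikely(*errp))
 *			memset(dst, 0, len);
 *		return isum;
 *	}
 */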