xref: /linux/arch/x86/lib/csum-copy_64.S (revision 803f69144f0d48863c68f9d111b56849c7cef5bb)
1/*
2 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License.  See the file COPYING in the main directory of this archive
6 * for more details. No warranty for anything given at all.
7 */
8#include <linux/linkage.h>
9#include <asm/dwarf2.h>
10#include <asm/errno.h>
11
12/*
13 * Checksum copy with exception handling.
14 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
15 * destination is zeroed.
16 *
17 * Input
18 * rdi  source
19 * rsi  destination
20 * edx  len (32bit)
21 * ecx  sum (32bit)
22 * r8   src_err_ptr (int)
23 * r9   dst_err_ptr (int)
24 *
25 * Output
26 * eax  32bit folded checksum (the 64-bit running sum is reduced at
26 *      .Lfold below); undefined in case of exception.
27 *
28 * Wrappers need to take care of valid exception sum and zeroing.
29 * They also should align source or destination to 8 bytes.
30 */
31
/*
 * source: tag the instruction that immediately follows the macro as a
 * faultable source-buffer access.  An __ex_table entry redirects a fault
 * on that instruction to .Lbad_source, which reports -EFAULT through
 * src_err_ptr.
 */
	.macro source
10:
	.section __ex_table, "a"
	.align 8
	.quad 10b, .Lbad_source
	.previous
	.endm
39
/*
 * dest: tag the instruction that immediately follows the macro as a
 * faultable destination-buffer access.  An __ex_table entry redirects a
 * fault on that instruction to .Lbad_dest, which reports -EFAULT through
 * dst_err_ptr.
 */
	.macro dest
20:
	.section __ex_table, "a"
	.align 8
	.quad 20b, .Lbad_dest
	.previous
	.endm
47
/*
 * ignore: tag the instruction that immediately follows the macro so that
 * a fault on it is silently skipped — the fixup simply resumes at \L
 * (default .Lignore).  Used for the speculative prefetch, which may read
 * past the end of the source buffer.
 */
	.macro ignore L=.Lignore
30:
	.section __ex_table, "a"
	.align 8
	.quad 30b, \L
	.previous
	.endm
55
56
/*
 * unsigned int csum_partial_copy_generic(const void *src, void *dst,
 *					  int len, unsigned int isum,
 *					  int *src_err_ptr, int *dst_err_ptr)
 *
 * Copy len bytes from src (rdi) to dst (rsi) while accumulating the
 * one's-complement checksum, seeded with isum (ecx).  Returns the
 * 32-bit folded sum in eax.  On a faulting access the __ex_table
 * fixups store -EFAULT through src_err_ptr (r8) / dst_err_ptr (r9);
 * zeroing the destination and producing a valid sum on error are the
 * C wrappers' job (see the header comment above).
 *
 * NOTE(review): the historical "cmpl $3*64, %edx; jle .Lignore" that
 * used to sit at the entry was dead code — the branch target was the
 * fall-through instruction and the flags were clobbered right after —
 * and has been dropped, as has the unused "movl $2, %edx" before
 * .Lloop_1.  The .Lignore label itself is kept: it is the default
 * fixup target of the "ignore" macro.
 */
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
.Lignore:
	/* Frame: slots 0-1 hold the error pointers, 2-6 the saved regs. */
	subq  $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq  %rbx, 2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq  %r12, 3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq  %r14, 4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq  %r13, 5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq  %rbp, 6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	movq  %r8, (%rsp)		/* src_err_ptr, read by .Lbad_source */
	movq  %r9, 1*8(%rsp)		/* dst_err_ptr, read by .Lbad_dest */

	movl  %ecx, %eax		/* eax = running checksum */
	movl  %edx, %ecx		/* ecx = len */

	xorl  %r9d, %r9d		/* r9 = 0: adc %r9 folds CF into the sum */
	movq  %rcx, %r12

	shrq  $6, %r12			/* r12 = count of whole 64-byte blocks */
	jz	.Lhandle_tail		/* < 64 bytes */

	clc				/* clean CF before starting the adc chain */

	/*
	 * Main loop: checksum and copy in 64-byte blocks.  CF is live
	 * from the adc chain across iterations — only flag-preserving
	 * instructions (mov, lea, dec) may sit between the adcs.
	 */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %rbp
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

	ignore 2f			/* prefetch may fault past the buffer end */
	prefetcht0 5*64(%rdi)
2:
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %rbp, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	decl %r12d			/* dec sets ZF but preserves CF */

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %rbp, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

3:

	leaq 64(%rdi), %rdi		/* lea advances pointers without */
	leaq 64(%rsi), %rsi		/* touching CF or ZF */

	jnz	.Lloop			/* ZF from the decl above */

	adcq  %r9, %rax			/* fold in the final carry */

	/* do last up to 56 bytes, 8 at a time */
.Lhandle_tail:
	/* ecx:	count */
	movl %ecx, %r10d		/* keep full length for the word/byte tail */
	andl $63, %ecx
	shrl $3, %ecx			/* ecx = remaining whole qwords */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx			/* preserves CF */
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi		/* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax			/* add in carry */

.Lfold:
	/* reduce the 64-bit checksum to 32 bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax			/* end-around carry */

	/* do last up to 6 bytes, a 16-bit word at a time */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx			/* ecx = remaining whole words */
	jz   .Lhandle_1
	xorl %ebx, %ebx			/* clear rbx so bx loads zero-extend */
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx			/* preserves CF */
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi		/* preserve carry */
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax			/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl $1, %r10d
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax			/* carry */

	CFI_REMEMBER_STATE
.Lende:
	/* restore callee-saved registers and drop the frame */
	movq 2*8(%rsp), %rbx
	CFI_RESTORE rbx
	movq 3*8(%rsp), %r12
	CFI_RESTORE r12
	movq 4*8(%rsp), %r14
	CFI_RESTORE r14
	movq 5*8(%rsp), %r13
	CFI_RESTORE r13
	movq 6*8(%rsp), %rbp
	CFI_RESTORE rbp
	addq $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp), %rax		/* saved src_err_ptr */
	testq %rax, %rax
	jz   .Lende			/* NULL pointer: caller doesn't care */
	movl $-EFAULT, (%rax)
	jmp  .Lende

.Lbad_dest:
	movq 8(%rsp), %rax		/* saved dst_err_ptr */
	testq %rax, %rax
	jz   .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
250