xref: /linux/arch/x86/lib/csum-copy_64.S (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1/*
2 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License.  See the file COPYING in the main directory of this archive
6 * for more details. No warranty for anything given at all.
7 */
8#include <linux/linkage.h>
9#include <asm/errno.h>
10#include <asm/asm.h>
11
/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT (when the
 * respective pointer is non-NULL). Zeroing the destination is left to
 * the wrappers.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  32bit sum (the 64bit accumulator folded to 32 bits).
 *      undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
31
32	.macro source
3310:
34	_ASM_EXTABLE(10b, .Lbad_source)
35	.endm
36
37	.macro dest
3820:
39	_ASM_EXTABLE(20b, .Lbad_dest)
40	.endm
41
42	.macro ignore L=.Lignore
4330:
44	_ASM_EXTABLE(30b, \L)
45	.endm
46
47
/*
 * csum_partial_copy_generic: copy edx bytes from (%rdi) to (%rsi) and add
 * the copied data, with carry, into the running sum passed in ecx.
 *
 * Phases:
 *   .Lloop        64-byte blocks, eight quadwords per iteration
 *   .Lloop_8      remaining whole quadwords (at most seven)
 *   .Lfold        fold the 64-bit accumulator down to 32 bits
 *   .Lloop_1      remaining 16-bit words (at most three)
 *   .Lhandle_1    final odd byte
 *
 * Stack frame (7 quadwords):
 *   0*8  src_err_ptr (from r8)      1*8  dst_err_ptr (from r9)
 *   2*8  rbx   3*8  r12   4*8  r14   5*8  r13   6*8  rbp
 *
 * Returns the 32-bit sum in eax. After a fault the handlers leave the
 * error pointer in rax, so the return value is meaningless in that case.
 */
ENTRY(csum_partial_copy_generic)
	/*
	 * .Lignore is the default fixup label of the `ignore` macro, so the
	 * label stays defined here although nothing below branches to it.
	 */
.Lignore:
	subq  $7*8, %rsp
	movq  %rbx, 2*8(%rsp)
	movq  %r12, 3*8(%rsp)
	movq  %r14, 4*8(%rsp)
	movq  %r13, 5*8(%rsp)
	movq  %rbp, 6*8(%rsp)

	/* r8/r9 are reused as scratch below; park the error pointers */
	movq  %r8, (%rsp)
	movq  %r9, 1*8(%rsp)

	movl  %ecx, %eax	/* rax = initial sum, zero-extended */
	movl  %edx, %ecx	/* rcx = length, zero-extended */

	xorl  %r9d, %r9d	/* r9 = 0, used to add in a pending carry */
	movq  %rcx, %r12

	shrq  $6, %r12		/* r12 = number of 64-byte blocks */
	jz	.Lhandle_tail       /* < 64 */

	clc			/* start the adc chain with CF = 0 */

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %rbp
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

	/* a fault on the prefetch just continues at the next instruction */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %rbp, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	/*
	 * decl sets ZF for the jnz below but leaves CF alone, so the carry
	 * out of the last adcq survives into the next iteration. Only
	 * flag-preserving instructions (movq, leaq) may sit in between.
	 */
	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %rbp, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax		/* add in the carry left by the last block */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count */
	movl %ecx, %r10d	/* r10d = full length, needed again below */
	andl $63, %ecx
	shrl $3, %ecx		/* ecx = whole quadwords left */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx		/* ZF for jnz; CF is preserved */
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx		/* ecx = whole 16-bit words left */
	jz   .Lhandle_1
	clc
	.p2align 4
.Lloop_1:
	source
	movzwl (%rdi), %ebx	/* zero-extended word; does not touch flags */
	adcl %ebx, %eax
	decl %ecx		/* ZF for jnz; CF is preserved */
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz    .Lende
	source
	movzbl (%rdi), %ebx
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

	/* common exit: restore callee-saved registers and drop the frame */
.Lende:
	movq 2*8(%rsp), %rbx
	movq 3*8(%rsp), %r12
	movq 4*8(%rsp), %r14
	movq 5*8(%rsp), %r13
	movq 6*8(%rsp), %rbp
	addq $7*8, %rsp
	ret

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp), %rax	/* rax = src_err_ptr */
	testq %rax, %rax
	jz   .Lende		/* NULL: nothing to report into */
	movl $-EFAULT, (%rax)
	jmp  .Lende

.Lbad_dest:
	movq 8(%rsp), %rax	/* rax = dst_err_ptr */
	testq %rax, %rax
	jz   .Lende		/* NULL: nothing to report into */
	movl $-EFAULT, (%rax)
	jmp .Lende
ENDPROC(csum_partial_copy_generic)
225