xref: /freebsd/sys/crypto/openssl/amd64/keccak1600-x86_64.S (revision 02e9120893770924227138ba49df1edb3896112a)
1/* Do not modify. This file is auto-generated from keccak1600-x86_64.pl. */
2.text
3
/*
 * __KeccakF1600 - file-local round loop shared by KeccakF1600 and
 * SHA3_absorb.  It does not follow the C calling convention.
 *
 * In:
 *   %rdi  state pointer biased by +100 bytes: 64-bit lane A[y][x] is at
 *         8*(5*y+x)-100(%rdi), so all 25 lanes are addressed with
 *         displacements between -100 and 92.
 *   %rsi  second 200-byte buffer, biased the same way, that receives the
 *         output of a round.
 *   %r15  address of the next round constant inside iotas.
 *
 * Each pass through .Loop reads the state through %rdi, writes the new
 * state through %rsi and exchanges the two pointers, so the buffers swap
 * roles every round.  The loop ends when the low 8 bits of %r15 are zero
 * (iotas is laid out so that its end falls on a 256-byte boundary); %r15
 * is then moved back by 192 bytes, i.e. 24 eight-byte constants.
 *
 * Clobbers %rax, %rbx, %rcx, %rdx, %rbp, %r8-%r14 and the flags without
 * saving anything; both callers in this file push the callee-saved
 * registers themselves.
 */
4.type	__KeccakF1600,@function
5.align	32
6__KeccakF1600:
7.cfi_startproc
/* Seed the five column accumulators with row 4: rax,rbx,rcx,rdx,rbp = A[4][0..4]. */
8	movq	60(%rdi),%rax
9	movq	68(%rdi),%rbx
10	movq	76(%rdi),%rcx
11	movq	84(%rdi),%rdx
12	movq	92(%rdi),%rbp
13	jmp	.Loop
14
15.align	32
16.Loop:
/*
 * Diagonal lanes A[0][0], A[1][1], A[2][2], A[3][3].  They are folded into
 * the column sums below and reused as the inputs of output row 0.
 */
17	movq	-100(%rdi),%r8
18	movq	-52(%rdi),%r9
19	movq	-4(%rdi),%r10
20	movq	44(%rdi),%r11
21
/*
 * XOR rows 0..3 into the accumulators so that rax,rbx,rcx,rdx,rbp end up
 * as the column sums C[0..4].  The last term of C[1], C[3] and C[4] is
 * added a little later, interleaved with the rotations.
 */
22	xorq	-84(%rdi),%rcx
23	xorq	-76(%rdi),%rdx
24	xorq	%r8,%rax
25	xorq	-92(%rdi),%rbx
26	xorq	-44(%rdi),%rcx
27	xorq	-60(%rdi),%rax
28	movq	%rbp,%r12	/* keep an unmodified A[4][4] for output row 0 */
29	xorq	-68(%rdi),%rbp
30
31	xorq	%r10,%rcx
32	xorq	-20(%rdi),%rax
33	xorq	-36(%rdi),%rdx
34	xorq	%r9,%rbx
35	xorq	-28(%rdi),%rbp
36
37	xorq	36(%rdi),%rcx
38	xorq	20(%rdi),%rax
39	xorq	4(%rdi),%rdx
40	xorq	-12(%rdi),%rbx
41	xorq	12(%rdi),%rbp
42
/*
 * Turn the column sums into per-column corrections, in place:
 * D[x] = C[x-1] ^ rol(C[x+1], 1), indices modulo 5.  Afterwards
 * rbx = D[0], rcx = D[1], rdx = D[2], rbp = D[3], rax = D[4].
 */
43	movq	%rcx,%r13	/* save C[2]; %rcx is overwritten next */
44	rolq	$1,%rcx
45	xorq	%rax,%rcx	/* rcx = D[1] = rol(C[2],1) ^ C[0] */
46	xorq	%r11,%rdx	/* last term of C[3] */
47
48	rolq	$1,%rax
49	xorq	%rdx,%rax	/* rax = D[4] = rol(C[0],1) ^ C[3] */
50	xorq	28(%rdi),%rbx	/* last term of C[1] */
51
52	rolq	$1,%rdx
53	xorq	%rbx,%rdx	/* rdx = D[2] = rol(C[3],1) ^ C[1] */
54	xorq	52(%rdi),%rbp	/* last term of C[4] */
55
56	rolq	$1,%rbx
57	xorq	%rbp,%rbx	/* rbx = D[0] = rol(C[1],1) ^ C[4] */
58
59	rolq	$1,%rbp
60	xorq	%r13,%rbp	/* rbp = D[3] = rol(C[4],1) ^ saved C[2] */
/*
 * Output row 0.  Inputs are the diagonal lanes in r8..r12: each gets the
 * correction of its column, all but r8 are rotated, and the five results
 * are formed from three lanes each with and/or/not.  Loads that appear
 * between the stores fetch the inputs of output row 1.
 */
61	xorq	%rcx,%r9
62	xorq	%rdx,%r10
63	rolq	$44,%r9
64	xorq	%rbp,%r11
65	xorq	%rax,%r12
66	rolq	$43,%r10
67	xorq	%rbx,%r8
68	movq	%r9,%r13
69	rolq	$21,%r11
70	orq	%r10,%r9
71	xorq	%r8,%r9
72	rolq	$14,%r12
73
/* The round constant goes into lane [0][0] only; step to the next one. */
74	xorq	(%r15),%r9
75	leaq	8(%r15),%r15
76
77	movq	%r12,%r14
78	andq	%r11,%r12
79	movq	%r9,-100(%rsi)	/* out A[0][0] */
80	xorq	%r10,%r12
81	notq	%r10
82	movq	%r12,-84(%rsi)	/* out A[0][2] */
83
84	orq	%r11,%r10
85	movq	76(%rdi),%r12	/* in A[4][2] */
86	xorq	%r13,%r10
87	movq	%r10,-92(%rsi)	/* out A[0][1] */
88
89	andq	%r8,%r13
90	movq	-28(%rdi),%r9	/* in A[1][4] */
91	xorq	%r14,%r13
92	movq	-20(%rdi),%r10	/* in A[2][0] */
93	movq	%r13,-68(%rsi)	/* out A[0][4] */
94
95	orq	%r8,%r14
96	movq	-76(%rdi),%r8	/* in A[0][3] */
97	xorq	%r11,%r14
98	movq	28(%rdi),%r11	/* in A[3][1] */
99	movq	%r14,-76(%rsi)	/* out A[0][3] */
100
101
/*
 * Output row 1 from A[0][3], A[1][4], A[2][0], A[3][1], A[4][2]; same
 * pattern as row 0 without a round constant.  Interleaved loads fetch the
 * inputs of output row 2.
 */
102	xorq	%rbp,%r8
103	xorq	%rdx,%r12
104	rolq	$28,%r8
105	xorq	%rcx,%r11
106	xorq	%rax,%r9
107	rolq	$61,%r12
108	rolq	$45,%r11
109	xorq	%rbx,%r10
110	rolq	$20,%r9
111	movq	%r8,%r13
112	orq	%r12,%r8
113	rolq	$3,%r10
114
115	xorq	%r11,%r8
116	movq	%r8,-36(%rsi)	/* out A[1][3] */
117
118	movq	%r9,%r14
119	andq	%r13,%r9
120	movq	-92(%rdi),%r8	/* in A[0][1] */
121	xorq	%r12,%r9
122	notq	%r12
123	movq	%r9,-28(%rsi)	/* out A[1][4] */
124
125	orq	%r11,%r12
126	movq	-44(%rdi),%r9	/* in A[1][2] */
127	xorq	%r10,%r12
128	movq	%r12,-44(%rsi)	/* out A[1][2] */
129
130	andq	%r10,%r11
131	movq	60(%rdi),%r12	/* in A[4][0] */
132	xorq	%r14,%r11
133	movq	%r11,-52(%rsi)	/* out A[1][1] */
134
135	orq	%r10,%r14
136	movq	4(%rdi),%r10	/* in A[2][3] */
137	xorq	%r13,%r14
138	movq	52(%rdi),%r11	/* in A[3][4] */
139	movq	%r14,-60(%rsi)	/* out A[1][0] */
140
141
/*
 * Output row 2 from A[0][1], A[1][2], A[2][3], A[3][4], A[4][0].
 * Interleaved loads fetch the inputs of output row 3.
 */
142	xorq	%rbp,%r10
143	xorq	%rax,%r11
144	rolq	$25,%r10
145	xorq	%rdx,%r9
146	rolq	$8,%r11
147	xorq	%rbx,%r12
148	rolq	$6,%r9
149	xorq	%rcx,%r8
150	rolq	$18,%r12
151	movq	%r10,%r13
152	andq	%r11,%r10
153	rolq	$1,%r8
154
155	notq	%r11
156	xorq	%r9,%r10
157	movq	%r10,-12(%rsi)	/* out A[2][1] */
158
159	movq	%r12,%r14
160	andq	%r11,%r12
161	movq	-12(%rdi),%r10	/* in A[2][1] */
162	xorq	%r13,%r12
163	movq	%r12,-4(%rsi)	/* out A[2][2] */
164
165	orq	%r9,%r13
166	movq	84(%rdi),%r12	/* in A[4][3] */
167	xorq	%r8,%r13
168	movq	%r13,-20(%rsi)	/* out A[2][0] */
169
170	andq	%r8,%r9
171	xorq	%r14,%r9
172	movq	%r9,12(%rsi)	/* out A[2][4] */
173
174	orq	%r8,%r14
175	movq	-60(%rdi),%r9	/* in A[1][0] */
176	xorq	%r11,%r14
177	movq	36(%rdi),%r11	/* in A[3][2] */
178	movq	%r14,4(%rsi)	/* out A[2][3] */
179
180
/* Output row 3 from A[0][4], A[1][0], A[2][1], A[3][2], A[4][3]. */
181	movq	-68(%rdi),%r8	/* in A[0][4] */
182
183	xorq	%rcx,%r10
184	xorq	%rdx,%r11
185	rolq	$10,%r10
186	xorq	%rbx,%r9
187	rolq	$15,%r11
188	xorq	%rbp,%r12
189	rolq	$36,%r9
190	xorq	%rax,%r8
191	rolq	$56,%r12
192	movq	%r10,%r13
193	orq	%r11,%r10
194	rolq	$27,%r8
195
196	notq	%r11
197	xorq	%r9,%r10
198	movq	%r10,28(%rsi)	/* out A[3][1] */
199
200	movq	%r12,%r14
201	orq	%r11,%r12
202	xorq	%r13,%r12
203	movq	%r12,36(%rsi)	/* out A[3][2] */
204
205	andq	%r9,%r13
206	xorq	%r8,%r13
207	movq	%r13,20(%rsi)	/* out A[3][0] */
208
209	orq	%r8,%r9
210	xorq	%r14,%r9
211	movq	%r9,52(%rsi)	/* out A[3][4] */
212
213	andq	%r14,%r8
214	xorq	%r11,%r8
215	movq	%r8,44(%rsi)	/* out A[3][3] */
216
217
/*
 * Output row 4 is built in the correction registers themselves: each one
 * is XORed with the single lane of its own column that has not been
 * consumed yet (A[0][2], A[1][3], A[4][1], A[2][4], A[3][0]) and rotated.
 * The corrections are not needed after this point.
 */
218	xorq	-84(%rdi),%rdx
219	xorq	-36(%rdi),%rbp
220	rolq	$62,%rdx
221	xorq	68(%rdi),%rcx
222	rolq	$55,%rbp
223	xorq	12(%rdi),%rax
224	rolq	$2,%rcx
225	xorq	20(%rdi),%rbx
/* All reads of the old state are done: swap buffers.  From here on %rdi
   addresses the state being written, so row 4 is stored through %rdi. */
226	xchgq	%rsi,%rdi
227	rolq	$39,%rax
228	rolq	$41,%rbx
229	movq	%rdx,%r13
230	andq	%rbp,%rdx
231	notq	%rbp
232	xorq	%rcx,%rdx
233	movq	%rdx,92(%rdi)	/* out A[4][4] */
234
235	movq	%rax,%r14
236	andq	%rbp,%rax
237	xorq	%r13,%rax
238	movq	%rax,60(%rdi)	/* out A[4][0] */
239
240	orq	%rcx,%r13
241	xorq	%rbx,%r13
242	movq	%r13,84(%rdi)	/* out A[4][3] */
243
244	andq	%rbx,%rcx
245	xorq	%r14,%rcx
246	movq	%rcx,76(%rdi)	/* out A[4][2] */
247
248	orq	%r14,%rbx
249	xorq	%rbp,%rbx
250	movq	%rbx,68(%rdi)	/* out A[4][1] */
251
/*
 * rax, rbx and rcx already hold the new A[4][0..2]; move the new A[4][4]
 * and A[4][3] into rbp and rdx so the registers match what .Loop expects
 * without reloading row 4 from memory.
 */
252	movq	%rdx,%rbp
253	movq	%r13,%rdx
254
/* Continue until %r15 reaches a 256-byte boundary, i.e. the end of iotas. */
255	testq	$255,%r15
256	jnz	.Loop
257
258	leaq	-192(%r15),%r15	/* back to the first constant: 24 * 8 bytes */
259	.byte	0xf3,0xc3	/* encodes "rep ret" */
260.cfi_endproc
261.size	__KeccakF1600,.-__KeccakF1600
262
/*
 * KeccakF1600 - run the complete round loop on a 25-lane state, in place.
 *
 * In:  %rdi = address of the 200-byte state (unbiased).
 * Out: state updated in place; %rdi holds its entry value again on return.
 *
 * Saves and restores %rbx, %rbp and %r12-%r15, which __KeccakF1600 uses
 * freely.  There is no .globl directive for this symbol in this file, so
 * it is local to the object; SHA3_squeeze is its caller here.
 */
263.type	KeccakF1600,@function
264.align	32
265KeccakF1600:
266.cfi_startproc
267	pushq	%rbx
268.cfi_adjust_cfa_offset	8
269.cfi_offset	%rbx,-16
270	pushq	%rbp
271.cfi_adjust_cfa_offset	8
272.cfi_offset	%rbp,-24
273	pushq	%r12
274.cfi_adjust_cfa_offset	8
275.cfi_offset	%r12,-32
276	pushq	%r13
277.cfi_adjust_cfa_offset	8
278.cfi_offset	%r13,-40
279	pushq	%r14
280.cfi_adjust_cfa_offset	8
281.cfi_offset	%r14,-48
282	pushq	%r15
283.cfi_adjust_cfa_offset	8
284.cfi_offset	%r15,-56
285
/* Bias the state pointer the way __KeccakF1600 expects and reserve a
   200-byte second buffer on the stack. */
286	leaq	100(%rdi),%rdi
287	subq	$200,%rsp
288.cfi_adjust_cfa_offset	200
289
/*
 * Complement lanes A[0][1], A[0][2], A[1][3], A[2][2], A[3][2] and A[4][0]
 * on the way in; the same six lanes are complemented again after the call.
 * NOTE(review): this looks like the "lane complementing" representation
 * that lets the round code use fewer NOTs - confirm against the generator.
 */
290	notq	-92(%rdi)
291	notq	-84(%rdi)
292	notq	-36(%rdi)
293	notq	-4(%rdi)
294	notq	36(%rdi)
295	notq	60(%rdi)
296
297	leaq	iotas(%rip),%r15	/* first round constant */
298	leaq	100(%rsp),%rsi	/* biased pointer to the stack buffer */
299
300	call	__KeccakF1600
301
/* Undo the complementing and the +100 bias. */
302	notq	-92(%rdi)
303	notq	-84(%rdi)
304	notq	-36(%rdi)
305	notq	-4(%rdi)
306	notq	36(%rdi)
307	notq	60(%rdi)
308	leaq	-100(%rdi),%rdi
309
310	addq	$200,%rsp
311.cfi_adjust_cfa_offset	-200
312
313	popq	%r15
314.cfi_adjust_cfa_offset	-8
315.cfi_restore	%r15
316	popq	%r14
317.cfi_adjust_cfa_offset	-8
318.cfi_restore	%r14
319	popq	%r13
320.cfi_adjust_cfa_offset	-8
321.cfi_restore	%r13
322	popq	%r12
323.cfi_adjust_cfa_offset	-8
324.cfi_restore	%r12
325	popq	%rbp
326.cfi_adjust_cfa_offset	-8
327.cfi_restore	%rbp
328	popq	%rbx
329.cfi_adjust_cfa_offset	-8
330.cfi_restore	%rbx
331	.byte	0xf3,0xc3	/* encodes "rep ret" */
332.cfi_endproc
333.size	KeccakF1600,.-KeccakF1600
/*
 * SHA3_absorb - XOR whole input blocks into the state, running the round
 * loop after each block.
 *
 * In (System V AMD64 argument registers):
 *   %rdi  address of the 200-byte state
 *   %rsi  input pointer
 *   %rdx  number of input bytes available
 *   %rcx  block size in bytes; it is shifted right by 3 and used as a count
 *         of 8-byte words (presumably a non-zero multiple of 8 - confirm
 *         against callers)
 * Out:
 *   %rax  number of input bytes left over, always less than the block size
 *
 * Stack frame (232 bytes, addressed through %rsi = %rsp + 100):
 *   0..199 second state buffer for __KeccakF1600
 *   200    saved input pointer
 *   208    saved remaining length
 *   216    saved block size
 */
334.globl	SHA3_absorb
335.type	SHA3_absorb,@function
336.align	32
337SHA3_absorb:
338.cfi_startproc
339	pushq	%rbx
340.cfi_adjust_cfa_offset	8
341.cfi_offset	%rbx,-16
342	pushq	%rbp
343.cfi_adjust_cfa_offset	8
344.cfi_offset	%rbp,-24
345	pushq	%r12
346.cfi_adjust_cfa_offset	8
347.cfi_offset	%r12,-32
348	pushq	%r13
349.cfi_adjust_cfa_offset	8
350.cfi_offset	%r13,-40
351	pushq	%r14
352.cfi_adjust_cfa_offset	8
353.cfi_offset	%r14,-48
354	pushq	%r15
355.cfi_adjust_cfa_offset	8
356.cfi_offset	%r15,-56
357
358	leaq	100(%rdi),%rdi	/* bias the state pointer for __KeccakF1600 */
359	subq	$232,%rsp
360.cfi_adjust_cfa_offset	232
361
362	movq	%rsi,%r9	/* input pointer moves to %r9; %rsi is reused below */
363	leaq	100(%rsp),%rsi	/* biased pointer to the stack buffer */
364
/* Complement six lanes for the duration of the call (undone at
   .Ldone_absorb), exactly as KeccakF1600 does. */
365	notq	-92(%rdi)
366	notq	-84(%rdi)
367	notq	-36(%rdi)
368	notq	-4(%rdi)
369	notq	36(%rdi)
370	notq	60(%rdi)
371	leaq	iotas(%rip),%r15
372
373	movq	%rcx,216-100(%rsi)	/* save block size */
374
375.Loop_absorb:
/* Stop as soon as fewer than one full block remains (unsigned compare). */
376	cmpq	%rcx,%rdx
377	jc	.Ldone_absorb
378
379	shrq	$3,%rcx	/* bytes -> 8-byte words */
380	leaq	-100(%rdi),%r8	/* unbiased pointer to the first lane */
381
/* XOR one block into the state, eight bytes per iteration. */
382.Lblock_absorb:
383	movq	(%r9),%rax
384	leaq	8(%r9),%r9
385	xorq	(%r8),%rax
386	leaq	8(%r8),%r8
387	subq	$8,%rdx
388	movq	%rax,-8(%r8)	/* %r8 was already advanced, hence -8 */
389	subq	$1,%rcx
390	jnz	.Lblock_absorb
391
/* %r9, %rdx and %rcx are all overwritten by __KeccakF1600, so they are
   kept in the frame across the call. */
392	movq	%r9,200-100(%rsi)
393	movq	%rdx,208-100(%rsi)
394	call	__KeccakF1600
395	movq	200-100(%rsi),%r9
396	movq	208-100(%rsi),%rdx
397	movq	216-100(%rsi),%rcx
398	jmp	.Loop_absorb
399
400.align	32
401.Ldone_absorb:
402	movq	%rdx,%rax	/* return value: bytes not absorbed */
403
404	notq	-92(%rdi)
405	notq	-84(%rdi)
406	notq	-36(%rdi)
407	notq	-4(%rdi)
408	notq	36(%rdi)
409	notq	60(%rdi)
410
411	addq	$232,%rsp
412.cfi_adjust_cfa_offset	-232
413
414	popq	%r15
415.cfi_adjust_cfa_offset	-8
416.cfi_restore	%r15
417	popq	%r14
418.cfi_adjust_cfa_offset	-8
419.cfi_restore	%r14
420	popq	%r13
421.cfi_adjust_cfa_offset	-8
422.cfi_restore	%r13
423	popq	%r12
424.cfi_adjust_cfa_offset	-8
425.cfi_restore	%r12
426	popq	%rbp
427.cfi_adjust_cfa_offset	-8
428.cfi_restore	%rbp
429	popq	%rbx
430.cfi_adjust_cfa_offset	-8
431.cfi_restore	%rbx
432	.byte	0xf3,0xc3	/* encodes "rep ret" */
433.cfi_endproc
434.size	SHA3_absorb,.-SHA3_absorb
/*
 * SHA3_squeeze - copy bytes out of the state, running the round loop each
 * time a full block has been emitted and more output is still wanted.
 *
 * In (System V AMD64 argument registers):
 *   %rdi  address of the 200-byte state
 *   %rsi  output pointer
 *   %rdx  number of bytes to produce
 *   %rcx  block size in bytes; it is shifted right by 3 and used as a count
 *         of 8-byte words
 *
 * Register roles inside the loop:
 *   %r8   read cursor inside the state
 *   %r12  write cursor in the output buffer
 *   %r13  bytes still to produce
 *   %r14  words per block (reload value for %rcx)
 *   %rcx  words left in the current block
 * %r12-%r14 are callee-saved, so they survive the call to KeccakF1600;
 * KeccakF1600 hands %rdi back unchanged.
 */
435.globl	SHA3_squeeze
436.type	SHA3_squeeze,@function
437.align	32
438SHA3_squeeze:
439.cfi_startproc
440	pushq	%r12
441.cfi_adjust_cfa_offset	8
442.cfi_offset	%r12,-16
443	pushq	%r13
444.cfi_adjust_cfa_offset	8
445.cfi_offset	%r13,-24
446	pushq	%r14
447.cfi_adjust_cfa_offset	8
448.cfi_offset	%r14,-32
449
450	shrq	$3,%rcx	/* bytes -> 8-byte words */
451	movq	%rdi,%r8
452	movq	%rsi,%r12
453	movq	%rdx,%r13
454	movq	%rcx,%r14
455	jmp	.Loop_squeeze
456
457.align	32
458.Loop_squeeze:
/* Fewer than 8 bytes wanted: finish with a byte copy. */
459	cmpq	$8,%r13
460	jb	.Ltail_squeeze
461
462	movq	(%r8),%rax
463	leaq	8(%r8),%r8
464	movq	%rax,(%r12)
465	leaq	8(%r12),%r12
466	subq	$8,%r13
467	jz	.Ldone_squeeze
468
469	subq	$1,%rcx
470	jnz	.Loop_squeeze
471
/* Block exhausted but output remains: permute the state and start reading
   it from the beginning again. */
472	call	KeccakF1600
473	movq	%rdi,%r8
474	movq	%r14,%rcx
475	jmp	.Loop_squeeze
476
477.Ltail_squeeze:
/* Copy the final 0..7 bytes: %rsi = source, %rdi = destination, %rcx = count. */
478	movq	%r8,%rsi
479	movq	%r12,%rdi
480	movq	%r13,%rcx
481.byte	0xf3,0xa4	/* encodes "rep movsb" */
482
483.Ldone_squeeze:
484	popq	%r14
485.cfi_adjust_cfa_offset	-8
486.cfi_restore	%r14
487	popq	%r13
488.cfi_adjust_cfa_offset	-8
489.cfi_restore	%r13
490	popq	%r12
491.cfi_adjust_cfa_offset	-8
/* The unwind annotation must name the register popped just above (%r12). */
492.cfi_restore	%r12
493	.byte	0xf3,0xc3	/* encodes "rep ret" */
494.cfi_endproc
495.size	SHA3_squeeze,.-SHA3_squeeze
/*
 * iotas - 24 eight-byte round constants; __KeccakF1600 XORs one of them
 * into lane [0][0] on every pass through its loop.
 *
 * Placement matters: the table starts 64 bytes (eight zero quads) past a
 * 256-byte boundary and is 24 * 8 = 192 bytes long, so it ends exactly on
 * the next 256-byte boundary.  __KeccakF1600 detects the end of the table
 * with "testq $255,%r15" and rewinds by 192 bytes.  No section directive
 * precedes it, so the table is emitted in .text along with the code.
 */
496.align	256
497.quad	0,0,0,0,0,0,0,0
498.type	iotas,@object
499iotas:
500.quad	0x0000000000000001
501.quad	0x0000000000008082
502.quad	0x800000000000808a
503.quad	0x8000000080008000
504.quad	0x000000000000808b
505.quad	0x0000000080000001
506.quad	0x8000000080008081
507.quad	0x8000000000008009
508.quad	0x000000000000008a
509.quad	0x0000000000000088
510.quad	0x0000000080008009
511.quad	0x000000008000000a
512.quad	0x000000008000808b
513.quad	0x800000000000008b
514.quad	0x8000000000008089
515.quad	0x8000000000008003
516.quad	0x8000000000008002
517.quad	0x8000000000000080
518.quad	0x000000000000800a
519.quad	0x800000008000000a
520.quad	0x8000000080008081
521.quad	0x8000000000008080
522.quad	0x0000000080000001
523.quad	0x8000000080008008
524.size	iotas,.-iotas
/* NUL-terminated ASCII identification string:
   "Keccak-1600 absorb and squeeze for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
525.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
/*
 * ELF note describing this object to the linker/loader.  Layout: name
 * size, descriptor size, note type, name, then the descriptor.  Per the
 * x86-64 psABI, type 5 is NT_GNU_PROPERTY_TYPE_0, property 0xc0000002 is
 * GNU_PROPERTY_X86_FEATURE_1_AND, and value 3 sets the IBT and SHSTK bits.
 * NOTE(review): no endbr64 is visible at the global entry points in this
 * file - confirm they are never the target of an indirect branch.
 */
526	.section ".note.gnu.property", "a"
527	.p2align 3
528	.long 1f - 0f	/* name size */
529	.long 4f - 1f	/* descriptor size */
530	.long 5	/* note type */
5310:
532	# "GNU" encoded with .byte, since .asciz isn't supported
533	# on Solaris.
534	.byte 0x47
535	.byte 0x4e
536	.byte 0x55
537	.byte 0
5381:
539	.p2align 3
540	.long 0xc0000002	/* property type */
541	.long 3f - 2f	/* property data size */
5422:
543	.long 3	/* property data */
5443:
545	.p2align 3
5464:
547