/* xref: /freebsd/sys/crypto/openssl/amd64/x86_64-gf2m.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43) */
/* Do not modify. This file is auto-generated from x86_64-gf2m.pl. */
.text

/*
 * _mul_1x1 -- 64x64 -> 128-bit carry-less multiply (partial products are
 * combined with XOR only, never ADD), driven by a 16-entry lookup table
 * and consuming the second operand four bits at a time.
 *
 * File-local helper with a private register convention (NOT the SysV ABI):
 *   In:   %rax = a
 *         %rbp = b
 *         %r8  = nibble mask (the caller in this file loads $0xf)
 *   Out:  %rdx:%rax = product (high:low)
 *   Clobbers: %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1, flags.
 *             %r8 is only read.
 *   Stack: 136 bytes; 0..127(%rsp) hold tab[0..15], 8 bytes each.
 *
 * The callee-saved registers written here (%rbx, %rbp, %r12-%r14) are not
 * saved by this routine; the caller in this file saves them itself.
 */
.type	_mul_1x1,@function
.align	16
_mul_1x1:
.cfi_startproc
	subq	$128+8,%rsp
.cfi_adjust_cfa_offset	128+8
	/*
	 * a1 = a with its top three bits cleared, so that a1<<1 and a1<<2
	 * fit in 64 bits.  The three cleared bits (63, 62, 61) are handled
	 * separately: each one is broadcast to a full-width mask with sar,
	 * ANDed with b, and the selected copy of b is folded into %rdx:%rax
	 * at bit offset 63 / 62 / 61.  This interleaves with the setup of
	 * a1 (%r9), a1<<1 (%r10), a1<<2 (%r11) and a<<3 (%r12).
	 */
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi	/* a<<1: bit 62 of a now in bit 63 */
	shrq	$3,%r9			/* %r9 = mask with top 3 bits clear */
	leaq	(,%rax,4),%rdi		/* a<<2: bit 61 of a now in bit 63 */
	andq	%rax,%r9		/* %r9 = a1 */
	leaq	(,%rax,8),%r12		/* %r12 = a<<3 (top 3 bits shift out) */
	sarq	$63,%rax		/* all-ones if bit 63 of a is set */
	leaq	(%r9,%r9,1),%r10	/* %r10 = a1<<1 */
	sarq	$63,%rsi		/* all-ones if bit 62 of a is set */
	leaq	(,%r9,4),%r11		/* %r11 = a1<<2 */
	andq	%rbp,%rax		/* b or 0 */
	sarq	$63,%rdi		/* all-ones if bit 61 of a is set */
	movq	%rax,%rdx
	shlq	$63,%rax		/* low  half of (b or 0) << 63 */
	andq	%rbp,%rsi
	shrq	$1,%rdx			/* high half of (b or 0) << 63 */
	movq	%rsi,%rcx
	shlq	$62,%rsi
	andq	%rbp,%rdi
	shrq	$2,%rcx
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi
	xorq	%rcx,%rdx
	shrq	$3,%rbx
	xorq	%rdi,%rax
	xorq	%rbx,%rdx

	/*
	 * Build tab[i] for i = 0..15 at 8*i(%rsp): the XOR of a1, a1<<1,
	 * a1<<2 and a<<3 selected by bits 0..3 of i.  %r14 = (a1<<2)^(a<<3)
	 * is used to flip between the lower and upper half of the table.
	 */
	movq	%r9,%r13
	movq	$0,0(%rsp)		/* tab[0] */
	xorq	%r10,%r13
	movq	%r9,8(%rsp)		/* tab[1] */
	movq	%r11,%r14
	movq	%r10,16(%rsp)		/* tab[2] */
	xorq	%r12,%r14
	movq	%r13,24(%rsp)		/* tab[3] */

	xorq	%r11,%r9
	movq	%r11,32(%rsp)		/* tab[4] */
	xorq	%r11,%r10
	movq	%r9,40(%rsp)		/* tab[5] */
	xorq	%r11,%r13
	movq	%r10,48(%rsp)		/* tab[6] */
	xorq	%r14,%r9
	movq	%r13,56(%rsp)		/* tab[7] */
	xorq	%r14,%r10

	movq	%r12,64(%rsp)		/* tab[8] */
	xorq	%r14,%r13
	movq	%r9,72(%rsp)		/* tab[9] */
	xorq	%r11,%r9
	movq	%r10,80(%rsp)		/* tab[10] */
	xorq	%r11,%r10
	movq	%r13,88(%rsp)		/* tab[11] */

	/*
	 * The last table stores are interleaved with extracting the first
	 * nibbles of b.  From here on %rbp is shifted right by 4 after each
	 * extraction; even-numbered nibbles index through %rsi, odd-numbered
	 * ones through %rdi.
	 */
	xorq	%r11,%r13
	movq	%r14,96(%rsp)		/* tab[12] */
	movq	%r8,%rsi
	movq	%r9,104(%rsp)		/* tab[13] */
	andq	%rbp,%rsi		/* %rsi = nibble 0 of b */
	movq	%r10,112(%rsp)		/* tab[14] */
	shrq	$4,%rbp
	movq	%r13,120(%rsp)		/* tab[15] */
	movq	%r8,%rdi
	andq	%rbp,%rdi		/* %rdi = nibble 1 of b */
	shrq	$4,%rbp

	/*
	 * Accumulation.  Even nibbles 2k are looked up into %xmm1, shifted
	 * left by k bytes and XORed into the 128-bit accumulator %xmm0.
	 * Odd nibbles 2k+1 are looked up into %rcx and split across
	 * %rdx:%rax at bit offset 8k+4 (shl into %rax, complementary shr
	 * into %rdx).
	 */
	movq	(%rsp,%rsi,8),%xmm0	/* %xmm0 = tab[nibble 0] */
	movq	%r8,%rsi
	andq	%rbp,%rsi		/* %rsi = nibble 2 */
	shrq	$4,%rbp
	/* nibble 1 -> %rdx:%rax at bit 4; nibble 2 -> %xmm0 at byte 1 */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi		/* %rdi = nibble 3 */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$60,%rbx
	xorq	%rcx,%rax
	pslldq	$1,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* %rsi = nibble 4 */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* nibble 3 -> %rdx:%rax at bit 12; nibble 4 -> %xmm0 at byte 2 */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi		/* %rdi = nibble 5 */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* %rsi = nibble 6 */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* nibble 5 -> %rdx:%rax at bit 20; nibble 6 -> %xmm0 at byte 3 */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi		/* %rdi = nibble 7 */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* %rsi = nibble 8 */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* nibble 7 -> %rdx:%rax at bit 28; nibble 8 -> %xmm0 at byte 4 */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi		/* %rdi = nibble 9 */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* %rsi = nibble 10 */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* nibble 9 -> %rdx:%rax at bit 36; nibble 10 -> %xmm0 at byte 5 */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi		/* %rdi = nibble 11 */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* %rsi = nibble 12 */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* nibble 11 -> %rdx:%rax at bit 44; nibble 12 -> %xmm0 at byte 6 */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi		/* %rdi = nibble 13 */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* %rsi = nibble 14 */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* nibble 13 -> %rdx:%rax at bit 52; nibble 14 -> %xmm0 at byte 7 */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi		/* %rdi = nibble 15 */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	/*
	 * The next %rsi extraction runs past the last nibble (%rbp has been
	 * shifted out to zero); its result is overwritten below before any
	 * use.
	 */
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	/* nibble 15 -> %rdx:%rax at bit 60, then fold in the %xmm0 accumulator */
	movq	(%rsp,%rdi,8),%rcx
	movq	%rcx,%rbx
	shlq	$60,%rcx
.byte	102,72,15,126,198		/* movq %xmm0,%rsi (low 64 bits) */
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
.byte	102,72,15,126,199		/* movq %xmm0,%rdi (former high 64 bits) */
	xorq	%rsi,%rax
	xorq	%rdi,%rdx

	addq	$128+8,%rsp
.cfi_adjust_cfa_offset	-128-8
	.byte	0xf3,0xc3		/* repz ret */
.Lend_mul_1x1:
.cfi_endproc
.size	_mul_1x1,.-_mul_1x1

/*
 * bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiply built from
 * three 64x64 carry-less multiplies (Karatsuba).
 *
 * Register usage as seen in the code below (SysV AMD64 argument order):
 *   %rdi = pointer to the 4-quadword result r[0..3] (r[0] least significant)
 *   %rsi = high word of a,  %rdx = low word of a
 *   %rcx = high word of b,  %r8  = low word of b
 * Presumably matches the C prototype
 *   void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
 *                        BN_ULONG b1, BN_ULONG b0);
 * -- TODO confirm against the C declaration.
 *
 * Two code paths, selected at run time by bit 33 of OPENSSL_ia32cap_P:
 *   bit set   -> PCLMULQDQ path (instructions emitted as raw .byte sequences)
 *   bit clear -> .Lvanilla_mul_2x2, three calls to _mul_1x1
 * The vanilla path saves/restores %rbx, %rbp, %r12-%r14 because _mul_1x1
 * overwrites them.
 */
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
.cfi_startproc
	/*
	 * NOTE(review): this copy of %rsp in %rax is never read on either
	 * path in this file; presumably a leftover of the generator's
	 * shared prologue -- confirm.
	 */
	movq	%rsp,%rax
	movq	OPENSSL_ia32cap_P(%rip),%r10
	btq	$33,%r10
	jnc	.Lvanilla_mul_2x2

	/* ---- PCLMULQDQ path ---- */
.byte	102,72,15,110,198		/* movq %rsi,%xmm0  (a high) */
.byte	102,72,15,110,201		/* movq %rcx,%xmm1  (b high) */
.byte	102,72,15,110,210		/* movq %rdx,%xmm2  (a low)  */
.byte	102,73,15,110,216		/* movq %r8,%xmm3   (b low)  */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
.byte	102,15,58,68,193,0		/* pclmulqdq $0,%xmm1,%xmm0: hi = a_hi*b_hi */
	pxor	%xmm2,%xmm4		/* a_hi ^ a_lo */
	pxor	%xmm3,%xmm5		/* b_hi ^ b_lo */
.byte	102,15,58,68,211,0		/* pclmulqdq $0,%xmm3,%xmm2: lo = a_lo*b_lo */
.byte	102,15,58,68,229,0		/* pclmulqdq $0,%xmm5,%xmm4: (a_hi^a_lo)*(b_hi^b_lo) */
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4		/* %xmm4 = middle term */
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4		/* middle << 64 */
	psrldq	$8,%xmm5		/* middle >> 64 */
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm0
	movdqu	%xmm2,0(%rdi)		/* r[0..1] */
	movdqu	%xmm0,16(%rdi)		/* r[2..3] */
	.byte	0xf3,0xc3		/* repz ret */

	/*
	 * ---- Table-driven path ----
	 * 136-byte frame:
	 *     0(%rsp),   8(%rsp)  lo  = a_lo*b_lo   (low, high quadword)
	 *    16(%rsp),  24(%rsp)  hi  = a_hi*b_hi   (low, high quadword)
	 *    32(%rsp) .. 64(%rsp) spilled %rdi, %rsi, %rdx, %rcx, %r8
	 *    80(%rsp) .. 112(%rsp) saved %r14, %r13, %r12, %rbp, %rbx
	 */
.align	16
.Lvanilla_mul_2x2:
	leaq	-136(%rsp),%rsp
.cfi_adjust_cfa_offset	8*17
	movq	%r14,80(%rsp)
.cfi_rel_offset	%r14,8*10
	movq	%r13,88(%rsp)
.cfi_rel_offset	%r13,8*11
	movq	%r12,96(%rsp)
.cfi_rel_offset	%r12,8*12
	movq	%rbp,104(%rsp)
.cfi_rel_offset	%rbp,8*13
	movq	%rbx,112(%rsp)
.cfi_rel_offset	%rbx,8*14
.Lbody_mul_2x2:
	movq	%rdi,32(%rsp)
	movq	%rsi,40(%rsp)
	movq	%rdx,48(%rsp)
	movq	%rcx,56(%rsp)
	movq	%r8,64(%rsp)

	/* hi = a_hi * b_hi; %r8 = 0xf stays live as the nibble mask for all calls */
	movq	$0xf,%r8
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1
	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	/* lo = a_lo * b_lo */
	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1
	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	/* mid = (a_hi ^ a_lo) * (b_hi ^ b_lo), left in %rdx:%rax */
	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax
	xorq	64(%rsp),%rbp
	call	_mul_1x1
	movq	0(%rsp),%rbx		/* lo, low quadword  */
	movq	8(%rsp),%rcx		/* lo, high quadword */
	movq	16(%rsp),%rdi		/* hi, low quadword  */
	movq	24(%rsp),%rsi		/* hi, high quadword */
	movq	32(%rsp),%rbp		/* result pointer    */

	/*
	 * Recombine:
	 *   r[0] = lo.l
	 *   r[1] = lo.h ^ mid.l ^ lo.l ^ hi.l
	 *   r[2] = hi.l ^ mid.h ^ lo.h ^ hi.h
	 *   r[3] = hi.h
	 */
	xorq	%rdx,%rax
	xorq	%rcx,%rdx
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)
	xorq	%rsi,%rax
	xorq	%rsi,%rdx
	xorq	%rdx,%rax
	movq	%rdx,16(%rbp)
	movq	%rax,8(%rbp)

	movq	80(%rsp),%r14
.cfi_restore	%r14
	movq	88(%rsp),%r13
.cfi_restore	%r13
	movq	96(%rsp),%r12
.cfi_restore	%r12
	movq	104(%rsp),%rbp
.cfi_restore	%rbp
	movq	112(%rsp),%rbx
.cfi_restore	%rbx
	leaq	136(%rsp),%rsp
.cfi_adjust_cfa_offset	-8*17
.Lepilogue_mul_2x2:
	.byte	0xf3,0xc3		/* repz ret */
.Lend_mul_2x2:
.cfi_endproc
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
/*
 * NUL-terminated ASCII identification string:
 * "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
	/*
	 * ELF note in .note.gnu.property: name size, descriptor size and
	 * note type 5 (NT_GNU_PROPERTY_TYPE_0), then the owner name "GNU",
	 * then one property record: type 0xc0000002
	 * (GNU_PROPERTY_X86_FEATURE_1_AND), a 4-byte payload holding the
	 * value 3 (IBT and SHSTK bits), padded to 8-byte alignment.
	 * NOTE(review): the entry points visible in this file carry no
	 * endbr64; confirm they are never reached via indirect branches.
	 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4:
334