xref: /freebsd/sys/crypto/openssl/amd64/x86_64-gf2m.S (revision da5432eda807c4b7232d030d5157d5b417ea4f52)
1/* Do not modify. This file is auto-generated from x86_64-gf2m.pl. */
2.text
3
/*
 * _mul_1x1: helper (not declared .globl in the visible source) that
 * computes the 64x64 -> 128-bit carry-less product of two words, i.e.
 * partial products are combined with XOR instead of addition. It works
 * from a 16-entry lookup table built on the stack and consumes the
 * multiplier four bits at a time.
 *
 * Private register interface (this is not the C calling convention):
 *   In:   %rax = multiplicand a
 *         %rbp = multiplier b
 *         %r8  = digit mask (the caller in this file passes 0xf)
 *   Out:  %rax = low 64 bits of the product
 *         %rdx = high 64 bits of the product
 *   Overwritten without being saved here: %rbx, %rcx, %rsi, %rdi, %rbp,
 *         %r9-%r14, %xmm0, %xmm1 and the flags. %r8 is only read.
 *   Stack: 136 bytes are reserved; bytes 0..127 hold the table.
 */
4.type	_mul_1x1,@function
5.align	16
6_mul_1x1:
7.cfi_startproc
/* Reserve the table area (128 bytes) plus 8; CFI tracks the adjustment. */
8	subq	$128+8,%rsp
9.cfi_adjust_cfa_offset	128+8
/*
 * Two computations are interleaved up to the first blank line:
 *  - Table seeds: %r9 = a with its top three bits cleared (the mask is
 *    -1 >> 3), %r10 = %r9 << 1, %r11 = %r9 << 2, %r12 = a << 3.
 *  - Top-bit fix-up: %rax, %rsi and %rdi become all-ones/all-zero masks
 *    taken from bits 63, 62 and 61 of a (sarq $63 of a, 2a and 4a). Each
 *    mask is ANDed with b, and b << 63, b << 62, b << 61 are XORed as
 *    128-bit values into the accumulator %rdx:%rax. This supplies the
 *    contribution of the three bits of a that were cleared from %r9.
 */
10	movq	$-1,%r9
11	leaq	(%rax,%rax,1),%rsi
12	shrq	$3,%r9
13	leaq	(,%rax,4),%rdi
14	andq	%rax,%r9
15	leaq	(,%rax,8),%r12
16	sarq	$63,%rax
17	leaq	(%r9,%r9,1),%r10
18	sarq	$63,%rsi
19	leaq	(,%r9,4),%r11
20	andq	%rbp,%rax
21	sarq	$63,%rdi
22	movq	%rax,%rdx
23	shlq	$63,%rax
24	andq	%rbp,%rsi
25	shrq	$1,%rdx
26	movq	%rsi,%rcx
27	shlq	$62,%rsi
28	andq	%rbp,%rdi
29	shrq	$2,%rcx
30	xorq	%rsi,%rax
31	movq	%rdi,%rbx
32	shlq	$61,%rdi
33	xorq	%rcx,%rdx
34	shrq	$3,%rbx
35	xorq	%rdi,%rax
36	xorq	%rbx,%rdx
37
/*
 * Fill the table: entry i lives at 8*i(%rsp) for i = 0..15 and is the XOR
 * of the seeds selected by the set bits of i (bit 0 -> %r9, bit 1 -> %r10,
 * bit 2 -> %r11, bit 3 -> %r12). %r13 starts as entry 3 and %r14 as
 * entry 12; the remaining entries are derived by successive XORs.
 * Entries 0..3 are stored first.
 */
38	movq	%r9,%r13
39	movq	$0,0(%rsp)
40	xorq	%r10,%r13
41	movq	%r9,8(%rsp)
42	movq	%r11,%r14
43	movq	%r10,16(%rsp)
44	xorq	%r12,%r14
45	movq	%r13,24(%rsp)
46
/* Entries 4..7 (seed for bit 2 folded into entries 0..3). */
47	xorq	%r11,%r9
48	movq	%r11,32(%rsp)
49	xorq	%r11,%r10
50	movq	%r9,40(%rsp)
51	xorq	%r11,%r13
52	movq	%r10,48(%rsp)
53	xorq	%r14,%r9
54	movq	%r13,56(%rsp)
55	xorq	%r14,%r10
56
/* Entries 8..11 (bit 3 set, bit 2 clear). */
57	movq	%r12,64(%rsp)
58	xorq	%r14,%r13
59	movq	%r9,72(%rsp)
60	xorq	%r11,%r9
61	movq	%r10,80(%rsp)
62	xorq	%r11,%r10
63	movq	%r13,88(%rsp)
64
/*
 * Entries 12..15, interleaved with the extraction of the first two 4-bit
 * digits of b: each digit is (b & %r8), after which b is shifted right by
 * four. %rsi and %rdi alternate as the table index register.
 */
65	xorq	%r11,%r13
66	movq	%r14,96(%rsp)
67	movq	%r8,%rsi
68	movq	%r9,104(%rsp)
69	andq	%rbp,%rsi
70	movq	%r10,112(%rsp)
71	shrq	$4,%rbp
72	movq	%r13,120(%rsp)
73	movq	%r8,%rdi
74	andq	%rbp,%rdi
75	shrq	$4,%rbp
76
/*
 * Digit 0: its table entry initialises the SSE accumulator %xmm0.
 * From here on the digits are processed in pairs:
 *  - odd digit k: entry loaded into %rcx, shifted left by 4*k bits across
 *    %rdx:%rax (%rbx receives the bits shifted out of the low word) and
 *    XORed into the integer accumulator;
 *  - even digit k: entry loaded into %xmm1, shifted left by k/2 bytes with
 *    pslldq and XORed into %xmm0.
 * The extraction of the following digits is interleaved with this work.
 */
77	movq	(%rsp,%rsi,8),%xmm0
78	movq	%r8,%rsi
79	andq	%rbp,%rsi
80	shrq	$4,%rbp
/* Digit 1 -> integer accumulator, shifted by 4 bits. */
81	movq	(%rsp,%rdi,8),%rcx
82	movq	%r8,%rdi
83	movq	%rcx,%rbx
84	shlq	$4,%rcx
85	andq	%rbp,%rdi
/* Digit 2 -> SSE accumulator, shifted by 1 byte. */
86	movq	(%rsp,%rsi,8),%xmm1
87	shrq	$60,%rbx
88	xorq	%rcx,%rax
89	pslldq	$1,%xmm1
90	movq	%r8,%rsi
91	shrq	$4,%rbp
92	xorq	%rbx,%rdx
93	andq	%rbp,%rsi
94	shrq	$4,%rbp
95	pxor	%xmm1,%xmm0
/* Digit 3 -> integer accumulator, shifted by 12 bits. */
96	movq	(%rsp,%rdi,8),%rcx
97	movq	%r8,%rdi
98	movq	%rcx,%rbx
99	shlq	$12,%rcx
100	andq	%rbp,%rdi
/* Digit 4 -> SSE accumulator, shifted by 2 bytes. */
101	movq	(%rsp,%rsi,8),%xmm1
102	shrq	$52,%rbx
103	xorq	%rcx,%rax
104	pslldq	$2,%xmm1
105	movq	%r8,%rsi
106	shrq	$4,%rbp
107	xorq	%rbx,%rdx
108	andq	%rbp,%rsi
109	shrq	$4,%rbp
110	pxor	%xmm1,%xmm0
/* Digit 5 -> integer accumulator, shifted by 20 bits. */
111	movq	(%rsp,%rdi,8),%rcx
112	movq	%r8,%rdi
113	movq	%rcx,%rbx
114	shlq	$20,%rcx
115	andq	%rbp,%rdi
/* Digit 6 -> SSE accumulator, shifted by 3 bytes. */
116	movq	(%rsp,%rsi,8),%xmm1
117	shrq	$44,%rbx
118	xorq	%rcx,%rax
119	pslldq	$3,%xmm1
120	movq	%r8,%rsi
121	shrq	$4,%rbp
122	xorq	%rbx,%rdx
123	andq	%rbp,%rsi
124	shrq	$4,%rbp
125	pxor	%xmm1,%xmm0
/* Digit 7 -> integer accumulator, shifted by 28 bits. */
126	movq	(%rsp,%rdi,8),%rcx
127	movq	%r8,%rdi
128	movq	%rcx,%rbx
129	shlq	$28,%rcx
130	andq	%rbp,%rdi
/* Digit 8 -> SSE accumulator, shifted by 4 bytes. */
131	movq	(%rsp,%rsi,8),%xmm1
132	shrq	$36,%rbx
133	xorq	%rcx,%rax
134	pslldq	$4,%xmm1
135	movq	%r8,%rsi
136	shrq	$4,%rbp
137	xorq	%rbx,%rdx
138	andq	%rbp,%rsi
139	shrq	$4,%rbp
140	pxor	%xmm1,%xmm0
/* Digit 9 -> integer accumulator, shifted by 36 bits. */
141	movq	(%rsp,%rdi,8),%rcx
142	movq	%r8,%rdi
143	movq	%rcx,%rbx
144	shlq	$36,%rcx
145	andq	%rbp,%rdi
/* Digit 10 -> SSE accumulator, shifted by 5 bytes. */
146	movq	(%rsp,%rsi,8),%xmm1
147	shrq	$28,%rbx
148	xorq	%rcx,%rax
149	pslldq	$5,%xmm1
150	movq	%r8,%rsi
151	shrq	$4,%rbp
152	xorq	%rbx,%rdx
153	andq	%rbp,%rsi
154	shrq	$4,%rbp
155	pxor	%xmm1,%xmm0
/* Digit 11 -> integer accumulator, shifted by 44 bits. */
156	movq	(%rsp,%rdi,8),%rcx
157	movq	%r8,%rdi
158	movq	%rcx,%rbx
159	shlq	$44,%rcx
160	andq	%rbp,%rdi
/* Digit 12 -> SSE accumulator, shifted by 6 bytes. */
161	movq	(%rsp,%rsi,8),%xmm1
162	shrq	$20,%rbx
163	xorq	%rcx,%rax
164	pslldq	$6,%xmm1
165	movq	%r8,%rsi
166	shrq	$4,%rbp
167	xorq	%rbx,%rdx
168	andq	%rbp,%rsi
169	shrq	$4,%rbp
170	pxor	%xmm1,%xmm0
/* Digit 13 -> integer accumulator, shifted by 52 bits. */
171	movq	(%rsp,%rdi,8),%rcx
172	movq	%r8,%rdi
173	movq	%rcx,%rbx
174	shlq	$52,%rcx
175	andq	%rbp,%rdi
/* Digit 14 -> SSE accumulator, shifted by 7 bytes. */
176	movq	(%rsp,%rsi,8),%xmm1
177	shrq	$12,%rbx
178	xorq	%rcx,%rax
179	pslldq	$7,%xmm1
/*
 * The digit extraction into %rsi below is not consumed: %rsi is
 * overwritten by the .byte-encoded move further down before being read.
 */
180	movq	%r8,%rsi
181	shrq	$4,%rbp
182	xorq	%rbx,%rdx
183	andq	%rbp,%rsi
184	shrq	$4,%rbp
185	pxor	%xmm1,%xmm0
/* Digit 15 -> integer accumulator, shifted by 60 bits. */
186	movq	(%rsp,%rdi,8),%rcx
187	movq	%rcx,%rbx
188	shlq	$60,%rcx
/* Encodes movq %xmm0,%rsi: low 64 bits of the SSE accumulator. */
189.byte	102,72,15,126,198
190	shrq	$4,%rbx
191	xorq	%rcx,%rax
192	psrldq	$8,%xmm0
193	xorq	%rbx,%rdx
/* Encodes movq %xmm0,%rdi: upper 64 bits, moved down by the psrldq. */
194.byte	102,72,15,126,199
/* Fold the SSE accumulator into the integer result %rdx:%rax. */
195	xorq	%rsi,%rax
196	xorq	%rdi,%rdx
197
/* Release the table area and return; 0xf3,0xc3 encodes "rep ret". */
198	addq	$128+8,%rsp
199.cfi_adjust_cfa_offset	-128-8
200	.byte	0xf3,0xc3
201.Lend_mul_1x1:
202.cfi_endproc
203.size	_mul_1x1,.-_mul_1x1
204
/*
 * bn_GF2m_mul_2x2: global entry point. Multiplies two 128-bit operands,
 * each passed as two 64-bit words, as carry-less polynomials (partial
 * products combined with XOR) and stores the 256-bit result as four
 * 64-bit words.
 *
 * In (registers as read below):
 *   %rdi       = destination pointer; 32 bytes are written
 *   %rsi, %rdx = high and low word of the first operand
 *   %rcx, %r8  = high and low word of the second operand
 * Out: 0(%rdi)..31(%rdi), least-significant word first.
 *
 * Both paths use the three-multiplication Karatsuba scheme:
 * H = high*high, L = low*low, M = (high^low)*(high^low), and the result
 * is L ^ ((M ^ L ^ H) << 64) ^ (H << 128).
 *
 * Path selection: bit 33 of the 64-bit value loaded from
 * OPENSSL_ia32cap_P (defined outside this file) selects the path that
 * uses the .byte-encoded pclmulqdq instructions; otherwise the
 * table-driven _mul_1x1 fallback is used.
 * NOTE(review): presumably that bit is the CPU's PCLMULQDQ feature flag;
 * confirm against the definition of OPENSSL_ia32cap_P.
 */
205.globl	bn_GF2m_mul_2x2
206.type	bn_GF2m_mul_2x2,@function
207.align	16
208bn_GF2m_mul_2x2:
209.cfi_startproc
/* Copy of the entry %rsp; not read again in the code visible here. */
210	movq	%rsp,%rax
211	movq	OPENSSL_ia32cap_P(%rip),%r10
212	btq	$33,%r10
213	jnc	.Lvanilla_mul_2x2
214
/*
 * The four .byte lines encode:
 *   movq %rsi,%xmm0   movq %rcx,%xmm1   movq %rdx,%xmm2   movq %r8,%xmm3
 */
215.byte	102,72,15,110,198
216.byte	102,72,15,110,201
217.byte	102,72,15,110,210
218.byte	102,73,15,110,216
/* %xmm4/%xmm5 will hold the XOR of the two words of each operand. */
219	movdqa	%xmm0,%xmm4
220	movdqa	%xmm1,%xmm5
/* Encodes pclmulqdq $0,%xmm1,%xmm0: H = high*high. */
221.byte	102,15,58,68,193,0
222	pxor	%xmm2,%xmm4
223	pxor	%xmm3,%xmm5
/* Encodes pclmulqdq $0,%xmm3,%xmm2: L = low*low. */
224.byte	102,15,58,68,211,0
/* Encodes pclmulqdq $0,%xmm5,%xmm4: M = (high^low)*(high^low). */
225.byte	102,15,58,68,229,0
/* Middle term M ^ H ^ L. */
226	xorps	%xmm0,%xmm4
227	xorps	%xmm2,%xmm4
/*
 * Split the middle term at 64 bits: its low half is XORed into the upper
 * word of L (%xmm2), its high half into the lower word of H (%xmm0).
 */
228	movdqa	%xmm4,%xmm5
229	pslldq	$8,%xmm4
230	psrldq	$8,%xmm5
231	pxor	%xmm4,%xmm2
232	pxor	%xmm5,%xmm0
/* Unaligned stores: result words 0-1, then words 2-3. */
233	movdqu	%xmm2,0(%rdi)
234	movdqu	%xmm0,16(%rdi)
/* 0xf3,0xc3 encodes "rep ret". */
235	.byte	0xf3,0xc3
236
/*
 * Table-driven fallback. Frame layout (136 bytes below the entry %rsp):
 *     0..15   L (low*low product: low word, high word)
 *    16..31   H (high*high product: low word, high word)
 *    32       saved %rdi (destination pointer)
 *    40..64   saved %rsi, %rdx, %rcx, %r8 (the four input words)
 *    80..112  saved %r14, %r13, %r12, %rbp, %rbx, which _mul_1x1
 *             overwrites without saving
 */
237.align	16
238.Lvanilla_mul_2x2:
239	leaq	-136(%rsp),%rsp
240.cfi_adjust_cfa_offset	8*17
241	movq	%r14,80(%rsp)
242.cfi_rel_offset	%r14,8*10
243	movq	%r13,88(%rsp)
244.cfi_rel_offset	%r13,8*11
245	movq	%r12,96(%rsp)
246.cfi_rel_offset	%r12,8*12
247	movq	%rbp,104(%rsp)
248.cfi_rel_offset	%rbp,8*13
249	movq	%rbx,112(%rsp)
250.cfi_rel_offset	%rbx,8*14
251.Lbody_mul_2x2:
/* Spill the arguments; _mul_1x1 overwrites %rdi, %rsi, %rdx and %rcx. */
252	movq	%rdi,32(%rsp)
253	movq	%rsi,40(%rsp)
254	movq	%rdx,48(%rsp)
255	movq	%rcx,56(%rsp)
256	movq	%r8,64(%rsp)
257
/*
 * %r8 = 4-bit digit mask for _mul_1x1; it stays live across all three
 * calls. First call: H = high*high, kept at 16(%rsp)/24(%rsp).
 */
258	movq	$0xf,%r8
259	movq	%rsi,%rax
260	movq	%rcx,%rbp
261	call	_mul_1x1
262	movq	%rax,16(%rsp)
263	movq	%rdx,24(%rsp)
264
/* Second call: L = low*low, kept at 0(%rsp)/8(%rsp). */
265	movq	48(%rsp),%rax
266	movq	64(%rsp),%rbp
267	call	_mul_1x1
268	movq	%rax,0(%rsp)
269	movq	%rdx,8(%rsp)
270
/* Third call: M = (high^low)*(high^low), returned in %rdx:%rax. */
271	movq	40(%rsp),%rax
272	movq	56(%rsp),%rbp
273	xorq	48(%rsp),%rax
274	xorq	64(%rsp),%rbp
275	call	_mul_1x1
/* Reload L into %rcx:%rbx, H into %rsi:%rdi and the destination into %rbp. */
276	movq	0(%rsp),%rbx
277	movq	8(%rsp),%rcx
278	movq	16(%rsp),%rdi
279	movq	24(%rsp),%rsi
280	movq	32(%rsp),%rbp
281
/*
 * Recombination. With L = (l0,l1), H = (h0,h1), M = (m0,m1), low word
 * first, the XOR chain below leaves
 *   %rdx = m1 ^ l1 ^ h0 ^ h1   (result word 2)
 *   %rax = m0 ^ l0 ^ l1 ^ h0   (result word 1)
 * while words 0 and 3 are l0 and h1 stored unchanged.
 */
282	xorq	%rdx,%rax
283	xorq	%rcx,%rdx
284	xorq	%rbx,%rax
285	movq	%rbx,0(%rbp)
286	xorq	%rdi,%rdx
287	movq	%rsi,24(%rbp)
288	xorq	%rsi,%rax
289	xorq	%rsi,%rdx
290	xorq	%rdx,%rax
291	movq	%rdx,16(%rbp)
292	movq	%rax,8(%rbp)
293
/* Restore the registers saved on entry to this path and drop the frame. */
294	movq	80(%rsp),%r14
295.cfi_restore	%r14
296	movq	88(%rsp),%r13
297.cfi_restore	%r13
298	movq	96(%rsp),%r12
299.cfi_restore	%r12
300	movq	104(%rsp),%rbp
301.cfi_restore	%rbp
302	movq	112(%rsp),%rbx
303.cfi_restore	%rbx
304	leaq	136(%rsp),%rsp
305.cfi_adjust_cfa_offset	-8*17
306.Lepilogue_mul_2x2:
/* 0xf3,0xc3 encodes "rep ret". */
307	.byte	0xf3,0xc3
308.Lend_mul_2x2:
309.cfi_endproc
310.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
/*
 * NUL-terminated ASCII identification string emitted after the code:
 * "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 */
311.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
312.align	16
313