xref: /freebsd/sys/crypto/openssl/i386/vpaes-x86.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43)
1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from vpaes-x86.pl. */
2bc3d5698SJohn Baldwin#ifdef PIC
/*
 * .L_vpaes_consts: 49 rows of 16-byte lookup data followed by an ASCII
 * identification string.  The table lives in .text and is 64-byte aligned;
 * every routine below addresses it relative to .L_vpaes_consts+0x30 (held
 * in %ebp), so the first three rows sit at negative displacements
 * (-48, -32, -16) and the fourth row is displacement 0.
 */
3bc3d5698SJohn Baldwin.text
4bc3d5698SJohn Baldwin.align	64
5bc3d5698SJohn Baldwin.L_vpaes_consts:
/* -48(%ebp): preloaded into %xmm7 by _vpaes_preheat. */
6bc3d5698SJohn Baldwin.long	218628480,235210255,168496130,67568393
/* -32(%ebp) */
7bc3d5698SJohn Baldwin.long	252381056,17041926,33884169,51187212
/* -16(%ebp): 252645135 = 0x0F0F0F0F, the per-byte low-nibble mask (%xmm6). */
8bc3d5698SJohn Baldwin.long	252645135,252645135,252645135,252645135
/* 0(%ebp): displacement zero for all %ebp-relative table accesses. */
9bc3d5698SJohn Baldwin.long	1512730624,3266504856,1377990664,3401244816
10bc3d5698SJohn Baldwin.long	830229760,1275146365,2969422977,3447763452
11bc3d5698SJohn Baldwin.long	3411033600,2979783055,338359620,2782886510
12bc3d5698SJohn Baldwin.long	4209124096,907596821,221174255,1006095553
13bc3d5698SJohn Baldwin.long	191964160,3799684038,3164090317,1589111125
14bc3d5698SJohn Baldwin.long	182528256,1777043520,2877432650,3265356744
15bc3d5698SJohn Baldwin.long	1874708224,3503451415,3305285752,363511674
16bc3d5698SJohn Baldwin.long	1606117888,3487855781,1093350906,2384367825
/* 128(%ebp)..176(%ebp): four byte-permutation rows. */
17bc3d5698SJohn Baldwin.long	197121,67569157,134941193,202313229
18bc3d5698SJohn Baldwin.long	67569157,134941193,202313229,197121
19bc3d5698SJohn Baldwin.long	134941193,202313229,197121,67569157
20bc3d5698SJohn Baldwin.long	202313229,197121,67569157,134941193
/* 192(%ebp)..240(%ebp): four byte-permutation rows. */
21bc3d5698SJohn Baldwin.long	33619971,100992007,168364043,235736079
22bc3d5698SJohn Baldwin.long	235736079,33619971,100992007,168364043
23bc3d5698SJohn Baldwin.long	168364043,235736079,33619971,100992007
24bc3d5698SJohn Baldwin.long	100992007,168364043,235736079,33619971
/* 256(%ebp)..304(%ebp): four byte-permutation rows selected by %ecx. */
25bc3d5698SJohn Baldwin.long	50462976,117835012,185207048,252579084
26bc3d5698SJohn Baldwin.long	252314880,51251460,117574920,184942860
27bc3d5698SJohn Baldwin.long	184682752,252054788,50987272,118359308
28bc3d5698SJohn Baldwin.long	118099200,185467140,251790600,50727180
/* 320(%ebp): copied to the stack by _vpaes_schedule_core. */
29bc3d5698SJohn Baldwin.long	2946363062,528716217,1300004225,1881839624
/* 336(%ebp): 1532713819 = 0x5B5B5B5B, XORed in by the key-schedule code. */
30bc3d5698SJohn Baldwin.long	1532713819,1532713819,1532713819,1532713819
31bc3d5698SJohn Baldwin.long	3602276352,4288629033,3737020424,4153884961
32bc3d5698SJohn Baldwin.long	1354558464,32357713,2958822624,3775749553
33bc3d5698SJohn Baldwin.long	1201988352,132424512,1572796698,503232858
34bc3d5698SJohn Baldwin.long	2213177600,1597421020,4103937655,675398315
35bc3d5698SJohn Baldwin.long	2749646592,4273543773,1511898873,121693092
36bc3d5698SJohn Baldwin.long	3040248576,1103263732,2871565598,1608280554
37bc3d5698SJohn Baldwin.long	2236667136,2588920351,482954393,64377734
38bc3d5698SJohn Baldwin.long	3069987328,291237287,2117370568,3650299247
39bc3d5698SJohn Baldwin.long	533321216,3573750986,2572112006,1401264716
40bc3d5698SJohn Baldwin.long	1339849704,2721158661,548607111,3445553514
41bc3d5698SJohn Baldwin.long	2128193280,3054596040,2183486460,1257083700
42bc3d5698SJohn Baldwin.long	655635200,1165381986,3923443150,2344132524
43bc3d5698SJohn Baldwin.long	190078720,256924420,290342170,357187870
44bc3d5698SJohn Baldwin.long	1610966272,2263057382,4103205268,309794674
45bc3d5698SJohn Baldwin.long	2592527872,2233205587,1335446729,3402964816
46bc3d5698SJohn Baldwin.long	3973531904,3225098121,3002836325,1918774430
/* 608(%ebp): base (%ebx) used by _vpaes_decrypt_core. */
47bc3d5698SJohn Baldwin.long	3870401024,2102906079,2284471353,4117666579
48bc3d5698SJohn Baldwin.long	617007872,1021508343,366931923,691083277
49bc3d5698SJohn Baldwin.long	2528395776,3491914898,2968704004,1613121270
50bc3d5698SJohn Baldwin.long	3445188352,3247741094,844474987,4093578302
51bc3d5698SJohn Baldwin.long	651481088,1190302358,1689581232,574775300
52bc3d5698SJohn Baldwin.long	4289380608,206939853,2555985458,2489840491
53bc3d5698SJohn Baldwin.long	2130264064,327674451,3566485037,3349835193
54bc3d5698SJohn Baldwin.long	2470714624,316102159,3636825756,3393945945
/*
 * NUL-terminated ASCII:
 * "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
 */
55bc3d5698SJohn Baldwin.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
56bc3d5698SJohn Baldwin.byte	111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
57bc3d5698SJohn Baldwin.byte	83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
58bc3d5698SJohn Baldwin.byte	114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
59bc3d5698SJohn Baldwin.byte	118,101,114,115,105,116,121,41,0
60bc3d5698SJohn Baldwin.align	64
/*
 * _vpaes_preheat: finish the position-independent address computation and
 * preload two constant rows.
 *
 * In:   %ebp   = (.L_vpaes_consts+0x30) minus the address of the label that
 *                directly follows the caller's call instruction
 *       (%esp) = return address, i.e. the address of that same label
 * Out:  %ebp   = run-time address of .L_vpaes_consts+0x30
 *       %xmm7  = row at -48(%ebp) (first row of the table)
 *       %xmm6  = row at -16(%ebp) (0x0F in every byte)
 * The addl modifies the flags.
 */
61bc3d5698SJohn Baldwin.type	_vpaes_preheat,@function
62bc3d5698SJohn Baldwin.align	16
63bc3d5698SJohn Baldwin_vpaes_preheat:
64*c0855eaaSJohn Baldwin	#ifdef __CET__
65*c0855eaaSJohn Baldwin
66*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
67*c0855eaaSJohn Baldwin	#endif
68*c0855eaaSJohn Baldwin
/* Offset + return address = absolute address of the table base. */
69bc3d5698SJohn Baldwin	addl	(%esp),%ebp
70bc3d5698SJohn Baldwin	movdqa	-48(%ebp),%xmm7
71bc3d5698SJohn Baldwin	movdqa	-16(%ebp),%xmm6
72bc3d5698SJohn Baldwin	ret
73bc3d5698SJohn Baldwin.size	_vpaes_preheat,.-_vpaes_preheat
/*
 * _vpaes_encrypt_core: encrypt the single 16-byte block held in %xmm0.
 *
 * In:   %xmm0 = input block
 *       %edx  = key schedule: 16-byte round keys are read (unaligned) from
 *               (%edx) onward, and the loop count is read from 240(%edx)
 *       %ebp  = run-time address of .L_vpaes_consts+0x30
 *       %xmm6 = 0x0F byte mask, %xmm7 = row at -48(%ebp), both as loaded by
 *               _vpaes_preheat
 * Out:  %xmm0 = result block
 * Clobbers: %eax, %ebx, %ecx, %edx (left pointing past the last key read),
 *           %xmm1-%xmm5, flags.
 *
 * Each .byte sequence is a hand-encoded SSSE3 pshufb; the decoded AT&T form
 * is given beside it.  The recurring pandn / psrld $4 / pand triple splits
 * every byte into its high nibble (%xmm1) and low nibble (%xmm0) so that
 * each can be used as a pshufb table index.
 */
74bc3d5698SJohn Baldwin.type	_vpaes_encrypt_core,@function
75bc3d5698SJohn Baldwin.align	16
76bc3d5698SJohn Baldwin_vpaes_encrypt_core:
77*c0855eaaSJohn Baldwin	#ifdef __CET__
78*c0855eaaSJohn Baldwin
79*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
80*c0855eaaSJohn Baldwin	#endif
81*c0855eaaSJohn Baldwin
/* %ecx cycles through 16,32,48,0 and selects a permutation row per round. */
82bc3d5698SJohn Baldwin	movl	$16,%ecx
83bc3d5698SJohn Baldwin	movl	240(%edx),%eax
84bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
85bc3d5698SJohn Baldwin	movdqa	(%ebp),%xmm2
86bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
87bc3d5698SJohn Baldwin	pand	%xmm6,%xmm0
88bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm5
89bc3d5698SJohn Baldwin.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
90bc3d5698SJohn Baldwin	movdqa	16(%ebp),%xmm0
91bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
92bc3d5698SJohn Baldwin	psrld	$4,%xmm1
/*
 * This addl is the last flag-writing instruction before the first jnz
 * below: lea, jmp and the SSE instructions leave EFLAGS untouched.
 */
93bc3d5698SJohn Baldwin	addl	$16,%edx
94bc3d5698SJohn Baldwin.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
/* %ebx = table base + 192; indexed with %ecx at -64, 0 and +64. */
95bc3d5698SJohn Baldwin	leal	192(%ebp),%ebx
96bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
97bc3d5698SJohn Baldwin	jmp	.L000enc_entry
98bc3d5698SJohn Baldwin.align	16
/* Middle rounds: combine the table outputs selected by %xmm2 / %xmm3. */
99bc3d5698SJohn Baldwin.L001enc_loop:
100bc3d5698SJohn Baldwin	movdqa	32(%ebp),%xmm4
101bc3d5698SJohn Baldwin	movdqa	48(%ebp),%xmm0
102bc3d5698SJohn Baldwin.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
103bc3d5698SJohn Baldwin.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
/* %xmm5 holds the round key fetched at the end of the entry section. */
104bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm4
105bc3d5698SJohn Baldwin	movdqa	64(%ebp),%xmm5
106bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
107bc3d5698SJohn Baldwin	movdqa	-64(%ebx,%ecx,1),%xmm1
108bc3d5698SJohn Baldwin.byte	102,15,56,0,234	/* pshufb %xmm2,%xmm5 */
109bc3d5698SJohn Baldwin	movdqa	80(%ebp),%xmm2
110bc3d5698SJohn Baldwin	movdqa	(%ebx,%ecx,1),%xmm4
111bc3d5698SJohn Baldwin.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
112bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
113bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
114bc3d5698SJohn Baldwin.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
115bc3d5698SJohn Baldwin	addl	$16,%edx
116bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
117bc3d5698SJohn Baldwin.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
118bc3d5698SJohn Baldwin	addl	$16,%ecx
119bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
120bc3d5698SJohn Baldwin.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
121bc3d5698SJohn Baldwin	andl	$48,%ecx
/* Round counter; the ZF it produces is consumed by the jnz further down. */
122bc3d5698SJohn Baldwin	subl	$1,%eax
123bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm0
/* Shared per-round front end: nibble split and lookups via %xmm7 / -32(%ebp). */
124bc3d5698SJohn Baldwin.L000enc_entry:
125bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
126bc3d5698SJohn Baldwin	movdqa	-32(%ebp),%xmm5
127bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
128bc3d5698SJohn Baldwin	psrld	$4,%xmm1
129bc3d5698SJohn Baldwin	pand	%xmm6,%xmm0
130bc3d5698SJohn Baldwin.byte	102,15,56,0,232	/* pshufb %xmm0,%xmm5 */
131bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
132bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
133bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
134bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm4
135bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm3
136bc3d5698SJohn Baldwin.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
137bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
138bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm4
139bc3d5698SJohn Baldwin.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
140bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
141bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
142bc3d5698SJohn Baldwin.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
/* Fetch the next round key (unaligned load). */
143bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm5
144bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
/* Tests ZF from subl $1,%eax (or from addl $16,%edx on the first pass). */
145bc3d5698SJohn Baldwin	jnz	.L001enc_loop
/* Final round: tables at 96/112(%ebp), then the permutation at 64(%ebx,%ecx). */
146bc3d5698SJohn Baldwin	movdqa	96(%ebp),%xmm4
147bc3d5698SJohn Baldwin	movdqa	112(%ebp),%xmm0
148bc3d5698SJohn Baldwin.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
149bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm4
150bc3d5698SJohn Baldwin.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
151bc3d5698SJohn Baldwin	movdqa	64(%ebx,%ecx,1),%xmm1
152bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
153bc3d5698SJohn Baldwin.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
154bc3d5698SJohn Baldwin	ret
155bc3d5698SJohn Baldwin.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
/*
 * _vpaes_decrypt_core: decrypt the single 16-byte block held in %xmm0.
 *
 * In:   %xmm0 = input block
 *       %edx  = key schedule: 16-byte round keys are read (unaligned) from
 *               (%edx) onward, and the loop count is read from 240(%edx)
 *       %ebp  = run-time address of .L_vpaes_consts+0x30
 *       %xmm6 = 0x0F byte mask, %xmm7 = row at -48(%ebp), both as loaded by
 *               _vpaes_preheat
 * Out:  %xmm0 = result block
 * Clobbers: %eax, %ebx, %ecx, %edx (left pointing past the last key read),
 *           %xmm1-%xmm5, flags.
 *
 * Each .byte sequence is a hand-encoded SSSE3 pshufb or palignr; the decoded
 * AT&T form is given beside it.
 */
156bc3d5698SJohn Baldwin.type	_vpaes_decrypt_core,@function
157bc3d5698SJohn Baldwin.align	16
158bc3d5698SJohn Baldwin_vpaes_decrypt_core:
159*c0855eaaSJohn Baldwin	#ifdef __CET__
160*c0855eaaSJohn Baldwin
161*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
162*c0855eaaSJohn Baldwin	#endif
163*c0855eaaSJohn Baldwin
/* %ebx = table base + 608; all decryption tables are addressed from it. */
164bc3d5698SJohn Baldwin	leal	608(%ebp),%ebx
165bc3d5698SJohn Baldwin	movl	240(%edx),%eax
166bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
167bc3d5698SJohn Baldwin	movdqa	-64(%ebx),%xmm2
168bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
/* %ecx = ((count << 4) ^ 48) & 48: row offset for the final permutation. */
169bc3d5698SJohn Baldwin	movl	%eax,%ecx
170bc3d5698SJohn Baldwin	psrld	$4,%xmm1
171bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm5
172bc3d5698SJohn Baldwin	shll	$4,%ecx
173bc3d5698SJohn Baldwin	pand	%xmm6,%xmm0
174bc3d5698SJohn Baldwin.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
175bc3d5698SJohn Baldwin	movdqa	-48(%ebx),%xmm0
176bc3d5698SJohn Baldwin	xorl	$48,%ecx
177bc3d5698SJohn Baldwin.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
178bc3d5698SJohn Baldwin	andl	$48,%ecx
179bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
/* %xmm5 = permutation row; rotated by palignr $12 once per loop iteration. */
180bc3d5698SJohn Baldwin	movdqa	176(%ebp),%xmm5
181bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
/*
 * Last flag-writing instruction before the first jnz below: lea, jmp and
 * the SSE instructions leave EFLAGS untouched.
 */
182bc3d5698SJohn Baldwin	addl	$16,%edx
/* %ecx becomes an absolute pointer: %ebx-352+offset = 256(%ebp)+offset. */
183bc3d5698SJohn Baldwin	leal	-352(%ebx,%ecx,1),%ecx
184bc3d5698SJohn Baldwin	jmp	.L002dec_entry
185bc3d5698SJohn Baldwin.align	16
/*
 * Middle rounds: four table pairs at -32/-16, 0/16, 32/48 and 64/80(%ebx),
 * with the accumulator %xmm0 permuted by %xmm5 between pairs.
 */
186bc3d5698SJohn Baldwin.L003dec_loop:
187bc3d5698SJohn Baldwin	movdqa	-32(%ebx),%xmm4
188bc3d5698SJohn Baldwin	movdqa	-16(%ebx),%xmm1
189bc3d5698SJohn Baldwin.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
190bc3d5698SJohn Baldwin.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
/* %xmm0 holds the round key loaded at the end of the entry section. */
191bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
192bc3d5698SJohn Baldwin	movdqa	(%ebx),%xmm4
193bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
194bc3d5698SJohn Baldwin	movdqa	16(%ebx),%xmm1
195bc3d5698SJohn Baldwin.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
196bc3d5698SJohn Baldwin.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
197bc3d5698SJohn Baldwin.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
198bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
199bc3d5698SJohn Baldwin	movdqa	32(%ebx),%xmm4
200bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
201bc3d5698SJohn Baldwin	movdqa	48(%ebx),%xmm1
202bc3d5698SJohn Baldwin.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
203bc3d5698SJohn Baldwin.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
204bc3d5698SJohn Baldwin.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
205bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
206bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm4
207bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
208bc3d5698SJohn Baldwin	movdqa	80(%ebx),%xmm1
209bc3d5698SJohn Baldwin.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
210bc3d5698SJohn Baldwin.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
211bc3d5698SJohn Baldwin.byte	102,15,56,0,203	/* pshufb %xmm3,%xmm1 */
212bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
213bc3d5698SJohn Baldwin	addl	$16,%edx
214bc3d5698SJohn Baldwin.byte	102,15,58,15,237,12	/* palignr $12,%xmm5,%xmm5 */
215bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
/* Round counter; the ZF it produces is consumed by the jnz further down. */
216bc3d5698SJohn Baldwin	subl	$1,%eax
/* Shared per-round front end: nibble split and lookups via %xmm7 / -32(%ebp). */
217bc3d5698SJohn Baldwin.L002dec_entry:
218bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
219bc3d5698SJohn Baldwin	movdqa	-32(%ebp),%xmm2
220bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
221bc3d5698SJohn Baldwin	pand	%xmm6,%xmm0
222bc3d5698SJohn Baldwin	psrld	$4,%xmm1
223bc3d5698SJohn Baldwin.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
224bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
225bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
226bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
227bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm4
228bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
229bc3d5698SJohn Baldwin.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
230bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm4
231bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
232bc3d5698SJohn Baldwin.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
233bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
234bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
235bc3d5698SJohn Baldwin.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
/* Next round key goes straight into %xmm0 (unaligned load). */
236bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm0
237bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
/* Tests ZF from subl $1,%eax (or from addl $16,%edx on the first pass). */
238bc3d5698SJohn Baldwin	jnz	.L003dec_loop
/* Final round: tables at 96/112(%ebx), then the permutation row at (%ecx). */
239bc3d5698SJohn Baldwin	movdqa	96(%ebx),%xmm4
240bc3d5698SJohn Baldwin.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
241bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
242bc3d5698SJohn Baldwin	movdqa	112(%ebx),%xmm0
243bc3d5698SJohn Baldwin	movdqa	(%ecx),%xmm2
244bc3d5698SJohn Baldwin.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
245bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
246bc3d5698SJohn Baldwin.byte	102,15,56,0,194	/* pshufb %xmm2,%xmm0 */
247bc3d5698SJohn Baldwin	ret
248bc3d5698SJohn Baldwin.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
/*
 * _vpaes_schedule_core: expand a user key into a round-key schedule.
 *
 * In:   %esi = user key bytes (read unaligned at offsets 0, 8 and 16)
 *       %eax = key size in bits; compared with 192 to choose the
 *              128 / 192 / 256 path
 *       %edx = address where the first round key is written
 *       %edi = 0 for an encryption schedule, non-zero for decryption
 *       %ecx = 0/16/32/48, selecting a permutation row at 256(%ebp,%ecx)
 *       %ebp = (.L_vpaes_consts+0x30) minus the return address; completed
 *              here by adding (%esp)
 * Stack: 4(%esp) on entry must be 16-byte aligned; 4(%esp) and 20(%esp) are
 *        used as movdqa scratch slots (both callers below reserve them).
 * Out:  round keys stored through %edx; %xmm0-%xmm7 are zeroed on return.
 * Clobbers: %eax, %ebx, %ecx, %edx, %ebp, flags; %esi is overwritten by
 *           _vpaes_schedule_mangle when %edi is non-zero.
 */
249bc3d5698SJohn Baldwin.type	_vpaes_schedule_core,@function
250bc3d5698SJohn Baldwin.align	16
251bc3d5698SJohn Baldwin_vpaes_schedule_core:
252*c0855eaaSJohn Baldwin	#ifdef __CET__
253*c0855eaaSJohn Baldwin
254*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
255*c0855eaaSJohn Baldwin	#endif
256*c0855eaaSJohn Baldwin
/* Offset + return address = absolute address of the table base. */
257bc3d5698SJohn Baldwin	addl	(%esp),%ebp
258bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
259bc3d5698SJohn Baldwin	movdqa	320(%ebp),%xmm2
260bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
/* %ebx = table pair used by the _vpaes_schedule_transform call below. */
261bc3d5698SJohn Baldwin	leal	(%ebp),%ebx
/*
 * Park the 320(%ebp) row on the stack.  _vpaes_schedule_round, one call
 * level deeper, reads and updates this same slot as 8(%esp).
 */
262bc3d5698SJohn Baldwin	movdqa	%xmm2,4(%esp)
263bc3d5698SJohn Baldwin	call	_vpaes_schedule_transform
264bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm7
265bc3d5698SJohn Baldwin	testl	%edi,%edi
266bc3d5698SJohn Baldwin	jnz	.L004schedule_am_decrypting
/* Encrypting: the first round key is the transformed key. */
267bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edx)
268bc3d5698SJohn Baldwin	jmp	.L005schedule_go
/* Decrypting: store the untransformed key bytes permuted by row %ecx. */
269bc3d5698SJohn Baldwin.L004schedule_am_decrypting:
270bc3d5698SJohn Baldwin	movdqa	256(%ebp,%ecx,1),%xmm1
271bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
272bc3d5698SJohn Baldwin	movdqu	%xmm3,(%edx)
273bc3d5698SJohn Baldwin	xorl	$48,%ecx
/* Dispatch on key size: above 192 -> 256 path, equal -> 192, else 128. */
274bc3d5698SJohn Baldwin.L005schedule_go:
275bc3d5698SJohn Baldwin	cmpl	$192,%eax
276bc3d5698SJohn Baldwin	ja	.L006schedule_256
277bc3d5698SJohn Baldwin	je	.L007schedule_192
/* 128-bit path: 10 rounds, each but the last followed by a mangle/store. */
278bc3d5698SJohn Baldwin.L008schedule_128:
279bc3d5698SJohn Baldwin	movl	$10,%eax
280bc3d5698SJohn Baldwin.L009loop_schedule_128:
281bc3d5698SJohn Baldwin	call	_vpaes_schedule_round
282bc3d5698SJohn Baldwin	decl	%eax
283bc3d5698SJohn Baldwin	jz	.L010schedule_mangle_last
284bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
285bc3d5698SJohn Baldwin	jmp	.L009loop_schedule_128
286bc3d5698SJohn Baldwin.align	16
/*
 * 192-bit path: key bytes 8..23 are transformed into %xmm6 and its low
 * 8 bytes are cleared (movhlps copies the zeroed high qword of %xmm4 into
 * the low qword of %xmm6).  Four passes of the loop follow.
 */
287bc3d5698SJohn Baldwin.L007schedule_192:
288bc3d5698SJohn Baldwin	movdqu	8(%esi),%xmm0
289bc3d5698SJohn Baldwin	call	_vpaes_schedule_transform
290bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
291bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
292bc3d5698SJohn Baldwin	movhlps	%xmm4,%xmm6
293bc3d5698SJohn Baldwin	movl	$4,%eax
294bc3d5698SJohn Baldwin.L011loop_schedule_192:
295bc3d5698SJohn Baldwin	call	_vpaes_schedule_round
296bc3d5698SJohn Baldwin.byte	102,15,58,15,198,8	/* palignr $8,%xmm6,%xmm0 */
297bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
298bc3d5698SJohn Baldwin	call	_vpaes_schedule_192_smear
299bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
300bc3d5698SJohn Baldwin	call	_vpaes_schedule_round
301bc3d5698SJohn Baldwin	decl	%eax
302bc3d5698SJohn Baldwin	jz	.L010schedule_mangle_last
303bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
304bc3d5698SJohn Baldwin	call	_vpaes_schedule_192_smear
305bc3d5698SJohn Baldwin	jmp	.L011loop_schedule_192
306bc3d5698SJohn Baldwin.align	16
/* 256-bit path: key bytes 16..31 are transformed; seven passes follow. */
307bc3d5698SJohn Baldwin.L006schedule_256:
308bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm0
309bc3d5698SJohn Baldwin	call	_vpaes_schedule_transform
310bc3d5698SJohn Baldwin	movl	$7,%eax
311bc3d5698SJohn Baldwin.L012loop_schedule_256:
312bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
313bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
314bc3d5698SJohn Baldwin	call	_vpaes_schedule_round
315bc3d5698SJohn Baldwin	decl	%eax
316bc3d5698SJohn Baldwin	jz	.L010schedule_mangle_last
317bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
/* Broadcast the top dword of %xmm0 into all four dwords. */
318bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm0
/*
 * Run only the tail of the round (.L_vpaes_schedule_low_round) with %xmm6
 * standing in for %xmm7; the real %xmm7 is parked in the second stack slot.
 */
319bc3d5698SJohn Baldwin	movdqa	%xmm7,20(%esp)
320bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
321bc3d5698SJohn Baldwin	call	.L_vpaes_schedule_low_round
322bc3d5698SJohn Baldwin	movdqa	20(%esp),%xmm7
323bc3d5698SJohn Baldwin	jmp	.L012loop_schedule_256
324bc3d5698SJohn Baldwin.align	16
/*
 * Emit the final round key.  Decrypting: transform with the table pair at
 * 384(%ebp) and store at %edx-16.  Encrypting: first permute by row %ecx,
 * use the pair at 352(%ebp), and store at %edx+16 (+32 then -16).
 */
325bc3d5698SJohn Baldwin.L010schedule_mangle_last:
326bc3d5698SJohn Baldwin	leal	384(%ebp),%ebx
327bc3d5698SJohn Baldwin	testl	%edi,%edi
328bc3d5698SJohn Baldwin	jnz	.L013schedule_mangle_last_dec
329bc3d5698SJohn Baldwin	movdqa	256(%ebp,%ecx,1),%xmm1
330bc3d5698SJohn Baldwin.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
331bc3d5698SJohn Baldwin	leal	352(%ebp),%ebx
332bc3d5698SJohn Baldwin	addl	$32,%edx
333bc3d5698SJohn Baldwin.L013schedule_mangle_last_dec:
334bc3d5698SJohn Baldwin	addl	$-16,%edx
335bc3d5698SJohn Baldwin	pxor	336(%ebp),%xmm0
336bc3d5698SJohn Baldwin	call	_vpaes_schedule_transform
337bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edx)
/* Zero every XMM register so no key-derived data is left behind. */
338bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
339bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
340bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
341bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
342bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
343bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
344bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
345bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
346bc3d5698SJohn Baldwin	ret
347bc3d5698SJohn Baldwin.size	_vpaes_schedule_core,.-_vpaes_schedule_core
/*
 * _vpaes_schedule_192_smear: mixing step used only by the 192-bit path of
 * _vpaes_schedule_core.
 *
 * In:   %xmm6, %xmm7
 * Out:  %xmm0 = %xmm6 ^ shuffle(%xmm6) ^ shuffle(%xmm7)
 *       %xmm6 = the same value with its low 8 bytes cleared
 *       %xmm1 = 0
 * %xmm7 is read but not modified.
 */
348bc3d5698SJohn Baldwin.type	_vpaes_schedule_192_smear,@function
349bc3d5698SJohn Baldwin.align	16
350bc3d5698SJohn Baldwin_vpaes_schedule_192_smear:
351*c0855eaaSJohn Baldwin	#ifdef __CET__
352*c0855eaaSJohn Baldwin
353*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
354*c0855eaaSJohn Baldwin	#endif
355*c0855eaaSJohn Baldwin
/* $128: result dwords 0,1,2 = source dword 0; result dword 3 = source dword 2. */
356bc3d5698SJohn Baldwin	pshufd	$128,%xmm6,%xmm1
/* $254: result dword 0 = source dword 2; result dwords 1,2,3 = source dword 3. */
357bc3d5698SJohn Baldwin	pshufd	$254,%xmm7,%xmm0
358bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm6
359bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
360bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
361bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm0
/* Copy the (zero) high qword of %xmm1 over the low qword of %xmm6. */
362bc3d5698SJohn Baldwin	movhlps	%xmm1,%xmm6
363bc3d5698SJohn Baldwin	ret
364bc3d5698SJohn Baldwin.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
/*
 * _vpaes_schedule_round: one key-schedule round.
 *
 * Two entry points:
 *   _vpaes_schedule_round         full round; first folds one byte of the
 *                                 rotating stack slot into %xmm7 and rotates
 *                                 %xmm0
 *   .L_vpaes_schedule_low_round   skips that prologue (used by the 256-bit
 *                                 path of _vpaes_schedule_core)
 *
 * In:   %xmm0 = input to the table-lookup stage
 *       %xmm7 = previous round key state
 *       %ebp  = run-time address of .L_vpaes_consts+0x30
 *       8(%esp) (full entry only) = 16-byte aligned slot written by
 *               _vpaes_schedule_core as 4(%esp); rotated in place here
 * Out:  %xmm0 = %xmm7 = new round key state
 * Clobbers: %xmm1-%xmm5.  No general-purpose register or flag is written.
 */
365bc3d5698SJohn Baldwin.type	_vpaes_schedule_round,@function
366bc3d5698SJohn Baldwin.align	16
367bc3d5698SJohn Baldwin_vpaes_schedule_round:
368*c0855eaaSJohn Baldwin	#ifdef __CET__
369*c0855eaaSJohn Baldwin
370*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
371*c0855eaaSJohn Baldwin	#endif
372*c0855eaaSJohn Baldwin
/* Take the top byte of the slot into byte 0 of %xmm1, rotate the slot. */
373bc3d5698SJohn Baldwin	movdqa	8(%esp),%xmm2
374bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
375bc3d5698SJohn Baldwin.byte	102,15,58,15,202,15	/* palignr $15,%xmm2,%xmm1 */
376bc3d5698SJohn Baldwin.byte	102,15,58,15,210,15	/* palignr $15,%xmm2,%xmm2 */
377bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
/* Broadcast the top dword of %xmm0, then rotate the register by one byte. */
378bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm0
379bc3d5698SJohn Baldwin.byte	102,15,58,15,192,1	/* palignr $1,%xmm0,%xmm0 */
380bc3d5698SJohn Baldwin	movdqa	%xmm2,8(%esp)
381bc3d5698SJohn Baldwin.L_vpaes_schedule_low_round:
/* %xmm7 ^= %xmm7 << 4 bytes; %xmm7 ^= %xmm7 << 8 bytes; then ^= 0x5B row. */
382bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm1
383bc3d5698SJohn Baldwin	pslldq	$4,%xmm7
384bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
385bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm1
386bc3d5698SJohn Baldwin	pslldq	$8,%xmm7
387bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
388bc3d5698SJohn Baldwin	pxor	336(%ebp),%xmm7
/*
 * Table-lookup stage on %xmm0.  The mask and first table are reloaded from
 * memory into %xmm4 / %xmm5 here because %xmm6 / %xmm7 carry key state.
 */
389bc3d5698SJohn Baldwin	movdqa	-16(%ebp),%xmm4
390bc3d5698SJohn Baldwin	movdqa	-48(%ebp),%xmm5
391bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm1
392bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
393bc3d5698SJohn Baldwin	psrld	$4,%xmm1
394bc3d5698SJohn Baldwin	pand	%xmm4,%xmm0
395bc3d5698SJohn Baldwin	movdqa	-32(%ebp),%xmm2
396bc3d5698SJohn Baldwin.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
397bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
398bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm3
399bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
400bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
401bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
402bc3d5698SJohn Baldwin.byte	102,15,56,0,224	/* pshufb %xmm0,%xmm4 */
403bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm4
404bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
405bc3d5698SJohn Baldwin.byte	102,15,56,0,211	/* pshufb %xmm3,%xmm2 */
406bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
407bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm3
408bc3d5698SJohn Baldwin.byte	102,15,56,0,220	/* pshufb %xmm4,%xmm3 */
409bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
410bc3d5698SJohn Baldwin	movdqa	32(%ebp),%xmm4
411bc3d5698SJohn Baldwin.byte	102,15,56,0,226	/* pshufb %xmm2,%xmm4 */
412bc3d5698SJohn Baldwin	movdqa	48(%ebp),%xmm0
413bc3d5698SJohn Baldwin.byte	102,15,56,0,195	/* pshufb %xmm3,%xmm0 */
414bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
/* Combine with the smeared previous key and keep a copy in %xmm7. */
415bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm0
416bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm7
417bc3d5698SJohn Baldwin	ret
418bc3d5698SJohn Baldwin.size	_vpaes_schedule_round,.-_vpaes_schedule_round
/*
 * _vpaes_schedule_transform: apply a nibble-indexed byte transform to %xmm0.
 *
 * In:   %xmm0 = input vector
 *       %ebx  = address of a 32-byte table pair: (%ebx) is indexed by the
 *               low nibble of each byte, 16(%ebx) by the high nibble
 *       %ebp  = run-time address of .L_vpaes_consts+0x30 (for the 0x0F mask)
 * Out:  %xmm0 = table_lo[low nibbles] ^ table_hi[high nibbles]
 * Clobbers: %xmm1, %xmm2.  No general-purpose register or flag is written.
 */
419bc3d5698SJohn Baldwin.type	_vpaes_schedule_transform,@function
420bc3d5698SJohn Baldwin.align	16
421bc3d5698SJohn Baldwin_vpaes_schedule_transform:
422*c0855eaaSJohn Baldwin	#ifdef __CET__
423*c0855eaaSJohn Baldwin
424*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
425*c0855eaaSJohn Baldwin	#endif
426*c0855eaaSJohn Baldwin
/* %xmm1 = high nibbles (shifted down), %xmm0 = low nibbles. */
427bc3d5698SJohn Baldwin	movdqa	-16(%ebp),%xmm2
428bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
429bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
430bc3d5698SJohn Baldwin	psrld	$4,%xmm1
431bc3d5698SJohn Baldwin	pand	%xmm2,%xmm0
432bc3d5698SJohn Baldwin	movdqa	(%ebx),%xmm2
433bc3d5698SJohn Baldwin.byte	102,15,56,0,208	/* pshufb %xmm0,%xmm2 */
434bc3d5698SJohn Baldwin	movdqa	16(%ebx),%xmm0
435bc3d5698SJohn Baldwin.byte	102,15,56,0,193	/* pshufb %xmm1,%xmm0 */
436bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
437bc3d5698SJohn Baldwin	ret
438bc3d5698SJohn Baldwin.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
/*
 * _vpaes_schedule_mangle: convert the current round key in %xmm0 to its
 * stored form and write it to the schedule.
 *
 * In:   %xmm0 = round key (left unchanged; a copy is worked on in %xmm4)
 *       %edx  = address of the previously written round key
 *       %edi  = 0 for encryption, non-zero for decryption
 *       %ecx  = 0/16/32/48, selecting the permutation row at 256(%ebp,%ecx)
 *       %ebp  = run-time address of .L_vpaes_consts+0x30
 * Out:  %edx  advanced by +16 (encryption) or -16 (decryption) and the
 *             mangled key stored there (unaligned store)
 *       %ecx  = (%ecx - 16) & 48
 * Clobbers: %xmm1-%xmm5, flags, and %esi on the decryption path.
 */
439bc3d5698SJohn Baldwin.type	_vpaes_schedule_mangle,@function
440bc3d5698SJohn Baldwin.align	16
441bc3d5698SJohn Baldwin_vpaes_schedule_mangle:
442*c0855eaaSJohn Baldwin	#ifdef __CET__
443*c0855eaaSJohn Baldwin
444*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
445*c0855eaaSJohn Baldwin	#endif
446*c0855eaaSJohn Baldwin
447bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm4
448bc3d5698SJohn Baldwin	movdqa	128(%ebp),%xmm5
449bc3d5698SJohn Baldwin	testl	%edi,%edi
450bc3d5698SJohn Baldwin	jnz	.L014schedule_mangle_dec
/*
 * Encryption: XOR with the 0x5B row, then %xmm3 = P(x) ^ P(P(x)) ^ P(P(P(x)))
 * where P is the byte permutation held in %xmm5.
 */
451bc3d5698SJohn Baldwin	addl	$16,%edx
452bc3d5698SJohn Baldwin	pxor	336(%ebp),%xmm4
453bc3d5698SJohn Baldwin.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
454bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm3
455bc3d5698SJohn Baldwin.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
456bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm3
457bc3d5698SJohn Baldwin.byte	102,15,56,0,229	/* pshufb %xmm5,%xmm4 */
458bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm3
459bc3d5698SJohn Baldwin	jmp	.L015schedule_mangle_both
460bc3d5698SJohn Baldwin.align	16
/*
 * Decryption: split into nibbles (%xmm4 low, %xmm1 high) and chain four
 * table pairs starting at 416(%ebp), permuting the running value by %xmm5
 * between pairs.  %esi is reused as the table pointer.
 */
461bc3d5698SJohn Baldwin.L014schedule_mangle_dec:
462bc3d5698SJohn Baldwin	movdqa	-16(%ebp),%xmm2
463bc3d5698SJohn Baldwin	leal	416(%ebp),%esi
464bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
465bc3d5698SJohn Baldwin	pandn	%xmm4,%xmm1
466bc3d5698SJohn Baldwin	psrld	$4,%xmm1
467bc3d5698SJohn Baldwin	pand	%xmm2,%xmm4
468bc3d5698SJohn Baldwin	movdqa	(%esi),%xmm2
469bc3d5698SJohn Baldwin.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
470bc3d5698SJohn Baldwin	movdqa	16(%esi),%xmm3
471bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
472bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
473bc3d5698SJohn Baldwin.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
474bc3d5698SJohn Baldwin	movdqa	32(%esi),%xmm2
475bc3d5698SJohn Baldwin.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
476bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
477bc3d5698SJohn Baldwin	movdqa	48(%esi),%xmm3
478bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
479bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
480bc3d5698SJohn Baldwin.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
481bc3d5698SJohn Baldwin	movdqa	64(%esi),%xmm2
482bc3d5698SJohn Baldwin.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
483bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
484bc3d5698SJohn Baldwin	movdqa	80(%esi),%xmm3
485bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
486bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
487bc3d5698SJohn Baldwin.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
488bc3d5698SJohn Baldwin	movdqa	96(%esi),%xmm2
489bc3d5698SJohn Baldwin.byte	102,15,56,0,212	/* pshufb %xmm4,%xmm2 */
490bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
491bc3d5698SJohn Baldwin	movdqa	112(%esi),%xmm3
492bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
493bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
494bc3d5698SJohn Baldwin	addl	$-16,%edx
/* Common tail: permute by row %ecx, step %ecx to the previous row, store. */
495bc3d5698SJohn Baldwin.L015schedule_mangle_both:
496bc3d5698SJohn Baldwin	movdqa	256(%ebp,%ecx,1),%xmm1
497bc3d5698SJohn Baldwin.byte	102,15,56,0,217	/* pshufb %xmm1,%xmm3 */
498bc3d5698SJohn Baldwin	addl	$-16,%ecx
499bc3d5698SJohn Baldwin	andl	$48,%ecx
500bc3d5698SJohn Baldwin	movdqu	%xmm3,(%edx)
501bc3d5698SJohn Baldwin	ret
502bc3d5698SJohn Baldwin.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
/*
 * vpaes_set_encrypt_key: build an encryption key schedule.
 *
 * Stack arguments (i386, read after the four register pushes):
 *   20(%esp) = pointer to the user key bytes         -> %esi
 *   24(%esp) = key size in bits                      -> %eax
 *   28(%esp) = pointer to the key schedule to fill   -> %edx
 * Stores (bits >> 5) + 5 at offset 240 of the schedule; the encrypt/decrypt
 * cores read that value as their loop count.
 * Returns 0 in %eax.  %ebp, %ebx, %esi and %edi are saved and restored.
 */
503bc3d5698SJohn Baldwin.globl	vpaes_set_encrypt_key
504bc3d5698SJohn Baldwin.type	vpaes_set_encrypt_key,@function
505bc3d5698SJohn Baldwin.align	16
506bc3d5698SJohn Baldwinvpaes_set_encrypt_key:
507bc3d5698SJohn Baldwin.L_vpaes_set_encrypt_key_begin:
508*c0855eaaSJohn Baldwin	#ifdef __CET__
509*c0855eaaSJohn Baldwin
510*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
511*c0855eaaSJohn Baldwin	#endif
512*c0855eaaSJohn Baldwin
513bc3d5698SJohn Baldwin	pushl	%ebp
514bc3d5698SJohn Baldwin	pushl	%ebx
515bc3d5698SJohn Baldwin	pushl	%esi
516bc3d5698SJohn Baldwin	pushl	%edi
517bc3d5698SJohn Baldwin	movl	20(%esp),%esi
/*
 * Carve out a 16-byte aligned scratch frame at least 56 bytes below the
 * current %esp.  The argument loads interleaved here still use the
 * original %esp; the switch happens at the xchgl.
 */
518bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
519bc3d5698SJohn Baldwin	movl	24(%esp),%eax
520bc3d5698SJohn Baldwin	andl	$-16,%ebx
521bc3d5698SJohn Baldwin	movl	28(%esp),%edx
522bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
/* Remember the original %esp inside the new frame. */
523bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
/* Loop count = bits/32 + 5, kept at offset 240 of the schedule. */
524bc3d5698SJohn Baldwin	movl	%eax,%ebx
525bc3d5698SJohn Baldwin	shrl	$5,%ebx
526bc3d5698SJohn Baldwin	addl	$5,%ebx
527bc3d5698SJohn Baldwin	movl	%ebx,240(%edx)
/* %ecx = initial permutation-row offset, %edi = 0 selects encryption. */
528bc3d5698SJohn Baldwin	movl	$48,%ecx
529bc3d5698SJohn Baldwin	movl	$0,%edi
/*
 * PIC: %ebp = table base minus the address of the label after the call;
 * _vpaes_schedule_core adds its return address to make it absolute.
 */
530bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L016pic_point,%ebp
531bc3d5698SJohn Baldwin	call	_vpaes_schedule_core
532bc3d5698SJohn Baldwin.L016pic_point:
533bc3d5698SJohn Baldwin	movl	48(%esp),%esp
534bc3d5698SJohn Baldwin	xorl	%eax,%eax
535bc3d5698SJohn Baldwin	popl	%edi
536bc3d5698SJohn Baldwin	popl	%esi
537bc3d5698SJohn Baldwin	popl	%ebx
538bc3d5698SJohn Baldwin	popl	%ebp
539bc3d5698SJohn Baldwin	ret
540bc3d5698SJohn Baldwin.size	vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
/*
 * vpaes_set_decrypt_key: build a decryption key schedule.
 *
 * Stack arguments (i386, read after the four register pushes):
 *   20(%esp) = pointer to the user key bytes         -> %esi
 *   24(%esp) = key size in bits                      -> %eax
 *   28(%esp) = pointer to the key schedule to fill   -> %edx
 * Stores (bits >> 5) + 5 at offset 240 of the schedule, then hands
 * _vpaes_schedule_core a pointer to the far end of the schedule, which it
 * fills downwards (the decryption mangle step moves %edx by -16).
 * Returns 0 in %eax.  %ebp, %ebx, %esi and %edi are saved and restored.
 */
541bc3d5698SJohn Baldwin.globl	vpaes_set_decrypt_key
542bc3d5698SJohn Baldwin.type	vpaes_set_decrypt_key,@function
543bc3d5698SJohn Baldwin.align	16
544bc3d5698SJohn Baldwinvpaes_set_decrypt_key:
545bc3d5698SJohn Baldwin.L_vpaes_set_decrypt_key_begin:
546*c0855eaaSJohn Baldwin	#ifdef __CET__
547*c0855eaaSJohn Baldwin
548*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
549*c0855eaaSJohn Baldwin	#endif
550*c0855eaaSJohn Baldwin
551bc3d5698SJohn Baldwin	pushl	%ebp
552bc3d5698SJohn Baldwin	pushl	%ebx
553bc3d5698SJohn Baldwin	pushl	%esi
554bc3d5698SJohn Baldwin	pushl	%edi
555bc3d5698SJohn Baldwin	movl	20(%esp),%esi
/*
 * Carve out a 16-byte aligned scratch frame at least 56 bytes below the
 * current %esp.  The argument loads interleaved here still use the
 * original %esp; the switch happens at the xchgl.
 */
556bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
557bc3d5698SJohn Baldwin	movl	24(%esp),%eax
558bc3d5698SJohn Baldwin	andl	$-16,%ebx
559bc3d5698SJohn Baldwin	movl	28(%esp),%edx
560bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
/* Remember the original %esp inside the new frame. */
561bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
/* Loop count = bits/32 + 5, kept at offset 240 of the schedule. */
562bc3d5698SJohn Baldwin	movl	%eax,%ebx
563bc3d5698SJohn Baldwin	shrl	$5,%ebx
564bc3d5698SJohn Baldwin	addl	$5,%ebx
565bc3d5698SJohn Baldwin	movl	%ebx,240(%edx)
/* %edx = schedule + 16 + 16*count: start writing from the high end. */
566bc3d5698SJohn Baldwin	shll	$4,%ebx
567bc3d5698SJohn Baldwin	leal	16(%edx,%ebx,1),%edx
/* %edi = 1 selects decryption. */
568bc3d5698SJohn Baldwin	movl	$1,%edi
/* %ecx = ((bits >> 1) & 32) ^ 32: 32 when bit 6 of `bits` is clear, else 0. */
569bc3d5698SJohn Baldwin	movl	%eax,%ecx
570bc3d5698SJohn Baldwin	shrl	$1,%ecx
571bc3d5698SJohn Baldwin	andl	$32,%ecx
572bc3d5698SJohn Baldwin	xorl	$32,%ecx
/*
 * PIC: %ebp = table base minus the address of the label after the call;
 * _vpaes_schedule_core adds its return address to make it absolute.
 */
573bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L017pic_point,%ebp
574bc3d5698SJohn Baldwin	call	_vpaes_schedule_core
575bc3d5698SJohn Baldwin.L017pic_point:
576bc3d5698SJohn Baldwin	movl	48(%esp),%esp
577bc3d5698SJohn Baldwin	xorl	%eax,%eax
578bc3d5698SJohn Baldwin	popl	%edi
579bc3d5698SJohn Baldwin	popl	%esi
580bc3d5698SJohn Baldwin	popl	%ebx
581bc3d5698SJohn Baldwin	popl	%ebp
582bc3d5698SJohn Baldwin	ret
583bc3d5698SJohn Baldwin.size	vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
/*
 * vpaes_encrypt: encrypt one 16-byte block.
 *
 * Stack arguments (i386, read after the four register pushes):
 *   20(%esp) = pointer to the 16-byte input block    -> %esi
 *   24(%esp) = pointer to the 16-byte output block   -> %edi
 *   28(%esp) = pointer to the key schedule           -> %edx
 * Input and output are accessed with unaligned moves.  No value is placed
 * in %eax deliberately; %ebp, %ebx, %esi and %edi are saved and restored.
 */
584bc3d5698SJohn Baldwin.globl	vpaes_encrypt
585bc3d5698SJohn Baldwin.type	vpaes_encrypt,@function
586bc3d5698SJohn Baldwin.align	16
587bc3d5698SJohn Baldwinvpaes_encrypt:
588bc3d5698SJohn Baldwin.L_vpaes_encrypt_begin:
589*c0855eaaSJohn Baldwin	#ifdef __CET__
590*c0855eaaSJohn Baldwin
591*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
592*c0855eaaSJohn Baldwin	#endif
593*c0855eaaSJohn Baldwin
594bc3d5698SJohn Baldwin	pushl	%ebp
595bc3d5698SJohn Baldwin	pushl	%ebx
596bc3d5698SJohn Baldwin	pushl	%esi
597bc3d5698SJohn Baldwin	pushl	%edi
/*
 * PIC: %ebp = table base minus the address of the label after the call;
 * _vpaes_preheat makes it absolute and loads %xmm6 / %xmm7.
 */
598bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L018pic_point,%ebp
599bc3d5698SJohn Baldwin	call	_vpaes_preheat
600bc3d5698SJohn Baldwin.L018pic_point:
601bc3d5698SJohn Baldwin	movl	20(%esp),%esi
/* Switch to a 16-byte aligned frame; original %esp is kept at 48(%esp). */
602bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
603bc3d5698SJohn Baldwin	movl	24(%esp),%edi
604bc3d5698SJohn Baldwin	andl	$-16,%ebx
605bc3d5698SJohn Baldwin	movl	28(%esp),%edx
606bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
607bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
608bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
609bc3d5698SJohn Baldwin	call	_vpaes_encrypt_core
610bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edi)
611bc3d5698SJohn Baldwin	movl	48(%esp),%esp
612bc3d5698SJohn Baldwin	popl	%edi
613bc3d5698SJohn Baldwin	popl	%esi
614bc3d5698SJohn Baldwin	popl	%ebx
615bc3d5698SJohn Baldwin	popl	%ebp
616bc3d5698SJohn Baldwin	ret
617bc3d5698SJohn Baldwin.size	vpaes_encrypt,.-.L_vpaes_encrypt_begin
/*
 * vpaes_decrypt: decrypt one 16-byte block.
 *
 * Stack arguments (i386, read after the four register pushes):
 *   20(%esp) = pointer to the 16-byte input block    -> %esi
 *   24(%esp) = pointer to the 16-byte output block   -> %edi
 *   28(%esp) = pointer to the key schedule           -> %edx
 * Input and output are accessed with unaligned moves.  No value is placed
 * in %eax deliberately; %ebp, %ebx, %esi and %edi are saved and restored.
 */
618bc3d5698SJohn Baldwin.globl	vpaes_decrypt
619bc3d5698SJohn Baldwin.type	vpaes_decrypt,@function
620bc3d5698SJohn Baldwin.align	16
621bc3d5698SJohn Baldwinvpaes_decrypt:
622bc3d5698SJohn Baldwin.L_vpaes_decrypt_begin:
623*c0855eaaSJohn Baldwin	#ifdef __CET__
624*c0855eaaSJohn Baldwin
625*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
626*c0855eaaSJohn Baldwin	#endif
627*c0855eaaSJohn Baldwin
628bc3d5698SJohn Baldwin	pushl	%ebp
629bc3d5698SJohn Baldwin	pushl	%ebx
630bc3d5698SJohn Baldwin	pushl	%esi
631bc3d5698SJohn Baldwin	pushl	%edi
/*
 * PIC: %ebp = table base minus the address of the label after the call;
 * _vpaes_preheat makes it absolute and loads %xmm6 / %xmm7.
 */
632bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L019pic_point,%ebp
633bc3d5698SJohn Baldwin	call	_vpaes_preheat
634bc3d5698SJohn Baldwin.L019pic_point:
635bc3d5698SJohn Baldwin	movl	20(%esp),%esi
/* Switch to a 16-byte aligned frame; original %esp is kept at 48(%esp). */
636bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
637bc3d5698SJohn Baldwin	movl	24(%esp),%edi
638bc3d5698SJohn Baldwin	andl	$-16,%ebx
639bc3d5698SJohn Baldwin	movl	28(%esp),%edx
640bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
641bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
642bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
643bc3d5698SJohn Baldwin	call	_vpaes_decrypt_core
644bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edi)
645bc3d5698SJohn Baldwin	movl	48(%esp),%esp
646bc3d5698SJohn Baldwin	popl	%edi
647bc3d5698SJohn Baldwin	popl	%esi
648bc3d5698SJohn Baldwin	popl	%ebx
649bc3d5698SJohn Baldwin	popl	%ebp
650bc3d5698SJohn Baldwin	ret
651bc3d5698SJohn Baldwin.size	vpaes_decrypt,.-.L_vpaes_decrypt_begin
/*
 * vpaes_cbc_encrypt: CBC-mode encryption or decryption of a buffer.
 *
 * Stack arguments (i386, read after the four register pushes):
 *   20(%esp) = input pointer                          -> %esi
 *   24(%esp) = output pointer                         -> %edi
 *   28(%esp) = length in bytes                        -> %eax
 *   32(%esp) = key schedule pointer                   -> %edx
 *   36(%esp) = pointer to the 16-byte chaining value  -> %ebp
 *   40(%esp) = direction: 0 decrypts, non-zero encrypts -> %ecx
 *
 * Returns immediately, touching nothing, when length < 16.  Otherwise
 * floor(length / 16) blocks are processed; any trailing partial block is
 * ignored.  On completion the final chaining value is written back through
 * the pointer from 36(%esp).
 *
 * Aligned scratch frame layout:
 *    0(%esp) = output - input   (so output = input cursor + this delta)
 *    4(%esp) = key schedule pointer (the cores advance %edx)
 *    8(%esp) = chaining-value pointer
 *   16(%esp) = previous ciphertext block   (decrypt only)
 *   32(%esp) = current ciphertext block    (decrypt only)
 *   48(%esp) = original %esp
 */
652bc3d5698SJohn Baldwin.globl	vpaes_cbc_encrypt
653bc3d5698SJohn Baldwin.type	vpaes_cbc_encrypt,@function
654bc3d5698SJohn Baldwin.align	16
655bc3d5698SJohn Baldwinvpaes_cbc_encrypt:
656bc3d5698SJohn Baldwin.L_vpaes_cbc_encrypt_begin:
657*c0855eaaSJohn Baldwin	#ifdef __CET__
658*c0855eaaSJohn Baldwin
659*c0855eaaSJohn Baldwin.byte	243,15,30,251	/* F3 0F 1E FB = endbr32 */
660*c0855eaaSJohn Baldwin	#endif
661*c0855eaaSJohn Baldwin
662bc3d5698SJohn Baldwin	pushl	%ebp
663bc3d5698SJohn Baldwin	pushl	%ebx
664bc3d5698SJohn Baldwin	pushl	%esi
665bc3d5698SJohn Baldwin	pushl	%edi
666bc3d5698SJohn Baldwin	movl	20(%esp),%esi
667bc3d5698SJohn Baldwin	movl	24(%esp),%edi
668bc3d5698SJohn Baldwin	movl	28(%esp),%eax
669bc3d5698SJohn Baldwin	movl	32(%esp),%edx
/* Borrow means length < 16: nothing to do. */
670bc3d5698SJohn Baldwin	subl	$16,%eax
671bc3d5698SJohn Baldwin	jc	.L020cbc_abort
/* Build the aligned frame; argument loads still use the original %esp. */
672bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
673bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
674bc3d5698SJohn Baldwin	andl	$-16,%ebx
675bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
676bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
/* %xmm1 = incoming chaining value. */
677bc3d5698SJohn Baldwin	movdqu	(%ebp),%xmm1
678bc3d5698SJohn Baldwin	subl	%esi,%edi
679bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
680bc3d5698SJohn Baldwin	movl	%edi,(%esp)
681bc3d5698SJohn Baldwin	movl	%edx,4(%esp)
682bc3d5698SJohn Baldwin	movl	%ebp,8(%esp)
/* %edi = length - 16, the remaining-bytes counter for both loops. */
683bc3d5698SJohn Baldwin	movl	%eax,%edi
684bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L021pic_point,%ebp
685bc3d5698SJohn Baldwin	call	_vpaes_preheat
686bc3d5698SJohn Baldwin.L021pic_point:
687bc3d5698SJohn Baldwin	cmpl	$0,%ecx
688bc3d5698SJohn Baldwin	je	.L022cbc_dec_loop
689bc3d5698SJohn Baldwin	jmp	.L023cbc_enc_loop
690bc3d5698SJohn Baldwin.align	16
/* Encrypt: C[i] = E(P[i] ^ C[i-1]); %xmm1 carries C[i-1]. */
691bc3d5698SJohn Baldwin.L023cbc_enc_loop:
692bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
693bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
694bc3d5698SJohn Baldwin	call	_vpaes_encrypt_core
/* The core clobbered %ebx and advanced %edx: reload both from the frame. */
695bc3d5698SJohn Baldwin	movl	(%esp),%ebx
696bc3d5698SJohn Baldwin	movl	4(%esp),%edx
697bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm1
698bc3d5698SJohn Baldwin	movdqu	%xmm0,(%ebx,%esi,1)
699bc3d5698SJohn Baldwin	leal	16(%esi),%esi
/* Continue while at least one more full block remains (no borrow). */
700bc3d5698SJohn Baldwin	subl	$16,%edi
701bc3d5698SJohn Baldwin	jnc	.L023cbc_enc_loop
702bc3d5698SJohn Baldwin	jmp	.L024cbc_done
703bc3d5698SJohn Baldwin.align	16
/*
 * Decrypt: P[i] = D(C[i]) ^ C[i-1].  Both C[i-1] and C[i] are spilled to
 * the frame because the core clobbers %xmm1.
 */
704bc3d5698SJohn Baldwin.L022cbc_dec_loop:
705bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
706bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
707bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%esp)
708bc3d5698SJohn Baldwin	call	_vpaes_decrypt_core
709bc3d5698SJohn Baldwin	movl	(%esp),%ebx
710bc3d5698SJohn Baldwin	movl	4(%esp),%edx
711bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm0
712bc3d5698SJohn Baldwin	movdqa	32(%esp),%xmm1
713bc3d5698SJohn Baldwin	movdqu	%xmm0,(%ebx,%esi,1)
714bc3d5698SJohn Baldwin	leal	16(%esi),%esi
715bc3d5698SJohn Baldwin	subl	$16,%edi
716bc3d5698SJohn Baldwin	jnc	.L022cbc_dec_loop
/* Write the final chaining value back and leave the aligned frame. */
717bc3d5698SJohn Baldwin.L024cbc_done:
718bc3d5698SJohn Baldwin	movl	8(%esp),%ebx
719bc3d5698SJohn Baldwin	movl	48(%esp),%esp
720bc3d5698SJohn Baldwin	movdqu	%xmm1,(%ebx)
721bc3d5698SJohn Baldwin.L020cbc_abort:
722bc3d5698SJohn Baldwin	popl	%edi
723bc3d5698SJohn Baldwin	popl	%esi
724bc3d5698SJohn Baldwin	popl	%ebx
725bc3d5698SJohn Baldwin	popl	%ebp
726bc3d5698SJohn Baldwin	ret
727bc3d5698SJohn Baldwin.size	vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
728*c0855eaaSJohn Baldwin
/* ELF note in section .note.gnu.property: owner name "GNU", note type 5. */
/* The descriptor carries one property: type 0xc0000002 with the 4-byte value 3. */
/* NOTE(review): per the x86 psABI these look like NT_GNU_PROPERTY_TYPE_0 and */
/* GNU_PROPERTY_X86_FEATURE_1_AND with the IBT and SHSTK bits set - confirm. */
/* Numeric local labels 0 to 4 bracket the name, the descriptor and the property */
/* payload so that the assembler computes the three length fields. */
729*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
730*c0855eaaSJohn Baldwin	.p2align 2
731*c0855eaaSJohn Baldwin	.long 1f - 0f
732*c0855eaaSJohn Baldwin	.long 4f - 1f
733*c0855eaaSJohn Baldwin	.long 5
734*c0855eaaSJohn Baldwin0:
735*c0855eaaSJohn Baldwin	.asciz "GNU"
736*c0855eaaSJohn Baldwin1:
737*c0855eaaSJohn Baldwin	.p2align 2
738*c0855eaaSJohn Baldwin	.long 0xc0000002
739*c0855eaaSJohn Baldwin	.long 3f - 2f
740*c0855eaaSJohn Baldwin2:
741*c0855eaaSJohn Baldwin	.long 3
742*c0855eaaSJohn Baldwin3:
743*c0855eaaSJohn Baldwin	.p2align 2
744*c0855eaaSJohn Baldwin4:
745bc3d5698SJohn Baldwin#else
/* .L_vpaes_consts: 64-byte-aligned lookup tables used by the vpaes routines */
/* (non-PIC build copy). Each .long row is one 16-byte vector. */
/* The code addresses the table through ebp = .L_vpaes_consts+0x30, so row k */
/* (counting from 0) is found at offset 16*k-48 from ebp. The third row, at */
/* -16(%ebp), is sixteen 0x0F bytes and is the nibble mask loaded into xmm6. */
/* The trailing .byte run is a NUL-terminated ASCII attribution string: */
/* "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)". */
746bc3d5698SJohn Baldwin.text
747bc3d5698SJohn Baldwin.align	64
748bc3d5698SJohn Baldwin.L_vpaes_consts:
749bc3d5698SJohn Baldwin.long	218628480,235210255,168496130,67568393
750bc3d5698SJohn Baldwin.long	252381056,17041926,33884169,51187212
751bc3d5698SJohn Baldwin.long	252645135,252645135,252645135,252645135
752bc3d5698SJohn Baldwin.long	1512730624,3266504856,1377990664,3401244816
753bc3d5698SJohn Baldwin.long	830229760,1275146365,2969422977,3447763452
754bc3d5698SJohn Baldwin.long	3411033600,2979783055,338359620,2782886510
755bc3d5698SJohn Baldwin.long	4209124096,907596821,221174255,1006095553
756bc3d5698SJohn Baldwin.long	191964160,3799684038,3164090317,1589111125
757bc3d5698SJohn Baldwin.long	182528256,1777043520,2877432650,3265356744
758bc3d5698SJohn Baldwin.long	1874708224,3503451415,3305285752,363511674
759bc3d5698SJohn Baldwin.long	1606117888,3487855781,1093350906,2384367825
760bc3d5698SJohn Baldwin.long	197121,67569157,134941193,202313229
761bc3d5698SJohn Baldwin.long	67569157,134941193,202313229,197121
762bc3d5698SJohn Baldwin.long	134941193,202313229,197121,67569157
763bc3d5698SJohn Baldwin.long	202313229,197121,67569157,134941193
764bc3d5698SJohn Baldwin.long	33619971,100992007,168364043,235736079
765bc3d5698SJohn Baldwin.long	235736079,33619971,100992007,168364043
766bc3d5698SJohn Baldwin.long	168364043,235736079,33619971,100992007
767bc3d5698SJohn Baldwin.long	100992007,168364043,235736079,33619971
768bc3d5698SJohn Baldwin.long	50462976,117835012,185207048,252579084
769bc3d5698SJohn Baldwin.long	252314880,51251460,117574920,184942860
770bc3d5698SJohn Baldwin.long	184682752,252054788,50987272,118359308
771bc3d5698SJohn Baldwin.long	118099200,185467140,251790600,50727180
772bc3d5698SJohn Baldwin.long	2946363062,528716217,1300004225,1881839624
773bc3d5698SJohn Baldwin.long	1532713819,1532713819,1532713819,1532713819
774bc3d5698SJohn Baldwin.long	3602276352,4288629033,3737020424,4153884961
775bc3d5698SJohn Baldwin.long	1354558464,32357713,2958822624,3775749553
776bc3d5698SJohn Baldwin.long	1201988352,132424512,1572796698,503232858
777bc3d5698SJohn Baldwin.long	2213177600,1597421020,4103937655,675398315
778bc3d5698SJohn Baldwin.long	2749646592,4273543773,1511898873,121693092
779bc3d5698SJohn Baldwin.long	3040248576,1103263732,2871565598,1608280554
780bc3d5698SJohn Baldwin.long	2236667136,2588920351,482954393,64377734
781bc3d5698SJohn Baldwin.long	3069987328,291237287,2117370568,3650299247
782bc3d5698SJohn Baldwin.long	533321216,3573750986,2572112006,1401264716
783bc3d5698SJohn Baldwin.long	1339849704,2721158661,548607111,3445553514
784bc3d5698SJohn Baldwin.long	2128193280,3054596040,2183486460,1257083700
785bc3d5698SJohn Baldwin.long	655635200,1165381986,3923443150,2344132524
786bc3d5698SJohn Baldwin.long	190078720,256924420,290342170,357187870
787bc3d5698SJohn Baldwin.long	1610966272,2263057382,4103205268,309794674
788bc3d5698SJohn Baldwin.long	2592527872,2233205587,1335446729,3402964816
789bc3d5698SJohn Baldwin.long	3973531904,3225098121,3002836325,1918774430
790bc3d5698SJohn Baldwin.long	3870401024,2102906079,2284471353,4117666579
791bc3d5698SJohn Baldwin.long	617007872,1021508343,366931923,691083277
792bc3d5698SJohn Baldwin.long	2528395776,3491914898,2968704004,1613121270
793bc3d5698SJohn Baldwin.long	3445188352,3247741094,844474987,4093578302
794bc3d5698SJohn Baldwin.long	651481088,1190302358,1689581232,574775300
795bc3d5698SJohn Baldwin.long	4289380608,206939853,2555985458,2489840491
796bc3d5698SJohn Baldwin.long	2130264064,327674451,3566485037,3349835193
797bc3d5698SJohn Baldwin.long	2470714624,316102159,3636825756,3393945945
798bc3d5698SJohn Baldwin.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
799bc3d5698SJohn Baldwin.byte	111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
800bc3d5698SJohn Baldwin.byte	83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
801bc3d5698SJohn Baldwin.byte	114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
802bc3d5698SJohn Baldwin.byte	118,101,114,115,105,116,121,41,0
803bc3d5698SJohn Baldwin.align	64
/* _vpaes_preheat: finish the table-address computation and load the two */
/* vectors that the block routines keep resident in registers. */
/* In:  ebp = (.L_vpaes_consts+0x30) minus the address of the caller's label */
/*      placed immediately after its call instruction. */
/* Out: ebp = absolute address of .L_vpaes_consts+0x30, */
/*      xmm7 = table row at -48(%ebp), xmm6 = table row at -16(%ebp). */
804bc3d5698SJohn Baldwin.type	_vpaes_preheat,@function
805bc3d5698SJohn Baldwin.align	16
806bc3d5698SJohn Baldwin_vpaes_preheat:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
807*c0855eaaSJohn Baldwin	#ifdef __CET__
808*c0855eaaSJohn Baldwin
809*c0855eaaSJohn Baldwin.byte	243,15,30,251
810*c0855eaaSJohn Baldwin	#endif
811*c0855eaaSJohn Baldwin
/* (%esp) is the return address, which equals the caller's pic_point label. */
812bc3d5698SJohn Baldwin	addl	(%esp),%ebp
813bc3d5698SJohn Baldwin	movdqa	-48(%ebp),%xmm7
814bc3d5698SJohn Baldwin	movdqa	-16(%ebp),%xmm6
815bc3d5698SJohn Baldwin	ret
816bc3d5698SJohn Baldwin.size	_vpaes_preheat,.-_vpaes_preheat
/* _vpaes_encrypt_core: encrypt the single 16-byte block held in xmm0. */
/* In:  xmm0 = input block, edx = key schedule (round counter read from */
/*      240(%edx)), ebp, xmm6 and xmm7 as left by _vpaes_preheat. */
/* Out: xmm0 = result block. */
/* Overwrites eax, ebx, ecx, edx and xmm1 to xmm5. */
/* Every ".byte 102,15,56,0,M" line is a raw encoding of SSSE3 pshufb */
/* (66 0F 38 00 /r) with ModRM byte M selecting the two xmm registers. */
817bc3d5698SJohn Baldwin.type	_vpaes_encrypt_core,@function
818bc3d5698SJohn Baldwin.align	16
819bc3d5698SJohn Baldwin_vpaes_encrypt_core:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
820*c0855eaaSJohn Baldwin	#ifdef __CET__
821*c0855eaaSJohn Baldwin
822*c0855eaaSJohn Baldwin.byte	243,15,30,251
823*c0855eaaSJohn Baldwin	#endif
824*c0855eaaSJohn Baldwin
825bc3d5698SJohn Baldwin	movl	$16,%ecx
826bc3d5698SJohn Baldwin	movl	240(%edx),%eax
/* Split the input into high nibbles (xmm1, shifted down) and low nibbles (xmm0). */
827bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
828bc3d5698SJohn Baldwin	movdqa	(%ebp),%xmm2
829bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
830bc3d5698SJohn Baldwin	pand	%xmm6,%xmm0
831bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm5
832bc3d5698SJohn Baldwin.byte	102,15,56,0,208
833bc3d5698SJohn Baldwin	movdqa	16(%ebp),%xmm0
834bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
835bc3d5698SJohn Baldwin	psrld	$4,%xmm1
836bc3d5698SJohn Baldwin	addl	$16,%edx
837bc3d5698SJohn Baldwin.byte	102,15,56,0,193
838bc3d5698SJohn Baldwin	leal	192(%ebp),%ebx
839bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
/* Enter the loop in the middle. No instruction between the addl above and the */
/* jnz at the end of the entry section writes EFLAGS, so on this first pass the */
/* jnz tests the ZF produced by that addl. */
840bc3d5698SJohn Baldwin	jmp	.L000enc_entry
841bc3d5698SJohn Baldwin.align	16
842bc3d5698SJohn Baldwin.L001enc_loop:
843bc3d5698SJohn Baldwin	movdqa	32(%ebp),%xmm4
844bc3d5698SJohn Baldwin	movdqa	48(%ebp),%xmm0
845bc3d5698SJohn Baldwin.byte	102,15,56,0,226
846bc3d5698SJohn Baldwin.byte	102,15,56,0,195
847bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm4
848bc3d5698SJohn Baldwin	movdqa	64(%ebp),%xmm5
849bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
850bc3d5698SJohn Baldwin	movdqa	-64(%ebx,%ecx,1),%xmm1
851bc3d5698SJohn Baldwin.byte	102,15,56,0,234
852bc3d5698SJohn Baldwin	movdqa	80(%ebp),%xmm2
853bc3d5698SJohn Baldwin	movdqa	(%ebx,%ecx,1),%xmm4
854bc3d5698SJohn Baldwin.byte	102,15,56,0,211
855bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
856bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
857bc3d5698SJohn Baldwin.byte	102,15,56,0,193
/* Advance to the next 16-byte round key. */
858bc3d5698SJohn Baldwin	addl	$16,%edx
859bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
860bc3d5698SJohn Baldwin.byte	102,15,56,0,220
/* ecx steps through the offsets 0, 16, 32, 48 and wraps (masked with 48 below). */
861bc3d5698SJohn Baldwin	addl	$16,%ecx
862bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
863bc3d5698SJohn Baldwin.byte	102,15,56,0,193
864bc3d5698SJohn Baldwin	andl	$48,%ecx
/* Round counter. The SSE instructions from here to the jnz leave EFLAGS alone, */
/* so the jnz below branches on the ZF produced by this subl. */
865bc3d5698SJohn Baldwin	subl	$1,%eax
866bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm0
867bc3d5698SJohn Baldwin.L000enc_entry:
868bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
869bc3d5698SJohn Baldwin	movdqa	-32(%ebp),%xmm5
870bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
871bc3d5698SJohn Baldwin	psrld	$4,%xmm1
872bc3d5698SJohn Baldwin	pand	%xmm6,%xmm0
873bc3d5698SJohn Baldwin.byte	102,15,56,0,232
874bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
875bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
876bc3d5698SJohn Baldwin.byte	102,15,56,0,217
877bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm4
878bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm3
879bc3d5698SJohn Baldwin.byte	102,15,56,0,224
880bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
881bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm4
882bc3d5698SJohn Baldwin.byte	102,15,56,0,211
883bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
884bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
885bc3d5698SJohn Baldwin.byte	102,15,56,0,220
/* Fetch the round key for the next step into xmm5. */
886bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm5
887bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
888bc3d5698SJohn Baldwin	jnz	.L001enc_loop
/* Final step: different tables (96 and 112 off ebp) and one last byte shuffle. */
889bc3d5698SJohn Baldwin	movdqa	96(%ebp),%xmm4
890bc3d5698SJohn Baldwin	movdqa	112(%ebp),%xmm0
891bc3d5698SJohn Baldwin.byte	102,15,56,0,226
892bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm4
893bc3d5698SJohn Baldwin.byte	102,15,56,0,195
894bc3d5698SJohn Baldwin	movdqa	64(%ebx,%ecx,1),%xmm1
895bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
896bc3d5698SJohn Baldwin.byte	102,15,56,0,193
897bc3d5698SJohn Baldwin	ret
898bc3d5698SJohn Baldwin.size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
/* _vpaes_decrypt_core: decrypt the single 16-byte block held in xmm0. */
/* In:  xmm0 = input block, edx = key schedule (round counter read from */
/*      240(%edx)), ebp, xmm6 and xmm7 as left by _vpaes_preheat. */
/* Out: xmm0 = result block. */
/* Overwrites eax, ebx, ecx, edx and xmm1 to xmm5. */
/* ".byte 102,15,56,0,M" lines are raw SSSE3 pshufb encodings (66 0F 38 00 /r); */
/* ".byte 102,15,58,15,M,I" is a raw palignr encoding (66 0F 3A 0F /r ib). */
899bc3d5698SJohn Baldwin.type	_vpaes_decrypt_core,@function
900bc3d5698SJohn Baldwin.align	16
901bc3d5698SJohn Baldwin_vpaes_decrypt_core:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
902*c0855eaaSJohn Baldwin	#ifdef __CET__
903*c0855eaaSJohn Baldwin
904*c0855eaaSJohn Baldwin.byte	243,15,30,251
905*c0855eaaSJohn Baldwin	#endif
906*c0855eaaSJohn Baldwin
/* ebx = base pointer for the table rows used in the decryption rounds. */
907bc3d5698SJohn Baldwin	leal	608(%ebp),%ebx
908bc3d5698SJohn Baldwin	movl	240(%edx),%eax
909bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
910bc3d5698SJohn Baldwin	movdqa	-64(%ebx),%xmm2
911bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
912bc3d5698SJohn Baldwin	movl	%eax,%ecx
913bc3d5698SJohn Baldwin	psrld	$4,%xmm1
914bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm5
915bc3d5698SJohn Baldwin	shll	$4,%ecx
916bc3d5698SJohn Baldwin	pand	%xmm6,%xmm0
917bc3d5698SJohn Baldwin.byte	102,15,56,0,208
918bc3d5698SJohn Baldwin	movdqa	-48(%ebx),%xmm0
919bc3d5698SJohn Baldwin	xorl	$48,%ecx
920bc3d5698SJohn Baldwin.byte	102,15,56,0,193
921bc3d5698SJohn Baldwin	andl	$48,%ecx
922bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
923bc3d5698SJohn Baldwin	movdqa	176(%ebp),%xmm5
924bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
925bc3d5698SJohn Baldwin	addl	$16,%edx
/* ecx = ((rounds * 16) xor 48) and 48, turned into the address of the vector */
/* that the very last pshufb of this routine applies to the result. */
926bc3d5698SJohn Baldwin	leal	-352(%ebx,%ecx,1),%ecx
/* Enter the loop in the middle. No instruction between the addl above and the */
/* jnz at the end of the entry section writes EFLAGS, so on this first pass the */
/* jnz tests the ZF produced by that addl. */
927bc3d5698SJohn Baldwin	jmp	.L002dec_entry
928bc3d5698SJohn Baldwin.align	16
929bc3d5698SJohn Baldwin.L003dec_loop:
/* Four table-pair lookups, at -32/-16, 0/16, 32/48 and 64/80 from ebx, */
/* are accumulated into xmm0. */
930bc3d5698SJohn Baldwin	movdqa	-32(%ebx),%xmm4
931bc3d5698SJohn Baldwin	movdqa	-16(%ebx),%xmm1
932bc3d5698SJohn Baldwin.byte	102,15,56,0,226
933bc3d5698SJohn Baldwin.byte	102,15,56,0,203
934bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
935bc3d5698SJohn Baldwin	movdqa	(%ebx),%xmm4
936bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
937bc3d5698SJohn Baldwin	movdqa	16(%ebx),%xmm1
938bc3d5698SJohn Baldwin.byte	102,15,56,0,226
939bc3d5698SJohn Baldwin.byte	102,15,56,0,197
940bc3d5698SJohn Baldwin.byte	102,15,56,0,203
941bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
942bc3d5698SJohn Baldwin	movdqa	32(%ebx),%xmm4
943bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
944bc3d5698SJohn Baldwin	movdqa	48(%ebx),%xmm1
945bc3d5698SJohn Baldwin.byte	102,15,56,0,226
946bc3d5698SJohn Baldwin.byte	102,15,56,0,197
947bc3d5698SJohn Baldwin.byte	102,15,56,0,203
948bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
949bc3d5698SJohn Baldwin	movdqa	64(%ebx),%xmm4
950bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
951bc3d5698SJohn Baldwin	movdqa	80(%ebx),%xmm1
952bc3d5698SJohn Baldwin.byte	102,15,56,0,226
953bc3d5698SJohn Baldwin.byte	102,15,56,0,197
954bc3d5698SJohn Baldwin.byte	102,15,56,0,203
955bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
/* Advance to the next 16-byte round key. */
956bc3d5698SJohn Baldwin	addl	$16,%edx
/* palignr with both operands xmm5: rotates the shuffle vector in xmm5 by 12 bytes. */
957bc3d5698SJohn Baldwin.byte	102,15,58,15,237,12
958bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
/* Round counter. The SSE instructions from here to the jnz leave EFLAGS alone, */
/* so the jnz below branches on the ZF produced by this subl. */
959bc3d5698SJohn Baldwin	subl	$1,%eax
960bc3d5698SJohn Baldwin.L002dec_entry:
961bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
962bc3d5698SJohn Baldwin	movdqa	-32(%ebp),%xmm2
963bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
964bc3d5698SJohn Baldwin	pand	%xmm6,%xmm0
965bc3d5698SJohn Baldwin	psrld	$4,%xmm1
966bc3d5698SJohn Baldwin.byte	102,15,56,0,208
967bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
968bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
969bc3d5698SJohn Baldwin.byte	102,15,56,0,217
970bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm4
971bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
972bc3d5698SJohn Baldwin.byte	102,15,56,0,224
973bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm4
974bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
975bc3d5698SJohn Baldwin.byte	102,15,56,0,211
976bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
977bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
978bc3d5698SJohn Baldwin.byte	102,15,56,0,220
/* Fetch the round key for the next step into xmm0. */
979bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm0
980bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
981bc3d5698SJohn Baldwin	jnz	.L003dec_loop
/* Final step: tables at 96 and 112 from ebx, then the shuffle selected via ecx. */
982bc3d5698SJohn Baldwin	movdqa	96(%ebx),%xmm4
983bc3d5698SJohn Baldwin.byte	102,15,56,0,226
984bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
985bc3d5698SJohn Baldwin	movdqa	112(%ebx),%xmm0
986bc3d5698SJohn Baldwin	movdqa	(%ecx),%xmm2
987bc3d5698SJohn Baldwin.byte	102,15,56,0,195
988bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
989bc3d5698SJohn Baldwin.byte	102,15,56,0,194
990bc3d5698SJohn Baldwin	ret
991bc3d5698SJohn Baldwin.size	_vpaes_decrypt_core,.-_vpaes_decrypt_core
/* _vpaes_schedule_core: expand a user key into a vpaes key schedule. */
/* In:  esi = user key bytes, */
/*      eax = key size in bits (192 takes the 192 path, anything above 192 the */
/*            256 path, everything else the 128 path), */
/*      edx = output cursor into the key schedule, */
/*      edi = 0 for an encryption schedule, nonzero for a decryption schedule, */
/*      ecx = byte offset selecting a vector at 256(%ebp,%ecx,1), */
/*      ebp = table offset relative to the return address (made absolute here). */
/* The caller must call with a 16-byte-aligned esp: movdqa is used on 4(%esp) */
/* and 20(%esp), which are the caller's 0(%esp) and 16(%esp) once the 4-byte */
/* return address has been pushed. */
/* All eight xmm registers are zeroed before returning. */
992bc3d5698SJohn Baldwin.type	_vpaes_schedule_core,@function
993bc3d5698SJohn Baldwin.align	16
994bc3d5698SJohn Baldwin_vpaes_schedule_core:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
995*c0855eaaSJohn Baldwin	#ifdef __CET__
996*c0855eaaSJohn Baldwin
997*c0855eaaSJohn Baldwin.byte	243,15,30,251
998*c0855eaaSJohn Baldwin	#endif
999*c0855eaaSJohn Baldwin
/* (%esp) is the return address; adding it makes ebp = .L_vpaes_consts+0x30. */
1000bc3d5698SJohn Baldwin	addl	(%esp),%ebp
1001bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
1002bc3d5698SJohn Baldwin	movdqa	320(%ebp),%xmm2
1003bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
1004bc3d5698SJohn Baldwin	leal	(%ebp),%ebx
/* Park the vector from 320(%ebp) in the caller's frame; _vpaes_schedule_round */
/* reads and updates this same slot as its 8(%esp). */
1005bc3d5698SJohn Baldwin	movdqa	%xmm2,4(%esp)
1006bc3d5698SJohn Baldwin	call	_vpaes_schedule_transform
1007bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm7
1008bc3d5698SJohn Baldwin	testl	%edi,%edi
1009bc3d5698SJohn Baldwin	jnz	.L004schedule_am_decrypting
/* Encrypting: the first schedule entry is the transformed key. */
1010bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edx)
1011bc3d5698SJohn Baldwin	jmp	.L005schedule_go
1012bc3d5698SJohn Baldwin.L004schedule_am_decrypting:
/* Decrypting: the first entry written is the raw key (xmm3) after a byte shuffle. */
1013bc3d5698SJohn Baldwin	movdqa	256(%ebp,%ecx,1),%xmm1
1014bc3d5698SJohn Baldwin.byte	102,15,56,0,217
1015bc3d5698SJohn Baldwin	movdqu	%xmm3,(%edx)
1016bc3d5698SJohn Baldwin	xorl	$48,%ecx
1017bc3d5698SJohn Baldwin.L005schedule_go:
1018bc3d5698SJohn Baldwin	cmpl	$192,%eax
1019bc3d5698SJohn Baldwin	ja	.L006schedule_256
1020bc3d5698SJohn Baldwin	je	.L007schedule_192
1021bc3d5698SJohn Baldwin.L008schedule_128:
/* 128-bit path: ten rounds, each but the last followed by a mangle-and-store. */
1022bc3d5698SJohn Baldwin	movl	$10,%eax
1023bc3d5698SJohn Baldwin.L009loop_schedule_128:
1024bc3d5698SJohn Baldwin	call	_vpaes_schedule_round
1025bc3d5698SJohn Baldwin	decl	%eax
1026bc3d5698SJohn Baldwin	jz	.L010schedule_mangle_last
1027bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
1028bc3d5698SJohn Baldwin	jmp	.L009loop_schedule_128
1029bc3d5698SJohn Baldwin.align	16
1030bc3d5698SJohn Baldwin.L007schedule_192:
/* 192-bit path: key bytes 8 to 23 are transformed into xmm6; the movhlps from */
/* the zeroed xmm4 then clears the low 64 bits of xmm6. */
1031bc3d5698SJohn Baldwin	movdqu	8(%esi),%xmm0
1032bc3d5698SJohn Baldwin	call	_vpaes_schedule_transform
1033bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
1034bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
1035bc3d5698SJohn Baldwin	movhlps	%xmm4,%xmm6
1036bc3d5698SJohn Baldwin	movl	$4,%eax
1037bc3d5698SJohn Baldwin.L011loop_schedule_192:
1038bc3d5698SJohn Baldwin	call	_vpaes_schedule_round
/* Raw palignr encoding (66 0F 3A 0F /r ib): palignr $8,%xmm6,%xmm0. */
1039bc3d5698SJohn Baldwin.byte	102,15,58,15,198,8
1040bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
1041bc3d5698SJohn Baldwin	call	_vpaes_schedule_192_smear
1042bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
1043bc3d5698SJohn Baldwin	call	_vpaes_schedule_round
1044bc3d5698SJohn Baldwin	decl	%eax
1045bc3d5698SJohn Baldwin	jz	.L010schedule_mangle_last
1046bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
1047bc3d5698SJohn Baldwin	call	_vpaes_schedule_192_smear
1048bc3d5698SJohn Baldwin	jmp	.L011loop_schedule_192
1049bc3d5698SJohn Baldwin.align	16
1050bc3d5698SJohn Baldwin.L006schedule_256:
/* 256-bit path: the second 16 key bytes are transformed; seven loop passes. */
1051bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm0
1052bc3d5698SJohn Baldwin	call	_vpaes_schedule_transform
1053bc3d5698SJohn Baldwin	movl	$7,%eax
1054bc3d5698SJohn Baldwin.L012loop_schedule_256:
1055bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
1056bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm6
1057bc3d5698SJohn Baldwin	call	_vpaes_schedule_round
1058bc3d5698SJohn Baldwin	decl	%eax
1059bc3d5698SJohn Baldwin	jz	.L010schedule_mangle_last
1060bc3d5698SJohn Baldwin	call	_vpaes_schedule_mangle
/* Second half-step: xmm7 is saved in the frame while the low-round entry point */
/* (which skips the 8(%esp) update of a full round) runs on xmm6. */
1061bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm0
1062bc3d5698SJohn Baldwin	movdqa	%xmm7,20(%esp)
1063bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm7
1064bc3d5698SJohn Baldwin	call	.L_vpaes_schedule_low_round
1065bc3d5698SJohn Baldwin	movdqa	20(%esp),%xmm7
1066bc3d5698SJohn Baldwin	jmp	.L012loop_schedule_256
1067bc3d5698SJohn Baldwin.align	16
1068bc3d5698SJohn Baldwin.L010schedule_mangle_last:
/* Last entry: ebx picks the output transform (384(%ebp) when decrypting, */
/* 352(%ebp) when encrypting) and edx is adjusted by +16 or -16 overall. */
1069bc3d5698SJohn Baldwin	leal	384(%ebp),%ebx
1070bc3d5698SJohn Baldwin	testl	%edi,%edi
1071bc3d5698SJohn Baldwin	jnz	.L013schedule_mangle_last_dec
1072bc3d5698SJohn Baldwin	movdqa	256(%ebp,%ecx,1),%xmm1
1073bc3d5698SJohn Baldwin.byte	102,15,56,0,193
1074bc3d5698SJohn Baldwin	leal	352(%ebp),%ebx
1075bc3d5698SJohn Baldwin	addl	$32,%edx
1076bc3d5698SJohn Baldwin.L013schedule_mangle_last_dec:
1077bc3d5698SJohn Baldwin	addl	$-16,%edx
1078bc3d5698SJohn Baldwin	pxor	336(%ebp),%xmm0
1079bc3d5698SJohn Baldwin	call	_vpaes_schedule_transform
1080bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edx)
/* Clear every xmm register before returning. */
/* NOTE(review): presumably so that no key material is left in registers - confirm. */
1081bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1082bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1083bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
1084bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
1085bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
1086bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
1087bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
1088bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
1089bc3d5698SJohn Baldwin	ret
1090bc3d5698SJohn Baldwin.size	_vpaes_schedule_core,.-_vpaes_schedule_core
/* _vpaes_schedule_192_smear: helper for the 192-bit key schedule path. */
/* In:  xmm6 and xmm7 (schedule state). */
/* Out: xmm6 = xmm6 xor pshufd(0x80, xmm6) xor pshufd(0xFE, xmm7); that value */
/*      is also returned in xmm0, after which the low 64 bits of xmm6 are zeroed. */
/* Overwrites xmm0 and xmm1 (xmm1 ends up zero). */
1091bc3d5698SJohn Baldwin.type	_vpaes_schedule_192_smear,@function
1092bc3d5698SJohn Baldwin.align	16
1093bc3d5698SJohn Baldwin_vpaes_schedule_192_smear:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
1094*c0855eaaSJohn Baldwin	#ifdef __CET__
1095*c0855eaaSJohn Baldwin
1096*c0855eaaSJohn Baldwin.byte	243,15,30,251
1097*c0855eaaSJohn Baldwin	#endif
1098*c0855eaaSJohn Baldwin
1099bc3d5698SJohn Baldwin	pshufd	$128,%xmm6,%xmm1
1100bc3d5698SJohn Baldwin	pshufd	$254,%xmm7,%xmm0
1101bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm6
1102bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1103bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
1104bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm0
/* xmm1 is zero here, so this movhlps clears the low 64 bits of xmm6. */
1105bc3d5698SJohn Baldwin	movhlps	%xmm1,%xmm6
1106bc3d5698SJohn Baldwin	ret
1107bc3d5698SJohn Baldwin.size	_vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
/* _vpaes_schedule_round: one key-schedule round. */
/* In:  xmm0 = input vector, xmm7 = previous schedule vector, */
/*      8(%esp) = 16-byte vector kept in the frame of the routine that called */
/*      _vpaes_schedule_core (this is the slot the core writes as its 4(%esp)), */
/*      ebp = .L_vpaes_consts+0x30. */
/* Out: xmm0 = xmm7 = new schedule vector; the vector at 8(%esp) is rotated. */
/* Overwrites xmm1 to xmm5. */
/* Second entry point .L_vpaes_schedule_low_round skips the 8(%esp) update and */
/* the rotation of xmm0 and starts directly at the smear of xmm7. */
/* ".byte 102,15,58,15,M,I" is a raw palignr encoding (66 0F 3A 0F /r ib); */
/* ".byte 102,15,56,0,M" is a raw pshufb encoding (66 0F 38 00 /r). */
1108bc3d5698SJohn Baldwin.type	_vpaes_schedule_round,@function
1109bc3d5698SJohn Baldwin.align	16
1110bc3d5698SJohn Baldwin_vpaes_schedule_round:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
1111*c0855eaaSJohn Baldwin	#ifdef __CET__
1112*c0855eaaSJohn Baldwin
1113*c0855eaaSJohn Baldwin.byte	243,15,30,251
1114*c0855eaaSJohn Baldwin	#endif
1115*c0855eaaSJohn Baldwin
/* Take the top byte of the saved vector into xmm1 (folded into xmm7 below), */
/* rotate the saved vector by 15 bytes and write it back further down. */
1116bc3d5698SJohn Baldwin	movdqa	8(%esp),%xmm2
1117bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1118bc3d5698SJohn Baldwin.byte	102,15,58,15,202,15
1119bc3d5698SJohn Baldwin.byte	102,15,58,15,210,15
1120bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
/* Broadcast the top dword of xmm0, then rotate the register by one byte. */
1121bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm0
1122bc3d5698SJohn Baldwin.byte	102,15,58,15,192,1
1123bc3d5698SJohn Baldwin	movdqa	%xmm2,8(%esp)
1124bc3d5698SJohn Baldwin.L_vpaes_schedule_low_round:
/* Smear xmm7: xor it with itself shifted left by 4 bytes, then by 8 bytes. */
1125bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm1
1126bc3d5698SJohn Baldwin	pslldq	$4,%xmm7
1127bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
1128bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm1
1129bc3d5698SJohn Baldwin	pslldq	$8,%xmm7
1130bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
1131bc3d5698SJohn Baldwin	pxor	336(%ebp),%xmm7
/* Nibble-split xmm0 with the mask at -16(%ebp) and run the table lookups. */
1132bc3d5698SJohn Baldwin	movdqa	-16(%ebp),%xmm4
1133bc3d5698SJohn Baldwin	movdqa	-48(%ebp),%xmm5
1134bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm1
1135bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
1136bc3d5698SJohn Baldwin	psrld	$4,%xmm1
1137bc3d5698SJohn Baldwin	pand	%xmm4,%xmm0
1138bc3d5698SJohn Baldwin	movdqa	-32(%ebp),%xmm2
1139bc3d5698SJohn Baldwin.byte	102,15,56,0,208
1140bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
1141bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm3
1142bc3d5698SJohn Baldwin.byte	102,15,56,0,217
1143bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
1144bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm4
1145bc3d5698SJohn Baldwin.byte	102,15,56,0,224
1146bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm4
1147bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm2
1148bc3d5698SJohn Baldwin.byte	102,15,56,0,211
1149bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
1150bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm3
1151bc3d5698SJohn Baldwin.byte	102,15,56,0,220
1152bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
1153bc3d5698SJohn Baldwin	movdqa	32(%ebp),%xmm4
1154bc3d5698SJohn Baldwin.byte	102,15,56,0,226
1155bc3d5698SJohn Baldwin	movdqa	48(%ebp),%xmm0
1156bc3d5698SJohn Baldwin.byte	102,15,56,0,195
1157bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm0
/* Combine with the smeared xmm7; the result is returned in both xmm0 and xmm7. */
1158bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm0
1159bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm7
1160bc3d5698SJohn Baldwin	ret
1161bc3d5698SJohn Baldwin.size	_vpaes_schedule_round,.-_vpaes_schedule_round
/* _vpaes_schedule_transform: apply a nibble-indexed table pair to xmm0. */
/* In:  xmm0 = input vector, ebx = address of two consecutive 16-byte tables */
/*      (low-nibble table at 0, high-nibble table at 16), */
/*      ebp = .L_vpaes_consts+0x30 (the nibble mask is read from -16(%ebp)). */
/* Out: xmm0 = table[lo nibbles] xor table[hi nibbles]. Overwrites xmm1, xmm2. */
/* ".byte 102,15,56,0,M" is a raw pshufb encoding (66 0F 38 00 /r). */
1162bc3d5698SJohn Baldwin.type	_vpaes_schedule_transform,@function
1163bc3d5698SJohn Baldwin.align	16
1164bc3d5698SJohn Baldwin_vpaes_schedule_transform:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
1165*c0855eaaSJohn Baldwin	#ifdef __CET__
1166*c0855eaaSJohn Baldwin
1167*c0855eaaSJohn Baldwin.byte	243,15,30,251
1168*c0855eaaSJohn Baldwin	#endif
1169*c0855eaaSJohn Baldwin
/* xmm1 = high nibbles shifted down, xmm0 = low nibbles. */
1170bc3d5698SJohn Baldwin	movdqa	-16(%ebp),%xmm2
1171bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
1172bc3d5698SJohn Baldwin	pandn	%xmm0,%xmm1
1173bc3d5698SJohn Baldwin	psrld	$4,%xmm1
1174bc3d5698SJohn Baldwin	pand	%xmm2,%xmm0
/* pshufb %xmm0,%xmm2: look up the low nibbles in the first table. */
1175bc3d5698SJohn Baldwin	movdqa	(%ebx),%xmm2
1176bc3d5698SJohn Baldwin.byte	102,15,56,0,208
/* pshufb %xmm1,%xmm0: look up the high nibbles in the second table. */
1177bc3d5698SJohn Baldwin	movdqa	16(%ebx),%xmm0
1178bc3d5698SJohn Baldwin.byte	102,15,56,0,193
1179bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
1180bc3d5698SJohn Baldwin	ret
1181bc3d5698SJohn Baldwin.size	_vpaes_schedule_transform,.-_vpaes_schedule_transform
/* _vpaes_schedule_mangle: convert the schedule vector in xmm0 to its stored */
/* form and write it to the key schedule. */
/* In:  xmm0 = schedule vector (left unmodified; the work is done on a copy), */
/*      edi = 0 for encryption layout, nonzero for decryption layout, */
/*      edx = output cursor, ecx = offset selecting the vector at */
/*      256(%ebp,%ecx,1), ebp = .L_vpaes_consts+0x30. */
/* Out: 16 bytes stored at the updated edx; edx moves by +16 (encryption) or */
/*      -16 (decryption); ecx = (ecx - 16) and 48. */
/* Overwrites xmm1 to xmm5, and esi on the decryption path. */
/* ".byte 102,15,56,0,M" is a raw pshufb encoding (66 0F 38 00 /r). */
1182bc3d5698SJohn Baldwin.type	_vpaes_schedule_mangle,@function
1183bc3d5698SJohn Baldwin.align	16
1184bc3d5698SJohn Baldwin_vpaes_schedule_mangle:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
1185*c0855eaaSJohn Baldwin	#ifdef __CET__
1186*c0855eaaSJohn Baldwin
1187*c0855eaaSJohn Baldwin.byte	243,15,30,251
1188*c0855eaaSJohn Baldwin	#endif
1189*c0855eaaSJohn Baldwin
1190bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm4
1191bc3d5698SJohn Baldwin	movdqa	128(%ebp),%xmm5
1192bc3d5698SJohn Baldwin	testl	%edi,%edi
1193bc3d5698SJohn Baldwin	jnz	.L014schedule_mangle_dec
/* Encryption layout: xor with the vector at 336(%ebp), then xor together the */
/* results of shuffling by xmm5 once, twice and three times. */
1194bc3d5698SJohn Baldwin	addl	$16,%edx
1195bc3d5698SJohn Baldwin	pxor	336(%ebp),%xmm4
1196bc3d5698SJohn Baldwin.byte	102,15,56,0,229
1197bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm3
1198bc3d5698SJohn Baldwin.byte	102,15,56,0,229
1199bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm3
1200bc3d5698SJohn Baldwin.byte	102,15,56,0,229
1201bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm3
1202bc3d5698SJohn Baldwin	jmp	.L015schedule_mangle_both
1203bc3d5698SJohn Baldwin.align	16
1204bc3d5698SJohn Baldwin.L014schedule_mangle_dec:
/* Decryption layout: nibble-split the copy, then chain four table-pair */
/* lookups from the 128 bytes at 416(%ebp), shuffling by xmm5 between pairs. */
1205bc3d5698SJohn Baldwin	movdqa	-16(%ebp),%xmm2
1206bc3d5698SJohn Baldwin	leal	416(%ebp),%esi
1207bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
1208bc3d5698SJohn Baldwin	pandn	%xmm4,%xmm1
1209bc3d5698SJohn Baldwin	psrld	$4,%xmm1
1210bc3d5698SJohn Baldwin	pand	%xmm2,%xmm4
1211bc3d5698SJohn Baldwin	movdqa	(%esi),%xmm2
1212bc3d5698SJohn Baldwin.byte	102,15,56,0,212
1213bc3d5698SJohn Baldwin	movdqa	16(%esi),%xmm3
1214bc3d5698SJohn Baldwin.byte	102,15,56,0,217
1215bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
1216bc3d5698SJohn Baldwin.byte	102,15,56,0,221
1217bc3d5698SJohn Baldwin	movdqa	32(%esi),%xmm2
1218bc3d5698SJohn Baldwin.byte	102,15,56,0,212
1219bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
1220bc3d5698SJohn Baldwin	movdqa	48(%esi),%xmm3
1221bc3d5698SJohn Baldwin.byte	102,15,56,0,217
1222bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
1223bc3d5698SJohn Baldwin.byte	102,15,56,0,221
1224bc3d5698SJohn Baldwin	movdqa	64(%esi),%xmm2
1225bc3d5698SJohn Baldwin.byte	102,15,56,0,212
1226bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
1227bc3d5698SJohn Baldwin	movdqa	80(%esi),%xmm3
1228bc3d5698SJohn Baldwin.byte	102,15,56,0,217
1229bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
1230bc3d5698SJohn Baldwin.byte	102,15,56,0,221
1231bc3d5698SJohn Baldwin	movdqa	96(%esi),%xmm2
1232bc3d5698SJohn Baldwin.byte	102,15,56,0,212
1233bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
1234bc3d5698SJohn Baldwin	movdqa	112(%esi),%xmm3
1235bc3d5698SJohn Baldwin.byte	102,15,56,0,217
1236bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
/* The decryption schedule is filled from high addresses downwards. */
1237bc3d5698SJohn Baldwin	addl	$-16,%edx
1238bc3d5698SJohn Baldwin.L015schedule_mangle_both:
/* Common tail: shuffle by the vector selected through ecx, step ecx to the */
/* previous 16-byte offset (wrapping within 0 to 48), and store the result. */
1239bc3d5698SJohn Baldwin	movdqa	256(%ebp,%ecx,1),%xmm1
1240bc3d5698SJohn Baldwin.byte	102,15,56,0,217
1241bc3d5698SJohn Baldwin	addl	$-16,%ecx
1242bc3d5698SJohn Baldwin	andl	$48,%ecx
1243bc3d5698SJohn Baldwin	movdqu	%xmm3,(%edx)
1244bc3d5698SJohn Baldwin	ret
1245bc3d5698SJohn Baldwin.size	_vpaes_schedule_mangle,.-_vpaes_schedule_mangle
/* vpaes_set_encrypt_key: build an encryption key schedule. */
/* cdecl stack arguments, in order: user key pointer, key size in bits, */
/* key schedule pointer. The value (bits / 32) + 5 is stored at offset 240 of */
/* the key schedule, where the block cores read their round counter. */
/* Always returns 0 in eax. Saves and restores ebp, ebx, esi and edi. */
1246bc3d5698SJohn Baldwin.globl	vpaes_set_encrypt_key
1247bc3d5698SJohn Baldwin.type	vpaes_set_encrypt_key,@function
1248bc3d5698SJohn Baldwin.align	16
1249bc3d5698SJohn Baldwinvpaes_set_encrypt_key:
1250bc3d5698SJohn Baldwin.L_vpaes_set_encrypt_key_begin:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
1251*c0855eaaSJohn Baldwin	#ifdef __CET__
1252*c0855eaaSJohn Baldwin
1253*c0855eaaSJohn Baldwin.byte	243,15,30,251
1254*c0855eaaSJohn Baldwin	#endif
1255*c0855eaaSJohn Baldwin
1256bc3d5698SJohn Baldwin	pushl	%ebp
1257bc3d5698SJohn Baldwin	pushl	%ebx
1258bc3d5698SJohn Baldwin	pushl	%esi
1259bc3d5698SJohn Baldwin	pushl	%edi
/* Load the arguments while carving a 16-byte-aligned frame; xchgl switches to */
/* the frame and the previous esp is parked at 48(%esp). */
1260bc3d5698SJohn Baldwin	movl	20(%esp),%esi
1261bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
1262bc3d5698SJohn Baldwin	movl	24(%esp),%eax
1263bc3d5698SJohn Baldwin	andl	$-16,%ebx
1264bc3d5698SJohn Baldwin	movl	28(%esp),%edx
1265bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
1266bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
/* Round counter field: (bits >> 5) + 5. */
1267bc3d5698SJohn Baldwin	movl	%eax,%ebx
1268bc3d5698SJohn Baldwin	shrl	$5,%ebx
1269bc3d5698SJohn Baldwin	addl	$5,%ebx
1270bc3d5698SJohn Baldwin	movl	%ebx,240(%edx)
/* ecx = initial shuffle-vector offset, edi = 0 selects the encryption layout. */
1271bc3d5698SJohn Baldwin	movl	$48,%ecx
1272bc3d5698SJohn Baldwin	movl	$0,%edi
/* ebp = table offset relative to the label after the call; the callee adds */
/* its return address to make it absolute. */
1273bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L016pic_point,%ebp
1274bc3d5698SJohn Baldwin	call	_vpaes_schedule_core
1275bc3d5698SJohn Baldwin.L016pic_point:
1276bc3d5698SJohn Baldwin	movl	48(%esp),%esp
1277bc3d5698SJohn Baldwin	xorl	%eax,%eax
1278bc3d5698SJohn Baldwin	popl	%edi
1279bc3d5698SJohn Baldwin	popl	%esi
1280bc3d5698SJohn Baldwin	popl	%ebx
1281bc3d5698SJohn Baldwin	popl	%ebp
1282bc3d5698SJohn Baldwin	ret
1283bc3d5698SJohn Baldwin.size	vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
/* vpaes_set_decrypt_key: build a decryption key schedule. */
/* cdecl stack arguments, in order: user key pointer, key size in bits, */
/* key schedule pointer. The value (bits / 32) + 5 is stored at offset 240 of */
/* the key schedule. The output cursor starts at schedule + 16 + 16 * that */
/* value, and the schedule is then filled towards lower addresses. */
/* Always returns 0 in eax. Saves and restores ebp, ebx, esi and edi. */
1284bc3d5698SJohn Baldwin.globl	vpaes_set_decrypt_key
1285bc3d5698SJohn Baldwin.type	vpaes_set_decrypt_key,@function
1286bc3d5698SJohn Baldwin.align	16
1287bc3d5698SJohn Baldwinvpaes_set_decrypt_key:
1288bc3d5698SJohn Baldwin.L_vpaes_set_decrypt_key_begin:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
1289*c0855eaaSJohn Baldwin	#ifdef __CET__
1290*c0855eaaSJohn Baldwin
1291*c0855eaaSJohn Baldwin.byte	243,15,30,251
1292*c0855eaaSJohn Baldwin	#endif
1293*c0855eaaSJohn Baldwin
1294bc3d5698SJohn Baldwin	pushl	%ebp
1295bc3d5698SJohn Baldwin	pushl	%ebx
1296bc3d5698SJohn Baldwin	pushl	%esi
1297bc3d5698SJohn Baldwin	pushl	%edi
/* Load the arguments while carving a 16-byte-aligned frame; xchgl switches to */
/* the frame and the previous esp is parked at 48(%esp). */
1298bc3d5698SJohn Baldwin	movl	20(%esp),%esi
1299bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
1300bc3d5698SJohn Baldwin	movl	24(%esp),%eax
1301bc3d5698SJohn Baldwin	andl	$-16,%ebx
1302bc3d5698SJohn Baldwin	movl	28(%esp),%edx
1303bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
1304bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
/* Round counter field: (bits >> 5) + 5. */
1305bc3d5698SJohn Baldwin	movl	%eax,%ebx
1306bc3d5698SJohn Baldwin	shrl	$5,%ebx
1307bc3d5698SJohn Baldwin	addl	$5,%ebx
1308bc3d5698SJohn Baldwin	movl	%ebx,240(%edx)
/* edx = schedule + 16 + 16 * counter: start writing from the far end. */
1309bc3d5698SJohn Baldwin	shll	$4,%ebx
1310bc3d5698SJohn Baldwin	leal	16(%edx,%ebx,1),%edx
/* edi = 1 selects the decryption layout. */
1311bc3d5698SJohn Baldwin	movl	$1,%edi
/* ecx = ((bits >> 1) and 32) xor 32: initial shuffle-vector offset. */
1312bc3d5698SJohn Baldwin	movl	%eax,%ecx
1313bc3d5698SJohn Baldwin	shrl	$1,%ecx
1314bc3d5698SJohn Baldwin	andl	$32,%ecx
1315bc3d5698SJohn Baldwin	xorl	$32,%ecx
/* ebp = table offset relative to the label after the call; the callee adds */
/* its return address to make it absolute. */
1316bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L017pic_point,%ebp
1317bc3d5698SJohn Baldwin	call	_vpaes_schedule_core
1318bc3d5698SJohn Baldwin.L017pic_point:
1319bc3d5698SJohn Baldwin	movl	48(%esp),%esp
1320bc3d5698SJohn Baldwin	xorl	%eax,%eax
1321bc3d5698SJohn Baldwin	popl	%edi
1322bc3d5698SJohn Baldwin	popl	%esi
1323bc3d5698SJohn Baldwin	popl	%ebx
1324bc3d5698SJohn Baldwin	popl	%ebp
1325bc3d5698SJohn Baldwin	ret
1326bc3d5698SJohn Baldwin.size	vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
/* vpaes_encrypt: encrypt one 16-byte block. */
/* cdecl stack arguments, in order: input block pointer, output block pointer, */
/* key schedule pointer. Input and output are accessed with unaligned moves. */
/* Saves and restores ebp, ebx, esi and edi; runs the core on a 16-byte-aligned */
/* scratch frame and restores the original esp afterwards. */
1327bc3d5698SJohn Baldwin.globl	vpaes_encrypt
1328bc3d5698SJohn Baldwin.type	vpaes_encrypt,@function
1329bc3d5698SJohn Baldwin.align	16
1330bc3d5698SJohn Baldwinvpaes_encrypt:
1331bc3d5698SJohn Baldwin.L_vpaes_encrypt_begin:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
1332*c0855eaaSJohn Baldwin	#ifdef __CET__
1333*c0855eaaSJohn Baldwin
1334*c0855eaaSJohn Baldwin.byte	243,15,30,251
1335*c0855eaaSJohn Baldwin	#endif
1336*c0855eaaSJohn Baldwin
1337bc3d5698SJohn Baldwin	pushl	%ebp
1338bc3d5698SJohn Baldwin	pushl	%ebx
1339bc3d5698SJohn Baldwin	pushl	%esi
1340bc3d5698SJohn Baldwin	pushl	%edi
/* ebp = table offset relative to the label after the call; _vpaes_preheat */
/* makes it absolute and loads xmm6 and xmm7. */
1341bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L018pic_point,%ebp
1342bc3d5698SJohn Baldwin	call	_vpaes_preheat
1343bc3d5698SJohn Baldwin.L018pic_point:
/* Four registers were pushed, so the first argument sits at 20(%esp). */
1344bc3d5698SJohn Baldwin	movl	20(%esp),%esi
1345bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
1346bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1347bc3d5698SJohn Baldwin	andl	$-16,%ebx
1348bc3d5698SJohn Baldwin	movl	28(%esp),%edx
/* Switch to the aligned frame; the previous esp is parked at 48(%esp). */
1349bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
1350bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
1351bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
1352bc3d5698SJohn Baldwin	call	_vpaes_encrypt_core
1353bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edi)
1354bc3d5698SJohn Baldwin	movl	48(%esp),%esp
1355bc3d5698SJohn Baldwin	popl	%edi
1356bc3d5698SJohn Baldwin	popl	%esi
1357bc3d5698SJohn Baldwin	popl	%ebx
1358bc3d5698SJohn Baldwin	popl	%ebp
1359bc3d5698SJohn Baldwin	ret
1360bc3d5698SJohn Baldwin.size	vpaes_encrypt,.-.L_vpaes_encrypt_begin
/* vpaes_decrypt: decrypt one 16-byte block. */
/* cdecl stack arguments, in order: input block pointer, output block pointer, */
/* key schedule pointer. Input and output are accessed with unaligned moves. */
/* Saves and restores ebp, ebx, esi and edi; runs the core on a 16-byte-aligned */
/* scratch frame and restores the original esp afterwards. */
1361bc3d5698SJohn Baldwin.globl	vpaes_decrypt
1362bc3d5698SJohn Baldwin.type	vpaes_decrypt,@function
1363bc3d5698SJohn Baldwin.align	16
1364bc3d5698SJohn Baldwinvpaes_decrypt:
1365bc3d5698SJohn Baldwin.L_vpaes_decrypt_begin:
/* The raw bytes F3 0F 1E FB below are the endbr32 instruction, emitted only with CET. */
1366*c0855eaaSJohn Baldwin	#ifdef __CET__
1367*c0855eaaSJohn Baldwin
1368*c0855eaaSJohn Baldwin.byte	243,15,30,251
1369*c0855eaaSJohn Baldwin	#endif
1370*c0855eaaSJohn Baldwin
1371bc3d5698SJohn Baldwin	pushl	%ebp
1372bc3d5698SJohn Baldwin	pushl	%ebx
1373bc3d5698SJohn Baldwin	pushl	%esi
1374bc3d5698SJohn Baldwin	pushl	%edi
/* ebp = table offset relative to the label after the call; _vpaes_preheat */
/* makes it absolute and loads xmm6 and xmm7. */
1375bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L019pic_point,%ebp
1376bc3d5698SJohn Baldwin	call	_vpaes_preheat
1377bc3d5698SJohn Baldwin.L019pic_point:
/* Four registers were pushed, so the first argument sits at 20(%esp). */
1378bc3d5698SJohn Baldwin	movl	20(%esp),%esi
1379bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
1380bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1381bc3d5698SJohn Baldwin	andl	$-16,%ebx
1382bc3d5698SJohn Baldwin	movl	28(%esp),%edx
/* Switch to the aligned frame; the previous esp is parked at 48(%esp). */
1383bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
1384bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
1385bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
1386bc3d5698SJohn Baldwin	call	_vpaes_decrypt_core
1387bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edi)
1388bc3d5698SJohn Baldwin	movl	48(%esp),%esp
1389bc3d5698SJohn Baldwin	popl	%edi
1390bc3d5698SJohn Baldwin	popl	%esi
1391bc3d5698SJohn Baldwin	popl	%ebx
1392bc3d5698SJohn Baldwin	popl	%ebp
1393bc3d5698SJohn Baldwin	ret
1394bc3d5698SJohn Baldwin.size	vpaes_decrypt,.-.L_vpaes_decrypt_begin
/*
 * vpaes_cbc_encrypt: global entry point that processes a buffer in 16-byte
 * blocks with block chaining, using _vpaes_encrypt_core or
 * _vpaes_decrypt_core depending on the sixth argument.
 *
 * Stack arguments (offsets relative to %esp after the four pushes below):
 *   20(%esp) -> %esi  source pointer
 *   24(%esp) -> %edi  destination pointer
 *   28(%esp) -> %eax  length in bytes
 *   32(%esp) -> %edx  passed through to the cores in %edx; presumably the
 *                     key schedule pointer (cores are outside this
 *                     excerpt -- confirm there)
 *   36(%esp) -> %ebp  pointer to the 16-byte chaining value; read on entry
 *                     and overwritten with the final chaining value on exit
 *   40(%esp) -> %ecx  zero selects the decrypt loop, non-zero the encrypt loop
 *
 * If the length is below 16 the function returns without touching any
 * buffer.  Otherwise floor(length / 16) blocks are processed; a trailing
 * partial block is not handled here.
 *
 * Local frame (16-byte aligned %esp after the xchgl):
 *    0(%esp)   destination minus source (byte delta)
 *    4(%esp)   copy of the fourth argument
 *    8(%esp)   copy of the chaining-value pointer
 *   16(%esp)   decrypt only: chaining value for the current block
 *   32(%esp)   decrypt only: copy of the current input block
 *   48(%esp)   original %esp
 */
1395bc3d5698SJohn Baldwin.globl	vpaes_cbc_encrypt
1396bc3d5698SJohn Baldwin.type	vpaes_cbc_encrypt,@function
1397bc3d5698SJohn Baldwin.align	16
1398bc3d5698SJohn Baldwinvpaes_cbc_encrypt:
1399bc3d5698SJohn Baldwin.L_vpaes_cbc_encrypt_begin:
	/* Bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
1400*c0855eaaSJohn Baldwin	#ifdef __CET__
1401*c0855eaaSJohn Baldwin
1402*c0855eaaSJohn Baldwin.byte	243,15,30,251
1403*c0855eaaSJohn Baldwin	#endif
1404*c0855eaaSJohn Baldwin
1405bc3d5698SJohn Baldwin	pushl	%ebp
1406bc3d5698SJohn Baldwin	pushl	%ebx
1407bc3d5698SJohn Baldwin	pushl	%esi
1408bc3d5698SJohn Baldwin	pushl	%edi
1409bc3d5698SJohn Baldwin	movl	20(%esp),%esi
1410bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1411bc3d5698SJohn Baldwin	movl	28(%esp),%eax
1412bc3d5698SJohn Baldwin	movl	32(%esp),%edx
	/* %eax = length - 16; an unsigned borrow means fewer than 16 bytes: bail out. */
1413bc3d5698SJohn Baldwin	subl	$16,%eax
1414bc3d5698SJohn Baldwin	jc	.L020cbc_abort
	/*
	 * Build the aligned frame: %ebx = (%esp - 56) rounded down to 16 bytes.
	 * The remaining two argument loads are done before the xchgl so they
	 * still use the caller-relative %esp.
	 */
1415bc3d5698SJohn Baldwin	leal	-56(%esp),%ebx
1416bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
1417bc3d5698SJohn Baldwin	andl	$-16,%ebx
1418bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
1419bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
	/* %xmm1 carries the chaining value through both loops. */
1420bc3d5698SJohn Baldwin	movdqu	(%ebp),%xmm1
	/* %edi = destination - source, so (%ebx,%esi,1) below addresses the output block. */
1421bc3d5698SJohn Baldwin	subl	%esi,%edi
1422bc3d5698SJohn Baldwin	movl	%ebx,48(%esp)
1423bc3d5698SJohn Baldwin	movl	%edi,(%esp)
1424bc3d5698SJohn Baldwin	movl	%edx,4(%esp)
1425bc3d5698SJohn Baldwin	movl	%ebp,8(%esp)
	/* From here on %edi is the byte counter (length - 16), and %ebp is reused for the constants. */
1426bc3d5698SJohn Baldwin	movl	%eax,%edi
	/*
	 * %ebp = offset of (.L_vpaes_consts + 0x30) from .L021pic_point, the
	 * return address of the call that follows.  Presumably _vpaes_preheat
	 * turns this into an absolute address; it is defined outside this
	 * excerpt, so verify there.
	 */
1427bc3d5698SJohn Baldwin	leal	.L_vpaes_consts+0x30-.L021pic_point,%ebp
1428bc3d5698SJohn Baldwin	call	_vpaes_preheat
1429bc3d5698SJohn Baldwin.L021pic_point:
	/* Sixth argument: zero takes the decrypt loop, anything else the encrypt loop. */
1430bc3d5698SJohn Baldwin	cmpl	$0,%ecx
1431bc3d5698SJohn Baldwin	je	.L022cbc_dec_loop
1432bc3d5698SJohn Baldwin	jmp	.L023cbc_enc_loop
1433bc3d5698SJohn Baldwin.align	16
	/* Encrypt loop: output = core(input XOR chaining); output becomes the next chaining value. */
1434bc3d5698SJohn Baldwin.L023cbc_enc_loop:
1435bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
1436bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
1437bc3d5698SJohn Baldwin	call	_vpaes_encrypt_core
	/*
	 * Reload the pointer delta and the fourth argument from the frame;
	 * NOTE(review): this suggests the core does not preserve %ebx/%edx --
	 * confirm against the core.
	 */
1438bc3d5698SJohn Baldwin	movl	(%esp),%ebx
1439bc3d5698SJohn Baldwin	movl	4(%esp),%edx
1440bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm1
1441bc3d5698SJohn Baldwin	movdqu	%xmm0,(%ebx,%esi,1)
1442bc3d5698SJohn Baldwin	leal	16(%esi),%esi
	/* Counter started at length - 16, so looping while no borrow yields floor(length / 16) blocks. */
1443bc3d5698SJohn Baldwin	subl	$16,%edi
1444bc3d5698SJohn Baldwin	jnc	.L023cbc_enc_loop
1445bc3d5698SJohn Baldwin	jmp	.L024cbc_done
1446bc3d5698SJohn Baldwin.align	16
	/* Decrypt loop: output = core(input) XOR chaining; the input block becomes the next chaining value. */
1447bc3d5698SJohn Baldwin.L022cbc_dec_loop:
1448bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm0
	/* Spill the chaining value and the input block to the aligned frame across the core call. */
1449bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
1450bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%esp)
1451bc3d5698SJohn Baldwin	call	_vpaes_decrypt_core
1452bc3d5698SJohn Baldwin	movl	(%esp),%ebx
1453bc3d5698SJohn Baldwin	movl	4(%esp),%edx
1454bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm0
1455bc3d5698SJohn Baldwin	movdqa	32(%esp),%xmm1
1456bc3d5698SJohn Baldwin	movdqu	%xmm0,(%ebx,%esi,1)
1457bc3d5698SJohn Baldwin	leal	16(%esi),%esi
1458bc3d5698SJohn Baldwin	subl	$16,%edi
1459bc3d5698SJohn Baldwin	jnc	.L022cbc_dec_loop
	/* Common exit: restore %esp, then write the final chaining value back through the saved pointer. */
1460bc3d5698SJohn Baldwin.L024cbc_done:
1461bc3d5698SJohn Baldwin	movl	8(%esp),%ebx
1462bc3d5698SJohn Baldwin	movl	48(%esp),%esp
1463bc3d5698SJohn Baldwin	movdqu	%xmm1,(%ebx)
	/* Short-length exit joins here; %esp was never switched on that path. */
1464bc3d5698SJohn Baldwin.L020cbc_abort:
1465bc3d5698SJohn Baldwin	popl	%edi
1466bc3d5698SJohn Baldwin	popl	%esi
1467bc3d5698SJohn Baldwin	popl	%ebx
1468bc3d5698SJohn Baldwin	popl	%ebp
1469bc3d5698SJohn Baldwin	ret
1470bc3d5698SJohn Baldwin.size	vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
1471*c0855eaaSJohn Baldwin
/*
 * ELF note record in the allocated section .note.gnu.property.
 * Layout: name size, descriptor size, note type, name, descriptor.
 * The sizes are computed from the numeric local labels rather than
 * hard-coded.
 */
1472*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
1473*c0855eaaSJohn Baldwin	.p2align 2
	/* Name size: bytes between labels 0 and 1, i.e. "GNU" plus its NUL. */
1474*c0855eaaSJohn Baldwin	.long 1f - 0f
	/* Descriptor size: bytes between labels 1 and 4. */
1475*c0855eaaSJohn Baldwin	.long 4f - 1f
	/* Note type 5 (NT_GNU_PROPERTY_TYPE_0 in the GNU property note scheme). */
1476*c0855eaaSJohn Baldwin	.long 5
1477*c0855eaaSJohn Baldwin0:
1478*c0855eaaSJohn Baldwin	.asciz "GNU"
1479*c0855eaaSJohn Baldwin1:
1480*c0855eaaSJohn Baldwin	.p2align 2
	/* Property type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND). */
1481*c0855eaaSJohn Baldwin	.long 0xc0000002
	/* Property data size: bytes between labels 2 and 3. */
1482*c0855eaaSJohn Baldwin	.long 3f - 2f
1483*c0855eaaSJohn Baldwin2:
	/* Feature bits: 3 = IBT (bit 0) | SHSTK (bit 1). */
1484*c0855eaaSJohn Baldwin	.long 3
1485*c0855eaaSJohn Baldwin3:
1486*c0855eaaSJohn Baldwin	.p2align 2
1487*c0855eaaSJohn Baldwin4:
1488bc3d5698SJohn Baldwin#endif
1489