/* Do not modify. This file is auto-generated from ghash-x86.pl. */
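/*
 * Editorial overview (not part of the generated output): GHASH, the
 * multiplication by the hash key H in GF(2^128) used by GCM, for 32-bit
 * x86.  Three implementations of the entry points dispatched by
 * OpenSSL's crypto/modes/gcm128.c are provided:
 *
 *   gcm_gmult_4bit_x86, gcm_ghash_4bit_x86 - plain IA-32 integer code
 *   gcm_gmult_4bit_mmx, gcm_ghash_4bit_mmx - MMX code
 *   gcm_init_clmul, gcm_gmult_clmul, gcm_ghash_clmul - PCLMULQDQ code
 *
 * The PIC and non-PIC halves of the #ifdef below carry the same code.
 * Instructions the generator emits as raw bytes: 243,15,30,251 is
 * endbr32 (under __CET__); 102,15,58,68,... is pclmulqdq;
 * 102,15,56,0,... is pshufb; 102,15,58,15,... is palignr.
 */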
#ifdef PIC
.text
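/*
 * void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16])
 *
 * One GHASH multiplication, Xi = Xi * H, via Shoup's 4-bit table
 * method.  The 16 reduction constants (the high words of .Lrem_4bit)
 * are materialized on the stack at 16(%esp), and the copy of Xi at
 * (%esp) is consumed one nibble at a time against Htable (%esi).
 */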
.globl	gcm_gmult_4bit_x86
.type	gcm_gmult_4bit_x86,@function
.align	16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%edi
	movl	108(%esp),%esi
	movl	(%edi),%ebp
	movl	4(%edi),%edx
	movl	8(%edi),%ecx
	movl	12(%edi),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
	movl	%ebp,(%esp)
	movl	%edx,4(%esp)
	movl	%ecx,8(%esp)
	movl	%ebx,12(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	.L000x86_loop
.align	16
.L000x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L001x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L000x86_loop
.align	16
.L001x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
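/*
 * void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
 *                         const u8 *inp, size_t len)
 *
 * GHASH over len bytes (a multiple of 16): for every 16-byte input
 * block, Xi = (Xi ^ block) * H, with the same stack-resident 4-bit
 * reduction table and inner loop as gcm_gmult_4bit_x86.
 */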
.globl	gcm_ghash_4bit_x86
.type	gcm_ghash_4bit_x86,@function
.align	16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%ebx
	movl	108(%esp),%esi
	movl	112(%esp),%edi
	movl	116(%esp),%ecx
	addl	%edi,%ecx
	movl	%ecx,116(%esp)
	movl	(%ebx),%ebp
	movl	4(%ebx),%edx
	movl	8(%ebx),%ecx
	movl	12(%ebx),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
.align	16
.L002x86_outer_loop:
	xorl	12(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	4(%edi),%edx
	xorl	(%edi),%ebp
	movl	%ebx,12(%esp)
	movl	%ecx,8(%esp)
	movl	%edx,4(%esp)
	movl	%ebp,(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	.L003x86_loop
.align	16
.L003x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L004x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L003x86_loop
.align	16
.L004x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	112(%esp),%edi
	leal	16(%edi),%edi
	cmpl	116(%esp),%edi
	movl	%edi,112(%esp)
	jb	.L002x86_outer_loop
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
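/*
 * void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16])
 *
 * Same operation as gcm_gmult_4bit_x86, but with the 128-bit product
 * accumulated in MMX registers (%mm0/%mm1) and the reduction constants
 * taken from .Lrem_4bit, located PC-relatively via the
 * .L005pic_point call/pop pair.
 */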
.globl	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	call	.L005pic_point
.L005pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L006mmx_loop
.align	16
.L006mmx_loop:
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	.L007mmx_break
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L006mmx_loop
.align	16
.L007mmx_break:
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
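/*
 * void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
 *                         const u8 *inp, size_t len)
 *
 * MMX GHASH.  The prologue unrolls Htable into pre-shifted low/high
 * lookup tables on a 64-byte-aligned stack frame (the regions indexed
 * at 16/144/272/400(%esp)); the .L009outer loop then consumes one
 * 16-byte block per iteration with byte-wide lookups, reducing through
 * the .Lrem_8bit table.
 */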
.globl	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp
	call	.L008pic_point
.L008pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	16
.L009outer:
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx
	jne	.L009outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
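/*
 * void gcm_init_clmul(u128 Htable[16], const u64 Xi[2])
 *
 * Key setup for the PCLMULQDQ path.  H (loaded from Xi) is doubled in
 * GF(2^128) - the pcmpgtd/pand/pxor sequence conditionally folds in
 * the 0xc2...01 reduction constant stored at .Lbswap+16 - then H^2 is
 * computed by a Karatsuba multiply-and-reduce.  H, H^2 and their
 * combined Karatsuba halves are stored at 0, 16 and 32(%edx).
 */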
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	movl	4(%esp),%edx
	movl	8(%esp),%eax
	call	.L010pic
.L010pic:
	popl	%ecx
	leal	.Lbswap-.L010pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
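/*
 * void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16])
 *
 * One GHASH multiplication with pclmulqdq: byte-swap Xi via the
 * .Lbswap mask, Karatsuba 128x128-bit carry-less multiply against H
 * from Htable, reduce modulo the GCM polynomial, and swap back.
 */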
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	movl	4(%esp),%eax
	movl	8(%esp),%edx
	call	.L011pic
.L011pic:
	popl	%ecx
	leal	.Lbswap-.L011pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
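/*
 * void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
 *                      const u8 *inp, size_t len)
 *
 * PCLMULQDQ GHASH.  .L015mod_loop folds two input blocks per
 * iteration using H and H^2 from Htable; .L014even_tail and
 * .L013odd_tail finish the remaining zero, one or two blocks.
 */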
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	call	.L012pic
.L012pic:
	popl	%ecx
	leal	.Lbswap-.L012pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197
	subl	$16,%ebx
	jz	.L013odd_tail
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,221,0
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	.L014even_tail
	jmp	.L015mod_loop
.align	32
.L015mod_loop:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L015mod_loop
.L014even_tail:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	.L016done
	movups	(%edx),%xmm2
.L013odd_tail:
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L016done:
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
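/*
 * .Lbswap: a byte-reversal mask for pshufb, followed by the GCM
 * reduction constant (0xc2 in the most significant byte, 1 in the
 * least) used by gcm_init_clmul.
 */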
.align	64
.Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
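/* .Lrem_8bit: 256-entry reduction table for the 8-bit MMX GHASH path. */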
.align	64
.Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
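/* .Lrem_4bit: 16-entry reduction table for the 4-bit GHASH paths. */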
.align	64
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
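/* ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */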
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0

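/*
 * .note.gnu.property carrying GNU_PROPERTY_X86_FEATURE_1_AND
 * (0xc0000002) with value 3, marking the object as IBT- and
 * SHSTK- (CET-) compatible.
 */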
	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4:
#else
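/* Non-PIC build: the generator emits the same code as the PIC half above. */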
.text
.globl	gcm_gmult_4bit_x86
.type	gcm_gmult_4bit_x86,@function
.align	16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%edi
	movl	108(%esp),%esi
	movl	(%edi),%ebp
	movl	4(%edi),%edx
	movl	8(%edi),%ecx
	movl	12(%edi),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
	movl	%ebp,(%esp)
	movl	%edx,4(%esp)
	movl	%ecx,8(%esp)
	movl	%ebx,12(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	.L000x86_loop
.align	16
.L000x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L001x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L000x86_loop
.align	16
.L001x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
.globl	gcm_ghash_4bit_x86
.type	gcm_ghash_4bit_x86,@function
.align	16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%ebx
	movl	108(%esp),%esi
	movl	112(%esp),%edi
	movl	116(%esp),%ecx
	addl	%edi,%ecx
	movl	%ecx,116(%esp)
	movl	(%ebx),%ebp
	movl	4(%ebx),%edx
	movl	8(%ebx),%ecx
	movl	12(%ebx),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
.align	16
.L002x86_outer_loop:
	xorl	12(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	4(%edi),%edx
	xorl	(%edi),%ebp
	movl	%ebx,12(%esp)
	movl	%ecx,8(%esp)
	movl	%edx,4(%esp)
	movl	%ebp,(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	.L003x86_loop
.align	16
.L003x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L004x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L003x86_loop
.align	16
.L004x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	112(%esp),%edi
	leal	16(%edi),%edi
	cmpl	116(%esp),%edi
	movl	%edi,112(%esp)
	jb	.L002x86_outer_loop
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
.globl	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	call	.L005pic_point
.L005pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L006mmx_loop
.align	16
.L006mmx_loop:
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	.L007mmx_break
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L006mmx_loop
.align	16
.L007mmx_break:
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
.globl	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp
	call	.L008pic_point
.L008pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	16
.L009outer:
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx
	jne	.L009outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	movl	4(%esp),%edx
	movl	8(%esp),%eax
	call	.L010pic
.L010pic:
	popl	%ecx
	leal	.Lbswap-.L010pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	movl	4(%esp),%eax
	movl	8(%esp),%edx
	call	.L011pic
.L011pic:
	popl	%ecx
	leal	.Lbswap-.L011pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	call	.L012pic
.L012pic:
	popl	%ecx
	leal	.Lbswap-.L012pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197
	subl	$16,%ebx
	jz	.L013odd_tail
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,221,0
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	.L014even_tail
	jmp	.L015mod_loop
.align	32
.L015mod_loop:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L015mod_loop
.L014even_tail:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	.L016done
	movups	(%edx),%xmm2
.L013odd_tail:
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L016done:
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
.align	64
.Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	64
.Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	64
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0

	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4:
#endif
