xref: /freebsd/sys/crypto/openssl/i386/ghash-x86.S (revision bc5304a006238115291e7568583632889dffbab9)
1/* $FreeBSD$ */
2/* Do not modify. This file is auto-generated from ghash-x86.pl. */
3#ifdef PIC
4.text
/*
 * gcm_gmult_4bit_x86 -- integer-register-only 4-bit table multiply of one
 * 16-byte block (GHASH, per the ident string at the end of this file).
 *
 * Stack arguments (read at 104/108(%esp) after four pushes and an 84-byte
 * frame):
 *   arg0: pointer to a 16-byte block; read at entry, overwritten with the
 *         result at exit.
 *   arg1: pointer to a table addressed as 16-byte entries (index = nibble*16).
 *   (Presumably Xi and Htable in OpenSSL naming -- confirm against the
 *   generator script.)
 *
 * Saves and restores ebp, ebx, esi, edi; eax, ecx, edx are scratch.
 * Frame layout: 0..15(%esp) copy of the input block,
 *               16..79(%esp) 16-entry dword reduction table.
 */
5.globl	gcm_gmult_4bit_x86
6.type	gcm_gmult_4bit_x86,@function
7.align	16
8gcm_gmult_4bit_x86:
9.L_gcm_gmult_4bit_x86_begin:
10	pushl	%ebp
11	pushl	%ebx
12	pushl	%esi
13	pushl	%edi
14	subl	$84,%esp
15	movl	104(%esp),%edi
16	movl	108(%esp),%esi
17	movl	(%edi),%ebp
18	movl	4(%edi),%edx
19	movl	8(%edi),%ecx
20	movl	12(%edi),%ebx
/* Build the 16-entry reduction table on the stack (same values as the */
/* non-zero dwords of .Lrem_4bit). */
21	movl	$0,16(%esp)
22	movl	$471859200,20(%esp)
23	movl	$943718400,24(%esp)
24	movl	$610271232,28(%esp)
25	movl	$1887436800,32(%esp)
26	movl	$1822425088,36(%esp)
27	movl	$1220542464,40(%esp)
28	movl	$1423966208,44(%esp)
29	movl	$3774873600,48(%esp)
30	movl	$4246732800,52(%esp)
31	movl	$3644850176,56(%esp)
32	movl	$3311403008,60(%esp)
33	movl	$2441084928,64(%esp)
34	movl	$2376073216,68(%esp)
35	movl	$2847932416,72(%esp)
36	movl	$3051356160,76(%esp)
/* Keep a byte-addressable copy of the input block at 0..15(%esp). */
37	movl	%ebp,(%esp)
38	movl	%edx,4(%esp)
39	movl	%ecx,8(%esp)
40	movl	%ebx,12(%esp)
/* ebx = (low nibble of input byte 15) * 16 = offset of the first entry. */
41	shrl	$20,%ebx
42	andl	$240,%ebx
/* Accumulator, most to least significant dword: ebp, edx, ecx, ebx. */
43	movl	4(%esi,%ebx,1),%ebp
44	movl	(%esi,%ebx,1),%edx
45	movl	12(%esi,%ebx,1),%ecx
46	movl	8(%esi,%ebx,1),%ebx
47	xorl	%eax,%eax
/* edi = index of the input byte being consumed, counting down from 15. */
48	movl	$15,%edi
49	jmp	.L000x86_loop
50.align	16
51.L000x86_loop:
/* Step A: shift the accumulator right by 4, fold the shifted-out nibble */
/* back in via the stack table, then xor the entry for the HIGH nibble of */
/* input byte edi. */
52	movb	%bl,%al
53	shrdl	$4,%ecx,%ebx
54	andb	$15,%al
55	shrdl	$4,%edx,%ecx
56	shrdl	$4,%ebp,%edx
57	shrl	$4,%ebp
58	xorl	16(%esp,%eax,4),%ebp
59	movb	(%esp,%edi,1),%al
60	andb	$240,%al
61	xorl	8(%esi,%eax,1),%ebx
62	xorl	12(%esi,%eax,1),%ecx
63	xorl	(%esi,%eax,1),%edx
64	xorl	4(%esi,%eax,1),%ebp
/* Leave once byte 0 has been handled (edi goes negative). */
65	decl	%edi
66	js	.L001x86_break
/* Step B: same shift/fold, then xor the entry for the LOW nibble of the */
/* next lower input byte (shlb discards the high nibble). */
67	movb	%bl,%al
68	shrdl	$4,%ecx,%ebx
69	andb	$15,%al
70	shrdl	$4,%edx,%ecx
71	shrdl	$4,%ebp,%edx
72	shrl	$4,%ebp
73	xorl	16(%esp,%eax,4),%ebp
74	movb	(%esp,%edi,1),%al
75	shlb	$4,%al
76	xorl	8(%esi,%eax,1),%ebx
77	xorl	12(%esi,%eax,1),%ecx
78	xorl	(%esi,%eax,1),%edx
79	xorl	4(%esi,%eax,1),%ebp
80	jmp	.L000x86_loop
81.align	16
82.L001x86_break:
/* Byte-swap each dword and write the result back over the arg0 block. */
83	bswap	%ebx
84	bswap	%ecx
85	bswap	%edx
86	bswap	%ebp
87	movl	104(%esp),%edi
88	movl	%ebx,12(%edi)
89	movl	%ecx,8(%edi)
90	movl	%edx,4(%edi)
91	movl	%ebp,(%edi)
92	addl	$84,%esp
93	popl	%edi
94	popl	%esi
95	popl	%ebx
96	popl	%ebp
97	ret
98.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
/*
 * gcm_ghash_4bit_x86 -- integer-register-only 4-bit table hash over a buffer.
 *
 * Stack arguments (read at 104..116(%esp) after four pushes and an 84-byte
 * frame):
 *   arg0: pointer to the 16-byte state; read at entry, written at exit.
 *   arg1: pointer to a table addressed as 16-byte entries (nibble*16).
 *   arg2: input pointer; the slot at 112(%esp) is advanced by 16 per block.
 *   arg3: byte count; the slot at 116(%esp) is replaced by arg2+arg3 (end).
 *
 * For every 16-byte input block: state ^= block, then the same nibble-wise
 * table multiply as in gcm_gmult_4bit_x86.
 * NOTE: the end check sits after the loop body, so at least one block is
 * always consumed, even if arg3 is 0.
 *
 * Saves and restores ebp, ebx, esi, edi; eax, ecx, edx are scratch.
 */
99.globl	gcm_ghash_4bit_x86
100.type	gcm_ghash_4bit_x86,@function
101.align	16
102gcm_ghash_4bit_x86:
103.L_gcm_ghash_4bit_x86_begin:
104	pushl	%ebp
105	pushl	%ebx
106	pushl	%esi
107	pushl	%edi
108	subl	$84,%esp
109	movl	104(%esp),%ebx
110	movl	108(%esp),%esi
111	movl	112(%esp),%edi
112	movl	116(%esp),%ecx
/* Turn the length slot into an end pointer (input + length). */
113	addl	%edi,%ecx
114	movl	%ecx,116(%esp)
/* Load the 16-byte state into ebp, edx, ecx, ebx. */
115	movl	(%ebx),%ebp
116	movl	4(%ebx),%edx
117	movl	8(%ebx),%ecx
118	movl	12(%ebx),%ebx
/* Build the 16-entry reduction table on the stack. */
119	movl	$0,16(%esp)
120	movl	$471859200,20(%esp)
121	movl	$943718400,24(%esp)
122	movl	$610271232,28(%esp)
123	movl	$1887436800,32(%esp)
124	movl	$1822425088,36(%esp)
125	movl	$1220542464,40(%esp)
126	movl	$1423966208,44(%esp)
127	movl	$3774873600,48(%esp)
128	movl	$4246732800,52(%esp)
129	movl	$3644850176,56(%esp)
130	movl	$3311403008,60(%esp)
131	movl	$2441084928,64(%esp)
132	movl	$2376073216,68(%esp)
133	movl	$2847932416,72(%esp)
134	movl	$3051356160,76(%esp)
135.align	16
136.L002x86_outer_loop:
/* state ^= next 16 input bytes (edi = input pointer here). */
137	xorl	12(%edi),%ebx
138	xorl	8(%edi),%ecx
139	xorl	4(%edi),%edx
140	xorl	(%edi),%ebp
/* Byte-addressable copy of the xor-ed block at 0..15(%esp). */
141	movl	%ebx,12(%esp)
142	movl	%ecx,8(%esp)
143	movl	%edx,4(%esp)
144	movl	%ebp,(%esp)
/* ebx = (low nibble of byte 15) * 16 = offset of the first table entry. */
145	shrl	$20,%ebx
146	andl	$240,%ebx
147	movl	4(%esi,%ebx,1),%ebp
148	movl	(%esi,%ebx,1),%edx
149	movl	12(%esi,%ebx,1),%ecx
150	movl	8(%esi,%ebx,1),%ebx
151	xorl	%eax,%eax
/* edi is reused as the byte index (15 down to 0); the input pointer */
/* lives in the 112(%esp) slot meanwhile. */
152	movl	$15,%edi
153	jmp	.L003x86_loop
154.align	16
155.L003x86_loop:
/* Shift accumulator right by 4, fold the dropped nibble, xor the entry */
/* for the high nibble of byte edi. */
156	movb	%bl,%al
157	shrdl	$4,%ecx,%ebx
158	andb	$15,%al
159	shrdl	$4,%edx,%ecx
160	shrdl	$4,%ebp,%edx
161	shrl	$4,%ebp
162	xorl	16(%esp,%eax,4),%ebp
163	movb	(%esp,%edi,1),%al
164	andb	$240,%al
165	xorl	8(%esi,%eax,1),%ebx
166	xorl	12(%esi,%eax,1),%ecx
167	xorl	(%esi,%eax,1),%edx
168	xorl	4(%esi,%eax,1),%ebp
169	decl	%edi
170	js	.L004x86_break
/* Same again for the low nibble of the next lower byte. */
171	movb	%bl,%al
172	shrdl	$4,%ecx,%ebx
173	andb	$15,%al
174	shrdl	$4,%edx,%ecx
175	shrdl	$4,%ebp,%edx
176	shrl	$4,%ebp
177	xorl	16(%esp,%eax,4),%ebp
178	movb	(%esp,%edi,1),%al
179	shlb	$4,%al
180	xorl	8(%esi,%eax,1),%ebx
181	xorl	12(%esi,%eax,1),%ecx
182	xorl	(%esi,%eax,1),%edx
183	xorl	4(%esi,%eax,1),%ebp
184	jmp	.L003x86_loop
185.align	16
186.L004x86_break:
187	bswap	%ebx
188	bswap	%ecx
189	bswap	%edx
190	bswap	%ebp
/* Advance the input pointer by one block; continue while below the end */
/* pointer (unsigned compare; mov does not disturb the flags from cmp). */
191	movl	112(%esp),%edi
192	leal	16(%edi),%edi
193	cmpl	116(%esp),%edi
194	movl	%edi,112(%esp)
195	jb	.L002x86_outer_loop
/* Write the final state back through arg0. */
196	movl	104(%esp),%edi
197	movl	%ebx,12(%edi)
198	movl	%ecx,8(%edi)
199	movl	%edx,4(%edi)
200	movl	%ebp,(%edi)
201	addl	$84,%esp
202	popl	%edi
203	popl	%esi
204	popl	%ebx
205	popl	%ebp
206	ret
207.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
/*
 * gcm_gmult_4bit_mmx -- MMX variant of the 4-bit table multiply of one
 * 16-byte block.
 *
 * Stack arguments (read at 20/24(%esp) after four pushes):
 *   arg0 (edi): pointer to a 16-byte block; read bytewise, overwritten with
 *               the result.
 *   arg1 (esi): pointer to a table addressed as 16-byte entries (nibble*16).
 *
 * Register roles: eax = address of .Lrem_4bit (obtained PC-relative via
 * call/pop), mm1:mm0 = 128-bit accumulator (mm1 = qword at +0 of an entry,
 * mm0 = qword at +8), ebx = low bits of mm0 used as the reduction index,
 * ecx/edx = low/high nibble of the current byte times 16, ebp = byte index.
 * Executes emms before returning; saves and restores ebp, ebx, esi, edi.
 */
208.globl	gcm_gmult_4bit_mmx
209.type	gcm_gmult_4bit_mmx,@function
210.align	16
211gcm_gmult_4bit_mmx:
212.L_gcm_gmult_4bit_mmx_begin:
213	pushl	%ebp
214	pushl	%ebx
215	pushl	%esi
216	pushl	%edi
217	movl	20(%esp),%edi
218	movl	24(%esp),%esi
/* PIC idiom: call pushes the address of the next instruction, pop */
/* retrieves it, lea adds the link-time distance to .Lrem_4bit. */
219	call	.L005pic_point
220.L005pic_point:
221	popl	%eax
222	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
/* Split input byte 15: cl = low nibble * 16, edx = high nibble * 16. */
223	movzbl	15(%edi),%ebx
224	xorl	%ecx,%ecx
225	movl	%ebx,%edx
226	movb	%dl,%cl
/* ebp = index of the next input byte to fetch (14 down to 0). */
227	movl	$14,%ebp
228	shlb	$4,%cl
229	andl	$240,%edx
/* Seed the accumulator with the entry for the low nibble of byte 15. */
230	movq	8(%esi,%ecx,1),%mm0
231	movq	(%esi,%ecx,1),%mm1
232	movd	%mm0,%ebx
233	jmp	.L006mmx_loop
234.align	16
235.L006mmx_loop:
/* Shift mm1:mm0 right by 4 (mm2 carries the 4 bits crossing from mm1 */
/* into mm0), xor the .Lrem_4bit entry selected by the dropped nibble */
/* into mm1, then xor the table entry at offset edx (high nibble). */
/* The next input byte is fetched into cl along the way. */
236	psrlq	$4,%mm0
237	andl	$15,%ebx
238	movq	%mm1,%mm2
239	psrlq	$4,%mm1
240	pxor	8(%esi,%edx,1),%mm0
241	movb	(%edi,%ebp,1),%cl
242	psllq	$60,%mm2
243	pxor	(%eax,%ebx,8),%mm1
/* The sign flag from this decl is consumed by the js below; the */
/* intervening movd/pxor/movl do not modify EFLAGS. */
244	decl	%ebp
245	movd	%mm0,%ebx
246	pxor	(%esi,%edx,1),%mm1
247	movl	%ecx,%edx
248	pxor	%mm2,%mm0
249	js	.L007mmx_break
/* Same shift/reduce, now with the entry at offset ecx (low nibble of */
/* the byte just fetched); edx keeps its high nibble for the next pass. */
250	shlb	$4,%cl
251	andl	$15,%ebx
252	psrlq	$4,%mm0
253	andl	$240,%edx
254	movq	%mm1,%mm2
255	psrlq	$4,%mm1
256	pxor	8(%esi,%ecx,1),%mm0
257	psllq	$60,%mm2
258	pxor	(%eax,%ebx,8),%mm1
259	movd	%mm0,%ebx
260	pxor	(%esi,%ecx,1),%mm1
261	pxor	%mm2,%mm0
262	jmp	.L006mmx_loop
263.align	16
264.L007mmx_break:
/* Tail: the last fetched byte (byte 0) still has both nibbles pending. */
/* Low nibble first ... */
265	shlb	$4,%cl
266	andl	$15,%ebx
267	psrlq	$4,%mm0
268	andl	$240,%edx
269	movq	%mm1,%mm2
270	psrlq	$4,%mm1
271	pxor	8(%esi,%ecx,1),%mm0
272	psllq	$60,%mm2
273	pxor	(%eax,%ebx,8),%mm1
274	movd	%mm0,%ebx
275	pxor	(%esi,%ecx,1),%mm1
276	pxor	%mm2,%mm0
/* ... then the high nibble. */
277	psrlq	$4,%mm0
278	andl	$15,%ebx
279	movq	%mm1,%mm2
280	psrlq	$4,%mm1
281	pxor	8(%esi,%edx,1),%mm0
282	psllq	$60,%mm2
283	pxor	(%eax,%ebx,8),%mm1
284	movd	%mm0,%ebx
285	pxor	(%esi,%edx,1),%mm1
286	pxor	%mm2,%mm0
/* Unpack mm1:mm0 into four dwords: ebx = mm0 low, ecx = mm0 high, */
/* edx = mm1 low, ebp = mm1 high. */
287	psrlq	$32,%mm0
288	movd	%mm1,%edx
289	psrlq	$32,%mm1
290	movd	%mm0,%ecx
291	movd	%mm1,%ebp
292	bswap	%ebx
293	bswap	%edx
294	bswap	%ecx
295	bswap	%ebp
/* Leave MMX state before returning to the caller. */
296	emms
297	movl	%ebx,12(%edi)
298	movl	%edx,4(%edi)
299	movl	%ecx,8(%edi)
300	movl	%ebp,(%edi)
301	popl	%edi
302	popl	%esi
303	popl	%ebx
304	popl	%ebp
305	ret
306.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
/*
 * gcm_ghash_4bit_mmx -- MMX table-driven hash over a buffer.  The state is
 * consumed one byte (two nibbles) per unrolled step, with reduction through
 * the 16-bit .Lrem_8bit table.
 *
 * Stack arguments (read at 20..32(%esp) after four pushes):
 *   arg0 (eax): pointer to the 16-byte state; read before the loop, written
 *               after it.
 *   arg1 (ebx): pointer to a table of 16 entries x 16 bytes.
 *   arg2 (ecx): input pointer, advanced by 16 per block.
 *   arg3 (edx): byte count (converted to an end pointer).
 *
 * The stack pointer is lowered by at least 560 bytes and realigned; the
 * caller's esp is kept at 556(%esp).  Frame layout relative to the new esp:
 *     0.. 15  per-entry byte: low byte of dword +8 of each entry, shifted
 *             left by 4
 *    16..143  qword at +8 of each of the 16 table entries
 *   144..271  qword at +0 of each entry
 *   272..399  low qword of each entry shifted right by 4 as a 128-bit value
 *   400..527  high qword of each entry shifted right by 4
 *   528..539  scratch copy of state bytes 0..11 for the current block
 *   544       arg0, 548 current input pointer, 552 end pointer,
 *   556       caller's esp
 *
 * NOTE: the outer loop exits on pointer equality (jne), after the body, so
 * the byte count has to be a non-zero multiple of 16 for it to terminate
 * at the intended place.
 * NOTE(review): pinsrw/pshufw on MMX registers are extensions that arrived
 * with SSE, not baseline MMX -- confirm the dispatcher checks for them.
 * Saves and restores ebp, ebx, esi, edi; executes emms before returning.
 */
307.globl	gcm_ghash_4bit_mmx
308.type	gcm_ghash_4bit_mmx,@function
309.align	16
310gcm_ghash_4bit_mmx:
311.L_gcm_ghash_4bit_mmx_begin:
312	pushl	%ebp
313	pushl	%ebx
314	pushl	%esi
315	pushl	%edi
316	movl	20(%esp),%eax
317	movl	24(%esp),%ebx
318	movl	28(%esp),%ecx
319	movl	32(%esp),%edx
/* ebp remembers the caller's esp until it is parked at 556(%esp). */
320	movl	%esp,%ebp
/* PIC idiom: esi = run-time address of .Lrem_8bit. */
321	call	.L008pic_point
322.L008pic_point:
323	popl	%esi
324	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
/* Carve out the frame and align it down to a 64-byte boundary. */
325	subl	$544,%esp
326	andl	$-64,%esp
327	subl	$16,%esp
/* edx = end pointer (input + length). */
328	addl	%ecx,%edx
329	movl	%eax,544(%esp)
330	movl	%edx,552(%esp)
331	movl	%ebp,556(%esp)
/* Bias the pointers by 128 so every table slot is reachable with a */
/* signed 8-bit displacement: ebx = table+128, edi = esp+144, */
/* ebp = esp+400. */
332	addl	$128,%ebx
333	leal	144(%esp),%edi
334	leal	400(%esp),%ebp
/* Table expansion, software-pipelined over the 16 entries.  For entry i: */
/*   (esp+i)        = low byte of dword +8, shifted left by 4 */
/*   16+8i, 144+8i  = the entry's two qwords, unmodified */
/*   272+8i, 400+8i = the entry shifted right by 4 bits (the 4 bits leaving */
/*                    the +0 qword are or-ed into the top of the +8 qword) */
335	movl	-120(%ebx),%edx
336	movq	-120(%ebx),%mm0
337	movq	-128(%ebx),%mm3
338	shll	$4,%edx
339	movb	%dl,(%esp)
340	movl	-104(%ebx),%edx
341	movq	-104(%ebx),%mm2
342	movq	-112(%ebx),%mm5
343	movq	%mm0,-128(%edi)
344	psrlq	$4,%mm0
345	movq	%mm3,(%edi)
346	movq	%mm3,%mm7
347	psrlq	$4,%mm3
348	shll	$4,%edx
349	movb	%dl,1(%esp)
350	movl	-88(%ebx),%edx
351	movq	-88(%ebx),%mm1
352	psllq	$60,%mm7
353	movq	-96(%ebx),%mm4
354	por	%mm7,%mm0
355	movq	%mm2,-120(%edi)
356	psrlq	$4,%mm2
357	movq	%mm5,8(%edi)
358	movq	%mm5,%mm6
359	movq	%mm0,-128(%ebp)
360	psrlq	$4,%mm5
361	movq	%mm3,(%ebp)
362	shll	$4,%edx
363	movb	%dl,2(%esp)
364	movl	-72(%ebx),%edx
365	movq	-72(%ebx),%mm0
366	psllq	$60,%mm6
367	movq	-80(%ebx),%mm3
368	por	%mm6,%mm2
369	movq	%mm1,-112(%edi)
370	psrlq	$4,%mm1
371	movq	%mm4,16(%edi)
372	movq	%mm4,%mm7
373	movq	%mm2,-120(%ebp)
374	psrlq	$4,%mm4
375	movq	%mm5,8(%ebp)
376	shll	$4,%edx
377	movb	%dl,3(%esp)
378	movl	-56(%ebx),%edx
379	movq	-56(%ebx),%mm2
380	psllq	$60,%mm7
381	movq	-64(%ebx),%mm5
382	por	%mm7,%mm1
383	movq	%mm0,-104(%edi)
384	psrlq	$4,%mm0
385	movq	%mm3,24(%edi)
386	movq	%mm3,%mm6
387	movq	%mm1,-112(%ebp)
388	psrlq	$4,%mm3
389	movq	%mm4,16(%ebp)
390	shll	$4,%edx
391	movb	%dl,4(%esp)
392	movl	-40(%ebx),%edx
393	movq	-40(%ebx),%mm1
394	psllq	$60,%mm6
395	movq	-48(%ebx),%mm4
396	por	%mm6,%mm0
397	movq	%mm2,-96(%edi)
398	psrlq	$4,%mm2
399	movq	%mm5,32(%edi)
400	movq	%mm5,%mm7
401	movq	%mm0,-104(%ebp)
402	psrlq	$4,%mm5
403	movq	%mm3,24(%ebp)
404	shll	$4,%edx
405	movb	%dl,5(%esp)
406	movl	-24(%ebx),%edx
407	movq	-24(%ebx),%mm0
408	psllq	$60,%mm7
409	movq	-32(%ebx),%mm3
410	por	%mm7,%mm2
411	movq	%mm1,-88(%edi)
412	psrlq	$4,%mm1
413	movq	%mm4,40(%edi)
414	movq	%mm4,%mm6
415	movq	%mm2,-96(%ebp)
416	psrlq	$4,%mm4
417	movq	%mm5,32(%ebp)
418	shll	$4,%edx
419	movb	%dl,6(%esp)
420	movl	-8(%ebx),%edx
421	movq	-8(%ebx),%mm2
422	psllq	$60,%mm6
423	movq	-16(%ebx),%mm5
424	por	%mm6,%mm1
425	movq	%mm0,-80(%edi)
426	psrlq	$4,%mm0
427	movq	%mm3,48(%edi)
428	movq	%mm3,%mm7
429	movq	%mm1,-88(%ebp)
430	psrlq	$4,%mm3
431	movq	%mm4,40(%ebp)
432	shll	$4,%edx
433	movb	%dl,7(%esp)
434	movl	8(%ebx),%edx
435	movq	8(%ebx),%mm1
436	psllq	$60,%mm7
437	movq	(%ebx),%mm4
438	por	%mm7,%mm0
439	movq	%mm2,-72(%edi)
440	psrlq	$4,%mm2
441	movq	%mm5,56(%edi)
442	movq	%mm5,%mm6
443	movq	%mm0,-80(%ebp)
444	psrlq	$4,%mm5
445	movq	%mm3,48(%ebp)
446	shll	$4,%edx
447	movb	%dl,8(%esp)
448	movl	24(%ebx),%edx
449	movq	24(%ebx),%mm0
450	psllq	$60,%mm6
451	movq	16(%ebx),%mm3
452	por	%mm6,%mm2
453	movq	%mm1,-64(%edi)
454	psrlq	$4,%mm1
455	movq	%mm4,64(%edi)
456	movq	%mm4,%mm7
457	movq	%mm2,-72(%ebp)
458	psrlq	$4,%mm4
459	movq	%mm5,56(%ebp)
460	shll	$4,%edx
461	movb	%dl,9(%esp)
462	movl	40(%ebx),%edx
463	movq	40(%ebx),%mm2
464	psllq	$60,%mm7
465	movq	32(%ebx),%mm5
466	por	%mm7,%mm1
467	movq	%mm0,-56(%edi)
468	psrlq	$4,%mm0
469	movq	%mm3,72(%edi)
470	movq	%mm3,%mm6
471	movq	%mm1,-64(%ebp)
472	psrlq	$4,%mm3
473	movq	%mm4,64(%ebp)
474	shll	$4,%edx
475	movb	%dl,10(%esp)
476	movl	56(%ebx),%edx
477	movq	56(%ebx),%mm1
478	psllq	$60,%mm6
479	movq	48(%ebx),%mm4
480	por	%mm6,%mm0
481	movq	%mm2,-48(%edi)
482	psrlq	$4,%mm2
483	movq	%mm5,80(%edi)
484	movq	%mm5,%mm7
485	movq	%mm0,-56(%ebp)
486	psrlq	$4,%mm5
487	movq	%mm3,72(%ebp)
488	shll	$4,%edx
489	movb	%dl,11(%esp)
490	movl	72(%ebx),%edx
491	movq	72(%ebx),%mm0
492	psllq	$60,%mm7
493	movq	64(%ebx),%mm3
494	por	%mm7,%mm2
495	movq	%mm1,-40(%edi)
496	psrlq	$4,%mm1
497	movq	%mm4,88(%edi)
498	movq	%mm4,%mm6
499	movq	%mm2,-48(%ebp)
500	psrlq	$4,%mm4
501	movq	%mm5,80(%ebp)
502	shll	$4,%edx
503	movb	%dl,12(%esp)
504	movl	88(%ebx),%edx
505	movq	88(%ebx),%mm2
506	psllq	$60,%mm6
507	movq	80(%ebx),%mm5
508	por	%mm6,%mm1
509	movq	%mm0,-32(%edi)
510	psrlq	$4,%mm0
511	movq	%mm3,96(%edi)
512	movq	%mm3,%mm7
513	movq	%mm1,-40(%ebp)
514	psrlq	$4,%mm3
515	movq	%mm4,88(%ebp)
516	shll	$4,%edx
517	movb	%dl,13(%esp)
518	movl	104(%ebx),%edx
519	movq	104(%ebx),%mm1
520	psllq	$60,%mm7
521	movq	96(%ebx),%mm4
522	por	%mm7,%mm0
523	movq	%mm2,-24(%edi)
524	psrlq	$4,%mm2
525	movq	%mm5,104(%edi)
526	movq	%mm5,%mm6
527	movq	%mm0,-32(%ebp)
528	psrlq	$4,%mm5
529	movq	%mm3,96(%ebp)
530	shll	$4,%edx
531	movb	%dl,14(%esp)
532	movl	120(%ebx),%edx
533	movq	120(%ebx),%mm0
534	psllq	$60,%mm6
535	movq	112(%ebx),%mm3
536	por	%mm6,%mm2
537	movq	%mm1,-16(%edi)
538	psrlq	$4,%mm1
539	movq	%mm4,112(%edi)
540	movq	%mm4,%mm7
541	movq	%mm2,-24(%ebp)
542	psrlq	$4,%mm4
543	movq	%mm5,104(%ebp)
544	shll	$4,%edx
545	movb	%dl,15(%esp)
546	psllq	$60,%mm7
547	por	%mm7,%mm1
548	movq	%mm0,-8(%edi)
549	psrlq	$4,%mm0
550	movq	%mm3,120(%edi)
551	movq	%mm3,%mm6
552	movq	%mm1,-16(%ebp)
553	psrlq	$4,%mm3
554	movq	%mm4,112(%ebp)
555	psllq	$60,%mm6
556	por	%mm6,%mm0
557	movq	%mm0,-8(%ebp)
558	movq	%mm3,120(%ebp)
/* Load the state: mm6 = bytes 0..7, ebx = bytes 8..11, edx = bytes 12..15. */
559	movq	(%eax),%mm6
560	movl	8(%eax),%ebx
561	movl	12(%eax),%edx
562.align	16
563.L009outer:
/* state ^= next input block; advance and save the input pointer. */
564	xorl	12(%ecx),%edx
565	xorl	8(%ecx),%ebx
566	pxor	(%ecx),%mm6
567	leal	16(%ecx),%ecx
/* Park state bytes 0..11 in the scratch area; bytes 12..15 stay in edx. */
568	movl	%ebx,536(%esp)
569	movq	%mm6,528(%esp)
570	movl	%ecx,548(%esp)
571	xorl	%eax,%eax
/* Each "roll $8,%edx / movb %dl,%al" peels the next state byte, starting */
/* with the most significant byte of edx.  al (masked to 4 bits) and */
/* ebp/edi (byte >> 4) then index the stack tables built above. */
572	roll	$8,%edx
573	movb	%dl,%al
574	movl	%eax,%ebp
575	andb	$15,%al
576	shrl	$4,%ebp
/* mm0..mm2 collect the .Lrem_8bit reduction words (via pinsrw). */
577	pxor	%mm0,%mm0
578	roll	$8,%edx
579	pxor	%mm1,%mm1
580	pxor	%mm2,%mm2
581	movq	16(%esp,%eax,8),%mm7
582	movq	144(%esp,%eax,8),%mm6
583	movb	%dl,%al
584	movd	%mm7,%ebx
585	psrlq	$8,%mm7
586	movq	%mm6,%mm3
587	movl	%eax,%edi
588	psrlq	$8,%mm6
589	pxor	272(%esp,%ebp,8),%mm7
590	andb	$15,%al
591	psllq	$56,%mm3
592	shrl	$4,%edi
593	pxor	16(%esp,%eax,8),%mm7
594	roll	$8,%edx
595	pxor	144(%esp,%eax,8),%mm6
596	pxor	%mm3,%mm7
597	pxor	400(%esp,%ebp,8),%mm6
598	xorb	(%esp,%ebp,1),%bl
599	movb	%dl,%al
600	movd	%mm7,%ecx
601	movzbl	%bl,%ebx
602	psrlq	$8,%mm7
603	movq	%mm6,%mm3
604	movl	%eax,%ebp
605	psrlq	$8,%mm6
606	pxor	272(%esp,%edi,8),%mm7
607	andb	$15,%al
608	psllq	$56,%mm3
609	shrl	$4,%ebp
610	pinsrw	$2,(%esi,%ebx,2),%mm2
611	pxor	16(%esp,%eax,8),%mm7
612	roll	$8,%edx
613	pxor	144(%esp,%eax,8),%mm6
614	pxor	%mm3,%mm7
615	pxor	400(%esp,%edi,8),%mm6
616	xorb	(%esp,%edi,1),%cl
617	movb	%dl,%al
/* edx exhausted: continue with state bytes 8..11 saved at 536(%esp). */
618	movl	536(%esp),%edx
619	movd	%mm7,%ebx
620	movzbl	%cl,%ecx
621	psrlq	$8,%mm7
622	movq	%mm6,%mm3
623	movl	%eax,%edi
624	psrlq	$8,%mm6
625	pxor	272(%esp,%ebp,8),%mm7
626	andb	$15,%al
627	psllq	$56,%mm3
628	pxor	%mm2,%mm6
629	shrl	$4,%edi
630	pinsrw	$2,(%esi,%ecx,2),%mm1
631	pxor	16(%esp,%eax,8),%mm7
632	roll	$8,%edx
633	pxor	144(%esp,%eax,8),%mm6
634	pxor	%mm3,%mm7
635	pxor	400(%esp,%ebp,8),%mm6
636	xorb	(%esp,%ebp,1),%bl
637	movb	%dl,%al
638	movd	%mm7,%ecx
639	movzbl	%bl,%ebx
640	psrlq	$8,%mm7
641	movq	%mm6,%mm3
642	movl	%eax,%ebp
643	psrlq	$8,%mm6
644	pxor	272(%esp,%edi,8),%mm7
645	andb	$15,%al
646	psllq	$56,%mm3
647	pxor	%mm1,%mm6
648	shrl	$4,%ebp
649	pinsrw	$2,(%esi,%ebx,2),%mm0
650	pxor	16(%esp,%eax,8),%mm7
651	roll	$8,%edx
652	pxor	144(%esp,%eax,8),%mm6
653	pxor	%mm3,%mm7
654	pxor	400(%esp,%edi,8),%mm6
655	xorb	(%esp,%edi,1),%cl
656	movb	%dl,%al
657	movd	%mm7,%ebx
658	movzbl	%cl,%ecx
659	psrlq	$8,%mm7
660	movq	%mm6,%mm3
661	movl	%eax,%edi
662	psrlq	$8,%mm6
663	pxor	272(%esp,%ebp,8),%mm7
664	andb	$15,%al
665	psllq	$56,%mm3
666	pxor	%mm0,%mm6
667	shrl	$4,%edi
668	pinsrw	$2,(%esi,%ecx,2),%mm2
669	pxor	16(%esp,%eax,8),%mm7
670	roll	$8,%edx
671	pxor	144(%esp,%eax,8),%mm6
672	pxor	%mm3,%mm7
673	pxor	400(%esp,%ebp,8),%mm6
674	xorb	(%esp,%ebp,1),%bl
675	movb	%dl,%al
676	movd	%mm7,%ecx
677	movzbl	%bl,%ebx
678	psrlq	$8,%mm7
679	movq	%mm6,%mm3
680	movl	%eax,%ebp
681	psrlq	$8,%mm6
682	pxor	272(%esp,%edi,8),%mm7
683	andb	$15,%al
684	psllq	$56,%mm3
685	pxor	%mm2,%mm6
686	shrl	$4,%ebp
687	pinsrw	$2,(%esi,%ebx,2),%mm1
688	pxor	16(%esp,%eax,8),%mm7
689	roll	$8,%edx
690	pxor	144(%esp,%eax,8),%mm6
691	pxor	%mm3,%mm7
692	pxor	400(%esp,%edi,8),%mm6
693	xorb	(%esp,%edi,1),%cl
694	movb	%dl,%al
/* Continue with state bytes 4..7 (upper half of the qword at 528). */
695	movl	532(%esp),%edx
696	movd	%mm7,%ebx
697	movzbl	%cl,%ecx
698	psrlq	$8,%mm7
699	movq	%mm6,%mm3
700	movl	%eax,%edi
701	psrlq	$8,%mm6
702	pxor	272(%esp,%ebp,8),%mm7
703	andb	$15,%al
704	psllq	$56,%mm3
705	pxor	%mm1,%mm6
706	shrl	$4,%edi
707	pinsrw	$2,(%esi,%ecx,2),%mm0
708	pxor	16(%esp,%eax,8),%mm7
709	roll	$8,%edx
710	pxor	144(%esp,%eax,8),%mm6
711	pxor	%mm3,%mm7
712	pxor	400(%esp,%ebp,8),%mm6
713	xorb	(%esp,%ebp,1),%bl
714	movb	%dl,%al
715	movd	%mm7,%ecx
716	movzbl	%bl,%ebx
717	psrlq	$8,%mm7
718	movq	%mm6,%mm3
719	movl	%eax,%ebp
720	psrlq	$8,%mm6
721	pxor	272(%esp,%edi,8),%mm7
722	andb	$15,%al
723	psllq	$56,%mm3
724	pxor	%mm0,%mm6
725	shrl	$4,%ebp
726	pinsrw	$2,(%esi,%ebx,2),%mm2
727	pxor	16(%esp,%eax,8),%mm7
728	roll	$8,%edx
729	pxor	144(%esp,%eax,8),%mm6
730	pxor	%mm3,%mm7
731	pxor	400(%esp,%edi,8),%mm6
732	xorb	(%esp,%edi,1),%cl
733	movb	%dl,%al
734	movd	%mm7,%ebx
735	movzbl	%cl,%ecx
736	psrlq	$8,%mm7
737	movq	%mm6,%mm3
738	movl	%eax,%edi
739	psrlq	$8,%mm6
740	pxor	272(%esp,%ebp,8),%mm7
741	andb	$15,%al
742	psllq	$56,%mm3
743	pxor	%mm2,%mm6
744	shrl	$4,%edi
745	pinsrw	$2,(%esi,%ecx,2),%mm1
746	pxor	16(%esp,%eax,8),%mm7
747	roll	$8,%edx
748	pxor	144(%esp,%eax,8),%mm6
749	pxor	%mm3,%mm7
750	pxor	400(%esp,%ebp,8),%mm6
751	xorb	(%esp,%ebp,1),%bl
752	movb	%dl,%al
753	movd	%mm7,%ecx
754	movzbl	%bl,%ebx
755	psrlq	$8,%mm7
756	movq	%mm6,%mm3
757	movl	%eax,%ebp
758	psrlq	$8,%mm6
759	pxor	272(%esp,%edi,8),%mm7
760	andb	$15,%al
761	psllq	$56,%mm3
762	pxor	%mm1,%mm6
763	shrl	$4,%ebp
764	pinsrw	$2,(%esi,%ebx,2),%mm0
765	pxor	16(%esp,%eax,8),%mm7
766	roll	$8,%edx
767	pxor	144(%esp,%eax,8),%mm6
768	pxor	%mm3,%mm7
769	pxor	400(%esp,%edi,8),%mm6
770	xorb	(%esp,%edi,1),%cl
771	movb	%dl,%al
/* Continue with state bytes 0..3 (lower half of the qword at 528). */
772	movl	528(%esp),%edx
773	movd	%mm7,%ebx
774	movzbl	%cl,%ecx
775	psrlq	$8,%mm7
776	movq	%mm6,%mm3
777	movl	%eax,%edi
778	psrlq	$8,%mm6
779	pxor	272(%esp,%ebp,8),%mm7
780	andb	$15,%al
781	psllq	$56,%mm3
782	pxor	%mm0,%mm6
783	shrl	$4,%edi
784	pinsrw	$2,(%esi,%ecx,2),%mm2
785	pxor	16(%esp,%eax,8),%mm7
786	roll	$8,%edx
787	pxor	144(%esp,%eax,8),%mm6
788	pxor	%mm3,%mm7
789	pxor	400(%esp,%ebp,8),%mm6
790	xorb	(%esp,%ebp,1),%bl
791	movb	%dl,%al
792	movd	%mm7,%ecx
793	movzbl	%bl,%ebx
794	psrlq	$8,%mm7
795	movq	%mm6,%mm3
796	movl	%eax,%ebp
797	psrlq	$8,%mm6
798	pxor	272(%esp,%edi,8),%mm7
799	andb	$15,%al
800	psllq	$56,%mm3
801	pxor	%mm2,%mm6
802	shrl	$4,%ebp
803	pinsrw	$2,(%esi,%ebx,2),%mm1
804	pxor	16(%esp,%eax,8),%mm7
805	roll	$8,%edx
806	pxor	144(%esp,%eax,8),%mm6
807	pxor	%mm3,%mm7
808	pxor	400(%esp,%edi,8),%mm6
809	xorb	(%esp,%edi,1),%cl
810	movb	%dl,%al
811	movd	%mm7,%ebx
812	movzbl	%cl,%ecx
813	psrlq	$8,%mm7
814	movq	%mm6,%mm3
815	movl	%eax,%edi
816	psrlq	$8,%mm6
817	pxor	272(%esp,%ebp,8),%mm7
818	andb	$15,%al
819	psllq	$56,%mm3
820	pxor	%mm1,%mm6
821	shrl	$4,%edi
822	pinsrw	$2,(%esi,%ecx,2),%mm0
823	pxor	16(%esp,%eax,8),%mm7
824	roll	$8,%edx
825	pxor	144(%esp,%eax,8),%mm6
826	pxor	%mm3,%mm7
827	pxor	400(%esp,%ebp,8),%mm6
828	xorb	(%esp,%ebp,1),%bl
829	movb	%dl,%al
830	movd	%mm7,%ecx
831	movzbl	%bl,%ebx
832	psrlq	$8,%mm7
833	movq	%mm6,%mm3
834	movl	%eax,%ebp
835	psrlq	$8,%mm6
836	pxor	272(%esp,%edi,8),%mm7
837	andb	$15,%al
838	psllq	$56,%mm3
839	pxor	%mm0,%mm6
840	shrl	$4,%ebp
841	pinsrw	$2,(%esi,%ebx,2),%mm2
842	pxor	16(%esp,%eax,8),%mm7
843	roll	$8,%edx
844	pxor	144(%esp,%eax,8),%mm6
845	pxor	%mm3,%mm7
846	pxor	400(%esp,%edi,8),%mm6
847	xorb	(%esp,%edi,1),%cl
848	movb	%dl,%al
/* NOTE(review): edx is not read again before "movd %mm7,%edx" below */
/* overwrites it, so this load appears to be dead; 524(%esp) lies inside */
/* the frame (last slot of the 400..527 table), so it is harmless. */
849	movl	524(%esp),%edx
850	movd	%mm7,%ebx
851	movzbl	%cl,%ecx
852	psrlq	$8,%mm7
853	movq	%mm6,%mm3
854	movl	%eax,%edi
855	psrlq	$8,%mm6
856	pxor	272(%esp,%ebp,8),%mm7
857	andb	$15,%al
858	psllq	$56,%mm3
859	pxor	%mm2,%mm6
860	shrl	$4,%edi
861	pinsrw	$2,(%esi,%ecx,2),%mm1
862	pxor	16(%esp,%eax,8),%mm7
863	pxor	144(%esp,%eax,8),%mm6
864	xorb	(%esp,%ebp,1),%bl
865	pxor	%mm3,%mm7
866	pxor	400(%esp,%ebp,8),%mm6
867	movzbl	%bl,%ebx
/* Final step shifts by 4 bits instead of 8 and uses only the unshifted */
/* tables (16/144) for the last nibble index in edi. */
868	pxor	%mm2,%mm2
869	psllq	$4,%mm1
870	movd	%mm7,%ecx
871	psrlq	$4,%mm7
872	movq	%mm6,%mm3
873	psrlq	$4,%mm6
874	shll	$4,%ecx
875	pxor	16(%esp,%edi,8),%mm7
876	psllq	$60,%mm3
877	movzbl	%cl,%ecx
878	pxor	%mm3,%mm7
879	pxor	144(%esp,%edi,8),%mm6
880	pinsrw	$2,(%esi,%ebx,2),%mm0
881	pxor	%mm1,%mm6
882	movd	%mm7,%edx
883	pinsrw	$3,(%esi,%ecx,2),%mm2
884	psllq	$12,%mm0
885	pxor	%mm0,%mm6
886	psrlq	$32,%mm7
887	pxor	%mm2,%mm6
/* Reload the (already advanced) input pointer for the loop test. */
888	movl	548(%esp),%ecx
889	movd	%mm7,%ebx
/* Byte-reverse mm6: swap the bytes inside each 16-bit word, then */
/* reverse the four words (pshufw $27 = word order 3,2,1,0 -> 0,1,2,3). */
890	movq	%mm6,%mm3
891	psllw	$8,%mm6
892	psrlw	$8,%mm3
893	por	%mm3,%mm6
894	bswap	%edx
895	pshufw	$27,%mm6,%mm6
896	bswap	%ebx
/* Loop until the input pointer equals the end pointer exactly. */
897	cmpl	552(%esp),%ecx
898	jne	.L009outer
/* Store the final state through arg0 and restore the caller's esp. */
899	movl	544(%esp),%eax
900	movl	%edx,12(%eax)
901	movl	%ebx,8(%eax)
902	movq	%mm6,(%eax)
903	movl	556(%esp),%esp
904	emms
905	popl	%edi
906	popl	%esi
907	popl	%ebx
908	popl	%ebp
909	ret
910.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
/*
 * gcm_init_clmul -- derive a 48-byte table from a 16-byte value using
 * carry-less multiplication.
 *
 * Stack arguments (no registers are pushed, so they sit at 4/8(%esp)):
 *   arg0 (edx): output; 16 bytes are written at +0, +16 and +32.
 *   arg1 (eax): pointer to the 16-byte input (unaligned load).
 *
 * Output: +0  = input with its qwords swapped, shifted left by one bit and
 *               conditionally xor-ed with the constant at .Lbswap+16;
 *         +16 = that value multiplied by itself and reduced;
 *         +32 = xor-of-halves of both values packed into one entry.
 * Uses eax, ecx, edx and xmm0-xmm5 only.
 * The raw .byte sequences encode instructions the generator emits as bytes:
 *   102,15,58,68,modrm,imm = pclmulqdq;  102,15,58,15,modrm,imm = palignr.
 */
911.globl	gcm_init_clmul
912.type	gcm_init_clmul,@function
913.align	16
914gcm_init_clmul:
915.L_gcm_init_clmul_begin:
916	movl	4(%esp),%edx
917	movl	8(%esp),%eax
/* PIC idiom: ecx = run-time address of .Lbswap. */
918	call	.L010pic
919.L010pic:
920	popl	%ecx
921	leal	.Lbswap-.L010pic(%ecx),%ecx
922	movdqu	(%eax),%xmm2
/* Swap the two 64-bit halves (78 selects dwords 2,3,0,1). */
923	pshufd	$78,%xmm2,%xmm2
/* xmm4 = top dword broadcast; after pcmpgtd xmm5 is all-ones exactly */
/* when that dword is negative, i.e. the value's top bit is set. */
924	pshufd	$255,%xmm2,%xmm4
925	movdqa	%xmm2,%xmm3
/* 128-bit left shift by one: shift both qwords, then carry bit 63 of */
/* the low qword into bit 0 of the high qword. */
926	psllq	$1,%xmm2
927	pxor	%xmm5,%xmm5
928	psrlq	$63,%xmm3
929	pcmpgtd	%xmm4,%xmm5
930	pslldq	$8,%xmm3
931	por	%xmm3,%xmm2
/* If the shifted-out top bit was set, xor in the constant stored in the */
/* second 16 bytes of .Lbswap. */
932	pand	16(%ecx),%xmm5
933	pxor	%xmm5,%xmm2
/* Multiply the value by itself: three carry-less multiplies (low halves, */
/* high halves, xor-of-halves). */
934	movdqa	%xmm2,%xmm0
935	movdqa	%xmm0,%xmm1
936	pshufd	$78,%xmm0,%xmm3
937	pshufd	$78,%xmm2,%xmm4
938	pxor	%xmm0,%xmm3
939	pxor	%xmm2,%xmm4
/* pclmulqdq $0x00,%xmm2,%xmm0 */
940.byte	102,15,58,68,194,0
/* pclmulqdq $0x11,%xmm2,%xmm1 */
941.byte	102,15,58,68,202,17
/* pclmulqdq $0x00,%xmm4,%xmm3 */
942.byte	102,15,58,68,220,0
/* Recover the middle term and fold it into the 256-bit product xmm1:xmm0. */
943	xorps	%xmm0,%xmm3
944	xorps	%xmm1,%xmm3
945	movdqa	%xmm3,%xmm4
946	psrldq	$8,%xmm3
947	pslldq	$8,%xmm4
948	pxor	%xmm3,%xmm1
949	pxor	%xmm4,%xmm0
/* Reduce xmm1:xmm0 to 128 bits (shift-and-xor sequence; result in xmm0). */
950	movdqa	%xmm0,%xmm4
951	movdqa	%xmm0,%xmm3
952	psllq	$5,%xmm0
953	pxor	%xmm0,%xmm3
954	psllq	$1,%xmm0
955	pxor	%xmm3,%xmm0
956	psllq	$57,%xmm0
957	movdqa	%xmm0,%xmm3
958	pslldq	$8,%xmm0
959	psrldq	$8,%xmm3
960	pxor	%xmm4,%xmm0
961	pxor	%xmm3,%xmm1
962	movdqa	%xmm0,%xmm4
963	psrlq	$1,%xmm0
964	pxor	%xmm4,%xmm1
965	pxor	%xmm0,%xmm4
966	psrlq	$5,%xmm0
967	pxor	%xmm4,%xmm0
968	psrlq	$1,%xmm0
969	pxor	%xmm1,%xmm0
/* Emit the table: value, its square, and the packed xor-of-halves. */
970	pshufd	$78,%xmm2,%xmm3
971	pshufd	$78,%xmm0,%xmm4
972	pxor	%xmm2,%xmm3
973	movdqu	%xmm2,(%edx)
974	pxor	%xmm0,%xmm4
975	movdqu	%xmm0,16(%edx)
/* palignr $8,%xmm3,%xmm4 */
976.byte	102,15,58,15,227,8
977	movdqu	%xmm4,32(%edx)
978	ret
979.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
/*
 * gcm_gmult_clmul -- multiply one 16-byte block by a table value using
 * carry-less multiplication, in place.
 *
 * Stack arguments (no registers are pushed, so they sit at 4/8(%esp)):
 *   arg0 (eax): pointer to the 16-byte block; read, then overwritten.
 *   arg1 (edx): pointer to a table; 16 bytes are read at +0 and at +32
 *               (the layout written by gcm_init_clmul).
 *
 * The block is byte-reversed with the .Lbswap mask on the way in and out.
 * Uses eax, ecx, edx and xmm0-xmm5 only.
 * Raw .byte sequences: 102,15,56,0,modrm = pshufb;
 *                      102,15,58,68,modrm,imm = pclmulqdq.
 */
980.globl	gcm_gmult_clmul
981.type	gcm_gmult_clmul,@function
982.align	16
983gcm_gmult_clmul:
984.L_gcm_gmult_clmul_begin:
985	movl	4(%esp),%eax
986	movl	8(%esp),%edx
/* PIC idiom: ecx = run-time address of .Lbswap. */
987	call	.L011pic
988.L011pic:
989	popl	%ecx
990	leal	.Lbswap-.L011pic(%ecx),%ecx
991	movdqu	(%eax),%xmm0
/* Aligned load: relies on the .align 64 in front of .Lbswap. */
992	movdqa	(%ecx),%xmm5
993	movups	(%edx),%xmm2
/* pshufb %xmm5,%xmm0 -- byte-reverse the block */
994.byte	102,15,56,0,197
995	movups	32(%edx),%xmm4
996	movdqa	%xmm0,%xmm1
997	pshufd	$78,%xmm0,%xmm3
998	pxor	%xmm0,%xmm3
/* pclmulqdq $0x00,%xmm2,%xmm0 */
999.byte	102,15,58,68,194,0
/* pclmulqdq $0x11,%xmm2,%xmm1 */
1000.byte	102,15,58,68,202,17
/* pclmulqdq $0x00,%xmm4,%xmm3 */
1001.byte	102,15,58,68,220,0
/* Recover the middle term and fold it into the product xmm1:xmm0. */
1002	xorps	%xmm0,%xmm3
1003	xorps	%xmm1,%xmm3
1004	movdqa	%xmm3,%xmm4
1005	psrldq	$8,%xmm3
1006	pslldq	$8,%xmm4
1007	pxor	%xmm3,%xmm1
1008	pxor	%xmm4,%xmm0
/* Reduce xmm1:xmm0 to 128 bits (same sequence as in gcm_init_clmul). */
1009	movdqa	%xmm0,%xmm4
1010	movdqa	%xmm0,%xmm3
1011	psllq	$5,%xmm0
1012	pxor	%xmm0,%xmm3
1013	psllq	$1,%xmm0
1014	pxor	%xmm3,%xmm0
1015	psllq	$57,%xmm0
1016	movdqa	%xmm0,%xmm3
1017	pslldq	$8,%xmm0
1018	psrldq	$8,%xmm3
1019	pxor	%xmm4,%xmm0
1020	pxor	%xmm3,%xmm1
1021	movdqa	%xmm0,%xmm4
1022	psrlq	$1,%xmm0
1023	pxor	%xmm4,%xmm1
1024	pxor	%xmm0,%xmm4
1025	psrlq	$5,%xmm0
1026	pxor	%xmm4,%xmm0
1027	psrlq	$1,%xmm0
1028	pxor	%xmm1,%xmm0
/* pshufb %xmm5,%xmm0 -- restore the original byte order */
1029.byte	102,15,56,0,197
1030	movdqu	%xmm0,(%eax)
1031	ret
1032.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
/*
 * gcm_ghash_clmul -- hash a buffer into a 16-byte state using carry-less
 * multiplication, two input blocks per main-loop iteration.
 *
 * Stack arguments (read at 20..32(%esp) after four pushes):
 *   arg0 (eax): pointer to the 16-byte state; read at entry, written at exit.
 *   arg1 (edx): pointer to a table; 16 bytes are read at +0, +16 and +32
 *               (the layout written by gcm_init_clmul).
 *   arg2 (esi): input pointer.
 *   arg3 (ebx): byte count.
 *
 * Length bookkeeping in ebx, for a count of 16*k:
 *   k == 1        -> straight to .L013odd_tail (single block);
 *   k even        -> pairs via .L015mod_loop/.L014even_tail, then done;
 *   k odd, k >= 3 -> pairs, then ebx reaches exactly 0 in the even tail and
 *                    the last block falls through to .L013odd_tail.
 * NOTE(review): behaviour for counts that are 0 or not a multiple of 16 is
 * not handled separately here -- presumably the caller guarantees this;
 * confirm.
 *
 * Raw .byte sequences: 102,15,56,0,modrm = pshufb;
 *                      102,15,58,68,modrm,imm = pclmulqdq.
 * Saves and restores ebp, ebx, esi, edi; uses xmm0-xmm7.
 */
1033.globl	gcm_ghash_clmul
1034.type	gcm_ghash_clmul,@function
1035.align	16
1036gcm_ghash_clmul:
1037.L_gcm_ghash_clmul_begin:
1038	pushl	%ebp
1039	pushl	%ebx
1040	pushl	%esi
1041	pushl	%edi
1042	movl	20(%esp),%eax
1043	movl	24(%esp),%edx
1044	movl	28(%esp),%esi
1045	movl	32(%esp),%ebx
/* PIC idiom: ecx = run-time address of .Lbswap. */
1046	call	.L012pic
1047.L012pic:
1048	popl	%ecx
1049	leal	.Lbswap-.L012pic(%ecx),%ecx
1050	movdqu	(%eax),%xmm0
/* Aligned load: relies on the .align 64 in front of .Lbswap. */
1051	movdqa	(%ecx),%xmm5
1052	movdqu	(%edx),%xmm2
/* pshufb %xmm5,%xmm0 -- byte-reverse the state */
1053.byte	102,15,56,0,197
/* Exactly one block?  Then only the single-block tail runs. */
1054	subl	$16,%ebx
1055	jz	.L013odd_tail
/* Load and byte-reverse the first two blocks. */
1056	movdqu	(%esi),%xmm3
1057	movdqu	16(%esi),%xmm6
/* pshufb %xmm5,%xmm3 */
1058.byte	102,15,56,0,221
/* pshufb %xmm5,%xmm6 */
1059.byte	102,15,56,0,245
/* xmm5 is reused for the table entry at +32 from here on. */
1060	movdqu	32(%edx),%xmm5
/* state ^= first block; start multiplying the second block by the */
/* table entry at +0 (results in xmm6, xmm7, xmm3). */
1061	pxor	%xmm3,%xmm0
1062	pshufd	$78,%xmm6,%xmm3
1063	movdqa	%xmm6,%xmm7
1064	pxor	%xmm6,%xmm3
1065	leal	32(%esi),%esi
/* pclmulqdq $0x00,%xmm2,%xmm6 */
1066.byte	102,15,58,68,242,0
/* pclmulqdq $0x11,%xmm2,%xmm7 */
1067.byte	102,15,58,68,250,17
/* pclmulqdq $0x00,%xmm5,%xmm3 */
1068.byte	102,15,58,68,221,0
/* xmm2 = table entry at +16, used for the state half of the pair. */
1069	movups	16(%edx),%xmm2
1070	nop
/* No further pair available?  Finish in the even tail. */
1071	subl	$32,%ebx
1072	jbe	.L014even_tail
1073	jmp	.L015mod_loop
1074.align	32
1075.L015mod_loop:
/* Multiply the state by the +16 entry, add the pending product of the */
/* previous second block, reduce, and meanwhile load, byte-reverse and */
/* start multiplying the next pair of blocks. */
1076	pshufd	$78,%xmm0,%xmm4
1077	movdqa	%xmm0,%xmm1
1078	pxor	%xmm0,%xmm4
1079	nop
/* pclmulqdq $0x00,%xmm2,%xmm0 */
1080.byte	102,15,58,68,194,0
/* pclmulqdq $0x11,%xmm2,%xmm1 */
1081.byte	102,15,58,68,202,17
/* pclmulqdq $0x10,%xmm5,%xmm4 */
1082.byte	102,15,58,68,229,16
1083	movups	(%edx),%xmm2
1084	xorps	%xmm6,%xmm0
/* Reload the byte-reverse mask (xmm5 held the +32 entry). */
1085	movdqa	(%ecx),%xmm5
1086	xorps	%xmm7,%xmm1
1087	movdqu	(%esi),%xmm7
1088	pxor	%xmm0,%xmm3
1089	movdqu	16(%esi),%xmm6
1090	pxor	%xmm1,%xmm3
/* pshufb %xmm5,%xmm7 */
1091.byte	102,15,56,0,253
1092	pxor	%xmm3,%xmm4
1093	movdqa	%xmm4,%xmm3
1094	psrldq	$8,%xmm4
1095	pslldq	$8,%xmm3
1096	pxor	%xmm4,%xmm1
1097	pxor	%xmm3,%xmm0
/* pshufb %xmm5,%xmm6 */
1098.byte	102,15,56,0,245
/* Fold the first block of the new pair into the running value. */
1099	pxor	%xmm7,%xmm1
1100	movdqa	%xmm6,%xmm7
1101	movdqa	%xmm0,%xmm4
1102	movdqa	%xmm0,%xmm3
1103	psllq	$5,%xmm0
1104	pxor	%xmm0,%xmm3
1105	psllq	$1,%xmm0
1106	pxor	%xmm3,%xmm0
/* pclmulqdq $0x00,%xmm2,%xmm6 */
1107.byte	102,15,58,68,242,0
1108	movups	32(%edx),%xmm5
1109	psllq	$57,%xmm0
1110	movdqa	%xmm0,%xmm3
1111	pslldq	$8,%xmm0
1112	psrldq	$8,%xmm3
1113	pxor	%xmm4,%xmm0
1114	pxor	%xmm3,%xmm1
1115	pshufd	$78,%xmm7,%xmm3
1116	movdqa	%xmm0,%xmm4
1117	psrlq	$1,%xmm0
1118	pxor	%xmm7,%xmm3
1119	pxor	%xmm4,%xmm1
/* pclmulqdq $0x11,%xmm2,%xmm7 */
1120.byte	102,15,58,68,250,17
1121	movups	16(%edx),%xmm2
1122	pxor	%xmm0,%xmm4
1123	psrlq	$5,%xmm0
1124	pxor	%xmm4,%xmm0
1125	psrlq	$1,%xmm0
1126	pxor	%xmm1,%xmm0
/* pclmulqdq $0x00,%xmm5,%xmm3 */
1127.byte	102,15,58,68,221,0
1128	leal	32(%esi),%esi
/* Stay in the loop while more than one pair's worth remains. */
1129	subl	$32,%ebx
1130	ja	.L015mod_loop
1131.L014even_tail:
/* Finish the pair in flight: multiply the state by the +16 entry, add */
/* the pending product (xmm6, xmm7, xmm3) and reduce. */
1132	pshufd	$78,%xmm0,%xmm4
1133	movdqa	%xmm0,%xmm1
1134	pxor	%xmm0,%xmm4
/* pclmulqdq $0x00,%xmm2,%xmm0 */
1135.byte	102,15,58,68,194,0
/* pclmulqdq $0x11,%xmm2,%xmm1 */
1136.byte	102,15,58,68,202,17
/* pclmulqdq $0x10,%xmm5,%xmm4 */
1137.byte	102,15,58,68,229,16
1138	movdqa	(%ecx),%xmm5
1139	xorps	%xmm6,%xmm0
1140	xorps	%xmm7,%xmm1
1141	pxor	%xmm0,%xmm3
1142	pxor	%xmm1,%xmm3
1143	pxor	%xmm3,%xmm4
1144	movdqa	%xmm4,%xmm3
1145	psrldq	$8,%xmm4
1146	pslldq	$8,%xmm3
1147	pxor	%xmm4,%xmm1
1148	pxor	%xmm3,%xmm0
1149	movdqa	%xmm0,%xmm4
1150	movdqa	%xmm0,%xmm3
1151	psllq	$5,%xmm0
1152	pxor	%xmm0,%xmm3
1153	psllq	$1,%xmm0
1154	pxor	%xmm3,%xmm0
1155	psllq	$57,%xmm0
1156	movdqa	%xmm0,%xmm3
1157	pslldq	$8,%xmm0
1158	psrldq	$8,%xmm3
1159	pxor	%xmm4,%xmm0
1160	pxor	%xmm3,%xmm1
1161	movdqa	%xmm0,%xmm4
1162	psrlq	$1,%xmm0
1163	pxor	%xmm4,%xmm1
1164	pxor	%xmm0,%xmm4
1165	psrlq	$5,%xmm0
1166	pxor	%xmm4,%xmm0
1167	psrlq	$1,%xmm0
1168	pxor	%xmm1,%xmm0
/* ebx == 0 means exactly one block is left; otherwise we are done. */
1169	testl	%ebx,%ebx
1170	jnz	.L016done
1171	movups	(%edx),%xmm2
1172.L013odd_tail:
/* Single block: state ^= block, multiply by the +0 entry, reduce. */
1173	movdqu	(%esi),%xmm3
/* pshufb %xmm5,%xmm3 */
1174.byte	102,15,56,0,221
1175	pxor	%xmm3,%xmm0
1176	movdqa	%xmm0,%xmm1
1177	pshufd	$78,%xmm0,%xmm3
1178	pshufd	$78,%xmm2,%xmm4
1179	pxor	%xmm0,%xmm3
1180	pxor	%xmm2,%xmm4
/* pclmulqdq $0x00,%xmm2,%xmm0 */
1181.byte	102,15,58,68,194,0
/* pclmulqdq $0x11,%xmm2,%xmm1 */
1182.byte	102,15,58,68,202,17
/* pclmulqdq $0x00,%xmm4,%xmm3 */
1183.byte	102,15,58,68,220,0
1184	xorps	%xmm0,%xmm3
1185	xorps	%xmm1,%xmm3
1186	movdqa	%xmm3,%xmm4
1187	psrldq	$8,%xmm3
1188	pslldq	$8,%xmm4
1189	pxor	%xmm3,%xmm1
1190	pxor	%xmm4,%xmm0
1191	movdqa	%xmm0,%xmm4
1192	movdqa	%xmm0,%xmm3
1193	psllq	$5,%xmm0
1194	pxor	%xmm0,%xmm3
1195	psllq	$1,%xmm0
1196	pxor	%xmm3,%xmm0
1197	psllq	$57,%xmm0
1198	movdqa	%xmm0,%xmm3
1199	pslldq	$8,%xmm0
1200	psrldq	$8,%xmm3
1201	pxor	%xmm4,%xmm0
1202	pxor	%xmm3,%xmm1
1203	movdqa	%xmm0,%xmm4
1204	psrlq	$1,%xmm0
1205	pxor	%xmm4,%xmm1
1206	pxor	%xmm0,%xmm4
1207	psrlq	$5,%xmm0
1208	pxor	%xmm4,%xmm0
1209	psrlq	$1,%xmm0
1210	pxor	%xmm1,%xmm0
1211.L016done:
/* pshufb %xmm5,%xmm0 -- restore byte order, then store the state. */
1212.byte	102,15,56,0,197
1213	movdqu	%xmm0,(%eax)
1214	popl	%edi
1215	popl	%esi
1216	popl	%ebx
1217	popl	%ebp
1218	ret
1219.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
/*
 * Constant tables shared by the routines above.  No section directive
 * precedes them, so they are emitted in the same section as the code.
 */
/*
 * .Lbswap: first 16 bytes = byte indices 15..0, loaded with movdqa and used
 * as the pshufb mask that reverses a 16-byte block (the 64-byte alignment
 * makes the aligned load legal).  Second 16 bytes = constant read at
 * .Lbswap+16 by gcm_init_clmul (bytes 0x01 ... 0xc2).
 */
1220.align	64
1221.Lbswap:
1222.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1223.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
/*
 * .Lrem_8bit: 256 entries of 16 bits (32 rows x 8), fetched by
 * gcm_ghash_4bit_mmx with pinsrw (base + index*2).
 */
1224.align	64
1225.Lrem_8bit:
1226.value	0,450,900,582,1800,1738,1164,1358
1227.value	3600,4050,3476,3158,2328,2266,2716,2910
1228.value	7200,7650,8100,7782,6952,6890,6316,6510
1229.value	4656,5106,4532,4214,5432,5370,5820,6014
1230.value	14400,14722,15300,14854,16200,16010,15564,15630
1231.value	13904,14226,13780,13334,12632,12442,13020,13086
1232.value	9312,9634,10212,9766,9064,8874,8428,8494
1233.value	10864,11186,10740,10294,11640,11450,12028,12094
1234.value	28800,28994,29444,29382,30600,30282,29708,30158
1235.value	32400,32594,32020,31958,31128,30810,31260,31710
1236.value	27808,28002,28452,28390,27560,27242,26668,27118
1237.value	25264,25458,24884,24822,26040,25722,26172,26622
1238.value	18624,18690,19268,19078,20424,19978,19532,19854
1239.value	18128,18194,17748,17558,16856,16410,16988,17310
1240.value	21728,21794,22372,22182,21480,21034,20588,20910
1241.value	23280,23346,22900,22710,24056,23610,24188,24510
1242.value	57600,57538,57988,58182,58888,59338,58764,58446
1243.value	61200,61138,60564,60758,59416,59866,60316,59998
1244.value	64800,64738,65188,65382,64040,64490,63916,63598
1245.value	62256,62194,61620,61814,62520,62970,63420,63102
1246.value	55616,55426,56004,56070,56904,57226,56780,56334
1247.value	55120,54930,54484,54550,53336,53658,54236,53790
1248.value	50528,50338,50916,50982,49768,50090,49644,49198
1249.value	52080,51890,51444,51510,52344,52666,53244,52798
1250.value	37248,36930,37380,37830,38536,38730,38156,38094
1251.value	40848,40530,39956,40406,39064,39258,39708,39646
1252.value	36256,35938,36388,36838,35496,35690,35116,35054
1253.value	33712,33394,32820,33270,33976,34170,34620,34558
1254.value	43456,43010,43588,43910,44744,44810,44364,44174
1255.value	42960,42514,42068,42390,41176,41242,41820,41630
1256.value	46560,46114,46692,47014,45800,45866,45420,45230
1257.value	48112,47666,47220,47542,48376,48442,49020,48830
/*
 * .Lrem_4bit: 16 entries of 8 bytes (low dword 0, high dword = constant),
 * fetched by gcm_gmult_4bit_mmx with base + index*8.  The high dwords equal
 * the immediates the *_4bit_x86 routines store on their stack.
 */
1258.align	64
1259.Lrem_4bit:
1260.long	0,0,0,471859200,0,943718400,0,610271232
1261.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
1262.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
1263.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* NUL-terminated ASCII ident string: */
/* "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
1264.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
1265.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
1266.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
1267.byte	0
1268#else
1269.text
/*
 * gcm_gmult_4bit_x86 (non-PIC branch of the file) -- integer-register-only
 * 4-bit table multiply of one 16-byte block.
 *
 * Stack arguments (read at 104/108(%esp) after four pushes and an 84-byte
 * frame):
 *   arg0: pointer to a 16-byte block; read at entry, overwritten with the
 *         result at exit.
 *   arg1: pointer to a table addressed as 16-byte entries (index = nibble*16).
 *
 * Saves and restores ebp, ebx, esi, edi; eax, ecx, edx are scratch.
 * Frame layout: 0..15(%esp) copy of the input block,
 *               16..79(%esp) 16-entry dword reduction table.
 */
1270.globl	gcm_gmult_4bit_x86
1271.type	gcm_gmult_4bit_x86,@function
1272.align	16
1273gcm_gmult_4bit_x86:
1274.L_gcm_gmult_4bit_x86_begin:
1275	pushl	%ebp
1276	pushl	%ebx
1277	pushl	%esi
1278	pushl	%edi
1279	subl	$84,%esp
1280	movl	104(%esp),%edi
1281	movl	108(%esp),%esi
1282	movl	(%edi),%ebp
1283	movl	4(%edi),%edx
1284	movl	8(%edi),%ecx
1285	movl	12(%edi),%ebx
/* Build the 16-entry reduction table on the stack. */
1286	movl	$0,16(%esp)
1287	movl	$471859200,20(%esp)
1288	movl	$943718400,24(%esp)
1289	movl	$610271232,28(%esp)
1290	movl	$1887436800,32(%esp)
1291	movl	$1822425088,36(%esp)
1292	movl	$1220542464,40(%esp)
1293	movl	$1423966208,44(%esp)
1294	movl	$3774873600,48(%esp)
1295	movl	$4246732800,52(%esp)
1296	movl	$3644850176,56(%esp)
1297	movl	$3311403008,60(%esp)
1298	movl	$2441084928,64(%esp)
1299	movl	$2376073216,68(%esp)
1300	movl	$2847932416,72(%esp)
1301	movl	$3051356160,76(%esp)
/* Keep a byte-addressable copy of the input block at 0..15(%esp). */
1302	movl	%ebp,(%esp)
1303	movl	%edx,4(%esp)
1304	movl	%ecx,8(%esp)
1305	movl	%ebx,12(%esp)
/* ebx = (low nibble of input byte 15) * 16 = offset of the first entry. */
1306	shrl	$20,%ebx
1307	andl	$240,%ebx
/* Accumulator, most to least significant dword: ebp, edx, ecx, ebx. */
1308	movl	4(%esi,%ebx,1),%ebp
1309	movl	(%esi,%ebx,1),%edx
1310	movl	12(%esi,%ebx,1),%ecx
1311	movl	8(%esi,%ebx,1),%ebx
1312	xorl	%eax,%eax
/* edi = index of the input byte being consumed, counting down from 15. */
1313	movl	$15,%edi
1314	jmp	.L000x86_loop
1315.align	16
1316.L000x86_loop:
/* Shift the accumulator right by 4, fold the shifted-out nibble back in */
/* via the stack table, then xor the entry for the HIGH nibble of byte edi. */
1317	movb	%bl,%al
1318	shrdl	$4,%ecx,%ebx
1319	andb	$15,%al
1320	shrdl	$4,%edx,%ecx
1321	shrdl	$4,%ebp,%edx
1322	shrl	$4,%ebp
1323	xorl	16(%esp,%eax,4),%ebp
1324	movb	(%esp,%edi,1),%al
1325	andb	$240,%al
1326	xorl	8(%esi,%eax,1),%ebx
1327	xorl	12(%esi,%eax,1),%ecx
1328	xorl	(%esi,%eax,1),%edx
1329	xorl	4(%esi,%eax,1),%ebp
/* Leave once byte 0 has been handled (edi goes negative). */
1330	decl	%edi
1331	js	.L001x86_break
/* Same shift/fold, then xor the entry for the LOW nibble of the next */
/* lower input byte (shlb discards the high nibble). */
1332	movb	%bl,%al
1333	shrdl	$4,%ecx,%ebx
1334	andb	$15,%al
1335	shrdl	$4,%edx,%ecx
1336	shrdl	$4,%ebp,%edx
1337	shrl	$4,%ebp
1338	xorl	16(%esp,%eax,4),%ebp
1339	movb	(%esp,%edi,1),%al
1340	shlb	$4,%al
1341	xorl	8(%esi,%eax,1),%ebx
1342	xorl	12(%esi,%eax,1),%ecx
1343	xorl	(%esi,%eax,1),%edx
1344	xorl	4(%esi,%eax,1),%ebp
1345	jmp	.L000x86_loop
1346.align	16
1347.L001x86_break:
/* Byte-swap each dword and write the result back over the arg0 block. */
1348	bswap	%ebx
1349	bswap	%ecx
1350	bswap	%edx
1351	bswap	%ebp
1352	movl	104(%esp),%edi
1353	movl	%ebx,12(%edi)
1354	movl	%ecx,8(%edi)
1355	movl	%edx,4(%edi)
1356	movl	%ebp,(%edi)
1357	addl	$84,%esp
1358	popl	%edi
1359	popl	%esi
1360	popl	%ebx
1361	popl	%ebp
1362	ret
1363.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
/*
 * gcm_ghash_4bit_x86: fold a buffer into a 16-byte hash state using a
 * 4-bit lookup table and integer registers only.
 *
 * Stack arguments (offsets valid after the four pushes and the 84-byte
 * frame set up below):
 *   104(%esp)  pointer to the 16-byte state; read on entry, rewritten on exit
 *   108(%esp)  base of a table of 16-byte entries, indexed by nibble*16
 *   112(%esp)  input pointer; advanced by 16 per block
 *   116(%esp)  byte count; overwritten below with the end pointer
 * NOTE(review): the C prototype is presumably
 *   (u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len)
 * -- confirm against the caller's declaration.
 *
 * Frame layout:
 *   0..15(%esp)   state XOR current input block, scanned a nibble at a time
 *   16..79(%esp)  16 dword constants indexed by the nibble shifted out
 *                 (same values as the odd-indexed dwords of .Lrem_4bit)
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * the flags are clobbered.  The block loop tests its bound at the bottom,
 * so one block is always processed, even for a zero byte count.
 */
1364.globl	gcm_ghash_4bit_x86
1365.type	gcm_ghash_4bit_x86,@function
1366.align	16
1367gcm_ghash_4bit_x86:
1368.L_gcm_ghash_4bit_x86_begin:
1369	pushl	%ebp
1370	pushl	%ebx
1371	pushl	%esi
1372	pushl	%edi
1373	subl	$84,%esp
1374	movl	104(%esp),%ebx
1375	movl	108(%esp),%esi
1376	movl	112(%esp),%edi
1377	movl	116(%esp),%ecx
/* Turn the byte count into an end pointer and keep it in the argument slot. */
1378	addl	%edi,%ecx
1379	movl	%ecx,116(%esp)
/* Load the state as four dwords: %ebp=+0, %edx=+4, %ecx=+8, %ebx=+12. */
1380	movl	(%ebx),%ebp
1381	movl	4(%ebx),%edx
1382	movl	8(%ebx),%ecx
1383	movl	12(%ebx),%ebx
/* Build the 16-entry dword constant table at 16(%esp). */
1384	movl	$0,16(%esp)
1385	movl	$471859200,20(%esp)
1386	movl	$943718400,24(%esp)
1387	movl	$610271232,28(%esp)
1388	movl	$1887436800,32(%esp)
1389	movl	$1822425088,36(%esp)
1390	movl	$1220542464,40(%esp)
1391	movl	$1423966208,44(%esp)
1392	movl	$3774873600,48(%esp)
1393	movl	$4246732800,52(%esp)
1394	movl	$3644850176,56(%esp)
1395	movl	$3311403008,60(%esp)
1396	movl	$2441084928,64(%esp)
1397	movl	$2376073216,68(%esp)
1398	movl	$2847932416,72(%esp)
1399	movl	$3051356160,76(%esp)
1400.align	16
/* Per block: XOR 16 input bytes into the running value, spill to 0..15(%esp). */
1401.L002x86_outer_loop:
1402	xorl	12(%edi),%ebx
1403	xorl	8(%edi),%ecx
1404	xorl	4(%edi),%edx
1405	xorl	(%edi),%ebp
1406	movl	%ebx,12(%esp)
1407	movl	%ecx,8(%esp)
1408	movl	%edx,4(%esp)
1409	movl	%ebp,(%esp)
/*
 * (dword 12 >> 20) & 0xf0 is the low nibble of byte 15 scaled by 16: the
 * first table index.  The entry is loaded as %ebx=+8, %ecx=+12, %edx=+0,
 * %ebp=+4, which is the low-to-high order of the 128-bit shift chain below.
 */
1410	shrl	$20,%ebx
1411	andl	$240,%ebx
1412	movl	4(%esi,%ebx,1),%ebp
1413	movl	(%esi,%ebx,1),%edx
1414	movl	12(%esi,%ebx,1),%ecx
1415	movl	8(%esi,%ebx,1),%ebx
/* %eax stays zero above %al so it can be used directly as an index. */
1416	xorl	%eax,%eax
/* %edi now indexes the spilled bytes, from 15 down to 0. */
1417	movl	$15,%edi
1418	jmp	.L003x86_loop
1419.align	16
/*
 * First half: shift %ebp:%edx:%ecx:%ebx right by 4, XOR the constant for
 * the nibble shifted out into %ebp, then XOR in the table entry selected
 * by the high nibble of spilled byte %edi.
 */
1420.L003x86_loop:
1421	movb	%bl,%al
1422	shrdl	$4,%ecx,%ebx
1423	andb	$15,%al
1424	shrdl	$4,%edx,%ecx
1425	shrdl	$4,%ebp,%edx
1426	shrl	$4,%ebp
1427	xorl	16(%esp,%eax,4),%ebp
1428	movb	(%esp,%edi,1),%al
1429	andb	$240,%al
1430	xorl	8(%esi,%eax,1),%ebx
1431	xorl	12(%esi,%eax,1),%ecx
1432	xorl	(%esi,%eax,1),%edx
1433	xorl	4(%esi,%eax,1),%ebp
/* Step to the next lower byte; the index going negative ends the block. */
1434	decl	%edi
1435	js	.L004x86_break
/* Second half: same shift and constant XOR, then the low nibble (shlb $4) of that byte. */
1436	movb	%bl,%al
1437	shrdl	$4,%ecx,%ebx
1438	andb	$15,%al
1439	shrdl	$4,%edx,%ecx
1440	shrdl	$4,%ebp,%edx
1441	shrl	$4,%ebp
1442	xorl	16(%esp,%eax,4),%ebp
1443	movb	(%esp,%edi,1),%al
1444	shlb	$4,%al
1445	xorl	8(%esi,%eax,1),%ebx
1446	xorl	12(%esi,%eax,1),%ecx
1447	xorl	(%esi,%eax,1),%edx
1448	xorl	4(%esi,%eax,1),%ebp
1449	jmp	.L003x86_loop
1450.align	16
/* Byte-swap each dword of the result back into the state's memory order. */
1451.L004x86_break:
1452	bswap	%ebx
1453	bswap	%ecx
1454	bswap	%edx
1455	bswap	%ebp
/* Advance the input pointer; continue while it is below the end pointer (unsigned). */
1456	movl	112(%esp),%edi
1457	leal	16(%edi),%edi
1458	cmpl	116(%esp),%edi
1459	movl	%edi,112(%esp)
1460	jb	.L002x86_outer_loop
/* Write the final value back through the state pointer. */
1461	movl	104(%esp),%edi
1462	movl	%ebx,12(%edi)
1463	movl	%ecx,8(%edi)
1464	movl	%edx,4(%edi)
1465	movl	%ebp,(%edi)
1466	addl	$84,%esp
1467	popl	%edi
1468	popl	%esi
1469	popl	%ebx
1470	popl	%ebp
1471	ret
1472.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
/*
 * gcm_gmult_4bit_mmx: multiply the 16-byte state in place using a 4-bit
 * lookup table, with the 128-bit accumulator held in %mm1:%mm0.
 *
 * Stack arguments (after the four pushes):
 *   20(%esp)  pointer to the 16-byte state (%edi); read bytewise from
 *             offset 15 down to 0, rewritten on exit
 *   24(%esp)  base of a table of 16-byte entries, indexed by nibble*16 (%esi)
 * NOTE(review): presumably (u64 Xi[2], const u128 Htable[16]) -- confirm.
 *
 * %eax holds the address of .Lrem_4bit, obtained PC-relatively with a
 * call/pop pair.  %ebp, %ebx, %esi and %edi are saved and restored;
 * %eax, %ecx, %edx, %mm0-%mm2 and the flags are clobbered.  emms is
 * executed before the result is stored.
 */
1473.globl	gcm_gmult_4bit_mmx
1474.type	gcm_gmult_4bit_mmx,@function
1475.align	16
1476gcm_gmult_4bit_mmx:
1477.L_gcm_gmult_4bit_mmx_begin:
1478	pushl	%ebp
1479	pushl	%ebx
1480	pushl	%esi
1481	pushl	%edi
1482	movl	20(%esp),%edi
1483	movl	24(%esp),%esi
/* PC-relative address of .Lrem_4bit: the call pushes the label address, popl fetches it. */
1484	call	.L005pic_point
1485.L005pic_point:
1486	popl	%eax
1487	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
/*
 * Split byte 15: %ecx = low nibble * 16 (shlb on %cl, upper bits zero),
 * %edx = high nibble * 16.  %ebp indexes the next byte to fetch (14..0).
 */
1488	movzbl	15(%edi),%ebx
1489	xorl	%ecx,%ecx
1490	movl	%ebx,%edx
1491	movb	%dl,%cl
1492	movl	$14,%ebp
1493	shlb	$4,%cl
1494	andl	$240,%edx
/* Seed the accumulator with the low-nibble entry: %mm0 = qword +8, %mm1 = qword +0. */
1495	movq	8(%esi,%ecx,1),%mm0
1496	movq	(%esi,%ecx,1),%mm1
1497	movd	%mm0,%ebx
1498	jmp	.L006mmx_loop
1499.align	16
/*
 * High-nibble step: shift %mm1:%mm0 right by 4 (%mm2 carries the low
 * nibble of %mm1 into the top of %mm0), XOR the .Lrem_4bit entry for the
 * nibble shifted out (%ebx & 15) into %mm1, XOR in the table entry at
 * %edx, and fetch the next lower state byte into %cl.
 */
1500.L006mmx_loop:
1501	psrlq	$4,%mm0
1502	andl	$15,%ebx
1503	movq	%mm1,%mm2
1504	psrlq	$4,%mm1
1505	pxor	8(%esi,%edx,1),%mm0
1506	movb	(%edi,%ebp,1),%cl
1507	psllq	$60,%mm2
1508	pxor	(%eax,%ebx,8),%mm1
1509	decl	%ebp
1510	movd	%mm0,%ebx
1511	pxor	(%esi,%edx,1),%mm1
1512	movl	%ecx,%edx
1513	pxor	%mm2,%mm0
/* Sign flag still comes from decl above: the MMX ops and movl do not modify flags. */
1514	js	.L007mmx_break
/* Low-nibble step for the byte just fetched; %edx keeps its high nibble for the next pass. */
1515	shlb	$4,%cl
1516	andl	$15,%ebx
1517	psrlq	$4,%mm0
1518	andl	$240,%edx
1519	movq	%mm1,%mm2
1520	psrlq	$4,%mm1
1521	pxor	8(%esi,%ecx,1),%mm0
1522	psllq	$60,%mm2
1523	pxor	(%eax,%ebx,8),%mm1
1524	movd	%mm0,%ebx
1525	pxor	(%esi,%ecx,1),%mm1
1526	pxor	%mm2,%mm0
1527	jmp	.L006mmx_loop
1528.align	16
/* Byte 0 has been fetched: finish with its low nibble, then its high nibble. */
1529.L007mmx_break:
1530	shlb	$4,%cl
1531	andl	$15,%ebx
1532	psrlq	$4,%mm0
1533	andl	$240,%edx
1534	movq	%mm1,%mm2
1535	psrlq	$4,%mm1
1536	pxor	8(%esi,%ecx,1),%mm0
1537	psllq	$60,%mm2
1538	pxor	(%eax,%ebx,8),%mm1
1539	movd	%mm0,%ebx
1540	pxor	(%esi,%ecx,1),%mm1
1541	pxor	%mm2,%mm0
1542	psrlq	$4,%mm0
1543	andl	$15,%ebx
1544	movq	%mm1,%mm2
1545	psrlq	$4,%mm1
1546	pxor	8(%esi,%edx,1),%mm0
1547	psllq	$60,%mm2
1548	pxor	(%eax,%ebx,8),%mm1
1549	movd	%mm0,%ebx
1550	pxor	(%esi,%edx,1),%mm1
1551	pxor	%mm2,%mm0
/* Split the accumulator into dwords: %ebx/%ecx = low/high of %mm0, %edx/%ebp = low/high of %mm1. */
1552	psrlq	$32,%mm0
1553	movd	%mm1,%edx
1554	psrlq	$32,%mm1
1555	movd	%mm0,%ecx
1556	movd	%mm1,%ebp
1557	bswap	%ebx
1558	bswap	%edx
1559	bswap	%ecx
1560	bswap	%ebp
/* Leave MMX state before returning. */
1561	emms
1562	movl	%ebx,12(%edi)
1563	movl	%edx,4(%edi)
1564	movl	%ecx,8(%edi)
1565	movl	%ebp,(%edi)
1566	popl	%edi
1567	popl	%esi
1568	popl	%ebx
1569	popl	%ebp
1570	ret
1571.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
/*
 * gcm_ghash_4bit_mmx: MMX hash update that consumes the state one byte
 * (two nibbles) per step, using expanded on-stack copies of the caller's
 * table plus the .Lrem_8bit constants.
 *
 * Stack arguments (after the four pushes):
 *   20(%esp) state pointer (%eax)      24(%esp) table of 16-byte entries (%ebx)
 *   28(%esp) input pointer (%ecx)      32(%esp) byte count (%edx)
 * NOTE(review): the C prototype presumably matches gcm_ghash_4bit_x86 --
 * confirm against the caller's declaration.
 *
 * A frame is carved out below the caller's %esp so that 16(%esp) is
 * 64-byte aligned.  For each of the 16 table entries it holds:
 *   0..15     low byte of the dword at entry+8, shifted left by 4
 *   16..143   qword at entry+8
 *   144..271  qword at entry+0
 *   272..399  (qword at entry+8 >> 4) | (qword at entry+0 << 60)
 *   400..527  qword at entry+0 >> 4
 * followed by:
 *   528..543  state XOR current input block
 *   544 state pointer, 548 input cursor, 552 end pointer, 556 caller's %esp
 *
 * The block loop exits only when the cursor equals input + count (jne),
 * so it presumably relies on count being a non-zero multiple of 16 --
 * confirm against callers.  %ebp, %ebx, %esi and %edi are restored from
 * the caller's stack; emms is executed before returning.
 */
1572.globl	gcm_ghash_4bit_mmx
1573.type	gcm_ghash_4bit_mmx,@function
1574.align	16
1575gcm_ghash_4bit_mmx:
1576.L_gcm_ghash_4bit_mmx_begin:
1577	pushl	%ebp
1578	pushl	%ebx
1579	pushl	%esi
1580	pushl	%edi
1581	movl	20(%esp),%eax
1582	movl	24(%esp),%ebx
1583	movl	28(%esp),%ecx
1584	movl	32(%esp),%edx
/* Remember the caller's %esp; it is restored from 556(%esp) in the epilogue. */
1585	movl	%esp,%ebp
/* PC-relative address of .Lrem_8bit via call/pop. */
1586	call	.L008pic_point
1587.L008pic_point:
1588	popl	%esi
1589	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
1590	subl	$544,%esp
1591	andl	$-64,%esp
1592	subl	$16,%esp
/* %edx = end pointer (input + count). */
1593	addl	%ecx,%edx
1594	movl	%eax,544(%esp)
1595	movl	%edx,552(%esp)
1596	movl	%ebp,556(%esp)
/* Bias the table pointer by 128 so all 16 entries are reachable with 8-bit displacements. */
1597	addl	$128,%ebx
1598	leal	144(%esp),%edi
1599	leal	400(%esp),%ebp
/*
 * Build the on-stack tables described above, one source entry at a time,
 * software-pipelined across %mm0-%mm7 (the stores for one entry overlap
 * the loads for the next).
 */
1600	movl	-120(%ebx),%edx
1601	movq	-120(%ebx),%mm0
1602	movq	-128(%ebx),%mm3
1603	shll	$4,%edx
1604	movb	%dl,(%esp)
1605	movl	-104(%ebx),%edx
1606	movq	-104(%ebx),%mm2
1607	movq	-112(%ebx),%mm5
1608	movq	%mm0,-128(%edi)
1609	psrlq	$4,%mm0
1610	movq	%mm3,(%edi)
1611	movq	%mm3,%mm7
1612	psrlq	$4,%mm3
1613	shll	$4,%edx
1614	movb	%dl,1(%esp)
1615	movl	-88(%ebx),%edx
1616	movq	-88(%ebx),%mm1
1617	psllq	$60,%mm7
1618	movq	-96(%ebx),%mm4
1619	por	%mm7,%mm0
1620	movq	%mm2,-120(%edi)
1621	psrlq	$4,%mm2
1622	movq	%mm5,8(%edi)
1623	movq	%mm5,%mm6
1624	movq	%mm0,-128(%ebp)
1625	psrlq	$4,%mm5
1626	movq	%mm3,(%ebp)
1627	shll	$4,%edx
1628	movb	%dl,2(%esp)
1629	movl	-72(%ebx),%edx
1630	movq	-72(%ebx),%mm0
1631	psllq	$60,%mm6
1632	movq	-80(%ebx),%mm3
1633	por	%mm6,%mm2
1634	movq	%mm1,-112(%edi)
1635	psrlq	$4,%mm1
1636	movq	%mm4,16(%edi)
1637	movq	%mm4,%mm7
1638	movq	%mm2,-120(%ebp)
1639	psrlq	$4,%mm4
1640	movq	%mm5,8(%ebp)
1641	shll	$4,%edx
1642	movb	%dl,3(%esp)
1643	movl	-56(%ebx),%edx
1644	movq	-56(%ebx),%mm2
1645	psllq	$60,%mm7
1646	movq	-64(%ebx),%mm5
1647	por	%mm7,%mm1
1648	movq	%mm0,-104(%edi)
1649	psrlq	$4,%mm0
1650	movq	%mm3,24(%edi)
1651	movq	%mm3,%mm6
1652	movq	%mm1,-112(%ebp)
1653	psrlq	$4,%mm3
1654	movq	%mm4,16(%ebp)
1655	shll	$4,%edx
1656	movb	%dl,4(%esp)
1657	movl	-40(%ebx),%edx
1658	movq	-40(%ebx),%mm1
1659	psllq	$60,%mm6
1660	movq	-48(%ebx),%mm4
1661	por	%mm6,%mm0
1662	movq	%mm2,-96(%edi)
1663	psrlq	$4,%mm2
1664	movq	%mm5,32(%edi)
1665	movq	%mm5,%mm7
1666	movq	%mm0,-104(%ebp)
1667	psrlq	$4,%mm5
1668	movq	%mm3,24(%ebp)
1669	shll	$4,%edx
1670	movb	%dl,5(%esp)
1671	movl	-24(%ebx),%edx
1672	movq	-24(%ebx),%mm0
1673	psllq	$60,%mm7
1674	movq	-32(%ebx),%mm3
1675	por	%mm7,%mm2
1676	movq	%mm1,-88(%edi)
1677	psrlq	$4,%mm1
1678	movq	%mm4,40(%edi)
1679	movq	%mm4,%mm6
1680	movq	%mm2,-96(%ebp)
1681	psrlq	$4,%mm4
1682	movq	%mm5,32(%ebp)
1683	shll	$4,%edx
1684	movb	%dl,6(%esp)
1685	movl	-8(%ebx),%edx
1686	movq	-8(%ebx),%mm2
1687	psllq	$60,%mm6
1688	movq	-16(%ebx),%mm5
1689	por	%mm6,%mm1
1690	movq	%mm0,-80(%edi)
1691	psrlq	$4,%mm0
1692	movq	%mm3,48(%edi)
1693	movq	%mm3,%mm7
1694	movq	%mm1,-88(%ebp)
1695	psrlq	$4,%mm3
1696	movq	%mm4,40(%ebp)
1697	shll	$4,%edx
1698	movb	%dl,7(%esp)
1699	movl	8(%ebx),%edx
1700	movq	8(%ebx),%mm1
1701	psllq	$60,%mm7
1702	movq	(%ebx),%mm4
1703	por	%mm7,%mm0
1704	movq	%mm2,-72(%edi)
1705	psrlq	$4,%mm2
1706	movq	%mm5,56(%edi)
1707	movq	%mm5,%mm6
1708	movq	%mm0,-80(%ebp)
1709	psrlq	$4,%mm5
1710	movq	%mm3,48(%ebp)
1711	shll	$4,%edx
1712	movb	%dl,8(%esp)
1713	movl	24(%ebx),%edx
1714	movq	24(%ebx),%mm0
1715	psllq	$60,%mm6
1716	movq	16(%ebx),%mm3
1717	por	%mm6,%mm2
1718	movq	%mm1,-64(%edi)
1719	psrlq	$4,%mm1
1720	movq	%mm4,64(%edi)
1721	movq	%mm4,%mm7
1722	movq	%mm2,-72(%ebp)
1723	psrlq	$4,%mm4
1724	movq	%mm5,56(%ebp)
1725	shll	$4,%edx
1726	movb	%dl,9(%esp)
1727	movl	40(%ebx),%edx
1728	movq	40(%ebx),%mm2
1729	psllq	$60,%mm7
1730	movq	32(%ebx),%mm5
1731	por	%mm7,%mm1
1732	movq	%mm0,-56(%edi)
1733	psrlq	$4,%mm0
1734	movq	%mm3,72(%edi)
1735	movq	%mm3,%mm6
1736	movq	%mm1,-64(%ebp)
1737	psrlq	$4,%mm3
1738	movq	%mm4,64(%ebp)
1739	shll	$4,%edx
1740	movb	%dl,10(%esp)
1741	movl	56(%ebx),%edx
1742	movq	56(%ebx),%mm1
1743	psllq	$60,%mm6
1744	movq	48(%ebx),%mm4
1745	por	%mm6,%mm0
1746	movq	%mm2,-48(%edi)
1747	psrlq	$4,%mm2
1748	movq	%mm5,80(%edi)
1749	movq	%mm5,%mm7
1750	movq	%mm0,-56(%ebp)
1751	psrlq	$4,%mm5
1752	movq	%mm3,72(%ebp)
1753	shll	$4,%edx
1754	movb	%dl,11(%esp)
1755	movl	72(%ebx),%edx
1756	movq	72(%ebx),%mm0
1757	psllq	$60,%mm7
1758	movq	64(%ebx),%mm3
1759	por	%mm7,%mm2
1760	movq	%mm1,-40(%edi)
1761	psrlq	$4,%mm1
1762	movq	%mm4,88(%edi)
1763	movq	%mm4,%mm6
1764	movq	%mm2,-48(%ebp)
1765	psrlq	$4,%mm4
1766	movq	%mm5,80(%ebp)
1767	shll	$4,%edx
1768	movb	%dl,12(%esp)
1769	movl	88(%ebx),%edx
1770	movq	88(%ebx),%mm2
1771	psllq	$60,%mm6
1772	movq	80(%ebx),%mm5
1773	por	%mm6,%mm1
1774	movq	%mm0,-32(%edi)
1775	psrlq	$4,%mm0
1776	movq	%mm3,96(%edi)
1777	movq	%mm3,%mm7
1778	movq	%mm1,-40(%ebp)
1779	psrlq	$4,%mm3
1780	movq	%mm4,88(%ebp)
1781	shll	$4,%edx
1782	movb	%dl,13(%esp)
1783	movl	104(%ebx),%edx
1784	movq	104(%ebx),%mm1
1785	psllq	$60,%mm7
1786	movq	96(%ebx),%mm4
1787	por	%mm7,%mm0
1788	movq	%mm2,-24(%edi)
1789	psrlq	$4,%mm2
1790	movq	%mm5,104(%edi)
1791	movq	%mm5,%mm6
1792	movq	%mm0,-32(%ebp)
1793	psrlq	$4,%mm5
1794	movq	%mm3,96(%ebp)
1795	shll	$4,%edx
1796	movb	%dl,14(%esp)
1797	movl	120(%ebx),%edx
1798	movq	120(%ebx),%mm0
1799	psllq	$60,%mm6
1800	movq	112(%ebx),%mm3
1801	por	%mm6,%mm2
1802	movq	%mm1,-16(%edi)
1803	psrlq	$4,%mm1
1804	movq	%mm4,112(%edi)
1805	movq	%mm4,%mm7
1806	movq	%mm2,-24(%ebp)
1807	psrlq	$4,%mm4
1808	movq	%mm5,104(%ebp)
1809	shll	$4,%edx
1810	movb	%dl,15(%esp)
1811	psllq	$60,%mm7
1812	por	%mm7,%mm1
1813	movq	%mm0,-8(%edi)
1814	psrlq	$4,%mm0
1815	movq	%mm3,120(%edi)
1816	movq	%mm3,%mm6
1817	movq	%mm1,-16(%ebp)
1818	psrlq	$4,%mm3
1819	movq	%mm4,112(%ebp)
1820	psllq	$60,%mm6
1821	por	%mm6,%mm0
1822	movq	%mm0,-8(%ebp)
1823	movq	%mm3,120(%ebp)
/* Load the state: %mm6 = bytes 0..7, %ebx = dword at +8, %edx = dword at +12. */
1824	movq	(%eax),%mm6
1825	movl	8(%eax),%ebx
1826	movl	12(%eax),%edx
1827.align	16
/*
 * Per block: XOR the 16 input bytes into the state, spill bytes 0..11 of
 * the result to 528..539(%esp), advance and save the input cursor, then
 * walk the state a byte at a time starting from byte 15 (roll $8 rotates
 * the next byte of %edx into %dl).  Each step splits the byte into a low
 * nibble (%al, indexes the 16/144 tables) and a high nibble (%ebp or %edi,
 * indexes the 272/400 tables and the byte table at 0), shifts the
 * accumulator %mm6:%mm7 right by 8, and uses the byte shifted out to pick
 * a 16-bit .Lrem_8bit entry that pinsrw stages in %mm0/%mm1/%mm2 for a
 * later XOR into %mm6.
 */
1828.L009outer:
1829	xorl	12(%ecx),%edx
1830	xorl	8(%ecx),%ebx
1831	pxor	(%ecx),%mm6
1832	leal	16(%ecx),%ecx
1833	movl	%ebx,536(%esp)
1834	movq	%mm6,528(%esp)
1835	movl	%ecx,548(%esp)
1836	xorl	%eax,%eax
1837	roll	$8,%edx
1838	movb	%dl,%al
1839	movl	%eax,%ebp
1840	andb	$15,%al
1841	shrl	$4,%ebp
1842	pxor	%mm0,%mm0
1843	roll	$8,%edx
1844	pxor	%mm1,%mm1
1845	pxor	%mm2,%mm2
1846	movq	16(%esp,%eax,8),%mm7
1847	movq	144(%esp,%eax,8),%mm6
1848	movb	%dl,%al
1849	movd	%mm7,%ebx
1850	psrlq	$8,%mm7
1851	movq	%mm6,%mm3
1852	movl	%eax,%edi
1853	psrlq	$8,%mm6
1854	pxor	272(%esp,%ebp,8),%mm7
1855	andb	$15,%al
1856	psllq	$56,%mm3
1857	shrl	$4,%edi
1858	pxor	16(%esp,%eax,8),%mm7
1859	roll	$8,%edx
1860	pxor	144(%esp,%eax,8),%mm6
1861	pxor	%mm3,%mm7
1862	pxor	400(%esp,%ebp,8),%mm6
1863	xorb	(%esp,%ebp,1),%bl
1864	movb	%dl,%al
1865	movd	%mm7,%ecx
1866	movzbl	%bl,%ebx
1867	psrlq	$8,%mm7
1868	movq	%mm6,%mm3
1869	movl	%eax,%ebp
1870	psrlq	$8,%mm6
1871	pxor	272(%esp,%edi,8),%mm7
1872	andb	$15,%al
1873	psllq	$56,%mm3
1874	shrl	$4,%ebp
1875	pinsrw	$2,(%esi,%ebx,2),%mm2
1876	pxor	16(%esp,%eax,8),%mm7
1877	roll	$8,%edx
1878	pxor	144(%esp,%eax,8),%mm6
1879	pxor	%mm3,%mm7
1880	pxor	400(%esp,%edi,8),%mm6
1881	xorb	(%esp,%edi,1),%cl
1882	movb	%dl,%al
/* Bytes 12..15 consumed; continue with the spilled dword holding bytes 8..11. */
1883	movl	536(%esp),%edx
1884	movd	%mm7,%ebx
1885	movzbl	%cl,%ecx
1886	psrlq	$8,%mm7
1887	movq	%mm6,%mm3
1888	movl	%eax,%edi
1889	psrlq	$8,%mm6
1890	pxor	272(%esp,%ebp,8),%mm7
1891	andb	$15,%al
1892	psllq	$56,%mm3
1893	pxor	%mm2,%mm6
1894	shrl	$4,%edi
1895	pinsrw	$2,(%esi,%ecx,2),%mm1
1896	pxor	16(%esp,%eax,8),%mm7
1897	roll	$8,%edx
1898	pxor	144(%esp,%eax,8),%mm6
1899	pxor	%mm3,%mm7
1900	pxor	400(%esp,%ebp,8),%mm6
1901	xorb	(%esp,%ebp,1),%bl
1902	movb	%dl,%al
1903	movd	%mm7,%ecx
1904	movzbl	%bl,%ebx
1905	psrlq	$8,%mm7
1906	movq	%mm6,%mm3
1907	movl	%eax,%ebp
1908	psrlq	$8,%mm6
1909	pxor	272(%esp,%edi,8),%mm7
1910	andb	$15,%al
1911	psllq	$56,%mm3
1912	pxor	%mm1,%mm6
1913	shrl	$4,%ebp
1914	pinsrw	$2,(%esi,%ebx,2),%mm0
1915	pxor	16(%esp,%eax,8),%mm7
1916	roll	$8,%edx
1917	pxor	144(%esp,%eax,8),%mm6
1918	pxor	%mm3,%mm7
1919	pxor	400(%esp,%edi,8),%mm6
1920	xorb	(%esp,%edi,1),%cl
1921	movb	%dl,%al
1922	movd	%mm7,%ebx
1923	movzbl	%cl,%ecx
1924	psrlq	$8,%mm7
1925	movq	%mm6,%mm3
1926	movl	%eax,%edi
1927	psrlq	$8,%mm6
1928	pxor	272(%esp,%ebp,8),%mm7
1929	andb	$15,%al
1930	psllq	$56,%mm3
1931	pxor	%mm0,%mm6
1932	shrl	$4,%edi
1933	pinsrw	$2,(%esi,%ecx,2),%mm2
1934	pxor	16(%esp,%eax,8),%mm7
1935	roll	$8,%edx
1936	pxor	144(%esp,%eax,8),%mm6
1937	pxor	%mm3,%mm7
1938	pxor	400(%esp,%ebp,8),%mm6
1939	xorb	(%esp,%ebp,1),%bl
1940	movb	%dl,%al
1941	movd	%mm7,%ecx
1942	movzbl	%bl,%ebx
1943	psrlq	$8,%mm7
1944	movq	%mm6,%mm3
1945	movl	%eax,%ebp
1946	psrlq	$8,%mm6
1947	pxor	272(%esp,%edi,8),%mm7
1948	andb	$15,%al
1949	psllq	$56,%mm3
1950	pxor	%mm2,%mm6
1951	shrl	$4,%ebp
1952	pinsrw	$2,(%esi,%ebx,2),%mm1
1953	pxor	16(%esp,%eax,8),%mm7
1954	roll	$8,%edx
1955	pxor	144(%esp,%eax,8),%mm6
1956	pxor	%mm3,%mm7
1957	pxor	400(%esp,%edi,8),%mm6
1958	xorb	(%esp,%edi,1),%cl
1959	movb	%dl,%al
/* Continue with the spilled dword holding bytes 4..7. */
1960	movl	532(%esp),%edx
1961	movd	%mm7,%ebx
1962	movzbl	%cl,%ecx
1963	psrlq	$8,%mm7
1964	movq	%mm6,%mm3
1965	movl	%eax,%edi
1966	psrlq	$8,%mm6
1967	pxor	272(%esp,%ebp,8),%mm7
1968	andb	$15,%al
1969	psllq	$56,%mm3
1970	pxor	%mm1,%mm6
1971	shrl	$4,%edi
1972	pinsrw	$2,(%esi,%ecx,2),%mm0
1973	pxor	16(%esp,%eax,8),%mm7
1974	roll	$8,%edx
1975	pxor	144(%esp,%eax,8),%mm6
1976	pxor	%mm3,%mm7
1977	pxor	400(%esp,%ebp,8),%mm6
1978	xorb	(%esp,%ebp,1),%bl
1979	movb	%dl,%al
1980	movd	%mm7,%ecx
1981	movzbl	%bl,%ebx
1982	psrlq	$8,%mm7
1983	movq	%mm6,%mm3
1984	movl	%eax,%ebp
1985	psrlq	$8,%mm6
1986	pxor	272(%esp,%edi,8),%mm7
1987	andb	$15,%al
1988	psllq	$56,%mm3
1989	pxor	%mm0,%mm6
1990	shrl	$4,%ebp
1991	pinsrw	$2,(%esi,%ebx,2),%mm2
1992	pxor	16(%esp,%eax,8),%mm7
1993	roll	$8,%edx
1994	pxor	144(%esp,%eax,8),%mm6
1995	pxor	%mm3,%mm7
1996	pxor	400(%esp,%edi,8),%mm6
1997	xorb	(%esp,%edi,1),%cl
1998	movb	%dl,%al
1999	movd	%mm7,%ebx
2000	movzbl	%cl,%ecx
2001	psrlq	$8,%mm7
2002	movq	%mm6,%mm3
2003	movl	%eax,%edi
2004	psrlq	$8,%mm6
2005	pxor	272(%esp,%ebp,8),%mm7
2006	andb	$15,%al
2007	psllq	$56,%mm3
2008	pxor	%mm2,%mm6
2009	shrl	$4,%edi
2010	pinsrw	$2,(%esi,%ecx,2),%mm1
2011	pxor	16(%esp,%eax,8),%mm7
2012	roll	$8,%edx
2013	pxor	144(%esp,%eax,8),%mm6
2014	pxor	%mm3,%mm7
2015	pxor	400(%esp,%ebp,8),%mm6
2016	xorb	(%esp,%ebp,1),%bl
2017	movb	%dl,%al
2018	movd	%mm7,%ecx
2019	movzbl	%bl,%ebx
2020	psrlq	$8,%mm7
2021	movq	%mm6,%mm3
2022	movl	%eax,%ebp
2023	psrlq	$8,%mm6
2024	pxor	272(%esp,%edi,8),%mm7
2025	andb	$15,%al
2026	psllq	$56,%mm3
2027	pxor	%mm1,%mm6
2028	shrl	$4,%ebp
2029	pinsrw	$2,(%esi,%ebx,2),%mm0
2030	pxor	16(%esp,%eax,8),%mm7
2031	roll	$8,%edx
2032	pxor	144(%esp,%eax,8),%mm6
2033	pxor	%mm3,%mm7
2034	pxor	400(%esp,%edi,8),%mm6
2035	xorb	(%esp,%edi,1),%cl
2036	movb	%dl,%al
/* Continue with the spilled dword holding bytes 0..3. */
2037	movl	528(%esp),%edx
2038	movd	%mm7,%ebx
2039	movzbl	%cl,%ecx
2040	psrlq	$8,%mm7
2041	movq	%mm6,%mm3
2042	movl	%eax,%edi
2043	psrlq	$8,%mm6
2044	pxor	272(%esp,%ebp,8),%mm7
2045	andb	$15,%al
2046	psllq	$56,%mm3
2047	pxor	%mm0,%mm6
2048	shrl	$4,%edi
2049	pinsrw	$2,(%esi,%ecx,2),%mm2
2050	pxor	16(%esp,%eax,8),%mm7
2051	roll	$8,%edx
2052	pxor	144(%esp,%eax,8),%mm6
2053	pxor	%mm3,%mm7
2054	pxor	400(%esp,%ebp,8),%mm6
2055	xorb	(%esp,%ebp,1),%bl
2056	movb	%dl,%al
2057	movd	%mm7,%ecx
2058	movzbl	%bl,%ebx
2059	psrlq	$8,%mm7
2060	movq	%mm6,%mm3
2061	movl	%eax,%ebp
2062	psrlq	$8,%mm6
2063	pxor	272(%esp,%edi,8),%mm7
2064	andb	$15,%al
2065	psllq	$56,%mm3
2066	pxor	%mm2,%mm6
2067	shrl	$4,%ebp
2068	pinsrw	$2,(%esi,%ebx,2),%mm1
2069	pxor	16(%esp,%eax,8),%mm7
2070	roll	$8,%edx
2071	pxor	144(%esp,%eax,8),%mm6
2072	pxor	%mm3,%mm7
2073	pxor	400(%esp,%edi,8),%mm6
2074	xorb	(%esp,%edi,1),%cl
2075	movb	%dl,%al
2076	movd	%mm7,%ebx
2077	movzbl	%cl,%ecx
2078	psrlq	$8,%mm7
2079	movq	%mm6,%mm3
2080	movl	%eax,%edi
2081	psrlq	$8,%mm6
2082	pxor	272(%esp,%ebp,8),%mm7
2083	andb	$15,%al
2084	psllq	$56,%mm3
2085	pxor	%mm1,%mm6
2086	shrl	$4,%edi
2087	pinsrw	$2,(%esi,%ecx,2),%mm0
2088	pxor	16(%esp,%eax,8),%mm7
2089	roll	$8,%edx
2090	pxor	144(%esp,%eax,8),%mm6
2091	pxor	%mm3,%mm7
2092	pxor	400(%esp,%ebp,8),%mm6
2093	xorb	(%esp,%ebp,1),%bl
2094	movb	%dl,%al
2095	movd	%mm7,%ecx
2096	movzbl	%bl,%ebx
2097	psrlq	$8,%mm7
2098	movq	%mm6,%mm3
2099	movl	%eax,%ebp
2100	psrlq	$8,%mm6
2101	pxor	272(%esp,%edi,8),%mm7
2102	andb	$15,%al
2103	psllq	$56,%mm3
2104	pxor	%mm0,%mm6
2105	shrl	$4,%ebp
2106	pinsrw	$2,(%esi,%ebx,2),%mm2
2107	pxor	16(%esp,%eax,8),%mm7
2108	roll	$8,%edx
2109	pxor	144(%esp,%eax,8),%mm6
2110	pxor	%mm3,%mm7
2111	pxor	400(%esp,%edi,8),%mm6
2112	xorb	(%esp,%edi,1),%cl
2113	movb	%dl,%al
/*
 * NOTE(review): the value loaded from 524(%esp) is never read; %edx is
 * next written by the movd further down.  Looks like a leftover of the
 * unrolling pattern -- confirm against the generator script.
 */
2114	movl	524(%esp),%edx
2115	movd	%mm7,%ebx
2116	movzbl	%cl,%ecx
2117	psrlq	$8,%mm7
2118	movq	%mm6,%mm3
2119	movl	%eax,%edi
2120	psrlq	$8,%mm6
2121	pxor	272(%esp,%ebp,8),%mm7
2122	andb	$15,%al
2123	psllq	$56,%mm3
2124	pxor	%mm2,%mm6
2125	shrl	$4,%edi
2126	pinsrw	$2,(%esi,%ecx,2),%mm1
2127	pxor	16(%esp,%eax,8),%mm7
2128	pxor	144(%esp,%eax,8),%mm6
2129	xorb	(%esp,%ebp,1),%bl
2130	pxor	%mm3,%mm7
2131	pxor	400(%esp,%ebp,8),%mm6
2132	movzbl	%bl,%ebx
/*
 * Tail of the block: one final 4-bit shift of the accumulator for the
 * last high nibble (%edi), then the still-pending .Lrem_8bit words in
 * %mm1, %mm0 and %mm2 are shifted into position and XORed into %mm6.
 */
2133	pxor	%mm2,%mm2
2134	psllq	$4,%mm1
2135	movd	%mm7,%ecx
2136	psrlq	$4,%mm7
2137	movq	%mm6,%mm3
2138	psrlq	$4,%mm6
2139	shll	$4,%ecx
2140	pxor	16(%esp,%edi,8),%mm7
2141	psllq	$60,%mm3
2142	movzbl	%cl,%ecx
2143	pxor	%mm3,%mm7
2144	pxor	144(%esp,%edi,8),%mm6
2145	pinsrw	$2,(%esi,%ebx,2),%mm0
2146	pxor	%mm1,%mm6
2147	movd	%mm7,%edx
2148	pinsrw	$3,(%esi,%ecx,2),%mm2
2149	psllq	$12,%mm0
2150	pxor	%mm0,%mm6
2151	psrlq	$32,%mm7
2152	pxor	%mm2,%mm6
/* Reload the input cursor saved at the top of the block. */
2153	movl	548(%esp),%ecx
2154	movd	%mm7,%ebx
/*
 * Reverse the byte order of the result: swap the bytes within each word
 * of %mm6 (psllw/psrlw/por), reverse its four words (pshufw $27), and
 * bswap the two dwords taken from %mm7.
 */
2155	movq	%mm6,%mm3
2156	psllw	$8,%mm6
2157	psrlw	$8,%mm3
2158	por	%mm3,%mm6
2159	bswap	%edx
2160	pshufw	$27,%mm6,%mm6
2161	bswap	%ebx
/* Loop until the cursor reaches the end pointer exactly. */
2162	cmpl	552(%esp),%ecx
2163	jne	.L009outer
/* Store the state, drop the aligned frame by restoring the caller's %esp, leave MMX state. */
2164	movl	544(%esp),%eax
2165	movl	%edx,12(%eax)
2166	movl	%ebx,8(%eax)
2167	movq	%mm6,(%eax)
2168	movl	556(%esp),%esp
2169	emms
2170	popl	%edi
2171	popl	%esi
2172	popl	%ebx
2173	popl	%ebp
2174	ret
2175.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
/*
 * gcm_init_clmul: derive the 48-byte table read by gcm_gmult_clmul and
 * gcm_ghash_clmul from a 16-byte input value.
 *
 * Stack arguments (leaf function, no pushes):
 *   4(%esp)  output pointer (%edx); 16-byte values are stored at +0, +16, +32
 *   8(%esp)  pointer to the 16-byte input (%eax)
 * NOTE(review): presumably (u128 Htable[16], const u64 H[2]) -- confirm.
 *
 * No general-purpose callee-saved register is touched; %eax, %ecx, %edx,
 * %xmm0-%xmm5 and the flags are clobbered.  The .byte sequences are
 * hand-encoded SSE instructions; each is decoded in a trailing comment.
 */
2176.globl	gcm_init_clmul
2177.type	gcm_init_clmul,@function
2178.align	16
2179gcm_init_clmul:
2180.L_gcm_init_clmul_begin:
2181	movl	4(%esp),%edx
2182	movl	8(%esp),%eax
/* PC-relative address of .Lbswap via call/pop. */
2183	call	.L010pic
2184.L010pic:
2185	popl	%ecx
2186	leal	.Lbswap-.L010pic(%ecx),%ecx
/* Load the input and swap its two 64-bit halves. */
2187	movdqu	(%eax),%xmm2
2188	pshufd	$78,%xmm2,%xmm2
/*
 * Shift the 128-bit value left by one bit (psllq per half, with the bit
 * carried from the low half via psrlq $63 / pslldq $8).  If the top bit
 * was set before the shift (%xmm5 becomes all-ones through pcmpgtd
 * against the broadcast top dword), XOR in the constant stored at
 * .Lbswap+16.
 */
2189	pshufd	$255,%xmm2,%xmm4
2190	movdqa	%xmm2,%xmm3
2191	psllq	$1,%xmm2
2192	pxor	%xmm5,%xmm5
2193	psrlq	$63,%xmm3
2194	pcmpgtd	%xmm4,%xmm5
2195	pslldq	$8,%xmm3
2196	por	%xmm3,%xmm2
2197	pand	16(%ecx),%xmm5
2198	pxor	%xmm5,%xmm2
/*
 * Square the adjusted value: %xmm0 = %xmm2, then a three-multiplication
 * (Karatsuba-style) carry-less product low*low, high*high and
 * (low^high)*(low^high).
 */
2199	movdqa	%xmm2,%xmm0
2200	movdqa	%xmm0,%xmm1
2201	pshufd	$78,%xmm0,%xmm3
2202	pshufd	$78,%xmm2,%xmm4
2203	pxor	%xmm0,%xmm3
2204	pxor	%xmm2,%xmm4
2205.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
2206.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
2207.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
/* Fold the middle term into the 256-bit product held in %xmm1:%xmm0. */
2208	xorps	%xmm0,%xmm3
2209	xorps	%xmm1,%xmm3
2210	movdqa	%xmm3,%xmm4
2211	psrldq	$8,%xmm3
2212	pslldq	$8,%xmm4
2213	pxor	%xmm3,%xmm1
2214	pxor	%xmm4,%xmm0
/* Reduce the 256-bit product to 128 bits: shift/XOR sequence, first phase. */
2215	movdqa	%xmm0,%xmm4
2216	movdqa	%xmm0,%xmm3
2217	psllq	$5,%xmm0
2218	pxor	%xmm0,%xmm3
2219	psllq	$1,%xmm0
2220	pxor	%xmm3,%xmm0
2221	psllq	$57,%xmm0
2222	movdqa	%xmm0,%xmm3
2223	pslldq	$8,%xmm0
2224	psrldq	$8,%xmm3
2225	pxor	%xmm4,%xmm0
2226	pxor	%xmm3,%xmm1
/* Reduction, second phase; the reduced square ends up in %xmm0. */
2227	movdqa	%xmm0,%xmm4
2228	psrlq	$1,%xmm0
2229	pxor	%xmm4,%xmm1
2230	pxor	%xmm0,%xmm4
2231	psrlq	$5,%xmm0
2232	pxor	%xmm4,%xmm0
2233	psrlq	$1,%xmm0
2234	pxor	%xmm1,%xmm0
/*
 * Store the adjusted value at +0 and its square at +16.  The +32 slot
 * packs the low^high XOR of each: palignr places the one for the
 * adjusted value in the low qword and the one for the square in the
 * high qword.
 */
2235	pshufd	$78,%xmm2,%xmm3
2236	pshufd	$78,%xmm0,%xmm4
2237	pxor	%xmm2,%xmm3
2238	movdqu	%xmm2,(%edx)
2239	pxor	%xmm0,%xmm4
2240	movdqu	%xmm0,16(%edx)
2241.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 */
2242	movdqu	%xmm4,32(%edx)
2243	ret
2244.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
/*
 * gcm_gmult_clmul: multiply the 16-byte state in place by the value at
 * the start of the table, using carry-less multiplication.
 *
 * Stack arguments (leaf function, no pushes):
 *   4(%esp)  pointer to the 16-byte state (%eax); read and rewritten
 *   8(%esp)  table pointer (%edx); the 16-byte values at +0 and +32 are read
 * NOTE(review): presumably (u64 Xi[2], const u128 Htable[16]) -- confirm.
 *
 * %eax, %ecx, %edx, %xmm0-%xmm5 and the flags are clobbered.  The .byte
 * sequences are hand-encoded SSE instructions, decoded in trailing
 * comments.
 */
2245.globl	gcm_gmult_clmul
2246.type	gcm_gmult_clmul,@function
2247.align	16
2248gcm_gmult_clmul:
2249.L_gcm_gmult_clmul_begin:
2250	movl	4(%esp),%eax
2251	movl	8(%esp),%edx
/* PC-relative address of .Lbswap via call/pop. */
2252	call	.L011pic
2253.L011pic:
2254	popl	%ecx
2255	leal	.Lbswap-.L011pic(%ecx),%ecx
2256	movdqu	(%eax),%xmm0
/* %xmm5 = byte-reversal shuffle mask; the aligned load is safe because .Lbswap is .align 64. */
2257	movdqa	(%ecx),%xmm5
2258	movups	(%edx),%xmm2
2259.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0: reverse the state's byte order */
2260	movups	32(%edx),%xmm4
/* Three-multiplication carry-less product of %xmm0 and %xmm2; %xmm4 supplies the table's low^high term. */
2261	movdqa	%xmm0,%xmm1
2262	pshufd	$78,%xmm0,%xmm3
2263	pxor	%xmm0,%xmm3
2264.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
2265.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
2266.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
/* Fold the middle term into the 256-bit product held in %xmm1:%xmm0. */
2267	xorps	%xmm0,%xmm3
2268	xorps	%xmm1,%xmm3
2269	movdqa	%xmm3,%xmm4
2270	psrldq	$8,%xmm3
2271	pslldq	$8,%xmm4
2272	pxor	%xmm3,%xmm1
2273	pxor	%xmm4,%xmm0
/* Reduce to 128 bits: shift/XOR sequence, first phase. */
2274	movdqa	%xmm0,%xmm4
2275	movdqa	%xmm0,%xmm3
2276	psllq	$5,%xmm0
2277	pxor	%xmm0,%xmm3
2278	psllq	$1,%xmm0
2279	pxor	%xmm3,%xmm0
2280	psllq	$57,%xmm0
2281	movdqa	%xmm0,%xmm3
2282	pslldq	$8,%xmm0
2283	psrldq	$8,%xmm3
2284	pxor	%xmm4,%xmm0
2285	pxor	%xmm3,%xmm1
/* Reduction, second phase; the result ends up in %xmm0. */
2286	movdqa	%xmm0,%xmm4
2287	psrlq	$1,%xmm0
2288	pxor	%xmm4,%xmm1
2289	pxor	%xmm0,%xmm4
2290	psrlq	$5,%xmm0
2291	pxor	%xmm4,%xmm0
2292	psrlq	$1,%xmm0
2293	pxor	%xmm1,%xmm0
2294.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0: restore memory byte order */
2295	movdqu	%xmm0,(%eax)
2296	ret
2297.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
/*
 * gcm_ghash_clmul: hash update using carry-less multiplication, taking
 * two 16-byte input blocks per main-loop iteration.
 *
 * Stack arguments (after the four pushes):
 *   20(%esp)  pointer to the 16-byte state (%eax); read and rewritten
 *   24(%esp)  table pointer (%edx); 16-byte values at +0, +16 and +32 are
 *             read (the same offsets gcm_init_clmul writes)
 *   28(%esp)  input pointer (%esi)
 *   32(%esp)  byte count (%ebx)
 * NOTE(review): presumably (u64 Xi[2], const u128 Htable[16],
 * const u8 *inp, size_t len) with len a non-zero multiple of 16 -- confirm.
 *
 * Control flow on the byte count:
 *   exactly 16 bytes            -> .L013odd_tail only
 *   an even number of blocks    -> pairs in .L015mod_loop, last pair
 *                                  finished in .L014even_tail
 *   an odd number (3 or more)   -> as above, then .L013odd_tail for the
 *                                  final block
 *
 * %ebp, %ebx, %esi and %edi are saved and restored.  The .byte sequences
 * are hand-encoded SSE instructions, decoded in trailing comments.
 */
2298.globl	gcm_ghash_clmul
2299.type	gcm_ghash_clmul,@function
2300.align	16
2301gcm_ghash_clmul:
2302.L_gcm_ghash_clmul_begin:
2303	pushl	%ebp
2304	pushl	%ebx
2305	pushl	%esi
2306	pushl	%edi
2307	movl	20(%esp),%eax
2308	movl	24(%esp),%edx
2309	movl	28(%esp),%esi
2310	movl	32(%esp),%ebx
/* PC-relative address of .Lbswap via call/pop. */
2311	call	.L012pic
2312.L012pic:
2313	popl	%ecx
2314	leal	.Lbswap-.L012pic(%ecx),%ecx
2315	movdqu	(%eax),%xmm0
/* %xmm5 = byte-reversal shuffle mask; %xmm2 = table value at +0. */
2316	movdqa	(%ecx),%xmm5
2317	movdqu	(%edx),%xmm2
2318.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
/* A single 16-byte block goes straight to the one-block path. */
2319	subl	$16,%ebx
2320	jz	.L013odd_tail
/*
 * Prime the two-block pipeline: byte-reverse the first two blocks, XOR
 * the first into the state, and start multiplying the second (%xmm6)
 * by the table value at +0.
 */
2321	movdqu	(%esi),%xmm3
2322	movdqu	16(%esi),%xmm6
2323.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
2324.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
/* %xmm5 is reused for the table value at +32; the mask is reloaded from (%ecx) when needed. */
2325	movdqu	32(%edx),%xmm5
2326	pxor	%xmm3,%xmm0
2327	pshufd	$78,%xmm6,%xmm3
2328	movdqa	%xmm6,%xmm7
2329	pxor	%xmm6,%xmm3
2330	leal	32(%esi),%esi
2331.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
2332.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
2333.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
/* Switch %xmm2 to the table value at +16 for the state multiply. */
2334	movups	16(%edx),%xmm2
2335	nop
/* Fewer than two further blocks left: finish in the even tail. */
2336	subl	$32,%ebx
2337	jbe	.L014even_tail
2338	jmp	.L015mod_loop
2339.align	32
/*
 * Main loop: multiply the state by the +16 table value, combine it with
 * the pending product of the previous second block, reduce, and in
 * between load, byte-reverse and start multiplying the next two blocks.
 */
2340.L015mod_loop:
2341	pshufd	$78,%xmm0,%xmm4
2342	movdqa	%xmm0,%xmm1
2343	pxor	%xmm0,%xmm4
2344	nop
2345.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
2346.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
2347.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
2348	movups	(%edx),%xmm2
2349	xorps	%xmm6,%xmm0
2350	movdqa	(%ecx),%xmm5
2351	xorps	%xmm7,%xmm1
2352	movdqu	(%esi),%xmm7
2353	pxor	%xmm0,%xmm3
2354	movdqu	16(%esi),%xmm6
2355	pxor	%xmm1,%xmm3
2356.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7 */
2357	pxor	%xmm3,%xmm4
2358	movdqa	%xmm4,%xmm3
2359	psrldq	$8,%xmm4
2360	pslldq	$8,%xmm3
2361	pxor	%xmm4,%xmm1
2362	pxor	%xmm3,%xmm0
2363.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
2364	pxor	%xmm7,%xmm1
2365	movdqa	%xmm6,%xmm7
2366	movdqa	%xmm0,%xmm4
2367	movdqa	%xmm0,%xmm3
2368	psllq	$5,%xmm0
2369	pxor	%xmm0,%xmm3
2370	psllq	$1,%xmm0
2371	pxor	%xmm3,%xmm0
2372.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
2373	movups	32(%edx),%xmm5
2374	psllq	$57,%xmm0
2375	movdqa	%xmm0,%xmm3
2376	pslldq	$8,%xmm0
2377	psrldq	$8,%xmm3
2378	pxor	%xmm4,%xmm0
2379	pxor	%xmm3,%xmm1
2380	pshufd	$78,%xmm7,%xmm3
2381	movdqa	%xmm0,%xmm4
2382	psrlq	$1,%xmm0
2383	pxor	%xmm7,%xmm3
2384	pxor	%xmm4,%xmm1
2385.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
2386	movups	16(%edx),%xmm2
2387	pxor	%xmm0,%xmm4
2388	psrlq	$5,%xmm0
2389	pxor	%xmm4,%xmm0
2390	psrlq	$1,%xmm0
2391	pxor	%xmm1,%xmm0
2392.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
2393	leal	32(%esi),%esi
/* Stay in the loop while at least two more blocks remain (unsigned compare). */
2394	subl	$32,%ebx
2395	ja	.L015mod_loop
/* Finish the pair still in flight: same multiply, combine and reduce, without loading new input. */
2396.L014even_tail:
2397	pshufd	$78,%xmm0,%xmm4
2398	movdqa	%xmm0,%xmm1
2399	pxor	%xmm0,%xmm4
2400.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
2401.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
2402.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
/* Reload the byte-reversal mask for the final pshufb. */
2403	movdqa	(%ecx),%xmm5
2404	xorps	%xmm6,%xmm0
2405	xorps	%xmm7,%xmm1
2406	pxor	%xmm0,%xmm3
2407	pxor	%xmm1,%xmm3
2408	pxor	%xmm3,%xmm4
2409	movdqa	%xmm4,%xmm3
2410	psrldq	$8,%xmm4
2411	pslldq	$8,%xmm3
2412	pxor	%xmm4,%xmm1
2413	pxor	%xmm3,%xmm0
2414	movdqa	%xmm0,%xmm4
2415	movdqa	%xmm0,%xmm3
2416	psllq	$5,%xmm0
2417	pxor	%xmm0,%xmm3
2418	psllq	$1,%xmm0
2419	pxor	%xmm3,%xmm0
2420	psllq	$57,%xmm0
2421	movdqa	%xmm0,%xmm3
2422	pslldq	$8,%xmm0
2423	psrldq	$8,%xmm3
2424	pxor	%xmm4,%xmm0
2425	pxor	%xmm3,%xmm1
2426	movdqa	%xmm0,%xmm4
2427	psrlq	$1,%xmm0
2428	pxor	%xmm4,%xmm1
2429	pxor	%xmm0,%xmm4
2430	psrlq	$5,%xmm0
2431	pxor	%xmm4,%xmm0
2432	psrlq	$1,%xmm0
2433	pxor	%xmm1,%xmm0
/* %ebx is zero here exactly when one more 16-byte block remains; otherwise we are done. */
2434	testl	%ebx,%ebx
2435	jnz	.L016done
2436	movups	(%edx),%xmm2
/* Single block: XOR it into the state and multiply by the table value at +0 (%xmm2). */
2437.L013odd_tail:
2438	movdqu	(%esi),%xmm3
2439.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
2440	pxor	%xmm3,%xmm0
2441	movdqa	%xmm0,%xmm1
2442	pshufd	$78,%xmm0,%xmm3
2443	pshufd	$78,%xmm2,%xmm4
2444	pxor	%xmm0,%xmm3
2445	pxor	%xmm2,%xmm4
2446.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
2447.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
2448.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
2449	xorps	%xmm0,%xmm3
2450	xorps	%xmm1,%xmm3
2451	movdqa	%xmm3,%xmm4
2452	psrldq	$8,%xmm3
2453	pslldq	$8,%xmm4
2454	pxor	%xmm3,%xmm1
2455	pxor	%xmm4,%xmm0
2456	movdqa	%xmm0,%xmm4
2457	movdqa	%xmm0,%xmm3
2458	psllq	$5,%xmm0
2459	pxor	%xmm0,%xmm3
2460	psllq	$1,%xmm0
2461	pxor	%xmm3,%xmm0
2462	psllq	$57,%xmm0
2463	movdqa	%xmm0,%xmm3
2464	pslldq	$8,%xmm0
2465	psrldq	$8,%xmm3
2466	pxor	%xmm4,%xmm0
2467	pxor	%xmm3,%xmm1
2468	movdqa	%xmm0,%xmm4
2469	psrlq	$1,%xmm0
2470	pxor	%xmm4,%xmm1
2471	pxor	%xmm0,%xmm4
2472	psrlq	$5,%xmm0
2473	pxor	%xmm4,%xmm0
2474	psrlq	$1,%xmm0
2475	pxor	%xmm1,%xmm0
/* Restore memory byte order and write the state back. */
2476.L016done:
2477.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
2478	movdqu	%xmm0,(%eax)
2479	popl	%edi
2480	popl	%esi
2481	popl	%ebx
2482	popl	%ebp
2483	ret
2484.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
/*
 * Read-only constants shared by the routines above.  They sit in the
 * code section and are addressed PC-relatively through call/pop pairs.
 */
2485.align	64
/*
 * .Lbswap+0:  pshufb mask that reverses the 16 bytes of an XMM register.
 * .Lbswap+16: 128-bit constant 0xc2000000000000000000000000000001
 *             (little-endian bytes below), ANDed with a mask and XORed
 *             in by gcm_init_clmul.
 */
2486.Lbswap:
2487.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2488.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
2489.align	64
/*
 * .Lrem_8bit: 256 16-bit entries, indexed by a byte value (scale 2) with
 * pinsrw in gcm_ghash_4bit_mmx.
 */
2490.Lrem_8bit:
2491.value	0,450,900,582,1800,1738,1164,1358
2492.value	3600,4050,3476,3158,2328,2266,2716,2910
2493.value	7200,7650,8100,7782,6952,6890,6316,6510
2494.value	4656,5106,4532,4214,5432,5370,5820,6014
2495.value	14400,14722,15300,14854,16200,16010,15564,15630
2496.value	13904,14226,13780,13334,12632,12442,13020,13086
2497.value	9312,9634,10212,9766,9064,8874,8428,8494
2498.value	10864,11186,10740,10294,11640,11450,12028,12094
2499.value	28800,28994,29444,29382,30600,30282,29708,30158
2500.value	32400,32594,32020,31958,31128,30810,31260,31710
2501.value	27808,28002,28452,28390,27560,27242,26668,27118
2502.value	25264,25458,24884,24822,26040,25722,26172,26622
2503.value	18624,18690,19268,19078,20424,19978,19532,19854
2504.value	18128,18194,17748,17558,16856,16410,16988,17310
2505.value	21728,21794,22372,22182,21480,21034,20588,20910
2506.value	23280,23346,22900,22710,24056,23610,24188,24510
2507.value	57600,57538,57988,58182,58888,59338,58764,58446
2508.value	61200,61138,60564,60758,59416,59866,60316,59998
2509.value	64800,64738,65188,65382,64040,64490,63916,63598
2510.value	62256,62194,61620,61814,62520,62970,63420,63102
2511.value	55616,55426,56004,56070,56904,57226,56780,56334
2512.value	55120,54930,54484,54550,53336,53658,54236,53790
2513.value	50528,50338,50916,50982,49768,50090,49644,49198
2514.value	52080,51890,51444,51510,52344,52666,53244,52798
2515.value	37248,36930,37380,37830,38536,38730,38156,38094
2516.value	40848,40530,39956,40406,39064,39258,39708,39646
2517.value	36256,35938,36388,36838,35496,35690,35116,35054
2518.value	33712,33394,32820,33270,33976,34170,34620,34558
2519.value	43456,43010,43588,43910,44744,44810,44364,44174
2520.value	42960,42514,42068,42390,41176,41242,41820,41630
2521.value	46560,46114,46692,47014,45800,45866,45420,45230
2522.value	48112,47666,47220,47542,48376,48442,49020,48830
2523.align	64
/*
 * .Lrem_4bit: 16 eight-byte entries (low dword zero, high dword the
 * constant), indexed by a nibble (scale 8) in gcm_gmult_4bit_mmx.  The
 * high dwords equal the constants the integer-only routines build on
 * their stacks.
 */
2524.Lrem_4bit:
2525.long	0,0,0,471859200,0,943718400,0,610271232
2526.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
2527.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
2528.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>". */
2529.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
2530.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
2531.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
2532.byte	0
2533#endif
2534