/* Do not modify. This file is auto-generated from ghash-x86.pl. */
#ifdef PIC
.text
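/*
 * Editorial note (not from the generator): gcm_gmult_4bit_x86(Xi, Htable)
 * performs one GHASH multiplication, Xi = Xi * H in GF(2^128), with plain
 * x86 integer code and 4-bit (Shoup) table lookups into Htable.  The block
 * of constants spilled to the stack below is a copy of the .Lrem_4bit
 * reduction table found in the data section at the end of this file.
 */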
.globl	gcm_gmult_4bit_x86
.type	gcm_gmult_4bit_x86,@function
.align	16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%edi
	movl	108(%esp),%esi
	movl	(%edi),%ebp
	movl	4(%edi),%edx
	movl	8(%edi),%ecx
	movl	12(%edi),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
	movl	%ebp,(%esp)
	movl	%edx,4(%esp)
	movl	%ecx,8(%esp)
	movl	%ebx,12(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	.L000x86_loop
.align	16
.L000x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L001x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L000x86_loop
.align	16
.L001x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
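/*
 * Editorial note: gcm_ghash_4bit_x86(Xi, Htable, inp, len) is the bulk
 * variant.  Each 16-byte block of inp is XORed into Xi, which is then
 * multiplied by H using the same 4-bit table lookups as the routine above.
 */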
.globl	gcm_ghash_4bit_x86
.type	gcm_ghash_4bit_x86,@function
.align	16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%ebx
	movl	108(%esp),%esi
	movl	112(%esp),%edi
	movl	116(%esp),%ecx
	addl	%edi,%ecx
	movl	%ecx,116(%esp)
	movl	(%ebx),%ebp
	movl	4(%ebx),%edx
	movl	8(%ebx),%ecx
	movl	12(%ebx),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
.align	16
.L002x86_outer_loop:
	xorl	12(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	4(%edi),%edx
	xorl	(%edi),%ebp
	movl	%ebx,12(%esp)
	movl	%ecx,8(%esp)
	movl	%edx,4(%esp)
	movl	%ebp,(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	.L003x86_loop
.align	16
.L003x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L004x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L003x86_loop
.align	16
.L004x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	112(%esp),%edi
	leal	16(%edi),%edi
	cmpl	116(%esp),%edi
	movl	%edi,112(%esp)
	jb	.L002x86_outer_loop
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
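/*
 * Editorial note: gcm_gmult_4bit_mmx(Xi, Htable) is the same single
 * multiplication carried out in 64-bit MMX registers; the reduction
 * constants come from .Lrem_4bit, located PC-relatively via call/pop.
 */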
.globl	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	call	.L005pic_point
.L005pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L006mmx_loop
.align	16
.L006mmx_loop:
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	.L007mmx_break
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L006mmx_loop
.align	16
.L007mmx_break:
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
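/*
 * Editorial note: gcm_ghash_4bit_mmx(Xi, Htable, inp, len) is the bulk MMX
 * path.  Its prologue expands Htable into pre-shifted copies inside a
 * 64-byte-aligned stack frame (note the andl $-64,%esp), and the reduction
 * in .L009outer is driven by the 8-bit .Lrem_8bit table rather than the
 * 4-bit one.
 */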
.globl	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp
	call	.L008pic_point
.L008pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	16
.L009outer:
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx
	jne	.L009outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
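/*
 * Editorial note: gcm_init_clmul(Htable, Xi) sets up the PCLMULQDQ path.
 * It derives H from the raw hash subkey and stores H, H^2 and their XORed
 * halves (the Karatsuba pre-products) at Htable.  Throughout the *_clmul
 * routines, .byte 102,15,58,68,... encodes pclmulqdq (66 0F 3A 44) and
 * .byte 102,15,56,0,... encodes pshufb (66 0F 38 00); the generator emits
 * them as raw bytes so that assemblers predating these instructions can
 * still build the file.
 */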
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	call	.L010pic
.L010pic:
	popl	%ecx
	leal	.Lbswap-.L010pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
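/*
 * Editorial note: gcm_gmult_clmul(Xi, Htable) performs one GHASH
 * multiplication with carry-less multiplies, a Karatsuba split and the
 * shift-based reduction; the byte-order mask comes from .Lbswap.
 */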
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	call	.L011pic
.L011pic:
	popl	%ecx
	leal	.Lbswap-.L011pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
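/*
 * Editorial note: gcm_ghash_clmul(Xi, Htable, inp, len) is the bulk
 * PCLMULQDQ path.  The main loop (.L015mod_loop) folds two input blocks
 * per iteration using H and H^2 from Htable; .L014even_tail finishes a
 * final pair and .L013odd_tail handles a single leftover block.
 */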
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	call	.L012pic
.L012pic:
	popl	%ecx
	leal	.Lbswap-.L012pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197
	subl	$16,%ebx
	jz	.L013odd_tail
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,221,0
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	.L014even_tail
	jmp	.L015mod_loop
.align	32
.L015mod_loop:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L015mod_loop
.L014even_tail:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	.L016done
	movups	(%edx),%xmm2
.L013odd_tail:
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L016done:
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
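/*
 * Editorial note: data section.  .Lbswap holds the pshufb byte-swap mask
 * followed by the GHASH polynomial constant (0x01 ... 0xc2, little-endian);
 * .Lrem_8bit and .Lrem_4bit are the reduction tables used by the MMX and
 * plain-x86 paths above.
 */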
.align	64
.Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	64
.Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	64
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#else
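/*
 * Editorial note: non-PIC build.  The code below appears to be a verbatim
 * second copy of the routines and tables above (both halves locate their
 * data PC-relatively via call/pop), kept separate by the generator.
 */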
.text
.globl	gcm_gmult_4bit_x86
.type	gcm_gmult_4bit_x86,@function
.align	16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%edi
	movl	108(%esp),%esi
	movl	(%edi),%ebp
	movl	4(%edi),%edx
	movl	8(%edi),%ecx
	movl	12(%edi),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
	movl	%ebp,(%esp)
	movl	%edx,4(%esp)
	movl	%ecx,8(%esp)
	movl	%ebx,12(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	.L000x86_loop
.align	16
.L000x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L001x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L000x86_loop
.align	16
.L001x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
.globl	gcm_ghash_4bit_x86
.type	gcm_ghash_4bit_x86,@function
.align	16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp
	movl	104(%esp),%ebx
	movl	108(%esp),%esi
	movl	112(%esp),%edi
	movl	116(%esp),%ecx
	addl	%edi,%ecx
	movl	%ecx,116(%esp)
	movl	(%ebx),%ebp
	movl	4(%ebx),%edx
	movl	8(%ebx),%ecx
	movl	12(%ebx),%ebx
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
.align	16
.L002x86_outer_loop:
	xorl	12(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	4(%edi),%edx
	xorl	(%edi),%ebp
	movl	%ebx,12(%esp)
	movl	%ecx,8(%esp)
	movl	%edx,4(%esp)
	movl	%ebp,(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx
	movl	4(%esi,%ebx,1),%ebp
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi
	jmp	.L003x86_loop
.align	16
.L003x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	andb	$240,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L004x86_break
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L003x86_loop
.align	16
.L004x86_break:
	bswap	%ebx
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	112(%esp),%edi
	leal	16(%edi),%edi
	cmpl	116(%esp),%edi
	movl	%edi,112(%esp)
	jb	.L002x86_outer_loop
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
.globl	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	call	.L005pic_point
.L005pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L006mmx_loop
.align	16
.L006mmx_loop:
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	.L007mmx_break
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L006mmx_loop
.align	16
.L007mmx_break:
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
.globl	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp
	call	.L008pic_point
.L008pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	16
.L009outer:
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx
	jne	.L009outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	call	.L010pic
.L010pic:
	popl	%ecx
	leal	.Lbswap-.L010pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	call	.L011pic
.L011pic:
	popl	%ecx
	leal	.Lbswap-.L011pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	call	.L012pic
.L012pic:
	popl	%ecx
	leal	.Lbswap-.L012pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197
	subl	$16,%ebx
	jz	.L013odd_tail
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,221,0
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	.L014even_tail
	jmp	.L015mod_loop
.align	32
.L015mod_loop:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L015mod_loop
.L014even_tail:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,229,16
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	.L016done
	movups	(%edx),%xmm2
.L013odd_tail:
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L016done:
.byte	102,15,56,0,197
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
.align	64
.Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	64
.Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	64
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif