xref: /freebsd/sys/crypto/openssl/i386/x86-mont.S (revision f7c32ed617858bcd22f8d1b03199099d50125721)
1/* $FreeBSD$ */
2/* Do not modify. This file is auto-generated from x86-mont.pl. */
3#ifdef PIC
/*
 * bn_mul_mont -- copy assembled when PIC is defined.
 *
 * Takes six 32-bit arguments on the stack; after the four register pushes
 * below they sit at 20(%esp) .. 40(%esp).  Arguments one to four are
 * copied as-is, the fifth is dereferenced once, and the sixth is a signed
 * word count.  Returns 0 in %eax without touching anything else when the
 * count is below 4; the normal exit path of the function loads 1 into
 * %eax.
 * NOTE(review): upstream OpenSSL presumably declares this as
 * bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the BN headers.
 *
 * Scratch frame built here (offsets from the new %esp):
 *    4  first argument         8  second argument
 *   12  third argument        16  fourth argument
 *   20  word read through the fifth argument
 *   24  %esp as it was right after the pushes
 * Later code uses 32(%esp) upwards as a temporary vector.
 */
4.text
5.globl	bn_mul_mont
6.type	bn_mul_mont,@function
7.align	16
8bn_mul_mont:
9.L_bn_mul_mont_begin:
10	pushl	%ebp
11	pushl	%ebx
12	pushl	%esi
13	pushl	%edi
/* %eax = 0 is what gets returned if the count check below fails. */
14	xorl	%eax,%eax
15	movl	40(%esp),%edi
16	cmpl	$4,%edi
17	jl	.L000just_leave
/* %esi = address of the first argument, %edx = address of the second. */
18	leal	20(%esp),%esi
19	leal	24(%esp),%edx
/*
 * %ebp = %esp - 32 - 4*(count+2), the candidate bottom of the scratch
 * frame.  %edi is negated only for the address computation and ends up
 * as count+2.
 */
20	addl	$2,%edi
21	negl	%edi
22	leal	-32(%esp,%edi,4),%ebp
23	negl	%edi
/* Lower %ebp until it is congruent to %edx modulo 2048. */
24	movl	%ebp,%eax
25	subl	%edx,%eax
26	andl	$2047,%eax
27	subl	%eax,%ebp
/*
 * If bit 11 of %ebp and %edx agree, lower %ebp by another 2048 so the
 * two addresses differ in that bit.
 * NOTE(review): presumably a cache-placement measure -- confirm upstream.
 */
28	xorl	%ebp,%edx
29	andl	$2048,%edx
30	xorl	$2048,%edx
31	subl	%edx,%ebp
/* Round the frame bottom down to a multiple of 64. */
32	andl	$-64,%ebp
/*
 * %eax = distance from the current %esp down to %ebp, truncated to a
 * multiple of 4096.  %edx keeps the current %esp for the epilogue, and
 * %esp moves to %ebp + %eax.
 */
33	movl	%esp,%eax
34	subl	%ebp,%eax
35	andl	$-4096,%eax
36	movl	%esp,%edx
37	leal	(%ebp,%eax,1),%esp
/*
 * Read one word at the new %esp, then keep stepping %esp down by 4096
 * and reading again while it is still above %ebp (unsigned compare).
 * NOTE(review): presumably so that every stack page is touched in
 * order -- confirm.
 */
38	movl	(%esp),%eax
39	cmpl	%ebp,%esp
40	ja	.L001page_walk
41	jmp	.L002page_walk_done
42.align	16
43.L001page_walk:
44	leal	-4096(%esp),%esp
45	movl	(%esp),%eax
46	cmpl	%ebp,%esp
47	ja	.L001page_walk
48.L002page_walk_done:
/*
 * Copy the arguments into the frame.  %esi still points at the first
 * argument on the caller's side; the fifth argument is a pointer and
 * only the word it points to is kept.
 */
49	movl	(%esi),%eax
50	movl	4(%esi),%ebx
51	movl	8(%esi),%ecx
52	movl	12(%esi),%ebp
53	movl	16(%esi),%esi
54	movl	(%esi),%esi
55	movl	%eax,4(%esp)
56	movl	%ebx,8(%esp)
57	movl	%ecx,12(%esp)
58	movl	%ebp,16(%esp)
59	movl	%esi,20(%esp)
/* %ebx = (count+2) - 3 = count-1. */
60	leal	-3(%edi),%ebx
/* Save the pre-allocation %esp; the exit path reloads it from here. */
61	movl	%edx,24(%esp)
/*
 * Choose the code path.  call/pop leaves the address of .L003PIC_me_up
 * in %eax and the leal adds the offset from that label to
 * OPENSSL_ia32cap_P.  Bit 26 of the first word found there selects the
 * pmuludq code below; when it is clear, control goes to .L004non_sse2.
 */
62	call	.L003PIC_me_up
63.L003PIC_me_up:
64	popl	%eax
65	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
66	btl	$26,(%eax)
67	jnc	.L004non_sse2
/*
 * Frame slots filled in earlier in this function: 8(%esp), 12(%esp) and
 * 16(%esp) hold the second, third and fourth pointer arguments (called
 * a, b and n in these comments), 20(%esp) holds the word read through
 * the fifth argument (n0), and t[] is the scratch vector that starts at
 * 32(%esp).  %ebx is count-1.
 * %mm7 becomes a mask with only the low 32 bits set.
 */
68	movl	$-1,%eax
69	movd	%eax,%mm7
70	movl	8(%esp),%esi
71	movl	12(%esp),%edi
72	movl	16(%esp),%ebp
/* %edx is the outer index i (starts at 0), %ecx the inner index j. */
73	xorl	%edx,%edx
74	xorl	%ecx,%ecx
/* First pass: %mm4 = b[0], %mm5 = a[0], %mm3 = n[0]. */
75	movd	(%edi),%mm4
76	movd	(%esi),%mm5
77	movd	(%ebp),%mm3
/*
 * %mm5 = a[0]*b[0]; the full product is kept in %mm2 and its low word
 * in %mm0.  The second pmuludq multiplies the low word of that product
 * by n0; the low word of the result is the factor m that %mm5 supplies
 * to every n[j] multiplication of this pass.
 */
78	pmuludq	%mm4,%mm5
79	movq	%mm5,%mm2
80	movq	%mm5,%mm0
81	pand	%mm7,%mm0
82	pmuludq	20(%esp),%mm5
/* %mm3 = n[0]*m + low word of a[0]*b[0]. */
83	pmuludq	%mm5,%mm3
84	paddq	%mm0,%mm3
/* Preload n[1] and a[1]; shift both sums so only the carries remain. */
85	movd	4(%ebp),%mm1
86	movd	4(%esi),%mm0
87	psrlq	$32,%mm2
88	psrlq	$32,%mm3
89	incl	%ecx
90.align	16
/*
 * For j = 1 .. count-2: add a[j]*b[0] to the carry in %mm2 and n[j]*m to
 * the carry in %mm3, add the low word of %mm2 into %mm3, store the low
 * word of %mm3 at t[j-1], and shift both registers down by 32 bits.  The
 * operands for j+1 are loaded inside the same iteration.
 */
91.L0051st:
92	pmuludq	%mm4,%mm0
93	pmuludq	%mm5,%mm1
94	paddq	%mm0,%mm2
95	paddq	%mm1,%mm3
96	movq	%mm2,%mm0
97	pand	%mm7,%mm0
98	movd	4(%ebp,%ecx,4),%mm1
99	paddq	%mm0,%mm3
100	movd	4(%esi,%ecx,4),%mm0
101	psrlq	$32,%mm2
102	movd	%mm3,28(%esp,%ecx,4)
103	psrlq	$32,%mm3
104	leal	1(%ecx),%ecx
105	cmpl	%ebx,%ecx
106	jl	.L0051st
/*
 * Last word (j = count-1): same step without further loads, result to
 * t[count-2].  The sum of the two remaining carries is written as a
 * 64-bit value covering t[count-1] and t[count].
 */
107	pmuludq	%mm4,%mm0
108	pmuludq	%mm5,%mm1
109	paddq	%mm0,%mm2
110	paddq	%mm1,%mm3
111	movq	%mm2,%mm0
112	pand	%mm7,%mm0
113	paddq	%mm0,%mm3
114	movd	%mm3,28(%esp,%ecx,4)
115	psrlq	$32,%mm2
116	psrlq	$32,%mm3
117	paddq	%mm2,%mm3
118	movq	%mm3,32(%esp,%ebx,4)
119	incl	%edx
/*
 * Outer loop, i = %edx = 1 .. count-1.  Same scheme as the first pass,
 * except that the current contents of t[] are added in: t[0] goes into
 * the first product before m is derived, and t[j+1] is added to the
 * %mm2 carry one step ahead of its use.
 */
120.L006outer:
121	xorl	%ecx,%ecx
122	movd	(%edi,%edx,4),%mm4
123	movd	(%esi),%mm5
124	movd	32(%esp),%mm6
125	movd	(%ebp),%mm3
126	pmuludq	%mm4,%mm5
127	paddq	%mm6,%mm5
128	movq	%mm5,%mm0
129	movq	%mm5,%mm2
130	pand	%mm7,%mm0
131	pmuludq	20(%esp),%mm5
132	pmuludq	%mm5,%mm3
133	paddq	%mm0,%mm3
134	movd	36(%esp),%mm6
135	movd	4(%ebp),%mm1
136	movd	4(%esi),%mm0
137	psrlq	$32,%mm2
138	psrlq	$32,%mm3
139	paddq	%mm6,%mm2
140	incl	%ecx
/* %ebx doubles as the down-counter of the inner loop (count-2 rounds). */
141	decl	%ebx
142.L007inner:
143	pmuludq	%mm4,%mm0
144	pmuludq	%mm5,%mm1
145	paddq	%mm0,%mm2
146	paddq	%mm1,%mm3
147	movq	%mm2,%mm0
148	movd	36(%esp,%ecx,4),%mm6
149	pand	%mm7,%mm0
150	movd	4(%ebp,%ecx,4),%mm1
151	paddq	%mm0,%mm3
152	movd	4(%esi,%ecx,4),%mm0
153	psrlq	$32,%mm2
154	movd	%mm3,28(%esp,%ecx,4)
155	psrlq	$32,%mm3
156	paddq	%mm6,%mm2
/* leal does not change the flags, so jnz still sees the result of decl. */
157	decl	%ebx
158	leal	1(%ecx),%ecx
159	jnz	.L007inner
/* %ecx is count-1 here; copying it back restores %ebx. */
160	movl	%ecx,%ebx
161	pmuludq	%mm4,%mm0
162	pmuludq	%mm5,%mm1
163	paddq	%mm0,%mm2
164	paddq	%mm1,%mm3
165	movq	%mm2,%mm0
166	pand	%mm7,%mm0
167	paddq	%mm0,%mm3
168	movd	%mm3,28(%esp,%ecx,4)
169	psrlq	$32,%mm2
170	psrlq	$32,%mm3
/* Add the old t[count] to the two carries; store t[count-1], t[count]. */
171	movd	36(%esp,%ebx,4),%mm6
172	paddq	%mm2,%mm3
173	paddq	%mm6,%mm3
174	movq	%mm3,32(%esp,%ebx,4)
175	leal	1(%edx),%edx
176	cmpl	%ebx,%edx
177	jle	.L006outer
/* Empty the MMX state before the integer-only tail. */
178	emms
179	jmp	.L008common_tail
180.align	16
/*
 * Integer-only path.  Frame slots filled in earlier in this function:
 * 8(%esp), 12(%esp), 16(%esp) hold the second, third and fourth pointer
 * arguments (called a, b and n in these comments), 20(%esp) holds the
 * word read through the fifth argument (n0), and t[] is the scratch
 * vector that starts at 32(%esp).  %ebx is count-1 throughout.
 *
 * %ebp = (count & 1) | (a - b) is zero only when count is even and both
 * operand pointers are equal; that case branches to the squaring code.
 * The movl between orl and jz does not change the flags.
 * %eax = b + 4*count, one word past the end of b; %edi = b[0].
 */
181.L004non_sse2:
182	movl	8(%esp),%esi
183	leal	1(%ebx),%ebp
184	movl	12(%esp),%edi
185	xorl	%ecx,%ecx
186	movl	%esi,%edx
187	andl	$1,%ebp
188	subl	%edi,%edx
189	leal	4(%edi,%ebx,4),%eax
190	orl	%edx,%ebp
191	movl	(%edi),%edi
192	jz	.L009bn_sqr_mont
/* 28(%esp) = end-of-b marker, compared against at the end of each row. */
193	movl	%eax,28(%esp)
194	movl	(%esi),%eax
195	xorl	%edx,%edx
196.align	16
/*
 * First row: t[j] = low word of a[j]*b[0] + carry for j = 0 .. count-2,
 * with the running carry in %edx.  The store uses the already
 * incremented index, hence the 28 rather than 32 displacement.
 */
197.L010mull:
198	movl	%edx,%ebp
199	mull	%edi
200	addl	%eax,%ebp
201	leal	1(%ecx),%ecx
202	adcl	$0,%edx
203	movl	(%esi,%ecx,4),%eax
204	cmpl	%ebx,%ecx
205	movl	%ebp,28(%esp,%ecx,4)
206	jl	.L010mull
/*
 * Last word of the row: low part to t[count-1], high part to t[count],
 * and t[count+1] is cleared.  Interleaved with that, %esi is switched
 * to n and %edi becomes m = n0 * t[0] (low 32 bits).
 */
207	movl	%edx,%ebp
208	mull	%edi
209	movl	20(%esp),%edi
210	addl	%ebp,%eax
211	movl	16(%esp),%esi
212	adcl	$0,%edx
213	imull	32(%esp),%edi
214	movl	%eax,32(%esp,%ebx,4)
215	xorl	%ecx,%ecx
216	movl	%edx,36(%esp,%ebx,4)
217	movl	%ecx,40(%esp,%ebx,4)
/*
 * n[0]*m + t[0]: only the carry is kept.  %eax is reloaded with n[1]
 * between the addl and the adcl; movl leaves the carry flag alone.
 */
218	movl	(%esi),%eax
219	mull	%edi
220	addl	32(%esp),%eax
221	movl	4(%esi),%eax
222	adcl	$0,%edx
223	incl	%ecx
224	jmp	.L0112ndmadd
225.align	16
/*
 * Rows after the first: t[j] += a[j]*b[i] for j = 0 .. count-2, carry in
 * %edx.  Entered with %edi = b[i], %esi = a, %eax = a[0], %ecx = %edx = 0.
 */
226.L0121stmadd:
227	movl	%edx,%ebp
228	mull	%edi
229	addl	32(%esp,%ecx,4),%ebp
230	leal	1(%ecx),%ecx
231	adcl	$0,%edx
232	addl	%eax,%ebp
233	movl	(%esi,%ecx,4),%eax
234	adcl	$0,%edx
235	cmpl	%ebx,%ecx
236	movl	%ebp,28(%esp,%ecx,4)
237	jl	.L0121stmadd
/*
 * Last word of the row.  The carry is added into t[count] and any
 * overflow out of that is written to t[count+1].  %esi is switched to n
 * and %edi becomes m = n0 * t[0] for the reduction pass.
 */
238	movl	%edx,%ebp
239	mull	%edi
240	addl	32(%esp,%ebx,4),%eax
241	movl	20(%esp),%edi
242	adcl	$0,%edx
243	movl	16(%esp),%esi
244	addl	%eax,%ebp
245	adcl	$0,%edx
246	imull	32(%esp),%edi
247	xorl	%ecx,%ecx
248	addl	36(%esp,%ebx,4),%edx
249	movl	%ebp,32(%esp,%ebx,4)
250	adcl	$0,%ecx
251	movl	(%esi),%eax
252	movl	%edx,36(%esp,%ebx,4)
253	movl	%ecx,40(%esp,%ebx,4)
254	mull	%edi
255	addl	32(%esp),%eax
256	movl	4(%esi),%eax
257	adcl	$0,%edx
258	movl	$1,%ecx
259.align	16
/*
 * Reduction pass: adds n[j]*m to t[j] for j = 1 .. count-2 and stores
 * each result one slot lower, at t[j-1] (displacement 24 with the
 * already incremented index).
 */
260.L0112ndmadd:
261	movl	%edx,%ebp
262	mull	%edi
263	addl	32(%esp,%ecx,4),%ebp
264	leal	1(%ecx),%ecx
265	adcl	$0,%edx
266	addl	%eax,%ebp
267	movl	(%esi,%ecx,4),%eax
268	adcl	$0,%edx
269	cmpl	%ebx,%ecx
270	movl	%ebp,24(%esp,%ecx,4)
271	jl	.L0112ndmadd
/*
 * Last word goes to t[count-2]; the carry plus the old t[count] and
 * t[count+1] become the new t[count-1] and t[count].
 */
272	movl	%edx,%ebp
273	mull	%edi
274	addl	32(%esp,%ebx,4),%ebp
275	adcl	$0,%edx
276	addl	%eax,%ebp
277	adcl	$0,%edx
278	movl	%ebp,28(%esp,%ebx,4)
279	xorl	%eax,%eax
280	movl	12(%esp),%ecx
281	addl	36(%esp,%ebx,4),%edx
282	adcl	40(%esp,%ebx,4),%eax
/*
 * Advance the b pointer kept in 12(%esp) by one word; finished once it
 * equals the end marker in 28(%esp).
 */
283	leal	4(%ecx),%ecx
284	movl	%edx,32(%esp,%ebx,4)
285	cmpl	28(%esp),%ecx
286	movl	%eax,36(%esp,%ebx,4)
287	je	.L008common_tail
/* Set up the next row: %edi = next word of b, %esi = a, %eax = a[0]. */
288	movl	(%ecx),%edi
289	movl	8(%esp),%esi
290	movl	%ecx,12(%esp)
291	xorl	%ecx,%ecx
292	xorl	%edx,%edx
293	movl	(%esi),%eax
294	jmp	.L0121stmadd
295.align	16
/*
 * Squaring path, entered from .L004non_sse2 when both operand pointers
 * are equal and the count is even.  On entry %esi = a (the operand),
 * %edi = a[0], %ebx = count-1, %ecx = 0.  16(%esp) holds the pointer n,
 * 20(%esp) the word n0, and t[] is the scratch vector at 32(%esp).
 * (%esp) keeps count-1, and 12(%esp) is reused as the index i of the
 * word currently being multiplied by.
 */
296.L009bn_sqr_mont:
297	movl	%ebx,(%esp)
298	movl	%ecx,12(%esp)
/*
 * t[0] = low word of a[0]*a[0].  The high word is split into its upper
 * 31 bits (%edx) and its lowest bit (%ebx) so the doubling in the loop
 * below reconstructs it exactly.
 */
299	movl	%edi,%eax
300	mull	%edi
301	movl	%eax,32(%esp)
302	movl	%edx,%ebx
303	shrl	$1,%edx
304	andl	$1,%ebx
305	incl	%ecx
306.align	16
/*
 * For j = 1 .. count-2: form a[j]*a[0] + carry, store twice its low
 * word plus the pending bit in %ebx at t[j], and keep the bit shifted
 * out by the doubling in %ebx for the next word.
 */
307.L013sqr:
308	movl	(%esi,%ecx,4),%eax
309	movl	%edx,%ebp
310	mull	%edi
311	addl	%ebp,%eax
312	leal	1(%ecx),%ecx
313	adcl	$0,%edx
314	leal	(%ebx,%eax,2),%ebp
315	shrl	$31,%eax
316	cmpl	(%esp),%ecx
317	movl	%eax,%ebx
318	movl	%ebp,28(%esp,%ecx,4)
319	jl	.L013sqr
/*
 * Last word (j = count-1): doubled result to t[count-1], the doubled
 * carry to t[count] and its top bit to t[count+1].  Interleaved: %esi
 * is switched to n and %edi becomes m = n0 * t[0] (low 32 bits).
 */
320	movl	(%esi,%ecx,4),%eax
321	movl	%edx,%ebp
322	mull	%edi
323	addl	%ebp,%eax
324	movl	20(%esp),%edi
325	adcl	$0,%edx
326	movl	16(%esp),%esi
327	leal	(%ebx,%eax,2),%ebp
328	imull	32(%esp),%edi
329	shrl	$31,%eax
330	movl	%ebp,32(%esp,%ecx,4)
331	leal	(%eax,%edx,2),%ebp
332	movl	(%esi),%eax
333	shrl	$31,%edx
334	movl	%ebp,36(%esp,%ecx,4)
335	movl	%edx,40(%esp,%ecx,4)
/* n[0]*m + t[0]: only the carry is kept; %ebx is set back to count-1. */
336	mull	%edi
337	addl	32(%esp),%eax
338	movl	%ecx,%ebx
339	adcl	$0,%edx
340	movl	4(%esi),%eax
341	movl	$1,%ecx
342.align	16
/*
 * Reduction pass, two words per iteration: adds n[j]*m to t[j] and
 * stores each result one slot lower, at t[j-1].  Entered with %ecx = 1,
 * %eax = n[1] and the carry in %edx.
 */
343.L0143rdmadd:
344	movl	%edx,%ebp
345	mull	%edi
346	addl	32(%esp,%ecx,4),%ebp
347	adcl	$0,%edx
348	addl	%eax,%ebp
349	movl	4(%esi,%ecx,4),%eax
350	adcl	$0,%edx
351	movl	%ebp,28(%esp,%ecx,4)
352	movl	%edx,%ebp
353	mull	%edi
354	addl	36(%esp,%ecx,4),%ebp
355	leal	2(%ecx),%ecx
356	adcl	$0,%edx
357	addl	%eax,%ebp
358	movl	(%esi,%ecx,4),%eax
359	adcl	$0,%edx
360	cmpl	%ebx,%ecx
361	movl	%ebp,24(%esp,%ecx,4)
362	jl	.L0143rdmadd
/*
 * Last word goes to t[count-2]; the carry plus the old t[count] and
 * t[count+1] become the new t[count-1] and t[count].
 */
363	movl	%edx,%ebp
364	mull	%edi
365	addl	32(%esp,%ebx,4),%ebp
366	adcl	$0,%edx
367	addl	%eax,%ebp
368	adcl	$0,%edx
369	movl	%ebp,28(%esp,%ebx,4)
/* Reload i and the operand pointer; finished when i equals count-1. */
370	movl	12(%esp),%ecx
371	xorl	%eax,%eax
372	movl	8(%esp),%esi
373	addl	36(%esp,%ebx,4),%edx
374	adcl	40(%esp,%ebx,4),%eax
375	movl	%edx,32(%esp,%ebx,4)
376	cmpl	%ebx,%ecx
377	movl	%eax,36(%esp,%ebx,4)
378	je	.L008common_tail
/*
 * Next row: i is incremented and saved, %edi = a[i], and the low word
 * of a[i]*a[i] is added into t[i] with the carry kept in %edx.
 */
379	movl	4(%esi,%ecx,4),%edi
380	leal	1(%ecx),%ecx
381	movl	%edi,%eax
382	movl	%ecx,12(%esp)
383	mull	%edi
384	addl	32(%esp,%ecx,4),%eax
385	adcl	$0,%edx
386	movl	%eax,32(%esp,%ecx,4)
387	xorl	%ebp,%ebp
/*
 * If i is already count-1 there are no cross products left.  The leal
 * between cmpl and je does not change the flags.
 */
388	cmpl	%ebx,%ecx
389	leal	1(%ecx),%ecx
390	je	.L015sqrlast
/* Split the carry into upper bits (%edx) and lowest bit (%ebx). */
391	movl	%edx,%ebx
392	shrl	$1,%edx
393	andl	$1,%ebx
394.align	16
/*
 * For j = i+1 .. count-1: t[j] += twice the low word of
 * (a[j]*a[i] + carry) plus the pending value in %ebx.  The carries out
 * of the doubling and of both additions are collected in %ebx.
 */
395.L016sqradd:
396	movl	(%esi,%ecx,4),%eax
397	movl	%edx,%ebp
398	mull	%edi
399	addl	%ebp,%eax
400	leal	(%eax,%eax,1),%ebp
401	adcl	$0,%edx
402	shrl	$31,%eax
403	addl	32(%esp,%ecx,4),%ebp
404	leal	1(%ecx),%ecx
405	adcl	$0,%eax
406	addl	%ebx,%ebp
407	adcl	$0,%eax
408	cmpl	(%esp),%ecx
409	movl	%ebp,28(%esp,%ecx,4)
410	movl	%eax,%ebx
411	jle	.L016sqradd
/* %edx = twice the final carry plus %ebx; %ebp collects the overflow. */
412	movl	%edx,%ebp
413	addl	%edx,%edx
414	shrl	$31,%ebp
415	addl	%ebx,%edx
416	adcl	$0,%ebp
/*
 * Finish the row (%ecx is count here): add %edx into t[count], store
 * the overflow in t[count+1], compute m = n0 * t[0], start the
 * reduction with n[0]*m + t[0], restore %ebx to count-1 and rejoin the
 * reduction loop.
 */
417.L015sqrlast:
418	movl	20(%esp),%edi
419	movl	16(%esp),%esi
420	imull	32(%esp),%edi
421	addl	32(%esp,%ecx,4),%edx
422	movl	(%esi),%eax
423	adcl	$0,%ebp
424	movl	%edx,32(%esp,%ecx,4)
425	movl	%ebp,36(%esp,%ecx,4)
426	mull	%edi
427	addl	32(%esp),%eax
428	leal	-1(%ecx),%ebx
429	adcl	$0,%edx
430	movl	$1,%ecx
431	movl	4(%esi),%eax
432	jmp	.L0143rdmadd
433.align	16
/*
 * Shared tail of all three multiplication paths.  %ebx holds count-1.
 * %ebp = n (fourth argument, 16(%esp)), %edi = result pointer (first
 * argument, 4(%esp)), %esi = t, the scratch vector at 32(%esp).
 * The xorl zeroes the index and also clears the carry flag for the
 * first sbbl.
 */
434.L008common_tail:
435	movl	16(%esp),%ebp
436	movl	4(%esp),%edi
437	leal	32(%esp),%esi
438	movl	(%esi),%eax
439	movl	%ebx,%ecx
440	xorl	%edx,%edx
441.align	16
/*
 * result[j] = t[j] - n[j] - borrow for j = 0 .. count-1.  decl, movl and
 * leal all leave the carry flag untouched, so the borrow chains from
 * one sbbl to the next; jge acts on the sign of %ecx after the decl.
 */
442.L017sub:
443	sbbl	(%ebp,%edx,4),%eax
444	movl	%eax,(%edi,%edx,4)
445	decl	%ecx
446	movl	4(%esi,%edx,4),%eax
447	leal	1(%edx),%edx
448	jge	.L017sub
/*
 * %eax = t[count] minus the final borrow, %edx = its complement.  The
 * two are used as select masks below.
 * NOTE(review): the select is only meaningful when %eax is 0 or all
 * ones here -- presumably guaranteed by the preceding arithmetic;
 * confirm.
 */
449	sbbl	$0,%eax
450	movl	$-1,%edx
451	xorl	%eax,%edx
/* The target is the very next label; the jump skips the alignment gap. */
452	jmp	.L018copy
453.align	16
/*
 * For k = count-1 down to 0:
 *   result[k] = (t[k] AND %eax) OR (result[k] AND %edx)
 * with no branch on the data.  Each t[k] is then overwritten with %ecx,
 * which is -1 after the loop above, so the intermediate values do not
 * stay in the scratch vector.
 */
454.L018copy:
455	movl	32(%esp,%ebx,4),%esi
456	movl	(%edi,%ebx,4),%ebp
457	movl	%ecx,32(%esp,%ebx,4)
458	andl	%eax,%esi
459	andl	%edx,%ebp
460	orl	%esi,%ebp
461	movl	%ebp,(%edi,%ebx,4)
462	decl	%ebx
463	jge	.L018copy
/* Drop the scratch frame by reloading the saved %esp; return value 1. */
464	movl	24(%esp),%esp
465	movl	$1,%eax
/*
 * Common exit.  Also reached directly from the count check at the top
 * of the function, in which case %eax is still 0.
 */
466.L000just_leave:
467	popl	%edi
468	popl	%esi
469	popl	%ebx
470	popl	%ebp
471	ret
472.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by " followed by the
 * address appro@openssl.org enclosed in angle brackets.
 */
473.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
474.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
475.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
476.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
477.byte	111,114,103,62,0
/*
 * Common-symbol declaration (16 bytes, alignment 4) for the capability
 * vector whose bit 26 is tested by bn_mul_mont.
 */
478.comm	OPENSSL_ia32cap_P,16,4
479#else
/*
 * bn_mul_mont -- copy assembled when PIC is not defined.
 *
 * Takes six 32-bit arguments on the stack; after the four register pushes
 * below they sit at 20(%esp) .. 40(%esp).  Arguments one to four are
 * copied as-is, the fifth is dereferenced once, and the sixth is a signed
 * word count.  Returns 0 in %eax without touching anything else when the
 * count is below 4; the normal exit path of the function loads 1 into
 * %eax.
 * NOTE(review): upstream OpenSSL presumably declares this as
 * bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against the BN headers.
 *
 * Scratch frame built here (offsets from the new %esp):
 *    4  first argument         8  second argument
 *   12  third argument        16  fourth argument
 *   20  word read through the fifth argument
 *   24  %esp as it was right after the pushes
 * Later code uses 32(%esp) upwards as a temporary vector.
 */
480.text
481.globl	bn_mul_mont
482.type	bn_mul_mont,@function
483.align	16
484bn_mul_mont:
485.L_bn_mul_mont_begin:
486	pushl	%ebp
487	pushl	%ebx
488	pushl	%esi
489	pushl	%edi
/* %eax = 0 is what gets returned if the count check below fails. */
490	xorl	%eax,%eax
491	movl	40(%esp),%edi
492	cmpl	$4,%edi
493	jl	.L000just_leave
/* %esi = address of the first argument, %edx = address of the second. */
494	leal	20(%esp),%esi
495	leal	24(%esp),%edx
/*
 * %ebp = %esp - 32 - 4*(count+2), the candidate bottom of the scratch
 * frame.  %edi is negated only for the address computation and ends up
 * as count+2.
 */
496	addl	$2,%edi
497	negl	%edi
498	leal	-32(%esp,%edi,4),%ebp
499	negl	%edi
/* Lower %ebp until it is congruent to %edx modulo 2048. */
500	movl	%ebp,%eax
501	subl	%edx,%eax
502	andl	$2047,%eax
503	subl	%eax,%ebp
/*
 * If bit 11 of %ebp and %edx agree, lower %ebp by another 2048 so the
 * two addresses differ in that bit.
 * NOTE(review): presumably a cache-placement measure -- confirm upstream.
 */
504	xorl	%ebp,%edx
505	andl	$2048,%edx
506	xorl	$2048,%edx
507	subl	%edx,%ebp
/* Round the frame bottom down to a multiple of 64. */
508	andl	$-64,%ebp
/*
 * %eax = distance from the current %esp down to %ebp, truncated to a
 * multiple of 4096.  %edx keeps the current %esp for the epilogue, and
 * %esp moves to %ebp + %eax.
 */
509	movl	%esp,%eax
510	subl	%ebp,%eax
511	andl	$-4096,%eax
512	movl	%esp,%edx
513	leal	(%ebp,%eax,1),%esp
/*
 * Read one word at the new %esp, then keep stepping %esp down by 4096
 * and reading again while it is still above %ebp (unsigned compare).
 * NOTE(review): presumably so that every stack page is touched in
 * order -- confirm.
 */
514	movl	(%esp),%eax
515	cmpl	%ebp,%esp
516	ja	.L001page_walk
517	jmp	.L002page_walk_done
518.align	16
519.L001page_walk:
520	leal	-4096(%esp),%esp
521	movl	(%esp),%eax
522	cmpl	%ebp,%esp
523	ja	.L001page_walk
524.L002page_walk_done:
/*
 * Copy the arguments into the frame.  %esi still points at the first
 * argument on the caller's side; the fifth argument is a pointer and
 * only the word it points to is kept.
 */
525	movl	(%esi),%eax
526	movl	4(%esi),%ebx
527	movl	8(%esi),%ecx
528	movl	12(%esi),%ebp
529	movl	16(%esi),%esi
530	movl	(%esi),%esi
531	movl	%eax,4(%esp)
532	movl	%ebx,8(%esp)
533	movl	%ecx,12(%esp)
534	movl	%ebp,16(%esp)
535	movl	%esi,20(%esp)
/* %ebx = (count+2) - 3 = count-1. */
536	leal	-3(%edi),%ebx
/* Save the pre-allocation %esp; the exit path reloads it from here. */
537	movl	%edx,24(%esp)
/*
 * Choose the code path.  The address of OPENSSL_ia32cap_P is taken
 * directly (absolute reference).  Bit 26 of the first word found there
 * selects the pmuludq code below; when it is clear, control goes to
 * .L003non_sse2.
 */
538	leal	OPENSSL_ia32cap_P,%eax
539	btl	$26,(%eax)
540	jnc	.L003non_sse2
/*
 * Frame slots filled in earlier in this function: 8(%esp), 12(%esp) and
 * 16(%esp) hold the second, third and fourth pointer arguments (called
 * a, b and n in these comments), 20(%esp) holds the word read through
 * the fifth argument (n0), and t[] is the scratch vector that starts at
 * 32(%esp).  %ebx is count-1.
 * %mm7 becomes a mask with only the low 32 bits set.
 */
541	movl	$-1,%eax
542	movd	%eax,%mm7
543	movl	8(%esp),%esi
544	movl	12(%esp),%edi
545	movl	16(%esp),%ebp
/* %edx is the outer index i (starts at 0), %ecx the inner index j. */
546	xorl	%edx,%edx
547	xorl	%ecx,%ecx
/* First pass: %mm4 = b[0], %mm5 = a[0], %mm3 = n[0]. */
548	movd	(%edi),%mm4
549	movd	(%esi),%mm5
550	movd	(%ebp),%mm3
/*
 * %mm5 = a[0]*b[0]; the full product is kept in %mm2 and its low word
 * in %mm0.  The second pmuludq multiplies the low word of that product
 * by n0; the low word of the result is the factor m that %mm5 supplies
 * to every n[j] multiplication of this pass.
 */
551	pmuludq	%mm4,%mm5
552	movq	%mm5,%mm2
553	movq	%mm5,%mm0
554	pand	%mm7,%mm0
555	pmuludq	20(%esp),%mm5
/* %mm3 = n[0]*m + low word of a[0]*b[0]. */
556	pmuludq	%mm5,%mm3
557	paddq	%mm0,%mm3
/* Preload n[1] and a[1]; shift both sums so only the carries remain. */
558	movd	4(%ebp),%mm1
559	movd	4(%esi),%mm0
560	psrlq	$32,%mm2
561	psrlq	$32,%mm3
562	incl	%ecx
563.align	16
/*
 * For j = 1 .. count-2: add a[j]*b[0] to the carry in %mm2 and n[j]*m to
 * the carry in %mm3, add the low word of %mm2 into %mm3, store the low
 * word of %mm3 at t[j-1], and shift both registers down by 32 bits.  The
 * operands for j+1 are loaded inside the same iteration.
 */
564.L0041st:
565	pmuludq	%mm4,%mm0
566	pmuludq	%mm5,%mm1
567	paddq	%mm0,%mm2
568	paddq	%mm1,%mm3
569	movq	%mm2,%mm0
570	pand	%mm7,%mm0
571	movd	4(%ebp,%ecx,4),%mm1
572	paddq	%mm0,%mm3
573	movd	4(%esi,%ecx,4),%mm0
574	psrlq	$32,%mm2
575	movd	%mm3,28(%esp,%ecx,4)
576	psrlq	$32,%mm3
577	leal	1(%ecx),%ecx
578	cmpl	%ebx,%ecx
579	jl	.L0041st
/*
 * Last word (j = count-1): same step without further loads, result to
 * t[count-2].  The sum of the two remaining carries is written as a
 * 64-bit value covering t[count-1] and t[count].
 */
580	pmuludq	%mm4,%mm0
581	pmuludq	%mm5,%mm1
582	paddq	%mm0,%mm2
583	paddq	%mm1,%mm3
584	movq	%mm2,%mm0
585	pand	%mm7,%mm0
586	paddq	%mm0,%mm3
587	movd	%mm3,28(%esp,%ecx,4)
588	psrlq	$32,%mm2
589	psrlq	$32,%mm3
590	paddq	%mm2,%mm3
591	movq	%mm3,32(%esp,%ebx,4)
592	incl	%edx
/*
 * Outer loop, i = %edx = 1 .. count-1.  Same scheme as the first pass,
 * except that the current contents of t[] are added in: t[0] goes into
 * the first product before m is derived, and t[j+1] is added to the
 * %mm2 carry one step ahead of its use.
 */
593.L005outer:
594	xorl	%ecx,%ecx
595	movd	(%edi,%edx,4),%mm4
596	movd	(%esi),%mm5
597	movd	32(%esp),%mm6
598	movd	(%ebp),%mm3
599	pmuludq	%mm4,%mm5
600	paddq	%mm6,%mm5
601	movq	%mm5,%mm0
602	movq	%mm5,%mm2
603	pand	%mm7,%mm0
604	pmuludq	20(%esp),%mm5
605	pmuludq	%mm5,%mm3
606	paddq	%mm0,%mm3
607	movd	36(%esp),%mm6
608	movd	4(%ebp),%mm1
609	movd	4(%esi),%mm0
610	psrlq	$32,%mm2
611	psrlq	$32,%mm3
612	paddq	%mm6,%mm2
613	incl	%ecx
/* %ebx doubles as the down-counter of the inner loop (count-2 rounds). */
614	decl	%ebx
615.L006inner:
616	pmuludq	%mm4,%mm0
617	pmuludq	%mm5,%mm1
618	paddq	%mm0,%mm2
619	paddq	%mm1,%mm3
620	movq	%mm2,%mm0
621	movd	36(%esp,%ecx,4),%mm6
622	pand	%mm7,%mm0
623	movd	4(%ebp,%ecx,4),%mm1
624	paddq	%mm0,%mm3
625	movd	4(%esi,%ecx,4),%mm0
626	psrlq	$32,%mm2
627	movd	%mm3,28(%esp,%ecx,4)
628	psrlq	$32,%mm3
629	paddq	%mm6,%mm2
/* leal does not change the flags, so jnz still sees the result of decl. */
630	decl	%ebx
631	leal	1(%ecx),%ecx
632	jnz	.L006inner
/* %ecx is count-1 here; copying it back restores %ebx. */
633	movl	%ecx,%ebx
634	pmuludq	%mm4,%mm0
635	pmuludq	%mm5,%mm1
636	paddq	%mm0,%mm2
637	paddq	%mm1,%mm3
638	movq	%mm2,%mm0
639	pand	%mm7,%mm0
640	paddq	%mm0,%mm3
641	movd	%mm3,28(%esp,%ecx,4)
642	psrlq	$32,%mm2
643	psrlq	$32,%mm3
/* Add the old t[count] to the two carries; store t[count-1], t[count]. */
644	movd	36(%esp,%ebx,4),%mm6
645	paddq	%mm2,%mm3
646	paddq	%mm6,%mm3
647	movq	%mm3,32(%esp,%ebx,4)
648	leal	1(%edx),%edx
649	cmpl	%ebx,%edx
650	jle	.L005outer
/* Empty the MMX state before the integer-only tail. */
651	emms
652	jmp	.L007common_tail
653.align	16
/*
 * Integer-only path.  Frame slots filled in earlier in this function:
 * 8(%esp), 12(%esp), 16(%esp) hold the second, third and fourth pointer
 * arguments (called a, b and n in these comments), 20(%esp) holds the
 * word read through the fifth argument (n0), and t[] is the scratch
 * vector that starts at 32(%esp).  %ebx is count-1 throughout.
 *
 * %ebp = (count & 1) | (a - b) is zero only when count is even and both
 * operand pointers are equal; that case branches to the squaring code.
 * The movl between orl and jz does not change the flags.
 * %eax = b + 4*count, one word past the end of b; %edi = b[0].
 */
654.L003non_sse2:
655	movl	8(%esp),%esi
656	leal	1(%ebx),%ebp
657	movl	12(%esp),%edi
658	xorl	%ecx,%ecx
659	movl	%esi,%edx
660	andl	$1,%ebp
661	subl	%edi,%edx
662	leal	4(%edi,%ebx,4),%eax
663	orl	%edx,%ebp
664	movl	(%edi),%edi
665	jz	.L008bn_sqr_mont
/* 28(%esp) = end-of-b marker, compared against at the end of each row. */
666	movl	%eax,28(%esp)
667	movl	(%esi),%eax
668	xorl	%edx,%edx
669.align	16
/*
 * First row: t[j] = low word of a[j]*b[0] + carry for j = 0 .. count-2,
 * with the running carry in %edx.  The store uses the already
 * incremented index, hence the 28 rather than 32 displacement.
 */
670.L009mull:
671	movl	%edx,%ebp
672	mull	%edi
673	addl	%eax,%ebp
674	leal	1(%ecx),%ecx
675	adcl	$0,%edx
676	movl	(%esi,%ecx,4),%eax
677	cmpl	%ebx,%ecx
678	movl	%ebp,28(%esp,%ecx,4)
679	jl	.L009mull
/*
 * Last word of the row: low part to t[count-1], high part to t[count],
 * and t[count+1] is cleared.  Interleaved with that, %esi is switched
 * to n and %edi becomes m = n0 * t[0] (low 32 bits).
 */
680	movl	%edx,%ebp
681	mull	%edi
682	movl	20(%esp),%edi
683	addl	%ebp,%eax
684	movl	16(%esp),%esi
685	adcl	$0,%edx
686	imull	32(%esp),%edi
687	movl	%eax,32(%esp,%ebx,4)
688	xorl	%ecx,%ecx
689	movl	%edx,36(%esp,%ebx,4)
690	movl	%ecx,40(%esp,%ebx,4)
/*
 * n[0]*m + t[0]: only the carry is kept.  %eax is reloaded with n[1]
 * between the addl and the adcl; movl leaves the carry flag alone.
 */
691	movl	(%esi),%eax
692	mull	%edi
693	addl	32(%esp),%eax
694	movl	4(%esi),%eax
695	adcl	$0,%edx
696	incl	%ecx
697	jmp	.L0102ndmadd
698.align	16
/*
 * Rows after the first: t[j] += a[j]*b[i] for j = 0 .. count-2, carry in
 * %edx.  Entered with %edi = b[i], %esi = a, %eax = a[0], %ecx = %edx = 0.
 */
699.L0111stmadd:
700	movl	%edx,%ebp
701	mull	%edi
702	addl	32(%esp,%ecx,4),%ebp
703	leal	1(%ecx),%ecx
704	adcl	$0,%edx
705	addl	%eax,%ebp
706	movl	(%esi,%ecx,4),%eax
707	adcl	$0,%edx
708	cmpl	%ebx,%ecx
709	movl	%ebp,28(%esp,%ecx,4)
710	jl	.L0111stmadd
/*
 * Last word of the row.  The carry is added into t[count] and any
 * overflow out of that is written to t[count+1].  %esi is switched to n
 * and %edi becomes m = n0 * t[0] for the reduction pass.
 */
711	movl	%edx,%ebp
712	mull	%edi
713	addl	32(%esp,%ebx,4),%eax
714	movl	20(%esp),%edi
715	adcl	$0,%edx
716	movl	16(%esp),%esi
717	addl	%eax,%ebp
718	adcl	$0,%edx
719	imull	32(%esp),%edi
720	xorl	%ecx,%ecx
721	addl	36(%esp,%ebx,4),%edx
722	movl	%ebp,32(%esp,%ebx,4)
723	adcl	$0,%ecx
724	movl	(%esi),%eax
725	movl	%edx,36(%esp,%ebx,4)
726	movl	%ecx,40(%esp,%ebx,4)
727	mull	%edi
728	addl	32(%esp),%eax
729	movl	4(%esi),%eax
730	adcl	$0,%edx
731	movl	$1,%ecx
732.align	16
/*
 * Reduction pass: adds n[j]*m to t[j] for j = 1 .. count-2 and stores
 * each result one slot lower, at t[j-1] (displacement 24 with the
 * already incremented index).
 */
733.L0102ndmadd:
734	movl	%edx,%ebp
735	mull	%edi
736	addl	32(%esp,%ecx,4),%ebp
737	leal	1(%ecx),%ecx
738	adcl	$0,%edx
739	addl	%eax,%ebp
740	movl	(%esi,%ecx,4),%eax
741	adcl	$0,%edx
742	cmpl	%ebx,%ecx
743	movl	%ebp,24(%esp,%ecx,4)
744	jl	.L0102ndmadd
/*
 * Last word goes to t[count-2]; the carry plus the old t[count] and
 * t[count+1] become the new t[count-1] and t[count].
 */
745	movl	%edx,%ebp
746	mull	%edi
747	addl	32(%esp,%ebx,4),%ebp
748	adcl	$0,%edx
749	addl	%eax,%ebp
750	adcl	$0,%edx
751	movl	%ebp,28(%esp,%ebx,4)
752	xorl	%eax,%eax
753	movl	12(%esp),%ecx
754	addl	36(%esp,%ebx,4),%edx
755	adcl	40(%esp,%ebx,4),%eax
/*
 * Advance the b pointer kept in 12(%esp) by one word; finished once it
 * equals the end marker in 28(%esp).
 */
756	leal	4(%ecx),%ecx
757	movl	%edx,32(%esp,%ebx,4)
758	cmpl	28(%esp),%ecx
759	movl	%eax,36(%esp,%ebx,4)
760	je	.L007common_tail
/* Set up the next row: %edi = next word of b, %esi = a, %eax = a[0]. */
761	movl	(%ecx),%edi
762	movl	8(%esp),%esi
763	movl	%ecx,12(%esp)
764	xorl	%ecx,%ecx
765	xorl	%edx,%edx
766	movl	(%esi),%eax
767	jmp	.L0111stmadd
768.align	16
/*
 * Squaring path, entered from .L003non_sse2 when both operand pointers
 * are equal and the count is even.  On entry %esi = a (the operand),
 * %edi = a[0], %ebx = count-1, %ecx = 0.  16(%esp) holds the pointer n,
 * 20(%esp) the word n0, and t[] is the scratch vector at 32(%esp).
 * (%esp) keeps count-1, and 12(%esp) is reused as the index i of the
 * word currently being multiplied by.
 */
769.L008bn_sqr_mont:
770	movl	%ebx,(%esp)
771	movl	%ecx,12(%esp)
/*
 * t[0] = low word of a[0]*a[0].  The high word is split into its upper
 * 31 bits (%edx) and its lowest bit (%ebx) so the doubling in the loop
 * below reconstructs it exactly.
 */
772	movl	%edi,%eax
773	mull	%edi
774	movl	%eax,32(%esp)
775	movl	%edx,%ebx
776	shrl	$1,%edx
777	andl	$1,%ebx
778	incl	%ecx
779.align	16
/*
 * For j = 1 .. count-2: form a[j]*a[0] + carry, store twice its low
 * word plus the pending bit in %ebx at t[j], and keep the bit shifted
 * out by the doubling in %ebx for the next word.
 */
780.L012sqr:
781	movl	(%esi,%ecx,4),%eax
782	movl	%edx,%ebp
783	mull	%edi
784	addl	%ebp,%eax
785	leal	1(%ecx),%ecx
786	adcl	$0,%edx
787	leal	(%ebx,%eax,2),%ebp
788	shrl	$31,%eax
789	cmpl	(%esp),%ecx
790	movl	%eax,%ebx
791	movl	%ebp,28(%esp,%ecx,4)
792	jl	.L012sqr
/*
 * Last word (j = count-1): doubled result to t[count-1], the doubled
 * carry to t[count] and its top bit to t[count+1].  Interleaved: %esi
 * is switched to n and %edi becomes m = n0 * t[0] (low 32 bits).
 */
793	movl	(%esi,%ecx,4),%eax
794	movl	%edx,%ebp
795	mull	%edi
796	addl	%ebp,%eax
797	movl	20(%esp),%edi
798	adcl	$0,%edx
799	movl	16(%esp),%esi
800	leal	(%ebx,%eax,2),%ebp
801	imull	32(%esp),%edi
802	shrl	$31,%eax
803	movl	%ebp,32(%esp,%ecx,4)
804	leal	(%eax,%edx,2),%ebp
805	movl	(%esi),%eax
806	shrl	$31,%edx
807	movl	%ebp,36(%esp,%ecx,4)
808	movl	%edx,40(%esp,%ecx,4)
/* n[0]*m + t[0]: only the carry is kept; %ebx is set back to count-1. */
809	mull	%edi
810	addl	32(%esp),%eax
811	movl	%ecx,%ebx
812	adcl	$0,%edx
813	movl	4(%esi),%eax
814	movl	$1,%ecx
815.align	16
/*
 * Reduction pass, two words per iteration: adds n[j]*m to t[j] and
 * stores each result one slot lower, at t[j-1].  Entered with %ecx = 1,
 * %eax = n[1] and the carry in %edx.
 */
816.L0133rdmadd:
817	movl	%edx,%ebp
818	mull	%edi
819	addl	32(%esp,%ecx,4),%ebp
820	adcl	$0,%edx
821	addl	%eax,%ebp
822	movl	4(%esi,%ecx,4),%eax
823	adcl	$0,%edx
824	movl	%ebp,28(%esp,%ecx,4)
825	movl	%edx,%ebp
826	mull	%edi
827	addl	36(%esp,%ecx,4),%ebp
828	leal	2(%ecx),%ecx
829	adcl	$0,%edx
830	addl	%eax,%ebp
831	movl	(%esi,%ecx,4),%eax
832	adcl	$0,%edx
833	cmpl	%ebx,%ecx
834	movl	%ebp,24(%esp,%ecx,4)
835	jl	.L0133rdmadd
/*
 * Last word goes to t[count-2]; the carry plus the old t[count] and
 * t[count+1] become the new t[count-1] and t[count].
 */
836	movl	%edx,%ebp
837	mull	%edi
838	addl	32(%esp,%ebx,4),%ebp
839	adcl	$0,%edx
840	addl	%eax,%ebp
841	adcl	$0,%edx
842	movl	%ebp,28(%esp,%ebx,4)
/* Reload i and the operand pointer; finished when i equals count-1. */
843	movl	12(%esp),%ecx
844	xorl	%eax,%eax
845	movl	8(%esp),%esi
846	addl	36(%esp,%ebx,4),%edx
847	adcl	40(%esp,%ebx,4),%eax
848	movl	%edx,32(%esp,%ebx,4)
849	cmpl	%ebx,%ecx
850	movl	%eax,36(%esp,%ebx,4)
851	je	.L007common_tail
/*
 * Next row: i is incremented and saved, %edi = a[i], and the low word
 * of a[i]*a[i] is added into t[i] with the carry kept in %edx.
 */
852	movl	4(%esi,%ecx,4),%edi
853	leal	1(%ecx),%ecx
854	movl	%edi,%eax
855	movl	%ecx,12(%esp)
856	mull	%edi
857	addl	32(%esp,%ecx,4),%eax
858	adcl	$0,%edx
859	movl	%eax,32(%esp,%ecx,4)
860	xorl	%ebp,%ebp
/*
 * If i is already count-1 there are no cross products left.  The leal
 * between cmpl and je does not change the flags.
 */
861	cmpl	%ebx,%ecx
862	leal	1(%ecx),%ecx
863	je	.L014sqrlast
/* Split the carry into upper bits (%edx) and lowest bit (%ebx). */
864	movl	%edx,%ebx
865	shrl	$1,%edx
866	andl	$1,%ebx
867.align	16
/*
 * For j = i+1 .. count-1: t[j] += twice the low word of
 * (a[j]*a[i] + carry) plus the pending value in %ebx.  The carries out
 * of the doubling and of both additions are collected in %ebx.
 */
868.L015sqradd:
869	movl	(%esi,%ecx,4),%eax
870	movl	%edx,%ebp
871	mull	%edi
872	addl	%ebp,%eax
873	leal	(%eax,%eax,1),%ebp
874	adcl	$0,%edx
875	shrl	$31,%eax
876	addl	32(%esp,%ecx,4),%ebp
877	leal	1(%ecx),%ecx
878	adcl	$0,%eax
879	addl	%ebx,%ebp
880	adcl	$0,%eax
881	cmpl	(%esp),%ecx
882	movl	%ebp,28(%esp,%ecx,4)
883	movl	%eax,%ebx
884	jle	.L015sqradd
/* %edx = twice the final carry plus %ebx; %ebp collects the overflow. */
885	movl	%edx,%ebp
886	addl	%edx,%edx
887	shrl	$31,%ebp
888	addl	%ebx,%edx
889	adcl	$0,%ebp
/*
 * Finish the row (%ecx is count here): add %edx into t[count], store
 * the overflow in t[count+1], compute m = n0 * t[0], start the
 * reduction with n[0]*m + t[0], restore %ebx to count-1 and rejoin the
 * reduction loop.
 */
890.L014sqrlast:
891	movl	20(%esp),%edi
892	movl	16(%esp),%esi
893	imull	32(%esp),%edi
894	addl	32(%esp,%ecx,4),%edx
895	movl	(%esi),%eax
896	adcl	$0,%ebp
897	movl	%edx,32(%esp,%ecx,4)
898	movl	%ebp,36(%esp,%ecx,4)
899	mull	%edi
900	addl	32(%esp),%eax
901	leal	-1(%ecx),%ebx
902	adcl	$0,%edx
903	movl	$1,%ecx
904	movl	4(%esi),%eax
905	jmp	.L0133rdmadd
906.align	16
/*
 * Shared tail of all three multiplication paths.  %ebx holds count-1.
 * %ebp = n (fourth argument, 16(%esp)), %edi = result pointer (first
 * argument, 4(%esp)), %esi = t, the scratch vector at 32(%esp).
 * The xorl zeroes the index and also clears the carry flag for the
 * first sbbl.
 */
907.L007common_tail:
908	movl	16(%esp),%ebp
909	movl	4(%esp),%edi
910	leal	32(%esp),%esi
911	movl	(%esi),%eax
912	movl	%ebx,%ecx
913	xorl	%edx,%edx
914.align	16
/*
 * result[j] = t[j] - n[j] - borrow for j = 0 .. count-1.  decl, movl and
 * leal all leave the carry flag untouched, so the borrow chains from
 * one sbbl to the next; jge acts on the sign of %ecx after the decl.
 */
915.L016sub:
916	sbbl	(%ebp,%edx,4),%eax
917	movl	%eax,(%edi,%edx,4)
918	decl	%ecx
919	movl	4(%esi,%edx,4),%eax
920	leal	1(%edx),%edx
921	jge	.L016sub
/*
 * %eax = t[count] minus the final borrow, %edx = its complement.  The
 * two are used as select masks below.
 * NOTE(review): the select is only meaningful when %eax is 0 or all
 * ones here -- presumably guaranteed by the preceding arithmetic;
 * confirm.
 */
922	sbbl	$0,%eax
923	movl	$-1,%edx
924	xorl	%eax,%edx
/* The target is the very next label; the jump skips the alignment gap. */
925	jmp	.L017copy
926.align	16
/*
 * For k = count-1 down to 0:
 *   result[k] = (t[k] AND %eax) OR (result[k] AND %edx)
 * with no branch on the data.  Each t[k] is then overwritten with %ecx,
 * which is -1 after the loop above, so the intermediate values do not
 * stay in the scratch vector.
 */
927.L017copy:
928	movl	32(%esp,%ebx,4),%esi
929	movl	(%edi,%ebx,4),%ebp
930	movl	%ecx,32(%esp,%ebx,4)
931	andl	%eax,%esi
932	andl	%edx,%ebp
933	orl	%esi,%ebp
934	movl	%ebp,(%edi,%ebx,4)
935	decl	%ebx
936	jge	.L017copy
/* Drop the scratch frame by reloading the saved %esp; return value 1. */
937	movl	24(%esp),%esp
938	movl	$1,%eax
/*
 * Common exit.  Also reached directly from the count check at the top
 * of the function, in which case %eax is still 0.
 */
939.L000just_leave:
940	popl	%edi
941	popl	%esi
942	popl	%ebx
943	popl	%ebp
944	ret
945.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by " followed by the
 * address appro@openssl.org enclosed in angle brackets.
 */
946.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
947.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
948.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
949.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
950.byte	111,114,103,62,0
/*
 * Common-symbol declaration (16 bytes, alignment 4) for the capability
 * vector whose bit 26 is tested by bn_mul_mont.
 */
951.comm	OPENSSL_ia32cap_P,16,4
952#endif
953