xref: /freebsd/sys/crypto/openssl/i386/x86-mont.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43)
1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from x86-mont.pl. */
2bc3d5698SJohn Baldwin#ifdef PIC
/*
 * bn_mul_mont -- PIC build, i386, arguments on the stack.
 *
 * Six stack arguments are consumed.  Judging only from how this routine
 * uses them:
 *   arg0  result vector (written by the .L017sub / .L018copy tail loops)
 *   arg1  first multiplicand vector
 *   arg2  second multiplicand vector
 *   arg3  vector subtracted from the temporary in the tail (the modulus)
 *   arg4  pointer to one word that seeds the per-pass multiplier
 *         (presumably the Montgomery n0 constant -- confirm in x86-mont.pl)
 *   arg5  number of 32-bit words in each vector
 * Returns 0 in %eax immediately when arg5 < 4 (signed compare),
 * otherwise 1 after the result has been written to arg0.
 *
 * Frame on the re-aligned stack:
 *    0(%esp)  word count minus one (squaring path only)
 *    4(%esp)  arg0     8(%esp)  arg1    12(%esp)  arg2, later a cursor/index
 *   16(%esp)  arg3    20(%esp)  *arg4   24(%esp)  caller's %esp
 *   28(%esp)  end of the arg2 vector (integer multiply path only)
 *   32(%esp)  start of the temporary vector, called tp[] below
 */
3bc3d5698SJohn Baldwin.text
4bc3d5698SJohn Baldwin.globl	bn_mul_mont
5bc3d5698SJohn Baldwin.type	bn_mul_mont,@function
6bc3d5698SJohn Baldwin.align	16
7bc3d5698SJohn Baldwinbn_mul_mont:
8bc3d5698SJohn Baldwin.L_bn_mul_mont_begin:
9*c0855eaaSJohn Baldwin	#ifdef __CET__
10*c0855eaaSJohn Baldwin
/* Bytes f3 0f 1e fb encode endbr32, emitted only when __CET__ is defined. */
11*c0855eaaSJohn Baldwin.byte	243,15,30,251
12*c0855eaaSJohn Baldwin	#endif
13*c0855eaaSJohn Baldwin
/* Save four registers; the arguments now start at 20(%esp), arg5 at 40(%esp). */
14bc3d5698SJohn Baldwin	pushl	%ebp
15bc3d5698SJohn Baldwin	pushl	%ebx
16bc3d5698SJohn Baldwin	pushl	%esi
17bc3d5698SJohn Baldwin	pushl	%edi
/* %eax = 0 is the return value used by the early exit below. */
18bc3d5698SJohn Baldwin	xorl	%eax,%eax
19bc3d5698SJohn Baldwin	movl	40(%esp),%edi
20bc3d5698SJohn Baldwin	cmpl	$4,%edi
21bc3d5698SJohn Baldwin	jl	.L000just_leave
/* %esi -> arg0 slot, %edx -> arg1 slot; %ebp = %esp - 32 - 4*(arg5+2). */
22bc3d5698SJohn Baldwin	leal	20(%esp),%esi
23bc3d5698SJohn Baldwin	leal	24(%esp),%edx
24bc3d5698SJohn Baldwin	addl	$2,%edi
25bc3d5698SJohn Baldwin	negl	%edi
26bc3d5698SJohn Baldwin	leal	-32(%esp,%edi,4),%ebp
27bc3d5698SJohn Baldwin	negl	%edi
/*
 * Move the candidate frame down so that its distance from the argument
 * block is a multiple of 2048, then by another 2048 if bit 11 of the two
 * addresses still matches, and finally round down to a 64-byte boundary.
 * NOTE(review): presumably done to limit cache/page aliasing between the
 * frame and the arguments -- confirm against x86-mont.pl.
 */
28bc3d5698SJohn Baldwin	movl	%ebp,%eax
29bc3d5698SJohn Baldwin	subl	%edx,%eax
30bc3d5698SJohn Baldwin	andl	$2047,%eax
31bc3d5698SJohn Baldwin	subl	%eax,%ebp
32bc3d5698SJohn Baldwin	xorl	%ebp,%edx
33bc3d5698SJohn Baldwin	andl	$2048,%edx
34bc3d5698SJohn Baldwin	xorl	$2048,%edx
35bc3d5698SJohn Baldwin	subl	%edx,%ebp
36bc3d5698SJohn Baldwin	andl	$-64,%ebp
/*
 * %edx keeps the caller's %esp.  %esp is first set to the frame address
 * plus a whole number of 4096-byte pages, then stepped down one page at a
 * time with a load at each step until it is no longer above %ebp, so every
 * page between the old and the new stack pointer is touched in order.
 */
37bc3d5698SJohn Baldwin	movl	%esp,%eax
38bc3d5698SJohn Baldwin	subl	%ebp,%eax
39bc3d5698SJohn Baldwin	andl	$-4096,%eax
40bc3d5698SJohn Baldwin	movl	%esp,%edx
41bc3d5698SJohn Baldwin	leal	(%ebp,%eax,1),%esp
42bc3d5698SJohn Baldwin	movl	(%esp),%eax
43bc3d5698SJohn Baldwin	cmpl	%ebp,%esp
44bc3d5698SJohn Baldwin	ja	.L001page_walk
45bc3d5698SJohn Baldwin	jmp	.L002page_walk_done
46bc3d5698SJohn Baldwin.align	16
47bc3d5698SJohn Baldwin.L001page_walk:
48bc3d5698SJohn Baldwin	leal	-4096(%esp),%esp
49bc3d5698SJohn Baldwin	movl	(%esp),%eax
50bc3d5698SJohn Baldwin	cmpl	%ebp,%esp
51bc3d5698SJohn Baldwin	ja	.L001page_walk
52bc3d5698SJohn Baldwin.L002page_walk_done:
/*
 * Copy arg0..arg3 and the word arg4 points to into the new frame, set
 * %ebx = arg5 - 1 (%edi still holds arg5 + 2) and save the caller's %esp.
 */
53bc3d5698SJohn Baldwin	movl	(%esi),%eax
54bc3d5698SJohn Baldwin	movl	4(%esi),%ebx
55bc3d5698SJohn Baldwin	movl	8(%esi),%ecx
56bc3d5698SJohn Baldwin	movl	12(%esi),%ebp
57bc3d5698SJohn Baldwin	movl	16(%esi),%esi
58bc3d5698SJohn Baldwin	movl	(%esi),%esi
59bc3d5698SJohn Baldwin	movl	%eax,4(%esp)
60bc3d5698SJohn Baldwin	movl	%ebx,8(%esp)
61bc3d5698SJohn Baldwin	movl	%ecx,12(%esp)
62bc3d5698SJohn Baldwin	movl	%ebp,16(%esp)
63bc3d5698SJohn Baldwin	movl	%esi,20(%esp)
64bc3d5698SJohn Baldwin	leal	-3(%edi),%ebx
65bc3d5698SJohn Baldwin	movl	%edx,24(%esp)
/*
 * Position-independent address of OPENSSL_ia32cap_P: call/pop yields the
 * address of .L003PIC_me_up, to which the link-time offset is added.
 * Bit 26 of the first word selects the MMX/SSE2 code below; when it is
 * clear the integer-only code at .L004non_sse2 runs instead.
 */
66bc3d5698SJohn Baldwin	call	.L003PIC_me_up
67bc3d5698SJohn Baldwin.L003PIC_me_up:
68bc3d5698SJohn Baldwin	popl	%eax
69bc3d5698SJohn Baldwin	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
70bc3d5698SJohn Baldwin	btl	$26,(%eax)
71bc3d5698SJohn Baldwin	jnc	.L004non_sse2
/*
 * SSE2 path.  %mm7 = 0x00000000ffffffff (low-word mask), %esi = arg1,
 * %edi = arg2, %ebp = arg3, %edx = outer index, %ecx = inner index.
 * First pass: %mm4 = arg2[0]; %mm5 becomes the pass multiplier
 * low32(arg1[0]*arg2[0]) * 20(%esp); %mm2 and %mm3 carry the running
 * 64-bit sums of the arg1 products and the arg3 products respectively.
 */
72bc3d5698SJohn Baldwin	movl	$-1,%eax
73bc3d5698SJohn Baldwin	movd	%eax,%mm7
74bc3d5698SJohn Baldwin	movl	8(%esp),%esi
75bc3d5698SJohn Baldwin	movl	12(%esp),%edi
76bc3d5698SJohn Baldwin	movl	16(%esp),%ebp
77bc3d5698SJohn Baldwin	xorl	%edx,%edx
78bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
79bc3d5698SJohn Baldwin	movd	(%edi),%mm4
80bc3d5698SJohn Baldwin	movd	(%esi),%mm5
81bc3d5698SJohn Baldwin	movd	(%ebp),%mm3
82bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm5
83bc3d5698SJohn Baldwin	movq	%mm5,%mm2
84bc3d5698SJohn Baldwin	movq	%mm5,%mm0
85bc3d5698SJohn Baldwin	pand	%mm7,%mm0
86bc3d5698SJohn Baldwin	pmuludq	20(%esp),%mm5
87bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm3
88bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
89bc3d5698SJohn Baldwin	movd	4(%ebp),%mm1
90bc3d5698SJohn Baldwin	movd	4(%esi),%mm0
91bc3d5698SJohn Baldwin	psrlq	$32,%mm2
92bc3d5698SJohn Baldwin	psrlq	$32,%mm3
93bc3d5698SJohn Baldwin	incl	%ecx
94bc3d5698SJohn Baldwin.align	16
95bc3d5698SJohn Baldwin.L0051st:
/*
 * For each word index %ecx below %ebx: add arg1[%ecx]*%mm4 to %mm2 and
 * arg3[%ecx]*%mm5 plus the low word of %mm2 to %mm3, store the low word of
 * %mm3 at tp[%ecx-1], keep the high halves as carries, and preload the
 * next arg1/arg3 words.
 */
96bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm0
97bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm1
98bc3d5698SJohn Baldwin	paddq	%mm0,%mm2
99bc3d5698SJohn Baldwin	paddq	%mm1,%mm3
100bc3d5698SJohn Baldwin	movq	%mm2,%mm0
101bc3d5698SJohn Baldwin	pand	%mm7,%mm0
102bc3d5698SJohn Baldwin	movd	4(%ebp,%ecx,4),%mm1
103bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
104bc3d5698SJohn Baldwin	movd	4(%esi,%ecx,4),%mm0
105bc3d5698SJohn Baldwin	psrlq	$32,%mm2
106bc3d5698SJohn Baldwin	movd	%mm3,28(%esp,%ecx,4)
107bc3d5698SJohn Baldwin	psrlq	$32,%mm3
108bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
109bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
110bc3d5698SJohn Baldwin	jl	.L0051st
/* Last word of the first pass; the two carries are summed and stored as a 64-bit value at tp[%ebx]. */
111bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm0
112bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm1
113bc3d5698SJohn Baldwin	paddq	%mm0,%mm2
114bc3d5698SJohn Baldwin	paddq	%mm1,%mm3
115bc3d5698SJohn Baldwin	movq	%mm2,%mm0
116bc3d5698SJohn Baldwin	pand	%mm7,%mm0
117bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
118bc3d5698SJohn Baldwin	movd	%mm3,28(%esp,%ecx,4)
119bc3d5698SJohn Baldwin	psrlq	$32,%mm2
120bc3d5698SJohn Baldwin	psrlq	$32,%mm3
121bc3d5698SJohn Baldwin	paddq	%mm2,%mm3
122bc3d5698SJohn Baldwin	movq	%mm3,32(%esp,%ebx,4)
123bc3d5698SJohn Baldwin	incl	%edx
124bc3d5698SJohn Baldwin.L006outer:
/*
 * Remaining passes, one per word of arg2 (%edx = 1 .. %ebx).  Same shape as
 * the first pass, except that the previous contents of tp[] are added in
 * through %mm6.
 */
125bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
126bc3d5698SJohn Baldwin	movd	(%edi,%edx,4),%mm4
127bc3d5698SJohn Baldwin	movd	(%esi),%mm5
128bc3d5698SJohn Baldwin	movd	32(%esp),%mm6
129bc3d5698SJohn Baldwin	movd	(%ebp),%mm3
130bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm5
131bc3d5698SJohn Baldwin	paddq	%mm6,%mm5
132bc3d5698SJohn Baldwin	movq	%mm5,%mm0
133bc3d5698SJohn Baldwin	movq	%mm5,%mm2
134bc3d5698SJohn Baldwin	pand	%mm7,%mm0
135bc3d5698SJohn Baldwin	pmuludq	20(%esp),%mm5
136bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm3
137bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
138bc3d5698SJohn Baldwin	movd	36(%esp),%mm6
139bc3d5698SJohn Baldwin	movd	4(%ebp),%mm1
140bc3d5698SJohn Baldwin	movd	4(%esi),%mm0
141bc3d5698SJohn Baldwin	psrlq	$32,%mm2
142bc3d5698SJohn Baldwin	psrlq	$32,%mm3
143bc3d5698SJohn Baldwin	paddq	%mm6,%mm2
144bc3d5698SJohn Baldwin	incl	%ecx
/* %ebx doubles as the inner-loop down-counter here; it is restored from %ecx after the loop. */
145bc3d5698SJohn Baldwin	decl	%ebx
146bc3d5698SJohn Baldwin.L007inner:
147bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm0
148bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm1
149bc3d5698SJohn Baldwin	paddq	%mm0,%mm2
150bc3d5698SJohn Baldwin	paddq	%mm1,%mm3
151bc3d5698SJohn Baldwin	movq	%mm2,%mm0
152bc3d5698SJohn Baldwin	movd	36(%esp,%ecx,4),%mm6
153bc3d5698SJohn Baldwin	pand	%mm7,%mm0
154bc3d5698SJohn Baldwin	movd	4(%ebp,%ecx,4),%mm1
155bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
156bc3d5698SJohn Baldwin	movd	4(%esi,%ecx,4),%mm0
157bc3d5698SJohn Baldwin	psrlq	$32,%mm2
158bc3d5698SJohn Baldwin	movd	%mm3,28(%esp,%ecx,4)
159bc3d5698SJohn Baldwin	psrlq	$32,%mm3
160bc3d5698SJohn Baldwin	paddq	%mm6,%mm2
/* The jnz below tests the flags of this decl; the leal in between does not modify flags. */
161bc3d5698SJohn Baldwin	decl	%ebx
162bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
163bc3d5698SJohn Baldwin	jnz	.L007inner
164bc3d5698SJohn Baldwin	movl	%ecx,%ebx
165bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm0
166bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm1
167bc3d5698SJohn Baldwin	paddq	%mm0,%mm2
168bc3d5698SJohn Baldwin	paddq	%mm1,%mm3
169bc3d5698SJohn Baldwin	movq	%mm2,%mm0
170bc3d5698SJohn Baldwin	pand	%mm7,%mm0
171bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
172bc3d5698SJohn Baldwin	movd	%mm3,28(%esp,%ecx,4)
173bc3d5698SJohn Baldwin	psrlq	$32,%mm2
174bc3d5698SJohn Baldwin	psrlq	$32,%mm3
175bc3d5698SJohn Baldwin	movd	36(%esp,%ebx,4),%mm6
176bc3d5698SJohn Baldwin	paddq	%mm2,%mm3
177bc3d5698SJohn Baldwin	paddq	%mm6,%mm3
178bc3d5698SJohn Baldwin	movq	%mm3,32(%esp,%ebx,4)
179bc3d5698SJohn Baldwin	leal	1(%edx),%edx
180bc3d5698SJohn Baldwin	cmpl	%ebx,%edx
181bc3d5698SJohn Baldwin	jle	.L006outer
/* Leave MMX state before continuing with the integer tail. */
182bc3d5698SJohn Baldwin	emms
183bc3d5698SJohn Baldwin	jmp	.L008common_tail
184bc3d5698SJohn Baldwin.align	16
185bc3d5698SJohn Baldwin.L004non_sse2:
/*
 * Integer-only path.  %esi = arg1, %edi = arg2, %eax = address just past
 * the last word of arg2.  %ebp = (word count & 1) | (arg1 - arg2), so the
 * jz below (flags from the orl; the movl in between leaves them alone)
 * takes the squaring path only when arg1 == arg2 and the count is even.
 * %edi is then reloaded with arg2[0].
 */
186bc3d5698SJohn Baldwin	movl	8(%esp),%esi
187bc3d5698SJohn Baldwin	leal	1(%ebx),%ebp
188bc3d5698SJohn Baldwin	movl	12(%esp),%edi
189bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
190bc3d5698SJohn Baldwin	movl	%esi,%edx
191bc3d5698SJohn Baldwin	andl	$1,%ebp
192bc3d5698SJohn Baldwin	subl	%edi,%edx
193bc3d5698SJohn Baldwin	leal	4(%edi,%ebx,4),%eax
194bc3d5698SJohn Baldwin	orl	%edx,%ebp
195bc3d5698SJohn Baldwin	movl	(%edi),%edi
196bc3d5698SJohn Baldwin	jz	.L009bn_sqr_mont
/* 28(%esp) = end-of-arg2 pointer, compared against the arg2 cursor after each pass. */
197bc3d5698SJohn Baldwin	movl	%eax,28(%esp)
198bc3d5698SJohn Baldwin	movl	(%esi),%eax
199bc3d5698SJohn Baldwin	xorl	%edx,%edx
200bc3d5698SJohn Baldwin.align	16
201bc3d5698SJohn Baldwin.L010mull:
/* First pass: tp[j] = low word of arg1[j]*arg2[0] + carry, carry kept in %edx. */
202bc3d5698SJohn Baldwin	movl	%edx,%ebp
203bc3d5698SJohn Baldwin	mull	%edi
204bc3d5698SJohn Baldwin	addl	%eax,%ebp
205bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
206bc3d5698SJohn Baldwin	adcl	$0,%edx
207bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
208bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
209bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
210bc3d5698SJohn Baldwin	jl	.L010mull
/*
 * Last word of the first pass.  %edi becomes the pass multiplier
 * 20(%esp) * tp[0]; the top words of tp[] are stored, the word above them
 * is zeroed, and %esi is switched to arg3 for the reduction loop.
 */
211bc3d5698SJohn Baldwin	movl	%edx,%ebp
212bc3d5698SJohn Baldwin	mull	%edi
213bc3d5698SJohn Baldwin	movl	20(%esp),%edi
214bc3d5698SJohn Baldwin	addl	%ebp,%eax
215bc3d5698SJohn Baldwin	movl	16(%esp),%esi
216bc3d5698SJohn Baldwin	adcl	$0,%edx
217bc3d5698SJohn Baldwin	imull	32(%esp),%edi
218bc3d5698SJohn Baldwin	movl	%eax,32(%esp,%ebx,4)
219bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
220bc3d5698SJohn Baldwin	movl	%edx,36(%esp,%ebx,4)
221bc3d5698SJohn Baldwin	movl	%ecx,40(%esp,%ebx,4)
222bc3d5698SJohn Baldwin	movl	(%esi),%eax
223bc3d5698SJohn Baldwin	mull	%edi
/* Only the carry of this add is kept: %eax is overwritten by the next load, which preserves flags. */
224bc3d5698SJohn Baldwin	addl	32(%esp),%eax
225bc3d5698SJohn Baldwin	movl	4(%esi),%eax
226bc3d5698SJohn Baldwin	adcl	$0,%edx
227bc3d5698SJohn Baldwin	incl	%ecx
228bc3d5698SJohn Baldwin	jmp	.L0112ndmadd
229bc3d5698SJohn Baldwin.align	16
230bc3d5698SJohn Baldwin.L0121stmadd:
/* Later passes: tp[j] = low word of tp[j] + arg1[j]*%edi + carry (%edi = current arg2 word). */
231bc3d5698SJohn Baldwin	movl	%edx,%ebp
232bc3d5698SJohn Baldwin	mull	%edi
233bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%ebp
234bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
235bc3d5698SJohn Baldwin	adcl	$0,%edx
236bc3d5698SJohn Baldwin	addl	%eax,%ebp
237bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
238bc3d5698SJohn Baldwin	adcl	$0,%edx
239bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
240bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
241bc3d5698SJohn Baldwin	jl	.L0121stmadd
/* Last word of the pass, new multiplier in %edi, carry propagated into the two top words of tp[]. */
242bc3d5698SJohn Baldwin	movl	%edx,%ebp
243bc3d5698SJohn Baldwin	mull	%edi
244bc3d5698SJohn Baldwin	addl	32(%esp,%ebx,4),%eax
245bc3d5698SJohn Baldwin	movl	20(%esp),%edi
246bc3d5698SJohn Baldwin	adcl	$0,%edx
247bc3d5698SJohn Baldwin	movl	16(%esp),%esi
248bc3d5698SJohn Baldwin	addl	%eax,%ebp
249bc3d5698SJohn Baldwin	adcl	$0,%edx
250bc3d5698SJohn Baldwin	imull	32(%esp),%edi
251bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
252bc3d5698SJohn Baldwin	addl	36(%esp,%ebx,4),%edx
253bc3d5698SJohn Baldwin	movl	%ebp,32(%esp,%ebx,4)
254bc3d5698SJohn Baldwin	adcl	$0,%ecx
255bc3d5698SJohn Baldwin	movl	(%esi),%eax
256bc3d5698SJohn Baldwin	movl	%edx,36(%esp,%ebx,4)
257bc3d5698SJohn Baldwin	movl	%ecx,40(%esp,%ebx,4)
258bc3d5698SJohn Baldwin	mull	%edi
259bc3d5698SJohn Baldwin	addl	32(%esp),%eax
260bc3d5698SJohn Baldwin	movl	4(%esi),%eax
261bc3d5698SJohn Baldwin	adcl	$0,%edx
262bc3d5698SJohn Baldwin	movl	$1,%ecx
263bc3d5698SJohn Baldwin.align	16
264bc3d5698SJohn Baldwin.L0112ndmadd:
/*
 * Reduction loop: tp[j-1] = low word of tp[j] + arg3[j]*%edi + carry.
 * The store offset 24 (instead of 28) shifts the vector down by one word.
 */
265bc3d5698SJohn Baldwin	movl	%edx,%ebp
266bc3d5698SJohn Baldwin	mull	%edi
267bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%ebp
268bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
269bc3d5698SJohn Baldwin	adcl	$0,%edx
270bc3d5698SJohn Baldwin	addl	%eax,%ebp
271bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
272bc3d5698SJohn Baldwin	adcl	$0,%edx
273bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
274bc3d5698SJohn Baldwin	movl	%ebp,24(%esp,%ecx,4)
275bc3d5698SJohn Baldwin	jl	.L0112ndmadd
/*
 * Finish the pass, fold the carry into the two top words, advance the arg2
 * cursor kept at 12(%esp) by one word and stop when it reaches 28(%esp).
 */
276bc3d5698SJohn Baldwin	movl	%edx,%ebp
277bc3d5698SJohn Baldwin	mull	%edi
278bc3d5698SJohn Baldwin	addl	32(%esp,%ebx,4),%ebp
279bc3d5698SJohn Baldwin	adcl	$0,%edx
280bc3d5698SJohn Baldwin	addl	%eax,%ebp
281bc3d5698SJohn Baldwin	adcl	$0,%edx
282bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ebx,4)
283bc3d5698SJohn Baldwin	xorl	%eax,%eax
284bc3d5698SJohn Baldwin	movl	12(%esp),%ecx
285bc3d5698SJohn Baldwin	addl	36(%esp,%ebx,4),%edx
286bc3d5698SJohn Baldwin	adcl	40(%esp,%ebx,4),%eax
287bc3d5698SJohn Baldwin	leal	4(%ecx),%ecx
288bc3d5698SJohn Baldwin	movl	%edx,32(%esp,%ebx,4)
289bc3d5698SJohn Baldwin	cmpl	28(%esp),%ecx
290bc3d5698SJohn Baldwin	movl	%eax,36(%esp,%ebx,4)
291bc3d5698SJohn Baldwin	je	.L008common_tail
/* Load the next arg2 word and restart the multiply-accumulate over arg1. */
292bc3d5698SJohn Baldwin	movl	(%ecx),%edi
293bc3d5698SJohn Baldwin	movl	8(%esp),%esi
294bc3d5698SJohn Baldwin	movl	%ecx,12(%esp)
295bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
296bc3d5698SJohn Baldwin	xorl	%edx,%edx
297bc3d5698SJohn Baldwin	movl	(%esi),%eax
298bc3d5698SJohn Baldwin	jmp	.L0121stmadd
299bc3d5698SJohn Baldwin.align	16
300bc3d5698SJohn Baldwin.L009bn_sqr_mont:
/*
 * Squaring path (arg1 == arg2, even word count).  0(%esp) = word count - 1,
 * 12(%esp) = index of the current word (starts at 0).  tp[0] receives the
 * low word of arg1[0] squared; the high word is split into its low bit
 * (%ebx) and the rest (%edx) because the cross products below are doubled.
 */
301bc3d5698SJohn Baldwin	movl	%ebx,(%esp)
302bc3d5698SJohn Baldwin	movl	%ecx,12(%esp)
303bc3d5698SJohn Baldwin	movl	%edi,%eax
304bc3d5698SJohn Baldwin	mull	%edi
305bc3d5698SJohn Baldwin	movl	%eax,32(%esp)
306bc3d5698SJohn Baldwin	movl	%edx,%ebx
307bc3d5698SJohn Baldwin	shrl	$1,%edx
308bc3d5698SJohn Baldwin	andl	$1,%ebx
309bc3d5698SJohn Baldwin	incl	%ecx
310bc3d5698SJohn Baldwin.align	16
311bc3d5698SJohn Baldwin.L013sqr:
/*
 * Cross products arg1[j]*arg1[0] + carry, doubled on the fly: the stored
 * word is 2*%eax + %ebx, and bit 31 of %eax becomes the next %ebx.
 */
312bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
313bc3d5698SJohn Baldwin	movl	%edx,%ebp
314bc3d5698SJohn Baldwin	mull	%edi
315bc3d5698SJohn Baldwin	addl	%ebp,%eax
316bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
317bc3d5698SJohn Baldwin	adcl	$0,%edx
318bc3d5698SJohn Baldwin	leal	(%ebx,%eax,2),%ebp
319bc3d5698SJohn Baldwin	shrl	$31,%eax
320bc3d5698SJohn Baldwin	cmpl	(%esp),%ecx
321bc3d5698SJohn Baldwin	movl	%eax,%ebx
322bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
323bc3d5698SJohn Baldwin	jl	.L013sqr
/* Last cross product, the three top words of tp[], and the pass multiplier 20(%esp) * tp[0] in %edi. */
324bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
325bc3d5698SJohn Baldwin	movl	%edx,%ebp
326bc3d5698SJohn Baldwin	mull	%edi
327bc3d5698SJohn Baldwin	addl	%ebp,%eax
328bc3d5698SJohn Baldwin	movl	20(%esp),%edi
329bc3d5698SJohn Baldwin	adcl	$0,%edx
330bc3d5698SJohn Baldwin	movl	16(%esp),%esi
331bc3d5698SJohn Baldwin	leal	(%ebx,%eax,2),%ebp
332bc3d5698SJohn Baldwin	imull	32(%esp),%edi
333bc3d5698SJohn Baldwin	shrl	$31,%eax
334bc3d5698SJohn Baldwin	movl	%ebp,32(%esp,%ecx,4)
335bc3d5698SJohn Baldwin	leal	(%eax,%edx,2),%ebp
336bc3d5698SJohn Baldwin	movl	(%esi),%eax
337bc3d5698SJohn Baldwin	shrl	$31,%edx
338bc3d5698SJohn Baldwin	movl	%ebp,36(%esp,%ecx,4)
339bc3d5698SJohn Baldwin	movl	%edx,40(%esp,%ecx,4)
340bc3d5698SJohn Baldwin	mull	%edi
341bc3d5698SJohn Baldwin	addl	32(%esp),%eax
342bc3d5698SJohn Baldwin	movl	%ecx,%ebx
343bc3d5698SJohn Baldwin	adcl	$0,%edx
344bc3d5698SJohn Baldwin	movl	4(%esi),%eax
345bc3d5698SJohn Baldwin	movl	$1,%ecx
346bc3d5698SJohn Baldwin.align	16
347bc3d5698SJohn Baldwin.L0143rdmadd:
/*
 * Reduction loop of the squaring path: same recurrence as .L0112ndmadd,
 * but handling two words of arg3 per iteration (%ecx advances by 2).
 */
348bc3d5698SJohn Baldwin	movl	%edx,%ebp
349bc3d5698SJohn Baldwin	mull	%edi
350bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%ebp
351bc3d5698SJohn Baldwin	adcl	$0,%edx
352bc3d5698SJohn Baldwin	addl	%eax,%ebp
353bc3d5698SJohn Baldwin	movl	4(%esi,%ecx,4),%eax
354bc3d5698SJohn Baldwin	adcl	$0,%edx
355bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
356bc3d5698SJohn Baldwin	movl	%edx,%ebp
357bc3d5698SJohn Baldwin	mull	%edi
358bc3d5698SJohn Baldwin	addl	36(%esp,%ecx,4),%ebp
359bc3d5698SJohn Baldwin	leal	2(%ecx),%ecx
360bc3d5698SJohn Baldwin	adcl	$0,%edx
361bc3d5698SJohn Baldwin	addl	%eax,%ebp
362bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
363bc3d5698SJohn Baldwin	adcl	$0,%edx
364bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
365bc3d5698SJohn Baldwin	movl	%ebp,24(%esp,%ecx,4)
366bc3d5698SJohn Baldwin	jl	.L0143rdmadd
/*
 * Finish the pass and fold the carry into the top words.  The loop over
 * words ends when the index saved at 12(%esp) equals %ebx.
 */
367bc3d5698SJohn Baldwin	movl	%edx,%ebp
368bc3d5698SJohn Baldwin	mull	%edi
369bc3d5698SJohn Baldwin	addl	32(%esp,%ebx,4),%ebp
370bc3d5698SJohn Baldwin	adcl	$0,%edx
371bc3d5698SJohn Baldwin	addl	%eax,%ebp
372bc3d5698SJohn Baldwin	adcl	$0,%edx
373bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ebx,4)
374bc3d5698SJohn Baldwin	movl	12(%esp),%ecx
375bc3d5698SJohn Baldwin	xorl	%eax,%eax
376bc3d5698SJohn Baldwin	movl	8(%esp),%esi
377bc3d5698SJohn Baldwin	addl	36(%esp,%ebx,4),%edx
378bc3d5698SJohn Baldwin	adcl	40(%esp,%ebx,4),%eax
379bc3d5698SJohn Baldwin	movl	%edx,32(%esp,%ebx,4)
380bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
381bc3d5698SJohn Baldwin	movl	%eax,36(%esp,%ebx,4)
382bc3d5698SJohn Baldwin	je	.L008common_tail
/*
 * Next word: %edi = arg1[index+1], index incremented and saved; its square
 * is added into tp[index].  The je below uses the flags of the cmpl (the
 * leal after it does not modify flags) and skips the cross-product loop
 * when this was the last word.
 */
383bc3d5698SJohn Baldwin	movl	4(%esi,%ecx,4),%edi
384bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
385bc3d5698SJohn Baldwin	movl	%edi,%eax
386bc3d5698SJohn Baldwin	movl	%ecx,12(%esp)
387bc3d5698SJohn Baldwin	mull	%edi
388bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%eax
389bc3d5698SJohn Baldwin	adcl	$0,%edx
390bc3d5698SJohn Baldwin	movl	%eax,32(%esp,%ecx,4)
391bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
392bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
393bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
394bc3d5698SJohn Baldwin	je	.L015sqrlast
395bc3d5698SJohn Baldwin	movl	%edx,%ebx
396bc3d5698SJohn Baldwin	shrl	$1,%edx
397bc3d5698SJohn Baldwin	andl	$1,%ebx
398bc3d5698SJohn Baldwin.align	16
399bc3d5698SJohn Baldwin.L016sqradd:
/* Doubled cross products of the current word, accumulated into tp[]; %ebx carries between words. */
400bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
401bc3d5698SJohn Baldwin	movl	%edx,%ebp
402bc3d5698SJohn Baldwin	mull	%edi
403bc3d5698SJohn Baldwin	addl	%ebp,%eax
404bc3d5698SJohn Baldwin	leal	(%eax,%eax,1),%ebp
405bc3d5698SJohn Baldwin	adcl	$0,%edx
406bc3d5698SJohn Baldwin	shrl	$31,%eax
407bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%ebp
408bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
409bc3d5698SJohn Baldwin	adcl	$0,%eax
410bc3d5698SJohn Baldwin	addl	%ebx,%ebp
411bc3d5698SJohn Baldwin	adcl	$0,%eax
412bc3d5698SJohn Baldwin	cmpl	(%esp),%ecx
413bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
414bc3d5698SJohn Baldwin	movl	%eax,%ebx
415bc3d5698SJohn Baldwin	jle	.L016sqradd
/* Double the final carry word: %edx = 2*%edx + %ebx, with the overflow collected in %ebp. */
416bc3d5698SJohn Baldwin	movl	%edx,%ebp
417bc3d5698SJohn Baldwin	addl	%edx,%edx
418bc3d5698SJohn Baldwin	shrl	$31,%ebp
419bc3d5698SJohn Baldwin	addl	%ebx,%edx
420bc3d5698SJohn Baldwin	adcl	$0,%ebp
421bc3d5698SJohn Baldwin.L015sqrlast:
/*
 * Store the two top words, compute the next multiplier 20(%esp) * tp[0],
 * restore %ebx from %ecx - 1 and re-enter the reduction loop at index 1.
 */
422bc3d5698SJohn Baldwin	movl	20(%esp),%edi
423bc3d5698SJohn Baldwin	movl	16(%esp),%esi
424bc3d5698SJohn Baldwin	imull	32(%esp),%edi
425bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%edx
426bc3d5698SJohn Baldwin	movl	(%esi),%eax
427bc3d5698SJohn Baldwin	adcl	$0,%ebp
428bc3d5698SJohn Baldwin	movl	%edx,32(%esp,%ecx,4)
429bc3d5698SJohn Baldwin	movl	%ebp,36(%esp,%ecx,4)
430bc3d5698SJohn Baldwin	mull	%edi
431bc3d5698SJohn Baldwin	addl	32(%esp),%eax
432bc3d5698SJohn Baldwin	leal	-1(%ecx),%ebx
433bc3d5698SJohn Baldwin	adcl	$0,%edx
434bc3d5698SJohn Baldwin	movl	$1,%ecx
435bc3d5698SJohn Baldwin	movl	4(%esi),%eax
436bc3d5698SJohn Baldwin	jmp	.L0143rdmadd
437bc3d5698SJohn Baldwin.align	16
438bc3d5698SJohn Baldwin.L008common_tail:
/*
 * Shared tail.  %ebp = arg3, %edi = arg0, %esi = tp, %ecx = %ebx = word
 * count - 1.  The xorl also clears CF, so the subtraction starts with no
 * borrow.
 */
439bc3d5698SJohn Baldwin	movl	16(%esp),%ebp
440bc3d5698SJohn Baldwin	movl	4(%esp),%edi
441bc3d5698SJohn Baldwin	leal	32(%esp),%esi
442bc3d5698SJohn Baldwin	movl	(%esi),%eax
443bc3d5698SJohn Baldwin	movl	%ebx,%ecx
444bc3d5698SJohn Baldwin	xorl	%edx,%edx
445bc3d5698SJohn Baldwin.align	16
446bc3d5698SJohn Baldwin.L017sub:
/*
 * arg0[i] = tp[i] - arg3[i] - borrow.  decl, movl and leal all leave CF
 * untouched, so the borrow chains from one sbbl to the next; the jge tests
 * the sign of %ecx after the decl.
 */
447bc3d5698SJohn Baldwin	sbbl	(%ebp,%edx,4),%eax
448bc3d5698SJohn Baldwin	movl	%eax,(%edi,%edx,4)
449bc3d5698SJohn Baldwin	decl	%ecx
450bc3d5698SJohn Baldwin	movl	4(%esi,%edx,4),%eax
451bc3d5698SJohn Baldwin	leal	1(%edx),%edx
452bc3d5698SJohn Baldwin	jge	.L017sub
/*
 * %eax = word above the top of tp minus the final borrow, used as a select
 * mask, and %edx = its complement.  NOTE(review): the mask is expected to
 * be either 0 or all ones here -- confirm against x86-mont.pl.
 */
453bc3d5698SJohn Baldwin	sbbl	$0,%eax
454bc3d5698SJohn Baldwin	movl	$-1,%edx
455bc3d5698SJohn Baldwin	xorl	%eax,%edx
456bc3d5698SJohn Baldwin	jmp	.L018copy
457bc3d5698SJohn Baldwin.align	16
458bc3d5698SJohn Baldwin.L018copy:
/*
 * Branch-free select, from the top word down to word 0:
 * arg0[i] = (tp[i] & %eax) | (arg0[i] & %edx).  Each tp[i] is overwritten
 * with the value left in %ecx, so the intermediate result does not remain
 * on the stack.
 */
459bc3d5698SJohn Baldwin	movl	32(%esp,%ebx,4),%esi
460bc3d5698SJohn Baldwin	movl	(%edi,%ebx,4),%ebp
461bc3d5698SJohn Baldwin	movl	%ecx,32(%esp,%ebx,4)
462bc3d5698SJohn Baldwin	andl	%eax,%esi
463bc3d5698SJohn Baldwin	andl	%edx,%ebp
464bc3d5698SJohn Baldwin	orl	%esi,%ebp
465bc3d5698SJohn Baldwin	movl	%ebp,(%edi,%ebx,4)
466bc3d5698SJohn Baldwin	decl	%ebx
467bc3d5698SJohn Baldwin	jge	.L018copy
/* Restore the caller's stack pointer saved at 24(%esp) and report success. */
468bc3d5698SJohn Baldwin	movl	24(%esp),%esp
469bc3d5698SJohn Baldwin	movl	$1,%eax
470bc3d5698SJohn Baldwin.L000just_leave:
/* Common exit; also reached directly with %eax = 0 when the word count is below 4. */
471bc3d5698SJohn Baldwin	popl	%edi
472bc3d5698SJohn Baldwin	popl	%esi
473bc3d5698SJohn Baldwin	popl	%ebx
474bc3d5698SJohn Baldwin	popl	%ebp
475bc3d5698SJohn Baldwin	ret
476bc3d5698SJohn Baldwin.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string placed after the function:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 */
477bc3d5698SJohn Baldwin.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
478bc3d5698SJohn Baldwin.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
479bc3d5698SJohn Baldwin.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
480bc3d5698SJohn Baldwin.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
481bc3d5698SJohn Baldwin.byte	111,114,103,62,0
/*
 * Common symbol for the 16-byte capability vector (alignment operand 4).
 * bn_mul_mont tests bit 26 of its first word to choose between the
 * MMX/SSE2 code and the integer-only code.
 */
482bc3d5698SJohn Baldwin.comm	OPENSSL_ia32cap_P,16,4
483*c0855eaaSJohn Baldwin
/*
 * ELF note in .note.gnu.property.  Layout as emitted below:
 *   name size (1f - 0f), descriptor size (4f - 1f), note type 5,
 *   name "GNU", then one property record: type 0xc0000002,
 *   data size (3f - 2f), data word 3, padded to a 4-byte boundary.
 * Per the x86 psABI, note type 5 is NT_GNU_PROPERTY_TYPE_0 and property
 * 0xc0000002 is GNU_PROPERTY_X86_FEATURE_1_AND; value 3 sets both the IBT
 * and SHSTK bits.
 */
484*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
485*c0855eaaSJohn Baldwin	.p2align 2
486*c0855eaaSJohn Baldwin	.long 1f - 0f
487*c0855eaaSJohn Baldwin	.long 4f - 1f
488*c0855eaaSJohn Baldwin	.long 5
489*c0855eaaSJohn Baldwin0:
490*c0855eaaSJohn Baldwin	.asciz "GNU"
491*c0855eaaSJohn Baldwin1:
492*c0855eaaSJohn Baldwin	.p2align 2
493*c0855eaaSJohn Baldwin	.long 0xc0000002
494*c0855eaaSJohn Baldwin	.long 3f - 2f
495*c0855eaaSJohn Baldwin2:
496*c0855eaaSJohn Baldwin	.long 3
497*c0855eaaSJohn Baldwin3:
498*c0855eaaSJohn Baldwin	.p2align 2
499*c0855eaaSJohn Baldwin4:
500bc3d5698SJohn Baldwin#else
501bc3d5698SJohn Baldwin.text
502bc3d5698SJohn Baldwin.globl	bn_mul_mont
503bc3d5698SJohn Baldwin.type	bn_mul_mont,@function
504bc3d5698SJohn Baldwin.align	16
505bc3d5698SJohn Baldwinbn_mul_mont:
506bc3d5698SJohn Baldwin.L_bn_mul_mont_begin:
507*c0855eaaSJohn Baldwin	#ifdef __CET__
508*c0855eaaSJohn Baldwin
509*c0855eaaSJohn Baldwin.byte	243,15,30,251
510*c0855eaaSJohn Baldwin	#endif
511*c0855eaaSJohn Baldwin
512bc3d5698SJohn Baldwin	pushl	%ebp
513bc3d5698SJohn Baldwin	pushl	%ebx
514bc3d5698SJohn Baldwin	pushl	%esi
515bc3d5698SJohn Baldwin	pushl	%edi
516bc3d5698SJohn Baldwin	xorl	%eax,%eax
517bc3d5698SJohn Baldwin	movl	40(%esp),%edi
518bc3d5698SJohn Baldwin	cmpl	$4,%edi
519bc3d5698SJohn Baldwin	jl	.L000just_leave
520bc3d5698SJohn Baldwin	leal	20(%esp),%esi
521bc3d5698SJohn Baldwin	leal	24(%esp),%edx
522bc3d5698SJohn Baldwin	addl	$2,%edi
523bc3d5698SJohn Baldwin	negl	%edi
524bc3d5698SJohn Baldwin	leal	-32(%esp,%edi,4),%ebp
525bc3d5698SJohn Baldwin	negl	%edi
526bc3d5698SJohn Baldwin	movl	%ebp,%eax
527bc3d5698SJohn Baldwin	subl	%edx,%eax
528bc3d5698SJohn Baldwin	andl	$2047,%eax
529bc3d5698SJohn Baldwin	subl	%eax,%ebp
530bc3d5698SJohn Baldwin	xorl	%ebp,%edx
531bc3d5698SJohn Baldwin	andl	$2048,%edx
532bc3d5698SJohn Baldwin	xorl	$2048,%edx
533bc3d5698SJohn Baldwin	subl	%edx,%ebp
534bc3d5698SJohn Baldwin	andl	$-64,%ebp
535bc3d5698SJohn Baldwin	movl	%esp,%eax
536bc3d5698SJohn Baldwin	subl	%ebp,%eax
537bc3d5698SJohn Baldwin	andl	$-4096,%eax
538bc3d5698SJohn Baldwin	movl	%esp,%edx
539bc3d5698SJohn Baldwin	leal	(%ebp,%eax,1),%esp
540bc3d5698SJohn Baldwin	movl	(%esp),%eax
541bc3d5698SJohn Baldwin	cmpl	%ebp,%esp
542bc3d5698SJohn Baldwin	ja	.L001page_walk
543bc3d5698SJohn Baldwin	jmp	.L002page_walk_done
544bc3d5698SJohn Baldwin.align	16
545bc3d5698SJohn Baldwin.L001page_walk:
546bc3d5698SJohn Baldwin	leal	-4096(%esp),%esp
547bc3d5698SJohn Baldwin	movl	(%esp),%eax
548bc3d5698SJohn Baldwin	cmpl	%ebp,%esp
549bc3d5698SJohn Baldwin	ja	.L001page_walk
550bc3d5698SJohn Baldwin.L002page_walk_done:
551bc3d5698SJohn Baldwin	movl	(%esi),%eax
552bc3d5698SJohn Baldwin	movl	4(%esi),%ebx
553bc3d5698SJohn Baldwin	movl	8(%esi),%ecx
554bc3d5698SJohn Baldwin	movl	12(%esi),%ebp
555bc3d5698SJohn Baldwin	movl	16(%esi),%esi
556bc3d5698SJohn Baldwin	movl	(%esi),%esi
557bc3d5698SJohn Baldwin	movl	%eax,4(%esp)
558bc3d5698SJohn Baldwin	movl	%ebx,8(%esp)
559bc3d5698SJohn Baldwin	movl	%ecx,12(%esp)
560bc3d5698SJohn Baldwin	movl	%ebp,16(%esp)
561bc3d5698SJohn Baldwin	movl	%esi,20(%esp)
562bc3d5698SJohn Baldwin	leal	-3(%edi),%ebx
563bc3d5698SJohn Baldwin	movl	%edx,24(%esp)
564bc3d5698SJohn Baldwin	leal	OPENSSL_ia32cap_P,%eax
565bc3d5698SJohn Baldwin	btl	$26,(%eax)
566bc3d5698SJohn Baldwin	jnc	.L003non_sse2
567bc3d5698SJohn Baldwin	movl	$-1,%eax
568bc3d5698SJohn Baldwin	movd	%eax,%mm7
569bc3d5698SJohn Baldwin	movl	8(%esp),%esi
570bc3d5698SJohn Baldwin	movl	12(%esp),%edi
571bc3d5698SJohn Baldwin	movl	16(%esp),%ebp
572bc3d5698SJohn Baldwin	xorl	%edx,%edx
573bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
574bc3d5698SJohn Baldwin	movd	(%edi),%mm4
575bc3d5698SJohn Baldwin	movd	(%esi),%mm5
576bc3d5698SJohn Baldwin	movd	(%ebp),%mm3
577bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm5
578bc3d5698SJohn Baldwin	movq	%mm5,%mm2
579bc3d5698SJohn Baldwin	movq	%mm5,%mm0
580bc3d5698SJohn Baldwin	pand	%mm7,%mm0
581bc3d5698SJohn Baldwin	pmuludq	20(%esp),%mm5
582bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm3
583bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
584bc3d5698SJohn Baldwin	movd	4(%ebp),%mm1
585bc3d5698SJohn Baldwin	movd	4(%esi),%mm0
586bc3d5698SJohn Baldwin	psrlq	$32,%mm2
587bc3d5698SJohn Baldwin	psrlq	$32,%mm3
588bc3d5698SJohn Baldwin	incl	%ecx
589bc3d5698SJohn Baldwin.align	16
590bc3d5698SJohn Baldwin.L0041st:
591bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm0
592bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm1
593bc3d5698SJohn Baldwin	paddq	%mm0,%mm2
594bc3d5698SJohn Baldwin	paddq	%mm1,%mm3
595bc3d5698SJohn Baldwin	movq	%mm2,%mm0
596bc3d5698SJohn Baldwin	pand	%mm7,%mm0
597bc3d5698SJohn Baldwin	movd	4(%ebp,%ecx,4),%mm1
598bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
599bc3d5698SJohn Baldwin	movd	4(%esi,%ecx,4),%mm0
600bc3d5698SJohn Baldwin	psrlq	$32,%mm2
601bc3d5698SJohn Baldwin	movd	%mm3,28(%esp,%ecx,4)
602bc3d5698SJohn Baldwin	psrlq	$32,%mm3
603bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
604bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
605bc3d5698SJohn Baldwin	jl	.L0041st
606bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm0
607bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm1
608bc3d5698SJohn Baldwin	paddq	%mm0,%mm2
609bc3d5698SJohn Baldwin	paddq	%mm1,%mm3
610bc3d5698SJohn Baldwin	movq	%mm2,%mm0
611bc3d5698SJohn Baldwin	pand	%mm7,%mm0
612bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
613bc3d5698SJohn Baldwin	movd	%mm3,28(%esp,%ecx,4)
614bc3d5698SJohn Baldwin	psrlq	$32,%mm2
615bc3d5698SJohn Baldwin	psrlq	$32,%mm3
616bc3d5698SJohn Baldwin	paddq	%mm2,%mm3
617bc3d5698SJohn Baldwin	movq	%mm3,32(%esp,%ebx,4)
618bc3d5698SJohn Baldwin	incl	%edx
619bc3d5698SJohn Baldwin.L005outer:
620bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
621bc3d5698SJohn Baldwin	movd	(%edi,%edx,4),%mm4
622bc3d5698SJohn Baldwin	movd	(%esi),%mm5
623bc3d5698SJohn Baldwin	movd	32(%esp),%mm6
624bc3d5698SJohn Baldwin	movd	(%ebp),%mm3
625bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm5
626bc3d5698SJohn Baldwin	paddq	%mm6,%mm5
627bc3d5698SJohn Baldwin	movq	%mm5,%mm0
628bc3d5698SJohn Baldwin	movq	%mm5,%mm2
629bc3d5698SJohn Baldwin	pand	%mm7,%mm0
630bc3d5698SJohn Baldwin	pmuludq	20(%esp),%mm5
631bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm3
632bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
633bc3d5698SJohn Baldwin	movd	36(%esp),%mm6
634bc3d5698SJohn Baldwin	movd	4(%ebp),%mm1
635bc3d5698SJohn Baldwin	movd	4(%esi),%mm0
636bc3d5698SJohn Baldwin	psrlq	$32,%mm2
637bc3d5698SJohn Baldwin	psrlq	$32,%mm3
638bc3d5698SJohn Baldwin	paddq	%mm6,%mm2
639bc3d5698SJohn Baldwin	incl	%ecx
640bc3d5698SJohn Baldwin	decl	%ebx
641bc3d5698SJohn Baldwin.L006inner:
642bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm0
643bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm1
644bc3d5698SJohn Baldwin	paddq	%mm0,%mm2
645bc3d5698SJohn Baldwin	paddq	%mm1,%mm3
646bc3d5698SJohn Baldwin	movq	%mm2,%mm0
647bc3d5698SJohn Baldwin	movd	36(%esp,%ecx,4),%mm6
648bc3d5698SJohn Baldwin	pand	%mm7,%mm0
649bc3d5698SJohn Baldwin	movd	4(%ebp,%ecx,4),%mm1
650bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
651bc3d5698SJohn Baldwin	movd	4(%esi,%ecx,4),%mm0
652bc3d5698SJohn Baldwin	psrlq	$32,%mm2
653bc3d5698SJohn Baldwin	movd	%mm3,28(%esp,%ecx,4)
654bc3d5698SJohn Baldwin	psrlq	$32,%mm3
655bc3d5698SJohn Baldwin	paddq	%mm6,%mm2
656bc3d5698SJohn Baldwin	decl	%ebx
657bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
658bc3d5698SJohn Baldwin	jnz	.L006inner
659bc3d5698SJohn Baldwin	movl	%ecx,%ebx
660bc3d5698SJohn Baldwin	pmuludq	%mm4,%mm0
661bc3d5698SJohn Baldwin	pmuludq	%mm5,%mm1
662bc3d5698SJohn Baldwin	paddq	%mm0,%mm2
663bc3d5698SJohn Baldwin	paddq	%mm1,%mm3
664bc3d5698SJohn Baldwin	movq	%mm2,%mm0
665bc3d5698SJohn Baldwin	pand	%mm7,%mm0
666bc3d5698SJohn Baldwin	paddq	%mm0,%mm3
667bc3d5698SJohn Baldwin	movd	%mm3,28(%esp,%ecx,4)
668bc3d5698SJohn Baldwin	psrlq	$32,%mm2
669bc3d5698SJohn Baldwin	psrlq	$32,%mm3
670bc3d5698SJohn Baldwin	movd	36(%esp,%ebx,4),%mm6
671bc3d5698SJohn Baldwin	paddq	%mm2,%mm3
672bc3d5698SJohn Baldwin	paddq	%mm6,%mm3
673bc3d5698SJohn Baldwin	movq	%mm3,32(%esp,%ebx,4)
674bc3d5698SJohn Baldwin	leal	1(%edx),%edx
675bc3d5698SJohn Baldwin	cmpl	%ebx,%edx
676bc3d5698SJohn Baldwin	jle	.L005outer
677bc3d5698SJohn Baldwin	emms
678bc3d5698SJohn Baldwin	jmp	.L007common_tail
679bc3d5698SJohn Baldwin.align	16
680bc3d5698SJohn Baldwin.L003non_sse2:
681bc3d5698SJohn Baldwin	movl	8(%esp),%esi
682bc3d5698SJohn Baldwin	leal	1(%ebx),%ebp
683bc3d5698SJohn Baldwin	movl	12(%esp),%edi
684bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
685bc3d5698SJohn Baldwin	movl	%esi,%edx
686bc3d5698SJohn Baldwin	andl	$1,%ebp
687bc3d5698SJohn Baldwin	subl	%edi,%edx
688bc3d5698SJohn Baldwin	leal	4(%edi,%ebx,4),%eax
689bc3d5698SJohn Baldwin	orl	%edx,%ebp
690bc3d5698SJohn Baldwin	movl	(%edi),%edi
691bc3d5698SJohn Baldwin	jz	.L008bn_sqr_mont
692bc3d5698SJohn Baldwin	movl	%eax,28(%esp)
693bc3d5698SJohn Baldwin	movl	(%esi),%eax
694bc3d5698SJohn Baldwin	xorl	%edx,%edx
695bc3d5698SJohn Baldwin.align	16
696bc3d5698SJohn Baldwin.L009mull:
697bc3d5698SJohn Baldwin	movl	%edx,%ebp
698bc3d5698SJohn Baldwin	mull	%edi
699bc3d5698SJohn Baldwin	addl	%eax,%ebp
700bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
701bc3d5698SJohn Baldwin	adcl	$0,%edx
702bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
703bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
704bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
705bc3d5698SJohn Baldwin	jl	.L009mull
706bc3d5698SJohn Baldwin	movl	%edx,%ebp
707bc3d5698SJohn Baldwin	mull	%edi
708bc3d5698SJohn Baldwin	movl	20(%esp),%edi
709bc3d5698SJohn Baldwin	addl	%ebp,%eax
710bc3d5698SJohn Baldwin	movl	16(%esp),%esi
711bc3d5698SJohn Baldwin	adcl	$0,%edx
712bc3d5698SJohn Baldwin	imull	32(%esp),%edi
713bc3d5698SJohn Baldwin	movl	%eax,32(%esp,%ebx,4)
714bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
715bc3d5698SJohn Baldwin	movl	%edx,36(%esp,%ebx,4)
716bc3d5698SJohn Baldwin	movl	%ecx,40(%esp,%ebx,4)
717bc3d5698SJohn Baldwin	movl	(%esi),%eax
718bc3d5698SJohn Baldwin	mull	%edi
719bc3d5698SJohn Baldwin	addl	32(%esp),%eax
720bc3d5698SJohn Baldwin	movl	4(%esi),%eax
721bc3d5698SJohn Baldwin	adcl	$0,%edx
722bc3d5698SJohn Baldwin	incl	%ecx
723bc3d5698SJohn Baldwin	jmp	.L0102ndmadd
724bc3d5698SJohn Baldwin.align	16
725bc3d5698SJohn Baldwin.L0111stmadd:
726bc3d5698SJohn Baldwin	movl	%edx,%ebp
727bc3d5698SJohn Baldwin	mull	%edi
728bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%ebp
729bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
730bc3d5698SJohn Baldwin	adcl	$0,%edx
731bc3d5698SJohn Baldwin	addl	%eax,%ebp
732bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
733bc3d5698SJohn Baldwin	adcl	$0,%edx
734bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
735bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
736bc3d5698SJohn Baldwin	jl	.L0111stmadd
737bc3d5698SJohn Baldwin	movl	%edx,%ebp
738bc3d5698SJohn Baldwin	mull	%edi
739bc3d5698SJohn Baldwin	addl	32(%esp,%ebx,4),%eax
740bc3d5698SJohn Baldwin	movl	20(%esp),%edi
741bc3d5698SJohn Baldwin	adcl	$0,%edx
742bc3d5698SJohn Baldwin	movl	16(%esp),%esi
743bc3d5698SJohn Baldwin	addl	%eax,%ebp
744bc3d5698SJohn Baldwin	adcl	$0,%edx
745bc3d5698SJohn Baldwin	imull	32(%esp),%edi
746bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
747bc3d5698SJohn Baldwin	addl	36(%esp,%ebx,4),%edx
748bc3d5698SJohn Baldwin	movl	%ebp,32(%esp,%ebx,4)
749bc3d5698SJohn Baldwin	adcl	$0,%ecx
750bc3d5698SJohn Baldwin	movl	(%esi),%eax
751bc3d5698SJohn Baldwin	movl	%edx,36(%esp,%ebx,4)
752bc3d5698SJohn Baldwin	movl	%ecx,40(%esp,%ebx,4)
753bc3d5698SJohn Baldwin	mull	%edi
754bc3d5698SJohn Baldwin	addl	32(%esp),%eax
755bc3d5698SJohn Baldwin	movl	4(%esi),%eax
756bc3d5698SJohn Baldwin	adcl	$0,%edx
757bc3d5698SJohn Baldwin	movl	$1,%ecx
758bc3d5698SJohn Baldwin.align	16
759bc3d5698SJohn Baldwin.L0102ndmadd:
760bc3d5698SJohn Baldwin	movl	%edx,%ebp
761bc3d5698SJohn Baldwin	mull	%edi
762bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%ebp
763bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
764bc3d5698SJohn Baldwin	adcl	$0,%edx
765bc3d5698SJohn Baldwin	addl	%eax,%ebp
766bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
767bc3d5698SJohn Baldwin	adcl	$0,%edx
768bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
769bc3d5698SJohn Baldwin	movl	%ebp,24(%esp,%ecx,4)
770bc3d5698SJohn Baldwin	jl	.L0102ndmadd
771bc3d5698SJohn Baldwin	movl	%edx,%ebp
772bc3d5698SJohn Baldwin	mull	%edi
773bc3d5698SJohn Baldwin	addl	32(%esp,%ebx,4),%ebp
774bc3d5698SJohn Baldwin	adcl	$0,%edx
775bc3d5698SJohn Baldwin	addl	%eax,%ebp
776bc3d5698SJohn Baldwin	adcl	$0,%edx
777bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ebx,4)
778bc3d5698SJohn Baldwin	xorl	%eax,%eax
779bc3d5698SJohn Baldwin	movl	12(%esp),%ecx
780bc3d5698SJohn Baldwin	addl	36(%esp,%ebx,4),%edx
781bc3d5698SJohn Baldwin	adcl	40(%esp,%ebx,4),%eax
782bc3d5698SJohn Baldwin	leal	4(%ecx),%ecx
783bc3d5698SJohn Baldwin	movl	%edx,32(%esp,%ebx,4)
784bc3d5698SJohn Baldwin	cmpl	28(%esp),%ecx
785bc3d5698SJohn Baldwin	movl	%eax,36(%esp,%ebx,4)
786bc3d5698SJohn Baldwin	je	.L007common_tail
787bc3d5698SJohn Baldwin	movl	(%ecx),%edi
788bc3d5698SJohn Baldwin	movl	8(%esp),%esi
789bc3d5698SJohn Baldwin	movl	%ecx,12(%esp)
790bc3d5698SJohn Baldwin	xorl	%ecx,%ecx
791bc3d5698SJohn Baldwin	xorl	%edx,%edx
792bc3d5698SJohn Baldwin	movl	(%esi),%eax
793bc3d5698SJohn Baldwin	jmp	.L0111stmadd
794bc3d5698SJohn Baldwin.align	16
795bc3d5698SJohn Baldwin.L008bn_sqr_mont:
796bc3d5698SJohn Baldwin	movl	%ebx,(%esp)
797bc3d5698SJohn Baldwin	movl	%ecx,12(%esp)
798bc3d5698SJohn Baldwin	movl	%edi,%eax
799bc3d5698SJohn Baldwin	mull	%edi
800bc3d5698SJohn Baldwin	movl	%eax,32(%esp)
801bc3d5698SJohn Baldwin	movl	%edx,%ebx
802bc3d5698SJohn Baldwin	shrl	$1,%edx
803bc3d5698SJohn Baldwin	andl	$1,%ebx
804bc3d5698SJohn Baldwin	incl	%ecx
805bc3d5698SJohn Baldwin.align	16
806bc3d5698SJohn Baldwin.L012sqr:
807bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
808bc3d5698SJohn Baldwin	movl	%edx,%ebp
809bc3d5698SJohn Baldwin	mull	%edi
810bc3d5698SJohn Baldwin	addl	%ebp,%eax
811bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
812bc3d5698SJohn Baldwin	adcl	$0,%edx
813bc3d5698SJohn Baldwin	leal	(%ebx,%eax,2),%ebp
814bc3d5698SJohn Baldwin	shrl	$31,%eax
815bc3d5698SJohn Baldwin	cmpl	(%esp),%ecx
816bc3d5698SJohn Baldwin	movl	%eax,%ebx
817bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
818bc3d5698SJohn Baldwin	jl	.L012sqr
819bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
820bc3d5698SJohn Baldwin	movl	%edx,%ebp
821bc3d5698SJohn Baldwin	mull	%edi
822bc3d5698SJohn Baldwin	addl	%ebp,%eax
823bc3d5698SJohn Baldwin	movl	20(%esp),%edi
824bc3d5698SJohn Baldwin	adcl	$0,%edx
825bc3d5698SJohn Baldwin	movl	16(%esp),%esi
826bc3d5698SJohn Baldwin	leal	(%ebx,%eax,2),%ebp
827bc3d5698SJohn Baldwin	imull	32(%esp),%edi
828bc3d5698SJohn Baldwin	shrl	$31,%eax
829bc3d5698SJohn Baldwin	movl	%ebp,32(%esp,%ecx,4)
830bc3d5698SJohn Baldwin	leal	(%eax,%edx,2),%ebp
831bc3d5698SJohn Baldwin	movl	(%esi),%eax
832bc3d5698SJohn Baldwin	shrl	$31,%edx
833bc3d5698SJohn Baldwin	movl	%ebp,36(%esp,%ecx,4)
834bc3d5698SJohn Baldwin	movl	%edx,40(%esp,%ecx,4)
835bc3d5698SJohn Baldwin	mull	%edi
836bc3d5698SJohn Baldwin	addl	32(%esp),%eax
837bc3d5698SJohn Baldwin	movl	%ecx,%ebx
838bc3d5698SJohn Baldwin	adcl	$0,%edx
839bc3d5698SJohn Baldwin	movl	4(%esi),%eax
840bc3d5698SJohn Baldwin	movl	$1,%ecx
841bc3d5698SJohn Baldwin.align	16
842bc3d5698SJohn Baldwin.L0133rdmadd:
843bc3d5698SJohn Baldwin	movl	%edx,%ebp
844bc3d5698SJohn Baldwin	mull	%edi
845bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%ebp
846bc3d5698SJohn Baldwin	adcl	$0,%edx
847bc3d5698SJohn Baldwin	addl	%eax,%ebp
848bc3d5698SJohn Baldwin	movl	4(%esi,%ecx,4),%eax
849bc3d5698SJohn Baldwin	adcl	$0,%edx
850bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
851bc3d5698SJohn Baldwin	movl	%edx,%ebp
852bc3d5698SJohn Baldwin	mull	%edi
853bc3d5698SJohn Baldwin	addl	36(%esp,%ecx,4),%ebp
854bc3d5698SJohn Baldwin	leal	2(%ecx),%ecx
855bc3d5698SJohn Baldwin	adcl	$0,%edx
856bc3d5698SJohn Baldwin	addl	%eax,%ebp
857bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
858bc3d5698SJohn Baldwin	adcl	$0,%edx
859bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
860bc3d5698SJohn Baldwin	movl	%ebp,24(%esp,%ecx,4)
861bc3d5698SJohn Baldwin	jl	.L0133rdmadd
862bc3d5698SJohn Baldwin	movl	%edx,%ebp
863bc3d5698SJohn Baldwin	mull	%edi
864bc3d5698SJohn Baldwin	addl	32(%esp,%ebx,4),%ebp
865bc3d5698SJohn Baldwin	adcl	$0,%edx
866bc3d5698SJohn Baldwin	addl	%eax,%ebp
867bc3d5698SJohn Baldwin	adcl	$0,%edx
868bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ebx,4)
869bc3d5698SJohn Baldwin	movl	12(%esp),%ecx
870bc3d5698SJohn Baldwin	xorl	%eax,%eax
871bc3d5698SJohn Baldwin	movl	8(%esp),%esi
872bc3d5698SJohn Baldwin	addl	36(%esp,%ebx,4),%edx
873bc3d5698SJohn Baldwin	adcl	40(%esp,%ebx,4),%eax
874bc3d5698SJohn Baldwin	movl	%edx,32(%esp,%ebx,4)
875bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
876bc3d5698SJohn Baldwin	movl	%eax,36(%esp,%ebx,4)
877bc3d5698SJohn Baldwin	je	.L007common_tail
878bc3d5698SJohn Baldwin	movl	4(%esi,%ecx,4),%edi
879bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
880bc3d5698SJohn Baldwin	movl	%edi,%eax
881bc3d5698SJohn Baldwin	movl	%ecx,12(%esp)
882bc3d5698SJohn Baldwin	mull	%edi
883bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%eax
884bc3d5698SJohn Baldwin	adcl	$0,%edx
885bc3d5698SJohn Baldwin	movl	%eax,32(%esp,%ecx,4)
886bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
887bc3d5698SJohn Baldwin	cmpl	%ebx,%ecx
888bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
889bc3d5698SJohn Baldwin	je	.L014sqrlast
890bc3d5698SJohn Baldwin	movl	%edx,%ebx
891bc3d5698SJohn Baldwin	shrl	$1,%edx
892bc3d5698SJohn Baldwin	andl	$1,%ebx
893bc3d5698SJohn Baldwin.align	16
894bc3d5698SJohn Baldwin.L015sqradd:
895bc3d5698SJohn Baldwin	movl	(%esi,%ecx,4),%eax
896bc3d5698SJohn Baldwin	movl	%edx,%ebp
897bc3d5698SJohn Baldwin	mull	%edi
898bc3d5698SJohn Baldwin	addl	%ebp,%eax
899bc3d5698SJohn Baldwin	leal	(%eax,%eax,1),%ebp
900bc3d5698SJohn Baldwin	adcl	$0,%edx
901bc3d5698SJohn Baldwin	shrl	$31,%eax
902bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%ebp
903bc3d5698SJohn Baldwin	leal	1(%ecx),%ecx
904bc3d5698SJohn Baldwin	adcl	$0,%eax
905bc3d5698SJohn Baldwin	addl	%ebx,%ebp
906bc3d5698SJohn Baldwin	adcl	$0,%eax
907bc3d5698SJohn Baldwin	cmpl	(%esp),%ecx
908bc3d5698SJohn Baldwin	movl	%ebp,28(%esp,%ecx,4)
909bc3d5698SJohn Baldwin	movl	%eax,%ebx
910bc3d5698SJohn Baldwin	jle	.L015sqradd
911bc3d5698SJohn Baldwin	movl	%edx,%ebp
912bc3d5698SJohn Baldwin	addl	%edx,%edx
913bc3d5698SJohn Baldwin	shrl	$31,%ebp
914bc3d5698SJohn Baldwin	addl	%ebx,%edx
915bc3d5698SJohn Baldwin	adcl	$0,%ebp
916bc3d5698SJohn Baldwin.L014sqrlast:
917bc3d5698SJohn Baldwin	movl	20(%esp),%edi
918bc3d5698SJohn Baldwin	movl	16(%esp),%esi
919bc3d5698SJohn Baldwin	imull	32(%esp),%edi
920bc3d5698SJohn Baldwin	addl	32(%esp,%ecx,4),%edx
921bc3d5698SJohn Baldwin	movl	(%esi),%eax
922bc3d5698SJohn Baldwin	adcl	$0,%ebp
923bc3d5698SJohn Baldwin	movl	%edx,32(%esp,%ecx,4)
924bc3d5698SJohn Baldwin	movl	%ebp,36(%esp,%ecx,4)
925bc3d5698SJohn Baldwin	mull	%edi
926bc3d5698SJohn Baldwin	addl	32(%esp),%eax
927bc3d5698SJohn Baldwin	leal	-1(%ecx),%ebx
928bc3d5698SJohn Baldwin	adcl	$0,%edx
929bc3d5698SJohn Baldwin	movl	$1,%ecx
930bc3d5698SJohn Baldwin	movl	4(%esi),%eax
931bc3d5698SJohn Baldwin	jmp	.L0133rdmadd
932bc3d5698SJohn Baldwin.align	16
933bc3d5698SJohn Baldwin.L007common_tail:
934bc3d5698SJohn Baldwin	movl	16(%esp),%ebp
935bc3d5698SJohn Baldwin	movl	4(%esp),%edi
936bc3d5698SJohn Baldwin	leal	32(%esp),%esi
937bc3d5698SJohn Baldwin	movl	(%esi),%eax
938bc3d5698SJohn Baldwin	movl	%ebx,%ecx
939bc3d5698SJohn Baldwin	xorl	%edx,%edx
940bc3d5698SJohn Baldwin.align	16
941bc3d5698SJohn Baldwin.L016sub:
942bc3d5698SJohn Baldwin	sbbl	(%ebp,%edx,4),%eax
943bc3d5698SJohn Baldwin	movl	%eax,(%edi,%edx,4)
944bc3d5698SJohn Baldwin	decl	%ecx
945bc3d5698SJohn Baldwin	movl	4(%esi,%edx,4),%eax
946bc3d5698SJohn Baldwin	leal	1(%edx),%edx
947bc3d5698SJohn Baldwin	jge	.L016sub
948bc3d5698SJohn Baldwin	sbbl	$0,%eax
949bc3d5698SJohn Baldwin	movl	$-1,%edx
950bc3d5698SJohn Baldwin	xorl	%eax,%edx
951bc3d5698SJohn Baldwin	jmp	.L017copy
952bc3d5698SJohn Baldwin.align	16
953bc3d5698SJohn Baldwin.L017copy:
954bc3d5698SJohn Baldwin	movl	32(%esp,%ebx,4),%esi
955bc3d5698SJohn Baldwin	movl	(%edi,%ebx,4),%ebp
956bc3d5698SJohn Baldwin	movl	%ecx,32(%esp,%ebx,4)
957bc3d5698SJohn Baldwin	andl	%eax,%esi
958bc3d5698SJohn Baldwin	andl	%edx,%ebp
959bc3d5698SJohn Baldwin	orl	%esi,%ebp
960bc3d5698SJohn Baldwin	movl	%ebp,(%edi,%ebx,4)
961bc3d5698SJohn Baldwin	decl	%ebx
962bc3d5698SJohn Baldwin	jge	.L017copy
963bc3d5698SJohn Baldwin	movl	24(%esp),%esp
964bc3d5698SJohn Baldwin	movl	$1,%eax
965bc3d5698SJohn Baldwin.L000just_leave:
966bc3d5698SJohn Baldwin	popl	%edi
967bc3d5698SJohn Baldwin	popl	%esi
968bc3d5698SJohn Baldwin	popl	%ebx
969bc3d5698SJohn Baldwin	popl	%ebp
970bc3d5698SJohn Baldwin	ret
971bc3d5698SJohn Baldwin.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string, emitted as raw byte values:
 *   "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 * (68 characters plus the trailing 0). The bytes carry no label, and no
 * section directive appears between the `ret` of bn_mul_mont and this data
 * in the lines shown here, so they land immediately after the function's
 * last instruction in whatever section is current at that point.
 */
972bc3d5698SJohn Baldwin.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
973bc3d5698SJohn Baldwin.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
974bc3d5698SJohn Baldwin.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
975bc3d5698SJohn Baldwin.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
976bc3d5698SJohn Baldwin.byte	111,114,103,62,0
/*
 * Declare OPENSSL_ia32cap_P as a common symbol of 16 bytes. On ELF targets
 * the third .comm operand is the requested alignment in bytes (4 here).
 * Being a common symbol, it is merged by the linker with a same-named
 * symbol from other objects rather than defined exclusively here.
 * None of the instructions in the lines visible in this part of the file
 * reference it; any use is in code outside this view.
 */
977bc3d5698SJohn Baldwin.comm	OPENSSL_ia32cap_P,16,4
978*c0855eaaSJohn Baldwin
/*
 * ELF note placed in the allocatable ("a") section .note.gnu.property.
 * Note header, in emission order:
 *   namesz = 1f - 0f   size of the name field below ("GNU" plus NUL, 4 bytes)
 *   descsz = 4f - 1f   size of everything between the name and label 4
 *   type   = 5         NT_GNU_PROPERTY_TYPE_0 in the GNU program-property ABI
 * The descriptor contains a single property record:
 *   pr_type   = 0xc0000002  GNU_PROPERTY_X86_FEATURE_1_AND
 *   pr_datasz = 3f - 2f     size of the data word (4 bytes)
 *   pr_data   = 3           bit 0 (IBT) and bit 1 (SHSTK) both set
 * The .p2align 2 directives keep each field on a 4-byte boundary.
 *
 * NOTE(review): the endbr32 bytes at the bn_mul_mont entry shown at the top
 * of the file are wrapped in `#ifdef __CET__`, whereas no `#ifdef __CET__`
 * is visible around this note in the lines shown here -- confirm that
 * advertising IBT/SHSTK regardless of __CET__ is intended by the generator.
 */
979*c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
980*c0855eaaSJohn Baldwin	.p2align 2
981*c0855eaaSJohn Baldwin	.long 1f - 0f
982*c0855eaaSJohn Baldwin	.long 4f - 1f
983*c0855eaaSJohn Baldwin	.long 5
/* Label 0: start of the note name. */
984*c0855eaaSJohn Baldwin0:
985*c0855eaaSJohn Baldwin	.asciz "GNU"
/* Label 1: end of the name / start of the descriptor. */
986*c0855eaaSJohn Baldwin1:
987*c0855eaaSJohn Baldwin	.p2align 2
988*c0855eaaSJohn Baldwin	.long 0xc0000002
989*c0855eaaSJohn Baldwin	.long 3f - 2f
/* Labels 2 and 3 bracket the property data word so its size can be computed. */
990*c0855eaaSJohn Baldwin2:
991*c0855eaaSJohn Baldwin	.long 3
992*c0855eaaSJohn Baldwin3:
993*c0855eaaSJohn Baldwin	.p2align 2
/* Label 4: end of the descriptor (used for descsz above). */
994*c0855eaaSJohn Baldwin4:
995bc3d5698SJohn Baldwin#endif
996