1#if defined(lint) || defined(__lint)
2#include <sys/stdint.h>
3#include <sys/sha2.h>
4
/*
 * Lint-only stub for SHA256TransformBlocks().
 *
 * This empty C definition is compiled only when `lint` or `__lint` is
 * defined, so that lint sees a prototype-compatible function body.  The
 * real implementation is the assembly routine in the #else branch below.
 *
 * Arguments, as used by the assembly implementation:
 *   ctx - SHA2_CTX to update; the assembly skips the first 8 bytes (the
 *         "algotype" field) and loads eight 32-bit state words from there.
 *   in  - pointer to the input data.
 *   num - count of input blocks; the assembly computes the end pointer as
 *         in + num * 16 * 4, i.e. each block is 64 bytes.
 *
 * The stub deliberately does nothing; ARGSUSED below suppresses lint's
 * unused-argument warnings.
 */
5/* ARGSUSED */
6void
7SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
8{
9}
10
11
12#else
13#include <sys/asm_linkage.h>
14
15ENTRY_NP(SHA256TransformBlocks)
16	push	%rbx
17	push	%rbp
18	push	%r12
19	push	%r13
20	push	%r14
21	push	%r15
22	mov	%rsp,%rbp		# copy %rsp
23	shl	$4,%rdx		# num*16
24	sub	$16*4+4*8,%rsp
25	lea	(%rsi,%rdx,4),%rdx	# inp+num*16*4
26	and	$-64,%rsp		# align stack frame
27	add	$8,%rdi		# Skip OpenSolaris field, "algotype"
28	mov	%rdi,16*4+0*8(%rsp)		# save ctx, 1st arg
29	mov	%rsi,16*4+1*8(%rsp)		# save inp, 2nd arg
30	mov	%rdx,16*4+2*8(%rsp)		# save end pointer, "3rd" arg
31	mov	%rbp,16*4+3*8(%rsp)		# save copy of %rsp
32
33	/.picmeup %rbp
34	/ The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
35	/ the address of the "next" instruction into the target register
36	/ (%rbp).  This generates these 2 instructions:
37	lea	.Llea(%rip),%rbp
38	/nop	/ .picmeup generates a nop for mod 8 alignment--not needed here
39
40.Llea:
41	lea	K256-.(%rbp),%rbp
42
43	mov	4*0(%rdi),%eax
44	mov	4*1(%rdi),%ebx
45	mov	4*2(%rdi),%ecx
46	mov	4*3(%rdi),%edx
47	mov	4*4(%rdi),%r8d
48	mov	4*5(%rdi),%r9d
49	mov	4*6(%rdi),%r10d
50	mov	4*7(%rdi),%r11d
51	jmp	.Lloop
52
53.align	16
54.Lloop:
55	xor	%rdi,%rdi
56	mov	4*0(%rsi),%r12d
57	bswap	%r12d
58	mov	%r8d,%r13d
59	mov	%r8d,%r14d
60	mov	%r9d,%r15d
61
62	ror	$6,%r13d
63	ror	$11,%r14d
64	xor	%r10d,%r15d			# f^g
65
66	xor	%r14d,%r13d
67	ror	$14,%r14d
68	and	%r8d,%r15d			# (f^g)&e
69	mov	%r12d,0(%rsp)
70
71	xor	%r14d,%r13d			# Sigma1(e)
72	xor	%r10d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
73	add	%r11d,%r12d			# T1+=h
74
75	mov	%eax,%r11d
76	add	%r13d,%r12d			# T1+=Sigma1(e)
77
78	add	%r15d,%r12d			# T1+=Ch(e,f,g)
79	mov	%eax,%r13d
80	mov	%eax,%r14d
81
82	ror	$2,%r11d
83	ror	$13,%r13d
84	mov	%eax,%r15d
85	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
86
87	xor	%r13d,%r11d
88	ror	$9,%r13d
89	or	%ecx,%r14d			# a|c
90
91	xor	%r13d,%r11d			# h=Sigma0(a)
92	and	%ecx,%r15d			# a&c
93	add	%r12d,%edx			# d+=T1
94
95	and	%ebx,%r14d			# (a|c)&b
96	add	%r12d,%r11d			# h+=T1
97
98	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
99	lea	1(%rdi),%rdi	# round++
100
101	add	%r14d,%r11d			# h+=Maj(a,b,c)
102	mov	4*1(%rsi),%r12d
103	bswap	%r12d
104	mov	%edx,%r13d
105	mov	%edx,%r14d
106	mov	%r8d,%r15d
107
108	ror	$6,%r13d
109	ror	$11,%r14d
110	xor	%r9d,%r15d			# f^g
111
112	xor	%r14d,%r13d
113	ror	$14,%r14d
114	and	%edx,%r15d			# (f^g)&e
115	mov	%r12d,4(%rsp)
116
117	xor	%r14d,%r13d			# Sigma1(e)
118	xor	%r9d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
119	add	%r10d,%r12d			# T1+=h
120
121	mov	%r11d,%r10d
122	add	%r13d,%r12d			# T1+=Sigma1(e)
123
124	add	%r15d,%r12d			# T1+=Ch(e,f,g)
125	mov	%r11d,%r13d
126	mov	%r11d,%r14d
127
128	ror	$2,%r10d
129	ror	$13,%r13d
130	mov	%r11d,%r15d
131	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
132
133	xor	%r13d,%r10d
134	ror	$9,%r13d
135	or	%ebx,%r14d			# a|c
136
137	xor	%r13d,%r10d			# h=Sigma0(a)
138	and	%ebx,%r15d			# a&c
139	add	%r12d,%ecx			# d+=T1
140
141	and	%eax,%r14d			# (a|c)&b
142	add	%r12d,%r10d			# h+=T1
143
144	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
145	lea	1(%rdi),%rdi	# round++
146
147	add	%r14d,%r10d			# h+=Maj(a,b,c)
148	mov	4*2(%rsi),%r12d
149	bswap	%r12d
150	mov	%ecx,%r13d
151	mov	%ecx,%r14d
152	mov	%edx,%r15d
153
154	ror	$6,%r13d
155	ror	$11,%r14d
156	xor	%r8d,%r15d			# f^g
157
158	xor	%r14d,%r13d
159	ror	$14,%r14d
160	and	%ecx,%r15d			# (f^g)&e
161	mov	%r12d,8(%rsp)
162
163	xor	%r14d,%r13d			# Sigma1(e)
164	xor	%r8d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
165	add	%r9d,%r12d			# T1+=h
166
167	mov	%r10d,%r9d
168	add	%r13d,%r12d			# T1+=Sigma1(e)
169
170	add	%r15d,%r12d			# T1+=Ch(e,f,g)
171	mov	%r10d,%r13d
172	mov	%r10d,%r14d
173
174	ror	$2,%r9d
175	ror	$13,%r13d
176	mov	%r10d,%r15d
177	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
178
179	xor	%r13d,%r9d
180	ror	$9,%r13d
181	or	%eax,%r14d			# a|c
182
183	xor	%r13d,%r9d			# h=Sigma0(a)
184	and	%eax,%r15d			# a&c
185	add	%r12d,%ebx			# d+=T1
186
187	and	%r11d,%r14d			# (a|c)&b
188	add	%r12d,%r9d			# h+=T1
189
190	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
191	lea	1(%rdi),%rdi	# round++
192
193	add	%r14d,%r9d			# h+=Maj(a,b,c)
194	mov	4*3(%rsi),%r12d
195	bswap	%r12d
196	mov	%ebx,%r13d
197	mov	%ebx,%r14d
198	mov	%ecx,%r15d
199
200	ror	$6,%r13d
201	ror	$11,%r14d
202	xor	%edx,%r15d			# f^g
203
204	xor	%r14d,%r13d
205	ror	$14,%r14d
206	and	%ebx,%r15d			# (f^g)&e
207	mov	%r12d,12(%rsp)
208
209	xor	%r14d,%r13d			# Sigma1(e)
210	xor	%edx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
211	add	%r8d,%r12d			# T1+=h
212
213	mov	%r9d,%r8d
214	add	%r13d,%r12d			# T1+=Sigma1(e)
215
216	add	%r15d,%r12d			# T1+=Ch(e,f,g)
217	mov	%r9d,%r13d
218	mov	%r9d,%r14d
219
220	ror	$2,%r8d
221	ror	$13,%r13d
222	mov	%r9d,%r15d
223	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
224
225	xor	%r13d,%r8d
226	ror	$9,%r13d
227	or	%r11d,%r14d			# a|c
228
229	xor	%r13d,%r8d			# h=Sigma0(a)
230	and	%r11d,%r15d			# a&c
231	add	%r12d,%eax			# d+=T1
232
233	and	%r10d,%r14d			# (a|c)&b
234	add	%r12d,%r8d			# h+=T1
235
236	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
237	lea	1(%rdi),%rdi	# round++
238
239	add	%r14d,%r8d			# h+=Maj(a,b,c)
240	mov	4*4(%rsi),%r12d
241	bswap	%r12d
242	mov	%eax,%r13d
243	mov	%eax,%r14d
244	mov	%ebx,%r15d
245
246	ror	$6,%r13d
247	ror	$11,%r14d
248	xor	%ecx,%r15d			# f^g
249
250	xor	%r14d,%r13d
251	ror	$14,%r14d
252	and	%eax,%r15d			# (f^g)&e
253	mov	%r12d,16(%rsp)
254
255	xor	%r14d,%r13d			# Sigma1(e)
256	xor	%ecx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
257	add	%edx,%r12d			# T1+=h
258
259	mov	%r8d,%edx
260	add	%r13d,%r12d			# T1+=Sigma1(e)
261
262	add	%r15d,%r12d			# T1+=Ch(e,f,g)
263	mov	%r8d,%r13d
264	mov	%r8d,%r14d
265
266	ror	$2,%edx
267	ror	$13,%r13d
268	mov	%r8d,%r15d
269	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
270
271	xor	%r13d,%edx
272	ror	$9,%r13d
273	or	%r10d,%r14d			# a|c
274
275	xor	%r13d,%edx			# h=Sigma0(a)
276	and	%r10d,%r15d			# a&c
277	add	%r12d,%r11d			# d+=T1
278
279	and	%r9d,%r14d			# (a|c)&b
280	add	%r12d,%edx			# h+=T1
281
282	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
283	lea	1(%rdi),%rdi	# round++
284
285	add	%r14d,%edx			# h+=Maj(a,b,c)
286	mov	4*5(%rsi),%r12d
287	bswap	%r12d
288	mov	%r11d,%r13d
289	mov	%r11d,%r14d
290	mov	%eax,%r15d
291
292	ror	$6,%r13d
293	ror	$11,%r14d
294	xor	%ebx,%r15d			# f^g
295
296	xor	%r14d,%r13d
297	ror	$14,%r14d
298	and	%r11d,%r15d			# (f^g)&e
299	mov	%r12d,20(%rsp)
300
301	xor	%r14d,%r13d			# Sigma1(e)
302	xor	%ebx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
303	add	%ecx,%r12d			# T1+=h
304
305	mov	%edx,%ecx
306	add	%r13d,%r12d			# T1+=Sigma1(e)
307
308	add	%r15d,%r12d			# T1+=Ch(e,f,g)
309	mov	%edx,%r13d
310	mov	%edx,%r14d
311
312	ror	$2,%ecx
313	ror	$13,%r13d
314	mov	%edx,%r15d
315	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
316
317	xor	%r13d,%ecx
318	ror	$9,%r13d
319	or	%r9d,%r14d			# a|c
320
321	xor	%r13d,%ecx			# h=Sigma0(a)
322	and	%r9d,%r15d			# a&c
323	add	%r12d,%r10d			# d+=T1
324
325	and	%r8d,%r14d			# (a|c)&b
326	add	%r12d,%ecx			# h+=T1
327
328	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
329	lea	1(%rdi),%rdi	# round++
330
331	add	%r14d,%ecx			# h+=Maj(a,b,c)
332	mov	4*6(%rsi),%r12d
333	bswap	%r12d
334	mov	%r10d,%r13d
335	mov	%r10d,%r14d
336	mov	%r11d,%r15d
337
338	ror	$6,%r13d
339	ror	$11,%r14d
340	xor	%eax,%r15d			# f^g
341
342	xor	%r14d,%r13d
343	ror	$14,%r14d
344	and	%r10d,%r15d			# (f^g)&e
345	mov	%r12d,24(%rsp)
346
347	xor	%r14d,%r13d			# Sigma1(e)
348	xor	%eax,%r15d			# Ch(e,f,g)=((f^g)&e)^g
349	add	%ebx,%r12d			# T1+=h
350
351	mov	%ecx,%ebx
352	add	%r13d,%r12d			# T1+=Sigma1(e)
353
354	add	%r15d,%r12d			# T1+=Ch(e,f,g)
355	mov	%ecx,%r13d
356	mov	%ecx,%r14d
357
358	ror	$2,%ebx
359	ror	$13,%r13d
360	mov	%ecx,%r15d
361	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
362
363	xor	%r13d,%ebx
364	ror	$9,%r13d
365	or	%r8d,%r14d			# a|c
366
367	xor	%r13d,%ebx			# h=Sigma0(a)
368	and	%r8d,%r15d			# a&c
369	add	%r12d,%r9d			# d+=T1
370
371	and	%edx,%r14d			# (a|c)&b
372	add	%r12d,%ebx			# h+=T1
373
374	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
375	lea	1(%rdi),%rdi	# round++
376
377	add	%r14d,%ebx			# h+=Maj(a,b,c)
378	mov	4*7(%rsi),%r12d
379	bswap	%r12d
380	mov	%r9d,%r13d
381	mov	%r9d,%r14d
382	mov	%r10d,%r15d
383
384	ror	$6,%r13d
385	ror	$11,%r14d
386	xor	%r11d,%r15d			# f^g
387
388	xor	%r14d,%r13d
389	ror	$14,%r14d
390	and	%r9d,%r15d			# (f^g)&e
391	mov	%r12d,28(%rsp)
392
393	xor	%r14d,%r13d			# Sigma1(e)
394	xor	%r11d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
395	add	%eax,%r12d			# T1+=h
396
397	mov	%ebx,%eax
398	add	%r13d,%r12d			# T1+=Sigma1(e)
399
400	add	%r15d,%r12d			# T1+=Ch(e,f,g)
401	mov	%ebx,%r13d
402	mov	%ebx,%r14d
403
404	ror	$2,%eax
405	ror	$13,%r13d
406	mov	%ebx,%r15d
407	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
408
409	xor	%r13d,%eax
410	ror	$9,%r13d
411	or	%edx,%r14d			# a|c
412
413	xor	%r13d,%eax			# h=Sigma0(a)
414	and	%edx,%r15d			# a&c
415	add	%r12d,%r8d			# d+=T1
416
417	and	%ecx,%r14d			# (a|c)&b
418	add	%r12d,%eax			# h+=T1
419
420	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
421	lea	1(%rdi),%rdi	# round++
422
423	add	%r14d,%eax			# h+=Maj(a,b,c)
424	mov	4*8(%rsi),%r12d
425	bswap	%r12d
426	mov	%r8d,%r13d
427	mov	%r8d,%r14d
428	mov	%r9d,%r15d
429
430	ror	$6,%r13d
431	ror	$11,%r14d
432	xor	%r10d,%r15d			# f^g
433
434	xor	%r14d,%r13d
435	ror	$14,%r14d
436	and	%r8d,%r15d			# (f^g)&e
437	mov	%r12d,32(%rsp)
438
439	xor	%r14d,%r13d			# Sigma1(e)
440	xor	%r10d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
441	add	%r11d,%r12d			# T1+=h
442
443	mov	%eax,%r11d
444	add	%r13d,%r12d			# T1+=Sigma1(e)
445
446	add	%r15d,%r12d			# T1+=Ch(e,f,g)
447	mov	%eax,%r13d
448	mov	%eax,%r14d
449
450	ror	$2,%r11d
451	ror	$13,%r13d
452	mov	%eax,%r15d
453	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
454
455	xor	%r13d,%r11d
456	ror	$9,%r13d
457	or	%ecx,%r14d			# a|c
458
459	xor	%r13d,%r11d			# h=Sigma0(a)
460	and	%ecx,%r15d			# a&c
461	add	%r12d,%edx			# d+=T1
462
463	and	%ebx,%r14d			# (a|c)&b
464	add	%r12d,%r11d			# h+=T1
465
466	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
467	lea	1(%rdi),%rdi	# round++
468
469	add	%r14d,%r11d			# h+=Maj(a,b,c)
470	mov	4*9(%rsi),%r12d
471	bswap	%r12d
472	mov	%edx,%r13d
473	mov	%edx,%r14d
474	mov	%r8d,%r15d
475
476	ror	$6,%r13d
477	ror	$11,%r14d
478	xor	%r9d,%r15d			# f^g
479
480	xor	%r14d,%r13d
481	ror	$14,%r14d
482	and	%edx,%r15d			# (f^g)&e
483	mov	%r12d,36(%rsp)
484
485	xor	%r14d,%r13d			# Sigma1(e)
486	xor	%r9d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
487	add	%r10d,%r12d			# T1+=h
488
489	mov	%r11d,%r10d
490	add	%r13d,%r12d			# T1+=Sigma1(e)
491
492	add	%r15d,%r12d			# T1+=Ch(e,f,g)
493	mov	%r11d,%r13d
494	mov	%r11d,%r14d
495
496	ror	$2,%r10d
497	ror	$13,%r13d
498	mov	%r11d,%r15d
499	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
500
501	xor	%r13d,%r10d
502	ror	$9,%r13d
503	or	%ebx,%r14d			# a|c
504
505	xor	%r13d,%r10d			# h=Sigma0(a)
506	and	%ebx,%r15d			# a&c
507	add	%r12d,%ecx			# d+=T1
508
509	and	%eax,%r14d			# (a|c)&b
510	add	%r12d,%r10d			# h+=T1
511
512	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
513	lea	1(%rdi),%rdi	# round++
514
515	add	%r14d,%r10d			# h+=Maj(a,b,c)
516	mov	4*10(%rsi),%r12d
517	bswap	%r12d
518	mov	%ecx,%r13d
519	mov	%ecx,%r14d
520	mov	%edx,%r15d
521
522	ror	$6,%r13d
523	ror	$11,%r14d
524	xor	%r8d,%r15d			# f^g
525
526	xor	%r14d,%r13d
527	ror	$14,%r14d
528	and	%ecx,%r15d			# (f^g)&e
529	mov	%r12d,40(%rsp)
530
531	xor	%r14d,%r13d			# Sigma1(e)
532	xor	%r8d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
533	add	%r9d,%r12d			# T1+=h
534
535	mov	%r10d,%r9d
536	add	%r13d,%r12d			# T1+=Sigma1(e)
537
538	add	%r15d,%r12d			# T1+=Ch(e,f,g)
539	mov	%r10d,%r13d
540	mov	%r10d,%r14d
541
542	ror	$2,%r9d
543	ror	$13,%r13d
544	mov	%r10d,%r15d
545	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
546
547	xor	%r13d,%r9d
548	ror	$9,%r13d
549	or	%eax,%r14d			# a|c
550
551	xor	%r13d,%r9d			# h=Sigma0(a)
552	and	%eax,%r15d			# a&c
553	add	%r12d,%ebx			# d+=T1
554
555	and	%r11d,%r14d			# (a|c)&b
556	add	%r12d,%r9d			# h+=T1
557
558	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
559	lea	1(%rdi),%rdi	# round++
560
561	add	%r14d,%r9d			# h+=Maj(a,b,c)
562	mov	4*11(%rsi),%r12d
563	bswap	%r12d
564	mov	%ebx,%r13d
565	mov	%ebx,%r14d
566	mov	%ecx,%r15d
567
568	ror	$6,%r13d
569	ror	$11,%r14d
570	xor	%edx,%r15d			# f^g
571
572	xor	%r14d,%r13d
573	ror	$14,%r14d
574	and	%ebx,%r15d			# (f^g)&e
575	mov	%r12d,44(%rsp)
576
577	xor	%r14d,%r13d			# Sigma1(e)
578	xor	%edx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
579	add	%r8d,%r12d			# T1+=h
580
581	mov	%r9d,%r8d
582	add	%r13d,%r12d			# T1+=Sigma1(e)
583
584	add	%r15d,%r12d			# T1+=Ch(e,f,g)
585	mov	%r9d,%r13d
586	mov	%r9d,%r14d
587
588	ror	$2,%r8d
589	ror	$13,%r13d
590	mov	%r9d,%r15d
591	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
592
593	xor	%r13d,%r8d
594	ror	$9,%r13d
595	or	%r11d,%r14d			# a|c
596
597	xor	%r13d,%r8d			# h=Sigma0(a)
598	and	%r11d,%r15d			# a&c
599	add	%r12d,%eax			# d+=T1
600
601	and	%r10d,%r14d			# (a|c)&b
602	add	%r12d,%r8d			# h+=T1
603
604	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
605	lea	1(%rdi),%rdi	# round++
606
607	add	%r14d,%r8d			# h+=Maj(a,b,c)
608	mov	4*12(%rsi),%r12d
609	bswap	%r12d
610	mov	%eax,%r13d
611	mov	%eax,%r14d
612	mov	%ebx,%r15d
613
614	ror	$6,%r13d
615	ror	$11,%r14d
616	xor	%ecx,%r15d			# f^g
617
618	xor	%r14d,%r13d
619	ror	$14,%r14d
620	and	%eax,%r15d			# (f^g)&e
621	mov	%r12d,48(%rsp)
622
623	xor	%r14d,%r13d			# Sigma1(e)
624	xor	%ecx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
625	add	%edx,%r12d			# T1+=h
626
627	mov	%r8d,%edx
628	add	%r13d,%r12d			# T1+=Sigma1(e)
629
630	add	%r15d,%r12d			# T1+=Ch(e,f,g)
631	mov	%r8d,%r13d
632	mov	%r8d,%r14d
633
634	ror	$2,%edx
635	ror	$13,%r13d
636	mov	%r8d,%r15d
637	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
638
639	xor	%r13d,%edx
640	ror	$9,%r13d
641	or	%r10d,%r14d			# a|c
642
643	xor	%r13d,%edx			# h=Sigma0(a)
644	and	%r10d,%r15d			# a&c
645	add	%r12d,%r11d			# d+=T1
646
647	and	%r9d,%r14d			# (a|c)&b
648	add	%r12d,%edx			# h+=T1
649
650	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
651	lea	1(%rdi),%rdi	# round++
652
653	add	%r14d,%edx			# h+=Maj(a,b,c)
654	mov	4*13(%rsi),%r12d
655	bswap	%r12d
656	mov	%r11d,%r13d
657	mov	%r11d,%r14d
658	mov	%eax,%r15d
659
660	ror	$6,%r13d
661	ror	$11,%r14d
662	xor	%ebx,%r15d			# f^g
663
664	xor	%r14d,%r13d
665	ror	$14,%r14d
666	and	%r11d,%r15d			# (f^g)&e
667	mov	%r12d,52(%rsp)
668
669	xor	%r14d,%r13d			# Sigma1(e)
670	xor	%ebx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
671	add	%ecx,%r12d			# T1+=h
672
673	mov	%edx,%ecx
674	add	%r13d,%r12d			# T1+=Sigma1(e)
675
676	add	%r15d,%r12d			# T1+=Ch(e,f,g)
677	mov	%edx,%r13d
678	mov	%edx,%r14d
679
680	ror	$2,%ecx
681	ror	$13,%r13d
682	mov	%edx,%r15d
683	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
684
685	xor	%r13d,%ecx
686	ror	$9,%r13d
687	or	%r9d,%r14d			# a|c
688
689	xor	%r13d,%ecx			# h=Sigma0(a)
690	and	%r9d,%r15d			# a&c
691	add	%r12d,%r10d			# d+=T1
692
693	and	%r8d,%r14d			# (a|c)&b
694	add	%r12d,%ecx			# h+=T1
695
696	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
697	lea	1(%rdi),%rdi	# round++
698
699	add	%r14d,%ecx			# h+=Maj(a,b,c)
700	mov	4*14(%rsi),%r12d
701	bswap	%r12d
702	mov	%r10d,%r13d
703	mov	%r10d,%r14d
704	mov	%r11d,%r15d
705
706	ror	$6,%r13d
707	ror	$11,%r14d
708	xor	%eax,%r15d			# f^g
709
710	xor	%r14d,%r13d
711	ror	$14,%r14d
712	and	%r10d,%r15d			# (f^g)&e
713	mov	%r12d,56(%rsp)
714
715	xor	%r14d,%r13d			# Sigma1(e)
716	xor	%eax,%r15d			# Ch(e,f,g)=((f^g)&e)^g
717	add	%ebx,%r12d			# T1+=h
718
719	mov	%ecx,%ebx
720	add	%r13d,%r12d			# T1+=Sigma1(e)
721
722	add	%r15d,%r12d			# T1+=Ch(e,f,g)
723	mov	%ecx,%r13d
724	mov	%ecx,%r14d
725
726	ror	$2,%ebx
727	ror	$13,%r13d
728	mov	%ecx,%r15d
729	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
730
731	xor	%r13d,%ebx
732	ror	$9,%r13d
733	or	%r8d,%r14d			# a|c
734
735	xor	%r13d,%ebx			# h=Sigma0(a)
736	and	%r8d,%r15d			# a&c
737	add	%r12d,%r9d			# d+=T1
738
739	and	%edx,%r14d			# (a|c)&b
740	add	%r12d,%ebx			# h+=T1
741
742	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
743	lea	1(%rdi),%rdi	# round++
744
745	add	%r14d,%ebx			# h+=Maj(a,b,c)
746	mov	4*15(%rsi),%r12d
747	bswap	%r12d
748	mov	%r9d,%r13d
749	mov	%r9d,%r14d
750	mov	%r10d,%r15d
751
752	ror	$6,%r13d
753	ror	$11,%r14d
754	xor	%r11d,%r15d			# f^g
755
756	xor	%r14d,%r13d
757	ror	$14,%r14d
758	and	%r9d,%r15d			# (f^g)&e
759	mov	%r12d,60(%rsp)
760
761	xor	%r14d,%r13d			# Sigma1(e)
762	xor	%r11d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
763	add	%eax,%r12d			# T1+=h
764
765	mov	%ebx,%eax
766	add	%r13d,%r12d			# T1+=Sigma1(e)
767
768	add	%r15d,%r12d			# T1+=Ch(e,f,g)
769	mov	%ebx,%r13d
770	mov	%ebx,%r14d
771
772	ror	$2,%eax
773	ror	$13,%r13d
774	mov	%ebx,%r15d
775	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
776
777	xor	%r13d,%eax
778	ror	$9,%r13d
779	or	%edx,%r14d			# a|c
780
781	xor	%r13d,%eax			# h=Sigma0(a)
782	and	%edx,%r15d			# a&c
783	add	%r12d,%r8d			# d+=T1
784
785	and	%ecx,%r14d			# (a|c)&b
786	add	%r12d,%eax			# h+=T1
787
788	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
789	lea	1(%rdi),%rdi	# round++
790
791	add	%r14d,%eax			# h+=Maj(a,b,c)
792	jmp	.Lrounds_16_xx
793.align	16
794.Lrounds_16_xx:
795	mov	4(%rsp),%r13d
796	mov	56(%rsp),%r12d
797
798	mov	%r13d,%r15d
799
800	shr	$3,%r13d
801	ror	$7,%r15d
802
803	xor	%r15d,%r13d
804	ror	$11,%r15d
805
806	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
807	mov	%r12d,%r14d
808
809	shr	$10,%r12d
810	ror	$17,%r14d
811
812	xor	%r14d,%r12d
813	ror	$2,%r14d
814
815	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
816
817	add	%r13d,%r12d
818
819	add	36(%rsp),%r12d
820
821	add	0(%rsp),%r12d
822	mov	%r8d,%r13d
823	mov	%r8d,%r14d
824	mov	%r9d,%r15d
825
826	ror	$6,%r13d
827	ror	$11,%r14d
828	xor	%r10d,%r15d			# f^g
829
830	xor	%r14d,%r13d
831	ror	$14,%r14d
832	and	%r8d,%r15d			# (f^g)&e
833	mov	%r12d,0(%rsp)
834
835	xor	%r14d,%r13d			# Sigma1(e)
836	xor	%r10d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
837	add	%r11d,%r12d			# T1+=h
838
839	mov	%eax,%r11d
840	add	%r13d,%r12d			# T1+=Sigma1(e)
841
842	add	%r15d,%r12d			# T1+=Ch(e,f,g)
843	mov	%eax,%r13d
844	mov	%eax,%r14d
845
846	ror	$2,%r11d
847	ror	$13,%r13d
848	mov	%eax,%r15d
849	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
850
851	xor	%r13d,%r11d
852	ror	$9,%r13d
853	or	%ecx,%r14d			# a|c
854
855	xor	%r13d,%r11d			# h=Sigma0(a)
856	and	%ecx,%r15d			# a&c
857	add	%r12d,%edx			# d+=T1
858
859	and	%ebx,%r14d			# (a|c)&b
860	add	%r12d,%r11d			# h+=T1
861
862	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
863	lea	1(%rdi),%rdi	# round++
864
865	add	%r14d,%r11d			# h+=Maj(a,b,c)
866	mov	8(%rsp),%r13d
867	mov	60(%rsp),%r12d
868
869	mov	%r13d,%r15d
870
871	shr	$3,%r13d
872	ror	$7,%r15d
873
874	xor	%r15d,%r13d
875	ror	$11,%r15d
876
877	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
878	mov	%r12d,%r14d
879
880	shr	$10,%r12d
881	ror	$17,%r14d
882
883	xor	%r14d,%r12d
884	ror	$2,%r14d
885
886	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
887
888	add	%r13d,%r12d
889
890	add	40(%rsp),%r12d
891
892	add	4(%rsp),%r12d
893	mov	%edx,%r13d
894	mov	%edx,%r14d
895	mov	%r8d,%r15d
896
897	ror	$6,%r13d
898	ror	$11,%r14d
899	xor	%r9d,%r15d			# f^g
900
901	xor	%r14d,%r13d
902	ror	$14,%r14d
903	and	%edx,%r15d			# (f^g)&e
904	mov	%r12d,4(%rsp)
905
906	xor	%r14d,%r13d			# Sigma1(e)
907	xor	%r9d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
908	add	%r10d,%r12d			# T1+=h
909
910	mov	%r11d,%r10d
911	add	%r13d,%r12d			# T1+=Sigma1(e)
912
913	add	%r15d,%r12d			# T1+=Ch(e,f,g)
914	mov	%r11d,%r13d
915	mov	%r11d,%r14d
916
917	ror	$2,%r10d
918	ror	$13,%r13d
919	mov	%r11d,%r15d
920	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
921
922	xor	%r13d,%r10d
923	ror	$9,%r13d
924	or	%ebx,%r14d			# a|c
925
926	xor	%r13d,%r10d			# h=Sigma0(a)
927	and	%ebx,%r15d			# a&c
928	add	%r12d,%ecx			# d+=T1
929
930	and	%eax,%r14d			# (a|c)&b
931	add	%r12d,%r10d			# h+=T1
932
933	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
934	lea	1(%rdi),%rdi	# round++
935
936	add	%r14d,%r10d			# h+=Maj(a,b,c)
937	mov	12(%rsp),%r13d
938	mov	0(%rsp),%r12d
939
940	mov	%r13d,%r15d
941
942	shr	$3,%r13d
943	ror	$7,%r15d
944
945	xor	%r15d,%r13d
946	ror	$11,%r15d
947
948	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
949	mov	%r12d,%r14d
950
951	shr	$10,%r12d
952	ror	$17,%r14d
953
954	xor	%r14d,%r12d
955	ror	$2,%r14d
956
957	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
958
959	add	%r13d,%r12d
960
961	add	44(%rsp),%r12d
962
963	add	8(%rsp),%r12d
964	mov	%ecx,%r13d
965	mov	%ecx,%r14d
966	mov	%edx,%r15d
967
968	ror	$6,%r13d
969	ror	$11,%r14d
970	xor	%r8d,%r15d			# f^g
971
972	xor	%r14d,%r13d
973	ror	$14,%r14d
974	and	%ecx,%r15d			# (f^g)&e
975	mov	%r12d,8(%rsp)
976
977	xor	%r14d,%r13d			# Sigma1(e)
978	xor	%r8d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
979	add	%r9d,%r12d			# T1+=h
980
981	mov	%r10d,%r9d
982	add	%r13d,%r12d			# T1+=Sigma1(e)
983
984	add	%r15d,%r12d			# T1+=Ch(e,f,g)
985	mov	%r10d,%r13d
986	mov	%r10d,%r14d
987
988	ror	$2,%r9d
989	ror	$13,%r13d
990	mov	%r10d,%r15d
991	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
992
993	xor	%r13d,%r9d
994	ror	$9,%r13d
995	or	%eax,%r14d			# a|c
996
997	xor	%r13d,%r9d			# h=Sigma0(a)
998	and	%eax,%r15d			# a&c
999	add	%r12d,%ebx			# d+=T1
1000
1001	and	%r11d,%r14d			# (a|c)&b
1002	add	%r12d,%r9d			# h+=T1
1003
1004	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1005	lea	1(%rdi),%rdi	# round++
1006
1007	add	%r14d,%r9d			# h+=Maj(a,b,c)
1008	mov	16(%rsp),%r13d
1009	mov	4(%rsp),%r12d
1010
1011	mov	%r13d,%r15d
1012
1013	shr	$3,%r13d
1014	ror	$7,%r15d
1015
1016	xor	%r15d,%r13d
1017	ror	$11,%r15d
1018
1019	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1020	mov	%r12d,%r14d
1021
1022	shr	$10,%r12d
1023	ror	$17,%r14d
1024
1025	xor	%r14d,%r12d
1026	ror	$2,%r14d
1027
1028	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1029
1030	add	%r13d,%r12d
1031
1032	add	48(%rsp),%r12d
1033
1034	add	12(%rsp),%r12d
1035	mov	%ebx,%r13d
1036	mov	%ebx,%r14d
1037	mov	%ecx,%r15d
1038
1039	ror	$6,%r13d
1040	ror	$11,%r14d
1041	xor	%edx,%r15d			# f^g
1042
1043	xor	%r14d,%r13d
1044	ror	$14,%r14d
1045	and	%ebx,%r15d			# (f^g)&e
1046	mov	%r12d,12(%rsp)
1047
1048	xor	%r14d,%r13d			# Sigma1(e)
1049	xor	%edx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1050	add	%r8d,%r12d			# T1+=h
1051
1052	mov	%r9d,%r8d
1053	add	%r13d,%r12d			# T1+=Sigma1(e)
1054
1055	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1056	mov	%r9d,%r13d
1057	mov	%r9d,%r14d
1058
1059	ror	$2,%r8d
1060	ror	$13,%r13d
1061	mov	%r9d,%r15d
1062	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1063
1064	xor	%r13d,%r8d
1065	ror	$9,%r13d
1066	or	%r11d,%r14d			# a|c
1067
1068	xor	%r13d,%r8d			# h=Sigma0(a)
1069	and	%r11d,%r15d			# a&c
1070	add	%r12d,%eax			# d+=T1
1071
1072	and	%r10d,%r14d			# (a|c)&b
1073	add	%r12d,%r8d			# h+=T1
1074
1075	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1076	lea	1(%rdi),%rdi	# round++
1077
1078	add	%r14d,%r8d			# h+=Maj(a,b,c)
1079	mov	20(%rsp),%r13d
1080	mov	8(%rsp),%r12d
1081
1082	mov	%r13d,%r15d
1083
1084	shr	$3,%r13d
1085	ror	$7,%r15d
1086
1087	xor	%r15d,%r13d
1088	ror	$11,%r15d
1089
1090	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1091	mov	%r12d,%r14d
1092
1093	shr	$10,%r12d
1094	ror	$17,%r14d
1095
1096	xor	%r14d,%r12d
1097	ror	$2,%r14d
1098
1099	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1100
1101	add	%r13d,%r12d
1102
1103	add	52(%rsp),%r12d
1104
1105	add	16(%rsp),%r12d
1106	mov	%eax,%r13d
1107	mov	%eax,%r14d
1108	mov	%ebx,%r15d
1109
1110	ror	$6,%r13d
1111	ror	$11,%r14d
1112	xor	%ecx,%r15d			# f^g
1113
1114	xor	%r14d,%r13d
1115	ror	$14,%r14d
1116	and	%eax,%r15d			# (f^g)&e
1117	mov	%r12d,16(%rsp)
1118
1119	xor	%r14d,%r13d			# Sigma1(e)
1120	xor	%ecx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1121	add	%edx,%r12d			# T1+=h
1122
1123	mov	%r8d,%edx
1124	add	%r13d,%r12d			# T1+=Sigma1(e)
1125
1126	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1127	mov	%r8d,%r13d
1128	mov	%r8d,%r14d
1129
1130	ror	$2,%edx
1131	ror	$13,%r13d
1132	mov	%r8d,%r15d
1133	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1134
1135	xor	%r13d,%edx
1136	ror	$9,%r13d
1137	or	%r10d,%r14d			# a|c
1138
1139	xor	%r13d,%edx			# h=Sigma0(a)
1140	and	%r10d,%r15d			# a&c
1141	add	%r12d,%r11d			# d+=T1
1142
1143	and	%r9d,%r14d			# (a|c)&b
1144	add	%r12d,%edx			# h+=T1
1145
1146	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1147	lea	1(%rdi),%rdi	# round++
1148
1149	add	%r14d,%edx			# h+=Maj(a,b,c)
1150	mov	24(%rsp),%r13d
1151	mov	12(%rsp),%r12d
1152
1153	mov	%r13d,%r15d
1154
1155	shr	$3,%r13d
1156	ror	$7,%r15d
1157
1158	xor	%r15d,%r13d
1159	ror	$11,%r15d
1160
1161	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1162	mov	%r12d,%r14d
1163
1164	shr	$10,%r12d
1165	ror	$17,%r14d
1166
1167	xor	%r14d,%r12d
1168	ror	$2,%r14d
1169
1170	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1171
1172	add	%r13d,%r12d
1173
1174	add	56(%rsp),%r12d
1175
1176	add	20(%rsp),%r12d
1177	mov	%r11d,%r13d
1178	mov	%r11d,%r14d
1179	mov	%eax,%r15d
1180
1181	ror	$6,%r13d
1182	ror	$11,%r14d
1183	xor	%ebx,%r15d			# f^g
1184
1185	xor	%r14d,%r13d
1186	ror	$14,%r14d
1187	and	%r11d,%r15d			# (f^g)&e
1188	mov	%r12d,20(%rsp)
1189
1190	xor	%r14d,%r13d			# Sigma1(e)
1191	xor	%ebx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1192	add	%ecx,%r12d			# T1+=h
1193
1194	mov	%edx,%ecx
1195	add	%r13d,%r12d			# T1+=Sigma1(e)
1196
1197	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1198	mov	%edx,%r13d
1199	mov	%edx,%r14d
1200
1201	ror	$2,%ecx
1202	ror	$13,%r13d
1203	mov	%edx,%r15d
1204	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1205
1206	xor	%r13d,%ecx
1207	ror	$9,%r13d
1208	or	%r9d,%r14d			# a|c
1209
1210	xor	%r13d,%ecx			# h=Sigma0(a)
1211	and	%r9d,%r15d			# a&c
1212	add	%r12d,%r10d			# d+=T1
1213
1214	and	%r8d,%r14d			# (a|c)&b
1215	add	%r12d,%ecx			# h+=T1
1216
1217	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1218	lea	1(%rdi),%rdi	# round++
1219
1220	add	%r14d,%ecx			# h+=Maj(a,b,c)
1221	mov	28(%rsp),%r13d
1222	mov	16(%rsp),%r12d
1223
1224	mov	%r13d,%r15d
1225
1226	shr	$3,%r13d
1227	ror	$7,%r15d
1228
1229	xor	%r15d,%r13d
1230	ror	$11,%r15d
1231
1232	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1233	mov	%r12d,%r14d
1234
1235	shr	$10,%r12d
1236	ror	$17,%r14d
1237
1238	xor	%r14d,%r12d
1239	ror	$2,%r14d
1240
1241	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1242
1243	add	%r13d,%r12d
1244
1245	add	60(%rsp),%r12d
1246
1247	add	24(%rsp),%r12d
1248	mov	%r10d,%r13d
1249	mov	%r10d,%r14d
1250	mov	%r11d,%r15d
1251
1252	ror	$6,%r13d
1253	ror	$11,%r14d
1254	xor	%eax,%r15d			# f^g
1255
1256	xor	%r14d,%r13d
1257	ror	$14,%r14d
1258	and	%r10d,%r15d			# (f^g)&e
1259	mov	%r12d,24(%rsp)
1260
1261	xor	%r14d,%r13d			# Sigma1(e)
1262	xor	%eax,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1263	add	%ebx,%r12d			# T1+=h
1264
1265	mov	%ecx,%ebx
1266	add	%r13d,%r12d			# T1+=Sigma1(e)
1267
1268	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1269	mov	%ecx,%r13d
1270	mov	%ecx,%r14d
1271
1272	ror	$2,%ebx
1273	ror	$13,%r13d
1274	mov	%ecx,%r15d
1275	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1276
1277	xor	%r13d,%ebx
1278	ror	$9,%r13d
1279	or	%r8d,%r14d			# a|c
1280
1281	xor	%r13d,%ebx			# h=Sigma0(a)
1282	and	%r8d,%r15d			# a&c
1283	add	%r12d,%r9d			# d+=T1
1284
1285	and	%edx,%r14d			# (a|c)&b
1286	add	%r12d,%ebx			# h+=T1
1287
1288	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1289	lea	1(%rdi),%rdi	# round++
1290
1291	add	%r14d,%ebx			# h+=Maj(a,b,c)
1292	mov	32(%rsp),%r13d
1293	mov	20(%rsp),%r12d
1294
1295	mov	%r13d,%r15d
1296
1297	shr	$3,%r13d
1298	ror	$7,%r15d
1299
1300	xor	%r15d,%r13d
1301	ror	$11,%r15d
1302
1303	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1304	mov	%r12d,%r14d
1305
1306	shr	$10,%r12d
1307	ror	$17,%r14d
1308
1309	xor	%r14d,%r12d
1310	ror	$2,%r14d
1311
1312	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1313
1314	add	%r13d,%r12d
1315
1316	add	0(%rsp),%r12d
1317
1318	add	28(%rsp),%r12d
1319	mov	%r9d,%r13d
1320	mov	%r9d,%r14d
1321	mov	%r10d,%r15d
1322
1323	ror	$6,%r13d
1324	ror	$11,%r14d
1325	xor	%r11d,%r15d			# f^g
1326
1327	xor	%r14d,%r13d
1328	ror	$14,%r14d
1329	and	%r9d,%r15d			# (f^g)&e
1330	mov	%r12d,28(%rsp)
1331
1332	xor	%r14d,%r13d			# Sigma1(e)
1333	xor	%r11d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1334	add	%eax,%r12d			# T1+=h
1335
1336	mov	%ebx,%eax
1337	add	%r13d,%r12d			# T1+=Sigma1(e)
1338
1339	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1340	mov	%ebx,%r13d
1341	mov	%ebx,%r14d
1342
1343	ror	$2,%eax
1344	ror	$13,%r13d
1345	mov	%ebx,%r15d
1346	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1347
1348	xor	%r13d,%eax
1349	ror	$9,%r13d
1350	or	%edx,%r14d			# a|c
1351
1352	xor	%r13d,%eax			# h=Sigma0(a)
1353	and	%edx,%r15d			# a&c
1354	add	%r12d,%r8d			# d+=T1
1355
1356	and	%ecx,%r14d			# (a|c)&b
1357	add	%r12d,%eax			# h+=T1
1358
1359	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1360	lea	1(%rdi),%rdi	# round++
1361
1362	add	%r14d,%eax			# h+=Maj(a,b,c)
1363	mov	36(%rsp),%r13d
1364	mov	24(%rsp),%r12d
1365
1366	mov	%r13d,%r15d
1367
1368	shr	$3,%r13d
1369	ror	$7,%r15d
1370
1371	xor	%r15d,%r13d
1372	ror	$11,%r15d
1373
1374	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1375	mov	%r12d,%r14d
1376
1377	shr	$10,%r12d
1378	ror	$17,%r14d
1379
1380	xor	%r14d,%r12d
1381	ror	$2,%r14d
1382
1383	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1384
1385	add	%r13d,%r12d
1386
1387	add	4(%rsp),%r12d
1388
1389	add	32(%rsp),%r12d
1390	mov	%r8d,%r13d
1391	mov	%r8d,%r14d
1392	mov	%r9d,%r15d
1393
1394	ror	$6,%r13d
1395	ror	$11,%r14d
1396	xor	%r10d,%r15d			# f^g
1397
1398	xor	%r14d,%r13d
1399	ror	$14,%r14d
1400	and	%r8d,%r15d			# (f^g)&e
1401	mov	%r12d,32(%rsp)
1402
1403	xor	%r14d,%r13d			# Sigma1(e)
1404	xor	%r10d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1405	add	%r11d,%r12d			# T1+=h
1406
1407	mov	%eax,%r11d
1408	add	%r13d,%r12d			# T1+=Sigma1(e)
1409
1410	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1411	mov	%eax,%r13d
1412	mov	%eax,%r14d
1413
1414	ror	$2,%r11d
1415	ror	$13,%r13d
1416	mov	%eax,%r15d
1417	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1418
1419	xor	%r13d,%r11d
1420	ror	$9,%r13d
1421	or	%ecx,%r14d			# a|c
1422
1423	xor	%r13d,%r11d			# h=Sigma0(a)
1424	and	%ecx,%r15d			# a&c
1425	add	%r12d,%edx			# d+=T1
1426
1427	and	%ebx,%r14d			# (a|c)&b
1428	add	%r12d,%r11d			# h+=T1
1429
1430	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1431	lea	1(%rdi),%rdi	# round++
1432
1433	add	%r14d,%r11d			# h+=Maj(a,b,c)
1434	mov	40(%rsp),%r13d
1435	mov	28(%rsp),%r12d
1436
1437	mov	%r13d,%r15d
1438
1439	shr	$3,%r13d
1440	ror	$7,%r15d
1441
1442	xor	%r15d,%r13d
1443	ror	$11,%r15d
1444
1445	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1446	mov	%r12d,%r14d
1447
1448	shr	$10,%r12d
1449	ror	$17,%r14d
1450
1451	xor	%r14d,%r12d
1452	ror	$2,%r14d
1453
1454	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1455
1456	add	%r13d,%r12d
1457
1458	add	8(%rsp),%r12d
1459
1460	add	36(%rsp),%r12d
1461	mov	%edx,%r13d
1462	mov	%edx,%r14d
1463	mov	%r8d,%r15d
1464
1465	ror	$6,%r13d
1466	ror	$11,%r14d
1467	xor	%r9d,%r15d			# f^g
1468
1469	xor	%r14d,%r13d
1470	ror	$14,%r14d
1471	and	%edx,%r15d			# (f^g)&e
1472	mov	%r12d,36(%rsp)
1473
1474	xor	%r14d,%r13d			# Sigma1(e)
1475	xor	%r9d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1476	add	%r10d,%r12d			# T1+=h
1477
1478	mov	%r11d,%r10d
1479	add	%r13d,%r12d			# T1+=Sigma1(e)
1480
1481	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1482	mov	%r11d,%r13d
1483	mov	%r11d,%r14d
1484
1485	ror	$2,%r10d
1486	ror	$13,%r13d
1487	mov	%r11d,%r15d
1488	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1489
1490	xor	%r13d,%r10d
1491	ror	$9,%r13d
1492	or	%ebx,%r14d			# a|c
1493
1494	xor	%r13d,%r10d			# h=Sigma0(a)
1495	and	%ebx,%r15d			# a&c
1496	add	%r12d,%ecx			# d+=T1
1497
1498	and	%eax,%r14d			# (a|c)&b
1499	add	%r12d,%r10d			# h+=T1
1500
1501	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1502	lea	1(%rdi),%rdi	# round++
1503
1504	add	%r14d,%r10d			# h+=Maj(a,b,c)
1505	mov	44(%rsp),%r13d
1506	mov	32(%rsp),%r12d
1507
1508	mov	%r13d,%r15d
1509
1510	shr	$3,%r13d
1511	ror	$7,%r15d
1512
1513	xor	%r15d,%r13d
1514	ror	$11,%r15d
1515
1516	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1517	mov	%r12d,%r14d
1518
1519	shr	$10,%r12d
1520	ror	$17,%r14d
1521
1522	xor	%r14d,%r12d
1523	ror	$2,%r14d
1524
1525	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1526
1527	add	%r13d,%r12d
1528
1529	add	12(%rsp),%r12d
1530
1531	add	40(%rsp),%r12d
1532	mov	%ecx,%r13d
1533	mov	%ecx,%r14d
1534	mov	%edx,%r15d
1535
1536	ror	$6,%r13d
1537	ror	$11,%r14d
1538	xor	%r8d,%r15d			# f^g
1539
1540	xor	%r14d,%r13d
1541	ror	$14,%r14d
1542	and	%ecx,%r15d			# (f^g)&e
1543	mov	%r12d,40(%rsp)
1544
1545	xor	%r14d,%r13d			# Sigma1(e)
1546	xor	%r8d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1547	add	%r9d,%r12d			# T1+=h
1548
1549	mov	%r10d,%r9d
1550	add	%r13d,%r12d			# T1+=Sigma1(e)
1551
1552	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1553	mov	%r10d,%r13d
1554	mov	%r10d,%r14d
1555
1556	ror	$2,%r9d
1557	ror	$13,%r13d
1558	mov	%r10d,%r15d
1559	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1560
1561	xor	%r13d,%r9d
1562	ror	$9,%r13d
1563	or	%eax,%r14d			# a|c
1564
1565	xor	%r13d,%r9d			# h=Sigma0(a)
1566	and	%eax,%r15d			# a&c
1567	add	%r12d,%ebx			# d+=T1
1568
1569	and	%r11d,%r14d			# (a|c)&b
1570	add	%r12d,%r9d			# h+=T1
1571
1572	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1573	lea	1(%rdi),%rdi	# round++
1574
1575	add	%r14d,%r9d			# h+=Maj(a,b,c)
1576	mov	48(%rsp),%r13d
1577	mov	36(%rsp),%r12d
1578
1579	mov	%r13d,%r15d
1580
1581	shr	$3,%r13d
1582	ror	$7,%r15d
1583
1584	xor	%r15d,%r13d
1585	ror	$11,%r15d
1586
1587	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1588	mov	%r12d,%r14d
1589
1590	shr	$10,%r12d
1591	ror	$17,%r14d
1592
1593	xor	%r14d,%r12d
1594	ror	$2,%r14d
1595
1596	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1597
1598	add	%r13d,%r12d
1599
1600	add	16(%rsp),%r12d
1601
1602	add	44(%rsp),%r12d
1603	mov	%ebx,%r13d
1604	mov	%ebx,%r14d
1605	mov	%ecx,%r15d
1606
1607	ror	$6,%r13d
1608	ror	$11,%r14d
1609	xor	%edx,%r15d			# f^g
1610
1611	xor	%r14d,%r13d
1612	ror	$14,%r14d
1613	and	%ebx,%r15d			# (f^g)&e
1614	mov	%r12d,44(%rsp)
1615
1616	xor	%r14d,%r13d			# Sigma1(e)
1617	xor	%edx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1618	add	%r8d,%r12d			# T1+=h
1619
1620	mov	%r9d,%r8d
1621	add	%r13d,%r12d			# T1+=Sigma1(e)
1622
1623	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1624	mov	%r9d,%r13d
1625	mov	%r9d,%r14d
1626
1627	ror	$2,%r8d
1628	ror	$13,%r13d
1629	mov	%r9d,%r15d
1630	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1631
1632	xor	%r13d,%r8d
1633	ror	$9,%r13d
1634	or	%r11d,%r14d			# a|c
1635
1636	xor	%r13d,%r8d			# h=Sigma0(a)
1637	and	%r11d,%r15d			# a&c
1638	add	%r12d,%eax			# d+=T1
1639
1640	and	%r10d,%r14d			# (a|c)&b
1641	add	%r12d,%r8d			# h+=T1
1642
1643	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1644	lea	1(%rdi),%rdi	# round++
1645
1646	add	%r14d,%r8d			# h+=Maj(a,b,c)
1647	mov	52(%rsp),%r13d
1648	mov	40(%rsp),%r12d
1649
1650	mov	%r13d,%r15d
1651
1652	shr	$3,%r13d
1653	ror	$7,%r15d
1654
1655	xor	%r15d,%r13d
1656	ror	$11,%r15d
1657
1658	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1659	mov	%r12d,%r14d
1660
1661	shr	$10,%r12d
1662	ror	$17,%r14d
1663
1664	xor	%r14d,%r12d
1665	ror	$2,%r14d
1666
1667	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1668
1669	add	%r13d,%r12d
1670
1671	add	20(%rsp),%r12d
1672
1673	add	48(%rsp),%r12d
1674	mov	%eax,%r13d
1675	mov	%eax,%r14d
1676	mov	%ebx,%r15d
1677
1678	ror	$6,%r13d
1679	ror	$11,%r14d
1680	xor	%ecx,%r15d			# f^g
1681
1682	xor	%r14d,%r13d
1683	ror	$14,%r14d
1684	and	%eax,%r15d			# (f^g)&e
1685	mov	%r12d,48(%rsp)
1686
1687	xor	%r14d,%r13d			# Sigma1(e)
1688	xor	%ecx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1689	add	%edx,%r12d			# T1+=h
1690
1691	mov	%r8d,%edx
1692	add	%r13d,%r12d			# T1+=Sigma1(e)
1693
1694	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1695	mov	%r8d,%r13d
1696	mov	%r8d,%r14d
1697
1698	ror	$2,%edx
1699	ror	$13,%r13d
1700	mov	%r8d,%r15d
1701	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1702
1703	xor	%r13d,%edx
1704	ror	$9,%r13d
1705	or	%r10d,%r14d			# a|c
1706
1707	xor	%r13d,%edx			# h=Sigma0(a)
1708	and	%r10d,%r15d			# a&c
1709	add	%r12d,%r11d			# d+=T1
1710
1711	and	%r9d,%r14d			# (a|c)&b
1712	add	%r12d,%edx			# h+=T1
1713
1714	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1715	lea	1(%rdi),%rdi	# round++
1716
1717	add	%r14d,%edx			# h+=Maj(a,b,c)
1718	mov	56(%rsp),%r13d
1719	mov	44(%rsp),%r12d
1720
1721	mov	%r13d,%r15d
1722
1723	shr	$3,%r13d
1724	ror	$7,%r15d
1725
1726	xor	%r15d,%r13d
1727	ror	$11,%r15d
1728
1729	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1730	mov	%r12d,%r14d
1731
1732	shr	$10,%r12d
1733	ror	$17,%r14d
1734
1735	xor	%r14d,%r12d
1736	ror	$2,%r14d
1737
1738	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1739
1740	add	%r13d,%r12d
1741
1742	add	24(%rsp),%r12d
1743
1744	add	52(%rsp),%r12d
1745	mov	%r11d,%r13d
1746	mov	%r11d,%r14d
1747	mov	%eax,%r15d
1748
1749	ror	$6,%r13d
1750	ror	$11,%r14d
1751	xor	%ebx,%r15d			# f^g
1752
1753	xor	%r14d,%r13d
1754	ror	$14,%r14d
1755	and	%r11d,%r15d			# (f^g)&e
1756	mov	%r12d,52(%rsp)
1757
1758	xor	%r14d,%r13d			# Sigma1(e)
1759	xor	%ebx,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1760	add	%ecx,%r12d			# T1+=h
1761
1762	mov	%edx,%ecx
1763	add	%r13d,%r12d			# T1+=Sigma1(e)
1764
1765	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1766	mov	%edx,%r13d
1767	mov	%edx,%r14d
1768
1769	ror	$2,%ecx
1770	ror	$13,%r13d
1771	mov	%edx,%r15d
1772	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1773
1774	xor	%r13d,%ecx
1775	ror	$9,%r13d
1776	or	%r9d,%r14d			# a|c
1777
1778	xor	%r13d,%ecx			# h=Sigma0(a)
1779	and	%r9d,%r15d			# a&c
1780	add	%r12d,%r10d			# d+=T1
1781
1782	and	%r8d,%r14d			# (a|c)&b
1783	add	%r12d,%ecx			# h+=T1
1784
1785	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1786	lea	1(%rdi),%rdi	# round++
1787
1788	add	%r14d,%ecx			# h+=Maj(a,b,c)
1789	mov	60(%rsp),%r13d
1790	mov	48(%rsp),%r12d
1791
1792	mov	%r13d,%r15d
1793
1794	shr	$3,%r13d
1795	ror	$7,%r15d
1796
1797	xor	%r15d,%r13d
1798	ror	$11,%r15d
1799
1800	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1801	mov	%r12d,%r14d
1802
1803	shr	$10,%r12d
1804	ror	$17,%r14d
1805
1806	xor	%r14d,%r12d
1807	ror	$2,%r14d
1808
1809	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1810
1811	add	%r13d,%r12d
1812
1813	add	28(%rsp),%r12d
1814
1815	add	56(%rsp),%r12d
1816	mov	%r10d,%r13d
1817	mov	%r10d,%r14d
1818	mov	%r11d,%r15d
1819
1820	ror	$6,%r13d
1821	ror	$11,%r14d
1822	xor	%eax,%r15d			# f^g
1823
1824	xor	%r14d,%r13d
1825	ror	$14,%r14d
1826	and	%r10d,%r15d			# (f^g)&e
1827	mov	%r12d,56(%rsp)
1828
1829	xor	%r14d,%r13d			# Sigma1(e)
1830	xor	%eax,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1831	add	%ebx,%r12d			# T1+=h
1832
1833	mov	%ecx,%ebx
1834	add	%r13d,%r12d			# T1+=Sigma1(e)
1835
1836	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1837	mov	%ecx,%r13d
1838	mov	%ecx,%r14d
1839
1840	ror	$2,%ebx
1841	ror	$13,%r13d
1842	mov	%ecx,%r15d
1843	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1844
1845	xor	%r13d,%ebx
1846	ror	$9,%r13d
1847	or	%r8d,%r14d			# a|c
1848
1849	xor	%r13d,%ebx			# h=Sigma0(a)
1850	and	%r8d,%r15d			# a&c
1851	add	%r12d,%r9d			# d+=T1
1852
1853	and	%edx,%r14d			# (a|c)&b
1854	add	%r12d,%ebx			# h+=T1
1855
1856	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1857	lea	1(%rdi),%rdi	# round++
1858
1859	add	%r14d,%ebx			# h+=Maj(a,b,c)
1860	mov	0(%rsp),%r13d
1861	mov	52(%rsp),%r12d
1862
1863	mov	%r13d,%r15d
1864
1865	shr	$3,%r13d
1866	ror	$7,%r15d
1867
1868	xor	%r15d,%r13d
1869	ror	$11,%r15d
1870
1871	xor	%r15d,%r13d			# sigma0(X[(i+1)&0xf])
1872	mov	%r12d,%r14d
1873
1874	shr	$10,%r12d
1875	ror	$17,%r14d
1876
1877	xor	%r14d,%r12d
1878	ror	$2,%r14d
1879
1880	xor	%r14d,%r12d			# sigma1(X[(i+14)&0xf])
1881
1882	add	%r13d,%r12d
1883
1884	add	32(%rsp),%r12d
1885
1886	add	60(%rsp),%r12d
1887	mov	%r9d,%r13d
1888	mov	%r9d,%r14d
1889	mov	%r10d,%r15d
1890
1891	ror	$6,%r13d
1892	ror	$11,%r14d
1893	xor	%r11d,%r15d			# f^g
1894
1895	xor	%r14d,%r13d
1896	ror	$14,%r14d
1897	and	%r9d,%r15d			# (f^g)&e
1898	mov	%r12d,60(%rsp)
1899
1900	xor	%r14d,%r13d			# Sigma1(e)
1901	xor	%r11d,%r15d			# Ch(e,f,g)=((f^g)&e)^g
1902	add	%eax,%r12d			# T1+=h
1903
1904	mov	%ebx,%eax
1905	add	%r13d,%r12d			# T1+=Sigma1(e)
1906
1907	add	%r15d,%r12d			# T1+=Ch(e,f,g)
1908	mov	%ebx,%r13d
1909	mov	%ebx,%r14d
1910
1911	ror	$2,%eax
1912	ror	$13,%r13d
1913	mov	%ebx,%r15d
1914	add	(%rbp,%rdi,4),%r12d	# T1+=K[round]
1915
1916	xor	%r13d,%eax
1917	ror	$9,%r13d
1918	or	%edx,%r14d			# a|c
1919
1920	xor	%r13d,%eax			# h=Sigma0(a)
1921	and	%edx,%r15d			# a&c
1922	add	%r12d,%r8d			# d+=T1
1923
1924	and	%ecx,%r14d			# (a|c)&b
1925	add	%r12d,%eax			# h+=T1
1926
1927	or	%r15d,%r14d			# Maj(a,b,c)=((a|c)&b)|(a&c)
1928	lea	1(%rdi),%rdi	# round++
1929
1930	add	%r14d,%eax			# h+=Maj(a,b,c)
1931	cmp	$64,%rdi
1932	jb	.Lrounds_16_xx
1933
1934	mov	16*4+0*8(%rsp),%rdi
1935	lea	16*4(%rsi),%rsi
1936
1937	add	4*0(%rdi),%eax
1938	add	4*1(%rdi),%ebx
1939	add	4*2(%rdi),%ecx
1940	add	4*3(%rdi),%edx
1941	add	4*4(%rdi),%r8d
1942	add	4*5(%rdi),%r9d
1943	add	4*6(%rdi),%r10d
1944	add	4*7(%rdi),%r11d
1945
1946	cmp	16*4+2*8(%rsp),%rsi
1947
1948	mov	%eax,4*0(%rdi)
1949	mov	%ebx,4*1(%rdi)
1950	mov	%ecx,4*2(%rdi)
1951	mov	%edx,4*3(%rdi)
1952	mov	%r8d,4*4(%rdi)
1953	mov	%r9d,4*5(%rdi)
1954	mov	%r10d,4*6(%rdi)
1955	mov	%r11d,4*7(%rdi)
1956	jb	.Lloop
1957
1958	mov	16*4+3*8(%rsp),%rsp
1959	pop	%r15
1960	pop	%r14
1961	pop	%r13
1962	pop	%r12
1963	pop	%rbp
1964	pop	%rbx
1965
1966	ret
1967SET_SIZE(SHA256TransformBlocks)
1968
/*
 * K256: the 64 SHA-256 round constants K[0]..K[63], stored as 32-bit words
 * (16 rows of four .long values each).
 *
 * SHA256TransformBlocks above points %rbp at this table and adds one entry
 * to T1 in every round with "add (%rbp,%rdi,4),%r12d", where %rdi is the
 * round counter that starts at 0 and is compared against 64.  The entries
 * are looked up by position, so their order must not change.
 *
 * The values match the SHA-256 constant list in FIPS 180-4, section 4.2.2.
 *
 * NOTE(review): no section directive is visible between the function and
 * this table, so it is presumably assembled into the same section as the
 * code; the "lea K256-.(%rbp),%rbp" in the prologue depends on the
 * assembler resolving that distance -- confirm before relocating the table.
 * NOTE(review): K256 is typed @object but no size directive for it is
 * visible here; consider SET_SIZE(K256) after the last row -- verify with
 * the build before adding it.
 */
1969.align	64		# start the table on a 64-byte boundary
1970.type	K256,@object	# K256 names data, not a function
1971K256:
1972	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	# K[0..3]
1973	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5	# K[4..7]
1974	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3	# K[8..11]
1975	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174	# K[12..15]
1976	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc	# K[16..19]
1977	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da	# K[20..23]
1978	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7	# K[24..27]
1979	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967	# K[28..31]
1980	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13	# K[32..35]
1981	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85	# K[36..39]
1982	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3	# K[40..43]
1983	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070	# K[44..47]
1984	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5	# K[48..51]
1985	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3	# K[52..55]
1986	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208	# K[56..59]
1987	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2	# K[60..63]
1988#endif /* !lint && !__lint */
1989