1#if defined(lint) || defined(__lint)
2#include <sys/stdint.h>
3#include <sys/sha2.h>
4
5/* ARGSUSED */
6void
7SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
8{
9}
10
11
12#else
13#include <sys/asm_linkage.h>
14
15ENTRY_NP(SHA512TransformBlocks)
16	push	%rbx
17	push	%rbp
18	push	%r12
19	push	%r13
20	push	%r14
21	push	%r15
22	mov	%rsp,%rbp		# copy %rsp
23	shl	$4,%rdx		# num*16
24	sub	$16*8+4*8,%rsp
25	lea	(%rsi,%rdx,8),%rdx	# inp+num*16*8
26	and	$-64,%rsp		# align stack frame
27	add	$8,%rdi		# Skip OpenSolaris field, "algotype"
28	mov	%rdi,16*8+0*8(%rsp)		# save ctx, 1st arg
29	mov	%rsi,16*8+1*8(%rsp)		# save inp, 2nd arg
30	mov	%rdx,16*8+2*8(%rsp)		# save end pointer, "3rd" arg
31	mov	%rbp,16*8+3*8(%rsp)		# save copy of %rsp
32
33	/.picmeup %rbp
	/ The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
	/ the address of the "next" instruction into the target register
	/ (%rbp).  It expands to these 2 instructions (a lea, then a nop):
37	lea	.Llea(%rip),%rbp
38	/nop	/ .picmeup generates a nop for mod 8 alignment--not needed here
39
40.Llea:
41	lea	K512-.(%rbp),%rbp
42
43	mov	8*0(%rdi),%rax
44	mov	8*1(%rdi),%rbx
45	mov	8*2(%rdi),%rcx
46	mov	8*3(%rdi),%rdx
47	mov	8*4(%rdi),%r8
48	mov	8*5(%rdi),%r9
49	mov	8*6(%rdi),%r10
50	mov	8*7(%rdi),%r11
51	jmp	.Lloop
52
53.align	16
54.Lloop:
55	xor	%rdi,%rdi
56	mov	8*0(%rsi),%r12
57	bswap	%r12
58	mov	%r8,%r13
59	mov	%r8,%r14
60	mov	%r9,%r15
61
62	ror	$14,%r13
63	ror	$18,%r14
64	xor	%r10,%r15			# f^g
65
66	xor	%r14,%r13
67	ror	$23,%r14
68	and	%r8,%r15			# (f^g)&e
69	mov	%r12,0(%rsp)
70
71	xor	%r14,%r13			# Sigma1(e)
72	xor	%r10,%r15			# Ch(e,f,g)=((f^g)&e)^g
73	add	%r11,%r12			# T1+=h
74
75	mov	%rax,%r11
76	add	%r13,%r12			# T1+=Sigma1(e)
77
78	add	%r15,%r12			# T1+=Ch(e,f,g)
79	mov	%rax,%r13
80	mov	%rax,%r14
81
82	ror	$28,%r11
83	ror	$34,%r13
84	mov	%rax,%r15
85	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
86
87	xor	%r13,%r11
88	ror	$5,%r13
89	or	%rcx,%r14			# a|c
90
91	xor	%r13,%r11			# h=Sigma0(a)
92	and	%rcx,%r15			# a&c
93	add	%r12,%rdx			# d+=T1
94
95	and	%rbx,%r14			# (a|c)&b
96	add	%r12,%r11			# h+=T1
97
98	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
99	lea	1(%rdi),%rdi	# round++
100
101	add	%r14,%r11			# h+=Maj(a,b,c)
102	mov	8*1(%rsi),%r12
103	bswap	%r12
104	mov	%rdx,%r13
105	mov	%rdx,%r14
106	mov	%r8,%r15
107
108	ror	$14,%r13
109	ror	$18,%r14
110	xor	%r9,%r15			# f^g
111
112	xor	%r14,%r13
113	ror	$23,%r14
114	and	%rdx,%r15			# (f^g)&e
115	mov	%r12,8(%rsp)
116
117	xor	%r14,%r13			# Sigma1(e)
118	xor	%r9,%r15			# Ch(e,f,g)=((f^g)&e)^g
119	add	%r10,%r12			# T1+=h
120
121	mov	%r11,%r10
122	add	%r13,%r12			# T1+=Sigma1(e)
123
124	add	%r15,%r12			# T1+=Ch(e,f,g)
125	mov	%r11,%r13
126	mov	%r11,%r14
127
128	ror	$28,%r10
129	ror	$34,%r13
130	mov	%r11,%r15
131	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
132
133	xor	%r13,%r10
134	ror	$5,%r13
135	or	%rbx,%r14			# a|c
136
137	xor	%r13,%r10			# h=Sigma0(a)
138	and	%rbx,%r15			# a&c
139	add	%r12,%rcx			# d+=T1
140
141	and	%rax,%r14			# (a|c)&b
142	add	%r12,%r10			# h+=T1
143
144	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
145	lea	1(%rdi),%rdi	# round++
146
147	add	%r14,%r10			# h+=Maj(a,b,c)
148	mov	8*2(%rsi),%r12
149	bswap	%r12
150	mov	%rcx,%r13
151	mov	%rcx,%r14
152	mov	%rdx,%r15
153
154	ror	$14,%r13
155	ror	$18,%r14
156	xor	%r8,%r15			# f^g
157
158	xor	%r14,%r13
159	ror	$23,%r14
160	and	%rcx,%r15			# (f^g)&e
161	mov	%r12,16(%rsp)
162
163	xor	%r14,%r13			# Sigma1(e)
164	xor	%r8,%r15			# Ch(e,f,g)=((f^g)&e)^g
165	add	%r9,%r12			# T1+=h
166
167	mov	%r10,%r9
168	add	%r13,%r12			# T1+=Sigma1(e)
169
170	add	%r15,%r12			# T1+=Ch(e,f,g)
171	mov	%r10,%r13
172	mov	%r10,%r14
173
174	ror	$28,%r9
175	ror	$34,%r13
176	mov	%r10,%r15
177	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
178
179	xor	%r13,%r9
180	ror	$5,%r13
181	or	%rax,%r14			# a|c
182
183	xor	%r13,%r9			# h=Sigma0(a)
184	and	%rax,%r15			# a&c
185	add	%r12,%rbx			# d+=T1
186
187	and	%r11,%r14			# (a|c)&b
188	add	%r12,%r9			# h+=T1
189
190	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
191	lea	1(%rdi),%rdi	# round++
192
193	add	%r14,%r9			# h+=Maj(a,b,c)
194	mov	8*3(%rsi),%r12
195	bswap	%r12
196	mov	%rbx,%r13
197	mov	%rbx,%r14
198	mov	%rcx,%r15
199
200	ror	$14,%r13
201	ror	$18,%r14
202	xor	%rdx,%r15			# f^g
203
204	xor	%r14,%r13
205	ror	$23,%r14
206	and	%rbx,%r15			# (f^g)&e
207	mov	%r12,24(%rsp)
208
209	xor	%r14,%r13			# Sigma1(e)
210	xor	%rdx,%r15			# Ch(e,f,g)=((f^g)&e)^g
211	add	%r8,%r12			# T1+=h
212
213	mov	%r9,%r8
214	add	%r13,%r12			# T1+=Sigma1(e)
215
216	add	%r15,%r12			# T1+=Ch(e,f,g)
217	mov	%r9,%r13
218	mov	%r9,%r14
219
220	ror	$28,%r8
221	ror	$34,%r13
222	mov	%r9,%r15
223	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
224
225	xor	%r13,%r8
226	ror	$5,%r13
227	or	%r11,%r14			# a|c
228
229	xor	%r13,%r8			# h=Sigma0(a)
230	and	%r11,%r15			# a&c
231	add	%r12,%rax			# d+=T1
232
233	and	%r10,%r14			# (a|c)&b
234	add	%r12,%r8			# h+=T1
235
236	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
237	lea	1(%rdi),%rdi	# round++
238
239	add	%r14,%r8			# h+=Maj(a,b,c)
240	mov	8*4(%rsi),%r12
241	bswap	%r12
242	mov	%rax,%r13
243	mov	%rax,%r14
244	mov	%rbx,%r15
245
246	ror	$14,%r13
247	ror	$18,%r14
248	xor	%rcx,%r15			# f^g
249
250	xor	%r14,%r13
251	ror	$23,%r14
252	and	%rax,%r15			# (f^g)&e
253	mov	%r12,32(%rsp)
254
255	xor	%r14,%r13			# Sigma1(e)
256	xor	%rcx,%r15			# Ch(e,f,g)=((f^g)&e)^g
257	add	%rdx,%r12			# T1+=h
258
259	mov	%r8,%rdx
260	add	%r13,%r12			# T1+=Sigma1(e)
261
262	add	%r15,%r12			# T1+=Ch(e,f,g)
263	mov	%r8,%r13
264	mov	%r8,%r14
265
266	ror	$28,%rdx
267	ror	$34,%r13
268	mov	%r8,%r15
269	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
270
271	xor	%r13,%rdx
272	ror	$5,%r13
273	or	%r10,%r14			# a|c
274
275	xor	%r13,%rdx			# h=Sigma0(a)
276	and	%r10,%r15			# a&c
277	add	%r12,%r11			# d+=T1
278
279	and	%r9,%r14			# (a|c)&b
280	add	%r12,%rdx			# h+=T1
281
282	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
283	lea	1(%rdi),%rdi	# round++
284
285	add	%r14,%rdx			# h+=Maj(a,b,c)
286	mov	8*5(%rsi),%r12
287	bswap	%r12
288	mov	%r11,%r13
289	mov	%r11,%r14
290	mov	%rax,%r15
291
292	ror	$14,%r13
293	ror	$18,%r14
294	xor	%rbx,%r15			# f^g
295
296	xor	%r14,%r13
297	ror	$23,%r14
298	and	%r11,%r15			# (f^g)&e
299	mov	%r12,40(%rsp)
300
301	xor	%r14,%r13			# Sigma1(e)
302	xor	%rbx,%r15			# Ch(e,f,g)=((f^g)&e)^g
303	add	%rcx,%r12			# T1+=h
304
305	mov	%rdx,%rcx
306	add	%r13,%r12			# T1+=Sigma1(e)
307
308	add	%r15,%r12			# T1+=Ch(e,f,g)
309	mov	%rdx,%r13
310	mov	%rdx,%r14
311
312	ror	$28,%rcx
313	ror	$34,%r13
314	mov	%rdx,%r15
315	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
316
317	xor	%r13,%rcx
318	ror	$5,%r13
319	or	%r9,%r14			# a|c
320
321	xor	%r13,%rcx			# h=Sigma0(a)
322	and	%r9,%r15			# a&c
323	add	%r12,%r10			# d+=T1
324
325	and	%r8,%r14			# (a|c)&b
326	add	%r12,%rcx			# h+=T1
327
328	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
329	lea	1(%rdi),%rdi	# round++
330
331	add	%r14,%rcx			# h+=Maj(a,b,c)
332	mov	8*6(%rsi),%r12
333	bswap	%r12
334	mov	%r10,%r13
335	mov	%r10,%r14
336	mov	%r11,%r15
337
338	ror	$14,%r13
339	ror	$18,%r14
340	xor	%rax,%r15			# f^g
341
342	xor	%r14,%r13
343	ror	$23,%r14
344	and	%r10,%r15			# (f^g)&e
345	mov	%r12,48(%rsp)
346
347	xor	%r14,%r13			# Sigma1(e)
348	xor	%rax,%r15			# Ch(e,f,g)=((f^g)&e)^g
349	add	%rbx,%r12			# T1+=h
350
351	mov	%rcx,%rbx
352	add	%r13,%r12			# T1+=Sigma1(e)
353
354	add	%r15,%r12			# T1+=Ch(e,f,g)
355	mov	%rcx,%r13
356	mov	%rcx,%r14
357
358	ror	$28,%rbx
359	ror	$34,%r13
360	mov	%rcx,%r15
361	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
362
363	xor	%r13,%rbx
364	ror	$5,%r13
365	or	%r8,%r14			# a|c
366
367	xor	%r13,%rbx			# h=Sigma0(a)
368	and	%r8,%r15			# a&c
369	add	%r12,%r9			# d+=T1
370
371	and	%rdx,%r14			# (a|c)&b
372	add	%r12,%rbx			# h+=T1
373
374	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
375	lea	1(%rdi),%rdi	# round++
376
377	add	%r14,%rbx			# h+=Maj(a,b,c)
378	mov	8*7(%rsi),%r12
379	bswap	%r12
380	mov	%r9,%r13
381	mov	%r9,%r14
382	mov	%r10,%r15
383
384	ror	$14,%r13
385	ror	$18,%r14
386	xor	%r11,%r15			# f^g
387
388	xor	%r14,%r13
389	ror	$23,%r14
390	and	%r9,%r15			# (f^g)&e
391	mov	%r12,56(%rsp)
392
393	xor	%r14,%r13			# Sigma1(e)
394	xor	%r11,%r15			# Ch(e,f,g)=((f^g)&e)^g
395	add	%rax,%r12			# T1+=h
396
397	mov	%rbx,%rax
398	add	%r13,%r12			# T1+=Sigma1(e)
399
400	add	%r15,%r12			# T1+=Ch(e,f,g)
401	mov	%rbx,%r13
402	mov	%rbx,%r14
403
404	ror	$28,%rax
405	ror	$34,%r13
406	mov	%rbx,%r15
407	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
408
409	xor	%r13,%rax
410	ror	$5,%r13
411	or	%rdx,%r14			# a|c
412
413	xor	%r13,%rax			# h=Sigma0(a)
414	and	%rdx,%r15			# a&c
415	add	%r12,%r8			# d+=T1
416
417	and	%rcx,%r14			# (a|c)&b
418	add	%r12,%rax			# h+=T1
419
420	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
421	lea	1(%rdi),%rdi	# round++
422
423	add	%r14,%rax			# h+=Maj(a,b,c)
424	mov	8*8(%rsi),%r12
425	bswap	%r12
426	mov	%r8,%r13
427	mov	%r8,%r14
428	mov	%r9,%r15
429
430	ror	$14,%r13
431	ror	$18,%r14
432	xor	%r10,%r15			# f^g
433
434	xor	%r14,%r13
435	ror	$23,%r14
436	and	%r8,%r15			# (f^g)&e
437	mov	%r12,64(%rsp)
438
439	xor	%r14,%r13			# Sigma1(e)
440	xor	%r10,%r15			# Ch(e,f,g)=((f^g)&e)^g
441	add	%r11,%r12			# T1+=h
442
443	mov	%rax,%r11
444	add	%r13,%r12			# T1+=Sigma1(e)
445
446	add	%r15,%r12			# T1+=Ch(e,f,g)
447	mov	%rax,%r13
448	mov	%rax,%r14
449
450	ror	$28,%r11
451	ror	$34,%r13
452	mov	%rax,%r15
453	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
454
455	xor	%r13,%r11
456	ror	$5,%r13
457	or	%rcx,%r14			# a|c
458
459	xor	%r13,%r11			# h=Sigma0(a)
460	and	%rcx,%r15			# a&c
461	add	%r12,%rdx			# d+=T1
462
463	and	%rbx,%r14			# (a|c)&b
464	add	%r12,%r11			# h+=T1
465
466	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
467	lea	1(%rdi),%rdi	# round++
468
469	add	%r14,%r11			# h+=Maj(a,b,c)
470	mov	8*9(%rsi),%r12
471	bswap	%r12
472	mov	%rdx,%r13
473	mov	%rdx,%r14
474	mov	%r8,%r15
475
476	ror	$14,%r13
477	ror	$18,%r14
478	xor	%r9,%r15			# f^g
479
480	xor	%r14,%r13
481	ror	$23,%r14
482	and	%rdx,%r15			# (f^g)&e
483	mov	%r12,72(%rsp)
484
485	xor	%r14,%r13			# Sigma1(e)
486	xor	%r9,%r15			# Ch(e,f,g)=((f^g)&e)^g
487	add	%r10,%r12			# T1+=h
488
489	mov	%r11,%r10
490	add	%r13,%r12			# T1+=Sigma1(e)
491
492	add	%r15,%r12			# T1+=Ch(e,f,g)
493	mov	%r11,%r13
494	mov	%r11,%r14
495
496	ror	$28,%r10
497	ror	$34,%r13
498	mov	%r11,%r15
499	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
500
501	xor	%r13,%r10
502	ror	$5,%r13
503	or	%rbx,%r14			# a|c
504
505	xor	%r13,%r10			# h=Sigma0(a)
506	and	%rbx,%r15			# a&c
507	add	%r12,%rcx			# d+=T1
508
509	and	%rax,%r14			# (a|c)&b
510	add	%r12,%r10			# h+=T1
511
512	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
513	lea	1(%rdi),%rdi	# round++
514
515	add	%r14,%r10			# h+=Maj(a,b,c)
516	mov	8*10(%rsi),%r12
517	bswap	%r12
518	mov	%rcx,%r13
519	mov	%rcx,%r14
520	mov	%rdx,%r15
521
522	ror	$14,%r13
523	ror	$18,%r14
524	xor	%r8,%r15			# f^g
525
526	xor	%r14,%r13
527	ror	$23,%r14
528	and	%rcx,%r15			# (f^g)&e
529	mov	%r12,80(%rsp)
530
531	xor	%r14,%r13			# Sigma1(e)
532	xor	%r8,%r15			# Ch(e,f,g)=((f^g)&e)^g
533	add	%r9,%r12			# T1+=h
534
535	mov	%r10,%r9
536	add	%r13,%r12			# T1+=Sigma1(e)
537
538	add	%r15,%r12			# T1+=Ch(e,f,g)
539	mov	%r10,%r13
540	mov	%r10,%r14
541
542	ror	$28,%r9
543	ror	$34,%r13
544	mov	%r10,%r15
545	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
546
547	xor	%r13,%r9
548	ror	$5,%r13
549	or	%rax,%r14			# a|c
550
551	xor	%r13,%r9			# h=Sigma0(a)
552	and	%rax,%r15			# a&c
553	add	%r12,%rbx			# d+=T1
554
555	and	%r11,%r14			# (a|c)&b
556	add	%r12,%r9			# h+=T1
557
558	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
559	lea	1(%rdi),%rdi	# round++
560
561	add	%r14,%r9			# h+=Maj(a,b,c)
562	mov	8*11(%rsi),%r12
563	bswap	%r12
564	mov	%rbx,%r13
565	mov	%rbx,%r14
566	mov	%rcx,%r15
567
568	ror	$14,%r13
569	ror	$18,%r14
570	xor	%rdx,%r15			# f^g
571
572	xor	%r14,%r13
573	ror	$23,%r14
574	and	%rbx,%r15			# (f^g)&e
575	mov	%r12,88(%rsp)
576
577	xor	%r14,%r13			# Sigma1(e)
578	xor	%rdx,%r15			# Ch(e,f,g)=((f^g)&e)^g
579	add	%r8,%r12			# T1+=h
580
581	mov	%r9,%r8
582	add	%r13,%r12			# T1+=Sigma1(e)
583
584	add	%r15,%r12			# T1+=Ch(e,f,g)
585	mov	%r9,%r13
586	mov	%r9,%r14
587
588	ror	$28,%r8
589	ror	$34,%r13
590	mov	%r9,%r15
591	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
592
593	xor	%r13,%r8
594	ror	$5,%r13
595	or	%r11,%r14			# a|c
596
597	xor	%r13,%r8			# h=Sigma0(a)
598	and	%r11,%r15			# a&c
599	add	%r12,%rax			# d+=T1
600
601	and	%r10,%r14			# (a|c)&b
602	add	%r12,%r8			# h+=T1
603
604	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
605	lea	1(%rdi),%rdi	# round++
606
607	add	%r14,%r8			# h+=Maj(a,b,c)
608	mov	8*12(%rsi),%r12
609	bswap	%r12
610	mov	%rax,%r13
611	mov	%rax,%r14
612	mov	%rbx,%r15
613
614	ror	$14,%r13
615	ror	$18,%r14
616	xor	%rcx,%r15			# f^g
617
618	xor	%r14,%r13
619	ror	$23,%r14
620	and	%rax,%r15			# (f^g)&e
621	mov	%r12,96(%rsp)
622
623	xor	%r14,%r13			# Sigma1(e)
624	xor	%rcx,%r15			# Ch(e,f,g)=((f^g)&e)^g
625	add	%rdx,%r12			# T1+=h
626
627	mov	%r8,%rdx
628	add	%r13,%r12			# T1+=Sigma1(e)
629
630	add	%r15,%r12			# T1+=Ch(e,f,g)
631	mov	%r8,%r13
632	mov	%r8,%r14
633
634	ror	$28,%rdx
635	ror	$34,%r13
636	mov	%r8,%r15
637	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
638
639	xor	%r13,%rdx
640	ror	$5,%r13
641	or	%r10,%r14			# a|c
642
643	xor	%r13,%rdx			# h=Sigma0(a)
644	and	%r10,%r15			# a&c
645	add	%r12,%r11			# d+=T1
646
647	and	%r9,%r14			# (a|c)&b
648	add	%r12,%rdx			# h+=T1
649
650	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
651	lea	1(%rdi),%rdi	# round++
652
653	add	%r14,%rdx			# h+=Maj(a,b,c)
654	mov	8*13(%rsi),%r12
655	bswap	%r12
656	mov	%r11,%r13
657	mov	%r11,%r14
658	mov	%rax,%r15
659
660	ror	$14,%r13
661	ror	$18,%r14
662	xor	%rbx,%r15			# f^g
663
664	xor	%r14,%r13
665	ror	$23,%r14
666	and	%r11,%r15			# (f^g)&e
667	mov	%r12,104(%rsp)
668
669	xor	%r14,%r13			# Sigma1(e)
670	xor	%rbx,%r15			# Ch(e,f,g)=((f^g)&e)^g
671	add	%rcx,%r12			# T1+=h
672
673	mov	%rdx,%rcx
674	add	%r13,%r12			# T1+=Sigma1(e)
675
676	add	%r15,%r12			# T1+=Ch(e,f,g)
677	mov	%rdx,%r13
678	mov	%rdx,%r14
679
680	ror	$28,%rcx
681	ror	$34,%r13
682	mov	%rdx,%r15
683	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
684
685	xor	%r13,%rcx
686	ror	$5,%r13
687	or	%r9,%r14			# a|c
688
689	xor	%r13,%rcx			# h=Sigma0(a)
690	and	%r9,%r15			# a&c
691	add	%r12,%r10			# d+=T1
692
693	and	%r8,%r14			# (a|c)&b
694	add	%r12,%rcx			# h+=T1
695
696	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
697	lea	1(%rdi),%rdi	# round++
698
699	add	%r14,%rcx			# h+=Maj(a,b,c)
700	mov	8*14(%rsi),%r12
701	bswap	%r12
702	mov	%r10,%r13
703	mov	%r10,%r14
704	mov	%r11,%r15
705
706	ror	$14,%r13
707	ror	$18,%r14
708	xor	%rax,%r15			# f^g
709
710	xor	%r14,%r13
711	ror	$23,%r14
712	and	%r10,%r15			# (f^g)&e
713	mov	%r12,112(%rsp)
714
715	xor	%r14,%r13			# Sigma1(e)
716	xor	%rax,%r15			# Ch(e,f,g)=((f^g)&e)^g
717	add	%rbx,%r12			# T1+=h
718
719	mov	%rcx,%rbx
720	add	%r13,%r12			# T1+=Sigma1(e)
721
722	add	%r15,%r12			# T1+=Ch(e,f,g)
723	mov	%rcx,%r13
724	mov	%rcx,%r14
725
726	ror	$28,%rbx
727	ror	$34,%r13
728	mov	%rcx,%r15
729	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
730
731	xor	%r13,%rbx
732	ror	$5,%r13
733	or	%r8,%r14			# a|c
734
735	xor	%r13,%rbx			# h=Sigma0(a)
736	and	%r8,%r15			# a&c
737	add	%r12,%r9			# d+=T1
738
739	and	%rdx,%r14			# (a|c)&b
740	add	%r12,%rbx			# h+=T1
741
742	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
743	lea	1(%rdi),%rdi	# round++
744
745	add	%r14,%rbx			# h+=Maj(a,b,c)
746	mov	8*15(%rsi),%r12
747	bswap	%r12
748	mov	%r9,%r13
749	mov	%r9,%r14
750	mov	%r10,%r15
751
752	ror	$14,%r13
753	ror	$18,%r14
754	xor	%r11,%r15			# f^g
755
756	xor	%r14,%r13
757	ror	$23,%r14
758	and	%r9,%r15			# (f^g)&e
759	mov	%r12,120(%rsp)
760
761	xor	%r14,%r13			# Sigma1(e)
762	xor	%r11,%r15			# Ch(e,f,g)=((f^g)&e)^g
763	add	%rax,%r12			# T1+=h
764
765	mov	%rbx,%rax
766	add	%r13,%r12			# T1+=Sigma1(e)
767
768	add	%r15,%r12			# T1+=Ch(e,f,g)
769	mov	%rbx,%r13
770	mov	%rbx,%r14
771
772	ror	$28,%rax
773	ror	$34,%r13
774	mov	%rbx,%r15
775	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
776
777	xor	%r13,%rax
778	ror	$5,%r13
779	or	%rdx,%r14			# a|c
780
781	xor	%r13,%rax			# h=Sigma0(a)
782	and	%rdx,%r15			# a&c
783	add	%r12,%r8			# d+=T1
784
785	and	%rcx,%r14			# (a|c)&b
786	add	%r12,%rax			# h+=T1
787
788	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
789	lea	1(%rdi),%rdi	# round++
790
791	add	%r14,%rax			# h+=Maj(a,b,c)
792	jmp	.Lrounds_16_xx
793.align	16
794.Lrounds_16_xx:
795	mov	8(%rsp),%r13
796	mov	112(%rsp),%r12
797
798	mov	%r13,%r15
799
800	shr	$7,%r13
801	ror	$1,%r15
802
803	xor	%r15,%r13
804	ror	$7,%r15
805
806	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
807	mov	%r12,%r14
808
809	shr	$6,%r12
810	ror	$19,%r14
811
812	xor	%r14,%r12
813	ror	$42,%r14
814
815	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
816
817	add	%r13,%r12
818
819	add	72(%rsp),%r12
820
821	add	0(%rsp),%r12
822	mov	%r8,%r13
823	mov	%r8,%r14
824	mov	%r9,%r15
825
826	ror	$14,%r13
827	ror	$18,%r14
828	xor	%r10,%r15			# f^g
829
830	xor	%r14,%r13
831	ror	$23,%r14
832	and	%r8,%r15			# (f^g)&e
833	mov	%r12,0(%rsp)
834
835	xor	%r14,%r13			# Sigma1(e)
836	xor	%r10,%r15			# Ch(e,f,g)=((f^g)&e)^g
837	add	%r11,%r12			# T1+=h
838
839	mov	%rax,%r11
840	add	%r13,%r12			# T1+=Sigma1(e)
841
842	add	%r15,%r12			# T1+=Ch(e,f,g)
843	mov	%rax,%r13
844	mov	%rax,%r14
845
846	ror	$28,%r11
847	ror	$34,%r13
848	mov	%rax,%r15
849	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
850
851	xor	%r13,%r11
852	ror	$5,%r13
853	or	%rcx,%r14			# a|c
854
855	xor	%r13,%r11			# h=Sigma0(a)
856	and	%rcx,%r15			# a&c
857	add	%r12,%rdx			# d+=T1
858
859	and	%rbx,%r14			# (a|c)&b
860	add	%r12,%r11			# h+=T1
861
862	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
863	lea	1(%rdi),%rdi	# round++
864
865	add	%r14,%r11			# h+=Maj(a,b,c)
866	mov	16(%rsp),%r13
867	mov	120(%rsp),%r12
868
869	mov	%r13,%r15
870
871	shr	$7,%r13
872	ror	$1,%r15
873
874	xor	%r15,%r13
875	ror	$7,%r15
876
877	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
878	mov	%r12,%r14
879
880	shr	$6,%r12
881	ror	$19,%r14
882
883	xor	%r14,%r12
884	ror	$42,%r14
885
886	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
887
888	add	%r13,%r12
889
890	add	80(%rsp),%r12
891
892	add	8(%rsp),%r12
893	mov	%rdx,%r13
894	mov	%rdx,%r14
895	mov	%r8,%r15
896
897	ror	$14,%r13
898	ror	$18,%r14
899	xor	%r9,%r15			# f^g
900
901	xor	%r14,%r13
902	ror	$23,%r14
903	and	%rdx,%r15			# (f^g)&e
904	mov	%r12,8(%rsp)
905
906	xor	%r14,%r13			# Sigma1(e)
907	xor	%r9,%r15			# Ch(e,f,g)=((f^g)&e)^g
908	add	%r10,%r12			# T1+=h
909
910	mov	%r11,%r10
911	add	%r13,%r12			# T1+=Sigma1(e)
912
913	add	%r15,%r12			# T1+=Ch(e,f,g)
914	mov	%r11,%r13
915	mov	%r11,%r14
916
917	ror	$28,%r10
918	ror	$34,%r13
919	mov	%r11,%r15
920	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
921
922	xor	%r13,%r10
923	ror	$5,%r13
924	or	%rbx,%r14			# a|c
925
926	xor	%r13,%r10			# h=Sigma0(a)
927	and	%rbx,%r15			# a&c
928	add	%r12,%rcx			# d+=T1
929
930	and	%rax,%r14			# (a|c)&b
931	add	%r12,%r10			# h+=T1
932
933	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
934	lea	1(%rdi),%rdi	# round++
935
936	add	%r14,%r10			# h+=Maj(a,b,c)
937	mov	24(%rsp),%r13
938	mov	0(%rsp),%r12
939
940	mov	%r13,%r15
941
942	shr	$7,%r13
943	ror	$1,%r15
944
945	xor	%r15,%r13
946	ror	$7,%r15
947
948	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
949	mov	%r12,%r14
950
951	shr	$6,%r12
952	ror	$19,%r14
953
954	xor	%r14,%r12
955	ror	$42,%r14
956
957	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
958
959	add	%r13,%r12
960
961	add	88(%rsp),%r12
962
963	add	16(%rsp),%r12
964	mov	%rcx,%r13
965	mov	%rcx,%r14
966	mov	%rdx,%r15
967
968	ror	$14,%r13
969	ror	$18,%r14
970	xor	%r8,%r15			# f^g
971
972	xor	%r14,%r13
973	ror	$23,%r14
974	and	%rcx,%r15			# (f^g)&e
975	mov	%r12,16(%rsp)
976
977	xor	%r14,%r13			# Sigma1(e)
978	xor	%r8,%r15			# Ch(e,f,g)=((f^g)&e)^g
979	add	%r9,%r12			# T1+=h
980
981	mov	%r10,%r9
982	add	%r13,%r12			# T1+=Sigma1(e)
983
984	add	%r15,%r12			# T1+=Ch(e,f,g)
985	mov	%r10,%r13
986	mov	%r10,%r14
987
988	ror	$28,%r9
989	ror	$34,%r13
990	mov	%r10,%r15
991	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
992
993	xor	%r13,%r9
994	ror	$5,%r13
995	or	%rax,%r14			# a|c
996
997	xor	%r13,%r9			# h=Sigma0(a)
998	and	%rax,%r15			# a&c
999	add	%r12,%rbx			# d+=T1
1000
1001	and	%r11,%r14			# (a|c)&b
1002	add	%r12,%r9			# h+=T1
1003
1004	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1005	lea	1(%rdi),%rdi	# round++
1006
1007	add	%r14,%r9			# h+=Maj(a,b,c)
1008	mov	32(%rsp),%r13
1009	mov	8(%rsp),%r12
1010
1011	mov	%r13,%r15
1012
1013	shr	$7,%r13
1014	ror	$1,%r15
1015
1016	xor	%r15,%r13
1017	ror	$7,%r15
1018
1019	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1020	mov	%r12,%r14
1021
1022	shr	$6,%r12
1023	ror	$19,%r14
1024
1025	xor	%r14,%r12
1026	ror	$42,%r14
1027
1028	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1029
1030	add	%r13,%r12
1031
1032	add	96(%rsp),%r12
1033
1034	add	24(%rsp),%r12
1035	mov	%rbx,%r13
1036	mov	%rbx,%r14
1037	mov	%rcx,%r15
1038
1039	ror	$14,%r13
1040	ror	$18,%r14
1041	xor	%rdx,%r15			# f^g
1042
1043	xor	%r14,%r13
1044	ror	$23,%r14
1045	and	%rbx,%r15			# (f^g)&e
1046	mov	%r12,24(%rsp)
1047
1048	xor	%r14,%r13			# Sigma1(e)
1049	xor	%rdx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1050	add	%r8,%r12			# T1+=h
1051
1052	mov	%r9,%r8
1053	add	%r13,%r12			# T1+=Sigma1(e)
1054
1055	add	%r15,%r12			# T1+=Ch(e,f,g)
1056	mov	%r9,%r13
1057	mov	%r9,%r14
1058
1059	ror	$28,%r8
1060	ror	$34,%r13
1061	mov	%r9,%r15
1062	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1063
1064	xor	%r13,%r8
1065	ror	$5,%r13
1066	or	%r11,%r14			# a|c
1067
1068	xor	%r13,%r8			# h=Sigma0(a)
1069	and	%r11,%r15			# a&c
1070	add	%r12,%rax			# d+=T1
1071
1072	and	%r10,%r14			# (a|c)&b
1073	add	%r12,%r8			# h+=T1
1074
1075	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1076	lea	1(%rdi),%rdi	# round++
1077
1078	add	%r14,%r8			# h+=Maj(a,b,c)
1079	mov	40(%rsp),%r13
1080	mov	16(%rsp),%r12
1081
1082	mov	%r13,%r15
1083
1084	shr	$7,%r13
1085	ror	$1,%r15
1086
1087	xor	%r15,%r13
1088	ror	$7,%r15
1089
1090	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1091	mov	%r12,%r14
1092
1093	shr	$6,%r12
1094	ror	$19,%r14
1095
1096	xor	%r14,%r12
1097	ror	$42,%r14
1098
1099	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1100
1101	add	%r13,%r12
1102
1103	add	104(%rsp),%r12
1104
1105	add	32(%rsp),%r12
1106	mov	%rax,%r13
1107	mov	%rax,%r14
1108	mov	%rbx,%r15
1109
1110	ror	$14,%r13
1111	ror	$18,%r14
1112	xor	%rcx,%r15			# f^g
1113
1114	xor	%r14,%r13
1115	ror	$23,%r14
1116	and	%rax,%r15			# (f^g)&e
1117	mov	%r12,32(%rsp)
1118
1119	xor	%r14,%r13			# Sigma1(e)
1120	xor	%rcx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1121	add	%rdx,%r12			# T1+=h
1122
1123	mov	%r8,%rdx
1124	add	%r13,%r12			# T1+=Sigma1(e)
1125
1126	add	%r15,%r12			# T1+=Ch(e,f,g)
1127	mov	%r8,%r13
1128	mov	%r8,%r14
1129
1130	ror	$28,%rdx
1131	ror	$34,%r13
1132	mov	%r8,%r15
1133	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1134
1135	xor	%r13,%rdx
1136	ror	$5,%r13
1137	or	%r10,%r14			# a|c
1138
1139	xor	%r13,%rdx			# h=Sigma0(a)
1140	and	%r10,%r15			# a&c
1141	add	%r12,%r11			# d+=T1
1142
1143	and	%r9,%r14			# (a|c)&b
1144	add	%r12,%rdx			# h+=T1
1145
1146	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1147	lea	1(%rdi),%rdi	# round++
1148
1149	add	%r14,%rdx			# h+=Maj(a,b,c)
1150	mov	48(%rsp),%r13
1151	mov	24(%rsp),%r12
1152
1153	mov	%r13,%r15
1154
1155	shr	$7,%r13
1156	ror	$1,%r15
1157
1158	xor	%r15,%r13
1159	ror	$7,%r15
1160
1161	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1162	mov	%r12,%r14
1163
1164	shr	$6,%r12
1165	ror	$19,%r14
1166
1167	xor	%r14,%r12
1168	ror	$42,%r14
1169
1170	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1171
1172	add	%r13,%r12
1173
1174	add	112(%rsp),%r12
1175
1176	add	40(%rsp),%r12
1177	mov	%r11,%r13
1178	mov	%r11,%r14
1179	mov	%rax,%r15
1180
1181	ror	$14,%r13
1182	ror	$18,%r14
1183	xor	%rbx,%r15			# f^g
1184
1185	xor	%r14,%r13
1186	ror	$23,%r14
1187	and	%r11,%r15			# (f^g)&e
1188	mov	%r12,40(%rsp)
1189
1190	xor	%r14,%r13			# Sigma1(e)
1191	xor	%rbx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1192	add	%rcx,%r12			# T1+=h
1193
1194	mov	%rdx,%rcx
1195	add	%r13,%r12			# T1+=Sigma1(e)
1196
1197	add	%r15,%r12			# T1+=Ch(e,f,g)
1198	mov	%rdx,%r13
1199	mov	%rdx,%r14
1200
1201	ror	$28,%rcx
1202	ror	$34,%r13
1203	mov	%rdx,%r15
1204	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1205
1206	xor	%r13,%rcx
1207	ror	$5,%r13
1208	or	%r9,%r14			# a|c
1209
1210	xor	%r13,%rcx			# h=Sigma0(a)
1211	and	%r9,%r15			# a&c
1212	add	%r12,%r10			# d+=T1
1213
1214	and	%r8,%r14			# (a|c)&b
1215	add	%r12,%rcx			# h+=T1
1216
1217	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1218	lea	1(%rdi),%rdi	# round++
1219
1220	add	%r14,%rcx			# h+=Maj(a,b,c)
1221	mov	56(%rsp),%r13
1222	mov	32(%rsp),%r12
1223
1224	mov	%r13,%r15
1225
1226	shr	$7,%r13
1227	ror	$1,%r15
1228
1229	xor	%r15,%r13
1230	ror	$7,%r15
1231
1232	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1233	mov	%r12,%r14
1234
1235	shr	$6,%r12
1236	ror	$19,%r14
1237
1238	xor	%r14,%r12
1239	ror	$42,%r14
1240
1241	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1242
1243	add	%r13,%r12
1244
1245	add	120(%rsp),%r12
1246
1247	add	48(%rsp),%r12
1248	mov	%r10,%r13
1249	mov	%r10,%r14
1250	mov	%r11,%r15
1251
1252	ror	$14,%r13
1253	ror	$18,%r14
1254	xor	%rax,%r15			# f^g
1255
1256	xor	%r14,%r13
1257	ror	$23,%r14
1258	and	%r10,%r15			# (f^g)&e
1259	mov	%r12,48(%rsp)
1260
1261	xor	%r14,%r13			# Sigma1(e)
1262	xor	%rax,%r15			# Ch(e,f,g)=((f^g)&e)^g
1263	add	%rbx,%r12			# T1+=h
1264
1265	mov	%rcx,%rbx
1266	add	%r13,%r12			# T1+=Sigma1(e)
1267
1268	add	%r15,%r12			# T1+=Ch(e,f,g)
1269	mov	%rcx,%r13
1270	mov	%rcx,%r14
1271
1272	ror	$28,%rbx
1273	ror	$34,%r13
1274	mov	%rcx,%r15
1275	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1276
1277	xor	%r13,%rbx
1278	ror	$5,%r13
1279	or	%r8,%r14			# a|c
1280
1281	xor	%r13,%rbx			# h=Sigma0(a)
1282	and	%r8,%r15			# a&c
1283	add	%r12,%r9			# d+=T1
1284
1285	and	%rdx,%r14			# (a|c)&b
1286	add	%r12,%rbx			# h+=T1
1287
1288	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1289	lea	1(%rdi),%rdi	# round++
1290
1291	add	%r14,%rbx			# h+=Maj(a,b,c)
1292	mov	64(%rsp),%r13
1293	mov	40(%rsp),%r12
1294
1295	mov	%r13,%r15
1296
1297	shr	$7,%r13
1298	ror	$1,%r15
1299
1300	xor	%r15,%r13
1301	ror	$7,%r15
1302
1303	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1304	mov	%r12,%r14
1305
1306	shr	$6,%r12
1307	ror	$19,%r14
1308
1309	xor	%r14,%r12
1310	ror	$42,%r14
1311
1312	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1313
1314	add	%r13,%r12
1315
1316	add	0(%rsp),%r12
1317
1318	add	56(%rsp),%r12
1319	mov	%r9,%r13
1320	mov	%r9,%r14
1321	mov	%r10,%r15
1322
1323	ror	$14,%r13
1324	ror	$18,%r14
1325	xor	%r11,%r15			# f^g
1326
1327	xor	%r14,%r13
1328	ror	$23,%r14
1329	and	%r9,%r15			# (f^g)&e
1330	mov	%r12,56(%rsp)
1331
1332	xor	%r14,%r13			# Sigma1(e)
1333	xor	%r11,%r15			# Ch(e,f,g)=((f^g)&e)^g
1334	add	%rax,%r12			# T1+=h
1335
1336	mov	%rbx,%rax
1337	add	%r13,%r12			# T1+=Sigma1(e)
1338
1339	add	%r15,%r12			# T1+=Ch(e,f,g)
1340	mov	%rbx,%r13
1341	mov	%rbx,%r14
1342
1343	ror	$28,%rax
1344	ror	$34,%r13
1345	mov	%rbx,%r15
1346	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1347
1348	xor	%r13,%rax
1349	ror	$5,%r13
1350	or	%rdx,%r14			# a|c
1351
1352	xor	%r13,%rax			# h=Sigma0(a)
1353	and	%rdx,%r15			# a&c
1354	add	%r12,%r8			# d+=T1
1355
1356	and	%rcx,%r14			# (a|c)&b
1357	add	%r12,%rax			# h+=T1
1358
1359	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1360	lea	1(%rdi),%rdi	# round++
1361
1362	add	%r14,%rax			# h+=Maj(a,b,c)
1363	mov	72(%rsp),%r13
1364	mov	48(%rsp),%r12
1365
1366	mov	%r13,%r15
1367
1368	shr	$7,%r13
1369	ror	$1,%r15
1370
1371	xor	%r15,%r13
1372	ror	$7,%r15
1373
1374	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1375	mov	%r12,%r14
1376
1377	shr	$6,%r12
1378	ror	$19,%r14
1379
1380	xor	%r14,%r12
1381	ror	$42,%r14
1382
1383	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1384
1385	add	%r13,%r12
1386
1387	add	8(%rsp),%r12
1388
1389	add	64(%rsp),%r12
1390	mov	%r8,%r13
1391	mov	%r8,%r14
1392	mov	%r9,%r15
1393
1394	ror	$14,%r13
1395	ror	$18,%r14
1396	xor	%r10,%r15			# f^g
1397
1398	xor	%r14,%r13
1399	ror	$23,%r14
1400	and	%r8,%r15			# (f^g)&e
1401	mov	%r12,64(%rsp)
1402
1403	xor	%r14,%r13			# Sigma1(e)
1404	xor	%r10,%r15			# Ch(e,f,g)=((f^g)&e)^g
1405	add	%r11,%r12			# T1+=h
1406
1407	mov	%rax,%r11
1408	add	%r13,%r12			# T1+=Sigma1(e)
1409
1410	add	%r15,%r12			# T1+=Ch(e,f,g)
1411	mov	%rax,%r13
1412	mov	%rax,%r14
1413
1414	ror	$28,%r11
1415	ror	$34,%r13
1416	mov	%rax,%r15
1417	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1418
1419	xor	%r13,%r11
1420	ror	$5,%r13
1421	or	%rcx,%r14			# a|c
1422
1423	xor	%r13,%r11			# h=Sigma0(a)
1424	and	%rcx,%r15			# a&c
1425	add	%r12,%rdx			# d+=T1
1426
1427	and	%rbx,%r14			# (a|c)&b
1428	add	%r12,%r11			# h+=T1
1429
1430	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1431	lea	1(%rdi),%rdi	# round++
1432
1433	add	%r14,%r11			# h+=Maj(a,b,c)
1434	mov	80(%rsp),%r13
1435	mov	56(%rsp),%r12
1436
1437	mov	%r13,%r15
1438
1439	shr	$7,%r13
1440	ror	$1,%r15
1441
1442	xor	%r15,%r13
1443	ror	$7,%r15
1444
1445	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1446	mov	%r12,%r14
1447
1448	shr	$6,%r12
1449	ror	$19,%r14
1450
1451	xor	%r14,%r12
1452	ror	$42,%r14
1453
1454	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1455
1456	add	%r13,%r12
1457
1458	add	16(%rsp),%r12
1459
1460	add	72(%rsp),%r12
1461	mov	%rdx,%r13
1462	mov	%rdx,%r14
1463	mov	%r8,%r15
1464
1465	ror	$14,%r13
1466	ror	$18,%r14
1467	xor	%r9,%r15			# f^g
1468
1469	xor	%r14,%r13
1470	ror	$23,%r14
1471	and	%rdx,%r15			# (f^g)&e
1472	mov	%r12,72(%rsp)
1473
1474	xor	%r14,%r13			# Sigma1(e)
1475	xor	%r9,%r15			# Ch(e,f,g)=((f^g)&e)^g
1476	add	%r10,%r12			# T1+=h
1477
1478	mov	%r11,%r10
1479	add	%r13,%r12			# T1+=Sigma1(e)
1480
1481	add	%r15,%r12			# T1+=Ch(e,f,g)
1482	mov	%r11,%r13
1483	mov	%r11,%r14
1484
1485	ror	$28,%r10
1486	ror	$34,%r13
1487	mov	%r11,%r15
1488	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1489
1490	xor	%r13,%r10
1491	ror	$5,%r13
1492	or	%rbx,%r14			# a|c
1493
1494	xor	%r13,%r10			# h=Sigma0(a)
1495	and	%rbx,%r15			# a&c
1496	add	%r12,%rcx			# d+=T1
1497
1498	and	%rax,%r14			# (a|c)&b
1499	add	%r12,%r10			# h+=T1
1500
1501	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1502	lea	1(%rdi),%rdi	# round++
1503
1504	add	%r14,%r10			# h+=Maj(a,b,c)
1505	mov	88(%rsp),%r13
1506	mov	64(%rsp),%r12
1507
1508	mov	%r13,%r15
1509
1510	shr	$7,%r13
1511	ror	$1,%r15
1512
1513	xor	%r15,%r13
1514	ror	$7,%r15
1515
1516	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1517	mov	%r12,%r14
1518
1519	shr	$6,%r12
1520	ror	$19,%r14
1521
1522	xor	%r14,%r12
1523	ror	$42,%r14
1524
1525	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1526
1527	add	%r13,%r12
1528
1529	add	24(%rsp),%r12
1530
1531	add	80(%rsp),%r12
1532	mov	%rcx,%r13
1533	mov	%rcx,%r14
1534	mov	%rdx,%r15
1535
1536	ror	$14,%r13
1537	ror	$18,%r14
1538	xor	%r8,%r15			# f^g
1539
1540	xor	%r14,%r13
1541	ror	$23,%r14
1542	and	%rcx,%r15			# (f^g)&e
1543	mov	%r12,80(%rsp)
1544
1545	xor	%r14,%r13			# Sigma1(e)
1546	xor	%r8,%r15			# Ch(e,f,g)=((f^g)&e)^g
1547	add	%r9,%r12			# T1+=h
1548
1549	mov	%r10,%r9
1550	add	%r13,%r12			# T1+=Sigma1(e)
1551
1552	add	%r15,%r12			# T1+=Ch(e,f,g)
1553	mov	%r10,%r13
1554	mov	%r10,%r14
1555
1556	ror	$28,%r9
1557	ror	$34,%r13
1558	mov	%r10,%r15
1559	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1560
1561	xor	%r13,%r9
1562	ror	$5,%r13
1563	or	%rax,%r14			# a|c
1564
1565	xor	%r13,%r9			# h=Sigma0(a)
1566	and	%rax,%r15			# a&c
1567	add	%r12,%rbx			# d+=T1
1568
1569	and	%r11,%r14			# (a|c)&b
1570	add	%r12,%r9			# h+=T1
1571
1572	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1573	lea	1(%rdi),%rdi	# round++
1574
1575	add	%r14,%r9			# h+=Maj(a,b,c)
1576	mov	96(%rsp),%r13
1577	mov	72(%rsp),%r12
1578
1579	mov	%r13,%r15
1580
1581	shr	$7,%r13
1582	ror	$1,%r15
1583
1584	xor	%r15,%r13
1585	ror	$7,%r15
1586
1587	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1588	mov	%r12,%r14
1589
1590	shr	$6,%r12
1591	ror	$19,%r14
1592
1593	xor	%r14,%r12
1594	ror	$42,%r14
1595
1596	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1597
1598	add	%r13,%r12
1599
1600	add	32(%rsp),%r12
1601
1602	add	88(%rsp),%r12
1603	mov	%rbx,%r13
1604	mov	%rbx,%r14
1605	mov	%rcx,%r15
1606
1607	ror	$14,%r13
1608	ror	$18,%r14
1609	xor	%rdx,%r15			# f^g
1610
1611	xor	%r14,%r13
1612	ror	$23,%r14
1613	and	%rbx,%r15			# (f^g)&e
1614	mov	%r12,88(%rsp)
1615
1616	xor	%r14,%r13			# Sigma1(e)
1617	xor	%rdx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1618	add	%r8,%r12			# T1+=h
1619
1620	mov	%r9,%r8
1621	add	%r13,%r12			# T1+=Sigma1(e)
1622
1623	add	%r15,%r12			# T1+=Ch(e,f,g)
1624	mov	%r9,%r13
1625	mov	%r9,%r14
1626
1627	ror	$28,%r8
1628	ror	$34,%r13
1629	mov	%r9,%r15
1630	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1631
1632	xor	%r13,%r8
1633	ror	$5,%r13
1634	or	%r11,%r14			# a|c
1635
1636	xor	%r13,%r8			# h=Sigma0(a)
1637	and	%r11,%r15			# a&c
1638	add	%r12,%rax			# d+=T1
1639
1640	and	%r10,%r14			# (a|c)&b
1641	add	%r12,%r8			# h+=T1
1642
1643	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1644	lea	1(%rdi),%rdi	# round++
1645
1646	add	%r14,%r8			# h+=Maj(a,b,c)
1647	mov	104(%rsp),%r13
1648	mov	80(%rsp),%r12
1649
1650	mov	%r13,%r15
1651
1652	shr	$7,%r13
1653	ror	$1,%r15
1654
1655	xor	%r15,%r13
1656	ror	$7,%r15
1657
1658	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1659	mov	%r12,%r14
1660
1661	shr	$6,%r12
1662	ror	$19,%r14
1663
1664	xor	%r14,%r12
1665	ror	$42,%r14
1666
1667	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1668
1669	add	%r13,%r12
1670
1671	add	40(%rsp),%r12
1672
1673	add	96(%rsp),%r12
1674	mov	%rax,%r13
1675	mov	%rax,%r14
1676	mov	%rbx,%r15
1677
1678	ror	$14,%r13
1679	ror	$18,%r14
1680	xor	%rcx,%r15			# f^g
1681
1682	xor	%r14,%r13
1683	ror	$23,%r14
1684	and	%rax,%r15			# (f^g)&e
1685	mov	%r12,96(%rsp)
1686
1687	xor	%r14,%r13			# Sigma1(e)
1688	xor	%rcx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1689	add	%rdx,%r12			# T1+=h
1690
1691	mov	%r8,%rdx
1692	add	%r13,%r12			# T1+=Sigma1(e)
1693
1694	add	%r15,%r12			# T1+=Ch(e,f,g)
1695	mov	%r8,%r13
1696	mov	%r8,%r14
1697
1698	ror	$28,%rdx
1699	ror	$34,%r13
1700	mov	%r8,%r15
1701	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1702
1703	xor	%r13,%rdx
1704	ror	$5,%r13
1705	or	%r10,%r14			# a|c
1706
1707	xor	%r13,%rdx			# h=Sigma0(a)
1708	and	%r10,%r15			# a&c
1709	add	%r12,%r11			# d+=T1
1710
1711	and	%r9,%r14			# (a|c)&b
1712	add	%r12,%rdx			# h+=T1
1713
1714	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1715	lea	1(%rdi),%rdi	# round++
1716
1717	add	%r14,%rdx			# h+=Maj(a,b,c)
1718	mov	112(%rsp),%r13
1719	mov	88(%rsp),%r12
1720
1721	mov	%r13,%r15
1722
1723	shr	$7,%r13
1724	ror	$1,%r15
1725
1726	xor	%r15,%r13
1727	ror	$7,%r15
1728
1729	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1730	mov	%r12,%r14
1731
1732	shr	$6,%r12
1733	ror	$19,%r14
1734
1735	xor	%r14,%r12
1736	ror	$42,%r14
1737
1738	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1739
1740	add	%r13,%r12
1741
1742	add	48(%rsp),%r12
1743
1744	add	104(%rsp),%r12
1745	mov	%r11,%r13
1746	mov	%r11,%r14
1747	mov	%rax,%r15
1748
1749	ror	$14,%r13
1750	ror	$18,%r14
1751	xor	%rbx,%r15			# f^g
1752
1753	xor	%r14,%r13
1754	ror	$23,%r14
1755	and	%r11,%r15			# (f^g)&e
1756	mov	%r12,104(%rsp)
1757
1758	xor	%r14,%r13			# Sigma1(e)
1759	xor	%rbx,%r15			# Ch(e,f,g)=((f^g)&e)^g
1760	add	%rcx,%r12			# T1+=h
1761
1762	mov	%rdx,%rcx
1763	add	%r13,%r12			# T1+=Sigma1(e)
1764
1765	add	%r15,%r12			# T1+=Ch(e,f,g)
1766	mov	%rdx,%r13
1767	mov	%rdx,%r14
1768
1769	ror	$28,%rcx
1770	ror	$34,%r13
1771	mov	%rdx,%r15
1772	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1773
1774	xor	%r13,%rcx
1775	ror	$5,%r13
1776	or	%r9,%r14			# a|c
1777
1778	xor	%r13,%rcx			# h=Sigma0(a)
1779	and	%r9,%r15			# a&c
1780	add	%r12,%r10			# d+=T1
1781
1782	and	%r8,%r14			# (a|c)&b
1783	add	%r12,%rcx			# h+=T1
1784
1785	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1786	lea	1(%rdi),%rdi	# round++
1787
1788	add	%r14,%rcx			# h+=Maj(a,b,c)
1789	mov	120(%rsp),%r13
1790	mov	96(%rsp),%r12
1791
1792	mov	%r13,%r15
1793
1794	shr	$7,%r13
1795	ror	$1,%r15
1796
1797	xor	%r15,%r13
1798	ror	$7,%r15
1799
1800	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1801	mov	%r12,%r14
1802
1803	shr	$6,%r12
1804	ror	$19,%r14
1805
1806	xor	%r14,%r12
1807	ror	$42,%r14
1808
1809	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1810
1811	add	%r13,%r12
1812
1813	add	56(%rsp),%r12
1814
1815	add	112(%rsp),%r12
1816	mov	%r10,%r13
1817	mov	%r10,%r14
1818	mov	%r11,%r15
1819
1820	ror	$14,%r13
1821	ror	$18,%r14
1822	xor	%rax,%r15			# f^g
1823
1824	xor	%r14,%r13
1825	ror	$23,%r14
1826	and	%r10,%r15			# (f^g)&e
1827	mov	%r12,112(%rsp)
1828
1829	xor	%r14,%r13			# Sigma1(e)
1830	xor	%rax,%r15			# Ch(e,f,g)=((f^g)&e)^g
1831	add	%rbx,%r12			# T1+=h
1832
1833	mov	%rcx,%rbx
1834	add	%r13,%r12			# T1+=Sigma1(e)
1835
1836	add	%r15,%r12			# T1+=Ch(e,f,g)
1837	mov	%rcx,%r13
1838	mov	%rcx,%r14
1839
1840	ror	$28,%rbx
1841	ror	$34,%r13
1842	mov	%rcx,%r15
1843	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1844
1845	xor	%r13,%rbx
1846	ror	$5,%r13
1847	or	%r8,%r14			# a|c
1848
1849	xor	%r13,%rbx			# h=Sigma0(a)
1850	and	%r8,%r15			# a&c
1851	add	%r12,%r9			# d+=T1
1852
1853	and	%rdx,%r14			# (a|c)&b
1854	add	%r12,%rbx			# h+=T1
1855
1856	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1857	lea	1(%rdi),%rdi	# round++
1858
1859	add	%r14,%rbx			# h+=Maj(a,b,c)
1860	mov	0(%rsp),%r13
1861	mov	104(%rsp),%r12
1862
1863	mov	%r13,%r15
1864
1865	shr	$7,%r13
1866	ror	$1,%r15
1867
1868	xor	%r15,%r13
1869	ror	$7,%r15
1870
1871	xor	%r15,%r13			# sigma0(X[(i+1)&0xf])
1872	mov	%r12,%r14
1873
1874	shr	$6,%r12
1875	ror	$19,%r14
1876
1877	xor	%r14,%r12
1878	ror	$42,%r14
1879
1880	xor	%r14,%r12			# sigma1(X[(i+14)&0xf])
1881
1882	add	%r13,%r12
1883
1884	add	64(%rsp),%r12
1885
1886	add	120(%rsp),%r12
1887	mov	%r9,%r13
1888	mov	%r9,%r14
1889	mov	%r10,%r15
1890
1891	ror	$14,%r13
1892	ror	$18,%r14
1893	xor	%r11,%r15			# f^g
1894
1895	xor	%r14,%r13
1896	ror	$23,%r14
1897	and	%r9,%r15			# (f^g)&e
1898	mov	%r12,120(%rsp)
1899
1900	xor	%r14,%r13			# Sigma1(e)
1901	xor	%r11,%r15			# Ch(e,f,g)=((f^g)&e)^g
1902	add	%rax,%r12			# T1+=h
1903
1904	mov	%rbx,%rax
1905	add	%r13,%r12			# T1+=Sigma1(e)
1906
1907	add	%r15,%r12			# T1+=Ch(e,f,g)
1908	mov	%rbx,%r13
1909	mov	%rbx,%r14
1910
1911	ror	$28,%rax
1912	ror	$34,%r13
1913	mov	%rbx,%r15
1914	add	(%rbp,%rdi,8),%r12	# T1+=K[round]
1915
1916	xor	%r13,%rax
1917	ror	$5,%r13
1918	or	%rdx,%r14			# a|c
1919
1920	xor	%r13,%rax			# h=Sigma0(a)
1921	and	%rdx,%r15			# a&c
1922	add	%r12,%r8			# d+=T1
1923
1924	and	%rcx,%r14			# (a|c)&b
1925	add	%r12,%rax			# h+=T1
1926
1927	or	%r15,%r14			# Maj(a,b,c)=((a|c)&b)|(a&c)
1928	lea	1(%rdi),%rdi	# round++
1929
1930	add	%r14,%rax			# h+=Maj(a,b,c)
1931	cmp	$80,%rdi
1932	jb	.Lrounds_16_xx
1933
1934	mov	16*8+0*8(%rsp),%rdi
1935	lea	16*8(%rsi),%rsi
1936
1937	add	8*0(%rdi),%rax
1938	add	8*1(%rdi),%rbx
1939	add	8*2(%rdi),%rcx
1940	add	8*3(%rdi),%rdx
1941	add	8*4(%rdi),%r8
1942	add	8*5(%rdi),%r9
1943	add	8*6(%rdi),%r10
1944	add	8*7(%rdi),%r11
1945
1946	cmp	16*8+2*8(%rsp),%rsi
1947
1948	mov	%rax,8*0(%rdi)
1949	mov	%rbx,8*1(%rdi)
1950	mov	%rcx,8*2(%rdi)
1951	mov	%rdx,8*3(%rdi)
1952	mov	%r8,8*4(%rdi)
1953	mov	%r9,8*5(%rdi)
1954	mov	%r10,8*6(%rdi)
1955	mov	%r11,8*7(%rdi)
1956	jb	.Lloop
1957
1958	mov	16*8+3*8(%rsp),%rsp
1959	pop	%r15
1960	pop	%r14
1961	pop	%r13
1962	pop	%r12
1963	pop	%rbp
1964	pop	%rbx
1965
1966	ret
1967SET_SIZE(SHA512TransformBlocks)
1968
/*
 * K512: the 80 64-bit SHA-512 round constants K[0]..K[79].  The values
 * below match the K table given for SHA-512 in FIPS 180-4.
 *
 * Layout: 40 .quad rows with two constants per row, in round order, so
 * row r (counting from 0) holds K[2r] and K[2r+1]; 80 * 8 = 640 bytes.
 *
 * Use: SHA512TransformBlocks loads the address of this table into %rbp
 * with a PC-relative lea pair ("lea K512-.(%rbp),%rbp") and every round
 * adds the entry at (%rbp,%rdi,8) to T1.  %rdi is the round counter; it
 * is zeroed at .Lloop, bumped once per round, and the round loop exits
 * when it reaches 80.  In the rounds visible here the table is only read.
 *
 * .align 64 requests a 64-byte boundary for the first entry, and .type
 * marks the symbol as a data object.
 * NOTE(review): no .size directive is emitted for K512 -- confirm whether
 * one is wanted for symbol-table consumers.
 * NOTE(review): every line in this region begins with a decimal number
 * equal to its own line number, which looks like a listing/extraction
 * artifact rather than assembler input -- confirm and strip before
 * assembling.
 */
1969.align	64
1970.type	K512,@object
1971K512:
1972	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
1973	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1974	.quad	0x3956c25bf348b538,0x59f111f1b605d019
1975	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
1976	.quad	0xd807aa98a3030242,0x12835b0145706fbe
1977	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1978	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
1979	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
1980	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
1981	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1982	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
1983	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1984	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
1985	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
1986	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
1987	.quad	0x06ca6351e003826f,0x142929670a0e6e70
1988	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
1989	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1990	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
1991	.quad	0x81c2c92e47edaee6,0x92722c851482353b
1992	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
1993	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
1994	.quad	0xd192e819d6ef5218,0xd69906245565a910
1995	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
1996	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
1997	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1998	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1999	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
2000	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
2001	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
2002	.quad	0x90befffa23631e28,0xa4506cebde82bde9
2003	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
2004	.quad	0xca273eceea26619c,0xd186b8c721c0c207
2005	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
2006	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
2007	.quad	0x113f9804bef90dae,0x1b710b35131c471b
2008	.quad	0x28db77f523047d84,0x32caab7b40c72493
2009	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
2010	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
2011	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
2012#endif /* !lint && !__lint */
2013