xref: /freebsd/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S (revision df21a004be237a1dccd03c7b47254625eea62fa9)
1/* Do not modify. This file is auto-generated from aes-gcm-ppc.pl. */
2.machine	"any"
3.abiversion	2
4.text
5
6
7
8
9
/*
 * .Loop_aes_middle4x: run nine vcipher rounds over the four state
 * registers v15..v18, taking the round keys from vs1..vs9.
 *
 * Every ".long 0x......08" word below is a raw VMX vcipher encoding
 * (primary opcode 4, extended opcode 0x508).  The upper half-word selects
 * the state register (0x11EF = v15, 0x1210 = v16, 0x1231 = v17,
 * 0x1252 = v18) and the third byte selects the key register (0x9D = v19,
 * 0xA5 = v20, 0xAD = v21, 0xB5 = v22, 0xBD = v23).
 *
 * Clobbers v19..v23.  Nothing in the visible part of this file expands
 * this macro.
 */
10.macro	.Loop_aes_middle4x
/* v19..v22 = vs1..vs4 (VSX register N+32 is vector register N) */
11	xxlor	19+32, 1, 1
12	xxlor	20+32, 2, 2
13	xxlor	21+32, 3, 3
14	xxlor	22+32, 4, 4
15
/* vcipher v15..v18 with v19 */
16	.long	0x11EF9D08
17	.long	0x12109D08
18	.long	0x12319D08
19	.long	0x12529D08
20
/* vcipher v15..v18 with v20 */
21	.long	0x11EFA508
22	.long	0x1210A508
23	.long	0x1231A508
24	.long	0x1252A508
25
/* vcipher v15..v18 with v21 */
26	.long	0x11EFAD08
27	.long	0x1210AD08
28	.long	0x1231AD08
29	.long	0x1252AD08
30
/* vcipher v15..v18 with v22 */
31	.long	0x11EFB508
32	.long	0x1210B508
33	.long	0x1231B508
34	.long	0x1252B508
35
/* v19..v22 = vs5..vs8, then four more rounds exactly as above */
36	xxlor	19+32, 5, 5
37	xxlor	20+32, 6, 6
38	xxlor	21+32, 7, 7
39	xxlor	22+32, 8, 8
40
41	.long	0x11EF9D08
42	.long	0x12109D08
43	.long	0x12319D08
44	.long	0x12529D08
45
46	.long	0x11EFA508
47	.long	0x1210A508
48	.long	0x1231A508
49	.long	0x1252A508
50
51	.long	0x11EFAD08
52	.long	0x1210AD08
53	.long	0x1231AD08
54	.long	0x1252AD08
55
56	.long	0x11EFB508
57	.long	0x1210B508
58	.long	0x1231B508
59	.long	0x1252B508
60
/* ninth round: v23 = vs9, vcipher v15..v18 with v23 */
61	xxlor	23+32, 9, 9
62	.long	0x11EFBD08
63	.long	0x1210BD08
64	.long	0x1231BD08
65	.long	0x1252BD08
66.endm
67
68
69
70
71
/*
 * .Loop_aes_middle8x: run nine vcipher rounds over the eight state
 * registers v15..v22, taking the round keys from vs1..vs9.
 *
 * Every ".long 0x......08" word below is a raw VMX vcipher encoding
 * (primary opcode 4, extended opcode 0x508).  The upper half-word selects
 * the state register (0x11EF = v15, 0x1210 = v16, 0x1231 = v17,
 * 0x1252 = v18, 0x1273 = v19, 0x1294 = v20, 0x12B5 = v21, 0x12D6 = v22)
 * and the third byte selects the key register (0xBD = v23, 0xC5 = v24,
 * 0xCD = v25, 0xD5 = v26).
 *
 * Clobbers v23..v26.  Expanded inside the eight-block loops of
 * ppc_aes_gcm_encrypt and ppc_aes_gcm_decrypt.
 */
72.macro	.Loop_aes_middle8x
/* v23..v26 = vs1..vs4 (VSX register N+32 is vector register N) */
73	xxlor	23+32, 1, 1
74	xxlor	24+32, 2, 2
75	xxlor	25+32, 3, 3
76	xxlor	26+32, 4, 4
77
/* vcipher v15..v22 with v23 */
78	.long	0x11EFBD08
79	.long	0x1210BD08
80	.long	0x1231BD08
81	.long	0x1252BD08
82	.long	0x1273BD08
83	.long	0x1294BD08
84	.long	0x12B5BD08
85	.long	0x12D6BD08
86
/* vcipher v15..v22 with v24 */
87	.long	0x11EFC508
88	.long	0x1210C508
89	.long	0x1231C508
90	.long	0x1252C508
91	.long	0x1273C508
92	.long	0x1294C508
93	.long	0x12B5C508
94	.long	0x12D6C508
95
/* vcipher v15..v22 with v25 */
96	.long	0x11EFCD08
97	.long	0x1210CD08
98	.long	0x1231CD08
99	.long	0x1252CD08
100	.long	0x1273CD08
101	.long	0x1294CD08
102	.long	0x12B5CD08
103	.long	0x12D6CD08
104
/* vcipher v15..v22 with v26 */
105	.long	0x11EFD508
106	.long	0x1210D508
107	.long	0x1231D508
108	.long	0x1252D508
109	.long	0x1273D508
110	.long	0x1294D508
111	.long	0x12B5D508
112	.long	0x12D6D508
113
/* v23..v26 = vs5..vs8, then four more rounds exactly as above */
114	xxlor	23+32, 5, 5
115	xxlor	24+32, 6, 6
116	xxlor	25+32, 7, 7
117	xxlor	26+32, 8, 8
118
119	.long	0x11EFBD08
120	.long	0x1210BD08
121	.long	0x1231BD08
122	.long	0x1252BD08
123	.long	0x1273BD08
124	.long	0x1294BD08
125	.long	0x12B5BD08
126	.long	0x12D6BD08
127
128	.long	0x11EFC508
129	.long	0x1210C508
130	.long	0x1231C508
131	.long	0x1252C508
132	.long	0x1273C508
133	.long	0x1294C508
134	.long	0x12B5C508
135	.long	0x12D6C508
136
137	.long	0x11EFCD08
138	.long	0x1210CD08
139	.long	0x1231CD08
140	.long	0x1252CD08
141	.long	0x1273CD08
142	.long	0x1294CD08
143	.long	0x12B5CD08
144	.long	0x12D6CD08
145
146	.long	0x11EFD508
147	.long	0x1210D508
148	.long	0x1231D508
149	.long	0x1252D508
150	.long	0x1273D508
151	.long	0x1294D508
152	.long	0x12B5D508
153	.long	0x12D6D508
154
/* ninth round: v23 = vs9, vcipher v15..v22 with v23 */
155	xxlor	23+32, 9, 9
156	.long	0x11EFBD08
157	.long	0x1210BD08
158	.long	0x1231BD08
159	.long	0x1252BD08
160	.long	0x1273BD08
161	.long	0x1294BD08
162	.long	0x12B5BD08
163	.long	0x12D6BD08
164.endm
165
166
167
168
/*
 * ppc_aes_gcm_ghash: fold the four blocks v15..v18 into the running value
 * in v0 using vpmsumd (Vector Polynomial Multiply-Sum Doubleword).
 *
 * In:   v0        running value
 *       v15..v18  blocks to absorb
 *       v2..v14   multiplier table (the entry points in this file load
 *                 these from offsets 32..224 of r8)
 * Out:  v0        updated running value
 * Clobbers v15, v23..v29 and vs29.
 *
 * The raw .long words are vpmsumd encodings (primary opcode 4, extended
 * opcode 0x4C8); each group is decoded in the comment above it.
 * The label is not exported and nothing in the visible part of this file
 * branches to it.
 * NOTE(review): the names suggest v3..v14 hold powers of the GHASH key and
 * v2 the reduction constant - confirm against the code that fills the
 * table.
 */
169ppc_aes_gcm_ghash:
/* mix the running value into the first block */
170	vxor	15, 15, 0
171
/* vs29 = 0 (copied into v29 further down) */
172	xxlxor	29, 29, 29
173
/* vpmsumd 23,12,15 / 24,9,16 / 25,6,17 / 26,3,18 */
174	.long	0x12EC7CC8
175	.long	0x130984C8
176	.long	0x13268CC8
177	.long	0x134394C8
178
/* v23 = xor of the four products above */
179	vxor	23, 23, 24
180	vxor	23, 23, 25
181	vxor	23, 23, 26
182
/* vpmsumd 24,13,15 / 25,10,16 / 26,7,17 / 27,4,18 */
183	.long	0x130D7CC8
184	.long	0x132A84C8
185	.long	0x13478CC8
186	.long	0x136494C8
187
/* v24 = xor of the four products above */
188	vxor	24, 24, 25
189	vxor	24, 24, 26
190	vxor	24, 24, 27
191
192
/* vpmsumd 28,23,2 */
193	.long	0x139714C8
194
/* v29 = 0; split v24 into two 8-byte halves padded with zero: */
/* v26 = v24 shifted left 8 bytes, v29 = v24 shifted right 8 bytes */
195	xxlor	29+32, 29, 29
196	vsldoi	26, 24, 29, 8
197	vsldoi	29, 29, 24, 8
198	vxor	23, 23, 26
199
/* swap the two doublewords of v23, then add the v2 product */
200	vsldoi	23, 23, 23, 8
201	vxor	23, 23, 28
202
/* vpmsumd 24,14,15 / 25,11,16 / 26,8,17 / 27,5,18 */
203	.long	0x130E7CC8
204	.long	0x132B84C8
205	.long	0x13488CC8
206	.long	0x136594C8
207
/* v24 = xor of the four products above */
208	vxor	24, 24, 25
209	vxor	24, 24, 26
210	vxor	24, 24, 27
211
/* add the half of the middle sum kept in v29 */
212	vxor	24, 24, 29
213
214
/* second v2 step: v27 = swapped v23; vpmsumd 23,23,2; combine */
215	vsldoi	27, 23, 23, 8
216	.long	0x12F714C8
217	vxor	27, 27, 24
218	vxor	23, 23, 27
219
/* v0 = v23 (vs32 is v0) */
220	xxlor	32, 23+32, 23+32
221
222	blr
223
224
225
226
227
/*
 * ppc_aes_gcm_ghash2_4x: fold the eight blocks v15..v22 into the running
 * value in v0, as two four-block passes built from vpmsumd (Vector
 * Polynomial Multiply-Sum Doubleword).
 *
 * In:   v0        running value
 *       v15..v22  blocks to absorb
 *       v2..v14   multiplier table (the entry points in this file load
 *                 these from offsets 32..224 of r8)
 * Out:  v0        updated running value
 * Clobbers v15, v19, v23..v29 and vs29.
 *
 * The raw .long words are vpmsumd encodings (primary opcode 4, extended
 * opcode 0x4C8); each group is decoded in the comment above it.
 * NOTE(review): the names suggest v3..v14 hold powers of the GHASH key and
 * v2 the reduction constant - confirm against the code that fills the
 * table.
 */
228.macro	ppc_aes_gcm_ghash2_4x
229
/* ---- pass 1: blocks v15..v18 ---- */
/* mix the running value into the first block */
230	vxor	15, 15, 0
231
/* vs29 = 0 (copied into v29 further down) */
232	xxlxor	29, 29, 29
233
/* vpmsumd 23,12,15 / 24,9,16 / 25,6,17 / 26,3,18 */
234	.long	0x12EC7CC8
235	.long	0x130984C8
236	.long	0x13268CC8
237	.long	0x134394C8
238
/* v23 = xor of the four products above */
239	vxor	23, 23, 24
240	vxor	23, 23, 25
241	vxor	23, 23, 26
242
/* vpmsumd 24,13,15 / 25,10,16 / 26,7,17 / 27,4,18 */
243	.long	0x130D7CC8
244	.long	0x132A84C8
245	.long	0x13478CC8
246	.long	0x136494C8
247
/* v24 = xor of the products (v27 is added after the next two steps) */
248	vxor	24, 24, 25
249	vxor	24, 24, 26
250
251
/* vpmsumd 28,23,2 */
252	.long	0x139714C8
253
/* v29 = 0 */
254	xxlor	29+32, 29, 29
255
/* split v24 into two 8-byte halves padded with zero (v26 and v29) */
256	vxor	24, 24, 27
257	vsldoi	26, 24, 29, 8
258	vsldoi	29, 29, 24, 8
259	vxor	23, 23, 26
260
/* swap the two doublewords of v23, then add the v2 product */
261	vsldoi	23, 23, 23, 8
262	vxor	23, 23, 28
263
/* vpmsumd 24,14,15 / 25,11,16 / 26,8,17 / 27,5,18 */
264	.long	0x130E7CC8
265	.long	0x132B84C8
266	.long	0x13488CC8
267	.long	0x136594C8
268
/* v24 = xor of the four products above, plus the half kept in v29 */
269	vxor	24, 24, 25
270	vxor	24, 24, 26
271	vxor	24, 24, 27
272
273	vxor	24, 24, 29
274
275
/* second v2 step; the pass-1 result is left in v27 */
276	vsldoi	27, 23, 23, 8
277	.long	0x12F714C8
278	vxor	27, 27, 24
279	vxor	27, 23, 27
280
281
/* ---- pass 2: blocks v19..v22 ---- */
/* vpmsumd 24,9,20 / 25,6,21 / 26,3,22 */
282	.long	0x1309A4C8
283	.long	0x1326ACC8
284	.long	0x1343B4C8
/* mix the pass-1 result into v19 before it is multiplied */
285	vxor	19, 19, 27
/* vpmsumd 23,12,19 */
286	.long	0x12EC9CC8
287
288	vxor	23, 23, 24
289	vxor	23, 23, 25
290	vxor	23, 23, 26
291
/* vpmsumd 24,13,19 / 25,10,20 / 26,7,21 / 27,4,22 */
292	.long	0x130D9CC8
293	.long	0x132AA4C8
294	.long	0x1347ACC8
295	.long	0x1364B4C8
296
297	vxor	24, 24, 25
298	vxor	24, 24, 26
299
300
/* vpmsumd 28,23,2 */
301	.long	0x139714C8
302
/* v29 = 0 */
303	xxlor	29+32, 29, 29
304
305	vxor	24, 24, 27
306	vsldoi	26, 24, 29, 8
307	vsldoi	29, 29, 24, 8
308	vxor	23, 23, 26
309
310	vsldoi	23, 23, 23, 8
311	vxor	23, 23, 28
312
/* vpmsumd 24,14,19 / 25,11,20 / 26,8,21 / 27,5,22 */
313	.long	0x130E9CC8
314	.long	0x132BA4C8
315	.long	0x1348ACC8
316	.long	0x1365B4C8
317
318	vxor	24, 24, 25
319	vxor	24, 24, 26
320	vxor	24, 24, 27
321
322	vxor	24, 24, 29
323
324
/* second v2 step: vpmsumd 23,23,2; combine into v23 */
325	vsldoi	27, 23, 23, 8
326	.long	0x12F714C8
327	vxor	27, 27, 24
328	vxor	23, 23, 27
329
/* v0 = v23 (vs32 is v0) */
330	xxlor	32, 23+32, 23+32
331
332.endm
333
334
335
336
/*
 * ppc_update_hash_1x: fold the single block in v28 into the running value
 * in v0 using vpmsumd (Vector Polynomial Multiply-Sum Doubleword).
 *
 * In:   v0      running value
 *       v28     block to absorb
 *       v2..v5  multipliers (the entry points in this file load these from
 *               offsets 32..80 of r8)
 * Out:  v0      updated running value
 * Clobbers v19, v20 and v22..v28.
 *
 * The raw .long words are vpmsumd encodings (primary opcode 4, extended
 * opcode 0x4C8), decoded in the comments above them.
 */
337.macro	ppc_update_hash_1x
/* mix the running value into the block */
338	vxor	28, 28, 0
339
/* v19 = 0 */
340	vxor	19, 19, 19
341
/* vpmsumd 22,3,28 / 23,4,28 / 24,5,28 */
342	.long	0x12C3E4C8
343	.long	0x12E4E4C8
344	.long	0x1305E4C8
345
/* vpmsumd 27,22,2 */
346	.long	0x137614C8
347
/* split v23 into two zero-padded 8-byte halves; add one to v22, one to v24 */
348	vsldoi	25, 23, 19, 8
349	vsldoi	26, 19, 23, 8
350	vxor	22, 22, 25
351	vxor	24, 24, 26
352
/* swap the two doublewords of v22, then add the v2 product */
353	vsldoi	22, 22, 22, 8
354	vxor	22, 22, 27
355
/* second v2 step: v20 = swapped v22; vpmsumd 22,22,2; combine */
356	vsldoi	20, 22, 22, 8
357	.long	0x12D614C8
358	vxor	20, 20, 24
359	vxor	22, 22, 20
360
/* v0 = v22 */
361	vor	0,22,22
362
363.endm
364
365
366
367
368
369
370
371
372
373
374
375
376
/*
 * ppc_aes_gcm_encrypt
 *
 * Register roles, as established by the code below:
 *   r3   source pointer       (copied to r14, read 16 bytes at a time)
 *   r4   destination pointer  (copied to r9)
 *   r5   byte count           (copied to r12 = bytes still to process)
 *   r6   key buffer: 16-byte entries at offsets 0x00..0xe0, loaded into
 *        vs0..vs14, and a 32-bit round count at offset 240
 *   r7   16-byte block loaded into v30 and advanced by v31 (= 1) for every
 *        block processed, i.e. the counter
 *   r8   16-byte running hash value (loaded into v0, written back at
 *        aes_gcm_out) followed by a table read from offsets 32..224 into
 *        v2..v14
 *   r11  bytes processed so far
 * Return value: r3 = r11 + r12, computed at aes_gcm_out.
 *
 * Raw .long words: 0x......08 = vcipher, 0x......09 = vcipherlast,
 * 0x13DEF8C0 = vaddudm 30, 30, 31 (v30 += v31).
 *
 * Frame: 512 bytes.  r14..r21 are saved at 112..168(r1), v20..v31 at
 * 256..432(r1) and LR at 528(r1), i.e. 16 bytes above the caller's stack
 * pointer; 192..223(r1) is scratch for the partial-block code.
 * NOTE(review): the lxv/lxvb16x loads below overwrite VSR14..VSR22 without
 * saving them; the ELFv2 ABI lists f14..f31 as non-volatile - confirm this
 * is acceptable for every caller.
 */
377.global	ppc_aes_gcm_encrypt
378.align	5
379ppc_aes_gcm_encrypt:
380_ppc_aes_gcm_encrypt:
381
382	stdu	1,-512(1)
383	mflr	0
384
/* save r14..r21 */
385	std	14,112(1)
386	std	15,120(1)
387	std	16,128(1)
388	std	17,136(1)
389	std	18,144(1)
390	std	19,152(1)
391	std	20,160(1)
392	std	21,168(1)
/* save v20..v31 at 256(r1) upwards, 16 bytes each */
393	li	9, 256
394	stvx	20, 9, 1
395	addi	9, 9, 16
396	stvx	21, 9, 1
397	addi	9, 9, 16
398	stvx	22, 9, 1
399	addi	9, 9, 16
400	stvx	23, 9, 1
401	addi	9, 9, 16
402	stvx	24, 9, 1
403	addi	9, 9, 16
404	stvx	25, 9, 1
405	addi	9, 9, 16
406	stvx	26, 9, 1
407	addi	9, 9, 16
408	stvx	27, 9, 1
409	addi	9, 9, 16
410	stvx	28, 9, 1
411	addi	9, 9, 16
412	stvx	29, 9, 1
413	addi	9, 9, 16
414	stvx	30, 9, 1
415	addi	9, 9, 16
416	stvx	31, 9, 1
/* LR goes to 528(r1) = 16 bytes above the pre-stdu stack pointer */
417	std	0, 528(1)
418
419
/* v0 = running hash value from 0(r8) */
420	lxvb16x	32, 0, 8
421
422
/* v2..v14 = table entries at 32(r8)..224(r8) */
423	li	10, 32
424	lxvd2x	2+32, 10, 8
425	li	10, 48
426	lxvd2x	3+32, 10, 8
427	li	10, 64
428	lxvd2x	4+32, 10, 8
429	li	10, 80
430	lxvd2x	5+32, 10, 8
431
432	li	10, 96
433	lxvd2x	6+32, 10, 8
434	li	10, 112
435	lxvd2x	7+32, 10, 8
436	li	10, 128
437	lxvd2x	8+32, 10, 8
438
439	li	10, 144
440	lxvd2x	9+32, 10, 8
441	li	10, 160
442	lxvd2x	10+32, 10, 8
443	li	10, 176
444	lxvd2x	11+32, 10, 8
445
446	li	10, 192
447	lxvd2x	12+32, 10, 8
448	li	10, 208
449	lxvd2x	13+32, 10, 8
450	li	10, 224
451	lxvd2x	14+32, 10, 8
452
453
/* v30 = counter block from 0(r7) */
454	lxvb16x	30+32, 0, 7
455
/* r12 = bytes remaining, r11 = bytes processed */
456	mr	12, 5
457	li	11, 0
458
459
/* v31 = fifteen zero bytes followed by 0x01: the per-block increment */
460	vxor	31, 31, 31
461	vspltisb	22,1
462	vsldoi	31, 31, 22,1
463
464
/* vs0..vs10 = key entries at 0x00..0xa0 of r6 */
465	lxv	0, 0(6)
466	lxv	1, 0x10(6)
467	lxv	2, 0x20(6)
468	lxv	3, 0x30(6)
469	lxv	4, 0x40(6)
470	lxv	5, 0x50(6)
471	lxv	6, 0x60(6)
472	lxv	7, 0x70(6)
473	lxv	8, 0x80(6)
474	lxv	9, 0x90(6)
475	lxv	10, 0xa0(6)
476
477
/* r9 = round count */
478	lwz	9,240(6)
479
480
481
/* v15 = counter xor vs0: the first cipher state */
482	xxlor	32+29, 0, 0
483	vxor	15, 30, 29
484
/* 10 rounds: the keys loaded so far are enough */
485	cmpdi	9, 10
486	beq	.Loop_aes_gcm_8x
487
488
/* 12 rounds: two more key entries */
489	lxv	11, 0xb0(6)
490	lxv	12, 0xc0(6)
491
492	cmpdi	9, 12
493	beq	.Loop_aes_gcm_8x
494
495
/* 14 rounds: two more key entries */
496	lxv	13, 0xd0(6)
497	lxv	14, 0xe0(6)
498	cmpdi	9, 14
499	beq	.Loop_aes_gcm_8x
500
/*
 * Any other round count: leave without processing anything.
 * NOTE(review): r11 = 0 and r12 = r5 here, so aes_gcm_out returns r5 -
 * confirm callers do not take that to mean "all bytes processed".
 */
501	b	aes_gcm_out
502
503.align	5
504.Loop_aes_gcm_8x:
/* r14 = read cursor, r9 = write cursor */
505	mr	14, 3
506	mr	9, 4
507
508
/* r10 = r5 / 128 = number of eight-block iterations; none -> tail code */
509	li	10, 128
510	divdu	10, 5, 10
511	cmpdi	10, 0
512	beq	.Loop_last_block
513
/* v16..v22 = the next seven counters, each xor vs0 (held in v29) */
514	.long	0x13DEF8C0
515	vxor	16, 30, 29
516	.long	0x13DEF8C0
517	vxor	17, 30, 29
518	.long	0x13DEF8C0
519	vxor	18, 30, 29
520	.long	0x13DEF8C0
521	vxor	19, 30, 29
522	.long	0x13DEF8C0
523	vxor	20, 30, 29
524	.long	0x13DEF8C0
525	vxor	21, 30, 29
526	.long	0x13DEF8C0
527	vxor	22, 30, 29
528
529	mtctr	10
530
/* r15..r21 = byte offsets of blocks 1..7 inside a 128-byte chunk */
531	li	15, 16
532	li	16, 32
533	li	17, 48
534	li	18, 64
535	li	19, 80
536	li	20, 96
537	li	21, 112
538
/* r10 = round count, tested after the shared nine rounds */
539	lwz	10, 240(6)
540
541.Loop_8x_block:
542
/* vs15..vs22 (VSX registers, not v15..v22) = eight input blocks */
543	lxvb16x	15, 0, 14
544	lxvb16x	16, 15, 14
545	lxvb16x	17, 16, 14
546	lxvb16x	18, 17, 14
547	lxvb16x	19, 18, 14
548	lxvb16x	20, 19, 14
549	lxvb16x	21, 20, 14
550	lxvb16x	22, 21, 14
551	addi	14, 14, 128
552
/* nine vcipher rounds on v15..v22 with vs1..vs9 */
553.Loop_aes_middle8x
554
/* v23 = vs10: the vcipherlast key when the round count is 10 */
555	xxlor	23+32, 10, 10
556
557	cmpdi	10, 10
558	beq	Do_next_ghash
559
560
/* more than 10 rounds: vcipher with vs10 and vs11, then v23 = vs12 */
561	xxlor	24+32, 11, 11
562
563	.long	0x11EFBD08
564	.long	0x1210BD08
565	.long	0x1231BD08
566	.long	0x1252BD08
567	.long	0x1273BD08
568	.long	0x1294BD08
569	.long	0x12B5BD08
570	.long	0x12D6BD08
571
572	.long	0x11EFC508
573	.long	0x1210C508
574	.long	0x1231C508
575	.long	0x1252C508
576	.long	0x1273C508
577	.long	0x1294C508
578	.long	0x12B5C508
579	.long	0x12D6C508
580
581	xxlor	23+32, 12, 12
582
583	cmpdi	10, 12
584	beq	Do_next_ghash
585
586
/* more than 12 rounds: vcipher with vs12 and vs13, then v23 = vs14 */
587	xxlor	24+32, 13, 13
588
589	.long	0x11EFBD08
590	.long	0x1210BD08
591	.long	0x1231BD08
592	.long	0x1252BD08
593	.long	0x1273BD08
594	.long	0x1294BD08
595	.long	0x12B5BD08
596	.long	0x12D6BD08
597
598	.long	0x11EFC508
599	.long	0x1210C508
600	.long	0x1231C508
601	.long	0x1252C508
602	.long	0x1273C508
603	.long	0x1294C508
604	.long	0x12B5C508
605	.long	0x12D6C508
606
607	xxlor	23+32, 14, 14
608
609	cmpdi	10, 14
610	beq	Do_next_ghash
/* round count is none of 10/12/14 (already rejected at entry) */
611	b	aes_gcm_out
612
613Do_next_ghash:
614
615
616
/*
 * Two blocks at a time: vcipherlast with v23, xor with the matching input
 * block (vs15..vs22; vs47..vs54 are v15..v22), store the result.
 */
617	.long	0x11EFBD09
618	.long	0x1210BD09
619
620	xxlxor	47, 47, 15
621	stxvb16x	47, 0, 9
622	xxlxor	48, 48, 16
623	stxvb16x	48, 15, 9
624
625	.long	0x1231BD09
626	.long	0x1252BD09
627
628	xxlxor	49, 49, 17
629	stxvb16x	49, 16, 9
630	xxlxor	50, 50, 18
631	stxvb16x	50, 17, 9
632
633	.long	0x1273BD09
634	.long	0x1294BD09
635
636	xxlxor	51, 51, 19
637	stxvb16x	51, 18, 9
638	xxlxor	52, 52, 20
639	stxvb16x	52, 19, 9
640
641	.long	0x12B5BD09
642	.long	0x12D6BD09
643
644	xxlxor	53, 53, 21
645	stxvb16x	53, 20, 9
646	xxlxor	54, 54, 22
647	stxvb16x	54, 21, 9
648
649	addi	9, 9, 128
650
651
/* hash the eight output blocks still held in v15..v22 into v0 */
652	ppc_aes_gcm_ghash2_4x
653
/*
 * Prepare the next eight cipher states: v27 = vs0, v15..v22 = successive
 * counters xor v27.  v29 keeps the counter after the first increment so
 * that v30 can be rewound when the loop ends.
 */
654	xxlor	27+32, 0, 0
655	.long	0x13DEF8C0
656	vor	29,30,30
657	vxor	15, 30, 27
658	.long	0x13DEF8C0
659	vxor	16, 30, 27
660	.long	0x13DEF8C0
661	vxor	17, 30, 27
662	.long	0x13DEF8C0
663	vxor	18, 30, 27
664	.long	0x13DEF8C0
665	vxor	19, 30, 27
666	.long	0x13DEF8C0
667	vxor	20, 30, 27
668	.long	0x13DEF8C0
669	vxor	21, 30, 27
670	.long	0x13DEF8C0
671	vxor	22, 30, 27
672
/* 128 bytes done */
673	addi	12, 12, -128
674	addi	11, 11, 128
675
676	bdnz	.Loop_8x_block
677
/* rewind v30 to the counter that matches the state prepared in v15 */
678	vor	30,29,29
679
680.Loop_last_block:
/* nothing left -> epilogue */
681	cmpdi	12, 0
682	beq	aes_gcm_out
683
684
/* CTR = number of whole 16-byte blocks left */
685	li	10, 16
686	divdu	10, 12, 10
687
688	mtctr	10
689
/* r10 = round count */
690	lwz	10, 240(6)
691
/* fewer than 16 bytes left: straight to the partial-block code */
692	cmpdi	12, 16
693	blt	Final_block
694
/*
 * .Loop_aes_middle_1x: run nine vcipher rounds on the single state
 * register v15, taking the round keys from vs1..vs9.
 *
 * Every ".long 0x11EF..08" word is a raw VMX vcipher encoding (primary
 * opcode 4, extended opcode 0x508) of "vcipher 15, 15, vN"; the third byte
 * selects vN (0x9D = v19, 0xA5 = v20, 0xAD = v21, 0xB5 = v22).
 * Clobbers v19..v22.
 *
 * The definition sits in the middle of ppc_aes_gcm_encrypt but emits no
 * code there; it is expanded by the single-block paths of both the
 * encrypt and the decrypt routine.
 */
695.macro	.Loop_aes_middle_1x
/* v19..v22 = vs1..vs4 */
696	xxlor	19+32, 1, 1
697	xxlor	20+32, 2, 2
698	xxlor	21+32, 3, 3
699	xxlor	22+32, 4, 4
700
701	.long	0x11EF9D08
702	.long	0x11EFA508
703	.long	0x11EFAD08
704	.long	0x11EFB508
705
/* v19..v22 = vs5..vs8 */
706	xxlor	19+32, 5, 5
707	xxlor	20+32, 6, 6
708	xxlor	21+32, 7, 7
709	xxlor	22+32, 8, 8
710
711	.long	0x11EF9D08
712	.long	0x11EFA508
713	.long	0x11EFAD08
714	.long	0x11EFB508
715
/* ninth round with vs9 */
716	xxlor	19+32, 9, 9
717	.long	0x11EF9D08
718.endm
719
/*
 * Next_rem_block: encrypt the remaining whole 16-byte blocks one at a
 * time.
 *
 * On entry: CTR = number of whole blocks, r10 = round count, r14/r9 =
 * read/write cursors, v15 = current counter (v30) xor vs0, r12/r11 =
 * bytes remaining/processed.
 * Raw .long words: 0x11EFBD08/0x11EFC508 = vcipher 15,15,23 / 15,15,24,
 * 0x11EFBD09 = vcipherlast 15,15,23, 0x13DEF8C0 = vaddudm 30,30,31.
 */
720Next_rem_block:
/* vs15 (VSX register, not v15) = input block */
721	lxvb16x	15, 0, 14
722
/* nine vcipher rounds on v15 with vs1..vs9 */
723.Loop_aes_middle_1x
724
/* v23 = vs10: the vcipherlast key when the round count is 10 */
725	xxlor	23+32, 10, 10
726
727	cmpdi	10, 10
728	beq	Do_next_1x
729
730
/* more than 10 rounds: vcipher with vs10 and vs11, then v23 = vs12 */
731	xxlor	24+32, 11, 11
732
733	.long	0x11EFBD08
734	.long	0x11EFC508
735
736	xxlor	23+32, 12, 12
737
738	cmpdi	10, 12
739	beq	Do_next_1x
740
741
/* more than 12 rounds: vcipher with vs12 and vs13, then v23 = vs14 */
742	xxlor	24+32, 13, 13
743
744	.long	0x11EFBD08
745	.long	0x11EFC508
746
747	xxlor	23+32, 14, 14
748
/* the branch target is the next instruction, so the result of this */
/* compare does not change the path taken */
749	cmpdi	10, 14
750	beq	Do_next_1x
751
752Do_next_1x:
/* vcipherlast, xor with the input block (vs47 is v15), store 16 bytes */
753	.long	0x11EFBD09
754
755	xxlxor	47, 47, 15
756	stxvb16x	47, 0, 9
757	addi	14, 14, 16
758	addi	9, 9, 16
759
/* hash the block just written (v28 = v15) into v0 */
760	vor	28,15,15
761	ppc_update_hash_1x
762
/* 16 bytes done; v15 = next counter xor vs0 */
763	addi	12, 12, -16
764	addi	11, 11, 16
765	xxlor	19+32, 0, 0
766	.long	0x13DEF8C0
767	vxor	15, 30, 19
768
769	bdnz	Next_rem_block
770
/* no partial block left -> epilogue; otherwise fall into Final_block */
771	cmpdi	12, 0
772	beq	aes_gcm_out
773
/*
 * Final_block: encrypt the trailing partial block of r12 bytes.  Both
 * paths into this label have r12 in 1..15.
 *
 * On entry: r10 = round count, r14/r9 = read/write cursors, v15 = current
 * counter (v30) xor vs0.
 * Raw .long words: 0x11EFBD08/0x11EFC508 = vcipher 15,15,23 / 15,15,24,
 * 0x11EFBD09 = vcipherlast 15,15,23.
 * r11 and r12 are not updated here, so aes_gcm_out still returns
 * r11 + r12 with the partial length included.
 */
774Final_block:
/* nine vcipher rounds on v15 with vs1..vs9 */
775.Loop_aes_middle_1x
776
/* v23 = vs10: the vcipherlast key when the round count is 10 */
777	xxlor	23+32, 10, 10
778
779	cmpdi	10, 10
780	beq	Do_final_1x
781
782
/* more than 10 rounds: vcipher with vs10 and vs11, then v23 = vs12 */
783	xxlor	24+32, 11, 11
784
785	.long	0x11EFBD08
786	.long	0x11EFC508
787
788	xxlor	23+32, 12, 12
789
790	cmpdi	10, 12
791	beq	Do_final_1x
792
793
/* more than 12 rounds: vcipher with vs12 and vs13, then v23 = vs14 */
794	xxlor	24+32, 13, 13
795
796	.long	0x11EFBD08
797	.long	0x11EFC508
798
799	xxlor	23+32, 14, 14
800
/* the branch target is the next instruction, so the result of this */
/* compare does not change the path taken */
801	cmpdi	10, 14
802	beq	Do_final_1x
803
804Do_final_1x:
805	.long	0x11EFBD09
806
/*
 * vs15 = 16 bytes from the read cursor; v15 ^= vs15.
 * NOTE(review): this loads a full 16 bytes although only r12 (< 16)
 * remain - confirm the source buffer is always readable that far.
 */
807	lxvb16x	15, 0, 14
808	xxlxor	47, 47, 15
809
810
/* r15 = 16 - r12: offset into the mask pattern built below */
811	li	15, 16
812	sub	15, 15, 12
813
/* 192(r1) = sixteen 0xff bytes, 208(r1) = sixteen 0x00 bytes */
814	vspltisb	16,-1
815	vspltisb	17,0
816	li	10, 192
817	stvx	16, 10, 1
818	addi	10, 10, 16
819	stvx	17, 10, 1
820
/* vs16 = r12 bytes of 0xff followed by zeros; keep only r12 bytes of v15 */
821	addi	10, 1, 192
822	lxvb16x	16, 15, 10
823	xxland	47, 47, 16
824
/* hash the masked output block (v28 = v15) into v0 */
825	vor	28,15,15
826	ppc_update_hash_1x
827
828
/* copy the first r12 bytes of v15 to the write cursor */
829	bl	Write_partial_block
830
831	b	aes_gcm_out
832
833
834
835
836
837
838
/*
 * Write_partial_block: copy the first r12 bytes of v15 to the address in
 * r9, going through the scratch slot at 192(r1).
 *
 * In:   v15 = block, r9 = destination, r12 = byte count.
 * Clobbers r10, r14, r15, r16, CTR and 192..207(r1).
 * r12 must be non-zero: CTR is loaded from it and bdnz decrements before
 * testing.  The callers in this file only arrive here with r12 in 1..15.
 */
839Write_partial_block:
/* spill v15 (vs47) to the scratch slot */
840	li	10, 192
841	stxvb16x	15+32, 10, 1
842
843
/* both cursors start one byte early because lbzu/stbu pre-increment */
844	addi	10, 9, -1
845	addi	16, 1, 191
846
847	mtctr	12
/* r15 is set here but not read again inside this routine */
848	li	15, 0
849
850Write_last_byte:
851	lbzu	14, 1(16)
852	stbu	14, 1(10)
853	bdnz	Write_last_byte
854	blr
855
/*
 * aes_gcm_out: common exit path, branched to by both ppc_aes_gcm_encrypt
 * and ppc_aes_gcm_decrypt.
 *
 * Writes the running hash value v0 back to 0(r8), sets the return value
 * r3 = r11 + r12, restores v20..v31, r14..r21 and LR from the 512-byte
 * frame, releases the frame and returns.
 */
856aes_gcm_out:
857
/* store v0 (vs32) back where it was loaded from */
858	stxvb16x	32, 0, 8
859	add	3, 11, 12
860
/* restore v20..v31 from 256(r1) upwards */
861	li	9, 256
862	lvx	20, 9, 1
863	addi	9, 9, 16
864	lvx	21, 9, 1
865	addi	9, 9, 16
866	lvx	22, 9, 1
867	addi	9, 9, 16
868	lvx	23, 9, 1
869	addi	9, 9, 16
870	lvx	24, 9, 1
871	addi	9, 9, 16
872	lvx	25, 9, 1
873	addi	9, 9, 16
874	lvx	26, 9, 1
875	addi	9, 9, 16
876	lvx	27, 9, 1
877	addi	9, 9, 16
878	lvx	28, 9, 1
879	addi	9, 9, 16
880	lvx	29, 9, 1
881	addi	9, 9, 16
882	lvx	30, 9, 1
883	addi	9, 9, 16
884	lvx	31, 9, 1
885
/* reload LR and r14..r21 */
886	ld	0, 528(1)
887	ld	14,112(1)
888	ld	15,120(1)
889	ld	16,128(1)
890	ld	17,136(1)
891	ld	18,144(1)
892	ld	19,152(1)
893	ld	20,160(1)
894	ld	21,168(1)
895
/* pop the frame and return */
896	mtlr	0
897	addi	1, 1, 512
898	blr
899
900
901
902
/*
 * ppc_aes_gcm_decrypt
 *
 * Same structure as ppc_aes_gcm_encrypt; the difference visible in this
 * block is that the hash is computed over the input blocks (copied from
 * vs15..vs22 into v15..v22 after the output has been stored) instead of
 * over the output blocks.
 *
 * Register roles, as established by the code below:
 *   r3   source pointer       (copied to r14, read 16 bytes at a time)
 *   r4   destination pointer  (copied to r9)
 *   r5   byte count           (copied to r12 = bytes still to process)
 *   r6   key buffer: 16-byte entries at offsets 0x00..0xe0, loaded into
 *        vs0..vs14, and a 32-bit round count at offset 240
 *   r7   16-byte block loaded into v30 and advanced by v31 (= 1) for every
 *        block processed, i.e. the counter
 *   r8   16-byte running hash value (loaded into v0, written back at
 *        aes_gcm_out) followed by a table read from offsets 32..224 into
 *        v2..v14
 *   r11  bytes processed so far
 * Return value: r3 = r11 + r12, computed at aes_gcm_out.
 *
 * Raw .long words: 0x......08 = vcipher, 0x......09 = vcipherlast,
 * 0x13DEF8C0 = vaddudm 30, 30, 31 (v30 += v31).
 *
 * Frame: 512 bytes.  r14..r21 are saved at 112..168(r1), v20..v31 at
 * 256..432(r1) and LR at 528(r1), i.e. 16 bytes above the caller's stack
 * pointer; 192..223(r1) is scratch for the partial-block code.
 * NOTE(review): the lxv/lxvb16x loads below overwrite VSR14..VSR22 without
 * saving them; the ELFv2 ABI lists f14..f31 as non-volatile - confirm this
 * is acceptable for every caller.
 */
903.global	ppc_aes_gcm_decrypt
904.align	5
905ppc_aes_gcm_decrypt:
906_ppc_aes_gcm_decrypt:
907
908	stdu	1,-512(1)
909	mflr	0
910
/* save r14..r21 */
911	std	14,112(1)
912	std	15,120(1)
913	std	16,128(1)
914	std	17,136(1)
915	std	18,144(1)
916	std	19,152(1)
917	std	20,160(1)
918	std	21,168(1)
/* save v20..v31 at 256(r1) upwards, 16 bytes each */
919	li	9, 256
920	stvx	20, 9, 1
921	addi	9, 9, 16
922	stvx	21, 9, 1
923	addi	9, 9, 16
924	stvx	22, 9, 1
925	addi	9, 9, 16
926	stvx	23, 9, 1
927	addi	9, 9, 16
928	stvx	24, 9, 1
929	addi	9, 9, 16
930	stvx	25, 9, 1
931	addi	9, 9, 16
932	stvx	26, 9, 1
933	addi	9, 9, 16
934	stvx	27, 9, 1
935	addi	9, 9, 16
936	stvx	28, 9, 1
937	addi	9, 9, 16
938	stvx	29, 9, 1
939	addi	9, 9, 16
940	stvx	30, 9, 1
941	addi	9, 9, 16
942	stvx	31, 9, 1
/* LR goes to 528(r1) = 16 bytes above the pre-stdu stack pointer */
943	std	0, 528(1)
944
945
/* v0 = running hash value from 0(r8) */
946	lxvb16x	32, 0, 8
947
948
/* v2..v14 = table entries at 32(r8)..224(r8) */
949	li	10, 32
950	lxvd2x	2+32, 10, 8
951	li	10, 48
952	lxvd2x	3+32, 10, 8
953	li	10, 64
954	lxvd2x	4+32, 10, 8
955	li	10, 80
956	lxvd2x	5+32, 10, 8
957
958	li	10, 96
959	lxvd2x	6+32, 10, 8
960	li	10, 112
961	lxvd2x	7+32, 10, 8
962	li	10, 128
963	lxvd2x	8+32, 10, 8
964
965	li	10, 144
966	lxvd2x	9+32, 10, 8
967	li	10, 160
968	lxvd2x	10+32, 10, 8
969	li	10, 176
970	lxvd2x	11+32, 10, 8
971
972	li	10, 192
973	lxvd2x	12+32, 10, 8
974	li	10, 208
975	lxvd2x	13+32, 10, 8
976	li	10, 224
977	lxvd2x	14+32, 10, 8
978
979
/* v30 = counter block from 0(r7) */
980	lxvb16x	30+32, 0, 7
981
/* r12 = bytes remaining, r11 = bytes processed */
982	mr	12, 5
983	li	11, 0
984
985
/* v31 = fifteen zero bytes followed by 0x01: the per-block increment */
986	vxor	31, 31, 31
987	vspltisb	22,1
988	vsldoi	31, 31, 22,1
989
990
/* vs0..vs10 = key entries at 0x00..0xa0 of r6 */
991	lxv	0, 0(6)
992	lxv	1, 0x10(6)
993	lxv	2, 0x20(6)
994	lxv	3, 0x30(6)
995	lxv	4, 0x40(6)
996	lxv	5, 0x50(6)
997	lxv	6, 0x60(6)
998	lxv	7, 0x70(6)
999	lxv	8, 0x80(6)
1000	lxv	9, 0x90(6)
1001	lxv	10, 0xa0(6)
1002
1003
/* r9 = round count */
1004	lwz	9,240(6)
1005
1006
1007
/* v15 = counter xor vs0: the first cipher state */
1008	xxlor	32+29, 0, 0
1009	vxor	15, 30, 29
1010
/* 10 rounds: the keys loaded so far are enough */
1011	cmpdi	9, 10
1012	beq	.Loop_aes_gcm_8x_dec
1013
1014
/* 12 rounds: two more key entries */
1015	lxv	11, 0xb0(6)
1016	lxv	12, 0xc0(6)
1017
1018	cmpdi	9, 12
1019	beq	.Loop_aes_gcm_8x_dec
1020
1021
/* 14 rounds: two more key entries */
1022	lxv	13, 0xd0(6)
1023	lxv	14, 0xe0(6)
1024	cmpdi	9, 14
1025	beq	.Loop_aes_gcm_8x_dec
1026
/*
 * Any other round count: leave without processing anything.
 * NOTE(review): r11 = 0 and r12 = r5 here, so aes_gcm_out returns r5 -
 * confirm callers do not take that to mean "all bytes processed".
 */
1027	b	aes_gcm_out
1028
1029.align	5
1030.Loop_aes_gcm_8x_dec:
/* r14 = read cursor, r9 = write cursor */
1031	mr	14, 3
1032	mr	9, 4
1033
1034
/* r10 = r5 / 128 = number of eight-block iterations; none -> tail code */
1035	li	10, 128
1036	divdu	10, 5, 10
1037	cmpdi	10, 0
1038	beq	.Loop_last_block_dec
1039
/* v16..v22 = the next seven counters, each xor vs0 (held in v29) */
1040	.long	0x13DEF8C0
1041	vxor	16, 30, 29
1042	.long	0x13DEF8C0
1043	vxor	17, 30, 29
1044	.long	0x13DEF8C0
1045	vxor	18, 30, 29
1046	.long	0x13DEF8C0
1047	vxor	19, 30, 29
1048	.long	0x13DEF8C0
1049	vxor	20, 30, 29
1050	.long	0x13DEF8C0
1051	vxor	21, 30, 29
1052	.long	0x13DEF8C0
1053	vxor	22, 30, 29
1054
1055	mtctr	10
1056
/* r15..r21 = byte offsets of blocks 1..7 inside a 128-byte chunk */
1057	li	15, 16
1058	li	16, 32
1059	li	17, 48
1060	li	18, 64
1061	li	19, 80
1062	li	20, 96
1063	li	21, 112
1064
/* r10 = round count, tested after the shared nine rounds */
1065	lwz	10, 240(6)
1066
1067.Loop_8x_block_dec:
1068
/* vs15..vs22 (VSX registers, not v15..v22) = eight input blocks */
1069	lxvb16x	15, 0, 14
1070	lxvb16x	16, 15, 14
1071	lxvb16x	17, 16, 14
1072	lxvb16x	18, 17, 14
1073	lxvb16x	19, 18, 14
1074	lxvb16x	20, 19, 14
1075	lxvb16x	21, 20, 14
1076	lxvb16x	22, 21, 14
1077	addi	14, 14, 128
1078
/* nine vcipher rounds on v15..v22 with vs1..vs9 */
1079.Loop_aes_middle8x
1080
/* v23 = vs10: the vcipherlast key when the round count is 10 */
1081	xxlor	23+32, 10, 10
1082
1083	cmpdi	10, 10
1084	beq	Do_last_aes_dec
1085
1086
/* more than 10 rounds: vcipher with vs10 and vs11, then v23 = vs12 */
1087	xxlor	24+32, 11, 11
1088
1089	.long	0x11EFBD08
1090	.long	0x1210BD08
1091	.long	0x1231BD08
1092	.long	0x1252BD08
1093	.long	0x1273BD08
1094	.long	0x1294BD08
1095	.long	0x12B5BD08
1096	.long	0x12D6BD08
1097
1098	.long	0x11EFC508
1099	.long	0x1210C508
1100	.long	0x1231C508
1101	.long	0x1252C508
1102	.long	0x1273C508
1103	.long	0x1294C508
1104	.long	0x12B5C508
1105	.long	0x12D6C508
1106
1107	xxlor	23+32, 12, 12
1108
1109	cmpdi	10, 12
1110	beq	Do_last_aes_dec
1111
1112
/* more than 12 rounds: vcipher with vs12 and vs13, then v23 = vs14 */
1113	xxlor	24+32, 13, 13
1114
1115	.long	0x11EFBD08
1116	.long	0x1210BD08
1117	.long	0x1231BD08
1118	.long	0x1252BD08
1119	.long	0x1273BD08
1120	.long	0x1294BD08
1121	.long	0x12B5BD08
1122	.long	0x12D6BD08
1123
1124	.long	0x11EFC508
1125	.long	0x1210C508
1126	.long	0x1231C508
1127	.long	0x1252C508
1128	.long	0x1273C508
1129	.long	0x1294C508
1130	.long	0x12B5C508
1131	.long	0x12D6C508
1132
1133	xxlor	23+32, 14, 14
1134
1135	cmpdi	10, 14
1136	beq	Do_last_aes_dec
/* round count is none of 10/12/14 (already rejected at entry) */
1137	b	aes_gcm_out
1138
1139Do_last_aes_dec:
1140
1141
1142
/*
 * Two blocks at a time: vcipherlast with v23, xor with the matching input
 * block (vs15..vs22; vs47..vs54 are v15..v22), store the result.
 */
1143	.long	0x11EFBD09
1144	.long	0x1210BD09
1145
1146	xxlxor	47, 47, 15
1147	stxvb16x	47, 0, 9
1148	xxlxor	48, 48, 16
1149	stxvb16x	48, 15, 9
1150
1151	.long	0x1231BD09
1152	.long	0x1252BD09
1153
1154	xxlxor	49, 49, 17
1155	stxvb16x	49, 16, 9
1156	xxlxor	50, 50, 18
1157	stxvb16x	50, 17, 9
1158
1159	.long	0x1273BD09
1160	.long	0x1294BD09
1161
1162	xxlxor	51, 51, 19
1163	stxvb16x	51, 18, 9
1164	xxlxor	52, 52, 20
1165	stxvb16x	52, 19, 9
1166
1167	.long	0x12B5BD09
1168	.long	0x12D6BD09
1169
1170	xxlxor	53, 53, 21
1171	stxvb16x	53, 20, 9
1172	xxlxor	54, 54, 22
1173	stxvb16x	54, 21, 9
1174
1175	addi	9, 9, 128
1176
/* v15..v22 = the eight input blocks (vs15..vs22), so that the hash */
/* below runs over the input rather than the output */
1177	xxlor	15+32, 15, 15
1178	xxlor	16+32, 16, 16
1179	xxlor	17+32, 17, 17
1180	xxlor	18+32, 18, 18
1181	xxlor	19+32, 19, 19
1182	xxlor	20+32, 20, 20
1183	xxlor	21+32, 21, 21
1184	xxlor	22+32, 22, 22
1185
1186
/* hash v15..v22 into v0 */
1187	ppc_aes_gcm_ghash2_4x
1188
/*
 * Prepare the next eight cipher states: v27 = vs0, v15..v22 = successive
 * counters xor v27.  v29 keeps the counter after the first increment so
 * that v30 can be rewound when the loop ends.
 */
1189	xxlor	27+32, 0, 0
1190	.long	0x13DEF8C0
1191	vor	29,30,30
1192	vxor	15, 30, 27
1193	.long	0x13DEF8C0
1194	vxor	16, 30, 27
1195	.long	0x13DEF8C0
1196	vxor	17, 30, 27
1197	.long	0x13DEF8C0
1198	vxor	18, 30, 27
1199	.long	0x13DEF8C0
1200	vxor	19, 30, 27
1201	.long	0x13DEF8C0
1202	vxor	20, 30, 27
1203	.long	0x13DEF8C0
1204	vxor	21, 30, 27
1205	.long	0x13DEF8C0
1206	vxor	22, 30, 27
/* 128 bytes done */
1207	addi	12, 12, -128
1208	addi	11, 11, 128
1209
1210	bdnz	.Loop_8x_block_dec
1211
/* rewind v30 to the counter that matches the state prepared in v15 */
1212	vor	30,29,29
1213
1214.Loop_last_block_dec:
/* nothing left -> epilogue */
1215	cmpdi	12, 0
1216	beq	aes_gcm_out
1217
1218
/* CTR = number of whole 16-byte blocks left */
1219	li	10, 16
1220	divdu	10, 12, 10
1221
1222	mtctr	10
1223
/* r10 = round count */
1224	lwz	10,240(6)
1225
/* fewer than 16 bytes left: straight to the partial-block code */
1226	cmpdi	12, 16
1227	blt	Final_block_dec
1228
/*
 * Next_rem_block_dec: decrypt the remaining whole 16-byte blocks one at a
 * time.
 *
 * On entry: CTR = number of whole blocks, r10 = round count, r14/r9 =
 * read/write cursors, v15 = current counter (v30) xor vs0, r12/r11 =
 * bytes remaining/processed.
 * Raw .long words: 0x11EFBD08/0x11EFC508 = vcipher 15,15,23 / 15,15,24,
 * 0x11EFBD09 = vcipherlast 15,15,23, 0x13DEF8C0 = vaddudm 30,30,31.
 */
1229Next_rem_block_dec:
/* vs15 (VSX register, not v15) = input block */
1230	lxvb16x	15, 0, 14
1231
/* nine vcipher rounds on v15 with vs1..vs9 */
1232.Loop_aes_middle_1x
1233
/* v23 = vs10: the vcipherlast key when the round count is 10 */
1234	xxlor	23+32, 10, 10
1235
1236	cmpdi	10, 10
1237	beq	Do_next_1x_dec
1238
1239
/* more than 10 rounds: vcipher with vs10 and vs11, then v23 = vs12 */
1240	xxlor	24+32, 11, 11
1241
1242	.long	0x11EFBD08
1243	.long	0x11EFC508
1244
1245	xxlor	23+32, 12, 12
1246
1247	cmpdi	10, 12
1248	beq	Do_next_1x_dec
1249
1250
/* more than 12 rounds: vcipher with vs12 and vs13, then v23 = vs14 */
1251	xxlor	24+32, 13, 13
1252
1253	.long	0x11EFBD08
1254	.long	0x11EFC508
1255
1256	xxlor	23+32, 14, 14
1257
/* the branch target is the next instruction, so the result of this */
/* compare does not change the path taken */
1258	cmpdi	10, 14
1259	beq	Do_next_1x_dec
1260
1261Do_next_1x_dec:
/* vcipherlast, xor with the input block (vs47 is v15), store 16 bytes */
1262	.long	0x11EFBD09
1263
1264	xxlxor	47, 47, 15
1265	stxvb16x	47, 0, 9
1266	addi	14, 14, 16
1267	addi	9, 9, 16
1268
/* hash the input block (v28 = vs15), not the output, into v0 */
1269	xxlor	28+32, 15, 15
1270	ppc_update_hash_1x
1271
/* 16 bytes done; v15 = next counter xor vs0 */
1272	addi	12, 12, -16
1273	addi	11, 11, 16
1274	xxlor	19+32, 0, 0
1275	.long	0x13DEF8C0
1276	vxor	15, 30, 19
1277
1278	bdnz	Next_rem_block_dec
1279
/* no partial block left -> epilogue; otherwise fall into Final_block_dec */
1280	cmpdi	12, 0
1281	beq	aes_gcm_out
1282
/*
 * Final_block_dec: decrypt the trailing partial block of r12 bytes.  Both
 * paths into this label have r12 in 1..15.
 *
 * On entry: r10 = round count, r14/r9 = read/write cursors, v15 = current
 * counter (v30) xor vs0.
 * Raw .long words: 0x11EFBD08/0x11EFC508 = vcipher 15,15,23 / 15,15,24,
 * 0x11EFBD09 = vcipherlast 15,15,23.
 * r11 and r12 are not updated here, so aes_gcm_out still returns
 * r11 + r12 with the partial length included.
 *
 * The hash input is the input block with everything past the first r12
 * bytes forced to zero, matching what the encrypt path hashes for the
 * same data.  Hashing the raw 16-byte load instead would make the result
 * depend on whatever bytes follow the message in memory.
 */
1283Final_block_dec:
/* nine vcipher rounds on v15 with vs1..vs9 */
1284.Loop_aes_middle_1x
1285
/* v23 = vs10: the vcipherlast key when the round count is 10 */
1286	xxlor	23+32, 10, 10
1287
1288	cmpdi	10, 10
1289	beq	Do_final_1x_dec
1290
1291
/* more than 10 rounds: vcipher with vs10 and vs11, then v23 = vs12 */
1292	xxlor	24+32, 11, 11
1293
1294	.long	0x11EFBD08
1295	.long	0x11EFC508
1296
1297	xxlor	23+32, 12, 12
1298
1299	cmpdi	10, 12
1300	beq	Do_final_1x_dec
1301
1302
/* more than 12 rounds: vcipher with vs12 and vs13, then v23 = vs14 */
1303	xxlor	24+32, 13, 13
1304
1305	.long	0x11EFBD08
1306	.long	0x11EFC508
1307
1308	xxlor	23+32, 14, 14
1309
/* the branch target is the next instruction, so the result of this */
/* compare does not change the path taken */
1310	cmpdi	10, 14
1311	beq	Do_final_1x_dec
1312
1313Do_final_1x_dec:
1314	.long	0x11EFBD09
1315
/*
 * vs15 = 16 bytes from the read cursor; v15 ^= vs15.
 * NOTE(review): this loads a full 16 bytes although only r12 (< 16)
 * remain - confirm the source buffer is always readable that far.
 */
1316	lxvb16x	15, 0, 14
1317	xxlxor	47, 47, 15
1318
1319
/* r15 = 16 - r12: offset into the mask pattern built below */
1320	li	15, 16
1321	sub	15, 15, 12
1322
/* 192(r1) = sixteen 0xff bytes, 208(r1) = sixteen 0x00 bytes */
1323	vspltisb	16,-1
1324	vspltisb	17,0
1325	li	10, 192
1326	stvx	16, 10, 1
1327	addi	10, 10, 16
1328	stvx	17, 10, 1
1329
/* vs16 = r12 bytes of 0xff followed by zeros; keep only r12 bytes of v15 */
1330	addi	10, 1, 192
1331	lxvb16x	16, 15, 10
1332	xxland	47, 47, 16
1333
/* v28 = input block AND mask: only the r12 valid input bytes are hashed */
1334	xxland	28+32, 15, 16
1335	ppc_update_hash_1x
1336
1337
/* copy the first r12 bytes of v15 to the write cursor */
1338	bl	Write_partial_block
1339
1340	b	aes_gcm_out
1341