xref: /freebsd/sys/crypto/openssl/aarch64/sm3-armv8.S (revision 4757b351ea9d59d71d4a38b82506d2d16fcd560d)
1*4757b351SPierre Pronchery/* Do not modify. This file is auto-generated from sm3-armv8.pl. */
2*4757b351SPierre Pronchery// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
3*4757b351SPierre Pronchery//
4*4757b351SPierre Pronchery// Licensed under the Apache License 2.0 (the "License").  You may not use
5*4757b351SPierre Pronchery// this file except in compliance with the License.  You can obtain a copy
6*4757b351SPierre Pronchery// in the file LICENSE in the source distribution or at
7*4757b351SPierre Pronchery// https://www.openssl.org/source/license.html
8*4757b351SPierre Pronchery//
9*4757b351SPierre Pronchery// This module implements support for Armv8 SM3 instructions
10*4757b351SPierre Pronchery
11*4757b351SPierre Pronchery// $output is the last argument if it looks like a file (it has an extension)
12*4757b351SPierre Pronchery// $flavour is the first argument if it doesn't look like a file
13*4757b351SPierre Pronchery#include "arm_arch.h"
// ossl_hwsm3_block_data_order
//
// Processes consecutive 64-byte input blocks with the Armv8 SM3
// instructions and updates a 32-byte hash state in place.  The SM3
// instructions are emitted as raw .inst words; the intended mnemonic is
// given in the trailing comment of each such line.
//
// NOTE(review): presumably declared on the C side as
//   void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num)
// -- confirm against the C prototype.
//
// In:   x0 = pointer to the 32-byte state (read once at entry, written
//            back once at exit)
//       x1 = pointer to the input, advanced by 64 bytes per block
//       w2 = number of 64-byte blocks
// Regs: v5,v6   working state
//       v18,v19 copy of the state taken at the start of each block
//       v0-v4   rotating window of message words
//       v16,v17 the two constants loaded from .Tj
//       v20,v21 round constant, rotated left by one bit every round
//       v22,v23 scratch
//       x8      address of .Tj
// No stack is used, and only caller-saved registers are written.
//
// NOTE(review): the counter is decremented before it is tested, so a
// count of zero on entry would not return immediately -- presumably
// callers always pass at least one block; confirm.
// NOTE(review): only the low 32 bits of x2 take part in the count;
// confirm callers never pass a block count that does not fit in 32 bits.
14*4757b351SPierre Pronchery.text
15*4757b351SPierre Pronchery.globl	ossl_hwsm3_block_data_order
16*4757b351SPierre Pronchery.type	ossl_hwsm3_block_data_order,%function
17*4757b351SPierre Pronchery.align	5
18*4757b351SPierre Proncheryossl_hwsm3_block_data_order:
	// Macro supplied by arm_arch.h; presumably a branch-target landing
	// pad when branch protection is enabled -- confirm in arm_arch.h.
19*4757b351SPierre Pronchery	AARCH64_VALID_CALL_TARGET
20*4757b351SPierre Pronchery	// load state
21*4757b351SPierre Pronchery	ld1	{v5.4s,v6.4s}, [x0]
	// rev64 (swap words inside each 64-bit half) followed by ext #8
	// (swap the two halves) reverses the order of the four 32-bit words
	// in each state vector.
22*4757b351SPierre Pronchery	rev64	v5.4s, v5.4s
23*4757b351SPierre Pronchery	rev64	v6.4s, v6.4s
24*4757b351SPierre Pronchery	ext	v5.16b, v5.16b, v5.16b, #8
25*4757b351SPierre Pronchery	ext	v6.16b, v6.16b, v6.16b, #8
	// x8 = address of .Tj; the scalar pair load puts the two 32-bit
	// constants in lane 0 of v16 and v17 and zeroes the remaining lanes.
26*4757b351SPierre Pronchery	adrp	x8, .Tj
27*4757b351SPierre Pronchery	add	x8, x8, #:lo12:.Tj
28*4757b351SPierre Pronchery	ldp	s16, s17, [x8]
29*4757b351SPierre Pronchery
30*4757b351SPierre Pronchery.Loop:
	// One iteration per 64-byte block.
31*4757b351SPierre Pronchery	// load input
32*4757b351SPierre Pronchery	ld1	{v0.4s,v1.4s,v2.4s,v3.4s}, [x1], #64
	// one block consumed; tested by the cbnz at the bottom of the loop
33*4757b351SPierre Pronchery	sub	w2, w2, #1
34*4757b351SPierre Pronchery
	// keep the state as it was at the start of this block for the XOR
	// performed after the last round
35*4757b351SPierre Pronchery	mov	v18.16b, v5.16b
36*4757b351SPierre Pronchery	mov	v19.16b, v6.16b
37*4757b351SPierre Pronchery
	// little-endian builds only: byte-swap every 32-bit input word
38*4757b351SPierre Pronchery#ifndef __AARCH64EB__
39*4757b351SPierre Pronchery	rev32	v0.16b, v0.16b
40*4757b351SPierre Pronchery	rev32	v1.16b, v1.16b
41*4757b351SPierre Pronchery	rev32	v2.16b, v2.16b
42*4757b351SPierre Pronchery	rev32	v3.16b, v3.16b
43*4757b351SPierre Pronchery#endif
44*4757b351SPierre Pronchery
	// move the first constant from lane 0 of v16 to lane 3 of v20
45*4757b351SPierre Pronchery	ext	v20.16b, v16.16b, v16.16b, #4
	// The rest of the loop body is 16 groups of four rounds each.
	// A group optionally derives four new message words into the oldest
	// register of the v0-v4 window (ext x3, sm3partw1, sm3partw2), then
	// runs four rounds.  Each round is sm3ss1 + sm3tt1 + sm3tt2, with a
	// shl/sri pair in between that rotates the constant left by one bit;
	// v20 and v21 swap roles as source and destination every round.
	// The generated "w7 | w8 ..." comments are repeated verbatim in every
	// group; the indices match the first group and are relative to the
	// current window in later groups.
46*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
47*4757b351SPierre Pronchery	ext	v4.16b, v1.16b, v2.16b, #12
48*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
49*4757b351SPierre Pronchery	ext	v22.16b, v0.16b, v1.16b, #12
50*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
51*4757b351SPierre Pronchery	ext	v23.16b, v2.16b, v3.16b, #8
52*4757b351SPierre Pronchery.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
53*4757b351SPierre Pronchery.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
54*4757b351SPierre Pronchery	eor	v22.16b, v0.16b, v1.16b
55*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
56*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
57*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
58*4757b351SPierre Pronchery.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
59*4757b351SPierre Pronchery.inst	0xce408ae6	//sm3tt2a v6.4s, v23.4s, v0.4s[0]
60*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
61*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
62*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
63*4757b351SPierre Pronchery.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
64*4757b351SPierre Pronchery.inst	0xce409ae6	//sm3tt2a v6.4s, v23.4s, v0.4s[1]
65*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
66*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
67*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
68*4757b351SPierre Pronchery.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
69*4757b351SPierre Pronchery.inst	0xce40aae6	//sm3tt2a v6.4s, v23.4s, v0.4s[2]
70*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
71*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
72*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
73*4757b351SPierre Pronchery.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
74*4757b351SPierre Pronchery.inst	0xce40bae6	//sm3tt2a v6.4s, v23.4s, v0.4s[3]
75*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
76*4757b351SPierre Pronchery	ext	v0.16b, v2.16b, v3.16b, #12
77*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
78*4757b351SPierre Pronchery	ext	v22.16b, v1.16b, v2.16b, #12
79*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
80*4757b351SPierre Pronchery	ext	v23.16b, v3.16b, v4.16b, #8
81*4757b351SPierre Pronchery.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
82*4757b351SPierre Pronchery.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
83*4757b351SPierre Pronchery	eor	v22.16b, v1.16b, v2.16b
84*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
85*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
86*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
87*4757b351SPierre Pronchery.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
88*4757b351SPierre Pronchery.inst	0xce418ae6	//sm3tt2a v6.4s, v23.4s, v1.4s[0]
89*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
90*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
91*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
92*4757b351SPierre Pronchery.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
93*4757b351SPierre Pronchery.inst	0xce419ae6	//sm3tt2a v6.4s, v23.4s, v1.4s[1]
94*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
95*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
96*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
97*4757b351SPierre Pronchery.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
98*4757b351SPierre Pronchery.inst	0xce41aae6	//sm3tt2a v6.4s, v23.4s, v1.4s[2]
99*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
100*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
101*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
102*4757b351SPierre Pronchery.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
103*4757b351SPierre Pronchery.inst	0xce41bae6	//sm3tt2a v6.4s, v23.4s, v1.4s[3]
104*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
105*4757b351SPierre Pronchery	ext	v1.16b, v3.16b, v4.16b, #12
106*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
107*4757b351SPierre Pronchery	ext	v22.16b, v2.16b, v3.16b, #12
108*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
109*4757b351SPierre Pronchery	ext	v23.16b, v4.16b, v0.16b, #8
110*4757b351SPierre Pronchery.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
111*4757b351SPierre Pronchery.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
112*4757b351SPierre Pronchery	eor	v22.16b, v2.16b, v3.16b
113*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
114*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
115*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
116*4757b351SPierre Pronchery.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
117*4757b351SPierre Pronchery.inst	0xce428ae6	//sm3tt2a v6.4s, v23.4s, v2.4s[0]
118*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
119*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
120*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
121*4757b351SPierre Pronchery.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
122*4757b351SPierre Pronchery.inst	0xce429ae6	//sm3tt2a v6.4s, v23.4s, v2.4s[1]
123*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
124*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
125*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
126*4757b351SPierre Pronchery.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
127*4757b351SPierre Pronchery.inst	0xce42aae6	//sm3tt2a v6.4s, v23.4s, v2.4s[2]
128*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
129*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
130*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
131*4757b351SPierre Pronchery.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
132*4757b351SPierre Pronchery.inst	0xce42bae6	//sm3tt2a v6.4s, v23.4s, v2.4s[3]
133*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
134*4757b351SPierre Pronchery	ext	v2.16b, v4.16b, v0.16b, #12
135*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
136*4757b351SPierre Pronchery	ext	v22.16b, v3.16b, v4.16b, #12
137*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
138*4757b351SPierre Pronchery	ext	v23.16b, v0.16b, v1.16b, #8
139*4757b351SPierre Pronchery.inst	0xce61c062	//sm3partw1 v2.4s, v3.4s, v1.4s
140*4757b351SPierre Pronchery.inst	0xce76c6e2	//sm3partw2 v2.4s, v23.4s, v22.4s
141*4757b351SPierre Pronchery	eor	v22.16b, v3.16b, v4.16b
142*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
143*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
144*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
145*4757b351SPierre Pronchery.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
146*4757b351SPierre Pronchery.inst	0xce438ae6	//sm3tt2a v6.4s, v23.4s, v3.4s[0]
147*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
148*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
149*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
150*4757b351SPierre Pronchery.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
151*4757b351SPierre Pronchery.inst	0xce439ae6	//sm3tt2a v6.4s, v23.4s, v3.4s[1]
152*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
153*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
154*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
155*4757b351SPierre Pronchery.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
156*4757b351SPierre Pronchery.inst	0xce43aae6	//sm3tt2a v6.4s, v23.4s, v3.4s[2]
157*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
158*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
159*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
160*4757b351SPierre Pronchery.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
161*4757b351SPierre Pronchery.inst	0xce43bae6	//sm3tt2a v6.4s, v23.4s, v3.4s[3]
	// 16 rounds done.  Restart the rotating constant from the second
	// word of .Tj (lane 0 of v17 moved to lane 3 of v20); the remaining
	// 48 rounds use the sm3tt1b/sm3tt2b forms.
162*4757b351SPierre Pronchery	ext	v20.16b, v17.16b, v17.16b, #4
163*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
164*4757b351SPierre Pronchery	ext	v3.16b, v0.16b, v1.16b, #12
165*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
166*4757b351SPierre Pronchery	ext	v22.16b, v4.16b, v0.16b, #12
167*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
168*4757b351SPierre Pronchery	ext	v23.16b, v1.16b, v2.16b, #8
169*4757b351SPierre Pronchery.inst	0xce62c083	//sm3partw1 v3.4s, v4.4s, v2.4s
170*4757b351SPierre Pronchery.inst	0xce76c6e3	//sm3partw2 v3.4s, v23.4s, v22.4s
171*4757b351SPierre Pronchery	eor	v22.16b, v4.16b, v0.16b
172*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
173*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
174*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
175*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
176*4757b351SPierre Pronchery.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
177*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
178*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
179*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
180*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
181*4757b351SPierre Pronchery.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
182*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
183*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
184*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
185*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
186*4757b351SPierre Pronchery.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
187*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
188*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
189*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
190*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
191*4757b351SPierre Pronchery.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]
192*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
193*4757b351SPierre Pronchery	ext	v4.16b, v1.16b, v2.16b, #12
194*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
195*4757b351SPierre Pronchery	ext	v22.16b, v0.16b, v1.16b, #12
196*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
197*4757b351SPierre Pronchery	ext	v23.16b, v2.16b, v3.16b, #8
198*4757b351SPierre Pronchery.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
199*4757b351SPierre Pronchery.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
200*4757b351SPierre Pronchery	eor	v22.16b, v0.16b, v1.16b
201*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
202*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
203*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
204*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
205*4757b351SPierre Pronchery.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
206*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
207*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
208*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
209*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
210*4757b351SPierre Pronchery.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
211*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
212*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
213*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
214*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
215*4757b351SPierre Pronchery.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
216*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
217*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
218*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
219*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
220*4757b351SPierre Pronchery.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]
221*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
222*4757b351SPierre Pronchery	ext	v0.16b, v2.16b, v3.16b, #12
223*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
224*4757b351SPierre Pronchery	ext	v22.16b, v1.16b, v2.16b, #12
225*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
226*4757b351SPierre Pronchery	ext	v23.16b, v3.16b, v4.16b, #8
227*4757b351SPierre Pronchery.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
228*4757b351SPierre Pronchery.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
229*4757b351SPierre Pronchery	eor	v22.16b, v1.16b, v2.16b
230*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
231*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
232*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
233*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
234*4757b351SPierre Pronchery.inst	0xce418ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[0]
235*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
236*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
237*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
238*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
239*4757b351SPierre Pronchery.inst	0xce419ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[1]
240*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
241*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
242*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
243*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
244*4757b351SPierre Pronchery.inst	0xce41aee6	//sm3tt2b v6.4s, v23.4s, v1.4s[2]
245*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
246*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
247*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
248*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
249*4757b351SPierre Pronchery.inst	0xce41bee6	//sm3tt2b v6.4s, v23.4s, v1.4s[3]
250*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
251*4757b351SPierre Pronchery	ext	v1.16b, v3.16b, v4.16b, #12
252*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
253*4757b351SPierre Pronchery	ext	v22.16b, v2.16b, v3.16b, #12
254*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
255*4757b351SPierre Pronchery	ext	v23.16b, v4.16b, v0.16b, #8
256*4757b351SPierre Pronchery.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
257*4757b351SPierre Pronchery.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
258*4757b351SPierre Pronchery	eor	v22.16b, v2.16b, v3.16b
259*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
260*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
261*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
262*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
263*4757b351SPierre Pronchery.inst	0xce428ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[0]
264*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
265*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
266*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
267*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
268*4757b351SPierre Pronchery.inst	0xce429ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[1]
269*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
270*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
271*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
272*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
273*4757b351SPierre Pronchery.inst	0xce42aee6	//sm3tt2b v6.4s, v23.4s, v2.4s[2]
274*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
275*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
276*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
277*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
278*4757b351SPierre Pronchery.inst	0xce42bee6	//sm3tt2b v6.4s, v23.4s, v2.4s[3]
279*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
280*4757b351SPierre Pronchery	ext	v2.16b, v4.16b, v0.16b, #12
281*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
282*4757b351SPierre Pronchery	ext	v22.16b, v3.16b, v4.16b, #12
283*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
284*4757b351SPierre Pronchery	ext	v23.16b, v0.16b, v1.16b, #8
285*4757b351SPierre Pronchery.inst	0xce61c062	//sm3partw1 v2.4s, v3.4s, v1.4s
286*4757b351SPierre Pronchery.inst	0xce76c6e2	//sm3partw2 v2.4s, v23.4s, v22.4s
287*4757b351SPierre Pronchery	eor	v22.16b, v3.16b, v4.16b
288*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
289*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
290*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
291*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
292*4757b351SPierre Pronchery.inst	0xce438ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[0]
293*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
294*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
295*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
296*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
297*4757b351SPierre Pronchery.inst	0xce439ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[1]
298*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
299*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
300*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
301*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
302*4757b351SPierre Pronchery.inst	0xce43aee6	//sm3tt2b v6.4s, v23.4s, v3.4s[2]
303*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
304*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
305*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
306*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
307*4757b351SPierre Pronchery.inst	0xce43bee6	//sm3tt2b v6.4s, v23.4s, v3.4s[3]
308*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
309*4757b351SPierre Pronchery	ext	v3.16b, v0.16b, v1.16b, #12
310*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
311*4757b351SPierre Pronchery	ext	v22.16b, v4.16b, v0.16b, #12
312*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
313*4757b351SPierre Pronchery	ext	v23.16b, v1.16b, v2.16b, #8
314*4757b351SPierre Pronchery.inst	0xce62c083	//sm3partw1 v3.4s, v4.4s, v2.4s
315*4757b351SPierre Pronchery.inst	0xce76c6e3	//sm3partw2 v3.4s, v23.4s, v22.4s
316*4757b351SPierre Pronchery	eor	v22.16b, v4.16b, v0.16b
317*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
318*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
319*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
320*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
321*4757b351SPierre Pronchery.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
322*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
323*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
324*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
325*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
326*4757b351SPierre Pronchery.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
327*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
328*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
329*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
330*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
331*4757b351SPierre Pronchery.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
332*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
333*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
334*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
335*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
336*4757b351SPierre Pronchery.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]
337*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
338*4757b351SPierre Pronchery	ext	v4.16b, v1.16b, v2.16b, #12
339*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
340*4757b351SPierre Pronchery	ext	v22.16b, v0.16b, v1.16b, #12
341*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
342*4757b351SPierre Pronchery	ext	v23.16b, v2.16b, v3.16b, #8
343*4757b351SPierre Pronchery.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
344*4757b351SPierre Pronchery.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
345*4757b351SPierre Pronchery	eor	v22.16b, v0.16b, v1.16b
346*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
347*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
348*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
349*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
350*4757b351SPierre Pronchery.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
351*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
352*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
353*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
354*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
355*4757b351SPierre Pronchery.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
356*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
357*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
358*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
359*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
360*4757b351SPierre Pronchery.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
361*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
362*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
363*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
364*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
365*4757b351SPierre Pronchery.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]
366*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
367*4757b351SPierre Pronchery	ext	v0.16b, v2.16b, v3.16b, #12
368*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
369*4757b351SPierre Pronchery	ext	v22.16b, v1.16b, v2.16b, #12
370*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
371*4757b351SPierre Pronchery	ext	v23.16b, v3.16b, v4.16b, #8
372*4757b351SPierre Pronchery.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
373*4757b351SPierre Pronchery.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
374*4757b351SPierre Pronchery	eor	v22.16b, v1.16b, v2.16b
375*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
376*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
377*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
378*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
379*4757b351SPierre Pronchery.inst	0xce418ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[0]
380*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
381*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
382*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
383*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
384*4757b351SPierre Pronchery.inst	0xce419ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[1]
385*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
386*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
387*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
388*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
389*4757b351SPierre Pronchery.inst	0xce41aee6	//sm3tt2b v6.4s, v23.4s, v1.4s[2]
390*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
391*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
392*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
393*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
394*4757b351SPierre Pronchery.inst	0xce41bee6	//sm3tt2b v6.4s, v23.4s, v1.4s[3]
395*4757b351SPierre Pronchery	// s4 = w7  | w8  | w9  | w10
396*4757b351SPierre Pronchery	ext	v1.16b, v3.16b, v4.16b, #12
397*4757b351SPierre Pronchery	// vtmp1 = w3  | w4  | w5  | w6
398*4757b351SPierre Pronchery	ext	v22.16b, v2.16b, v3.16b, #12
399*4757b351SPierre Pronchery	// vtmp2 = w10 | w11 | w12 | w13
400*4757b351SPierre Pronchery	ext	v23.16b, v4.16b, v0.16b, #8
401*4757b351SPierre Pronchery.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
402*4757b351SPierre Pronchery.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
403*4757b351SPierre Pronchery	eor	v22.16b, v2.16b, v3.16b
404*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
405*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
406*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
407*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
408*4757b351SPierre Pronchery.inst	0xce428ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[0]
409*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
410*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
411*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
412*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
413*4757b351SPierre Pronchery.inst	0xce429ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[1]
414*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
415*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
416*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
417*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
418*4757b351SPierre Pronchery.inst	0xce42aee6	//sm3tt2b v6.4s, v23.4s, v2.4s[2]
419*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
420*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
421*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
422*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
423*4757b351SPierre Pronchery.inst	0xce42bee6	//sm3tt2b v6.4s, v23.4s, v2.4s[3]
	// The last three groups run their four rounds on words already in
	// the window; no further sm3partw1/sm3partw2 expansion is performed.
424*4757b351SPierre Pronchery	eor	v22.16b, v3.16b, v4.16b
425*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
426*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
427*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
428*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
429*4757b351SPierre Pronchery.inst	0xce438ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[0]
430*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
431*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
432*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
433*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
434*4757b351SPierre Pronchery.inst	0xce439ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[1]
435*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
436*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
437*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
438*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
439*4757b351SPierre Pronchery.inst	0xce43aee6	//sm3tt2b v6.4s, v23.4s, v3.4s[2]
440*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
441*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
442*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
443*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
444*4757b351SPierre Pronchery.inst	0xce43bee6	//sm3tt2b v6.4s, v23.4s, v3.4s[3]
445*4757b351SPierre Pronchery	eor	v22.16b, v4.16b, v0.16b
446*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
447*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
448*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
449*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
450*4757b351SPierre Pronchery.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
451*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
452*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
453*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
454*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
455*4757b351SPierre Pronchery.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
456*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
457*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
458*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
459*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
460*4757b351SPierre Pronchery.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
461*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
462*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
463*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
464*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
465*4757b351SPierre Pronchery.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]
466*4757b351SPierre Pronchery	eor	v22.16b, v0.16b, v1.16b
467*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
468*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
469*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
470*4757b351SPierre Pronchery.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
471*4757b351SPierre Pronchery.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
472*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
473*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
474*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
475*4757b351SPierre Pronchery.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
476*4757b351SPierre Pronchery.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
477*4757b351SPierre Pronchery.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
478*4757b351SPierre Pronchery	shl	v21.4s, v20.4s, #1
479*4757b351SPierre Pronchery	sri	v21.4s, v20.4s, #31
480*4757b351SPierre Pronchery.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
481*4757b351SPierre Pronchery.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
482*4757b351SPierre Pronchery.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
483*4757b351SPierre Pronchery	shl	v20.4s, v21.4s, #1
484*4757b351SPierre Pronchery	sri	v20.4s, v21.4s, #31
485*4757b351SPierre Pronchery.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
486*4757b351SPierre Pronchery.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]
	// all 64 rounds done: XOR in the state saved at the top of the loop
487*4757b351SPierre Pronchery	eor	v5.16b, v5.16b, v18.16b
488*4757b351SPierre Pronchery	eor	v6.16b, v6.16b, v19.16b
489*4757b351SPierre Pronchery
490*4757b351SPierre Pronchery	// any remaining blocks?
491*4757b351SPierre Pronchery	cbnz	w2, .Loop
492*4757b351SPierre Pronchery
493*4757b351SPierre Pronchery	// save state
	// undo the word reversal applied at entry, then write both state
	// vectors back to [x0]
494*4757b351SPierre Pronchery	rev64	v5.4s, v5.4s
495*4757b351SPierre Pronchery	rev64	v6.4s, v6.4s
496*4757b351SPierre Pronchery	ext	v5.16b, v5.16b, v5.16b, #8
497*4757b351SPierre Pronchery	ext	v6.16b, v6.16b, v6.16b, #8
498*4757b351SPierre Pronchery	st1	{v5.4s,v6.4s}, [x0]
499*4757b351SPierre Pronchery	ret
500*4757b351SPierre Pronchery.size	ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order
// Read-only constants for ossl_hwsm3_block_data_order.
501*4757b351SPierre Pronchery.section	.rodata
502*4757b351SPierre Pronchery
503*4757b351SPierre Pronchery.type	_sm3_consts,%object
// 2^3 = 8-byte alignment, so the two words can be fetched by one pair load
504*4757b351SPierre Pronchery.align	3
505*4757b351SPierre Pronchery_sm3_consts:
// .Tj holds two 32-bit words that the function loads together with
// "ldp s16, s17".  The first word seeds the rotating constant for the
// rounds using sm3tt1a/sm3tt2a, the second for the rounds using
// sm3tt1b/sm3tt2b.
// NOTE(review): 0x9d8a7a87 equals 0x7a879d8a rotated left by 16 bits,
// presumably the SM3 constant for rounds 16-63 pre-rotated for round 16
// -- confirm against the SM3 specification.
506*4757b351SPierre Pronchery.Tj:
507*4757b351SPierre Pronchery.word	0x79cc4519, 0x9d8a7a87
508*4757b351SPierre Pronchery.size	_sm3_consts,.-_sm3_consts
// return to the section that was active before .section .rodata
509*4757b351SPierre Pronchery.previous
510