Lines Matching +full:- +full:16 +full:g
1 /* Do not modify. This file is auto-generated from sha512-armv8.pl. */
2 // Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
23 // SHA256-hw SHA256(*) SHA512
24 // Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
25 // Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
26 // Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
28 // X-Gene 20.0 (+100%) 12.8 (+300%(***))
35 // (**) The result is a trade-off: it's possible to improve it by
37 // on Cortex-A53 (or by 4 cycles per round).
38 // (***) Super-impressive coefficients over gcc-generated code are
40 // generated with -mgeneral-regs-only is significantly faster
41 // and the gap is only 40-90%.
46 // version of SHA256 for 64-bit processors. This is because the performance
47 // improvement on the most widespread Cortex-A5x processors was observed
48 // to be marginal: the same on Cortex-A53 and ~10% on A57. But then it was
49 // observed that 32-bit NEON SHA256 performs significantly better than the
50 // 64-bit scalar version on *some* of the more recent processors. As a
51 // result, a 64-bit NEON version of SHA256 was added to provide the best
52 // all-round performance. For example, it executes ~30% faster on X-Gene
54 // deliver much less improvement, likely *negative* on Cortex-A5x.
79 stp x29,x30,[sp,#-128]!
82 stp x19,x20,[sp,#16]
111 orr x17,x17,x19 // Ch(e,f,g)
115 add x27,x27,x17 // h+=Ch(e,f,g)
136 orr x17,x17,x28 // Ch(e,f,g)
140 add x26,x26,x17 // h+=Ch(e,f,g)
160 orr x17,x17,x19 // Ch(e,f,g)
164 add x25,x25,x17 // h+=Ch(e,f,g)
185 orr x17,x17,x28 // Ch(e,f,g)
189 add x24,x24,x17 // h+=Ch(e,f,g)
209 orr x17,x17,x19 // Ch(e,f,g)
213 add x23,x23,x17 // h+=Ch(e,f,g)
234 orr x17,x17,x28 // Ch(e,f,g)
238 add x22,x22,x17 // h+=Ch(e,f,g)
258 orr x17,x17,x19 // Ch(e,f,g)
262 add x21,x21,x17 // h+=Ch(e,f,g)
283 orr x17,x17,x28 // Ch(e,f,g)
287 add x20,x20,x17 // h+=Ch(e,f,g)
307 orr x17,x17,x19 // Ch(e,f,g)
311 add x27,x27,x17 // h+=Ch(e,f,g)
332 orr x17,x17,x28 // Ch(e,f,g)
336 add x26,x26,x17 // h+=Ch(e,f,g)
356 orr x17,x17,x19 // Ch(e,f,g)
360 add x25,x25,x17 // h+=Ch(e,f,g)
382 orr x17,x17,x28 // Ch(e,f,g)
386 add x24,x24,x17 // h+=Ch(e,f,g)
407 orr x17,x17,x19 // Ch(e,f,g)
411 add x23,x23,x17 // h+=Ch(e,f,g)
433 orr x17,x17,x28 // Ch(e,f,g)
437 add x22,x22,x17 // h+=Ch(e,f,g)
452 str x9,[sp,#16]
459 orr x17,x17,x19 // Ch(e,f,g)
463 add x21,x21,x17 // h+=Ch(e,f,g)
489 orr x17,x17,x28 // Ch(e,f,g)
493 add x20,x20,x17 // h+=Ch(e,f,g)
521 orr x17,x17,x19 // Ch(e,f,g)
525 add x27,x27,x17 // h+=Ch(e,f,g)
540 ldr x9,[sp,#16]
552 orr x17,x17,x28 // Ch(e,f,g)
556 add x26,x26,x17 // h+=Ch(e,f,g)
572 str x13,[sp,#16]
583 orr x17,x17,x19 // Ch(e,f,g)
587 add x25,x25,x17 // h+=Ch(e,f,g)
614 orr x17,x17,x28 // Ch(e,f,g)
618 add x24,x24,x17 // h+=Ch(e,f,g)
645 orr x17,x17,x19 // Ch(e,f,g)
649 add x23,x23,x17 // h+=Ch(e,f,g)
664 ldr x13,[sp,#16]
676 orr x17,x17,x28 // Ch(e,f,g)
680 add x22,x22,x17 // h+=Ch(e,f,g)
696 str x1,[sp,#16]
707 orr x17,x17,x19 // Ch(e,f,g)
711 add x21,x21,x17 // h+=Ch(e,f,g)
738 orr x17,x17,x28 // Ch(e,f,g)
742 add x20,x20,x17 // h+=Ch(e,f,g)
769 orr x17,x17,x19 // Ch(e,f,g)
773 add x27,x27,x17 // h+=Ch(e,f,g)
788 ldr x1,[sp,#16]
800 orr x17,x17,x28 // Ch(e,f,g)
804 add x26,x26,x17 // h+=Ch(e,f,g)
820 str x5,[sp,#16]
831 orr x17,x17,x19 // Ch(e,f,g)
835 add x25,x25,x17 // h+=Ch(e,f,g)
862 orr x17,x17,x28 // Ch(e,f,g)
866 add x24,x24,x17 // h+=Ch(e,f,g)
893 orr x17,x17,x19 // Ch(e,f,g)
897 add x23,x23,x17 // h+=Ch(e,f,g)
912 ldr x5,[sp,#16]
924 orr x17,x17,x28 // Ch(e,f,g)
928 add x22,x22,x17 // h+=Ch(e,f,g)
944 str x9,[sp,#16]
955 orr x17,x17,x19 // Ch(e,f,g)
959 add x21,x21,x17 // h+=Ch(e,f,g)
986 orr x17,x17,x28 // Ch(e,f,g)
990 add x20,x20,x17 // h+=Ch(e,f,g)
1031 ldp x19,x20,[x29,#16]
1040 .size sha512_block_data_order,.-sha512_block_data_order
1086 .size .LK512,.-.LK512
1095 // Armv8.3-A PAuth: even though x30 is pushed to the stack, it is not popped later
1096 stp x29,x30,[sp,#-16]!
1099 ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input
1100 ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
1105 rev64 v16.16b,v16.16b
1106 rev64 v17.16b,v17.16b
1107 rev64 v18.16b,v18.16b
1108 rev64 v19.16b,v19.16b
1109 rev64 v20.16b,v20.16b
1110 rev64 v21.16b,v21.16b
1111 rev64 v22.16b,v22.16b
1112 rev64 v23.16b,v23.16b
1117 ld1 {v24.2d},[x3],#16
1120 orr v26.16b,v0.16b,v0.16b // offload
1121 orr v27.16b,v1.16b,v1.16b
1122 orr v28.16b,v2.16b,v2.16b
1123 orr v29.16b,v3.16b,v3.16b
1126 ld1 {v25.2d},[x3],#16
1127 ext v24.16b,v24.16b,v24.16b,#8
1128 ext v5.16b,v2.16b,v3.16b,#8
1129 ext v6.16b,v1.16b,v2.16b,#8
1131 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1132 ext v7.16b,v20.16b,v21.16b,#8
1133 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1134 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1136 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1138 ld1 {v24.2d},[x3],#16
1139 ext v25.16b,v25.16b,v25.16b,#8
1140 ext v5.16b,v4.16b,v2.16b,#8
1141 ext v6.16b,v0.16b,v4.16b,#8
1143 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1144 ext v7.16b,v21.16b,v22.16b,#8
1145 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1146 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1148 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1150 ld1 {v25.2d},[x3],#16
1151 ext v24.16b,v24.16b,v24.16b,#8
1152 ext v5.16b,v1.16b,v4.16b,#8
1153 ext v6.16b,v3.16b,v1.16b,#8
1155 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1156 ext v7.16b,v22.16b,v23.16b,#8
1157 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1158 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1160 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1162 ld1 {v24.2d},[x3],#16
1163 ext v25.16b,v25.16b,v25.16b,#8
1164 ext v5.16b,v0.16b,v1.16b,#8
1165 ext v6.16b,v2.16b,v0.16b,#8
1167 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1168 ext v7.16b,v23.16b,v16.16b,#8
1169 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1170 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1172 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1174 ld1 {v25.2d},[x3],#16
1175 ext v24.16b,v24.16b,v24.16b,#8
1176 ext v5.16b,v3.16b,v0.16b,#8
1177 ext v6.16b,v4.16b,v3.16b,#8
1179 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1180 ext v7.16b,v16.16b,v17.16b,#8
1181 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1182 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1184 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1186 ld1 {v24.2d},[x3],#16
1187 ext v25.16b,v25.16b,v25.16b,#8
1188 ext v5.16b,v2.16b,v3.16b,#8
1189 ext v6.16b,v1.16b,v2.16b,#8
1191 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1192 ext v7.16b,v17.16b,v18.16b,#8
1193 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1194 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1196 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1198 ld1 {v25.2d},[x3],#16
1199 ext v24.16b,v24.16b,v24.16b,#8
1200 ext v5.16b,v4.16b,v2.16b,#8
1201 ext v6.16b,v0.16b,v4.16b,#8
1203 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1204 ext v7.16b,v18.16b,v19.16b,#8
1205 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1206 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1208 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1210 ld1 {v24.2d},[x3],#16
1211 ext v25.16b,v25.16b,v25.16b,#8
1212 ext v5.16b,v1.16b,v4.16b,#8
1213 ext v6.16b,v3.16b,v1.16b,#8
1215 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1216 ext v7.16b,v19.16b,v20.16b,#8
1217 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1218 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1220 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1222 ld1 {v25.2d},[x3],#16
1223 ext v24.16b,v24.16b,v24.16b,#8
1224 ext v5.16b,v0.16b,v1.16b,#8
1225 ext v6.16b,v2.16b,v0.16b,#8
1227 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1228 ext v7.16b,v20.16b,v21.16b,#8
1229 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1230 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1232 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1234 ld1 {v24.2d},[x3],#16
1235 ext v25.16b,v25.16b,v25.16b,#8
1236 ext v5.16b,v3.16b,v0.16b,#8
1237 ext v6.16b,v4.16b,v3.16b,#8
1239 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1240 ext v7.16b,v21.16b,v22.16b,#8
1241 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1242 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1244 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1246 ld1 {v25.2d},[x3],#16
1247 ext v24.16b,v24.16b,v24.16b,#8
1248 ext v5.16b,v2.16b,v3.16b,#8
1249 ext v6.16b,v1.16b,v2.16b,#8
1251 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1252 ext v7.16b,v22.16b,v23.16b,#8
1253 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1254 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1256 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1258 ld1 {v24.2d},[x3],#16
1259 ext v25.16b,v25.16b,v25.16b,#8
1260 ext v5.16b,v4.16b,v2.16b,#8
1261 ext v6.16b,v0.16b,v4.16b,#8
1263 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1264 ext v7.16b,v23.16b,v16.16b,#8
1265 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1266 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1268 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1270 ld1 {v25.2d},[x3],#16
1271 ext v24.16b,v24.16b,v24.16b,#8
1272 ext v5.16b,v1.16b,v4.16b,#8
1273 ext v6.16b,v3.16b,v1.16b,#8
1275 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1276 ext v7.16b,v16.16b,v17.16b,#8
1277 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1278 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1280 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1282 ld1 {v24.2d},[x3],#16
1283 ext v25.16b,v25.16b,v25.16b,#8
1284 ext v5.16b,v0.16b,v1.16b,#8
1285 ext v6.16b,v2.16b,v0.16b,#8
1287 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1288 ext v7.16b,v17.16b,v18.16b,#8
1289 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1290 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1292 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1294 ld1 {v25.2d},[x3],#16
1295 ext v24.16b,v24.16b,v24.16b,#8
1296 ext v5.16b,v3.16b,v0.16b,#8
1297 ext v6.16b,v4.16b,v3.16b,#8
1299 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1300 ext v7.16b,v18.16b,v19.16b,#8
1301 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1302 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1304 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1306 ld1 {v24.2d},[x3],#16
1307 ext v25.16b,v25.16b,v25.16b,#8
1308 ext v5.16b,v2.16b,v3.16b,#8
1309 ext v6.16b,v1.16b,v2.16b,#8
1311 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1312 ext v7.16b,v19.16b,v20.16b,#8
1313 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1314 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1316 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1318 ld1 {v25.2d},[x3],#16
1319 ext v24.16b,v24.16b,v24.16b,#8
1320 ext v5.16b,v4.16b,v2.16b,#8
1321 ext v6.16b,v0.16b,v4.16b,#8
1323 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1324 ext v7.16b,v20.16b,v21.16b,#8
1325 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1326 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1328 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1330 ld1 {v24.2d},[x3],#16
1331 ext v25.16b,v25.16b,v25.16b,#8
1332 ext v5.16b,v1.16b,v4.16b,#8
1333 ext v6.16b,v3.16b,v1.16b,#8
1335 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1336 ext v7.16b,v21.16b,v22.16b,#8
1337 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1338 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1340 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1342 ld1 {v25.2d},[x3],#16
1343 ext v24.16b,v24.16b,v24.16b,#8
1344 ext v5.16b,v0.16b,v1.16b,#8
1345 ext v6.16b,v2.16b,v0.16b,#8
1347 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1348 ext v7.16b,v22.16b,v23.16b,#8
1349 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1350 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1352 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1354 ld1 {v24.2d},[x3],#16
1355 ext v25.16b,v25.16b,v25.16b,#8
1356 ext v5.16b,v3.16b,v0.16b,#8
1357 ext v6.16b,v4.16b,v3.16b,#8
1359 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1360 ext v7.16b,v23.16b,v16.16b,#8
1361 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1362 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1364 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1366 ld1 {v25.2d},[x3],#16
1367 ext v24.16b,v24.16b,v24.16b,#8
1368 ext v5.16b,v2.16b,v3.16b,#8
1369 ext v6.16b,v1.16b,v2.16b,#8
1371 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1372 ext v7.16b,v16.16b,v17.16b,#8
1373 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1374 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1376 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1378 ld1 {v24.2d},[x3],#16
1379 ext v25.16b,v25.16b,v25.16b,#8
1380 ext v5.16b,v4.16b,v2.16b,#8
1381 ext v6.16b,v0.16b,v4.16b,#8
1383 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1384 ext v7.16b,v17.16b,v18.16b,#8
1385 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1386 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1388 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1390 ld1 {v25.2d},[x3],#16
1391 ext v24.16b,v24.16b,v24.16b,#8
1392 ext v5.16b,v1.16b,v4.16b,#8
1393 ext v6.16b,v3.16b,v1.16b,#8
1395 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1396 ext v7.16b,v18.16b,v19.16b,#8
1397 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1398 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1400 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1402 ld1 {v24.2d},[x3],#16
1403 ext v25.16b,v25.16b,v25.16b,#8
1404 ext v5.16b,v0.16b,v1.16b,#8
1405 ext v6.16b,v2.16b,v0.16b,#8
1407 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1408 ext v7.16b,v19.16b,v20.16b,#8
1409 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1410 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1412 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1414 ld1 {v25.2d},[x3],#16
1415 ext v24.16b,v24.16b,v24.16b,#8
1416 ext v5.16b,v3.16b,v0.16b,#8
1417 ext v6.16b,v4.16b,v3.16b,#8
1419 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b
1420 ext v7.16b,v20.16b,v21.16b,#8
1421 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1422 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
1424 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1426 ld1 {v24.2d},[x3],#16
1427 ext v25.16b,v25.16b,v25.16b,#8
1428 ext v5.16b,v2.16b,v3.16b,#8
1429 ext v6.16b,v1.16b,v2.16b,#8
1431 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b
1432 ext v7.16b,v21.16b,v22.16b,#8
1433 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1434 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
1436 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1438 ld1 {v25.2d},[x3],#16
1439 ext v24.16b,v24.16b,v24.16b,#8
1440 ext v5.16b,v4.16b,v2.16b,#8
1441 ext v6.16b,v0.16b,v4.16b,#8
1443 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b
1444 ext v7.16b,v22.16b,v23.16b,#8
1445 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1446 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
1448 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1450 ld1 {v24.2d},[x3],#16
1451 ext v25.16b,v25.16b,v25.16b,#8
1452 ext v5.16b,v1.16b,v4.16b,#8
1453 ext v6.16b,v3.16b,v1.16b,#8
1455 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b
1456 ext v7.16b,v23.16b,v16.16b,#8
1457 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1458 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
1460 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1462 ld1 {v25.2d},[x3],#16
1463 ext v24.16b,v24.16b,v24.16b,#8
1464 ext v5.16b,v0.16b,v1.16b,#8
1465 ext v6.16b,v2.16b,v0.16b,#8
1467 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
1468 ext v7.16b,v16.16b,v17.16b,#8
1469 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1470 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
1472 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1474 ld1 {v24.2d},[x3],#16
1475 ext v25.16b,v25.16b,v25.16b,#8
1476 ext v5.16b,v3.16b,v0.16b,#8
1477 ext v6.16b,v4.16b,v3.16b,#8
1479 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
1480 ext v7.16b,v17.16b,v18.16b,#8
1481 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1482 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
1484 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1486 ld1 {v25.2d},[x3],#16
1487 ext v24.16b,v24.16b,v24.16b,#8
1488 ext v5.16b,v2.16b,v3.16b,#8
1489 ext v6.16b,v1.16b,v2.16b,#8
1491 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
1492 ext v7.16b,v18.16b,v19.16b,#8
1493 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1494 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
1496 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1498 ld1 {v24.2d},[x3],#16
1499 ext v25.16b,v25.16b,v25.16b,#8
1500 ext v5.16b,v4.16b,v2.16b,#8
1501 ext v6.16b,v0.16b,v4.16b,#8
1503 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b
1504 ext v7.16b,v19.16b,v20.16b,#8
1505 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1506 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
1508 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1509 ld1 {v25.2d},[x3],#16
1511 ld1 {v16.16b},[x1],#16 // load next input
1512 ext v24.16b,v24.16b,v24.16b,#8
1513 ext v5.16b,v1.16b,v4.16b,#8
1514 ext v6.16b,v3.16b,v1.16b,#8
1516 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1517 rev64 v16.16b,v16.16b
1519 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1520 ld1 {v24.2d},[x3],#16
1522 ld1 {v17.16b},[x1],#16 // load next input
1523 ext v25.16b,v25.16b,v25.16b,#8
1524 ext v5.16b,v0.16b,v1.16b,#8
1525 ext v6.16b,v2.16b,v0.16b,#8
1527 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1528 rev64 v17.16b,v17.16b
1530 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1531 ld1 {v25.2d},[x3],#16
1533 ld1 {v18.16b},[x1],#16 // load next input
1534 ext v24.16b,v24.16b,v24.16b,#8
1535 ext v5.16b,v3.16b,v0.16b,#8
1536 ext v6.16b,v4.16b,v3.16b,#8
1538 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1539 rev64 v18.16b,v18.16b
1541 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1542 ld1 {v24.2d},[x3],#16
1544 ld1 {v19.16b},[x1],#16 // load next input
1545 ext v25.16b,v25.16b,v25.16b,#8
1546 ext v5.16b,v2.16b,v3.16b,#8
1547 ext v6.16b,v1.16b,v2.16b,#8
1549 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
1550 rev64 v19.16b,v19.16b
1552 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b
1553 ld1 {v25.2d},[x3],#16
1555 ld1 {v20.16b},[x1],#16 // load next input
1556 ext v24.16b,v24.16b,v24.16b,#8
1557 ext v5.16b,v4.16b,v2.16b,#8
1558 ext v6.16b,v0.16b,v4.16b,#8
1560 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
1561 rev64 v20.16b,v20.16b
1563 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b
1564 ld1 {v24.2d},[x3],#16
1566 ld1 {v21.16b},[x1],#16 // load next input
1567 ext v25.16b,v25.16b,v25.16b,#8
1568 ext v5.16b,v1.16b,v4.16b,#8
1569 ext v6.16b,v3.16b,v1.16b,#8
1571 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
1572 rev64 v21.16b,v21.16b
1574 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b
1575 ld1 {v25.2d},[x3],#16
1577 ld1 {v22.16b},[x1],#16 // load next input
1578 ext v24.16b,v24.16b,v24.16b,#8
1579 ext v5.16b,v0.16b,v1.16b,#8
1580 ext v6.16b,v2.16b,v0.16b,#8
1582 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
1583 rev64 v22.16b,v22.16b
1585 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b
1588 ld1 {v23.16b},[x1],#16 // load next input
1589 ext v25.16b,v25.16b,v25.16b,#8
1590 ext v5.16b,v3.16b,v0.16b,#8
1591 ext v6.16b,v4.16b,v3.16b,#8
1593 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
1594 rev64 v23.16b,v23.16b
1596 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b
1606 ldr x29,[sp],#16
1608 .size sha512_block_armv8,.-sha512_block_armv8