Lines Matching +full:ext +full:- +full:regs

1 /* Do not modify. This file is auto-generated from sha512-armv8.pl. */
2 // Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved.
23 // SHA256-hw SHA256(*) SHA512
24 // Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
25 // Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
26 // Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
28 // X-Gene 20.0 (+100%) 12.8 (+300%(***))
35 // (**) The result is a trade-off: it's possible to improve it by
37 // on Cortex-A53 (or by 4 cycles per round).
38 // (***) Super-impressive coefficients over gcc-generated code are
40 // generated with -mgeneral-regs-only is significantly faster
41 // and the gap is only 40-90%.
46 // version of SHA256 for 64-bit processors. This is because performance
47 // improvement on most wide-spread Cortex-A5x processors was observed
48 // to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
49 // observed that 32-bit NEON SHA256 performs significantly better than
50 // 64-bit scalar version on *some* of the more recent processors. As
51 // result 64-bit NEON version of SHA256 was added to provide best
52 // all-round performance. For example it executes ~30% faster on X-Gene
54 // deliver much less improvement, likely *negative* on Cortex-A5x.
79 stp x29,x30,[sp,#-128]!
1041 .size sha512_block_data_order,.-sha512_block_data_order
1089 .size .LK512,.-.LK512
1100 // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
1101 stp x29,x30,[sp,#-16]!
1133 ext v24.16b,v24.16b,v24.16b,#8
1134 ext v5.16b,v2.16b,v3.16b,#8
1135 ext v6.16b,v1.16b,v2.16b,#8
1138 ext v7.16b,v20.16b,v21.16b,#8
1145 ext v25.16b,v25.16b,v25.16b,#8
1146 ext v5.16b,v4.16b,v2.16b,#8
1147 ext v6.16b,v0.16b,v4.16b,#8
1150 ext v7.16b,v21.16b,v22.16b,#8
1157 ext v24.16b,v24.16b,v24.16b,#8
1158 ext v5.16b,v1.16b,v4.16b,#8
1159 ext v6.16b,v3.16b,v1.16b,#8
1162 ext v7.16b,v22.16b,v23.16b,#8
1169 ext v25.16b,v25.16b,v25.16b,#8
1170 ext v5.16b,v0.16b,v1.16b,#8
1171 ext v6.16b,v2.16b,v0.16b,#8
1174 ext v7.16b,v23.16b,v16.16b,#8
1181 ext v24.16b,v24.16b,v24.16b,#8
1182 ext v5.16b,v3.16b,v0.16b,#8
1183 ext v6.16b,v4.16b,v3.16b,#8
1186 ext v7.16b,v16.16b,v17.16b,#8
1193 ext v25.16b,v25.16b,v25.16b,#8
1194 ext v5.16b,v2.16b,v3.16b,#8
1195 ext v6.16b,v1.16b,v2.16b,#8
1198 ext v7.16b,v17.16b,v18.16b,#8
1205 ext v24.16b,v24.16b,v24.16b,#8
1206 ext v5.16b,v4.16b,v2.16b,#8
1207 ext v6.16b,v0.16b,v4.16b,#8
1210 ext v7.16b,v18.16b,v19.16b,#8
1217 ext v25.16b,v25.16b,v25.16b,#8
1218 ext v5.16b,v1.16b,v4.16b,#8
1219 ext v6.16b,v3.16b,v1.16b,#8
1222 ext v7.16b,v19.16b,v20.16b,#8
1229 ext v24.16b,v24.16b,v24.16b,#8
1230 ext v5.16b,v0.16b,v1.16b,#8
1231 ext v6.16b,v2.16b,v0.16b,#8
1234 ext v7.16b,v20.16b,v21.16b,#8
1241 ext v25.16b,v25.16b,v25.16b,#8
1242 ext v5.16b,v3.16b,v0.16b,#8
1243 ext v6.16b,v4.16b,v3.16b,#8
1246 ext v7.16b,v21.16b,v22.16b,#8
1253 ext v24.16b,v24.16b,v24.16b,#8
1254 ext v5.16b,v2.16b,v3.16b,#8
1255 ext v6.16b,v1.16b,v2.16b,#8
1258 ext v7.16b,v22.16b,v23.16b,#8
1265 ext v25.16b,v25.16b,v25.16b,#8
1266 ext v5.16b,v4.16b,v2.16b,#8
1267 ext v6.16b,v0.16b,v4.16b,#8
1270 ext v7.16b,v23.16b,v16.16b,#8
1277 ext v24.16b,v24.16b,v24.16b,#8
1278 ext v5.16b,v1.16b,v4.16b,#8
1279 ext v6.16b,v3.16b,v1.16b,#8
1282 ext v7.16b,v16.16b,v17.16b,#8
1289 ext v25.16b,v25.16b,v25.16b,#8
1290 ext v5.16b,v0.16b,v1.16b,#8
1291 ext v6.16b,v2.16b,v0.16b,#8
1294 ext v7.16b,v17.16b,v18.16b,#8
1301 ext v24.16b,v24.16b,v24.16b,#8
1302 ext v5.16b,v3.16b,v0.16b,#8
1303 ext v6.16b,v4.16b,v3.16b,#8
1306 ext v7.16b,v18.16b,v19.16b,#8
1313 ext v25.16b,v25.16b,v25.16b,#8
1314 ext v5.16b,v2.16b,v3.16b,#8
1315 ext v6.16b,v1.16b,v2.16b,#8
1318 ext v7.16b,v19.16b,v20.16b,#8
1325 ext v24.16b,v24.16b,v24.16b,#8
1326 ext v5.16b,v4.16b,v2.16b,#8
1327 ext v6.16b,v0.16b,v4.16b,#8
1330 ext v7.16b,v20.16b,v21.16b,#8
1337 ext v25.16b,v25.16b,v25.16b,#8
1338 ext v5.16b,v1.16b,v4.16b,#8
1339 ext v6.16b,v3.16b,v1.16b,#8
1342 ext v7.16b,v21.16b,v22.16b,#8
1349 ext v24.16b,v24.16b,v24.16b,#8
1350 ext v5.16b,v0.16b,v1.16b,#8
1351 ext v6.16b,v2.16b,v0.16b,#8
1354 ext v7.16b,v22.16b,v23.16b,#8
1361 ext v25.16b,v25.16b,v25.16b,#8
1362 ext v5.16b,v3.16b,v0.16b,#8
1363 ext v6.16b,v4.16b,v3.16b,#8
1366 ext v7.16b,v23.16b,v16.16b,#8
1373 ext v24.16b,v24.16b,v24.16b,#8
1374 ext v5.16b,v2.16b,v3.16b,#8
1375 ext v6.16b,v1.16b,v2.16b,#8
1378 ext v7.16b,v16.16b,v17.16b,#8
1385 ext v25.16b,v25.16b,v25.16b,#8
1386 ext v5.16b,v4.16b,v2.16b,#8
1387 ext v6.16b,v0.16b,v4.16b,#8
1390 ext v7.16b,v17.16b,v18.16b,#8
1397 ext v24.16b,v24.16b,v24.16b,#8
1398 ext v5.16b,v1.16b,v4.16b,#8
1399 ext v6.16b,v3.16b,v1.16b,#8
1402 ext v7.16b,v18.16b,v19.16b,#8
1409 ext v25.16b,v25.16b,v25.16b,#8
1410 ext v5.16b,v0.16b,v1.16b,#8
1411 ext v6.16b,v2.16b,v0.16b,#8
1414 ext v7.16b,v19.16b,v20.16b,#8
1421 ext v24.16b,v24.16b,v24.16b,#8
1422 ext v5.16b,v3.16b,v0.16b,#8
1423 ext v6.16b,v4.16b,v3.16b,#8
1426 ext v7.16b,v20.16b,v21.16b,#8
1433 ext v25.16b,v25.16b,v25.16b,#8
1434 ext v5.16b,v2.16b,v3.16b,#8
1435 ext v6.16b,v1.16b,v2.16b,#8
1438 ext v7.16b,v21.16b,v22.16b,#8
1445 ext v24.16b,v24.16b,v24.16b,#8
1446 ext v5.16b,v4.16b,v2.16b,#8
1447 ext v6.16b,v0.16b,v4.16b,#8
1450 ext v7.16b,v22.16b,v23.16b,#8
1457 ext v25.16b,v25.16b,v25.16b,#8
1458 ext v5.16b,v1.16b,v4.16b,#8
1459 ext v6.16b,v3.16b,v1.16b,#8
1462 ext v7.16b,v23.16b,v16.16b,#8
1469 ext v24.16b,v24.16b,v24.16b,#8
1470 ext v5.16b,v0.16b,v1.16b,#8
1471 ext v6.16b,v2.16b,v0.16b,#8
1474 ext v7.16b,v16.16b,v17.16b,#8
1481 ext v25.16b,v25.16b,v25.16b,#8
1482 ext v5.16b,v3.16b,v0.16b,#8
1483 ext v6.16b,v4.16b,v3.16b,#8
1486 ext v7.16b,v17.16b,v18.16b,#8
1493 ext v24.16b,v24.16b,v24.16b,#8
1494 ext v5.16b,v2.16b,v3.16b,#8
1495 ext v6.16b,v1.16b,v2.16b,#8
1498 ext v7.16b,v18.16b,v19.16b,#8
1505 ext v25.16b,v25.16b,v25.16b,#8
1506 ext v5.16b,v4.16b,v2.16b,#8
1507 ext v6.16b,v0.16b,v4.16b,#8
1510 ext v7.16b,v19.16b,v20.16b,#8
1518 ext v24.16b,v24.16b,v24.16b,#8
1519 ext v5.16b,v1.16b,v4.16b,#8
1520 ext v6.16b,v3.16b,v1.16b,#8
1529 ext v25.16b,v25.16b,v25.16b,#8
1530 ext v5.16b,v0.16b,v1.16b,#8
1531 ext v6.16b,v2.16b,v0.16b,#8
1540 ext v24.16b,v24.16b,v24.16b,#8
1541 ext v5.16b,v3.16b,v0.16b,#8
1542 ext v6.16b,v4.16b,v3.16b,#8
1551 ext v25.16b,v25.16b,v25.16b,#8
1552 ext v5.16b,v2.16b,v3.16b,#8
1553 ext v6.16b,v1.16b,v2.16b,#8
1562 ext v24.16b,v24.16b,v24.16b,#8
1563 ext v5.16b,v4.16b,v2.16b,#8
1564 ext v6.16b,v0.16b,v4.16b,#8
1573 ext v25.16b,v25.16b,v25.16b,#8
1574 ext v5.16b,v1.16b,v4.16b,#8
1575 ext v6.16b,v3.16b,v1.16b,#8
1584 ext v24.16b,v24.16b,v24.16b,#8
1585 ext v5.16b,v0.16b,v1.16b,#8
1586 ext v6.16b,v2.16b,v0.16b,#8
1595 ext v25.16b,v25.16b,v25.16b,#8
1596 ext v5.16b,v3.16b,v0.16b,#8
1597 ext v6.16b,v4.16b,v3.16b,#8
1614 .size sha512_block_armv8,.-sha512_block_armv8