/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/*
	 * Register aliases.
	 *
	 * dga/dgb   - the two 128-bit halves of the digest state, loaded from
	 *             [x0] on entry and stored back to [x0] on exit.
	 * t0/t1     - alternating temporaries holding "schedule words + round
	 *             constants" for the current and the next round group.
	 * dg0/dg1   - working copies of dga/dgb that the rounds operate on.
	 * dg2       - copy of dg0 taken before sha256h overwrites it, used as
	 *             the second source operand of sha256h2.
	 *
	 * Both the q-form (for sha256h/sha256h2) and the v-form (for vector
	 * arithmetic) of each register are given a name.
	 */
	dga		.req	q20
	dgav		.req	v20
	dgb		.req	q21
	dgbv		.req	v21

	t0		.req	v22
	t1		.req	v23

	dg0q		.req	q24
	dg0v		.req	v24
	dg1q		.req	q25
	dg1v		.req	v25
	dg2q		.req	q26
	dg2v		.req	v26

	/*
	 * add_only ev, rc, s0
	 *
	 * Perform one sha256h/sha256h2 pair on dg0/dg1, and pre-compute the
	 * input for the following pair.
	 *
	 *   ev == 0: consume t0, and set t1 = v<s0> + <rc>.
	 *   ev != 0: consume t1, and set t0 = v<s0> + <rc>; the addition is
	 *            omitted when s0 is blank (used for the very last pair,
	 *            where there is nothing left to prepare).
	 *
	 * rc is a full register name (e.g. v13), s0 is a bare register number
	 * (e.g. 17). Invocations must alternate ev = 0, 1, 0, 1, ... so that
	 * each pair consumes the temporary written by the previous one.
	 */
	.macro		add_only, ev, rc, s0
	mov		dg2v.16b, dg0v.16b
	.ifeq		\ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha256h		dg0q, dg1q, t0.4s
	sha256h2	dg1q, dg2q, t0.4s
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha256h		dg0q, dg1q, t1.4s
	sha256h2	dg1q, dg2q, t1.4s
	.endif
	.endm

	/*
	 * add_update ev, rc, s0, s1, s2, s3
	 *
	 * Same as add_only \ev, \rc, \s1, but additionally replaces v<s0> with
	 * the next four message schedule words: sha256su0 is issued before the
	 * round instructions and sha256su1 after them. s0..s3 are bare
	 * register numbers.
	 */
	.macro		add_update, ev, rc, s0, s1, s2, s3
	sha256su0	v\s0\().4s, v\s1\().4s
	add_only	\ev, \rc, \s1
	sha256su1	v\s0\().4s, v\s2\().4s, v\s3\().4s
	.endm

	/*
	 * The SHA-256 round constants
	 */
	.section	".rodata", "a"
	.align		4
.Lsha2_rcon:
	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2

	/*
	 * int __sha256_ce_transform(struct sha256_ce_state *sst, u8 const *src,
	 *			     int blocks)
	 *
	 * Register usage:
	 *   x0      - sst; the digest state is read from and written to [x0]
	 *   x1      - src; advanced by 64 bytes for every block consumed
	 *   w2      - blocks; decremented once per block consumed
	 *   w4/x4   - sha256_ce_state::finalize flag, later the count field
	 *   x5, x6  - scratch registers handed to cond_yield
	 *   x7, x8  - scratch
	 *   v0-v15  - the 64 round constants
	 *   v16-v19 - the 16 message schedule words of the current block
	 *
	 * Returns (in w0) the value of w2 on exit, i.e. the number of blocks
	 * that were not consumed. This is zero when the loop ran to
	 * completion, and non-zero when cond_yield branched to the exit
	 * early.
	 *
	 * The field offsets within struct sha256_ce_state are not hard-coded;
	 * they are read at run time from the variables
	 * sha256_ce_offsetof_finalize and sha256_ce_offsetof_count, which are
	 * defined outside this file.
	 */
	.text
SYM_FUNC_START(__sha256_ce_transform)
	/* load round constants */
	adr_l		x8, .Lsha2_rcon
	ld1		{ v0.4s- v3.4s}, [x8], #64
	ld1		{ v4.4s- v7.4s}, [x8], #64
	ld1		{ v8.4s-v11.4s}, [x8], #64
	ld1		{v12.4s-v15.4s}, [x8]

	/* load state */
	ld1		{dgav.4s, dgbv.4s}, [x0]

	/* load sha256_ce_state::finalize */
	ldr_l		w4, sha256_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* load input */
0:	ld1		{v16.4s-v19.4s}, [x1], #64
	sub		w2, w2, #1

	/* the input is a big-endian word stream: byte swap on LE kernels */
CPU_LE(	rev32		v16.16b, v16.16b	)
CPU_LE(	rev32		v17.16b, v17.16b	)
CPU_LE(	rev32		v18.16b, v18.16b	)
CPU_LE(	rev32		v19.16b, v19.16b	)

	/*
	 * Entry point for processing the block held in v16-v19. The padding
	 * path below branches here directly, bypassing the load and the
	 * byte swap above.
	 */
1:	add		t0.4s, v16.4s, v0.4s
	mov		dg0v.16b, dgav.16b
	mov		dg1v.16b, dgbv.16b

	/* 12 round groups that also extend the message schedule ... */
	add_update	0,  v1, 16, 17, 18, 19
	add_update	1,  v2, 17, 18, 19, 16
	add_update	0,  v3, 18, 19, 16, 17
	add_update	1,  v4, 19, 16, 17, 18

	add_update	0,  v5, 16, 17, 18, 19
	add_update	1,  v6, 17, 18, 19, 16
	add_update	0,  v7, 18, 19, 16, 17
	add_update	1,  v8, 19, 16, 17, 18

	add_update	0,  v9, 16, 17, 18, 19
	add_update	1, v10, 17, 18, 19, 16
	add_update	0, v11, 18, 19, 16, 17
	add_update	1, v12, 19, 16, 17, 18

	/* ... followed by 4 round groups that only consume it */
	add_only	0, v13, 17
	add_only	1, v14, 18
	add_only	0, v15, 19
	add_only	1

	/* update state */
	add		dgav.4s, dgav.4s, dg0v.4s
	add		dgbv.4s, dgbv.4s, dg1v.4s

	/* handled all input blocks? */
	cbz		w2, 2f
	/* blocks remain: possibly bail out to 3f, otherwise keep going */
	cond_yield	3f, x5, x6
	b		0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 *
	 * The padding block is built directly in v16-v19 in native word
	 * order, which is why it is fed in at label 1 (after the byte swap):
	 *   v16      = { 0x80000000, 0, 0, 0 }  (fmov to d16 clears the
	 *                                        upper half of v16)
	 *   v17, v18 = 0
	 *   v19      = { 0, 0, bit count [63:32], bit count [31:0] }
	 * x4 is cleared before looping back so that the second pass through
	 * label 2 falls straight through to the exit.
	 */
2:	cbz		x4, 3f
	ldr_l		w4, sha256_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v17.2d, #0
	mov		x8, #0x80000000
	movi		v18.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d16, x8
	mov		x4, #0
	mov		v19.d[0], xzr
	mov		v19.d[1], x7
	b		1b

	/* store new state */
3:	st1		{dgav.4s, dgbv.4s}, [x0]
	mov		w0, w2
	ret
SYM_FUNC_END(__sha256_ce_transform)