/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/*
	 * Register aliases.
	 *
	 * k0-k3     : the four round constants, one replicated across all
	 *             lanes of each vector (set up with loadrc below)
	 * t0, t1    : scratch vectors holding "schedule words + round constant";
	 *             even and odd steps alternate between them
	 * dga/dgav  : first four 32-bit state words, loaded from [x0]
	 * dgb/dgbv  : fifth 32-bit state word, loaded from [x0, #16]
	 * dg0*      : working copy of dga that the sha1c/sha1p/sha1m
	 *             instructions update in place
	 * dg1*, dg2s: outputs of sha1h, alternated between even and odd steps
	 */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	t0		.req	v4
	t1		.req	v5

	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14

	/*
	 * add_only - one sha1c/sha1p/sha1m step without a schedule update
	 *
	 * op  : suffix selecting the hash instruction (c, p or m)
	 * ev  : the literal "ev" selects the even variant, anything else the
	 *       odd variant
	 * rc  : round constant vector added to the next schedule words
	 * s0  : number N of the vector vN holding the schedule words to be
	 *       pre-added for the following step (may be blank in the odd
	 *       variant, in which case no pre-add is emitted)
	 * dg1 : optional register to use as the second operand of the hash
	 *       instruction in the even variant (defaults to dg1s)
	 *
	 * The even variant consumes t0 and prepares t1; the odd variant
	 * consumes t1 and prepares t0. Likewise the even variant writes dg2s
	 * and the odd variant writes dg1s, so invocations must strictly
	 * alternate ev/od.
	 */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha1h		dg2s, dg0s
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm

	/*
	 * add_update - add_only wrapped in a message schedule update
	 *
	 * s0-s3 are the numbers of the four schedule vectors; v\s0 is
	 * rewritten by the sha1su0/sha1su1 pair using v\s1, v\s2 and v\s3,
	 * while v\s1 is the vector pre-added to the round constant for the
	 * following step. The remaining arguments are passed to add_only.
	 */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm

	/*
	 * loadrc - build the 32-bit constant \val in the general purpose
	 * register \tmp (low half via movz, high half via movk) and
	 * replicate it into every lane of vector \k.
	 */
	.macro		loadrc, k, val, tmp
	movz		\tmp, :abs_g0_nc:\val
	movk		\tmp, :abs_g1:\val
	dup		\k, \tmp
	.endm

	/*
	 * int __sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
	 *			   int blocks)
	 *
	 * Register usage as seen in the code below:
	 *   x0 - sst:    state is read from and written back to [x0] and
	 *                [x0, #16]; the finalize and count fields are read at
	 *                the offsets held in sha1_ce_offsetof_finalize and
	 *                sha1_ce_offsetof_count
	 *   x1 - src:    input, consumed 64 bytes per iteration
	 *   w2 - blocks: decremented once per 64-byte block consumed
	 *
	 * Returns in w0 the value left in w2, i.e. the number of blocks that
	 * were not processed. This is zero unless the loop was left through
	 * the cond_yield exit.
	 *
	 * Clobbers x4-x8 and v0-v14.
	 */
SYM_FUNC_START(__sha1_ce_transform)
	/* load round constants */
	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6

	/* load state */
	ld1		{dgav.4s}, [x0]
	ldr		dgb, [x0, #16]

	/* load sha1_ce_state::finalize */
	ldr_l		w4, sha1_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* load input */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w2, w2, #1

	/* byte-swap each 32-bit input word on little-endian builds */
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/*
	 * Entry point for a block that is already in v8-v11 (used by the
	 * padding path at label 2). Prime t0 for the first (even) step and
	 * take a working copy of the state.
	 */
1:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/*
	 * 20 steps in total, alternating ev/od: 5 with sha1c, 5 with sha1p,
	 * 5 with sha1m and 5 with sha1p. The round constant argument
	 * changes one step before the instruction does, because each step
	 * pre-adds the constant for the step that follows it.
	 */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	/*
	 * Out of input: go and check whether padding must be appended.
	 * Otherwise give cond_yield (from <asm/assembler.h>) the chance to
	 * branch to the store-and-return path at 3 before the next block.
	 */
	cbz		w2, 2f
	cond_yield	3f, x5, x6
	b		0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 */
2:	cbz		x4, 3f
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v9.2d, #0
	mov		x8, #0x80000000		// first padding word
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8			// also clears the upper half of v8
	mov		x4, #0			// padding done: next pass through 2 exits
	mov		v11.d[0], xzr
	mov		v11.d[1], x7		// bit count, 32-bit halves swapped
	b		1b

	/* store new state */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	mov		w0, w2
	ret
SYM_FUNC_END(__sha1_ce_transform)