/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>

	/* map the v<N>.4s register names used below to their encodings */
	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
	.set		.Lv\b\().4s, \b
	.endr

	/* ARMv8.2 SM3 instructions, emitted as explicit .inst encodings */
	.macro		sm3partw1, rd, rn, rm
	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro		sm3partw2, rd, rn, rm
	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro		sm3ss1, rd, rn, rm, ra
	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro		sm3tt1a, rd, rn, rm, imm2
	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro		sm3tt1b, rd, rn, rm, imm2
	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro		sm3tt2a, rd, rn, rm, imm2
	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro		sm3tt2b, rd, rn, rm, imm2
	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	/*
	 * One SM3 round: \t0 holds the current (rotated) round constant,
	 * \t1 receives it rotated left by one for the next round.
	 */
	.macro		round, ab, s0, t0, t1, i
	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	shl		\t1\().4s, \t0\().4s, #1
	sri		\t1\().4s, \t0\().4s, #31
	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
	.endm

	/*
	 * Four SM3 rounds; when \s4 is supplied, also expand the next four
	 * message schedule words into \s4.
	 */
	.macro		qround, ab, s0, s1, s2, s3, s4
	.ifnb		\s4
	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
	ext		v6.16b, \s0\().16b, \s1\().16b, #12
	ext		v7.16b, \s2\().16b, \s3\().16b, #8
	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
	.endif

	eor		v10.16b, \s0\().16b, \s1\().16b

	round		\ab, \s0, v11, v12, 0
	round		\ab, \s0, v12, v11, 1
	round		\ab, \s0, v11, v12, 2
	round		\ab, \s0, v12, v11, 3

	.ifnb		\s4
	sm3partw2	\s4\().4s, v7.4s, v6.4s
	.endif
	.endm

	/*
	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
	 *                       int blocks)
	 */
	.text
SYM_TYPED_FUNC_START(sm3_ce_transform)
	/* load state */
	ld1		{v8.4s-v9.4s}, [x0]
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8

	adr_l		x8, .Lt
	ldp		s13, s14, [x8]		/* round constants */

	/* load input */
0:	ld1		{v0.16b-v3.16b}, [x1], #64
	sub		w2, w2, #1

	/* keep a copy of the input state for the final feedback XOR */
	mov		v15.16b, v8.16b
	mov		v16.16b, v9.16b

CPU_LE(	rev32		v0.16b, v0.16b		)
CPU_LE(	rev32		v1.16b, v1.16b		)
CPU_LE(	rev32		v2.16b, v2.16b		)
CPU_LE(	rev32		v3.16b, v3.16b		)

	ext		v11.16b, v13.16b, v13.16b, #4

	qround		a, v0, v1, v2, v3, v4
	qround		a, v1, v2, v3, v4, v0
	qround		a, v2, v3, v4, v0, v1
	qround		a, v3, v4, v0, v1, v2

	ext		v11.16b, v14.16b, v14.16b, #4

	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4, v0, v1, v2
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4
	qround		b, v4, v0
	qround		b, v0, v1

	eor		v8.16b, v8.16b, v15.16b
	eor		v9.16b, v9.16b, v16.16b

	/* handled all input blocks? */
	cbnz		w2, 0b

	/* save state */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8
	st1		{v8.4s-v9.4s}, [x0]
	ret
SYM_FUNC_END(sm3_ce_transform)

	.section	".rodata", "a"
	.align		3
	/* T for round 0, and T for round 16 pre-rotated left by 16 */
.Lt:	.word		0x79cc4519, 0x9d8a7a87
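
/*
 * Usage sketch (not part of the build): a minimal illustration of how C
 * glue code might drive sm3_ce_transform() on whole 64-byte blocks.  The
 * wrapper name sm3_ce_do_blocks() is hypothetical; kernel_neon_begin(),
 * kernel_neon_end() and struct sm3_state are the kernel interfaces this
 * routine is written against, but the actual glue driver may differ.
 *
 *	#include <linux/types.h>
 *	#include <asm/neon.h>
 *	#include <crypto/sm3.h>
 *
 *	asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
 *					 int blocks);
 *
 *	static void sm3_ce_do_blocks(struct sm3_state *sst, const u8 *src,
 *				     int blocks)
 *	{
 *		kernel_neon_begin();	// SM3 instructions use the SIMD regs
 *		sm3_ce_transform(sst, src, blocks);
 *		kernel_neon_end();
 *	}
 */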