/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * Emit the SM3 instructions manually via .inst so that the code
	 * assembles even when the toolchain does not support them; the
	 * .irp block maps the vN register names to their encoding numbers.
	 */
	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
	.set		.Lv\b\().4s, \b
	.endr

	.macro		sm3partw1, rd, rn, rm
	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro		sm3partw2, rd, rn, rm
	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro		sm3ss1, rd, rn, rm, ra
	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro		sm3tt1a, rd, rn, rm, imm2
	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro		sm3tt1b, rd, rn, rm, imm2
	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro		sm3tt2a, rd, rn, rm, imm2
	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro		sm3tt2b, rd, rn, rm, imm2
	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	/*
	 * One SM3 round: compute SS1 into v5 from the rolling round
	 * constant in \t0, rotate the constant left by one bit into \t1
	 * for the next round, then apply the TT1/TT2 state updates for
	 * schedule word \i.
	 */
	.macro		round, ab, s0, t0, t1, i
	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	shl		\t1\().4s, \t0\().4s, #1
	sri		\t1\().4s, \t0\().4s, #31
	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
	.endm

	/*
	 * Four SM3 rounds; when a fifth register \s4 is supplied, also
	 * expand the next four message schedule words into it using
	 * SM3PARTW1/SM3PARTW2.
	 */
	.macro		qround, ab, s0, s1, s2, s3, s4
	.ifnb		\s4
	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
	ext		v6.16b, \s0\().16b, \s1\().16b, #12
	ext		v7.16b, \s2\().16b, \s3\().16b, #8
	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
	.endif

	eor		v10.16b, \s0\().16b, \s1\().16b		// W'j = Wj ^ Wj+4

	round		\ab, \s0, v11, v12, 0
	round		\ab, \s0, v12, v11, 1
	round		\ab, \s0, v11, v12, 2
	round		\ab, \s0, v12, v11, 3

	.ifnb		\s4
	sm3partw2	\s4\().4s, v7.4s, v6.4s
	.endif
	.endm

	/*
	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
	 *                       int blocks)
	 */
	.text
ENTRY(sm3_ce_transform)
	/* load state, reversing the word order in each register */
	ld1		{v8.4s-v9.4s}, [x0]
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8

	adr_l		x8, .Lt
	ldp		s13, s14, [x8]

	/* load input */
0:	ld1		{v0.16b-v3.16b}, [x1], #64
	sub		w2, w2, #1

	mov		v15.16b, v8.16b
	mov		v16.16b, v9.16b

CPU_LE(	rev32		v0.16b, v0.16b	)
CPU_LE(	rev32		v1.16b, v1.16b	)
CPU_LE(	rev32		v2.16b, v2.16b	)
CPU_LE(	rev32		v3.16b, v3.16b	)

	/* rotate T0 into lane 3 of the rolling constant */
	ext		v11.16b, v13.16b, v13.16b, #4

	qround		a, v0, v1, v2, v3, v4
	qround		a, v1, v2, v3, v4, v0
	qround		a, v2, v3, v4, v0, v1
	qround		a, v3, v4, v0, v1, v2

	/* switch to the pre-rotated T16 for rounds 16-63 */
	ext		v11.16b, v14.16b, v14.16b, #4

	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4, v0, v1, v2
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4
	qround		b, v4, v0
	qround		b, v0, v1

	eor		v8.16b, v8.16b, v15.16b
	eor		v9.16b, v9.16b, v16.16b
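	/*
	 * The two eor instructions above implement SM3's feed-forward:
	 * V(i+1) = compress(V(i), B(i)) ^ V(i), where v15/v16 hold the
	 * copy of the state taken at the top of the loop.
	 */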

	/* handled all input blocks? */
	cbnz		w2, 0b

	/* restore the original word order and save state */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8
	st1		{v8.4s-v9.4s}, [x0]
	ret
ENDPROC(sm3_ce_transform)

	.section	".rodata", "a"
	.align		3
	/* T0, and T16 pre-rotated left by 16 (0x7a879d8a <<< 16) */
.Lt:	.word		0x79cc4519, 0x9d8a7a87
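
/*
 * For reference, a minimal sketch of how C glue code might drive this
 * routine (the actual glue lives in a separate file; the function name
 * sm3_ce_update() and the use of sm3_base_do_update() here are
 * illustrative assumptions, not part of this file):
 *
 *	static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
 *				 unsigned int len)
 *	{
 *		kernel_neon_begin();
 *		sm3_base_do_update(desc, data, len, sm3_ce_transform);
 *		kernel_neon_end();
 *		return 0;
 *	}
 *
 * kernel_neon_begin()/kernel_neon_end() bracket any kernel-mode use of
 * the NEON/Crypto Extensions registers.
 */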