/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

.arch	armv8-a+crypto

/*
 * Define .Lv<N>.4s = <N> for the vector registers used below, so the
 * hand-encoded sm4e macro can translate a "vN" operand name into its
 * register number.
 */
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/*
 * SM4E <Vd>.4S, <Vn>.4S, emitted as a raw encoding so the file assembles
 * even when the toolchain does not know the SM4 instructions.
 */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

/* Running CBC-MAC state, kept live across the whole of each function. */
#define RMAC	v16

/* Helper macros. */

/*
 * Materialize the current 128-bit counter into \vctr as a big-endian
 * block, then post-increment it.  x7:x8 hold the counter as native
 * (little-endian) 64-bit halves, x7 = high, x8 = low; rev64 byte-swaps
 * each doubleword so the stored lanes read back big-endian.  Uses
 * adds/adc so a low-half overflow carries into the high half.
 */
#define inc_le128(vctr)					\
		mov		vctr.d[1], x8;		\
		mov		vctr.d[0], x7;		\
		adds		x8, x8, #1;		\
		rev64		vctr.16b, vctr.16b;	\
		adc		x7, x7, xzr;


.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: mac
	 *   x2: src
	 *   w3: nblocks
	 */
	/*
	 * Fold nblocks full blocks of src into the CBC-MAC state.  Note the
	 * order: the running MAC is encrypted first, THEN XORed with the next
	 * plaintext block, i.e. the final encryption of the last XORed block
	 * is deferred (sm4_ce_ccm_final performs it).
	 */
	SM4_PREPARE(x0)

	ld1	{RMAC.16b}, [x1]

.Lcbcmac_loop_4x:
	cmp	w3, #4
	blt	.Lcbcmac_loop_1x

	sub	w3, w3, #4

	ld1	{v0.16b-v3.16b}, [x2], #64

	/* mac = E(mac) ^ block, serially: each step depends on the last */
	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v3.16b

	cbz	w3, .Lcbcmac_end
	b	.Lcbcmac_loop_4x

.Lcbcmac_loop_1x:
	sub	w3, w3, #1

	ld1	{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v0.16b

	cbnz	w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	/* write the (still pending-XOR) MAC state back for the next call */
	st1	{RMAC.16b}, [x1]
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)

.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit)
	 *   x2: mac
	 */
	/*
	 * Finish the tag: apply the deferred final encryption to the MAC
	 * state and encrypt ctr0, in one paired SM4 pass, then XOR the two
	 * to produce the (en-/decryption-symmetric) CCM tag.
	 */
	SM4_PREPARE(x0)

	ld1	{RMAC.16b}, [x2]
	ld1	{v0.16b}, [x1]

	SM4_CRYPT_BLK2(RMAC, v0)

	/* en-/decrypt the mac with ctr0 */
	eor	RMAC.16b, RMAC.16b, v0.16b
	st1	{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)

.align 3
/* TYPED: called indirectly from C, needs a CFI type hash before the entry */
SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 */
	SM4_PREPARE(x0)

	/* load counter into x7 (high) / x8 (low) as native integers */
	ldp	x7, x8, [x3]
	rev	x7, x7
	rev	x8, x8

	ld1	{RMAC.16b}, [x5]

.Lccm_enc_loop_4x:
	cmp	w4, #(4 * 16)
	blt	.Lccm_enc_loop_1x

	sub	w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1	{v0.16b-v3.16b}, [x2], #64

	/*
	 * Each pass encrypts one keystream block and the MAC state together,
	 * then XORs the plaintext into both: ct = E(ctr) ^ pt, and the MAC
	 * accumulates the plaintext (CCM authenticates the plaintext).
	 */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor	v8.16b, v8.16b, v0.16b
	eor	RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor	v9.16b, v9.16b, v1.16b
	eor	RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor	v10.16b, v10.16b, v2.16b
	eor	RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor	v11.16b, v11.16b, v3.16b
	eor	RMAC.16b, RMAC.16b, v3.16b

	st1	{v8.16b-v11.16b}, [x1], #64

	cbz	w4, .Lccm_enc_end
	b	.Lccm_enc_loop_4x

.Lccm_enc_loop_1x:
	cmp	w4, #16
	blt	.Lccm_enc_tail

	sub	w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1	{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor	v8.16b, v8.16b, v0.16b
	eor	RMAC.16b, RMAC.16b, v0.16b

	st1	{v8.16b}, [x1], #16

	cbz	w4, .Lccm_enc_end
	b	.Lccm_enc_loop_1x

.Lccm_enc_tail:
	/*
	 * Final partial block (w4 < 16 bytes).  Encrypt the MAC state and the
	 * keystream block up front, store the full MAC, then overwrite its
	 * leading w4 bytes one at a time with MAC^input while emitting
	 * keystream^input ciphertext bytes.
	 */
	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1	{RMAC.16b}, [x5]

.Lccm_enc_tail_loop:
	/* x0 (round-key pointer) is dead after SM4_PREPARE; reuse as scratch */
	ldrb	w0, [x2], #1		/* get 1 byte from input */
	umov	w9, v8.b[0]		/* get top crypted CTR byte */
	umov	w6, RMAC.b[0]		/* get top MAC byte */

	eor	w9, w9, w0		/* w9 = CTR ^ input */
	eor	w6, w6, w0		/* w6 = MAC ^ input */

	strb	w9, [x1], #1		/* store out byte */
	strb	w6, [x5], #1		/* store MAC byte */

	subs	w4, w4, #1
	beq	.Lccm_enc_ret

	/* shift out one byte */
	ext	RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext	v8.16b, v8.16b, v8.16b, #1

	b	.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1	{RMAC.16b}, [x5]

	/* store new CTR */
	rev	x7, x7
	rev	x8, x8
	stp	x7, x8, [x3]

.Lccm_enc_ret:
	/*
	 * NOTE(review): the tail path returns without writing the CTR back,
	 * so a partial block is presumably only ever the final call for a
	 * request — confirm against the C glue code.
	 */
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)

.align 3
/* TYPED: called indirectly from C, needs a CFI type hash before the entry */
SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 */
	SM4_PREPARE(x0)

	/* load counter into x7 (high) / x8 (low) as native integers */
	ldp	x7, x8, [x3]
	rev	x7, x7
	rev	x8, x8

	ld1	{RMAC.16b}, [x5]

.Lccm_dec_loop_4x:
	cmp	w4, #(4 * 16)
	blt	.Lccm_dec_loop_1x

	sub	w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1	{v0.16b-v3.16b}, [x2], #64

	/*
	 * Decrypt: pt = E(ctr) ^ ct, and — unlike the enc path — the MAC
	 * accumulates the recovered plaintext (v8..v11 after the first eor),
	 * since CCM always authenticates the plaintext.
	 */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor	v8.16b, v8.16b, v0.16b
	eor	RMAC.16b, RMAC.16b, v8.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor	v9.16b, v9.16b, v1.16b
	eor	RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor	v10.16b, v10.16b, v2.16b
	eor	RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor	v11.16b, v11.16b, v3.16b
	eor	RMAC.16b, RMAC.16b, v11.16b

	st1	{v8.16b-v11.16b}, [x1], #64

	cbz	w4, .Lccm_dec_end
	b	.Lccm_dec_loop_4x

.Lccm_dec_loop_1x:
	cmp	w4, #16
	blt	.Lccm_dec_tail

	sub	w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1	{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor	v8.16b, v8.16b, v0.16b
	eor	RMAC.16b, RMAC.16b, v8.16b

	st1	{v8.16b}, [x1], #16

	cbz	w4, .Lccm_dec_end
	b	.Lccm_dec_loop_1x

.Lccm_dec_tail:
	/*
	 * Final partial block (w4 < 16 bytes), byte-at-a-time as in the enc
	 * tail, except the MAC absorbs the decrypted output byte (w9), not
	 * the ciphertext input byte.
	 */
	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1	{RMAC.16b}, [x5]

.Lccm_dec_tail_loop:
	/* x0 (round-key pointer) is dead after SM4_PREPARE; reuse as scratch */
	ldrb	w0, [x2], #1		/* get 1 byte from input */
	umov	w9, v8.b[0]		/* get top crypted CTR byte */
	umov	w6, RMAC.b[0]		/* get top MAC byte */

	eor	w9, w9, w0		/* w9 = CTR ^ input */
	eor	w6, w6, w9		/* w6 = MAC ^ output */

	strb	w9, [x1], #1		/* store out byte */
	strb	w6, [x5], #1		/* store MAC byte */

	subs	w4, w4, #1
	beq	.Lccm_dec_ret

	/* shift out one byte */
	ext	RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext	v8.16b, v8.16b, v8.16b, #1

	b	.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1	{RMAC.16b}, [x5]

	/* store new CTR */
	rev	x7, x7
	rev	x8, x8
	stp	x7, x8, [x3]

.Lccm_dec_ret:
	/*
	 * NOTE(review): as in the enc path, the tail skips the CTR
	 * write-back — a partial block must be the request's final call.
	 */
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)