/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 *
	 * Fold 'abytes' bytes of input into the running CBC-MAC.
	 *
	 * In:	x0 = mac	16-byte MAC block, updated in place
	 *	x1 = in		input data
	 *	w2 = abytes	number of input bytes
	 *	x3 = macp	in/out: number of bytes already XORed into the
	 *			current MAC block that has not been encrypted
	 *			yet (0 means we are on a block boundary)
	 *	x4 = rk		round keys
	 *	w5 = rounds	number of AES rounds
	 *
	 * Scratch: x6-x8, v0 (mac), v1 (staged input), v3-v5 (round keys),
	 * flags. Only caller-saved registers are used.
	 *
	 * The MAC is always stored in its "XORed but not yet encrypted"
	 * state; the encryption of a completed block is performed at the
	 * start of the next block (label 1).
	 */
ENTRY(ce_aes_ccm_auth_data)
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.2d}, [x0]			/* load mac */
	cbz	w8, 1f
	sub	w8, w8, #16			/* w8 = -(bytes still missing) */
	eor	v1.16b, v1.16b, v1.16b		/* v1 = 0: staging block */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1
	add	w8, w8, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b		/* block complete: fold it in */

	/*
	 * Encrypt the MAC block. The entry point into the three-round
	 * loop depends on the round count, so that the total number of
	 * rounds executed equals w5: rounds < 12 enters at label 3,
	 * rounds == 12 at label 4 and rounds > 12 at label 5.
	 */
1:	ld1	{v3.2d}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f
	bne	5f
	mov	v5.16b, v3.16b
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.2d}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.2d}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.2d}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.2d}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f				/* less than a full block left */
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b				/* more input: encrypt again */
6:	st1	{v0.2d}, [x0]			/* store mac */
	beq	10f				/* input ended on a block boundary */
	adds	w2, w2, #16			/* w2 = size of partial tail */
	beq	10f
	mov	w8, w2				/* tail size becomes new *macp */

	/* XOR the partial tail into the stored mac, one byte at a time */
7:	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b

	/*
	 * Out of input while topping up a leftover block. w8 holds minus
	 * the number of bytes still missing from the block.
	 *
	 * If the input ran out on the very byte that completed the block,
	 * w8 is already zero and v1 is already in its final position. The
	 * rotate loop below must then be skipped: entered with w7 == 0 it
	 * would only terminate once the 32-bit counter wraps around, and
	 * *macp would be left at 16 rather than 0.
	 */
8:	cbz	w8, 11f
	mov	w7, w8
	add	w8, w8, #16			/* new *macp = bytes now held */
9:	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate staged bytes in place */
	adds	w7, w7, #1
	bne	9b
11:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.2d}, [x0]
10:	str	w8, [x3]			/* write back *macp */
	ret
ENDPROC(ce_aes_ccm_auth_data)

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 *			 u32 rounds);
	 *
	 * Encrypt the MAC block and the counter block loaded from 'ctr' in
	 * parallel and store their XOR back to 'mac'.
	 *
	 * In:	x0 = mac	16-byte MAC block, updated in place
	 *	x1 = ctr	16-byte counter block
	 *	x2 = rk		round keys
	 *	w3 = rounds	number of AES rounds
	 *
	 * Scratch: x2, w3, v0 (mac), v1 (ctr), v3-v5 (round keys), flags.
	 */
ENTRY(ce_aes_ccm_final)
	ld1	{v3.2d}, [x2], #16		/* load first round key */
	ld1	{v0.2d}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.2d}, [x1]			/* load 1st ctriv */
	bmi	0f
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aese	v1.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aese	v1.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.2d}, [x0]			/* store result */
	ret
ENDPROC(ce_aes_ccm_final)

	/*
	 * aes_ccm_do_crypt - shared body of ce_aes_ccm_encrypt/_decrypt
	 *
	 * \enc == 1 selects encryption, any other value decryption.
	 *
	 * In:	x0 = out	output buffer
	 *	x1 = in		input buffer
	 *	w2 = cbytes	number of bytes to process
	 *	x3 = rk		round keys
	 *	w4 = rounds	number of AES rounds
	 *	x5 = mac	16-byte MAC block, updated in place
	 *	x6 = ctr	16-byte counter block; bytes 8..15 are updated
	 *			when the input ends on a block boundary
	 *
	 * Scratch: x6-x10, v0 (mac), v1 (counter/keystream), v2 (data),
	 * v3-v5 (round keys), flags.
	 *
	 * For every block the MAC and the incremented counter are pushed
	 * through AES together (2x interleaved); the plaintext is XORed
	 * into the MAC and the keystream into the data.
	 *
	 * On the partial-tail path (label 6) x5 is advanced and w6 is used
	 * as scratch, and the counter is not written back.
	 *
	 * NOTE(review): the unconditional 'rev' of the counter and the
	 * .2d/.1d load/store arrangements look little-endian specific -
	 * confirm before building this for a big-endian kernel.
	 */
	.macro	aes_ccm_do_crypt,enc
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.2d}, [x5]			/* load mac */
	rev	x8, x8				/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.1d}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.2d}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aese	v1.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aese	v1.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.2d}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	/*
	 * The last round key (v5) has not been applied to v0/v1 yet; it
	 * is merged into the data block so one XOR serves both results.
	 */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b				/* flags still from subs w2 */
	rev	x8, x8
	st1	{v0.2d}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.2d}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get next crypted ctr byte */
	umov	w7, v0.b[0]			/* get next mac byte */
	.if	\enc == 1
	eor	w7, w7, w9			/* mac ^= plaintext byte */
	eor	w9, w9, w6			/* ciphertext byte */
	.else
	eor	w9, w9, w6			/* plaintext byte */
	eor	w7, w7, w9			/* mac ^= plaintext byte */
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 */
ENTRY(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
ENDPROC(ce_aes_ccm_encrypt)

ENTRY(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
ENDPROC(ce_aes_ccm_decrypt)