1 /* 2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #define BR_ENABLE_INTRINSICS 1 26 #include "inner.h" 27 28 #if BR_AES_X86NI 29 30 /* see bearssl_block.h */ 31 const br_block_cbcenc_class * 32 br_aes_x86ni_cbcenc_get_vtable(void) 33 { 34 return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL; 35 } 36 37 /* see bearssl_block.h */ 38 void 39 br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx, 40 const void *key, size_t len) 41 { 42 ctx->vtable = &br_aes_x86ni_cbcenc_vtable; 43 ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len); 44 } 45 46 BR_TARGETS_X86_UP 47 48 /* see bearssl_block.h */ 49 BR_TARGET("sse2,aes") 50 void 51 br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx, 52 void *iv, void *data, size_t len) 53 { 54 unsigned char *buf; 55 unsigned num_rounds; 56 __m128i sk[15], ivx; 57 unsigned u; 58 59 buf = data; 60 ivx = _mm_loadu_si128(iv); 61 num_rounds = ctx->num_rounds; 62 for (u = 0; u <= num_rounds; u ++) { 63 sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4))); 64 } 65 while (len > 0) { 66 __m128i x; 67 68 x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx); 69 x = _mm_xor_si128(x, sk[0]); 70 x = _mm_aesenc_si128(x, sk[1]); 71 x = _mm_aesenc_si128(x, sk[2]); 72 x = _mm_aesenc_si128(x, sk[3]); 73 x = _mm_aesenc_si128(x, sk[4]); 74 x = _mm_aesenc_si128(x, sk[5]); 75 x = _mm_aesenc_si128(x, sk[6]); 76 x = _mm_aesenc_si128(x, sk[7]); 77 x = _mm_aesenc_si128(x, sk[8]); 78 x = _mm_aesenc_si128(x, sk[9]); 79 if (num_rounds == 10) { 80 x = _mm_aesenclast_si128(x, sk[10]); 81 } else if (num_rounds == 12) { 82 x = _mm_aesenc_si128(x, sk[10]); 83 x = _mm_aesenc_si128(x, sk[11]); 84 x = _mm_aesenclast_si128(x, sk[12]); 85 } else { 86 x = _mm_aesenc_si128(x, sk[10]); 87 x = _mm_aesenc_si128(x, sk[11]); 88 x = _mm_aesenc_si128(x, sk[12]); 89 x = _mm_aesenc_si128(x, sk[13]); 90 x = _mm_aesenclast_si128(x, sk[14]); 91 } 92 ivx = x; 93 _mm_storeu_si128((void *)buf, x); 94 buf += 16; 95 len -= 16; 96 } 97 _mm_storeu_si128(iv, ivx); 98 } 99 100 BR_TARGETS_X86_DOWN 101 102 /* see bearssl_block.h */ 103 const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = { 104 sizeof(br_aes_x86ni_cbcenc_keys), 105 16, 106 4, 107 (void (*)(const br_block_cbcenc_class **, const void *, size_t)) 108 &br_aes_x86ni_cbcenc_init, 109 (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) 110 &br_aes_x86ni_cbcenc_run 111 }; 112 113 #else 114 115 /* see bearssl_block.h */ 116 const br_block_cbcenc_class * 117 br_aes_x86ni_cbcenc_get_vtable(void) 118 { 119 return NULL; 120 } 121 122 #endif 123