1 /* 2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* see inner.h */ 28 void 29 br_aes_ct64_bitslice_invSbox(uint64_t *q) 30 { 31 /* 32 * See br_aes_ct_bitslice_invSbox(). This is the natural extension 33 * to 64-bit registers. 34 */ 35 uint64_t q0, q1, q2, q3, q4, q5, q6, q7; 36 37 q0 = ~q[0]; 38 q1 = ~q[1]; 39 q2 = q[2]; 40 q3 = q[3]; 41 q4 = q[4]; 42 q5 = ~q[5]; 43 q6 = ~q[6]; 44 q7 = q[7]; 45 q[7] = q1 ^ q4 ^ q6; 46 q[6] = q0 ^ q3 ^ q5; 47 q[5] = q7 ^ q2 ^ q4; 48 q[4] = q6 ^ q1 ^ q3; 49 q[3] = q5 ^ q0 ^ q2; 50 q[2] = q4 ^ q7 ^ q1; 51 q[1] = q3 ^ q6 ^ q0; 52 q[0] = q2 ^ q5 ^ q7; 53 54 br_aes_ct64_bitslice_Sbox(q); 55 56 q0 = ~q[0]; 57 q1 = ~q[1]; 58 q2 = q[2]; 59 q3 = q[3]; 60 q4 = q[4]; 61 q5 = ~q[5]; 62 q6 = ~q[6]; 63 q7 = q[7]; 64 q[7] = q1 ^ q4 ^ q6; 65 q[6] = q0 ^ q3 ^ q5; 66 q[5] = q7 ^ q2 ^ q4; 67 q[4] = q6 ^ q1 ^ q3; 68 q[3] = q5 ^ q0 ^ q2; 69 q[2] = q4 ^ q7 ^ q1; 70 q[1] = q3 ^ q6 ^ q0; 71 q[0] = q2 ^ q5 ^ q7; 72 } 73 74 static void 75 add_round_key(uint64_t *q, const uint64_t *sk) 76 { 77 int i; 78 79 for (i = 0; i < 8; i ++) { 80 q[i] ^= sk[i]; 81 } 82 } 83 84 static void 85 inv_shift_rows(uint64_t *q) 86 { 87 int i; 88 89 for (i = 0; i < 8; i ++) { 90 uint64_t x; 91 92 x = q[i]; 93 q[i] = (x & (uint64_t)0x000000000000FFFF) 94 | ((x & (uint64_t)0x000000000FFF0000) << 4) 95 | ((x & (uint64_t)0x00000000F0000000) >> 12) 96 | ((x & (uint64_t)0x000000FF00000000) << 8) 97 | ((x & (uint64_t)0x0000FF0000000000) >> 8) 98 | ((x & (uint64_t)0x000F000000000000) << 12) 99 | ((x & (uint64_t)0xFFF0000000000000) >> 4); 100 } 101 } 102 103 static inline uint64_t 104 rotr32(uint64_t x) 105 { 106 return (x << 32) | (x >> 32); 107 } 108 109 static void 110 inv_mix_columns(uint64_t *q) 111 { 112 uint64_t q0, q1, q2, q3, q4, q5, q6, q7; 113 uint64_t r0, r1, r2, r3, r4, r5, r6, r7; 114 115 q0 = q[0]; 116 q1 = q[1]; 117 q2 = q[2]; 118 q3 = q[3]; 119 q4 = q[4]; 120 q5 = q[5]; 121 q6 = q[6]; 122 q7 = q[7]; 123 r0 = (q0 >> 16) | (q0 << 48); 124 r1 = (q1 >> 16) | (q1 << 48); 125 r2 = (q2 >> 16) | (q2 << 48); 126 r3 = (q3 >> 16) | (q3 << 48); 127 r4 = (q4 >> 16) | (q4 << 48); 128 r5 = (q5 >> 16) | (q5 << 48); 129 r6 = (q6 >> 16) | (q6 << 48); 130 r7 = (q7 >> 16) | (q7 << 48); 131 132 q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr32(q0 ^ q5 ^ q6 ^ r0 ^ r5); 133 q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6); 134 q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr32(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7); 135 q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr32(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7); 136 q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6); 137 q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7); 138 q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr32(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7); 139 q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr32(q4 ^ q5 ^ q7 ^ r4 ^ r7); 140 } 141 142 /* see inner.h */ 143 void 144 br_aes_ct64_bitslice_decrypt(unsigned num_rounds, 145 const uint64_t *skey, uint64_t *q) 146 { 147 unsigned u; 148 149 add_round_key(q, skey + (num_rounds << 3)); 150 for (u = num_rounds - 1; u > 0; u --) { 151 inv_shift_rows(q); 152 br_aes_ct64_bitslice_invSbox(q); 153 add_round_key(q, skey + (u << 3)); 154 inv_mix_columns(q); 155 } 156 inv_shift_rows(q); 157 br_aes_ct64_bitslice_invSbox(q); 158 add_round_key(q, skey); 159 } 160