/*
 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
23 */ 24 25 #define BR_POWER_ASM_MACROS 1 26 #include "inner.h" 27 28 #if BR_POWER8 29 30 /* see bearssl_block.h */ 31 void 32 br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx, 33 const void *key, size_t len) 34 { 35 ctx->vtable = &br_aes_pwr8_cbcenc_vtable; 36 ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len); 37 } 38 39 static void 40 cbcenc_128(const unsigned char *sk, 41 const unsigned char *iv, unsigned char *buf, size_t len) 42 { 43 long cc; 44 45 #if BR_POWER8_LE 46 static const uint32_t idx2be[] = { 47 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C 48 }; 49 #endif 50 51 cc = 0; 52 asm volatile ( 53 54 /* 55 * Load subkeys into v0..v10 56 */ 57 lxvw4x(32, %[cc], %[sk]) 58 addi(%[cc], %[cc], 16) 59 lxvw4x(33, %[cc], %[sk]) 60 addi(%[cc], %[cc], 16) 61 lxvw4x(34, %[cc], %[sk]) 62 addi(%[cc], %[cc], 16) 63 lxvw4x(35, %[cc], %[sk]) 64 addi(%[cc], %[cc], 16) 65 lxvw4x(36, %[cc], %[sk]) 66 addi(%[cc], %[cc], 16) 67 lxvw4x(37, %[cc], %[sk]) 68 addi(%[cc], %[cc], 16) 69 lxvw4x(38, %[cc], %[sk]) 70 addi(%[cc], %[cc], 16) 71 lxvw4x(39, %[cc], %[sk]) 72 addi(%[cc], %[cc], 16) 73 lxvw4x(40, %[cc], %[sk]) 74 addi(%[cc], %[cc], 16) 75 lxvw4x(41, %[cc], %[sk]) 76 addi(%[cc], %[cc], 16) 77 lxvw4x(42, %[cc], %[sk]) 78 79 #if BR_POWER8_LE 80 /* 81 * v15 = constant for byteswapping words 82 */ 83 lxvw4x(47, 0, %[idx2be]) 84 #endif 85 /* 86 * Load IV into v16. 87 */ 88 lxvw4x(48, 0, %[iv]) 89 #if BR_POWER8_LE 90 vperm(16, 16, 16, 15) 91 #endif 92 93 mtctr(%[num_blocks]) 94 label(loop) 95 /* 96 * Load next plaintext word and XOR with current IV. 97 */ 98 lxvw4x(49, 0, %[buf]) 99 #if BR_POWER8_LE 100 vperm(17, 17, 17, 15) 101 #endif 102 vxor(16, 16, 17) 103 104 /* 105 * Encrypt the block. 
106 */ 107 vxor(16, 16, 0) 108 vcipher(16, 16, 1) 109 vcipher(16, 16, 2) 110 vcipher(16, 16, 3) 111 vcipher(16, 16, 4) 112 vcipher(16, 16, 5) 113 vcipher(16, 16, 6) 114 vcipher(16, 16, 7) 115 vcipher(16, 16, 8) 116 vcipher(16, 16, 9) 117 vcipherlast(16, 16, 10) 118 119 /* 120 * Store back result (with byteswap) 121 */ 122 #if BR_POWER8_LE 123 vperm(17, 16, 16, 15) 124 stxvw4x(49, 0, %[buf]) 125 #else 126 stxvw4x(48, 0, %[buf]) 127 #endif 128 addi(%[buf], %[buf], 16) 129 130 bdnz(loop) 131 132 : [cc] "+b" (cc), [buf] "+b" (buf) 133 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4) 134 #if BR_POWER8_LE 135 , [idx2be] "b" (idx2be) 136 #endif 137 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 138 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", 139 "ctr", "memory" 140 ); 141 } 142 143 static void 144 cbcenc_192(const unsigned char *sk, 145 const unsigned char *iv, unsigned char *buf, size_t len) 146 { 147 long cc; 148 149 #if BR_POWER8_LE 150 static const uint32_t idx2be[] = { 151 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C 152 }; 153 #endif 154 155 cc = 0; 156 asm volatile ( 157 158 /* 159 * Load subkeys into v0..v12 160 */ 161 lxvw4x(32, %[cc], %[sk]) 162 addi(%[cc], %[cc], 16) 163 lxvw4x(33, %[cc], %[sk]) 164 addi(%[cc], %[cc], 16) 165 lxvw4x(34, %[cc], %[sk]) 166 addi(%[cc], %[cc], 16) 167 lxvw4x(35, %[cc], %[sk]) 168 addi(%[cc], %[cc], 16) 169 lxvw4x(36, %[cc], %[sk]) 170 addi(%[cc], %[cc], 16) 171 lxvw4x(37, %[cc], %[sk]) 172 addi(%[cc], %[cc], 16) 173 lxvw4x(38, %[cc], %[sk]) 174 addi(%[cc], %[cc], 16) 175 lxvw4x(39, %[cc], %[sk]) 176 addi(%[cc], %[cc], 16) 177 lxvw4x(40, %[cc], %[sk]) 178 addi(%[cc], %[cc], 16) 179 lxvw4x(41, %[cc], %[sk]) 180 addi(%[cc], %[cc], 16) 181 lxvw4x(42, %[cc], %[sk]) 182 addi(%[cc], %[cc], 16) 183 lxvw4x(43, %[cc], %[sk]) 184 addi(%[cc], %[cc], 16) 185 lxvw4x(44, %[cc], %[sk]) 186 187 #if BR_POWER8_LE 188 /* 189 * v15 = constant for byteswapping words 190 */ 191 lxvw4x(47, 0, 
%[idx2be]) 192 #endif 193 /* 194 * Load IV into v16. 195 */ 196 lxvw4x(48, 0, %[iv]) 197 #if BR_POWER8_LE 198 vperm(16, 16, 16, 15) 199 #endif 200 201 mtctr(%[num_blocks]) 202 label(loop) 203 /* 204 * Load next plaintext word and XOR with current IV. 205 */ 206 lxvw4x(49, 0, %[buf]) 207 #if BR_POWER8_LE 208 vperm(17, 17, 17, 15) 209 #endif 210 vxor(16, 16, 17) 211 212 /* 213 * Encrypt the block. 214 */ 215 vxor(16, 16, 0) 216 vcipher(16, 16, 1) 217 vcipher(16, 16, 2) 218 vcipher(16, 16, 3) 219 vcipher(16, 16, 4) 220 vcipher(16, 16, 5) 221 vcipher(16, 16, 6) 222 vcipher(16, 16, 7) 223 vcipher(16, 16, 8) 224 vcipher(16, 16, 9) 225 vcipher(16, 16, 10) 226 vcipher(16, 16, 11) 227 vcipherlast(16, 16, 12) 228 229 /* 230 * Store back result (with byteswap) 231 */ 232 #if BR_POWER8_LE 233 vperm(17, 16, 16, 15) 234 stxvw4x(49, 0, %[buf]) 235 #else 236 stxvw4x(48, 0, %[buf]) 237 #endif 238 addi(%[buf], %[buf], 16) 239 240 bdnz(loop) 241 242 : [cc] "+b" (cc), [buf] "+b" (buf) 243 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4) 244 #if BR_POWER8_LE 245 , [idx2be] "b" (idx2be) 246 #endif 247 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 248 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", 249 "ctr", "memory" 250 ); 251 } 252 253 static void 254 cbcenc_256(const unsigned char *sk, 255 const unsigned char *iv, unsigned char *buf, size_t len) 256 { 257 long cc; 258 259 #if BR_POWER8_LE 260 static const uint32_t idx2be[] = { 261 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C 262 }; 263 #endif 264 265 cc = 0; 266 asm volatile ( 267 268 /* 269 * Load subkeys into v0..v14 270 */ 271 lxvw4x(32, %[cc], %[sk]) 272 addi(%[cc], %[cc], 16) 273 lxvw4x(33, %[cc], %[sk]) 274 addi(%[cc], %[cc], 16) 275 lxvw4x(34, %[cc], %[sk]) 276 addi(%[cc], %[cc], 16) 277 lxvw4x(35, %[cc], %[sk]) 278 addi(%[cc], %[cc], 16) 279 lxvw4x(36, %[cc], %[sk]) 280 addi(%[cc], %[cc], 16) 281 lxvw4x(37, %[cc], %[sk]) 282 addi(%[cc], %[cc], 16) 283 lxvw4x(38, %[cc], %[sk]) 
284 addi(%[cc], %[cc], 16) 285 lxvw4x(39, %[cc], %[sk]) 286 addi(%[cc], %[cc], 16) 287 lxvw4x(40, %[cc], %[sk]) 288 addi(%[cc], %[cc], 16) 289 lxvw4x(41, %[cc], %[sk]) 290 addi(%[cc], %[cc], 16) 291 lxvw4x(42, %[cc], %[sk]) 292 addi(%[cc], %[cc], 16) 293 lxvw4x(43, %[cc], %[sk]) 294 addi(%[cc], %[cc], 16) 295 lxvw4x(44, %[cc], %[sk]) 296 addi(%[cc], %[cc], 16) 297 lxvw4x(45, %[cc], %[sk]) 298 addi(%[cc], %[cc], 16) 299 lxvw4x(46, %[cc], %[sk]) 300 301 #if BR_POWER8_LE 302 /* 303 * v15 = constant for byteswapping words 304 */ 305 lxvw4x(47, 0, %[idx2be]) 306 #endif 307 /* 308 * Load IV into v16. 309 */ 310 lxvw4x(48, 0, %[iv]) 311 #if BR_POWER8_LE 312 vperm(16, 16, 16, 15) 313 #endif 314 315 mtctr(%[num_blocks]) 316 label(loop) 317 /* 318 * Load next plaintext word and XOR with current IV. 319 */ 320 lxvw4x(49, 0, %[buf]) 321 #if BR_POWER8_LE 322 vperm(17, 17, 17, 15) 323 #endif 324 vxor(16, 16, 17) 325 326 /* 327 * Encrypt the block. 328 */ 329 vxor(16, 16, 0) 330 vcipher(16, 16, 1) 331 vcipher(16, 16, 2) 332 vcipher(16, 16, 3) 333 vcipher(16, 16, 4) 334 vcipher(16, 16, 5) 335 vcipher(16, 16, 6) 336 vcipher(16, 16, 7) 337 vcipher(16, 16, 8) 338 vcipher(16, 16, 9) 339 vcipher(16, 16, 10) 340 vcipher(16, 16, 11) 341 vcipher(16, 16, 12) 342 vcipher(16, 16, 13) 343 vcipherlast(16, 16, 14) 344 345 /* 346 * Store back result (with byteswap) 347 */ 348 #if BR_POWER8_LE 349 vperm(17, 16, 16, 15) 350 stxvw4x(49, 0, %[buf]) 351 #else 352 stxvw4x(48, 0, %[buf]) 353 #endif 354 addi(%[buf], %[buf], 16) 355 356 bdnz(loop) 357 358 : [cc] "+b" (cc), [buf] "+b" (buf) 359 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4) 360 #if BR_POWER8_LE 361 , [idx2be] "b" (idx2be) 362 #endif 363 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", 364 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", 365 "ctr", "memory" 366 ); 367 } 368 369 /* see bearssl_block.h */ 370 void 371 br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx, 372 void *iv, void 
*data, size_t len) 373 { 374 if (len > 0) { 375 switch (ctx->num_rounds) { 376 case 10: 377 cbcenc_128(ctx->skey.skni, iv, data, len); 378 break; 379 case 12: 380 cbcenc_192(ctx->skey.skni, iv, data, len); 381 break; 382 default: 383 cbcenc_256(ctx->skey.skni, iv, data, len); 384 break; 385 } 386 memcpy(iv, (unsigned char *)data + (len - 16), 16); 387 } 388 } 389 390 /* see bearssl_block.h */ 391 const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = { 392 sizeof(br_aes_pwr8_cbcenc_keys), 393 16, 394 4, 395 (void (*)(const br_block_cbcenc_class **, const void *, size_t)) 396 &br_aes_pwr8_cbcenc_init, 397 (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t)) 398 &br_aes_pwr8_cbcenc_run 399 }; 400 401 /* see bearssl_block.h */ 402 const br_block_cbcenc_class * 403 br_aes_pwr8_cbcenc_get_vtable(void) 404 { 405 return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable : NULL; 406 } 407 408 #else 409 410 /* see bearssl_block.h */ 411 const br_block_cbcenc_class * 412 br_aes_pwr8_cbcenc_get_vtable(void) 413 { 414 return NULL; 415 } 416 417 #endif 418