1 /* 2 --------------------------------------------------------------------------- 3 Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved. 4 5 The redistribution and use of this software (with or without changes) 6 is allowed without the payment of fees or royalties provided that: 7 8 source code distributions include the above copyright notice, this 9 list of conditions and the following disclaimer; 10 11 binary distributions include the above copyright notice, this list 12 of conditions and the following disclaimer in their documentation. 13 14 This software is provided 'as is' with no explicit or implied warranties 15 in respect of its operation, including, but not limited to, correctness 16 and fitness for purpose. 17 --------------------------------------------------------------------------- 18 Issue Date: 20/12/2007 19 20 This file contains the compilation options for AES (Rijndael) and code 21 that is common across encryption, key scheduling and table generation. 22 23 OPERATION 24 25 These source code files implement the AES algorithm Rijndael designed by 26 Joan Daemen and Vincent Rijmen. This version is designed for the standard 27 block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 28 and 32 bytes). 29 30 This version is designed for flexibility and speed using operations on 31 32-bit words rather than operations on bytes. It can be compiled with 32 either big or little endian internal byte order but is faster when the 33 native byte order for the processor is used. 34 35 THE CIPHER INTERFACE 36 37 The cipher interface is implemented as an array of bytes in which lower 38 AES bit sequence indexes map to higher numeric significance within bytes. 39 40 uint8_t (an unsigned 8-bit type) 41 uint32_t (an unsigned 32-bit type) 42 struct aes_encrypt_ctx (structure for the cipher encryption context) 43 struct aes_decrypt_ctx (structure for the cipher decryption context) 44 AES_RETURN the function return type 45 46 C subroutine calls: 47 48 AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); 49 AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); 50 AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); 51 AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, 52 const aes_encrypt_ctx cx[1]); 53 54 AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); 55 AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); 56 AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); 57 AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, 58 const aes_decrypt_ctx cx[1]); 59 60 IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that 61 you call aes_init() before AES is used so that the tables are initialised. 62 63 C++ aes class subroutines: 64 65 Class AESencrypt for encryption 66 67 Constructors: 68 AESencrypt(void) 69 AESencrypt(const unsigned char *key) - 128 bit key 70 Members: 71 AES_RETURN key128(const unsigned char *key) 72 AES_RETURN key192(const unsigned char *key) 73 AES_RETURN key256(const unsigned char *key) 74 AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const 75 76 Class AESdecrypt for encryption 77 Constructors: 78 AESdecrypt(void) 79 AESdecrypt(const unsigned char *key) - 128 bit key 80 Members: 81 AES_RETURN key128(const unsigned char *key) 82 AES_RETURN key192(const unsigned char *key) 83 AES_RETURN key256(const unsigned char *key) 84 AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const 85 */ 86 87 #if !defined( _AESOPT_H ) 88 #define _AESOPT_H 89 90 #if defined( __cplusplus ) 91 #include "aescpp.h" 92 #else 93 #include "aes.h" 94 #endif 95 96 /* PLATFORM SPECIFIC INCLUDES */ 97 98 #include "brg_endian.h" 99 100 /* CONFIGURATION - THE USE OF DEFINES 101 102 Later in this section there are a number of defines that control the 103 operation of the code. In each section, the purpose of each define is 104 explained so that the relevant form can be included or excluded by 105 setting either 1's or 0's respectively on the branches of the related 106 #if clauses. The following local defines should not be changed. 107 */ 108 109 #define ENCRYPTION_IN_C 1 110 #define DECRYPTION_IN_C 2 111 #define ENC_KEYING_IN_C 4 112 #define DEC_KEYING_IN_C 8 113 114 #define NO_TABLES 0 115 #define ONE_TABLE 1 116 #define FOUR_TABLES 4 117 #define NONE 0 118 #define PARTIAL 1 119 #define FULL 2 120 121 /* --- START OF USER CONFIGURED OPTIONS --- */ 122 123 /* 1. BYTE ORDER WITHIN 32 BIT WORDS 124 125 The fundamental data processing units in Rijndael are 8-bit bytes. The 126 input, output and key input are all enumerated arrays of bytes in which 127 bytes are numbered starting at zero and increasing to one less than the 128 number of bytes in the array in question. This enumeration is only used 129 for naming bytes and does not imply any adjacency or order relationship 130 from one byte to another. When these inputs and outputs are considered 131 as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to 132 byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. 133 In this implementation bits are numbered from 0 to 7 starting at the 134 numerically least significant end of each byte (bit n represents 2^n). 135 136 However, Rijndael can be implemented more efficiently using 32-bit 137 words by packing bytes into words so that bytes 4*n to 4*n+3 are placed 138 into word[n]. While in principle these bytes can be assembled into words 139 in any positions, this implementation only supports the two formats in 140 which bytes in adjacent positions within words also have adjacent byte 141 numbers. This order is called big-endian if the lowest numbered bytes 142 in words have the highest numeric significance and little-endian if the 143 opposite applies. 144 145 This code can work in either order irrespective of the order used by the 146 machine on which it runs. Normally the internal byte order will be set 147 to the order of the processor on which the code is to be run but this 148 define can be used to reverse this in special situations 149 150 WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set. 151 This define will hence be redefined later (in section 4) if necessary 152 */ 153 154 #if 1 155 # define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER 156 #elif 0 157 # define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN 158 #elif 0 159 # define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN 160 #else 161 # error The algorithm byte order is not defined 162 #endif 163 164 /* 2. Intel AES AND VIA ACE SUPPORT */ 165 166 #if defined( __GNUC__ ) && defined( __i386__ ) && !defined(__BEOS__) \ 167 || defined( _WIN32 ) && defined( _M_IX86 ) && !(defined( _WIN64 ) \ 168 || defined( _WIN32_WCE ) || defined( _MSC_VER ) && ( _MSC_VER <= 800 )) 169 # define VIA_ACE_POSSIBLE 170 #endif 171 172 /* AESNI is supported by all Windows x64 compilers, but for Linux/GCC 173 we have to test for SSE 2, SSE 3, and AES to before enabling it; */ 174 #if !defined( INTEL_AES_POSSIBLE ) 175 # if defined( _WIN64 ) && defined( _MSC_VER ) \ 176 || defined( __GNUC__ ) && defined( __x86_64__ ) && \ 177 defined( __SSE2__ ) && defined( __SSE3__ ) && \ 178 defined( __AES__ ) 179 # define INTEL_AES_POSSIBLE 180 # endif 181 #endif 182 183 /* Define this option if support for the Intel AESNI is required 184 If USE_INTEL_AES_IF_PRESENT is defined then AESNI will be used 185 if it is detected (both present and enabled). 186 187 AESNI uses a decryption key schedule with the first decryption 188 round key at the high end of the key schedule with the following 189 round keys at lower positions in memory. So AES_REV_DKS must NOT 190 be defined when AESNI will be used. Although it is unlikely that 191 assembler code will be used with an AESNI build, if it is then 192 AES_REV_DKS must NOT be defined when the assembler files are 193 built (the definition of USE_INTEL_AES_IF_PRESENT in the assembler 194 code files must match that here if they are used). 195 */ 196 197 #if defined( INTEL_AES_POSSIBLE ) 198 # if 0 && !defined( USE_INTEL_AES_IF_PRESENT ) 199 # define USE_INTEL_AES_IF_PRESENT 200 # endif 201 #elif defined( USE_INTEL_AES_IF_PRESENT ) 202 # error: AES_NI is not available on this platform 203 #endif 204 205 /* Define this option if support for the VIA ACE is required. This uses 206 inline assembler instructions and is only implemented for the Microsoft, 207 Intel and GCC compilers. If VIA ACE is known to be present, then defining 208 ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption 209 code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if 210 it is detected (both present and enabled) but the normal AES code will 211 also be present. 212 213 When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte 214 aligned; other input/output buffers do not need to be 16 byte aligned 215 but there are very large performance gains if this can be arranged. 216 VIA ACE also requires the decryption key schedule to be in reverse 217 order (which later checks below ensure). 218 219 AES_REV_DKS must be set for assembler code used with a VIA ACE build 220 */ 221 222 #if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( USE_VIA_ACE_IF_PRESENT ) 223 # define USE_VIA_ACE_IF_PRESENT 224 #endif 225 226 #if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( ASSUME_VIA_ACE_PRESENT ) 227 # define ASSUME_VIA_ACE_PRESENT 228 # endif 229 230 /* 3. ASSEMBLER SUPPORT 231 232 This define (which can be on the command line) enables the use of the 233 assembler code routines for encryption, decryption and key scheduling 234 as follows: 235 236 ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for 237 encryption and decryption and but with key scheduling in C 238 ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for 239 encryption, decryption and key scheduling 240 ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for 241 encryption and decryption and but with key scheduling in C 242 ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for 243 encryption and decryption and but with key scheduling in C 244 245 Change one 'if 0' below to 'if 1' to select the version or define 246 as a compilation option. 247 */ 248 249 #if 0 && !defined( ASM_X86_V1C ) 250 # define ASM_X86_V1C 251 #elif 0 && !defined( ASM_X86_V2 ) 252 # define ASM_X86_V2 253 #elif 0 && !defined( ASM_X86_V2C ) 254 # define ASM_X86_V2C 255 #elif 0 && !defined( ASM_AMD64_C ) 256 # define ASM_AMD64_C 257 #endif 258 259 #if defined( __i386 ) || defined( _M_IX86 ) 260 # define A32_ 261 #elif defined( __x86_64__ ) || defined( _M_X64 ) 262 # define A64_ 263 #endif 264 265 #if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \ 266 && !defined( A32_ ) || defined( ASM_AMD64_C ) && !defined( A64_ ) 267 # error Assembler code is only available for x86 and AMD64 systems 268 #endif 269 270 /* 4. FAST INPUT/OUTPUT OPERATIONS. 271 272 On some machines it is possible to improve speed by transferring the 273 bytes in the input and output arrays to and from the internal 32-bit 274 variables by addressing these arrays as if they are arrays of 32-bit 275 words. On some machines this will always be possible but there may 276 be a large performance penalty if the byte arrays are not aligned on 277 the normal word boundaries. On other machines this technique will 278 lead to memory access errors when such 32-bit word accesses are not 279 properly aligned. The option SAFE_IO avoids such problems but will 280 often be slower on those machines that support misaligned access 281 (especially so if care is taken to align the input and output byte 282 arrays on 32-bit word boundaries). If SAFE_IO is not defined it is 283 assumed that access to byte arrays as if they are arrays of 32-bit 284 words will not cause problems when such accesses are misaligned. 285 */ 286 #if 1 && !defined( _MSC_VER ) 287 # define SAFE_IO 288 #endif 289 290 /* 5. LOOP UNROLLING 291 292 The code for encryption and decryption cycles through a number of rounds 293 that can be implemented either in a loop or by expanding the code into a 294 long sequence of instructions, the latter producing a larger program but 295 one that will often be much faster. The latter is called loop unrolling. 296 There are also potential speed advantages in expanding two iterations in 297 a loop with half the number of iterations, which is called partial loop 298 unrolling. The following options allow partial or full loop unrolling 299 to be set independently for encryption and decryption 300 */ 301 #if !defined(CONFIG_SMALL) || defined(CONFIG_SMALL_NO_CRYPTO) 302 # define ENC_UNROLL FULL 303 #elif 0 304 # define ENC_UNROLL PARTIAL 305 #else 306 # define ENC_UNROLL NONE 307 #endif 308 309 #if !defined(CONFIG_SMALL) || defined(CONFIG_SMALL_NO_CRYPTO) 310 # define DEC_UNROLL FULL 311 #elif 0 312 # define DEC_UNROLL PARTIAL 313 #else 314 # define DEC_UNROLL NONE 315 #endif 316 317 #if 1 318 # define ENC_KS_UNROLL 319 #endif 320 321 #if 1 322 # define DEC_KS_UNROLL 323 #endif 324 325 /* 6. FAST FINITE FIELD OPERATIONS 326 327 If this section is included, tables are used to provide faster finite 328 field arithmetic (this has no effect if STATIC_TABLES is defined). 329 */ 330 #if 1 331 # define FF_TABLES 332 #endif 333 334 /* 7. INTERNAL STATE VARIABLE FORMAT 335 336 The internal state of Rijndael is stored in a number of local 32-bit 337 word variables which can be defined either as an array or as individual 338 names variables. Include this section if you want to store these local 339 variables in arrays. Otherwise individual local variables will be used. 340 */ 341 #if 1 342 # define ARRAYS 343 #endif 344 345 /* 8. FIXED OR DYNAMIC TABLES 346 347 When this section is included the tables used by the code are compiled 348 statically into the binary file. Otherwise the subroutine aes_init() 349 must be called to compute them before the code is first used. 350 */ 351 #if 1 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 )) 352 # define STATIC_TABLES 353 #endif 354 355 /* 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES 356 357 In some systems it is better to mask longer values to extract bytes 358 rather than using a cast. This option allows this choice. 359 */ 360 #if 0 361 # define to_byte(x) ((uint8_t)(x)) 362 #else 363 # define to_byte(x) ((x) & 0xff) 364 #endif 365 366 /* 10. TABLE ALIGNMENT 367 368 On some systems speed will be improved by aligning the AES large lookup 369 tables on particular boundaries. This define should be set to a power of 370 two giving the desired alignment. It can be left undefined if alignment 371 is not needed. This option is specific to the Microsoft VC++ compiler - 372 it seems to sometimes cause trouble for the VC++ version 6 compiler. 373 */ 374 375 #if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 ) 376 # define TABLE_ALIGN 32 377 #endif 378 379 /* 11. REDUCE CODE AND TABLE SIZE 380 381 This replaces some expanded macros with function calls if AES_ASM_V2 or 382 AES_ASM_V2C are defined 383 */ 384 385 #if 1 && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) 386 # define REDUCE_CODE_SIZE 387 #endif 388 389 /* 12. TABLE OPTIONS 390 391 This cipher proceeds by repeating in a number of cycles known as 'rounds' 392 which are implemented by a round function which can optionally be speeded 393 up using tables. The basic tables are each 256 32-bit words, with either 394 one or four tables being required for each round function depending on 395 how much speed is required. The encryption and decryption round functions 396 are different and the last encryption and decryption round functions are 397 different again making four different round functions in all. 398 399 This means that: 400 1. Normal encryption and decryption rounds can each use either 0, 1 401 or 4 tables and table spaces of 0, 1024 or 4096 bytes each. 402 2. The last encryption and decryption rounds can also use either 0, 1 403 or 4 tables and table spaces of 0, 1024 or 4096 bytes each. 404 405 Include or exclude the appropriate definitions below to set the number 406 of tables used by this implementation. 407 */ 408 409 #if !defined(CONFIG_SMALL) || defined(CONFIG_SMALL_NO_CRYPTO) /* set tables for the normal encryption round */ 410 # define ENC_ROUND FOUR_TABLES 411 #elif 0 412 # define ENC_ROUND ONE_TABLE 413 #else 414 # define ENC_ROUND NO_TABLES 415 #endif 416 417 #if !defined(CONFIG_SMALL) || defined(CONFIG_SMALL_NO_CRYPTO) /* set tables for the last encryption round */ 418 # define LAST_ENC_ROUND FOUR_TABLES 419 #elif 0 420 # define LAST_ENC_ROUND ONE_TABLE 421 #else 422 # define LAST_ENC_ROUND NO_TABLES 423 #endif 424 425 #if !defined(CONFIG_SMALL) || defined(CONFIG_SMALL_NO_CRYPTO) /* set tables for the normal decryption round */ 426 # define DEC_ROUND FOUR_TABLES 427 #elif 0 428 # define DEC_ROUND ONE_TABLE 429 #else 430 # define DEC_ROUND NO_TABLES 431 #endif 432 433 #if !defined(CONFIG_SMALL) || defined(CONFIG_SMALL_NO_CRYPTO) /* set tables for the last decryption round */ 434 # define LAST_DEC_ROUND FOUR_TABLES 435 #elif 0 436 # define LAST_DEC_ROUND ONE_TABLE 437 #else 438 # define LAST_DEC_ROUND NO_TABLES 439 #endif 440 441 /* The decryption key schedule can be speeded up with tables in the same 442 way that the round functions can. Include or exclude the following 443 defines to set this requirement. 444 */ 445 #if !defined(CONFIG_SMALL) || defined(CONFIG_SMALL_NO_CRYPTO) 446 # define KEY_SCHED FOUR_TABLES 447 #elif 0 448 # define KEY_SCHED ONE_TABLE 449 #else 450 # define KEY_SCHED NO_TABLES 451 #endif 452 453 /* ---- END OF USER CONFIGURED OPTIONS ---- */ 454 455 /* VIA ACE support is only available for VC++ and GCC */ 456 457 #if !defined( _MSC_VER ) && !defined( __GNUC__ ) 458 # if defined( ASSUME_VIA_ACE_PRESENT ) 459 # undef ASSUME_VIA_ACE_PRESENT 460 # endif 461 # if defined( USE_VIA_ACE_IF_PRESENT ) 462 # undef USE_VIA_ACE_IF_PRESENT 463 # endif 464 #endif 465 466 #if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT ) 467 # define USE_VIA_ACE_IF_PRESENT 468 #endif 469 470 /* define to reverse decryption key schedule */ 471 #if 1 || defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS ) 472 # define AES_REV_DKS 473 #endif 474 475 /* Intel AESNI uses a decryption key schedule in the encryption order */ 476 #if defined( USE_INTEL_AES_IF_PRESENT ) && defined ( AES_REV_DKS ) 477 # undef AES_REV_DKS 478 #endif 479 480 /* Assembler support requires the use of platform byte order */ 481 482 #if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \ 483 && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER) 484 # undef ALGORITHM_BYTE_ORDER 485 # define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER 486 #endif 487 488 /* In this implementation the columns of the state array are each held in 489 32-bit words. The state array can be held in various ways: in an array 490 of words, in a number of individual word variables or in a number of 491 processor registers. The following define maps a variable name x and 492 a column number c to the way the state array variable is to be held. 493 The first define below maps the state into an array x[c] whereas the 494 second form maps the state into a number of individual variables x0, 495 x1, etc. Another form could map individual state columns to machine 496 register names. 497 */ 498 499 #if defined( ARRAYS ) 500 # define s(x,c) x[c] 501 #else 502 # define s(x,c) x##c 503 #endif 504 505 /* This implementation provides subroutines for encryption, decryption 506 and for setting the three key lengths (separately) for encryption 507 and decryption. Since not all functions are needed, masks are set 508 up here to determine which will be implemented in C 509 */ 510 511 #if !defined( AES_ENCRYPT ) 512 # define EFUNCS_IN_C 0 513 #elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ 514 || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) 515 # define EFUNCS_IN_C ENC_KEYING_IN_C 516 #elif !defined( ASM_X86_V2 ) 517 # define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C ) 518 #else 519 # define EFUNCS_IN_C 0 520 #endif 521 522 #if !defined( AES_DECRYPT ) 523 # define DFUNCS_IN_C 0 524 #elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ 525 || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) 526 # define DFUNCS_IN_C DEC_KEYING_IN_C 527 #elif !defined( ASM_X86_V2 ) 528 # define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C ) 529 #else 530 # define DFUNCS_IN_C 0 531 #endif 532 533 #define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C ) 534 535 /* END OF CONFIGURATION OPTIONS */ 536 537 #define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) 538 539 /* Disable or report errors on some combinations of options */ 540 541 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES 542 # undef LAST_ENC_ROUND 543 # define LAST_ENC_ROUND NO_TABLES 544 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES 545 # undef LAST_ENC_ROUND 546 # define LAST_ENC_ROUND ONE_TABLE 547 #endif 548 549 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE 550 # undef ENC_UNROLL 551 # define ENC_UNROLL NONE 552 #endif 553 554 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES 555 # undef LAST_DEC_ROUND 556 # define LAST_DEC_ROUND NO_TABLES 557 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES 558 # undef LAST_DEC_ROUND 559 # define LAST_DEC_ROUND ONE_TABLE 560 #endif 561 562 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE 563 # undef DEC_UNROLL 564 # define DEC_UNROLL NONE 565 #endif 566 567 #if defined( bswap32 ) 568 # define aes_sw32 bswap32 569 #elif defined( bswap_32 ) 570 # define aes_sw32 bswap_32 571 #else 572 # define brot(x,n) (((uint32_t)(x) << n) | ((uint32_t)(x) >> (32 - n))) 573 # define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) 574 #endif 575 576 /* upr(x,n): rotates bytes within words by n positions, moving bytes to 577 higher index positions with wrap around into low positions 578 ups(x,n): moves bytes by n positions to higher index positions in 579 words but without wrap around 580 bval(x,n): extracts a byte from a word 581 582 WARNING: The definitions given here are intended only for use with 583 unsigned variables and with shift counts that are compile 584 time constants 585 */ 586 587 #if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN ) 588 # define upr(x,n) (((uint32_t)(x) << (8 * (n))) | ((uint32_t)(x) >> (32 - 8 * (n)))) 589 # define ups(x,n) ((uint32_t) (x) << (8 * (n))) 590 # define bval(x,n) to_byte((x) >> (8 * (n))) 591 # define bytes2word(b0, b1, b2, b3) \ 592 (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0)) 593 #endif 594 595 #if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN ) 596 # define upr(x,n) (((uint32_t)(x) >> (8 * (n))) | ((uint32_t)(x) << (32 - 8 * (n)))) 597 # define ups(x,n) ((uint32_t) (x) >> (8 * (n))) 598 # define bval(x,n) to_byte((x) >> (24 - 8 * (n))) 599 # define bytes2word(b0, b1, b2, b3) \ 600 (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | ((uint32_t)(b2) << 8) | (b3)) 601 #endif 602 603 #if defined( SAFE_IO ) 604 # define word_in(x,c) bytes2word(((const uint8_t*)(x)+4*c)[0], ((const uint8_t*)(x)+4*c)[1], \ 605 ((const uint8_t*)(x)+4*c)[2], ((const uint8_t*)(x)+4*c)[3]) 606 # define word_out(x,c,v) { ((uint8_t*)(x)+4*c)[0] = bval(v,0); ((uint8_t*)(x)+4*c)[1] = bval(v,1); \ 607 ((uint8_t*)(x)+4*c)[2] = bval(v,2); ((uint8_t*)(x)+4*c)[3] = bval(v,3); } 608 #elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER ) 609 # define word_in(x,c) (*((uint32_t*)(x)+(c))) 610 # define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v)) 611 #else 612 # define word_in(x,c) aes_sw32(*((uint32_t*)(x)+(c))) 613 # define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = aes_sw32(v)) 614 #endif 615 616 /* the finite field modular polynomial and elements */ 617 618 #define WPOLY 0x011b 619 #define BPOLY 0x1b 620 621 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ 622 623 #define gf_c1 0x80808080 624 #define gf_c2 0x7f7f7f7f 625 #define gf_mulx(x) ((((x) & gf_c2) << 1) ^ ((((x) & gf_c1) >> 7) * BPOLY)) 626 627 /* The following defines provide alternative definitions of gf_mulx that might 628 give improved performance if a fast 32-bit multiply is not available. Note 629 that a temporary variable u needs to be defined where gf_mulx is used. 630 631 #define gf_mulx(x) (u = (x) & gf_c1, u |= (u >> 1), ((x) & gf_c2) << 1) ^ ((u >> 3) | (u >> 6)) 632 #define gf_c4 (0x01010101 * BPOLY) 633 #define gf_mulx(x) (u = (x) & gf_c1, ((x) & gf_c2) << 1) ^ ((u - (u >> 7)) & gf_c4) 634 */ 635 636 /* Work out which tables are needed for the different options */ 637 638 #if defined( ASM_X86_V1C ) 639 # if defined( ENC_ROUND ) 640 # undef ENC_ROUND 641 # endif 642 # define ENC_ROUND FOUR_TABLES 643 # if defined( LAST_ENC_ROUND ) 644 # undef LAST_ENC_ROUND 645 # endif 646 # define LAST_ENC_ROUND FOUR_TABLES 647 # if defined( DEC_ROUND ) 648 # undef DEC_ROUND 649 # endif 650 # define DEC_ROUND FOUR_TABLES 651 # if defined( LAST_DEC_ROUND ) 652 # undef LAST_DEC_ROUND 653 # endif 654 # define LAST_DEC_ROUND FOUR_TABLES 655 # if defined( KEY_SCHED ) 656 # undef KEY_SCHED 657 # define KEY_SCHED FOUR_TABLES 658 # endif 659 #endif 660 661 #if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C ) 662 # if ENC_ROUND == ONE_TABLE 663 # define FT1_SET 664 # elif ENC_ROUND == FOUR_TABLES 665 # define FT4_SET 666 # else 667 # define SBX_SET 668 # endif 669 # if LAST_ENC_ROUND == ONE_TABLE 670 # define FL1_SET 671 # elif LAST_ENC_ROUND == FOUR_TABLES 672 # define FL4_SET 673 # elif !defined( SBX_SET ) 674 # define SBX_SET 675 # endif 676 #endif 677 678 #if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C ) 679 # if DEC_ROUND == ONE_TABLE 680 # define IT1_SET 681 # elif DEC_ROUND == FOUR_TABLES 682 # define IT4_SET 683 # else 684 # define ISB_SET 685 # endif 686 # if LAST_DEC_ROUND == ONE_TABLE 687 # define IL1_SET 688 # elif LAST_DEC_ROUND == FOUR_TABLES 689 # define IL4_SET 690 # elif !defined(ISB_SET) 691 # define ISB_SET 692 # endif 693 #endif 694 695 #if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) 696 # if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C)) 697 # if KEY_SCHED == ONE_TABLE 698 # if !defined( FL1_SET ) && !defined( FL4_SET ) 699 # define LS1_SET 700 # endif 701 # elif KEY_SCHED == FOUR_TABLES 702 # if !defined( FL4_SET ) 703 # define LS4_SET 704 # endif 705 # elif !defined( SBX_SET ) 706 # define SBX_SET 707 # endif 708 # endif 709 # if (FUNCS_IN_C & DEC_KEYING_IN_C) 710 # if KEY_SCHED == ONE_TABLE 711 # define IM1_SET 712 # elif KEY_SCHED == FOUR_TABLES 713 # define IM4_SET 714 # elif !defined( SBX_SET ) 715 # define SBX_SET 716 # endif 717 # endif 718 #endif 719 720 /* generic definitions of Rijndael macros that use tables */ 721 722 #define no_table(x,box,vf,rf,c) bytes2word( \ 723 box[bval(vf(x,0,c),rf(0,c))], \ 724 box[bval(vf(x,1,c),rf(1,c))], \ 725 box[bval(vf(x,2,c),rf(2,c))], \ 726 box[bval(vf(x,3,c),rf(3,c))]) 727 728 #define one_table(x,op,tab,vf,rf,c) \ 729 ( tab[bval(vf(x,0,c),rf(0,c))] \ 730 ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ 731 ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ 732 ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) 733 734 #define four_tables(x,tab,vf,rf,c) \ 735 ( tab[0][bval(vf(x,0,c),rf(0,c))] \ 736 ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ 737 ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ 738 ^ tab[3][bval(vf(x,3,c),rf(3,c))]) 739 740 #define vf1(x,r,c) (x) 741 #define rf1(r,c) (r) 742 #define rf2(r,c) ((8+r-c)&3) 743 744 /* perform forward and inverse column mix operation on four bytes in long word x in */ 745 /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */ 746 747 #if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) 748 749 #if defined( FM4_SET ) /* not currently used */ 750 # define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0) 751 #elif defined( FM1_SET ) /* not currently used */ 752 # define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0) 753 #else 754 # define dec_fmvars uint32_t g2 755 # define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) 756 #endif 757 758 #if defined( IM4_SET ) 759 # define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) 760 #elif defined( IM1_SET ) 761 # define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0) 762 #else 763 # define dec_imvars uint32_t g2, g4, g9 764 # define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \ 765 (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1)) 766 #endif 767 768 #if defined( FL4_SET ) 769 # define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) 770 #elif defined( LS4_SET ) 771 # define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c) 772 #elif defined( FL1_SET ) 773 # define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c) 774 #elif defined( LS1_SET ) 775 # define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c) 776 #else 777 # define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c) 778 #endif 779 780 #endif 781 782 #if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET ) 783 # define ISB_SET 784 #endif 785 786 #endif 787