xref: /freebsd/crypto/krb5/src/lib/crypto/builtin/aes/aescrypt.c (revision 7f2fe78b9dd5f51c821d771b63d2e096f6fd49e9)
1 /*
2 ---------------------------------------------------------------------------
3 Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
4 
5 The redistribution and use of this software (with or without changes)
6 is allowed without the payment of fees or royalties provided that:
7 
8   source code distributions include the above copyright notice, this
9   list of conditions and the following disclaimer;
10 
11   binary distributions include the above copyright notice, this list
12   of conditions and the following disclaimer in their documentation.
13 
14 This software is provided 'as is' with no explicit or implied warranties
15 in respect of its operation, including, but not limited to, correctness
16 and fitness for purpose.
17 ---------------------------------------------------------------------------
18 Issue Date: 20/12/2007
19 */
20 
21 #include "aesopt.h"
22 #include "aestab.h"
23 
24 #include "crypto_int.h"
25 #ifdef K5_BUILTIN_AES
26 
/* aes_xi(x) produces the name under which the functions in this file are
   defined.  In the #else branch it simply pastes the "aes_" prefix onto x.
   When USE_INTEL_AES_IF_PRESENT is defined this file does not define
   aes_xi itself; presumably aes_ni.h supplies it - confirm there. */
27 #if defined( USE_INTEL_AES_IF_PRESENT )
28 #  include "aes_ni.h"
29 #else
30 /* map names here to provide the external API ('name' -> 'aes_name') */
31 #  define aes_xi(x) aes_ ## x
32 #endif
33 
34 #if defined(__cplusplus)
35 extern "C"
36 {
37 #endif
38 
/* Helpers for moving the four-word cipher state in and out of locals.
   s(), word_in() and word_out() are not defined in this file; the two
   locals() definitions below suggest s(x,c) names either x[c] or the
   scalar x##c - confirm in the header that defines it.

   si(y,x,k,c): state word c of y = word c taken from buffer x via
                word_in(), XORed with key word (k)[c].
   so(y,x,c):   hand state word c of x to word_out() for position c of
                buffer y. */
39 #define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])
40 #define so(y,x,c)   word_out(y, c, s(x,c))
41 
/* locals(y,x) expands to the declarator list for two working states:
   two 4-element arrays when ARRAYS is defined, otherwise eight scalars
   named x0..x3 and y0..y3.  The caller supplies the type in front. */
42 #if defined(ARRAYS)
43 #define locals(y,x)     x[4],y[4]
44 #else
45 #define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
46 #endif
47 
/* The macros below expand to several statements and are not wrapped in
   do { } while (0), so they must only be used where a statement sequence
   is valid (not as the unbraced body of an if or loop).

   l_copy(y,x):      copy all four state words of x into y.  The expansion
                     already ends in ';', so "l_copy(a, b);" leaves one
                     extra empty statement.
   state_in(y,x,k):  si() applied to columns 0..3.
   state_out(y,x):   so() applied to columns 0..3.
   round(rm,y,x,k):  apply the per-column round macro rm to columns 0..3,
                     computing state y from state x with round key k. */
48 #define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
49                         s(y,2) = s(x,2); s(y,3) = s(x,3);
50 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
51 #define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
52 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
53 
54 #if ( FUNCS_IN_C & ENCRYPTION_IN_C )
55 
56 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
57    Pentium optimisation with small code but this is poor for decryption
58    so we need to control this with the following VC++ pragmas
59 */
60 
61 #if defined( _MSC_VER ) && !defined( _WIN64 ) && !defined( __clang__ )
62 #pragma optimize( "s", on )
63 #endif
64 
65 /* Given the column (c) of the output state variable, the following
66    macros give the input state variables which are needed in its
67    computation for each row (r) of the state. All the alternative
68    macros give the same end values but expand into different ways
69    of calculating these values.  In particular the complex macro
70    used for dynamically variable block sizes is designed to expand
71    to a compile time constant whenever possible but will expand to
72    conditional clauses on some branches (I am grateful to Frank
73    Yellin for this construction)
74 */
75 
/* fwd_var(x,r,c): the state word of x that supplies row r of output
   column c in a forward round.  Every branch below resolves to
   s(x, (c + r) mod 4). */
76 #define fwd_var(x,r,c)\
77  ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
78  : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
79  : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
80  :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
81 
/* fwd_rnd(y,x,k,c): one column of an ordinary forward round.  Word c of y
   becomes round-key word (k)[c] XORed with a lookup expression over x.
   The form of the lookup depends on whether FT4_SET, FT1_SET or neither
   is defined; four_tables, one_table, no_table, t_use, upr, rf1 and
   fwd_mcol are defined outside this file.  Only the no-table form calls
   fwd_mcol(), so the two table forms undefine dec_fmvars, which stops
   aes_xi(encrypt) from declaring the fwd_mcol() temporaries. */
82 #if defined(FT4_SET)
83 #undef  dec_fmvars
84 #define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
85 #elif defined(FT1_SET)
86 #undef  dec_fmvars
87 #define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
88 #else
89 #define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
90 #endif
91 
/* fwd_lrnd(y,x,k,c): one column of the last forward round.  Same shape as
   fwd_rnd but selected by FL4_SET / FL1_SET, using the t_use(f,l) tables
   (and ups instead of upr in the one-table form); the no-table form does
   not apply fwd_mcol(). */
92 #if defined(FL4_SET)
93 #define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
94 #elif defined(FL1_SET)
95 #define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
96 #else
97 #define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
98 #endif
99 
/*
   Encrypt one block.

   in   input block, read through state_in() (which uses word_in())
   out  output block, written through state_out() (which uses word_out())
   cx   encryption context; only cx->ks (key schedule words) and
        cx->inf.b[0] are read here

   cx->inf.b[0] must be 10, 12 or 14 times AES_BLOCK_SIZE.  Any other
   value returns EXIT_FAILURE before 'in' is read or 'out' is written.
   In the fully unrolled path that multiplier is the number of rounds
   executed.  The loop paths derive the same counts with shifts by 4 and
   5, which matches only if AES_BLOCK_SIZE is 16 (defined outside this
   file - confirm).

   Returns EXIT_SUCCESS after the output block has been written.
*/
aes_xi(encrypt)100 AES_RETURN aes_xi(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
101 {   uint32_t         locals(b0, b1);
102     const uint32_t   *kp;
103 #if defined( dec_fmvars )
104     dec_fmvars; /* declare variables for fwd_mcol() if needed */
105 #endif
106 
    /* reject a context whose inf.b[0] is not one of the three supported values */
107 	if(cx->inf.b[0] != 10 * AES_BLOCK_SIZE && cx->inf.b[0] != 12 * AES_BLOCK_SIZE && cx->inf.b[0] != 14 * AES_BLOCK_SIZE)
108 		return EXIT_FAILURE;
109 
    /* start at the beginning of the key schedule; b0 = input XOR kp[0..3] */
110 	kp = cx->ks;
111     state_in(b0, in, kp);
112 
113 #if (ENC_UNROLL == FULL)
114 
    /* The cases fall through on purpose: the larger settings run two extra
       rounds each and advance kp by two round keys, so the shared tail
       under the smallest case can use fixed offsets from kp.  Rounds
       alternate b0 -> b1 -> b0, so no state copy is needed.  There is no
       default label; the check above already excluded other values. */
115     switch(cx->inf.b[0])
116     {
117     case 14 * AES_BLOCK_SIZE:
118         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
119         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
120         kp += 2 * N_COLS;
        /* fall through */
121     case 12 * AES_BLOCK_SIZE:
122         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
123         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
124         kp += 2 * N_COLS;
        /* fall through */
125     case 10 * AES_BLOCK_SIZE:
126         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
127         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
128         round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
129         round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
130         round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
131         round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
132         round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
133         round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
134         round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
135         round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
136     }
137 
138 #else
139 
140 #if (ENC_UNROLL == PARTIAL)
    /* two rounds per iteration (b0 -> b1 -> b0), then one more ordinary
       round into b1 so that the shared last round below reads b1 */
141     {   uint32_t    rnd;
142         for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1ul; ++rnd)
143         {
144             kp += N_COLS;
145             round(fwd_rnd, b1, b0, kp);
146             kp += N_COLS;
147             round(fwd_rnd, b0, b1, kp);
148         }
149         kp += N_COLS;
150         round(fwd_rnd,  b1, b0, kp);
151 #else
    /* one round per iteration into b1, copied back into b0 each time; b1
       still holds the latest state when the loop ends */
152     {   uint32_t    rnd;
153         for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1ul; ++rnd)
154         {
155             kp += N_COLS;
156             round(fwd_rnd, b1, b0, kp);
157             l_copy(b0, b1);
158         }
159 #endif
        /* last round, shared by both loop variants; this closes the block
           opened inside whichever #if branch was compiled */
160         kp += N_COLS;
161         round(fwd_lrnd, b0, b1, kp);
162     }
163 #endif
164 
    /* every path leaves the final state in b0 */
165     state_out(out, b0);
166     return EXIT_SUCCESS;
167 }
168 
169 #endif
170 
171 #if ( FUNCS_IN_C & DECRYPTION_IN_C)
172 
173 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
174    Pentium optimisation with small code but this is poor for decryption
175    so we need to control this with the following VC++ pragmas
176 */
177 
178 #if defined( _MSC_VER ) && !defined( _WIN64 ) && !defined( __clang__ )
179 #pragma optimize( "t", on )
180 #endif
181 
182 /* Given the column (c) of the output state variable, the following
183    macros give the input state variables which are needed in its
184    computation for each row (r) of the state. All the alternative
185    macros give the same end values but expand into different ways
186    of calculating these values.  In particular the complex macro
187    used for dynamically variable block sizes is designed to expand
188    to a compile time constant whenever possible but will expand to
189    conditional clauses on some branches (I am grateful to Frank
190    Yellin for this construction)
191 */
192 
/* inv_var(x,r,c): the state word of x that supplies row r of output
   column c in an inverse round.  Every branch below resolves to
   s(x, (c - r) mod 4), the opposite rotation to fwd_var. */
193 #define inv_var(x,r,c)\
194  ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
195  : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
196  : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
197  :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
198 
/* inv_rnd(y,x,k,c): one column of an ordinary inverse round.  The table
   forms (IT4_SET / IT1_SET) XOR key word (k)[c] with a lookup over x and
   undefine dec_imvars, which stops aes_xi(decrypt) from declaring the
   inv_mcol() temporaries.  The no-table form differs in order: the key
   word is XORed in first and inv_mcol() is applied to the combined value.
   four_tables, one_table, no_table, t_use, upr, rf1 and inv_mcol are
   defined outside this file. */
199 #if defined(IT4_SET)
200 #undef  dec_imvars
201 #define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
202 #elif defined(IT1_SET)
203 #undef  dec_imvars
204 #define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
205 #else
206 #define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
207 #endif
208 
/* inv_lrnd(y,x,k,c): one column of the last inverse round.  Selected by
   IL4_SET / IL1_SET and using the t_use(i,l) tables (and ups instead of
   upr in the one-table form); the no-table form does not apply
   inv_mcol(). */
209 #if defined(IL4_SET)
210 #define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
211 #elif defined(IL1_SET)
212 #define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
213 #else
214 #define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
215 #endif
216 
217 /* This code can work with the decryption key schedule in the   */
218 /* order that is used for encryption (where the 1st decryption  */
219 /* round key is at the high end of the schedule) or with a key  */
220 /* schedule that has been reversed to put the 1st decryption    */
221 /* round key at the low end of the schedule in memory (when     */
222 /* AES_REV_DKS is defined)                                      */
223 
/* key_ofs:    0 when AES_REV_DKS is defined, 1 otherwise.  aes_xi(decrypt)
               uses it to choose the end of cx->ks it starts from.
   rnd_key(n): address n round-key strides (n * N_COLS words) away from
               kp: upwards when AES_REV_DKS is defined, downwards
               otherwise.  It refers to the variable kp at the point of
               use rather than taking it as a parameter, and n is not
               parenthesised in the expansion, so pass only a simple
               constant (this file passes 1, 0 and negative literals). */
224 #ifdef AES_REV_DKS
225 #define key_ofs     0
226 #define rnd_key(n)  (kp + n * N_COLS)
227 #else
228 #define key_ofs     1
229 #define rnd_key(n)  (kp - n * N_COLS)
230 #endif
231 
/*
   Decrypt one block.

   in   input block, read through state_in() (which uses word_in())
   out  output block, written through state_out() (which uses word_out())
   cx   decryption context; only cx->ks (key schedule words) and
        cx->inf.b[0] are read here

   cx->inf.b[0] must be 10, 12 or 14 times AES_BLOCK_SIZE.  Any other
   value returns EXIT_FAILURE before 'in' is read or 'out' is written.
   The key schedule is walked from the high end downwards, or from the
   low end upwards when AES_REV_DKS is defined (see key_ofs / rnd_key).
   The shifts by 2, 4 and 5 below match the round count and key stride
   only if AES_BLOCK_SIZE is 16 and N_COLS is 4 (both defined outside
   this file - confirm).

   Returns EXIT_SUCCESS after the output block has been written.
*/
232 AES_RETURN aes_xi(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
233 {   uint32_t        locals(b0, b1);
234 #if defined( dec_imvars )
235     dec_imvars; /* declare variables for inv_mcol() if needed */
236 #endif
237     const uint32_t *kp;
238 
    /* reject a context whose inf.b[0] is not one of the three supported values */
239 	if(cx->inf.b[0] != 10 * AES_BLOCK_SIZE && cx->inf.b[0] != 12 * AES_BLOCK_SIZE && cx->inf.b[0] != 14 * AES_BLOCK_SIZE)
240 		return EXIT_FAILURE;
241 
    /* first key: (cx->inf.b[0] >> 2) words into ks when key_ofs is 1,
       ks itself when key_ofs is 0; b0 = input XOR those four key words */
242     kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0);
243     state_in(b0, in, kp);
244 
245 #if (DEC_UNROLL == FULL)
246 
    /* Re-base kp at the opposite end of the schedule from the key just
       used.  rnd_key(-n) is then n strides from that end, back towards
       the starting key, and rnd_key(0) is the end itself.  The cases fall
       through on purpose so the larger settings run their extra rounds
       first.  There is no default label; the check above already excluded
       other values. */
247     kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2));
248     switch(cx->inf.b[0])
249     {
250     case 14 * AES_BLOCK_SIZE:
251         round(inv_rnd,  b1, b0, rnd_key(-13));
252         round(inv_rnd,  b0, b1, rnd_key(-12));
        /* fall through */
253     case 12 * AES_BLOCK_SIZE:
254         round(inv_rnd,  b1, b0, rnd_key(-11));
255         round(inv_rnd,  b0, b1, rnd_key(-10));
        /* fall through */
256     case 10 * AES_BLOCK_SIZE:
257         round(inv_rnd,  b1, b0, rnd_key(-9));
258         round(inv_rnd,  b0, b1, rnd_key(-8));
259         round(inv_rnd,  b1, b0, rnd_key(-7));
260         round(inv_rnd,  b0, b1, rnd_key(-6));
261         round(inv_rnd,  b1, b0, rnd_key(-5));
262         round(inv_rnd,  b0, b1, rnd_key(-4));
263         round(inv_rnd,  b1, b0, rnd_key(-3));
264         round(inv_rnd,  b0, b1, rnd_key(-2));
265         round(inv_rnd,  b1, b0, rnd_key(-1));
266         round(inv_lrnd, b0, b1, rnd_key( 0));
267     }
268 
269 #else
270 
271 #if (DEC_UNROLL == PARTIAL)
    /* each "kp = rnd_key(1)" moves kp by one round key in the decryption
       direction; two rounds per iteration (b0 -> b1 -> b0), then one more
       ordinary round into b1 so the shared last round below reads b1 */
272     {   uint32_t    rnd;
273         for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1ul; ++rnd)
274         {
275             kp = rnd_key(1);
276             round(inv_rnd, b1, b0, kp);
277             kp = rnd_key(1);
278             round(inv_rnd, b0, b1, kp);
279         }
280         kp = rnd_key(1);
281         round(inv_rnd, b1, b0, kp);
282 #else
    /* one round per iteration into b1, copied back into b0 each time; b1
       still holds the latest state when the loop ends */
283     {   uint32_t    rnd;
284         for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1ul; ++rnd)
285         {
286             kp = rnd_key(1);
287             round(inv_rnd, b1, b0, kp);
288             l_copy(b0, b1);
289         }
290 #endif
        /* last round, shared by both loop variants; the brace below closes
           the block opened inside whichever #if branch was compiled */
291         kp = rnd_key(1);
292         round(inv_lrnd, b0, b1, kp);
293         }
294 #endif
295 
    /* every path leaves the final state in b0 */
296     state_out(out, b0);
297     return EXIT_SUCCESS;
298 }
299 
300 #endif
301 
302 #if defined(__cplusplus)
303 }
304 #endif
305 
306 #endif /* K5_BUILTIN_AES */
307