1 /*
2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #define BR_POWER_ASM_MACROS 1
26 #include "inner.h"
27
28 #if BR_POWER8
29
30 /* see bearssl_block.h */
31 void
br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys * ctx,const void * key,size_t len)32 br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
33 const void *key, size_t len)
34 {
35 ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
36 ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
37 }
38
39 static void
cbcenc_128(const unsigned char * sk,const unsigned char * iv,unsigned char * buf,size_t len)40 cbcenc_128(const unsigned char *sk,
41 const unsigned char *iv, unsigned char *buf, size_t len)
42 {
43 long cc;
44
45 #if BR_POWER8_LE
46 static const uint32_t idx2be[] = {
47 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
48 };
49 #endif
50
51 cc = 0;
52 asm volatile (
53
54 /*
55 * Load subkeys into v0..v10
56 */
57 lxvw4x(32, %[cc], %[sk])
58 addi(%[cc], %[cc], 16)
59 lxvw4x(33, %[cc], %[sk])
60 addi(%[cc], %[cc], 16)
61 lxvw4x(34, %[cc], %[sk])
62 addi(%[cc], %[cc], 16)
63 lxvw4x(35, %[cc], %[sk])
64 addi(%[cc], %[cc], 16)
65 lxvw4x(36, %[cc], %[sk])
66 addi(%[cc], %[cc], 16)
67 lxvw4x(37, %[cc], %[sk])
68 addi(%[cc], %[cc], 16)
69 lxvw4x(38, %[cc], %[sk])
70 addi(%[cc], %[cc], 16)
71 lxvw4x(39, %[cc], %[sk])
72 addi(%[cc], %[cc], 16)
73 lxvw4x(40, %[cc], %[sk])
74 addi(%[cc], %[cc], 16)
75 lxvw4x(41, %[cc], %[sk])
76 addi(%[cc], %[cc], 16)
77 lxvw4x(42, %[cc], %[sk])
78
79 #if BR_POWER8_LE
80 /*
81 * v15 = constant for byteswapping words
82 */
83 lxvw4x(47, 0, %[idx2be])
84 #endif
85 /*
86 * Load IV into v16.
87 */
88 lxvw4x(48, 0, %[iv])
89 #if BR_POWER8_LE
90 vperm(16, 16, 16, 15)
91 #endif
92
93 mtctr(%[num_blocks])
94 label(loop)
95 /*
96 * Load next plaintext word and XOR with current IV.
97 */
98 lxvw4x(49, 0, %[buf])
99 #if BR_POWER8_LE
100 vperm(17, 17, 17, 15)
101 #endif
102 vxor(16, 16, 17)
103
104 /*
105 * Encrypt the block.
106 */
107 vxor(16, 16, 0)
108 vcipher(16, 16, 1)
109 vcipher(16, 16, 2)
110 vcipher(16, 16, 3)
111 vcipher(16, 16, 4)
112 vcipher(16, 16, 5)
113 vcipher(16, 16, 6)
114 vcipher(16, 16, 7)
115 vcipher(16, 16, 8)
116 vcipher(16, 16, 9)
117 vcipherlast(16, 16, 10)
118
119 /*
120 * Store back result (with byteswap)
121 */
122 #if BR_POWER8_LE
123 vperm(17, 16, 16, 15)
124 stxvw4x(49, 0, %[buf])
125 #else
126 stxvw4x(48, 0, %[buf])
127 #endif
128 addi(%[buf], %[buf], 16)
129
130 bdnz(loop)
131
132 : [cc] "+b" (cc), [buf] "+b" (buf)
133 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
134 #if BR_POWER8_LE
135 , [idx2be] "b" (idx2be)
136 #endif
137 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
138 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
139 "ctr", "memory"
140 );
141 }
142
143 static void
cbcenc_192(const unsigned char * sk,const unsigned char * iv,unsigned char * buf,size_t len)144 cbcenc_192(const unsigned char *sk,
145 const unsigned char *iv, unsigned char *buf, size_t len)
146 {
147 long cc;
148
149 #if BR_POWER8_LE
150 static const uint32_t idx2be[] = {
151 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
152 };
153 #endif
154
155 cc = 0;
156 asm volatile (
157
158 /*
159 * Load subkeys into v0..v12
160 */
161 lxvw4x(32, %[cc], %[sk])
162 addi(%[cc], %[cc], 16)
163 lxvw4x(33, %[cc], %[sk])
164 addi(%[cc], %[cc], 16)
165 lxvw4x(34, %[cc], %[sk])
166 addi(%[cc], %[cc], 16)
167 lxvw4x(35, %[cc], %[sk])
168 addi(%[cc], %[cc], 16)
169 lxvw4x(36, %[cc], %[sk])
170 addi(%[cc], %[cc], 16)
171 lxvw4x(37, %[cc], %[sk])
172 addi(%[cc], %[cc], 16)
173 lxvw4x(38, %[cc], %[sk])
174 addi(%[cc], %[cc], 16)
175 lxvw4x(39, %[cc], %[sk])
176 addi(%[cc], %[cc], 16)
177 lxvw4x(40, %[cc], %[sk])
178 addi(%[cc], %[cc], 16)
179 lxvw4x(41, %[cc], %[sk])
180 addi(%[cc], %[cc], 16)
181 lxvw4x(42, %[cc], %[sk])
182 addi(%[cc], %[cc], 16)
183 lxvw4x(43, %[cc], %[sk])
184 addi(%[cc], %[cc], 16)
185 lxvw4x(44, %[cc], %[sk])
186
187 #if BR_POWER8_LE
188 /*
189 * v15 = constant for byteswapping words
190 */
191 lxvw4x(47, 0, %[idx2be])
192 #endif
193 /*
194 * Load IV into v16.
195 */
196 lxvw4x(48, 0, %[iv])
197 #if BR_POWER8_LE
198 vperm(16, 16, 16, 15)
199 #endif
200
201 mtctr(%[num_blocks])
202 label(loop)
203 /*
204 * Load next plaintext word and XOR with current IV.
205 */
206 lxvw4x(49, 0, %[buf])
207 #if BR_POWER8_LE
208 vperm(17, 17, 17, 15)
209 #endif
210 vxor(16, 16, 17)
211
212 /*
213 * Encrypt the block.
214 */
215 vxor(16, 16, 0)
216 vcipher(16, 16, 1)
217 vcipher(16, 16, 2)
218 vcipher(16, 16, 3)
219 vcipher(16, 16, 4)
220 vcipher(16, 16, 5)
221 vcipher(16, 16, 6)
222 vcipher(16, 16, 7)
223 vcipher(16, 16, 8)
224 vcipher(16, 16, 9)
225 vcipher(16, 16, 10)
226 vcipher(16, 16, 11)
227 vcipherlast(16, 16, 12)
228
229 /*
230 * Store back result (with byteswap)
231 */
232 #if BR_POWER8_LE
233 vperm(17, 16, 16, 15)
234 stxvw4x(49, 0, %[buf])
235 #else
236 stxvw4x(48, 0, %[buf])
237 #endif
238 addi(%[buf], %[buf], 16)
239
240 bdnz(loop)
241
242 : [cc] "+b" (cc), [buf] "+b" (buf)
243 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
244 #if BR_POWER8_LE
245 , [idx2be] "b" (idx2be)
246 #endif
247 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
248 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
249 "ctr", "memory"
250 );
251 }
252
253 static void
cbcenc_256(const unsigned char * sk,const unsigned char * iv,unsigned char * buf,size_t len)254 cbcenc_256(const unsigned char *sk,
255 const unsigned char *iv, unsigned char *buf, size_t len)
256 {
257 long cc;
258
259 #if BR_POWER8_LE
260 static const uint32_t idx2be[] = {
261 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
262 };
263 #endif
264
265 cc = 0;
266 asm volatile (
267
268 /*
269 * Load subkeys into v0..v14
270 */
271 lxvw4x(32, %[cc], %[sk])
272 addi(%[cc], %[cc], 16)
273 lxvw4x(33, %[cc], %[sk])
274 addi(%[cc], %[cc], 16)
275 lxvw4x(34, %[cc], %[sk])
276 addi(%[cc], %[cc], 16)
277 lxvw4x(35, %[cc], %[sk])
278 addi(%[cc], %[cc], 16)
279 lxvw4x(36, %[cc], %[sk])
280 addi(%[cc], %[cc], 16)
281 lxvw4x(37, %[cc], %[sk])
282 addi(%[cc], %[cc], 16)
283 lxvw4x(38, %[cc], %[sk])
284 addi(%[cc], %[cc], 16)
285 lxvw4x(39, %[cc], %[sk])
286 addi(%[cc], %[cc], 16)
287 lxvw4x(40, %[cc], %[sk])
288 addi(%[cc], %[cc], 16)
289 lxvw4x(41, %[cc], %[sk])
290 addi(%[cc], %[cc], 16)
291 lxvw4x(42, %[cc], %[sk])
292 addi(%[cc], %[cc], 16)
293 lxvw4x(43, %[cc], %[sk])
294 addi(%[cc], %[cc], 16)
295 lxvw4x(44, %[cc], %[sk])
296 addi(%[cc], %[cc], 16)
297 lxvw4x(45, %[cc], %[sk])
298 addi(%[cc], %[cc], 16)
299 lxvw4x(46, %[cc], %[sk])
300
301 #if BR_POWER8_LE
302 /*
303 * v15 = constant for byteswapping words
304 */
305 lxvw4x(47, 0, %[idx2be])
306 #endif
307 /*
308 * Load IV into v16.
309 */
310 lxvw4x(48, 0, %[iv])
311 #if BR_POWER8_LE
312 vperm(16, 16, 16, 15)
313 #endif
314
315 mtctr(%[num_blocks])
316 label(loop)
317 /*
318 * Load next plaintext word and XOR with current IV.
319 */
320 lxvw4x(49, 0, %[buf])
321 #if BR_POWER8_LE
322 vperm(17, 17, 17, 15)
323 #endif
324 vxor(16, 16, 17)
325
326 /*
327 * Encrypt the block.
328 */
329 vxor(16, 16, 0)
330 vcipher(16, 16, 1)
331 vcipher(16, 16, 2)
332 vcipher(16, 16, 3)
333 vcipher(16, 16, 4)
334 vcipher(16, 16, 5)
335 vcipher(16, 16, 6)
336 vcipher(16, 16, 7)
337 vcipher(16, 16, 8)
338 vcipher(16, 16, 9)
339 vcipher(16, 16, 10)
340 vcipher(16, 16, 11)
341 vcipher(16, 16, 12)
342 vcipher(16, 16, 13)
343 vcipherlast(16, 16, 14)
344
345 /*
346 * Store back result (with byteswap)
347 */
348 #if BR_POWER8_LE
349 vperm(17, 16, 16, 15)
350 stxvw4x(49, 0, %[buf])
351 #else
352 stxvw4x(48, 0, %[buf])
353 #endif
354 addi(%[buf], %[buf], 16)
355
356 bdnz(loop)
357
358 : [cc] "+b" (cc), [buf] "+b" (buf)
359 : [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
360 #if BR_POWER8_LE
361 , [idx2be] "b" (idx2be)
362 #endif
363 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
364 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
365 "ctr", "memory"
366 );
367 }
368
369 /* see bearssl_block.h */
370 void
br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys * ctx,void * iv,void * data,size_t len)371 br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
372 void *iv, void *data, size_t len)
373 {
374 if (len > 0) {
375 switch (ctx->num_rounds) {
376 case 10:
377 cbcenc_128(ctx->skey.skni, iv, data, len);
378 break;
379 case 12:
380 cbcenc_192(ctx->skey.skni, iv, data, len);
381 break;
382 default:
383 cbcenc_256(ctx->skey.skni, iv, data, len);
384 break;
385 }
386 memcpy(iv, (unsigned char *)data + (len - 16), 16);
387 }
388 }
389
390 /* see bearssl_block.h */
391 const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
392 sizeof(br_aes_pwr8_cbcenc_keys),
393 16,
394 4,
395 (void (*)(const br_block_cbcenc_class **, const void *, size_t))
396 &br_aes_pwr8_cbcenc_init,
397 (void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
398 &br_aes_pwr8_cbcenc_run
399 };
400
401 /* see bearssl_block.h */
402 const br_block_cbcenc_class *
br_aes_pwr8_cbcenc_get_vtable(void)403 br_aes_pwr8_cbcenc_get_vtable(void)
404 {
405 return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable : NULL;
406 }
407
408 #else
409
410 /* see bearssl_block.h */
411 const br_block_cbcenc_class *
br_aes_pwr8_cbcenc_get_vtable(void)412 br_aes_pwr8_cbcenc_get_vtable(void)
413 {
414 return NULL;
415 }
416
417 #endif
418