1*0957b409SSimon J. Gerraty /*
2*0957b409SSimon J. Gerraty * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
3*0957b409SSimon J. Gerraty *
4*0957b409SSimon J. Gerraty * Permission is hereby granted, free of charge, to any person obtaining
5*0957b409SSimon J. Gerraty * a copy of this software and associated documentation files (the
6*0957b409SSimon J. Gerraty * "Software"), to deal in the Software without restriction, including
7*0957b409SSimon J. Gerraty * without limitation the rights to use, copy, modify, merge, publish,
8*0957b409SSimon J. Gerraty * distribute, sublicense, and/or sell copies of the Software, and to
9*0957b409SSimon J. Gerraty * permit persons to whom the Software is furnished to do so, subject to
10*0957b409SSimon J. Gerraty * the following conditions:
11*0957b409SSimon J. Gerraty *
12*0957b409SSimon J. Gerraty * The above copyright notice and this permission notice shall be
13*0957b409SSimon J. Gerraty * included in all copies or substantial portions of the Software.
14*0957b409SSimon J. Gerraty *
15*0957b409SSimon J. Gerraty * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16*0957b409SSimon J. Gerraty * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17*0957b409SSimon J. Gerraty * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18*0957b409SSimon J. Gerraty * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19*0957b409SSimon J. Gerraty * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20*0957b409SSimon J. Gerraty * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21*0957b409SSimon J. Gerraty * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*0957b409SSimon J. Gerraty * SOFTWARE.
23*0957b409SSimon J. Gerraty */
24*0957b409SSimon J. Gerraty
25*0957b409SSimon J. Gerraty #define BR_POWER_ASM_MACROS 1
26*0957b409SSimon J. Gerraty #include "inner.h"
27*0957b409SSimon J. Gerraty
28*0957b409SSimon J. Gerraty #if BR_POWER8
29*0957b409SSimon J. Gerraty
30*0957b409SSimon J. Gerraty /* see bearssl_block.h */
31*0957b409SSimon J. Gerraty const br_block_ctrcbc_class *
br_aes_pwr8_ctrcbc_get_vtable(void)32*0957b409SSimon J. Gerraty br_aes_pwr8_ctrcbc_get_vtable(void)
33*0957b409SSimon J. Gerraty {
34*0957b409SSimon J. Gerraty return br_aes_pwr8_supported() ? &br_aes_pwr8_ctrcbc_vtable : NULL;
35*0957b409SSimon J. Gerraty }
36*0957b409SSimon J. Gerraty
37*0957b409SSimon J. Gerraty /* see bearssl_block.h */
38*0957b409SSimon J. Gerraty void
br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys * ctx,const void * key,size_t len)39*0957b409SSimon J. Gerraty br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
40*0957b409SSimon J. Gerraty const void *key, size_t len)
41*0957b409SSimon J. Gerraty {
42*0957b409SSimon J. Gerraty ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
43*0957b409SSimon J. Gerraty ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
44*0957b409SSimon J. Gerraty }
45*0957b409SSimon J. Gerraty
/*
 * Register conventions for CTR + CBC-MAC:
 *
 *   AES subkeys are in registers v0 to v10/v12/v14 (depending on key size)
 *   Register v15 contains the byteswap index register (little-endian only)
 *   Register v16 contains the CTR counter value
 *   Register v17 contains the CBC-MAC current value
 *   Registers v18 to v27 are scratch
 *   Counter increment uses v28, v29 and v30
 *
 * For CTR alone:
 *
 *   AES subkeys are in registers v0 to v10/v12/v14 (depending on key size)
 *   Register v15 contains the byteswap index register (little-endian only)
 *   Registers v16 to v19 contain the CTR counter values (four blocks)
 *   Registers v20 to v27 are scratch
 *   Counter increment uses v28, v29 and v30
 */
64*0957b409SSimon J. Gerraty
/*
 * Load the 11 AES-128 subkeys from %[sk] into VSX registers 32..42
 * (these are the registers the vector instructions address as v0..v10).
 * %[cc] is the byte offset into the key schedule: it is advanced by 16
 * between loads (ten times in total), so the code using this macro must
 * reset %[cc] afterwards if it needs it again.
 */
#define LOAD_SUBKEYS_128 \
		lxvw4x(32, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(33, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(34, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(35, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(36, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(37, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(38, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(39, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(40, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(41, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(42, %[cc], %[sk])
87*0957b409SSimon J. Gerraty
/*
 * Load the 13 AES-192 subkeys: everything LOAD_SUBKEYS_128 loads, then
 * two more 16-byte subkeys into VSX registers 43 and 44 (v11 and v12).
 * %[cc] keeps advancing by 16 before each extra load.
 */
#define LOAD_SUBKEYS_192 \
		LOAD_SUBKEYS_128 \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(43, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(44, %[cc], %[sk])
94*0957b409SSimon J. Gerraty
/*
 * Load the 15 AES-256 subkeys: everything LOAD_SUBKEYS_192 loads, then
 * two more 16-byte subkeys into VSX registers 45 and 46 (v13 and v14).
 * %[cc] keeps advancing by 16 before each extra load.
 */
#define LOAD_SUBKEYS_256 \
		LOAD_SUBKEYS_192 \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(45, %[cc], %[sk])   \
		addi(%[cc], %[cc], 16)     \
		lxvw4x(46, %[cc], %[sk])
101*0957b409SSimon J. Gerraty
/*
 * Encrypt, in place, the block held in vector register x with AES-128:
 * XOR with subkey v0, nine vcipher rounds with subkeys v1..v9, then
 * vcipherlast with subkey v10. Subkeys must already be in v0..v10.
 */
#define BLOCK_ENCRYPT_128(x) \
		vxor(x, x, 0) \
		vcipher(x, x, 1) \
		vcipher(x, x, 2) \
		vcipher(x, x, 3) \
		vcipher(x, x, 4) \
		vcipher(x, x, 5) \
		vcipher(x, x, 6) \
		vcipher(x, x, 7) \
		vcipher(x, x, 8) \
		vcipher(x, x, 9) \
		vcipherlast(x, x, 10)
114*0957b409SSimon J. Gerraty
/*
 * Encrypt, in place, the block held in vector register x with AES-192:
 * XOR with subkey v0, eleven vcipher rounds with subkeys v1..v11, then
 * vcipherlast with subkey v12. Subkeys must already be in v0..v12.
 */
#define BLOCK_ENCRYPT_192(x) \
		vxor(x, x, 0) \
		vcipher(x, x, 1) \
		vcipher(x, x, 2) \
		vcipher(x, x, 3) \
		vcipher(x, x, 4) \
		vcipher(x, x, 5) \
		vcipher(x, x, 6) \
		vcipher(x, x, 7) \
		vcipher(x, x, 8) \
		vcipher(x, x, 9) \
		vcipher(x, x, 10) \
		vcipher(x, x, 11) \
		vcipherlast(x, x, 12)
129*0957b409SSimon J. Gerraty
/*
 * Encrypt, in place, the block held in vector register x with AES-256:
 * XOR with subkey v0, thirteen vcipher rounds with subkeys v1..v13, then
 * vcipherlast with subkey v14. Subkeys must already be in v0..v14.
 */
#define BLOCK_ENCRYPT_256(x) \
		vxor(x, x, 0) \
		vcipher(x, x, 1) \
		vcipher(x, x, 2) \
		vcipher(x, x, 3) \
		vcipher(x, x, 4) \
		vcipher(x, x, 5) \
		vcipher(x, x, 6) \
		vcipher(x, x, 7) \
		vcipher(x, x, 8) \
		vcipher(x, x, 9) \
		vcipher(x, x, 10) \
		vcipher(x, x, 11) \
		vcipher(x, x, 12) \
		vcipher(x, x, 13) \
		vcipherlast(x, x, 14)
146*0957b409SSimon J. Gerraty
/*
 * AES-128 encryption of two independent blocks, in place, in vector
 * registers x and y. The two instruction streams are interleaved round
 * by round; each block goes through exactly the same sequence as in
 * BLOCK_ENCRYPT_128 (subkeys v0..v10).
 */
#define BLOCK_ENCRYPT_X2_128(x, y) \
		vxor(x, x, 0) \
		vxor(y, y, 0) \
		vcipher(x, x, 1) \
		vcipher(y, y, 1) \
		vcipher(x, x, 2) \
		vcipher(y, y, 2) \
		vcipher(x, x, 3) \
		vcipher(y, y, 3) \
		vcipher(x, x, 4) \
		vcipher(y, y, 4) \
		vcipher(x, x, 5) \
		vcipher(y, y, 5) \
		vcipher(x, x, 6) \
		vcipher(y, y, 6) \
		vcipher(x, x, 7) \
		vcipher(y, y, 7) \
		vcipher(x, x, 8) \
		vcipher(y, y, 8) \
		vcipher(x, x, 9) \
		vcipher(y, y, 9) \
		vcipherlast(x, x, 10) \
		vcipherlast(y, y, 10)
170*0957b409SSimon J. Gerraty
/*
 * AES-192 encryption of two independent blocks, in place, in vector
 * registers x and y. The two instruction streams are interleaved round
 * by round; each block goes through exactly the same sequence as in
 * BLOCK_ENCRYPT_192 (subkeys v0..v12).
 */
#define BLOCK_ENCRYPT_X2_192(x, y) \
		vxor(x, x, 0) \
		vxor(y, y, 0) \
		vcipher(x, x, 1) \
		vcipher(y, y, 1) \
		vcipher(x, x, 2) \
		vcipher(y, y, 2) \
		vcipher(x, x, 3) \
		vcipher(y, y, 3) \
		vcipher(x, x, 4) \
		vcipher(y, y, 4) \
		vcipher(x, x, 5) \
		vcipher(y, y, 5) \
		vcipher(x, x, 6) \
		vcipher(y, y, 6) \
		vcipher(x, x, 7) \
		vcipher(y, y, 7) \
		vcipher(x, x, 8) \
		vcipher(y, y, 8) \
		vcipher(x, x, 9) \
		vcipher(y, y, 9) \
		vcipher(x, x, 10) \
		vcipher(y, y, 10) \
		vcipher(x, x, 11) \
		vcipher(y, y, 11) \
		vcipherlast(x, x, 12) \
		vcipherlast(y, y, 12)
198*0957b409SSimon J. Gerraty
/*
 * AES-256 encryption of two independent blocks, in place, in vector
 * registers x and y. The two instruction streams are interleaved round
 * by round; each block goes through exactly the same sequence as in
 * BLOCK_ENCRYPT_256 (subkeys v0..v14).
 */
#define BLOCK_ENCRYPT_X2_256(x, y) \
		vxor(x, x, 0) \
		vxor(y, y, 0) \
		vcipher(x, x, 1) \
		vcipher(y, y, 1) \
		vcipher(x, x, 2) \
		vcipher(y, y, 2) \
		vcipher(x, x, 3) \
		vcipher(y, y, 3) \
		vcipher(x, x, 4) \
		vcipher(y, y, 4) \
		vcipher(x, x, 5) \
		vcipher(y, y, 5) \
		vcipher(x, x, 6) \
		vcipher(y, y, 6) \
		vcipher(x, x, 7) \
		vcipher(y, y, 7) \
		vcipher(x, x, 8) \
		vcipher(y, y, 8) \
		vcipher(x, x, 9) \
		vcipher(y, y, 9) \
		vcipher(x, x, 10) \
		vcipher(y, y, 10) \
		vcipher(x, x, 11) \
		vcipher(y, y, 11) \
		vcipher(x, x, 12) \
		vcipher(y, y, 12) \
		vcipher(x, x, 13) \
		vcipher(y, y, 13) \
		vcipherlast(x, x, 14) \
		vcipherlast(y, y, 14)
230*0957b409SSimon J. Gerraty
/*
 * AES-128 encryption of four independent blocks, in place, in vector
 * registers x0..x3. The four instruction streams are interleaved round
 * by round; each block goes through exactly the same sequence as in
 * BLOCK_ENCRYPT_128 (subkeys v0..v10).
 */
#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \
		vxor(x0, x0, 0) \
		vxor(x1, x1, 0) \
		vxor(x2, x2, 0) \
		vxor(x3, x3, 0) \
		vcipher(x0, x0, 1) \
		vcipher(x1, x1, 1) \
		vcipher(x2, x2, 1) \
		vcipher(x3, x3, 1) \
		vcipher(x0, x0, 2) \
		vcipher(x1, x1, 2) \
		vcipher(x2, x2, 2) \
		vcipher(x3, x3, 2) \
		vcipher(x0, x0, 3) \
		vcipher(x1, x1, 3) \
		vcipher(x2, x2, 3) \
		vcipher(x3, x3, 3) \
		vcipher(x0, x0, 4) \
		vcipher(x1, x1, 4) \
		vcipher(x2, x2, 4) \
		vcipher(x3, x3, 4) \
		vcipher(x0, x0, 5) \
		vcipher(x1, x1, 5) \
		vcipher(x2, x2, 5) \
		vcipher(x3, x3, 5) \
		vcipher(x0, x0, 6) \
		vcipher(x1, x1, 6) \
		vcipher(x2, x2, 6) \
		vcipher(x3, x3, 6) \
		vcipher(x0, x0, 7) \
		vcipher(x1, x1, 7) \
		vcipher(x2, x2, 7) \
		vcipher(x3, x3, 7) \
		vcipher(x0, x0, 8) \
		vcipher(x1, x1, 8) \
		vcipher(x2, x2, 8) \
		vcipher(x3, x3, 8) \
		vcipher(x0, x0, 9) \
		vcipher(x1, x1, 9) \
		vcipher(x2, x2, 9) \
		vcipher(x3, x3, 9) \
		vcipherlast(x0, x0, 10) \
		vcipherlast(x1, x1, 10) \
		vcipherlast(x2, x2, 10) \
		vcipherlast(x3, x3, 10)
276*0957b409SSimon J. Gerraty
/*
 * AES-192 encryption of four independent blocks, in place, in vector
 * registers x0..x3. The four instruction streams are interleaved round
 * by round; each block goes through exactly the same sequence as in
 * BLOCK_ENCRYPT_192 (subkeys v0..v12).
 */
#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \
		vxor(x0, x0, 0) \
		vxor(x1, x1, 0) \
		vxor(x2, x2, 0) \
		vxor(x3, x3, 0) \
		vcipher(x0, x0, 1) \
		vcipher(x1, x1, 1) \
		vcipher(x2, x2, 1) \
		vcipher(x3, x3, 1) \
		vcipher(x0, x0, 2) \
		vcipher(x1, x1, 2) \
		vcipher(x2, x2, 2) \
		vcipher(x3, x3, 2) \
		vcipher(x0, x0, 3) \
		vcipher(x1, x1, 3) \
		vcipher(x2, x2, 3) \
		vcipher(x3, x3, 3) \
		vcipher(x0, x0, 4) \
		vcipher(x1, x1, 4) \
		vcipher(x2, x2, 4) \
		vcipher(x3, x3, 4) \
		vcipher(x0, x0, 5) \
		vcipher(x1, x1, 5) \
		vcipher(x2, x2, 5) \
		vcipher(x3, x3, 5) \
		vcipher(x0, x0, 6) \
		vcipher(x1, x1, 6) \
		vcipher(x2, x2, 6) \
		vcipher(x3, x3, 6) \
		vcipher(x0, x0, 7) \
		vcipher(x1, x1, 7) \
		vcipher(x2, x2, 7) \
		vcipher(x3, x3, 7) \
		vcipher(x0, x0, 8) \
		vcipher(x1, x1, 8) \
		vcipher(x2, x2, 8) \
		vcipher(x3, x3, 8) \
		vcipher(x0, x0, 9) \
		vcipher(x1, x1, 9) \
		vcipher(x2, x2, 9) \
		vcipher(x3, x3, 9) \
		vcipher(x0, x0, 10) \
		vcipher(x1, x1, 10) \
		vcipher(x2, x2, 10) \
		vcipher(x3, x3, 10) \
		vcipher(x0, x0, 11) \
		vcipher(x1, x1, 11) \
		vcipher(x2, x2, 11) \
		vcipher(x3, x3, 11) \
		vcipherlast(x0, x0, 12) \
		vcipherlast(x1, x1, 12) \
		vcipherlast(x2, x2, 12) \
		vcipherlast(x3, x3, 12)
330*0957b409SSimon J. Gerraty
/*
 * AES-256 encryption of four independent blocks, in place, in vector
 * registers x0..x3. The four instruction streams are interleaved round
 * by round; each block goes through exactly the same sequence as in
 * BLOCK_ENCRYPT_256 (subkeys v0..v14).
 */
#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \
		vxor(x0, x0, 0) \
		vxor(x1, x1, 0) \
		vxor(x2, x2, 0) \
		vxor(x3, x3, 0) \
		vcipher(x0, x0, 1) \
		vcipher(x1, x1, 1) \
		vcipher(x2, x2, 1) \
		vcipher(x3, x3, 1) \
		vcipher(x0, x0, 2) \
		vcipher(x1, x1, 2) \
		vcipher(x2, x2, 2) \
		vcipher(x3, x3, 2) \
		vcipher(x0, x0, 3) \
		vcipher(x1, x1, 3) \
		vcipher(x2, x2, 3) \
		vcipher(x3, x3, 3) \
		vcipher(x0, x0, 4) \
		vcipher(x1, x1, 4) \
		vcipher(x2, x2, 4) \
		vcipher(x3, x3, 4) \
		vcipher(x0, x0, 5) \
		vcipher(x1, x1, 5) \
		vcipher(x2, x2, 5) \
		vcipher(x3, x3, 5) \
		vcipher(x0, x0, 6) \
		vcipher(x1, x1, 6) \
		vcipher(x2, x2, 6) \
		vcipher(x3, x3, 6) \
		vcipher(x0, x0, 7) \
		vcipher(x1, x1, 7) \
		vcipher(x2, x2, 7) \
		vcipher(x3, x3, 7) \
		vcipher(x0, x0, 8) \
		vcipher(x1, x1, 8) \
		vcipher(x2, x2, 8) \
		vcipher(x3, x3, 8) \
		vcipher(x0, x0, 9) \
		vcipher(x1, x1, 9) \
		vcipher(x2, x2, 9) \
		vcipher(x3, x3, 9) \
		vcipher(x0, x0, 10) \
		vcipher(x1, x1, 10) \
		vcipher(x2, x2, 10) \
		vcipher(x3, x3, 10) \
		vcipher(x0, x0, 11) \
		vcipher(x1, x1, 11) \
		vcipher(x2, x2, 11) \
		vcipher(x3, x3, 11) \
		vcipher(x0, x0, 12) \
		vcipher(x1, x1, 12) \
		vcipher(x2, x2, 12) \
		vcipher(x3, x3, 12) \
		vcipher(x0, x0, 13) \
		vcipher(x1, x1, 13) \
		vcipher(x2, x2, 13) \
		vcipher(x3, x3, 13) \
		vcipherlast(x0, x0, 14) \
		vcipherlast(x1, x1, 14) \
		vcipherlast(x2, x2, 14) \
		vcipherlast(x3, x3, 14)
392*0957b409SSimon J. Gerraty
/*
 * Byte-order helpers.
 *
 * On little-endian builds (BR_POWER8_LE), idx2be is a permutation index
 * table that BYTESWAP_INIT loads into VSX register 47 (v15); BYTESWAP()
 * and BYTESWAPX() then run vperm through that index register, and
 * BYTESWAP_REG adds the table to the asm input operand list.
 *
 * On other builds no swap is needed: BYTESWAP_INIT, BYTESWAP() and
 * BYTESWAP_REG expand to nothing, and BYTESWAPX(d, s) reduces to a plain
 * register copy (vand of s with itself into d).
 */
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
	0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#define BYTESWAP_INIT     lxvw4x(47, 0, %[idx2be])
#define BYTESWAP(x)       vperm(x, x, x, 15)
#define BYTESWAPX(d, s)   vperm(d, s, s, 15)
#define BYTESWAP_REG      , [idx2be] "b" (idx2be)
#else
#define BYTESWAP_INIT
#define BYTESWAP(x)
#define BYTESWAPX(d, s)   vand(d, s, s)
#define BYTESWAP_REG
#endif
407*0957b409SSimon J. Gerraty
/*
 * 128-bit counter increment.
 *
 * ctrinc and ctrinc_x4 are four-word constants whose last word is the
 * increment (1 or 4). INCR_128_INIT / INCR_128_X4_INIT load one of them
 * into VSX register 60 (v28).
 *
 * INCR_128(d, s) sets d = s + v28, treating the register as one 128-bit
 * integer made of four 32-bit words: each step computes the per-word
 * carries (vaddcuw), adds modulo 2^32 (vadduwm), and moves the carries
 * up by one word (vsldoi by 4 bytes) for the next step. Three
 * carry-propagation steps are needed to ripple through all four words.
 * v29 and v30 are used as scratch.
 */
static const uint32_t ctrinc[] = {
	0, 0, 0, 1
};
static const uint32_t ctrinc_x4[] = {
	0, 0, 0, 4
};
#define INCR_128_INIT      lxvw4x(60, 0, %[ctrinc])
#define INCR_128_X4_INIT   lxvw4x(60, 0, %[ctrinc_x4])
#define INCR_128(d, s) \
		vaddcuw(29, s, 28)      \
		vadduwm(d, s, 28)       \
		vsldoi(30, 29, 29, 4)   \
		vaddcuw(29, d, 30)      \
		vadduwm(d, d, 30)       \
		vsldoi(30, 29, 29, 4)   \
		vaddcuw(29, d, 30)      \
		vadduwm(d, d, 30)       \
		vsldoi(30, 29, 29, 4)   \
		vadduwm(d, d, 30)
427*0957b409SSimon J. Gerraty
/*
 * MKCTR(size) defines the static function ctr_<size>(), which applies
 * AES-<size> CTR keystream to buf, four blocks at a time.
 *
 *   sk             key schedule, as loaded by LOAD_SUBKEYS_<size>
 *   ctrbuf         four consecutive 16-byte counter blocks (64 bytes);
 *                  read on entry, each advanced by 4 per iteration
 *                  (ctrinc_x4), and written back on exit
 *   buf            data, XORed in place with the encrypted counters;
 *                  64 bytes are consumed per iteration
 *   num_blocks_x4  number of 64-byte iterations to run
 *
 * A zero count returns immediately without touching ctrbuf or buf.
 */
#define MKCTR(size) \
static void \
ctr_ ## size(const unsigned char *sk, \
	unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
{ \
	long cc, cc0, cc1, cc2, cc3; \
 \
	/* \
	 * The loop is driven by mtctr/bdnz: the body always runs once \
	 * before the count register is decremented and tested, so a \
	 * count of zero would wrap around instead of skipping the loop. \
	 */ \
	if (num_blocks_x4 == 0) { \
		return; \
	} \
 \
	cc = 0; \
	cc0 = 0; \
	cc1 = 16; \
	cc2 = 32; \
	cc3 = 48; \
	asm volatile ( \
 \
		/* \
		 * Load subkeys into v0..v10/v12/v14 (depending on size) \
		 */ \
		LOAD_SUBKEYS_ ## size \
		li(%[cc], 0) \
 \
		BYTESWAP_INIT \
		INCR_128_X4_INIT \
 \
		/* \
		 * Load current CTR counters into v16 to v19. \
		 */ \
		lxvw4x(48, %[cc0], %[ctrbuf]) \
		lxvw4x(49, %[cc1], %[ctrbuf]) \
		lxvw4x(50, %[cc2], %[ctrbuf]) \
		lxvw4x(51, %[cc3], %[ctrbuf]) \
		BYTESWAP(16) \
		BYTESWAP(17) \
		BYTESWAP(18) \
		BYTESWAP(19) \
 \
		mtctr(%[num_blocks_x4]) \
 \
	label(loop) \
		/* \
		 * Compute next counter values into v20..v23. \
		 */ \
		INCR_128(20, 16) \
		INCR_128(21, 17) \
		INCR_128(22, 18) \
		INCR_128(23, 19) \
 \
		/* \
		 * Encrypt counter values and XOR into next data blocks. \
		 */ \
		lxvw4x(56, %[cc0], %[buf]) \
		lxvw4x(57, %[cc1], %[buf]) \
		lxvw4x(58, %[cc2], %[buf]) \
		lxvw4x(59, %[cc3], %[buf]) \
		BYTESWAP(24) \
		BYTESWAP(25) \
		BYTESWAP(26) \
		BYTESWAP(27) \
		BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
		vxor(16, 16, 24) \
		vxor(17, 17, 25) \
		vxor(18, 18, 26) \
		vxor(19, 19, 27) \
		BYTESWAP(16) \
		BYTESWAP(17) \
		BYTESWAP(18) \
		BYTESWAP(19) \
		stxvw4x(48, %[cc0], %[buf]) \
		stxvw4x(49, %[cc1], %[buf]) \
		stxvw4x(50, %[cc2], %[buf]) \
		stxvw4x(51, %[cc3], %[buf]) \
 \
		/* \
		 * Update counters and data pointer. \
		 */ \
		vand(16, 20, 20) \
		vand(17, 21, 21) \
		vand(18, 22, 22) \
		vand(19, 23, 23) \
		addi(%[buf], %[buf], 64) \
 \
		bdnz(loop) \
 \
		/* \
		 * Write back new counter values. \
		 */ \
		BYTESWAP(16) \
		BYTESWAP(17) \
		BYTESWAP(18) \
		BYTESWAP(19) \
		stxvw4x(48, %[cc0], %[ctrbuf]) \
		stxvw4x(49, %[cc1], %[ctrbuf]) \
		stxvw4x(50, %[cc2], %[ctrbuf]) \
		stxvw4x(51, %[cc3], %[ctrbuf]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf), \
	[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
	[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
	BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
  "v30", "ctr", "memory" \
	); \
}
533*0957b409SSimon J. Gerraty
534*0957b409SSimon J. Gerraty MKCTR(128)
535*0957b409SSimon J. Gerraty MKCTR(192)
536*0957b409SSimon J. Gerraty MKCTR(256)
537*0957b409SSimon J. Gerraty
/*
 * MKCBCMAC(size) defines the static function cbcmac_<size>(), which
 * updates a CBC-MAC value with AES-<size> over consecutive blocks.
 *
 *   sk          key schedule, as loaded by LOAD_SUBKEYS_<size>
 *   cbcmac      16-byte running CBC-MAC value; read on entry and
 *               overwritten with the new value on exit
 *   buf         input data, read 16 bytes per iteration (not modified)
 *   num_blocks  number of 16-byte blocks to process
 *
 * A zero count returns immediately and leaves cbcmac unchanged.
 */
#define MKCBCMAC(size) \
static void \
cbcmac_ ## size(const unsigned char *sk, \
	unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
{ \
	long cc; \
 \
	/* \
	 * The loop is driven by mtctr/bdnz: the body always runs once \
	 * before the count register is decremented and tested, so a \
	 * count of zero would wrap around instead of skipping the loop. \
	 */ \
	if (num_blocks == 0) { \
		return; \
	} \
 \
	cc = 0; \
	asm volatile ( \
 \
		/* \
		 * Load subkeys into v0..v10/v12/v14 (depending on size) \
		 */ \
		LOAD_SUBKEYS_ ## size \
		li(%[cc], 0) \
 \
		BYTESWAP_INIT \
 \
		/* \
		 * Load current CBC-MAC value into v16. \
		 */ \
		lxvw4x(48, %[cc], %[cbcmac]) \
		BYTESWAP(16) \
 \
		mtctr(%[num_blocks]) \
 \
	label(loop) \
		/* \
		 * Load next block, XOR into current CBC-MAC value, \
		 * and then encrypt it. \
		 */ \
		lxvw4x(49, %[cc], %[buf]) \
		BYTESWAP(17) \
		vxor(16, 16, 17) \
		BLOCK_ENCRYPT_ ## size(16) \
		addi(%[buf], %[buf], 16) \
 \
		bdnz(loop) \
 \
		/* \
		 * Write back new CBC-MAC value. \
		 */ \
		BYTESWAP(16) \
		stxvw4x(48, %[cc], %[cbcmac]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
	BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
  "v30", "ctr", "memory" \
	); \
}
592*0957b409SSimon J. Gerraty
593*0957b409SSimon J. Gerraty MKCBCMAC(128)
594*0957b409SSimon J. Gerraty MKCBCMAC(192)
595*0957b409SSimon J. Gerraty MKCBCMAC(256)
596*0957b409SSimon J. Gerraty
597*0957b409SSimon J. Gerraty #define MKENCRYPT(size) \
598*0957b409SSimon J. Gerraty static void \
599*0957b409SSimon J. Gerraty ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
600*0957b409SSimon J. Gerraty unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
601*0957b409SSimon J. Gerraty size_t num_blocks) \
602*0957b409SSimon J. Gerraty { \
603*0957b409SSimon J. Gerraty long cc; \
604*0957b409SSimon J. Gerraty \
605*0957b409SSimon J. Gerraty cc = 0; \
606*0957b409SSimon J. Gerraty asm volatile ( \
607*0957b409SSimon J. Gerraty \
608*0957b409SSimon J. Gerraty /* \
609*0957b409SSimon J. Gerraty * Load subkeys into v0..v10 \
610*0957b409SSimon J. Gerraty */ \
611*0957b409SSimon J. Gerraty LOAD_SUBKEYS_ ## size \
612*0957b409SSimon J. Gerraty li(%[cc], 0) \
613*0957b409SSimon J. Gerraty \
614*0957b409SSimon J. Gerraty BYTESWAP_INIT \
615*0957b409SSimon J. Gerraty INCR_128_INIT \
616*0957b409SSimon J. Gerraty \
617*0957b409SSimon J. Gerraty /* \
618*0957b409SSimon J. Gerraty * Load current CTR counter into v16, and current \
619*0957b409SSimon J. Gerraty * CBC-MAC IV into v17. \
620*0957b409SSimon J. Gerraty */ \
621*0957b409SSimon J. Gerraty lxvw4x(48, %[cc], %[ctr]) \
622*0957b409SSimon J. Gerraty lxvw4x(49, %[cc], %[cbcmac]) \
623*0957b409SSimon J. Gerraty BYTESWAP(16) \
624*0957b409SSimon J. Gerraty BYTESWAP(17) \
625*0957b409SSimon J. Gerraty \
626*0957b409SSimon J. Gerraty /* \
627*0957b409SSimon J. Gerraty * At each iteration, we do two parallel encryption: \
628*0957b409SSimon J. Gerraty * - new counter value for encryption of the next block; \
629*0957b409SSimon J. Gerraty * - CBC-MAC over the previous encrypted block. \
630*0957b409SSimon J. Gerraty * Thus, each plaintext block implies two AES instances, \
631*0957b409SSimon J. Gerraty * over two successive iterations. This requires a single \
632*0957b409SSimon J. Gerraty * counter encryption before the loop, and a single \
633*0957b409SSimon J. Gerraty * CBC-MAC encryption after the loop. \
634*0957b409SSimon J. Gerraty */ \
635*0957b409SSimon J. Gerraty \
636*0957b409SSimon J. Gerraty /* \
637*0957b409SSimon J. Gerraty * Encrypt first block (into v20). \
638*0957b409SSimon J. Gerraty */ \
639*0957b409SSimon J. Gerraty lxvw4x(52, %[cc], %[buf]) \
640*0957b409SSimon J. Gerraty BYTESWAP(20) \
641*0957b409SSimon J. Gerraty INCR_128(22, 16) \
642*0957b409SSimon J. Gerraty BLOCK_ENCRYPT_ ## size(16) \
643*0957b409SSimon J. Gerraty vxor(20, 20, 16) \
644*0957b409SSimon J. Gerraty BYTESWAPX(21, 20) \
645*0957b409SSimon J. Gerraty stxvw4x(53, %[cc], %[buf]) \
646*0957b409SSimon J. Gerraty vand(16, 22, 22) \
647*0957b409SSimon J. Gerraty addi(%[buf], %[buf], 16) \
648*0957b409SSimon J. Gerraty \
649*0957b409SSimon J. Gerraty /* \
650*0957b409SSimon J. Gerraty * Load loop counter; skip the loop if there is only \
651*0957b409SSimon J. Gerraty * one block in total (already handled by the boundary \
652*0957b409SSimon J. Gerraty * conditions). \
653*0957b409SSimon J. Gerraty */ \
654*0957b409SSimon J. Gerraty mtctr(%[num_blocks]) \
655*0957b409SSimon J. Gerraty bdz(fastexit) \
656*0957b409SSimon J. Gerraty \
657*0957b409SSimon J. Gerraty label(loop) \
658*0957b409SSimon J. Gerraty /* \
659*0957b409SSimon J. Gerraty * Upon loop entry: \
660*0957b409SSimon J. Gerraty * v16 counter value for next block \
661*0957b409SSimon J. Gerraty * v17 current CBC-MAC value \
662*0957b409SSimon J. Gerraty * v20 encrypted previous block \
663*0957b409SSimon J. Gerraty */ \
664*0957b409SSimon J. Gerraty vxor(17, 17, 20) \
665*0957b409SSimon J. Gerraty INCR_128(22, 16) \
666*0957b409SSimon J. Gerraty lxvw4x(52, %[cc], %[buf]) \
667*0957b409SSimon J. Gerraty BYTESWAP(20) \
668*0957b409SSimon J. Gerraty BLOCK_ENCRYPT_X2_ ## size(16, 17) \
669*0957b409SSimon J. Gerraty vxor(20, 20, 16) \
670*0957b409SSimon J. Gerraty BYTESWAPX(21, 20) \
671*0957b409SSimon J. Gerraty stxvw4x(53, %[cc], %[buf]) \
672*0957b409SSimon J. Gerraty addi(%[buf], %[buf], 16) \
673*0957b409SSimon J. Gerraty vand(16, 22, 22) \
674*0957b409SSimon J. Gerraty \
675*0957b409SSimon J. Gerraty bdnz(loop) \
676*0957b409SSimon J. Gerraty \
677*0957b409SSimon J. Gerraty label(fastexit) \
678*0957b409SSimon J. Gerraty vxor(17, 17, 20) \
679*0957b409SSimon J. Gerraty BLOCK_ENCRYPT_ ## size(17) \
680*0957b409SSimon J. Gerraty BYTESWAP(16) \
681*0957b409SSimon J. Gerraty BYTESWAP(17) \
682*0957b409SSimon J. Gerraty stxvw4x(48, %[cc], %[ctr]) \
683*0957b409SSimon J. Gerraty stxvw4x(49, %[cc], %[cbcmac]) \
684*0957b409SSimon J. Gerraty \
685*0957b409SSimon J. Gerraty : [cc] "+b" (cc), [buf] "+b" (buf) \
686*0957b409SSimon J. Gerraty : [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
687*0957b409SSimon J. Gerraty [num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
688*0957b409SSimon J. Gerraty BYTESWAP_REG \
689*0957b409SSimon J. Gerraty : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
690*0957b409SSimon J. Gerraty "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
691*0957b409SSimon J. Gerraty "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
692*0957b409SSimon J. Gerraty "v30", "ctr", "memory" \
693*0957b409SSimon J. Gerraty ); \
694*0957b409SSimon J. Gerraty }
695*0957b409SSimon J. Gerraty
696*0957b409SSimon J. Gerraty MKENCRYPT(128)
697*0957b409SSimon J. Gerraty MKENCRYPT(192)
698*0957b409SSimon J. Gerraty MKENCRYPT(256)
699*0957b409SSimon J. Gerraty
700*0957b409SSimon J. Gerraty #define MKDECRYPT(size) \
701*0957b409SSimon J. Gerraty static void \
702*0957b409SSimon J. Gerraty ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
703*0957b409SSimon J. Gerraty unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
704*0957b409SSimon J. Gerraty size_t num_blocks) \
705*0957b409SSimon J. Gerraty { \
706*0957b409SSimon J. Gerraty long cc; \
707*0957b409SSimon J. Gerraty \
708*0957b409SSimon J. Gerraty cc = 0; \
709*0957b409SSimon J. Gerraty asm volatile ( \
710*0957b409SSimon J. Gerraty \
711*0957b409SSimon J. Gerraty /* \
712*0957b409SSimon J. Gerraty * Load subkeys into v0..v10 \
713*0957b409SSimon J. Gerraty */ \
714*0957b409SSimon J. Gerraty LOAD_SUBKEYS_ ## size \
715*0957b409SSimon J. Gerraty li(%[cc], 0) \
716*0957b409SSimon J. Gerraty \
717*0957b409SSimon J. Gerraty BYTESWAP_INIT \
718*0957b409SSimon J. Gerraty INCR_128_INIT \
719*0957b409SSimon J. Gerraty \
720*0957b409SSimon J. Gerraty /* \
721*0957b409SSimon J. Gerraty * Load current CTR counter into v16, and current \
722*0957b409SSimon J. Gerraty * CBC-MAC IV into v17. \
723*0957b409SSimon J. Gerraty */ \
724*0957b409SSimon J. Gerraty lxvw4x(48, %[cc], %[ctr]) \
725*0957b409SSimon J. Gerraty lxvw4x(49, %[cc], %[cbcmac]) \
726*0957b409SSimon J. Gerraty BYTESWAP(16) \
727*0957b409SSimon J. Gerraty BYTESWAP(17) \
728*0957b409SSimon J. Gerraty \
729*0957b409SSimon J. Gerraty /* \
730*0957b409SSimon J. Gerraty * At each iteration, we do two parallel encryption: \
731*0957b409SSimon J. Gerraty * - new counter value for decryption of the next block; \
732*0957b409SSimon J. Gerraty * - CBC-MAC over the next encrypted block. \
733*0957b409SSimon J. Gerraty * Each iteration performs the two AES instances related \
734*0957b409SSimon J. Gerraty * to the current block; there is thus no need for some \
735*0957b409SSimon J. Gerraty * extra pre-loop and post-loop work as in encryption. \
736*0957b409SSimon J. Gerraty */ \
737*0957b409SSimon J. Gerraty \
738*0957b409SSimon J. Gerraty mtctr(%[num_blocks]) \
739*0957b409SSimon J. Gerraty \
740*0957b409SSimon J. Gerraty label(loop) \
741*0957b409SSimon J. Gerraty /* \
742*0957b409SSimon J. Gerraty * Upon loop entry: \
743*0957b409SSimon J. Gerraty * v16 counter value for next block \
744*0957b409SSimon J. Gerraty * v17 current CBC-MAC value \
745*0957b409SSimon J. Gerraty */ \
746*0957b409SSimon J. Gerraty lxvw4x(52, %[cc], %[buf]) \
747*0957b409SSimon J. Gerraty BYTESWAP(20) \
748*0957b409SSimon J. Gerraty vxor(17, 17, 20) \
749*0957b409SSimon J. Gerraty INCR_128(22, 16) \
750*0957b409SSimon J. Gerraty BLOCK_ENCRYPT_X2_ ## size(16, 17) \
751*0957b409SSimon J. Gerraty vxor(20, 20, 16) \
752*0957b409SSimon J. Gerraty BYTESWAPX(21, 20) \
753*0957b409SSimon J. Gerraty stxvw4x(53, %[cc], %[buf]) \
754*0957b409SSimon J. Gerraty addi(%[buf], %[buf], 16) \
755*0957b409SSimon J. Gerraty vand(16, 22, 22) \
756*0957b409SSimon J. Gerraty \
757*0957b409SSimon J. Gerraty bdnz(loop) \
758*0957b409SSimon J. Gerraty \
759*0957b409SSimon J. Gerraty /* \
760*0957b409SSimon J. Gerraty * Store back counter and CBC-MAC value. \
761*0957b409SSimon J. Gerraty */ \
762*0957b409SSimon J. Gerraty BYTESWAP(16) \
763*0957b409SSimon J. Gerraty BYTESWAP(17) \
764*0957b409SSimon J. Gerraty stxvw4x(48, %[cc], %[ctr]) \
765*0957b409SSimon J. Gerraty stxvw4x(49, %[cc], %[cbcmac]) \
766*0957b409SSimon J. Gerraty \
767*0957b409SSimon J. Gerraty : [cc] "+b" (cc), [buf] "+b" (buf) \
768*0957b409SSimon J. Gerraty : [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
769*0957b409SSimon J. Gerraty [num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
770*0957b409SSimon J. Gerraty BYTESWAP_REG \
771*0957b409SSimon J. Gerraty : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
772*0957b409SSimon J. Gerraty "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
773*0957b409SSimon J. Gerraty "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
774*0957b409SSimon J. Gerraty "v30", "ctr", "memory" \
775*0957b409SSimon J. Gerraty ); \
776*0957b409SSimon J. Gerraty }
777*0957b409SSimon J. Gerraty
778*0957b409SSimon J. Gerraty MKDECRYPT(128)
779*0957b409SSimon J. Gerraty MKDECRYPT(192)
780*0957b409SSimon J. Gerraty MKDECRYPT(256)
781*0957b409SSimon J. Gerraty
782*0957b409SSimon J. Gerraty /* see bearssl_block.h */
783*0957b409SSimon J. Gerraty void
br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys * ctx,void * ctr,void * cbcmac,void * data,size_t len)784*0957b409SSimon J. Gerraty br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
785*0957b409SSimon J. Gerraty void *ctr, void *cbcmac, void *data, size_t len)
786*0957b409SSimon J. Gerraty {
787*0957b409SSimon J. Gerraty if (len == 0) {
788*0957b409SSimon J. Gerraty return;
789*0957b409SSimon J. Gerraty }
790*0957b409SSimon J. Gerraty switch (ctx->num_rounds) {
791*0957b409SSimon J. Gerraty case 10:
792*0957b409SSimon J. Gerraty ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
793*0957b409SSimon J. Gerraty break;
794*0957b409SSimon J. Gerraty case 12:
795*0957b409SSimon J. Gerraty ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
796*0957b409SSimon J. Gerraty break;
797*0957b409SSimon J. Gerraty default:
798*0957b409SSimon J. Gerraty ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
799*0957b409SSimon J. Gerraty break;
800*0957b409SSimon J. Gerraty }
801*0957b409SSimon J. Gerraty }
802*0957b409SSimon J. Gerraty
803*0957b409SSimon J. Gerraty /* see bearssl_block.h */
804*0957b409SSimon J. Gerraty void
br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys * ctx,void * ctr,void * cbcmac,void * data,size_t len)805*0957b409SSimon J. Gerraty br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
806*0957b409SSimon J. Gerraty void *ctr, void *cbcmac, void *data, size_t len)
807*0957b409SSimon J. Gerraty {
808*0957b409SSimon J. Gerraty if (len == 0) {
809*0957b409SSimon J. Gerraty return;
810*0957b409SSimon J. Gerraty }
811*0957b409SSimon J. Gerraty switch (ctx->num_rounds) {
812*0957b409SSimon J. Gerraty case 10:
813*0957b409SSimon J. Gerraty ctrcbc_128_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
814*0957b409SSimon J. Gerraty break;
815*0957b409SSimon J. Gerraty case 12:
816*0957b409SSimon J. Gerraty ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
817*0957b409SSimon J. Gerraty break;
818*0957b409SSimon J. Gerraty default:
819*0957b409SSimon J. Gerraty ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
820*0957b409SSimon J. Gerraty break;
821*0957b409SSimon J. Gerraty }
822*0957b409SSimon J. Gerraty }
823*0957b409SSimon J. Gerraty
/*
 * Set dst[0..15] to src[0..15] + 1, both being 128-bit integers in
 * big-endian (most significant byte first) encoding. The result wraps
 * around modulo 2^128. All of src[] is read before dst[] is written,
 * so dst and src may designate the same buffer.
 */
static inline void
incr_ctr(void *dst, const void *src)
{
	const unsigned char *in;
	unsigned char *out;
	uint64_t hi, lo;
	int i;

	/* Decode the two 64-bit halves (big-endian). */
	in = src;
	hi = 0;
	lo = 0;
	for (i = 0; i < 8; i ++) {
		hi = (hi << 8) | (uint64_t)in[i];
		lo = (lo << 8) | (uint64_t)in[i + 8];
	}

	lo ++;

	/*
	 * Carry propagation, without a conditional jump: the top bit of
	 * (lo | -lo) is 0 only when lo is zero, i.e. when the low half
	 * just wrapped around; the XOR turns that into a carry of 1.
	 */
	hi += ((lo | ((uint64_t)0 - lo)) >> 63) ^ (uint64_t)1;

	/* Encode both halves back (big-endian). */
	out = dst;
	for (i = 7; i >= 0; i --) {
		out[i] = (unsigned char)hi;
		out[i + 8] = (unsigned char)lo;
		hi >>= 8;
		lo >>= 8;
	}
}
836*0957b409SSimon J. Gerraty
837*0957b409SSimon J. Gerraty /* see bearssl_block.h */
838*0957b409SSimon J. Gerraty void
br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys * ctx,void * ctr,void * data,size_t len)839*0957b409SSimon J. Gerraty br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
840*0957b409SSimon J. Gerraty void *ctr, void *data, size_t len)
841*0957b409SSimon J. Gerraty {
842*0957b409SSimon J. Gerraty unsigned char ctrbuf[64];
843*0957b409SSimon J. Gerraty
844*0957b409SSimon J. Gerraty memcpy(ctrbuf, ctr, 16);
845*0957b409SSimon J. Gerraty incr_ctr(ctrbuf + 16, ctrbuf);
846*0957b409SSimon J. Gerraty incr_ctr(ctrbuf + 32, ctrbuf + 16);
847*0957b409SSimon J. Gerraty incr_ctr(ctrbuf + 48, ctrbuf + 32);
848*0957b409SSimon J. Gerraty if (len >= 64) {
849*0957b409SSimon J. Gerraty switch (ctx->num_rounds) {
850*0957b409SSimon J. Gerraty case 10:
851*0957b409SSimon J. Gerraty ctr_128(ctx->skey.skni, ctrbuf, data, len >> 6);
852*0957b409SSimon J. Gerraty break;
853*0957b409SSimon J. Gerraty case 12:
854*0957b409SSimon J. Gerraty ctr_192(ctx->skey.skni, ctrbuf, data, len >> 6);
855*0957b409SSimon J. Gerraty break;
856*0957b409SSimon J. Gerraty default:
857*0957b409SSimon J. Gerraty ctr_256(ctx->skey.skni, ctrbuf, data, len >> 6);
858*0957b409SSimon J. Gerraty break;
859*0957b409SSimon J. Gerraty }
860*0957b409SSimon J. Gerraty data = (unsigned char *)data + (len & ~(size_t)63);
861*0957b409SSimon J. Gerraty len &= 63;
862*0957b409SSimon J. Gerraty }
863*0957b409SSimon J. Gerraty if (len > 0) {
864*0957b409SSimon J. Gerraty unsigned char tmp[64];
865*0957b409SSimon J. Gerraty
866*0957b409SSimon J. Gerraty if (len >= 32) {
867*0957b409SSimon J. Gerraty if (len >= 48) {
868*0957b409SSimon J. Gerraty memcpy(ctr, ctrbuf + 48, 16);
869*0957b409SSimon J. Gerraty } else {
870*0957b409SSimon J. Gerraty memcpy(ctr, ctrbuf + 32, 16);
871*0957b409SSimon J. Gerraty }
872*0957b409SSimon J. Gerraty } else {
873*0957b409SSimon J. Gerraty if (len >= 16) {
874*0957b409SSimon J. Gerraty memcpy(ctr, ctrbuf + 16, 16);
875*0957b409SSimon J. Gerraty }
876*0957b409SSimon J. Gerraty }
877*0957b409SSimon J. Gerraty memcpy(tmp, data, len);
878*0957b409SSimon J. Gerraty memset(tmp + len, 0, (sizeof tmp) - len);
879*0957b409SSimon J. Gerraty switch (ctx->num_rounds) {
880*0957b409SSimon J. Gerraty case 10:
881*0957b409SSimon J. Gerraty ctr_128(ctx->skey.skni, ctrbuf, tmp, 1);
882*0957b409SSimon J. Gerraty break;
883*0957b409SSimon J. Gerraty case 12:
884*0957b409SSimon J. Gerraty ctr_192(ctx->skey.skni, ctrbuf, tmp, 1);
885*0957b409SSimon J. Gerraty break;
886*0957b409SSimon J. Gerraty default:
887*0957b409SSimon J. Gerraty ctr_256(ctx->skey.skni, ctrbuf, tmp, 1);
888*0957b409SSimon J. Gerraty break;
889*0957b409SSimon J. Gerraty }
890*0957b409SSimon J. Gerraty memcpy(data, tmp, len);
891*0957b409SSimon J. Gerraty } else {
892*0957b409SSimon J. Gerraty memcpy(ctr, ctrbuf, 16);
893*0957b409SSimon J. Gerraty }
894*0957b409SSimon J. Gerraty }
895*0957b409SSimon J. Gerraty
896*0957b409SSimon J. Gerraty /* see bearssl_block.h */
897*0957b409SSimon J. Gerraty void
br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys * ctx,void * cbcmac,const void * data,size_t len)898*0957b409SSimon J. Gerraty br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
899*0957b409SSimon J. Gerraty void *cbcmac, const void *data, size_t len)
900*0957b409SSimon J. Gerraty {
901*0957b409SSimon J. Gerraty if (len > 0) {
902*0957b409SSimon J. Gerraty switch (ctx->num_rounds) {
903*0957b409SSimon J. Gerraty case 10:
904*0957b409SSimon J. Gerraty cbcmac_128(ctx->skey.skni, cbcmac, data, len >> 4);
905*0957b409SSimon J. Gerraty break;
906*0957b409SSimon J. Gerraty case 12:
907*0957b409SSimon J. Gerraty cbcmac_192(ctx->skey.skni, cbcmac, data, len >> 4);
908*0957b409SSimon J. Gerraty break;
909*0957b409SSimon J. Gerraty default:
910*0957b409SSimon J. Gerraty cbcmac_256(ctx->skey.skni, cbcmac, data, len >> 4);
911*0957b409SSimon J. Gerraty break;
912*0957b409SSimon J. Gerraty }
913*0957b409SSimon J. Gerraty }
914*0957b409SSimon J. Gerraty }
915*0957b409SSimon J. Gerraty
916*0957b409SSimon J. Gerraty /* see bearssl_block.h */
917*0957b409SSimon J. Gerraty const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
918*0957b409SSimon J. Gerraty sizeof(br_aes_pwr8_ctrcbc_keys),
919*0957b409SSimon J. Gerraty 16,
920*0957b409SSimon J. Gerraty 4,
921*0957b409SSimon J. Gerraty (void (*)(const br_block_ctrcbc_class **, const void *, size_t))
922*0957b409SSimon J. Gerraty &br_aes_pwr8_ctrcbc_init,
923*0957b409SSimon J. Gerraty (void (*)(const br_block_ctrcbc_class *const *,
924*0957b409SSimon J. Gerraty void *, void *, void *, size_t))
925*0957b409SSimon J. Gerraty &br_aes_pwr8_ctrcbc_encrypt,
926*0957b409SSimon J. Gerraty (void (*)(const br_block_ctrcbc_class *const *,
927*0957b409SSimon J. Gerraty void *, void *, void *, size_t))
928*0957b409SSimon J. Gerraty &br_aes_pwr8_ctrcbc_decrypt,
929*0957b409SSimon J. Gerraty (void (*)(const br_block_ctrcbc_class *const *,
930*0957b409SSimon J. Gerraty void *, void *, size_t))
931*0957b409SSimon J. Gerraty &br_aes_pwr8_ctrcbc_ctr,
932*0957b409SSimon J. Gerraty (void (*)(const br_block_ctrcbc_class *const *,
933*0957b409SSimon J. Gerraty void *, const void *, size_t))
934*0957b409SSimon J. Gerraty &br_aes_pwr8_ctrcbc_mac
935*0957b409SSimon J. Gerraty };
936*0957b409SSimon J. Gerraty
937*0957b409SSimon J. Gerraty #else
938*0957b409SSimon J. Gerraty
939*0957b409SSimon J. Gerraty /* see bearssl_block.h */
940*0957b409SSimon J. Gerraty const br_block_ctrcbc_class *
br_aes_pwr8_ctrcbc_get_vtable(void)941*0957b409SSimon J. Gerraty br_aes_pwr8_ctrcbc_get_vtable(void)
942*0957b409SSimon J. Gerraty {
943*0957b409SSimon J. Gerraty return NULL;
944*0957b409SSimon J. Gerraty }
945*0957b409SSimon J. Gerraty
946*0957b409SSimon J. Gerraty #endif
947