1 /*
2  * Copyright 2010-2024 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <string.h>
11 #include <openssl/crypto.h>
12 #include "internal/cryptlib.h"
13 #include "internal/endian.h"
14 #include "crypto/modes.h"
15 
/*
 * size_t_aX is the word type used when caller-supplied byte buffers are
 * accessed one size_t at a time. With GCC-compatible compilers and no
 * STRICT_ALIGNMENT it is declared with 1-byte alignment, so accesses
 * through it are not assumed to be naturally aligned; otherwise it is a
 * plain size_t.
 */
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif
21 
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine, because alignment is ensured: GETU32/PUTU32 become a single
 * 32-bit load/store through a u32 pointer combined with BSWAP4. The
 * pointer handed to these macros must therefore be suitably aligned for
 * u32 access.
 */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif
29 
/*
 * RISC-V uses C implementation as a fallback: these two switches keep the
 * C bodies of gcm_gmult_4bit and gcm_ghash_4bit compiled in even when
 * GHASH_ASM is defined (see the conditionals guarding those functions).
 */
#if defined(__riscv)
# define INCLUDE_C_GMULT_4BIT
# define INCLUDE_C_GHASH_4BIT
#endif
35 
/*
 * PACK(s) moves a 16-bit constant into the top 16 bits of a size_t.
 *
 * REDUCE1BIT(V) shifts the 128-bit value V (fields hi and lo) right by one
 * bit and, when the bit shifted out of V.lo was set, XORs the constant
 * 0xe1 into the top byte of V.hi. Both branches compute the same result;
 * the second one builds the mask in a u32 for targets where size_t is not
 * 64 bits wide. V is evaluated several times and must be an lvalue.
 */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
49 
50 /*-
51  *
52  * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.
53  *
54  * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
55  * never be set to 8. 8 is effectively reserved for testing purposes.
56  * TABLE_BITS>1 are lookup-table-driven implementations referred to as
57  * "Shoup's" in GCM specification. In other words OpenSSL does not cover
58  * whole spectrum of possible table driven implementations. Why? In
59  * non-"Shoup's" case memory access pattern is segmented in such manner,
60  * that it's trivial to see that cache timing information can reveal
61  * fair portion of intermediate hash value. Given that ciphertext is
62  * always available to attacker, it's possible for him to attempt to
63  * deduce secret parameter H and if successful, tamper with messages
64  * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
65  * not as trivial, but there is no reason to believe that it's resistant
66  * to cache-timing attack. And the thing about "8-bit" implementation is
67  * that it consumes 16 (sixteen) times more memory, 4KB per individual
68  * key + 1KB shared. Well, on pros side it should be twice as fast as
69  * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
70  * was observed to run ~75% faster, closer to 100% for commercial
71  * compilers... Yet "4-bit" procedure is preferred, because it's
72  * believed to provide better security-performance balance and adequate
73  * all-round performance. "All-round" refers to things like:
74  *
75  * - shorter setup time effectively improves overall timing for
76  *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example, on Windows a large enough free
 *   results in VM working set trimming, meaning that a subsequent
 *   malloc would immediately incur working set expansion);
81  * - larger table has larger cache footprint, which can affect
82  *   performance of other code paths (not necessarily even from same
83  *   thread in Hyper-Threading world);
84  *
85  * Value of 1 is not appropriate for performance reasons.
86  */
87 
/*
 * Build the 16-entry lookup table used by the 4-bit GHASH routines.
 *
 * Htable: output table, all 16 entries are written.
 * H:      hash key as two 64-bit words, H[0] being the high word.
 *
 * Slot 0 is zero, slot 8 holds H itself and slots 4, 2 and 1 hold H after
 * one, two and three REDUCE1BIT steps. Every other slot is the XOR of the
 * power-of-two slots selected by the bits of its index.
 */
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    u128 v;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int bit;
# endif

    v.hi = H[0];
    v.lo = H[1];
    Htable[0].hi = 0;
    Htable[0].lo = 0;

# if defined(OPENSSL_SMALL_FOOTPRINT)
    /* Power-of-two slots: 8, then 4, 2, 1 after successive reductions. */
    Htable[8] = v;
    for (bit = 4; bit > 0; bit >>= 1) {
        REDUCE1BIT(v);
        Htable[bit] = v;
    }

    /* Fill slots bit+1 .. 2*bit-1 from slot bit and the lower slots. */
    for (bit = 2; bit < 16; bit <<= 1) {
        u128 *group = Htable + bit;
        int off;

        v = *group;
        for (off = 1; off < bit; ++off) {
            group[off].hi = v.hi ^ Htable[off].hi;
            group[off].lo = v.lo ^ Htable[off].lo;
        }
    }
# else
    /* Power-of-two slots: 8, then 4, 2, 1 after successive reductions. */
    Htable[8] = v;
    REDUCE1BIT(v);
    Htable[4] = v;
    REDUCE1BIT(v);
    Htable[2] = v;
    REDUCE1BIT(v);
    Htable[1] = v;

    /* Slot 3 combines slots 1 and 2. */
    Htable[3].hi = Htable[1].hi ^ Htable[2].hi;
    Htable[3].lo = Htable[1].lo ^ Htable[2].lo;

    /* Slots 5..7 combine slot 4 with slots 1..3. */
    Htable[5].hi = Htable[4].hi ^ Htable[1].hi;
    Htable[5].lo = Htable[4].lo ^ Htable[1].lo;
    Htable[6].hi = Htable[4].hi ^ Htable[2].hi;
    Htable[6].lo = Htable[4].lo ^ Htable[2].lo;
    Htable[7].hi = Htable[4].hi ^ Htable[3].hi;
    Htable[7].lo = Htable[4].lo ^ Htable[3].lo;

    /* Slots 9..15 combine slot 8 with slots 1..7. */
    Htable[9].hi = Htable[8].hi ^ Htable[1].hi;
    Htable[9].lo = Htable[8].lo ^ Htable[1].lo;
    Htable[10].hi = Htable[8].hi ^ Htable[2].hi;
    Htable[10].lo = Htable[8].lo ^ Htable[2].lo;
    Htable[11].hi = Htable[8].hi ^ Htable[3].hi;
    Htable[11].lo = Htable[8].lo ^ Htable[3].lo;
    Htable[12].hi = Htable[8].hi ^ Htable[4].hi;
    Htable[12].lo = Htable[8].lo ^ Htable[4].lo;
    Htable[13].hi = Htable[8].hi ^ Htable[5].hi;
    Htable[13].lo = Htable[8].lo ^ Htable[5].lo;
    Htable[14].hi = Htable[8].hi ^ Htable[6].hi;
    Htable[14].lo = Htable[8].lo ^ Htable[6].lo;
    Htable[15].hi = Htable[8].hi ^ Htable[7].hi;
    Htable[15].lo = Htable[8].lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        for (j = 0; j < 16; ++j) {
            v = Htable[j];
            if (IS_LITTLE_ENDIAN) {
                /* swap the two 64-bit halves */
                Htable[j].hi = v.lo;
                Htable[j].lo = v.hi;
            } else {
                /* swap the halves and the 32-bit words inside each half */
                Htable[j].hi = v.lo << 32 | v.lo >> 32;
                Htable[j].lo = v.hi << 32 | v.hi >> 32;
            }
        }
    }
# endif
}
158 
159 # if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
/*
 * Reduction constants for the 4-bit routines, indexed by the four bits
 * that are shifted out of Z.lo on each step. PACK() places each 16-bit
 * constant in the top 16 bits of a size_t; when size_t is not 64 bits wide
 * the users shift the entry left by a further 32 bits before XORing it
 * into Z.hi.
 */
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};
166 
/*
 * Multiply Xi by the hash key, in place, using the 4-bit table Htable.
 *
 * Xi is consumed as 16 bytes starting from byte 15 and working down to
 * byte 0, low nibble before high nibble. Between nibbles the accumulator
 * is shifted right by four bits and the bits that fall off are folded
 * back in through rem_4bit. The result is written back to Xi, byte-swapped
 * on little-endian hosts.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    const u8 *xi_bytes = (const u8 *)Xi;
    u128 Z;
    size_t octet, spill;
    int idx = 15;
    DECLARE_IS_ENDIAN;

    /* Seed the accumulator with the low nibble of the last byte. */
    octet = xi_bytes[idx];
    Z.hi = Htable[octet & 0xf].hi;
    Z.lo = Htable[octet & 0xf].lo;

    for (;;) {
        /* High nibble of the current byte. */
        spill = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi >>= 4;
        Z.hi ^= (sizeof(size_t) == 8) ? (u64)rem_4bit[spill]
                                      : ((u64)rem_4bit[spill] << 32);
        Z.hi ^= Htable[octet >> 4].hi;
        Z.lo ^= Htable[octet >> 4].lo;

        if (idx == 0)
            break;

        /* Low nibble of the next (lower-indexed) byte. */
        octet = xi_bytes[--idx];

        spill = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi >>= 4;
        Z.hi ^= (sizeof(size_t) == 8) ? (u64)rem_4bit[spill]
                                      : ((u64)rem_4bit[spill] << 32);
        Z.hi ^= Htable[octet & 0xf].hi;
        Z.lo ^= Htable[octet & 0xf].lo;
    }

    if (IS_LITTLE_ENDIAN) {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#  else
        u8 *dst = (u8 *)Xi;
        u32 word;

        word = (u32)(Z.hi >> 32);
        PUTU32(dst, word);
        word = (u32)(Z.hi);
        PUTU32(dst + 4, word);
        word = (u32)(Z.lo >> 32);
        PUTU32(dst + 8, word);
        word = (u32)(Z.lo);
        PUTU32(dst + 12, word);
#  endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
233 
234 # endif
235 
236 # if !defined(GHASH_ASM) || defined(INCLUDE_C_GHASH_4BIT)
237 #  if !defined(OPENSSL_SMALL_FOOTPRINT)
238 /*
239  * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
240  * details... Compiler-generated code doesn't seem to give any
241  * performance improvement, at least not on x86[_64]. It's here
242  * mostly as reference and a placeholder for possible future
243  * non-trivial optimization[s]...
244  */
gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 * inp,size_t len)245 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
246                            const u8 *inp, size_t len)
247 {
248     u128 Z;
249     int cnt;
250     size_t rem, nlo, nhi;
251     DECLARE_IS_ENDIAN;
252 
253     do {
254         cnt = 15;
255         nlo = ((const u8 *)Xi)[15];
256         nlo ^= inp[15];
257         nhi = nlo >> 4;
258         nlo &= 0xf;
259 
260         Z.hi = Htable[nlo].hi;
261         Z.lo = Htable[nlo].lo;
262 
263         while (1) {
264             rem = (size_t)Z.lo & 0xf;
265             Z.lo = (Z.hi << 60) | (Z.lo >> 4);
266             Z.hi = (Z.hi >> 4);
267             if (sizeof(size_t) == 8)
268                 Z.hi ^= rem_4bit[rem];
269             else
270                 Z.hi ^= (u64)rem_4bit[rem] << 32;
271 
272             Z.hi ^= Htable[nhi].hi;
273             Z.lo ^= Htable[nhi].lo;
274 
275             if (--cnt < 0)
276                 break;
277 
278             nlo = ((const u8 *)Xi)[cnt];
279             nlo ^= inp[cnt];
280             nhi = nlo >> 4;
281             nlo &= 0xf;
282 
283             rem = (size_t)Z.lo & 0xf;
284             Z.lo = (Z.hi << 60) | (Z.lo >> 4);
285             Z.hi = (Z.hi >> 4);
286             if (sizeof(size_t) == 8)
287                 Z.hi ^= rem_4bit[rem];
288             else
289                 Z.hi ^= (u64)rem_4bit[rem] << 32;
290 
291             Z.hi ^= Htable[nlo].hi;
292             Z.lo ^= Htable[nlo].lo;
293         }
294 
295         if (IS_LITTLE_ENDIAN) {
296 #   ifdef BSWAP8
297             Xi[0] = BSWAP8(Z.hi);
298             Xi[1] = BSWAP8(Z.lo);
299 #   else
300             u8 *p = (u8 *)Xi;
301             u32 v;
302             v = (u32)(Z.hi >> 32);
303             PUTU32(p, v);
304             v = (u32)(Z.hi);
305             PUTU32(p + 4, v);
306             v = (u32)(Z.lo >> 32);
307             PUTU32(p + 8, v);
308             v = (u32)(Z.lo);
309             PUTU32(p + 12, v);
310 #   endif
311         } else {
312             Xi[0] = Z.hi;
313             Xi[1] = Z.lo;
314         }
315 
316         inp += 16;
317         /* Block size is 128 bits so len is a multiple of 16 */
318         len -= 16;
319     } while (len > 0);
320 }
321 #  endif
322 # else
/*
 * Reached when GHASH_ASM is defined and INCLUDE_C_GHASH_4BIT is not: the
 * 4-bit routines are only declared here and must be defined outside this
 * file (presumably by the platform's assembly module).
 */
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
326 # endif
327 
/*
 * GCM_MUL(ctx) multiplies ctx->Xi by the hash key through the gmult
 * routine stored in ctx->funcs. ctx may be any pointer-valued expression;
 * it is evaluated more than once.
 */
# define GCM_MUL(ctx)      ((ctx)->funcs.gmult((ctx)->Xi.u,(ctx)->Htable))
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * GHASH(ctx,in,len) feeds len bytes at in to the ghash routine stored in
 * ctx->funcs, updating ctx->Xi. Code elsewhere tests "#ifdef GHASH" to
 * pick between this and a GCM_MUL-per-block loop.
 */
#  define GHASH(ctx,in,len) ((ctx)->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,(in),(len)))
/*
 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache thrashing
 * effect. In other words idea is to hash data while it's still in L1 cache
 * after encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
338 
/*
 * Per-architecture GHASH back ends. For each supported target this section
 * defines a GHASH_ASM_<arch> marker, which gcm_get_funcs tests, and
 * declares the init/gmult/ghash routines that are defined outside this
 * file. All of them share the signatures of the 4-bit routines.
 */
#if     (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

/* On 32-bit x86 the *_avx names are simply aliases for the *_clmul ones. */
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

/* 32-bit x86 additionally has MMX and plain-x86 4-bit routines. */
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(_M_ARM64)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
/*
 * PMULL_CAPABLE is defined for every GHASH_ASM_ARM build; NEON_CAPABLE
 * only when compiling for 32-bit ARM. Both are run-time tests of
 * OPENSSL_armcap_P.
 */
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "crypto/sparc_arch.h"
#  define GHASH_ASM_SPARC
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__POWERPC__) || defined(_ARCH_PPC))
#  include "crypto/ppc_arch.h"
#  define GHASH_ASM_PPC
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
#  include "crypto/riscv_arch.h"
#  define GHASH_ASM_RV64I
/* Zbc/Zbkc (scalar crypto with clmul) based routines. */
void gcm_init_rv64i_zbc(u128 Htable[16], const u64 Xi[2]);
void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 Xi[2]);
void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len);
void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
                               const u8 *inp, size_t len);
/* zvkb/Zvbc (vector crypto with vclmul) based routines. */
void gcm_init_rv64i_zvkb_zvbc(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_rv64i_zvkb_zvbc(u64 Xi[2], const u128 Htable[16],
                               const u8 *inp, size_t len);
/* Zvkg (vector crypto with vgmul.vv and vghsh.vv). */
void gcm_init_rv64i_zvkg(u128 Htable[16], const u64 Xi[2]);
void gcm_init_rv64i_zvkg_zvkb(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_rv64i_zvkg(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_rv64i_zvkg(u64 Xi[2], const u128 Htable[16],
                          const u8 *inp, size_t len);
# endif
#endif
429 
gcm_get_funcs(struct gcm_funcs_st * ctx)430 static void gcm_get_funcs(struct gcm_funcs_st *ctx)
431 {
432     /* set defaults -- overridden below as needed */
433     ctx->ginit = gcm_init_4bit;
434 #if !defined(GHASH_ASM)
435     ctx->gmult = gcm_gmult_4bit;
436 #else
437     ctx->gmult = NULL;
438 #endif
439 #if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
440     ctx->ghash = gcm_ghash_4bit;
441 #else
442     ctx->ghash = NULL;
443 #endif
444 
445 #if defined(GHASH_ASM_X86_OR_64)
446 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
447     /* x86_64 */
448     if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
449         if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
450             ctx->ginit = gcm_init_avx;
451             ctx->gmult = gcm_gmult_avx;
452             ctx->ghash = gcm_ghash_avx;
453         } else {
454             ctx->ginit = gcm_init_clmul;
455             ctx->gmult = gcm_gmult_clmul;
456             ctx->ghash = gcm_ghash_clmul;
457         }
458         return;
459     }
460 # endif
461 # if defined(GHASH_ASM_X86)
462     /* x86 only */
463 #  if defined(OPENSSL_IA32_SSE2)
464     if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
465         ctx->gmult = gcm_gmult_4bit_mmx;
466         ctx->ghash = gcm_ghash_4bit_mmx;
467         return;
468     }
469 #  else
470     if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
471         ctx->gmult = gcm_gmult_4bit_mmx;
472         ctx->ghash = gcm_ghash_4bit_mmx;
473         return;
474     }
475 #  endif
476     ctx->gmult = gcm_gmult_4bit_x86;
477     ctx->ghash = gcm_ghash_4bit_x86;
478     return;
479 # else
480     /* x86_64 fallback defaults */
481     ctx->gmult = gcm_gmult_4bit;
482     ctx->ghash = gcm_ghash_4bit;
483     return;
484 # endif
485 #elif defined(GHASH_ASM_ARM)
486     /* ARM defaults */
487     ctx->gmult = gcm_gmult_4bit;
488 # if !defined(OPENSSL_SMALL_FOOTPRINT)
489     ctx->ghash = gcm_ghash_4bit;
490 # else
491     ctx->ghash = NULL;
492 # endif
493 # ifdef PMULL_CAPABLE
494     if (PMULL_CAPABLE) {
495         ctx->ginit = (gcm_init_fn)gcm_init_v8;
496         ctx->gmult = gcm_gmult_v8;
497         ctx->ghash = gcm_ghash_v8;
498     }
499 # elif defined(NEON_CAPABLE)
500     if (NEON_CAPABLE) {
501         ctx->ginit = gcm_init_neon;
502         ctx->gmult = gcm_gmult_neon;
503         ctx->ghash = gcm_ghash_neon;
504     }
505 # endif
506     return;
507 #elif defined(GHASH_ASM_SPARC)
508     /* SPARC defaults */
509     ctx->gmult = gcm_gmult_4bit;
510     ctx->ghash = gcm_ghash_4bit;
511     if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
512         ctx->ginit = gcm_init_vis3;
513         ctx->gmult = gcm_gmult_vis3;
514         ctx->ghash = gcm_ghash_vis3;
515     }
516     return;
517 #elif defined(GHASH_ASM_PPC)
518     /* PowerPC does not define GHASH_ASM; defaults set above */
519     if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
520         ctx->ginit = gcm_init_p8;
521         ctx->gmult = gcm_gmult_p8;
522         ctx->ghash = gcm_ghash_p8;
523     }
524     return;
525 #elif defined(GHASH_ASM_RV64I)
526     /* RISCV defaults */
527     ctx->gmult = gcm_gmult_4bit;
528     ctx->ghash = gcm_ghash_4bit;
529 
530     if (RISCV_HAS_ZVKG() && riscv_vlen() >= 128) {
531         if (RISCV_HAS_ZVKB())
532             ctx->ginit = gcm_init_rv64i_zvkg_zvkb;
533         else
534             ctx->ginit = gcm_init_rv64i_zvkg;
535         ctx->gmult = gcm_gmult_rv64i_zvkg;
536         ctx->ghash = gcm_ghash_rv64i_zvkg;
537     } else if (RISCV_HAS_ZVKB() && RISCV_HAS_ZVBC() && riscv_vlen() >= 128) {
538         ctx->ginit = gcm_init_rv64i_zvkb_zvbc;
539         ctx->gmult = gcm_gmult_rv64i_zvkb_zvbc;
540         ctx->ghash = gcm_ghash_rv64i_zvkb_zvbc;
541     } else if (RISCV_HAS_ZBC()) {
542         if (RISCV_HAS_ZBKB()) {
543             ctx->ginit = gcm_init_rv64i_zbc__zbkb;
544             ctx->gmult = gcm_gmult_rv64i_zbc__zbkb;
545             ctx->ghash = gcm_ghash_rv64i_zbc__zbkb;
546         } else if (RISCV_HAS_ZBB()) {
547             ctx->ginit = gcm_init_rv64i_zbc__zbb;
548             ctx->gmult = gcm_gmult_rv64i_zbc;
549             ctx->ghash = gcm_ghash_rv64i_zbc;
550         } else {
551             ctx->ginit = gcm_init_rv64i_zbc;
552             ctx->gmult = gcm_gmult_rv64i_zbc;
553             ctx->ghash = gcm_ghash_rv64i_zbc;
554         }
555     }
556     return;
557 #elif defined(GHASH_ASM)
558     /* all other architectures use the generic names */
559     ctx->gmult = gcm_gmult_4bit;
560     ctx->ghash = gcm_ghash_4bit;
561     return;
562 #endif
563 }
564 
/*
 * Initialise Htable from the hash key H with whichever table-init routine
 * gcm_get_funcs selects for this build and CPU.
 */
void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    struct gcm_funcs_st dispatch;

    gcm_get_funcs(&dispatch);
    (*dispatch.ginit)(Htable, H);
}
572 
/*
 * Multiply Xi by the hash key in place, using the gmult routine that
 * gcm_get_funcs selects for this build and CPU.
 */
void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    struct gcm_funcs_st dispatch;

    gcm_get_funcs(&dispatch);
    (*dispatch.gmult)(Xi, Htable);
}
580 
/*
 * Hash len bytes at inp into Xi. The bulk ghash routine chosen by
 * gcm_get_funcs is used when there is one; otherwise each 16-byte block is
 * XORed into Xi and followed by a gmult call.
 */
void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len)
{
    struct gcm_funcs_st dispatch;
    u64 block[2];
    size_t off;

    gcm_get_funcs(&dispatch);

    if (dispatch.ghash != NULL) {
        dispatch.ghash(Xi, Htable, inp, len);
        return;
    }

    /* Emulate ghash if needed */
    off = 0;
    while (off < len) {
        /* copy first: inp carries no alignment guarantee for u64 access */
        memcpy(block, inp + off, sizeof(block));
        Xi[0] ^= block[0];
        Xi[1] ^= block[1];
        dispatch.gmult(Xi, Htable);
        off += 16;
    }
}
601 
/*
 * Reset *ctx and bind it to a block cipher and its key.
 *
 * The whole context is zeroed, the hash key H is obtained by running the
 * block function over the (zeroed) H field in place, H is converted to
 * host byte order on little-endian machines, and finally the GHASH
 * routines are selected and the multiplication table is built.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->key = key;
    ctx->block = block;

    /* ctx->H is all zero at this point; encrypt it in place */
    block(ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *bytes = ctx->H.c;
        /* read both words before storing: H.u overlays H.c */
        u64 upper = (u64)GETU32(bytes) << 32 | GETU32(bytes + 4);
        u64 lower = (u64)GETU32(bytes + 8) << 32 | GETU32(bytes + 12);

        ctx->H.u[0] = upper;
        ctx->H.u[1] = lower;
#endif
    }

    gcm_get_funcs(&ctx->funcs);
    (*ctx->funcs.ginit)(ctx->Htable, ctx->H.u);
}
630 
/*
 * Start a new message under the key already bound to ctx.
 *
 * Resets the AAD/message length counters and the partial-block state,
 * derives the initial counter block Yi from the IV, stores the encryption
 * of that block in EK0, clears Xi and leaves Yi holding the incremented
 * counter.
 *
 * A 12-byte IV is used directly with the 32-bit counter set to 1. Any
 * other length is hashed: the IV is absorbed 16 bytes at a time (the last
 * block may be short), followed by a block carrying the IV length in bits.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;

    ctx->ares = 0;
    ctx->mres = 0;
    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */

    if (len != 12) {
        u64 iv_bits = (u64)len << 3;
        size_t i;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len > 0) {
            size_t take = (len < 16) ? len : 16;

            for (i = 0; i < take; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += take;
            len -= take;
        }

        /* Mix the bit length into the last 8 bytes, big-endian. */
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(iv_bits);
#else
            for (i = 0; i < 8; ++i)
                ctx->Xi.c[8 + i] ^= (u8)(iv_bits >> (56 - 8 * i));
#endif
        } else {
            ctx->Xi.u[1] ^= iv_bits;
        }

        GCM_MUL(ctx);

        /* The counter is the last 32 bits of the hash, big-endian. */
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        } else {
            ctr = ctx->Xi.d[3];
        }

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    } else {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    /* EK0 is the encryption of the initial counter block. */
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);

    /* Advance the counter and store it back into Yi, big-endian. */
    ctr += 1;
    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    } else {
        ctx->Yi.d[3] = ctr;
    }
}
717 
/*
 * Absorb len bytes of additional authenticated data into the hash state.
 * May be called repeatedly; bytes that do not fill a 16-byte block stay
 * XORed into Xi and their count is kept in ctx->ares for the next call.
 *
 * Returns 0 on success, -2 if message data has already been processed
 * (the message length counter is non-zero) and -1 if the accumulated AAD
 * length would exceed 2^61 bytes or overflow.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t k;
    unsigned int pending;
    u64 total;

    if (ctx->len.u[1] != 0)
        return -2;

    total = ctx->len.u[0];
    total += len;
    if (total > (U64(1) << 61) || (sizeof(len) == 8 && total < len))
        return -1;
    ctx->len.u[0] = total;

    /* Complete a block left unfinished by an earlier call. */
    pending = ctx->ares;
    if (pending != 0) {
        for (; pending != 0 && len != 0; --len) {
            ctx->Xi.c[pending] ^= *aad++;
            pending = (pending + 1) % 16;
        }
        if (pending != 0) {
            /* still short of a full block: remember where we stopped */
            ctx->ares = pending;
            return 0;
        }
        GCM_MUL(ctx);
    }

    /* Whole blocks. */
#ifdef GHASH
    k = len & (size_t)-16;
    if (k != 0) {
        GHASH(ctx, aad, k);
        aad += k;
        len -= k;
    }
#else
    for (; len >= 16; aad += 16, len -= 16) {
        for (k = 0; k < 16; ++k)
            ctx->Xi.c[k] ^= aad[k];
        GCM_MUL(ctx);
    }
#endif

    /* Trailing bytes are XORed in now and multiplied later. */
    if (len != 0) {
        pending = (unsigned int)len;
        for (k = 0; k < len; ++k)
            ctx->Xi.c[k] ^= aad[k];
    }

    ctx->ares = pending;
    return 0;
}
771 
CRYPTO_gcm128_encrypt(GCM128_CONTEXT * ctx,const unsigned char * in,unsigned char * out,size_t len)772 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
773                           const unsigned char *in, unsigned char *out,
774                           size_t len)
775 {
776     DECLARE_IS_ENDIAN;
777     unsigned int n, ctr, mres;
778     size_t i;
779     u64 mlen = ctx->len.u[1];
780     block128_f block = ctx->block;
781     void *key = ctx->key;
782 
783     mlen += len;
784     if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
785         return -1;
786     ctx->len.u[1] = mlen;
787 
788     mres = ctx->mres;
789 
790     if (ctx->ares) {
791         /* First call to encrypt finalizes GHASH(AAD) */
792 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
793         if (len == 0) {
794             GCM_MUL(ctx);
795             ctx->ares = 0;
796             return 0;
797         }
798         memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
799         ctx->Xi.u[0] = 0;
800         ctx->Xi.u[1] = 0;
801         mres = sizeof(ctx->Xi);
802 #else
803         GCM_MUL(ctx);
804 #endif
805         ctx->ares = 0;
806     }
807 
808     if (IS_LITTLE_ENDIAN)
809 #ifdef BSWAP4
810         ctr = BSWAP4(ctx->Yi.d[3]);
811 #else
812         ctr = GETU32(ctx->Yi.c + 12);
813 #endif
814     else
815         ctr = ctx->Yi.d[3];
816 
817     n = mres % 16;
818 #if !defined(OPENSSL_SMALL_FOOTPRINT)
819     if (16 % sizeof(size_t) == 0) { /* always true actually */
820         do {
821             if (n) {
822 # if defined(GHASH)
823                 while (n && len) {
824                     ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
825                     --len;
826                     n = (n + 1) % 16;
827                 }
828                 if (n == 0) {
829                     GHASH(ctx, ctx->Xn, mres);
830                     mres = 0;
831                 } else {
832                     ctx->mres = mres;
833                     return 0;
834                 }
835 # else
836                 while (n && len) {
837                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
838                     --len;
839                     n = (n + 1) % 16;
840                 }
841                 if (n == 0) {
842                     GCM_MUL(ctx);
843                     mres = 0;
844                 } else {
845                     ctx->mres = n;
846                     return 0;
847                 }
848 # endif
849             }
850 # if defined(STRICT_ALIGNMENT)
851             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
852                 break;
853 # endif
854 # if defined(GHASH)
855             if (len >= 16 && mres) {
856                 GHASH(ctx, ctx->Xn, mres);
857                 mres = 0;
858             }
859 #  if defined(GHASH_CHUNK)
860             while (len >= GHASH_CHUNK) {
861                 size_t j = GHASH_CHUNK;
862 
863                 while (j) {
864                     size_t_aX *out_t = (size_t_aX *)out;
865                     const size_t_aX *in_t = (const size_t_aX *)in;
866 
867                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
868                     ++ctr;
869                     if (IS_LITTLE_ENDIAN)
870 #   ifdef BSWAP4
871                         ctx->Yi.d[3] = BSWAP4(ctr);
872 #   else
873                         PUTU32(ctx->Yi.c + 12, ctr);
874 #   endif
875                     else
876                         ctx->Yi.d[3] = ctr;
877                     for (i = 0; i < 16 / sizeof(size_t); ++i)
878                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
879                     out += 16;
880                     in += 16;
881                     j -= 16;
882                 }
883                 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
884                 len -= GHASH_CHUNK;
885             }
886 #  endif
887             if ((i = (len & (size_t)-16))) {
888                 size_t j = i;
889 
890                 while (len >= 16) {
891                     size_t_aX *out_t = (size_t_aX *)out;
892                     const size_t_aX *in_t = (const size_t_aX *)in;
893 
894                     (*block) (ctx->Yi.c, ctx->EKi.c, key);
895                     ++ctr;
896                     if (IS_LITTLE_ENDIAN)
897 #  ifdef BSWAP4
898                         ctx->Yi.d[3] = BSWAP4(ctr);
899 #  else
900                         PUTU32(ctx->Yi.c + 12, ctr);
901 #  endif
902                     else
903                         ctx->Yi.d[3] = ctr;
904                     for (i = 0; i < 16 / sizeof(size_t); ++i)
905                         out_t[i] = in_t[i] ^ ctx->EKi.t[i];
906                     out += 16;
907                     in += 16;
908                     len -= 16;
909                 }
910                 GHASH(ctx, out - j, j);
911             }
912 # else
913             while (len >= 16) {
914                 size_t *out_t = (size_t *)out;
915                 const size_t *in_t = (const size_t *)in;
916 
917                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
918                 ++ctr;
919                 if (IS_LITTLE_ENDIAN)
920 #  ifdef BSWAP4
921                     ctx->Yi.d[3] = BSWAP4(ctr);
922 #  else
923                     PUTU32(ctx->Yi.c + 12, ctr);
924 #  endif
925                 else
926                     ctx->Yi.d[3] = ctr;
927                 for (i = 0; i < 16 / sizeof(size_t); ++i)
928                     ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
929                 GCM_MUL(ctx);
930                 out += 16;
931                 in += 16;
932                 len -= 16;
933             }
934 # endif
935             if (len) {
936                 (*block) (ctx->Yi.c, ctx->EKi.c, key);
937                 ++ctr;
938                 if (IS_LITTLE_ENDIAN)
939 # ifdef BSWAP4
940                     ctx->Yi.d[3] = BSWAP4(ctr);
941 # else
942                     PUTU32(ctx->Yi.c + 12, ctr);
943 # endif
944                 else
945                     ctx->Yi.d[3] = ctr;
946 # if defined(GHASH)
947                 while (len--) {
948                     ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
949                     ++n;
950                 }
951 # else
952                 while (len--) {
953                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
954                     ++n;
955                 }
956                 mres = n;
957 # endif
958             }
959 
960             ctx->mres = mres;
961             return 0;
962         } while (0);
963     }
964 #endif
965     for (i = 0; i < len; ++i) {
966         if (n == 0) {
967             (*block) (ctx->Yi.c, ctx->EKi.c, key);
968             ++ctr;
969             if (IS_LITTLE_ENDIAN)
970 #ifdef BSWAP4
971                 ctx->Yi.d[3] = BSWAP4(ctr);
972 #else
973                 PUTU32(ctx->Yi.c + 12, ctr);
974 #endif
975             else
976                 ctx->Yi.d[3] = ctr;
977         }
978 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
979         ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
980         n = (n + 1) % 16;
981         if (mres == sizeof(ctx->Xn)) {
982             GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
983             mres = 0;
984         }
985 #else
986         ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
987         mres = n = (n + 1) % 16;
988         if (n == 0)
989             GCM_MUL(ctx);
990 #endif
991     }
992 
993     ctx->mres = mres;
994     return 0;
995 }
996 
/*
 * Decrypt |len| bytes from |in| into |out| and absorb the *input* bytes
 * into the GHASH state kept in |ctx|.
 *
 * Each output byte is the input byte XORed with the keystream block
 * ctx->EKi; EKi is refreshed by calling ctx->block on the counter block
 * ctx->Yi, whose last 32-bit word is then rewritten with the incremented
 * counter.  The function may be called repeatedly; the partial-block
 * position is carried between calls in ctx->mres.
 *
 * Returns 0 on success.  Returns -1 (before touching any other state) when
 * the accumulated message length would exceed 2^36 - 32 bytes or, on
 * platforms with an 8-byte size_t, when adding |len| wraps the counter.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    /* Enforce the total-length limit before committing the new length. */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /*
         * Queue the current Xi as the first pending block in Xn and clear
         * Xi; it is processed by the next GHASH() call over Xn.
         */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter from the last word of Yi into host order. */
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    /* n is the offset into the current 16-byte keystream block EKi. */
    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        /*
         * Word-at-a-time fast path.  The do { } while (0) wrapper exists so
         * that `break` can drop to the byte-wise loop after the #endif.
         */
        do {
            if (n) {
                /* Use up the keystream left over from a previous call. */
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    /* Input exhausted mid-block: save position and stop. */
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
            /* From here on n == 0: processing is block-aligned. */
# if defined(STRICT_ALIGNMENT)
            /* Word accesses below need aligned pointers; else go byte-wise. */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            /* Flush bytes pending in Xn before hashing directly from |in|. */
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                /*
                 * The input is hashed before it is decrypted.
                 * NOTE(review): presumably so that in-place use (in == out)
                 * still hashes the original input - confirm.
                 */
                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            /* Remaining whole blocks: i is len rounded down to 16 bytes. */
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            /* No GHASH(): fold each block into Xi and multiply per block. */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    /* Read the input word once, before out_t[i] is written. */
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                /* Trailing partial block (fewer than 16 bytes remain). */
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                /* n starts at 0 here and indexes in, out and EKi alike. */
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    /*
     * Byte-at-a-time path: always used with OPENSSL_SMALL_FOOTPRINT, and
     * otherwise reached via the `break` above.
     */
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            /* Start of a block: produce fresh keystream and bump counter. */
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        /* Hash the staging buffer only once it is completely full. */
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
1229 
/*
 * Encrypt |len| bytes from |in| into |out| using the caller-supplied
 * multi-block routine |stream| for whole 16-byte blocks, and absorb the
 * *output* bytes into the GHASH state kept in |ctx|.
 *
 * |stream| is invoked as stream(in, out, number_of_blocks, key, ctx->Yi.c);
 * afterwards this function advances its own copy of the counter by the same
 * number of blocks and writes it back into the last word of Yi.
 * NOTE(review): the exact contract of ctr128_f is not visible here - confirm
 * that it leaves the counter block for the caller to update.
 *
 * Partial blocks at either end are handled byte-wise with the keystream
 * block ctx->EKi produced by ctx->block.  With OPENSSL_SMALL_FOOTPRINT the
 * call is simply forwarded to CRYPTO_gcm128_encrypt() and |stream| is unused.
 *
 * Returns 0 on success, or -1 when the accumulated message length would
 * exceed 2^36 - 32 bytes (or wraps on platforms with an 8-byte size_t).
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    /* Enforce the total-length limit before committing the new length. */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /*
         * Queue the current Xi as the first pending block in Xn and clear
         * Xi; it is processed by the next GHASH() call over Xn.
         */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter from the last word of Yi into host order. */
    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    /* n is the offset into the current 16-byte keystream block EKi. */
    n = mres % 16;
    if (n) {
        /* Use up the keystream left over from a previous call. */
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            /* Input exhausted mid-block: save position and stop. */
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
        /* Flush bytes pending in Xn before hashing directly from |out|. */
        if (len >= 16 && mres) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
#  if defined(GHASH_CHUNK)
    /* Bulk path: encrypt a fixed-size chunk, then hash the fresh output. */
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    /* Remaining whole blocks: i is len rounded down to a multiple of 16. */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        /*
         * |in| and |len| are advanced here, while |i| still holds the byte
         * count; the non-GHASH branch below reuses |i| as a loop index.
         */
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        /* Fold each output block into Xi and multiply, one block at a time. */
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        /* Trailing partial block (fewer than 16 bytes remain). */
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        /* n starts at 0 here and indexes in, out and EKi alike. */
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
1383 
/*
 * Decrypt |len| bytes from |in| into |out| using the caller-supplied
 * multi-block routine |stream| for whole 16-byte blocks, and absorb the
 * *input* bytes into the GHASH state kept in |ctx|.
 *
 * For whole blocks the input is hashed first and only then passed to
 * stream(in, out, number_of_blocks, key, ctx->Yi.c); this function then
 * advances its own copy of the counter and writes it back into the last
 * word of Yi.
 * NOTE(review): the exact contract of ctr128_f is not visible here - confirm
 * that it leaves the counter block for the caller to update.
 *
 * Partial blocks at either end are handled byte-wise with the keystream
 * block ctx->EKi produced by ctx->block.  With OPENSSL_SMALL_FOOTPRINT the
 * call is simply forwarded to CRYPTO_gcm128_decrypt() and |stream| is unused.
 *
 * Returns 0 on success, or -1 when the accumulated message length would
 * exceed 2^36 - 32 bytes (or wraps on platforms with an 8-byte size_t).
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    /* Enforce the total-length limit before committing the new length. */
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /*
         * Queue the current Xi as the first pending block in Xn and clear
         * Xi; it is processed by the next GHASH() call over Xn.
         */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    /* Load the 32-bit counter from the last word of Yi into host order. */
    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    /* n is the offset into the current 16-byte keystream block EKi. */
    n = mres % 16;
    if (n) {
        /* Use up the keystream left over from a previous call. */
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            /* Input exhausted mid-block: save position and stop. */
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    /* Flush bytes pending in Xn before hashing directly from |in|. */
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    /* Bulk path: hash a fixed-size chunk of input, then decrypt it. */
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    /* Remaining whole blocks: i is len rounded down to a multiple of 16. */
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        /*
         * Fold each input block into Xi and multiply.  This walk consumes
         * both |j| and |in|, so they are restored afterwards for the
         * stream call.
         */
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        /* Trailing partial block (fewer than 16 bytes remain). */
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        /* n starts at 0 here and indexes in, out and EKi alike. */
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
1544 
/*
 * Finalize the authentication value held in ctx->Xi and optionally compare
 * it against |tag|.
 *
 * Any buffered partial block is zero-padded and hashed, the AAD and message
 * lengths (converted to bit counts and laid out big-endian) are hashed as a
 * final block, and the result is XORed with ctx->EK0.  The finished value is
 * left in ctx->Xi, so this function mutates |ctx|.
 *
 * Returns the result of CRYPTO_memcmp() over the first |len| bytes of
 * ctx->Xi and |tag|, or -1 when |tag| is NULL or |len| is larger than
 * sizeof(ctx->Xi).
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    /* Byte counts converted to bit counts. */
    u64 aad_bits = ctx->len.u[0] << 3;
    u64 msg_bits = ctx->len.u[1] << 3;

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 len_block;
    unsigned int pending = ctx->mres;

    if (pending != 0) {
        /* Zero-pad the staged bytes up to the next 16-byte boundary. */
        unsigned int padded = (pending + 15) & ~15U;

        memset(ctx->Xn + pending, 0, padded - pending);
        pending = padded;
        /* A full staging buffer leaves no room for the length block. */
        if (pending == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, pending);
            pending = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        aad_bits = BSWAP8(aad_bits);
        msg_bits = BSWAP8(msg_bits);
#else
        u8 *len_bytes = ctx->len.c;

        /* Store the bit counts in ctx->len, then re-read them byte-wise. */
        ctx->len.u[0] = aad_bits;
        ctx->len.u[1] = msg_bits;

        aad_bits = (u64)GETU32(len_bytes) << 32 | GETU32(len_bytes + 4);
        msg_bits = (u64)GETU32(len_bytes + 8) << 32 | GETU32(len_bytes + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* Append the length block behind any padded data and hash it all. */
    len_block.hi = aad_bits;
    len_block.lo = msg_bits;
    memcpy(ctx->Xn + pending, &len_block, sizeof(len_block));
    pending += sizeof(len_block);
    GHASH(ctx, ctx->Xn, pending);
#else
    ctx->Xi.u[0] ^= aad_bits;
    ctx->Xi.u[1] ^= msg_bits;
    GCM_MUL(ctx);
#endif

    /* Mask the hash with EK0 to obtain the final value. */
    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag == NULL || len > sizeof(ctx->Xi))
        return -1;

    return CRYPTO_memcmp(ctx->Xi.c, tag, len);
}
1608 
/*
 * Finalize |ctx| and copy the resulting value from ctx->Xi into |tag|.
 * At most sizeof(ctx->Xi.c) bytes are written; a larger |len| is clamped.
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    size_t copy_len = sizeof(ctx->Xi.c);

    if (len <= copy_len)
        copy_len = len;

    /* Called with no tag: only the finalization side effect is wanted. */
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c, copy_len);
}
1615 
CRYPTO_gcm128_new(void * key,block128_f block)1616 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1617 {
1618     GCM128_CONTEXT *ret;
1619 
1620     if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1621         CRYPTO_gcm128_init(ret, key, block);
1622 
1623     return ret;
1624 }
1625 
/*
 * Dispose of |ctx| through OPENSSL_clear_free(), passing the full size of
 * the context structure.
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    const size_t ctx_size = sizeof(*ctx);

    OPENSSL_clear_free(ctx, ctx_size);
}
1630