xref: /linux/lib/crypto/gf128hash.c (revision 370c3883195566ee3e7d79e0146c3d735a406573)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * GF(2^128) polynomial hashing: GHASH and POLYVAL
 *
 * Copyright 2025 Google LLC
 */
761f66c52SEric Biggers 
861f66c52SEric Biggers #include <crypto/gf128hash.h>
961f66c52SEric Biggers #include <linux/export.h>
1061f66c52SEric Biggers #include <linux/module.h>
1161f66c52SEric Biggers #include <linux/string.h>
1261f66c52SEric Biggers #include <linux/unaligned.h>
1361f66c52SEric Biggers 
/*
 * GHASH and POLYVAL are almost-XOR-universal hash functions.  They interpret
 * the message as the coefficients of a polynomial in the finite field GF(2^128)
 * and evaluate that polynomial at a secret point.
 *
 * Neither GHASH nor POLYVAL is a cryptographic hash function.  They should be
 * used only by algorithms that are specifically designed to use them.
 *
 * GHASH is the older variant, defined as part of GCM in NIST SP 800-38D
 * (https://nvlpubs.nist.gov/nistpubs/legacy/sp/nistspecialpublication800-38d.pdf).
 * GHASH is hard to implement directly, due to its backwards mapping between
 * bits and polynomial coefficients.  GHASH implementations typically pre- and
 * post-process the inputs and outputs (mainly by byte-swapping) to convert the
 * GHASH computation into an equivalent computation over a different,
 * easier-to-use representation of GF(2^128).
 *
 * POLYVAL is a newer GF(2^128) polynomial hash, originally defined as part of
 * AES-GCM-SIV (https://datatracker.ietf.org/doc/html/rfc8452) and also used by
 * HCTR2 (https://eprint.iacr.org/2021/1441.pdf).  It uses that easier-to-use
 * field representation directly, eliminating the data conversion steps.
 *
 * This file provides library APIs for GHASH and POLYVAL.  These APIs can
 * delegate to either a generic implementation or an architecture-optimized
 * implementation.  Due to the mathematical relationship between GHASH and
 * POLYVAL, in some cases code for one is reused with the other.
 *
 * For the generic implementation, we don't use the traditional table approach
 * to GF(2^128) multiplication.  That approach is not constant-time and requires
 * a lot of memory.  Instead, we use a different approach which emulates
 * carryless multiplication using standard multiplications by spreading the data
 * bits apart using "holes".  This allows the carries to spill harmlessly.  This
 * approach is borrowed from BoringSSL, which in turn credits BearSSL's
 * documentation (https://bearssl.org/constanttime.html#ghash-for-gcm) for the
 * "holes" trick and a presentation by Shay Gueron
 * (https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf) for the
 * 256-bit => 128-bit reduction algorithm.
 */
5161f66c52SEric Biggers 
5261f66c52SEric Biggers #ifdef CONFIG_ARCH_SUPPORTS_INT128
5361f66c52SEric Biggers 
/*
 * Do a 64 x 64 => 128 bit carryless multiplication.
 *
 * @a and @b are the multiplicands.  The low 64 bits of the product are stored
 * to *@out_lo and the high 64 bits to *@out_hi.
 *
 * Only masks, integer multiplications, shifts, and XORs are used; there are no
 * table lookups and no branches on the operand values.
 */
static void clmul64(u64 a, u64 b, u64 *out_lo, u64 *out_hi)
{
	/*
	 * With 64-bit multiplicands and one term every 4 bits, there would be
	 * up to 64 / 4 = 16 one bits per column when each multiplication is
	 * written out as a series of additions in the schoolbook manner.
	 * Unfortunately, that doesn't work since the value 16 is 1 too large to
	 * fit in 4 bits.  Carries would sometimes overflow into the next term.
	 *
	 * Using one term every 5 bits would work.  However, that would cost
	 * 5 x 5 = 25 multiplications instead of 4 x 4 = 16.
	 *
	 * Instead, mask off 4 bits from one multiplicand, giving a max of 15
	 * one bits per column.  Then handle those 4 bits separately.
	 */
	u64 a0 = a & 0x1111111111111110;
	u64 a1 = a & 0x2222222222222220;
	u64 a2 = a & 0x4444444444444440;
	u64 a3 = a & 0x8888888888888880;

	u64 b0 = b & 0x1111111111111111;
	u64 b1 = b & 0x2222222222222222;
	u64 b2 = b & 0x4444444444444444;
	u64 b3 = b & 0x8888888888888888;

	/*
	 * Multiply the high 60 bits of @a by @b.
	 *
	 * c<k> combines the partial products a<i> * b<j> with i + j == k mod 4,
	 * so its meaningful bits sit at positions == k mod 4.  The other bit
	 * positions only hold spilled carries and are masked away below.
	 */
	u128 c0 = (a0 * (u128)b0) ^ (a1 * (u128)b3) ^
		  (a2 * (u128)b2) ^ (a3 * (u128)b1);
	u128 c1 = (a0 * (u128)b1) ^ (a1 * (u128)b0) ^
		  (a2 * (u128)b3) ^ (a3 * (u128)b2);
	u128 c2 = (a0 * (u128)b2) ^ (a1 * (u128)b1) ^
		  (a2 * (u128)b0) ^ (a3 * (u128)b3);
	u128 c3 = (a0 * (u128)b3) ^ (a1 * (u128)b2) ^
		  (a2 * (u128)b1) ^ (a3 * (u128)b0);

	/*
	 * Multiply the low 4 bits of @a by @b.  Negating the extracted bit
	 * gives an all-zeroes or all-ones mask, so e<k> is either 0 or @b.
	 * e<k> then contributes @b shifted left by k, split across the low and
	 * high output words.
	 */
	u64 e0 = -(a & 1) & b;
	u64 e1 = -((a >> 1) & 1) & b;
	u64 e2 = -((a >> 2) & 1) & b;
	u64 e3 = -((a >> 3) & 1) & b;
	u64 extra_lo = e0 ^ (e1 << 1) ^ (e2 << 2) ^ (e3 << 3);
	u64 extra_hi = (e1 >> 63) ^ (e2 >> 62) ^ (e3 >> 61);

	/* Add all the intermediate products together. */
	*out_lo = (((u64)c0) & 0x1111111111111111) ^
		  (((u64)c1) & 0x2222222222222222) ^
		  (((u64)c2) & 0x4444444444444444) ^
		  (((u64)c3) & 0x8888888888888888) ^ extra_lo;
	*out_hi = (((u64)(c0 >> 64)) & 0x1111111111111111) ^
		  (((u64)(c1 >> 64)) & 0x2222222222222222) ^
		  (((u64)(c2 >> 64)) & 0x4444444444444444) ^
		  (((u64)(c3 >> 64)) & 0x8888888888888888) ^ extra_hi;
}
10861f66c52SEric Biggers 
10961f66c52SEric Biggers #else /* CONFIG_ARCH_SUPPORTS_INT128 */
11061f66c52SEric Biggers 
/*
 * Do a 32 x 32 => 64 bit carryless multiplication.
 *
 * Returns the 64-bit carryless product of @a and @b.  Only masks, integer
 * multiplications, and XORs are used; there are no table lookups and no
 * branches on the operand values.
 */
static u64 clmul32(u32 a, u32 b)
{
	/*
	 * With 32-bit multiplicands and one term every 4 bits, there are up to
	 * 32 / 4 = 8 one bits per column when each multiplication is written
	 * out as a series of additions in the schoolbook manner.  The value 8
	 * fits in 4 bits, so the carries don't overflow into the next term.
	 */
	u32 a0 = a & 0x11111111;
	u32 a1 = a & 0x22222222;
	u32 a2 = a & 0x44444444;
	u32 a3 = a & 0x88888888;

	u32 b0 = b & 0x11111111;
	u32 b1 = b & 0x22222222;
	u32 b2 = b & 0x44444444;
	u32 b3 = b & 0x88888888;

	/*
	 * c<k> combines the partial products a<i> * b<j> with i + j == k mod 4,
	 * so its meaningful bits sit at positions == k mod 4.  The other bit
	 * positions only hold spilled carries and are masked away below.
	 */
	u64 c0 = (a0 * (u64)b0) ^ (a1 * (u64)b3) ^
		 (a2 * (u64)b2) ^ (a3 * (u64)b1);
	u64 c1 = (a0 * (u64)b1) ^ (a1 * (u64)b0) ^
		 (a2 * (u64)b3) ^ (a3 * (u64)b2);
	u64 c2 = (a0 * (u64)b2) ^ (a1 * (u64)b1) ^
		 (a2 * (u64)b0) ^ (a3 * (u64)b3);
	u64 c3 = (a0 * (u64)b3) ^ (a1 * (u64)b2) ^
		 (a2 * (u64)b1) ^ (a3 * (u64)b0);

	/* Add all the intermediate products together. */
	return (c0 & 0x1111111111111111) ^
	       (c1 & 0x2222222222222222) ^
	       (c2 & 0x4444444444444444) ^
	       (c3 & 0x8888888888888888);
}
14561f66c52SEric Biggers 
/*
 * Do a 64 x 64 => 128 bit carryless multiplication.
 *
 * This variant is built from three 32 x 32 => 64 bit carryless multiplications
 * (clmul32()).  The low 64 bits of the product are stored to *@out_lo and the
 * high 64 bits to *@out_hi.
 */
static void clmul64(u64 a, u64 b, u64 *out_lo, u64 *out_hi)
{
	u32 a_lo = (u32)a;
	u32 a_hi = a >> 32;
	u32 b_lo = (u32)b;
	u32 b_hi = b >> 32;

	/*
	 * Karatsuba multiplication: the middle term a_lo*b_hi + a_hi*b_lo is
	 * obtained as (a_lo + a_hi)*(b_lo + b_hi) + lo + hi, where '+' is XOR.
	 */
	u64 lo = clmul32(a_lo, b_lo);
	u64 hi = clmul32(a_hi, b_hi);
	u64 mi = clmul32(a_lo ^ a_hi, b_lo ^ b_hi) ^ lo ^ hi;

	/* The middle term is weighted by x^32, so it straddles both words. */
	*out_lo = lo ^ (mi << 32);
	*out_hi = hi ^ (mi >> 32);
}
16261f66c52SEric Biggers #endif /* !CONFIG_ARCH_SUPPORTS_INT128 */
16361f66c52SEric Biggers 
16461f66c52SEric Biggers /* Compute @a = @a * @b * x^-128 in the POLYVAL field. */
16561f66c52SEric Biggers static void __maybe_unused
16661f66c52SEric Biggers polyval_mul_generic(struct polyval_elem *a, const struct polyval_elem *b)
16761f66c52SEric Biggers {
16861f66c52SEric Biggers 	u64 c0, c1, c2, c3, mi0, mi1;
16961f66c52SEric Biggers 
17061f66c52SEric Biggers 	/*
17161f66c52SEric Biggers 	 * Carryless-multiply @a by @b using Karatsuba multiplication.  Store
17261f66c52SEric Biggers 	 * the 256-bit product in @c0 (low) through @c3 (high).
17361f66c52SEric Biggers 	 */
17461f66c52SEric Biggers 	clmul64(le64_to_cpu(a->lo), le64_to_cpu(b->lo), &c0, &c1);
17561f66c52SEric Biggers 	clmul64(le64_to_cpu(a->hi), le64_to_cpu(b->hi), &c2, &c3);
17661f66c52SEric Biggers 	clmul64(le64_to_cpu(a->lo ^ a->hi), le64_to_cpu(b->lo ^ b->hi),
17761f66c52SEric Biggers 		&mi0, &mi1);
17861f66c52SEric Biggers 	mi0 ^= c0 ^ c2;
17961f66c52SEric Biggers 	mi1 ^= c1 ^ c3;
18061f66c52SEric Biggers 	c1 ^= mi0;
18161f66c52SEric Biggers 	c2 ^= mi1;
18261f66c52SEric Biggers 
18361f66c52SEric Biggers 	/*
18461f66c52SEric Biggers 	 * Cancel out the low 128 bits of the product by adding multiples of
18561f66c52SEric Biggers 	 * G(x) = x^128 + x^127 + x^126 + x^121 + 1.  Do this in two steps, each
18661f66c52SEric Biggers 	 * of which cancels out 64 bits.  Note that we break G(x) into three
18761f66c52SEric Biggers 	 * parts: 1, x^64 * (x^63 + x^62 + x^57), and x^128 * 1.
18861f66c52SEric Biggers 	 */
18961f66c52SEric Biggers 
19061f66c52SEric Biggers 	/*
19161f66c52SEric Biggers 	 * First, add G(x) times c0 as follows:
19261f66c52SEric Biggers 	 *
19361f66c52SEric Biggers 	 * (c0, c1, c2) = (0,
19461f66c52SEric Biggers 	 *                 c1 + (c0 * (x^63 + x^62 + x^57) mod x^64),
19561f66c52SEric Biggers 	 *		   c2 + c0 + floor((c0 * (x^63 + x^62 + x^57)) / x^64))
19661f66c52SEric Biggers 	 */
19761f66c52SEric Biggers 	c1 ^= (c0 << 63) ^ (c0 << 62) ^ (c0 << 57);
19861f66c52SEric Biggers 	c2 ^= c0 ^ (c0 >> 1) ^ (c0 >> 2) ^ (c0 >> 7);
19961f66c52SEric Biggers 
20061f66c52SEric Biggers 	/*
20161f66c52SEric Biggers 	 * Second, add G(x) times the new c1:
20261f66c52SEric Biggers 	 *
20361f66c52SEric Biggers 	 * (c1, c2, c3) = (0,
20461f66c52SEric Biggers 	 *                 c2 + (c1 * (x^63 + x^62 + x^57) mod x^64),
20561f66c52SEric Biggers 	 *		   c3 + c1 + floor((c1 * (x^63 + x^62 + x^57)) / x^64))
20661f66c52SEric Biggers 	 */
20761f66c52SEric Biggers 	c2 ^= (c1 << 63) ^ (c1 << 62) ^ (c1 << 57);
20861f66c52SEric Biggers 	c3 ^= c1 ^ (c1 >> 1) ^ (c1 >> 2) ^ (c1 >> 7);
20961f66c52SEric Biggers 
21061f66c52SEric Biggers 	/* Return (c2, c3).  This implicitly multiplies by x^-128. */
21161f66c52SEric Biggers 	a->lo = cpu_to_le64(c2);
21261f66c52SEric Biggers 	a->hi = cpu_to_le64(c3);
21361f66c52SEric Biggers }
21461f66c52SEric Biggers 
215*c417e704SEric Biggers static void __maybe_unused ghash_blocks_generic(struct polyval_elem *acc,
216*c417e704SEric Biggers 						const struct polyval_elem *key,
217*c417e704SEric Biggers 						const u8 *data, size_t nblocks)
218*c417e704SEric Biggers {
219*c417e704SEric Biggers 	do {
220*c417e704SEric Biggers 		acc->lo ^=
221*c417e704SEric Biggers 			cpu_to_le64(get_unaligned_be64((__be64 *)(data + 8)));
222*c417e704SEric Biggers 		acc->hi ^= cpu_to_le64(get_unaligned_be64((__be64 *)data));
223*c417e704SEric Biggers 		polyval_mul_generic(acc, key);
224*c417e704SEric Biggers 		data += GHASH_BLOCK_SIZE;
225*c417e704SEric Biggers 	} while (--nblocks);
226*c417e704SEric Biggers }
227*c417e704SEric Biggers 
22861f66c52SEric Biggers static void __maybe_unused
22961f66c52SEric Biggers polyval_blocks_generic(struct polyval_elem *acc, const struct polyval_elem *key,
23061f66c52SEric Biggers 		       const u8 *data, size_t nblocks)
23161f66c52SEric Biggers {
23261f66c52SEric Biggers 	do {
23361f66c52SEric Biggers 		acc->lo ^= get_unaligned((__le64 *)data);
23461f66c52SEric Biggers 		acc->hi ^= get_unaligned((__le64 *)(data + 8));
23561f66c52SEric Biggers 		polyval_mul_generic(acc, key);
23661f66c52SEric Biggers 		data += POLYVAL_BLOCK_SIZE;
23761f66c52SEric Biggers 	} while (--nblocks);
23861f66c52SEric Biggers }
23961f66c52SEric Biggers 
240*c417e704SEric Biggers /* Convert the key from GHASH format to POLYVAL format. */
241*c417e704SEric Biggers static void __maybe_unused ghash_key_to_polyval(const u8 in[GHASH_BLOCK_SIZE],
242*c417e704SEric Biggers 						struct polyval_elem *out)
243*c417e704SEric Biggers {
244*c417e704SEric Biggers 	u64 hi = get_unaligned_be64(&in[0]);
245*c417e704SEric Biggers 	u64 lo = get_unaligned_be64(&in[8]);
246*c417e704SEric Biggers 	u64 mask = (s64)hi >> 63;
247*c417e704SEric Biggers 
248*c417e704SEric Biggers 	hi = (hi << 1) ^ (lo >> 63) ^ (mask & ((u64)0xc2 << 56));
249*c417e704SEric Biggers 	lo = (lo << 1) ^ (mask & 1);
250*c417e704SEric Biggers 	out->lo = cpu_to_le64(lo);
251*c417e704SEric Biggers 	out->hi = cpu_to_le64(hi);
252*c417e704SEric Biggers }
253*c417e704SEric Biggers 
254*c417e704SEric Biggers /* Convert the accumulator from POLYVAL format to GHASH format. */
255*c417e704SEric Biggers static void polyval_acc_to_ghash(const struct polyval_elem *in,
256*c417e704SEric Biggers 				 u8 out[GHASH_BLOCK_SIZE])
257*c417e704SEric Biggers {
258*c417e704SEric Biggers 	put_unaligned_be64(le64_to_cpu(in->hi), &out[0]);
259*c417e704SEric Biggers 	put_unaligned_be64(le64_to_cpu(in->lo), &out[8]);
260*c417e704SEric Biggers }
261*c417e704SEric Biggers 
262*c417e704SEric Biggers /* Convert the accumulator from GHASH format to POLYVAL format. */
263*c417e704SEric Biggers static void __maybe_unused ghash_acc_to_polyval(const u8 in[GHASH_BLOCK_SIZE],
264*c417e704SEric Biggers 						struct polyval_elem *out)
265*c417e704SEric Biggers {
266*c417e704SEric Biggers 	out->lo = cpu_to_le64(get_unaligned_be64(&in[8]));
267*c417e704SEric Biggers 	out->hi = cpu_to_le64(get_unaligned_be64(&in[0]));
268*c417e704SEric Biggers }
269*c417e704SEric Biggers 
27061f66c52SEric Biggers #ifdef CONFIG_CRYPTO_LIB_GF128HASH_ARCH
27161f66c52SEric Biggers #include "gf128hash.h" /* $(SRCARCH)/gf128hash.h */
272b3b6e8f9SEric Biggers #endif
273b3b6e8f9SEric Biggers 
274*c417e704SEric Biggers void ghash_preparekey(struct ghash_key *key, const u8 raw_key[GHASH_BLOCK_SIZE])
275*c417e704SEric Biggers {
276*c417e704SEric Biggers #ifdef ghash_preparekey_arch
277*c417e704SEric Biggers 	ghash_preparekey_arch(key, raw_key);
278*c417e704SEric Biggers #else
279*c417e704SEric Biggers 	ghash_key_to_polyval(raw_key, &key->h);
280*c417e704SEric Biggers #endif
281*c417e704SEric Biggers }
282*c417e704SEric Biggers EXPORT_SYMBOL_GPL(ghash_preparekey);
283*c417e704SEric Biggers 
284*c417e704SEric Biggers static void ghash_mul(struct ghash_ctx *ctx)
285*c417e704SEric Biggers {
286*c417e704SEric Biggers #ifdef ghash_mul_arch
287*c417e704SEric Biggers 	ghash_mul_arch(&ctx->acc, ctx->key);
288*c417e704SEric Biggers #elif defined(ghash_blocks_arch)
289*c417e704SEric Biggers 	static const u8 zeroes[GHASH_BLOCK_SIZE];
290*c417e704SEric Biggers 
291*c417e704SEric Biggers 	ghash_blocks_arch(&ctx->acc, ctx->key, zeroes, 1);
292*c417e704SEric Biggers #else
293*c417e704SEric Biggers 	polyval_mul_generic(&ctx->acc, &ctx->key->h);
294*c417e704SEric Biggers #endif
295*c417e704SEric Biggers }
296*c417e704SEric Biggers 
297*c417e704SEric Biggers /* nblocks is always >= 1. */
298*c417e704SEric Biggers static void ghash_blocks(struct ghash_ctx *ctx, const u8 *data, size_t nblocks)
299*c417e704SEric Biggers {
300*c417e704SEric Biggers #ifdef ghash_blocks_arch
301*c417e704SEric Biggers 	ghash_blocks_arch(&ctx->acc, ctx->key, data, nblocks);
302*c417e704SEric Biggers #else
303*c417e704SEric Biggers 	ghash_blocks_generic(&ctx->acc, &ctx->key->h, data, nblocks);
304*c417e704SEric Biggers #endif
305*c417e704SEric Biggers }
306*c417e704SEric Biggers 
307*c417e704SEric Biggers void ghash_update(struct ghash_ctx *ctx, const u8 *data, size_t len)
308*c417e704SEric Biggers {
309*c417e704SEric Biggers 	if (unlikely(ctx->partial)) {
310*c417e704SEric Biggers 		size_t n = min(len, GHASH_BLOCK_SIZE - ctx->partial);
311*c417e704SEric Biggers 
312*c417e704SEric Biggers 		len -= n;
313*c417e704SEric Biggers 		while (n--)
314*c417e704SEric Biggers 			ctx->acc.bytes[GHASH_BLOCK_SIZE - 1 - ctx->partial++] ^=
315*c417e704SEric Biggers 				*data++;
316*c417e704SEric Biggers 		if (ctx->partial < GHASH_BLOCK_SIZE)
317*c417e704SEric Biggers 			return;
318*c417e704SEric Biggers 		ghash_mul(ctx);
319*c417e704SEric Biggers 	}
320*c417e704SEric Biggers 	if (len >= GHASH_BLOCK_SIZE) {
321*c417e704SEric Biggers 		size_t nblocks = len / GHASH_BLOCK_SIZE;
322*c417e704SEric Biggers 
323*c417e704SEric Biggers 		ghash_blocks(ctx, data, nblocks);
324*c417e704SEric Biggers 		data += len & ~(GHASH_BLOCK_SIZE - 1);
325*c417e704SEric Biggers 		len &= GHASH_BLOCK_SIZE - 1;
326*c417e704SEric Biggers 	}
327*c417e704SEric Biggers 	for (size_t i = 0; i < len; i++)
328*c417e704SEric Biggers 		ctx->acc.bytes[GHASH_BLOCK_SIZE - 1 - i] ^= data[i];
329*c417e704SEric Biggers 	ctx->partial = len;
330*c417e704SEric Biggers }
331*c417e704SEric Biggers EXPORT_SYMBOL_GPL(ghash_update);
332*c417e704SEric Biggers 
333*c417e704SEric Biggers void ghash_final(struct ghash_ctx *ctx, u8 out[GHASH_BLOCK_SIZE])
334*c417e704SEric Biggers {
335*c417e704SEric Biggers 	if (unlikely(ctx->partial))
336*c417e704SEric Biggers 		ghash_mul(ctx);
337*c417e704SEric Biggers 	polyval_acc_to_ghash(&ctx->acc, out);
338*c417e704SEric Biggers 	memzero_explicit(ctx, sizeof(*ctx));
339*c417e704SEric Biggers }
340*c417e704SEric Biggers EXPORT_SYMBOL_GPL(ghash_final);
341*c417e704SEric Biggers 
34261f66c52SEric Biggers void polyval_preparekey(struct polyval_key *key,
34361f66c52SEric Biggers 			const u8 raw_key[POLYVAL_BLOCK_SIZE])
34461f66c52SEric Biggers {
345b3b6e8f9SEric Biggers #ifdef polyval_preparekey_arch
34661f66c52SEric Biggers 	polyval_preparekey_arch(key, raw_key);
347b3b6e8f9SEric Biggers #else
348b3b6e8f9SEric Biggers 	memcpy(key->h.bytes, raw_key, POLYVAL_BLOCK_SIZE);
349b3b6e8f9SEric Biggers #endif
35061f66c52SEric Biggers }
35161f66c52SEric Biggers EXPORT_SYMBOL_GPL(polyval_preparekey);
35261f66c52SEric Biggers 
/*
 * polyval_mul_generic() and polyval_blocks_generic() take the key as a
 * polyval_elem rather than a polyval_key, so that arch-optimized
 * implementations with a different key format can use them as a fallback (if
 * they have H^1 stored somewhere in their struct).  Thus, the following
 * dispatch code is needed to pass the appropriate key argument.
 */
36061f66c52SEric Biggers 
36161f66c52SEric Biggers static void polyval_mul(struct polyval_ctx *ctx)
36261f66c52SEric Biggers {
363b3b6e8f9SEric Biggers #ifdef polyval_mul_arch
36461f66c52SEric Biggers 	polyval_mul_arch(&ctx->acc, ctx->key);
365b3b6e8f9SEric Biggers #elif defined(polyval_blocks_arch)
366b3b6e8f9SEric Biggers 	static const u8 zeroes[POLYVAL_BLOCK_SIZE];
367b3b6e8f9SEric Biggers 
368b3b6e8f9SEric Biggers 	polyval_blocks_arch(&ctx->acc, ctx->key, zeroes, 1);
36961f66c52SEric Biggers #else
37061f66c52SEric Biggers 	polyval_mul_generic(&ctx->acc, &ctx->key->h);
37161f66c52SEric Biggers #endif
37261f66c52SEric Biggers }
37361f66c52SEric Biggers 
374*c417e704SEric Biggers /* nblocks is always >= 1. */
37561f66c52SEric Biggers static void polyval_blocks(struct polyval_ctx *ctx,
37661f66c52SEric Biggers 			   const u8 *data, size_t nblocks)
37761f66c52SEric Biggers {
378b3b6e8f9SEric Biggers #ifdef polyval_blocks_arch
37961f66c52SEric Biggers 	polyval_blocks_arch(&ctx->acc, ctx->key, data, nblocks);
38061f66c52SEric Biggers #else
38161f66c52SEric Biggers 	polyval_blocks_generic(&ctx->acc, &ctx->key->h, data, nblocks);
38261f66c52SEric Biggers #endif
38361f66c52SEric Biggers }
38461f66c52SEric Biggers 
38561f66c52SEric Biggers void polyval_update(struct polyval_ctx *ctx, const u8 *data, size_t len)
38661f66c52SEric Biggers {
38761f66c52SEric Biggers 	if (unlikely(ctx->partial)) {
38861f66c52SEric Biggers 		size_t n = min(len, POLYVAL_BLOCK_SIZE - ctx->partial);
38961f66c52SEric Biggers 
39061f66c52SEric Biggers 		len -= n;
39161f66c52SEric Biggers 		while (n--)
39261f66c52SEric Biggers 			ctx->acc.bytes[ctx->partial++] ^= *data++;
39361f66c52SEric Biggers 		if (ctx->partial < POLYVAL_BLOCK_SIZE)
39461f66c52SEric Biggers 			return;
39561f66c52SEric Biggers 		polyval_mul(ctx);
39661f66c52SEric Biggers 	}
39761f66c52SEric Biggers 	if (len >= POLYVAL_BLOCK_SIZE) {
39861f66c52SEric Biggers 		size_t nblocks = len / POLYVAL_BLOCK_SIZE;
39961f66c52SEric Biggers 
40061f66c52SEric Biggers 		polyval_blocks(ctx, data, nblocks);
40161f66c52SEric Biggers 		data += len & ~(POLYVAL_BLOCK_SIZE - 1);
40261f66c52SEric Biggers 		len &= POLYVAL_BLOCK_SIZE - 1;
40361f66c52SEric Biggers 	}
40461f66c52SEric Biggers 	for (size_t i = 0; i < len; i++)
40561f66c52SEric Biggers 		ctx->acc.bytes[i] ^= data[i];
40661f66c52SEric Biggers 	ctx->partial = len;
40761f66c52SEric Biggers }
40861f66c52SEric Biggers EXPORT_SYMBOL_GPL(polyval_update);
40961f66c52SEric Biggers 
41061f66c52SEric Biggers void polyval_final(struct polyval_ctx *ctx, u8 out[POLYVAL_BLOCK_SIZE])
41161f66c52SEric Biggers {
41261f66c52SEric Biggers 	if (unlikely(ctx->partial))
41361f66c52SEric Biggers 		polyval_mul(ctx);
41461f66c52SEric Biggers 	memcpy(out, &ctx->acc, POLYVAL_BLOCK_SIZE);
41561f66c52SEric Biggers 	memzero_explicit(ctx, sizeof(*ctx));
41661f66c52SEric Biggers }
41761f66c52SEric Biggers EXPORT_SYMBOL_GPL(polyval_final);
41861f66c52SEric Biggers 
#ifdef gf128hash_mod_init_arch
/*
 * Module init: run the architecture-specific initialization hook.  This is
 * only built when the architecture code defines gf128hash_mod_init_arch.
 * Always returns 0.
 */
static int __init gf128hash_mod_init(void)
{
	gf128hash_mod_init_arch();
	return 0;
}
subsys_initcall(gf128hash_mod_init);

/*
 * Module exit: nothing to undo.
 * NOTE(review): presumably provided only so that the module can be unloaded
 * when an init function is registered -- confirm.
 */
static void __exit gf128hash_mod_exit(void)
{
}
module_exit(gf128hash_mod_exit);
#endif
43261f66c52SEric Biggers 
43361f66c52SEric Biggers MODULE_DESCRIPTION("GF(2^128) polynomial hashing: GHASH and POLYVAL");
43461f66c52SEric Biggers MODULE_LICENSE("GPL");
435