// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * GF(2^128) polynomial hashing: GHASH and POLYVAL
 *
 * Copyright 2025 Google LLC
 */

#include <crypto/gf128hash.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/unaligned.h>

/*
 * GHASH and POLYVAL are almost-XOR-universal hash functions.  They interpret
 * the message as the coefficients of a polynomial in the finite field GF(2^128)
 * and evaluate that polynomial at a secret point.
 *
 * Neither GHASH nor POLYVAL is a cryptographic hash function.  They should be
 * used only by algorithms that are specifically designed to use them.
 *
 * GHASH is the older variant, defined as part of GCM in NIST SP 800-38D
 * (https://nvlpubs.nist.gov/nistpubs/legacy/sp/nistspecialpublication800-38d.pdf).
 * GHASH is hard to implement directly, due to its backwards mapping between
 * bits and polynomial coefficients.  GHASH implementations typically pre- and
 * post-process the inputs and outputs (mainly by byte-swapping) to convert the
 * GHASH computation into an equivalent computation over a different,
 * easier-to-use representation of GF(2^128).
 *
 * POLYVAL is a newer GF(2^128) polynomial hash, originally defined as part of
 * AES-GCM-SIV (https://datatracker.ietf.org/doc/html/rfc8452) and also used by
 * HCTR2 (https://eprint.iacr.org/2021/1441.pdf).  It uses that easier-to-use
 * field representation directly, eliminating the data conversion steps.
 *
 * This file provides library APIs for GHASH and POLYVAL.  These APIs can
 * delegate to either a generic implementation or an architecture-optimized
 * implementation.  Due to the mathematical relationship between GHASH and
 * POLYVAL, in some cases code for one is reused with the other.
 *
 * For the generic implementation, we don't use the traditional table approach
 * to GF(2^128) multiplication.  That approach is not constant-time and requires
 * a lot of memory.  Instead, we use a different approach which emulates
 * carryless multiplication using standard multiplications by spreading the data
 * bits apart using "holes".  This allows the carries to spill harmlessly.  This
 * approach is borrowed from BoringSSL, which in turn credits BearSSL's
 * documentation (https://bearssl.org/constanttime.html#ghash-for-gcm) for the
 * "holes" trick and a presentation by Shay Gueron
 * (https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf) for the
 * 256-bit => 128-bit reduction algorithm.
 */

#ifdef CONFIG_ARCH_SUPPORTS_INT128

/* Do a 64 x 64 => 128 bit carryless multiplication. */
static void clmul64(u64 a, u64 b, u64 *out_lo, u64 *out_hi)
{
	/*
	 * With 64-bit multiplicands and one term every 4 bits, there would be
	 * up to 64 / 4 = 16 one bits per column when each multiplication is
	 * written out as a series of additions in the schoolbook manner.
	 * Unfortunately, that doesn't work since the value 16 is 1 too large to
	 * fit in 4 bits.  Carries would sometimes overflow into the next term.
	 *
	 * Using one term every 5 bits would work.  However, that would cost
	 * 5 x 5 = 25 multiplications instead of 4 x 4 = 16.
	 *
	 * Instead, mask off 4 bits from one multiplicand, giving a max of 15
	 * one bits per column.  Then handle those 4 bits separately.
	 */
	u64 a0 = a & 0x1111111111111110;
	u64 a1 = a & 0x2222222222222220;
	u64 a2 = a & 0x4444444444444440;
	u64 a3 = a & 0x8888888888888880;

	u64 b0 = b & 0x1111111111111111;
	u64 b1 = b & 0x2222222222222222;
	u64 b2 = b & 0x4444444444444444;
	u64 b3 = b & 0x8888888888888888;

	/* Multiply the high 60 bits of @a by @b. */
	u128 c0 = (a0 * (u128)b0) ^ (a1 * (u128)b3) ^
		  (a2 * (u128)b2) ^ (a3 * (u128)b1);
	u128 c1 = (a0 * (u128)b1) ^ (a1 * (u128)b0) ^
		  (a2 * (u128)b3) ^ (a3 * (u128)b2);
	u128 c2 = (a0 * (u128)b2) ^ (a1 * (u128)b1) ^
		  (a2 * (u128)b0) ^ (a3 * (u128)b3);
	u128 c3 = (a0 * (u128)b3) ^ (a1 * (u128)b2) ^
		  (a2 * (u128)b1) ^ (a3 * (u128)b0);

	/*
	 * Multiply the low 4 bits of @a by @b.  Each ei is either 0 or a copy
	 * of @b, selected branchlessly by the corresponding bit of @a.
	 */
	u64 e0 = -(a & 1) & b;
	u64 e1 = -((a >> 1) & 1) & b;
	u64 e2 = -((a >> 2) & 1) & b;
	u64 e3 = -((a >> 3) & 1) & b;
	u64 extra_lo = e0 ^ (e1 << 1) ^ (e2 << 2) ^ (e3 << 3);
	u64 extra_hi = (e1 >> 63) ^ (e2 >> 62) ^ (e3 >> 61);

	/* Add all the intermediate products together. */
	*out_lo = (((u64)c0) & 0x1111111111111111) ^
		  (((u64)c1) & 0x2222222222222222) ^
		  (((u64)c2) & 0x4444444444444444) ^
		  (((u64)c3) & 0x8888888888888888) ^ extra_lo;
	*out_hi = (((u64)(c0 >> 64)) & 0x1111111111111111) ^
		  (((u64)(c1 >> 64)) & 0x2222222222222222) ^
		  (((u64)(c2 >> 64)) & 0x4444444444444444) ^
		  (((u64)(c3 >> 64)) & 0x8888888888888888) ^ extra_hi;
}

#else /* CONFIG_ARCH_SUPPORTS_INT128 */

/* Do a 32 x 32 => 64 bit carryless multiplication.
 */
static u64 clmul32(u32 a, u32 b)
{
	/*
	 * With 32-bit multiplicands and one term every 4 bits, there are up to
	 * 32 / 4 = 8 one bits per column when each multiplication is written
	 * out as a series of additions in the schoolbook manner.  The value 8
	 * fits in 4 bits, so the carries don't overflow into the next term.
	 */
	u32 a0 = a & 0x11111111;
	u32 a1 = a & 0x22222222;
	u32 a2 = a & 0x44444444;
	u32 a3 = a & 0x88888888;

	u32 b0 = b & 0x11111111;
	u32 b1 = b & 0x22222222;
	u32 b2 = b & 0x44444444;
	u32 b3 = b & 0x88888888;

	u64 c0 = (a0 * (u64)b0) ^ (a1 * (u64)b3) ^
		 (a2 * (u64)b2) ^ (a3 * (u64)b1);
	u64 c1 = (a0 * (u64)b1) ^ (a1 * (u64)b0) ^
		 (a2 * (u64)b3) ^ (a3 * (u64)b2);
	u64 c2 = (a0 * (u64)b2) ^ (a1 * (u64)b1) ^
		 (a2 * (u64)b0) ^ (a3 * (u64)b3);
	u64 c3 = (a0 * (u64)b3) ^ (a1 * (u64)b2) ^
		 (a2 * (u64)b1) ^ (a3 * (u64)b0);

	/* Add all the intermediate products together. */
	return (c0 & 0x1111111111111111) ^
	       (c1 & 0x2222222222222222) ^
	       (c2 & 0x4444444444444444) ^
	       (c3 & 0x8888888888888888);
}

/* Do a 64 x 64 => 128 bit carryless multiplication.
 */
static void clmul64(u64 a, u64 b, u64 *out_lo, u64 *out_hi)
{
	u32 a_lo = (u32)a;
	u32 a_hi = a >> 32;
	u32 b_lo = (u32)b;
	u32 b_hi = b >> 32;

	/* Karatsuba multiplication: three 32-bit clmuls instead of four. */
	u64 lo = clmul32(a_lo, b_lo);
	u64 hi = clmul32(a_hi, b_hi);
	u64 mi = clmul32(a_lo ^ a_hi, b_lo ^ b_hi) ^ lo ^ hi;

	*out_lo = lo ^ (mi << 32);
	*out_hi = hi ^ (mi >> 32);
}
#endif /* !CONFIG_ARCH_SUPPORTS_INT128 */

/* Compute @a = @a * @b * x^-128 in the POLYVAL field. */
static void __maybe_unused
polyval_mul_generic(struct polyval_elem *a, const struct polyval_elem *b)
{
	u64 c0, c1, c2, c3, mi0, mi1;

	/*
	 * Carryless-multiply @a by @b using Karatsuba multiplication.  Store
	 * the 256-bit product in @c0 (low) through @c3 (high).
	 */
	clmul64(le64_to_cpu(a->lo), le64_to_cpu(b->lo), &c0, &c1);
	clmul64(le64_to_cpu(a->hi), le64_to_cpu(b->hi), &c2, &c3);
	clmul64(le64_to_cpu(a->lo ^ a->hi), le64_to_cpu(b->lo ^ b->hi),
		&mi0, &mi1);
	mi0 ^= c0 ^ c2;
	mi1 ^= c1 ^ c3;
	c1 ^= mi0;
	c2 ^= mi1;

	/*
	 * Cancel out the low 128 bits of the product by adding multiples of
	 * G(x) = x^128 + x^127 + x^126 + x^121 + 1.  Do this in two steps, each
	 * of which cancels out 64 bits.  Note that we break G(x) into three
	 * parts: 1, x^64 * (x^63 + x^62 + x^57), and x^128 * 1.
	 */

	/*
	 * First, add G(x) times c0 as follows:
	 *
	 *	(c0, c1, c2) = (0,
	 *			c1 + (c0 * (x^63 + x^62 + x^57) mod x^64),
	 *			c2 + c0 + floor((c0 * (x^63 + x^62 + x^57)) / x^64))
	 */
	c1 ^= (c0 << 63) ^ (c0 << 62) ^ (c0 << 57);
	c2 ^= c0 ^ (c0 >> 1) ^ (c0 >> 2) ^ (c0 >> 7);

	/*
	 * Second, add G(x) times the new c1:
	 *
	 *	(c1, c2, c3) = (0,
	 *			c2 + (c1 * (x^63 + x^62 + x^57) mod x^64),
	 *			c3 + c1 + floor((c1 * (x^63 + x^62 + x^57)) / x^64))
	 */
	c2 ^= (c1 << 63) ^ (c1 << 62) ^ (c1 << 57);
	c3 ^= c1 ^ (c1 >> 1) ^ (c1 >> 2) ^ (c1 >> 7);

	/* Return (c2, c3).  This implicitly multiplies by x^-128. */
	a->lo = cpu_to_le64(c2);
	a->hi = cpu_to_le64(c3);
}

/*
 * GHASH fallback: byte-swap each message block into the POLYVAL byte order,
 * XOR it into the accumulator, then multiply by the key in the POLYVAL
 * representation.  nblocks is always >= 1.
 */
static void __maybe_unused ghash_blocks_generic(struct polyval_elem *acc,
						const struct polyval_elem *key,
						const u8 *data, size_t nblocks)
{
	do {
		acc->lo ^=
			cpu_to_le64(get_unaligned_be64((__be64 *)(data + 8)));
		acc->hi ^= cpu_to_le64(get_unaligned_be64((__be64 *)data));
		polyval_mul_generic(acc, key);
		data += GHASH_BLOCK_SIZE;
	} while (--nblocks);
}

/*
 * Absorb @nblocks POLYVAL blocks: XOR each block into the accumulator, then
 * multiply by @key.  nblocks is always >= 1.
 */
static void __maybe_unused
polyval_blocks_generic(struct polyval_elem *acc, const struct polyval_elem *key,
		       const u8 *data, size_t nblocks)
{
	do {
		acc->lo ^= get_unaligned((__le64 *)data);
		acc->hi ^= get_unaligned((__le64 *)(data + 8));
		polyval_mul_generic(acc, key);
		data += POLYVAL_BLOCK_SIZE;
	} while (--nblocks);
}

/* Convert the key from GHASH format to POLYVAL format.
 */
static void __maybe_unused ghash_key_to_polyval(const u8 in[GHASH_BLOCK_SIZE],
						struct polyval_elem *out)
{
	u64 hi = get_unaligned_be64(&in[0]);
	u64 lo = get_unaligned_be64(&in[8]);
	/* All-ones iff the top bit of the 128-bit value is set (branchless). */
	u64 mask = (s64)hi >> 63;

	/*
	 * Multiply by x: shift the 128-bit value left by one bit, and if a bit
	 * was shifted out of the top, reduce by XORing in the field polynomial
	 * (the (u64)0xc2 << 56 constant in the high word and 1 in the low
	 * word).
	 */
	hi = (hi << 1) ^ (lo >> 63) ^ (mask & ((u64)0xc2 << 56));
	lo = (lo << 1) ^ (mask & 1);
	out->lo = cpu_to_le64(lo);
	out->hi = cpu_to_le64(hi);
}

/* Convert the accumulator from POLYVAL format to GHASH format. */
static void polyval_acc_to_ghash(const struct polyval_elem *in,
				 u8 out[GHASH_BLOCK_SIZE])
{
	/* Byte-swap each 64-bit half and swap the halves. */
	put_unaligned_be64(le64_to_cpu(in->hi), &out[0]);
	put_unaligned_be64(le64_to_cpu(in->lo), &out[8]);
}

/* Convert the accumulator from GHASH format to POLYVAL format.
 */
static void __maybe_unused ghash_acc_to_polyval(const u8 in[GHASH_BLOCK_SIZE],
						struct polyval_elem *out)
{
	/* Byte-swap each 64-bit half and swap the halves. */
	out->lo = cpu_to_le64(get_unaligned_be64(&in[8]));
	out->hi = cpu_to_le64(get_unaligned_be64(&in[0]));
}

#ifdef CONFIG_CRYPTO_LIB_GF128HASH_ARCH
#include "gf128hash.h" /* $(SRCARCH)/gf128hash.h */
#endif

/* Expand the raw GHASH key, using the arch-specific routine if available. */
void ghash_preparekey(struct ghash_key *key, const u8 raw_key[GHASH_BLOCK_SIZE])
{
#ifdef ghash_preparekey_arch
	ghash_preparekey_arch(key, raw_key);
#else
	/* The generic code stores the key in POLYVAL format. */
	ghash_key_to_polyval(raw_key, &key->h);
#endif
}
EXPORT_SYMBOL_GPL(ghash_preparekey);

/* Multiply the accumulator by the hash key. */
static void ghash_mul(struct ghash_ctx *ctx)
{
#ifdef ghash_mul_arch
	ghash_mul_arch(&ctx->acc, ctx->key);
#elif defined(ghash_blocks_arch)
	/*
	 * No dedicated arch multiply routine, but there is an arch blocks
	 * routine.  Processing one all-zeroes block XORs nothing into the
	 * accumulator and then multiplies by the key, which is exactly the
	 * multiplication we need.
	 */
	static const u8 zeroes[GHASH_BLOCK_SIZE];

	ghash_blocks_arch(&ctx->acc, ctx->key, zeroes, 1);
#else
	polyval_mul_generic(&ctx->acc, &ctx->key->h);
#endif
}

/* nblocks is always >= 1.
 */
static void ghash_blocks(struct ghash_ctx *ctx, const u8 *data, size_t nblocks)
{
#ifdef ghash_blocks_arch
	ghash_blocks_arch(&ctx->acc, ctx->key, data, nblocks);
#else
	ghash_blocks_generic(&ctx->acc, &ctx->key->h, data, nblocks);
#endif
}

/* Feed @len bytes of message into the GHASH context. */
void ghash_update(struct ghash_ctx *ctx, const u8 *data, size_t len)
{
	if (unlikely(ctx->partial)) {
		/* Try to complete the partially-buffered block first. */
		size_t n = min(len, GHASH_BLOCK_SIZE - ctx->partial);

		len -= n;
		/*
		 * The accumulator is kept in POLYVAL (byte-reversed) format,
		 * so message bytes are XORed in starting from the last byte.
		 */
		while (n--)
			ctx->acc.bytes[GHASH_BLOCK_SIZE - 1 - ctx->partial++] ^=
				*data++;
		if (ctx->partial < GHASH_BLOCK_SIZE)
			return;
		/* The buffered block is now complete; multiply it in. */
		ghash_mul(ctx);
	}
	if (len >= GHASH_BLOCK_SIZE) {
		/* Process all remaining whole blocks in one call. */
		size_t nblocks = len / GHASH_BLOCK_SIZE;

		ghash_blocks(ctx, data, nblocks);
		data += len & ~(GHASH_BLOCK_SIZE - 1);
		len &= GHASH_BLOCK_SIZE - 1;
	}
	/* Buffer any remaining partial block (byte-reversed, as above). */
	for (size_t i = 0; i < len; i++)
		ctx->acc.bytes[GHASH_BLOCK_SIZE - 1 - i] ^= data[i];
	ctx->partial = len;
}
EXPORT_SYMBOL_GPL(ghash_update);

/* Finish the GHASH computation, write out the digest, and wipe the context. */
void ghash_final(struct ghash_ctx *ctx, u8 out[GHASH_BLOCK_SIZE])
{
	if (unlikely(ctx->partial))
		ghash_mul(ctx);
	polyval_acc_to_ghash(&ctx->acc, out);
	/* Zeroize: the context contains key-dependent data. */
	memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL_GPL(ghash_final);

/* Expand the raw POLYVAL key, using the arch-specific routine if available. */
void polyval_preparekey(struct polyval_key *key,
			const u8 raw_key[POLYVAL_BLOCK_SIZE])
{
#ifdef polyval_preparekey_arch
	polyval_preparekey_arch(key, raw_key);
#else
	/* The generic implementation uses the raw key directly. */
	memcpy(key->h.bytes, raw_key, POLYVAL_BLOCK_SIZE);
#endif
}
EXPORT_SYMBOL_GPL(polyval_preparekey);

/*
 * polyval_mul_generic() and polyval_blocks_generic() take the key as a
 * polyval_elem rather than a polyval_key, so that arch-optimized
 * implementations with a different key format can use it as a fallback (if they
 * have H^1 stored somewhere in their struct).  Thus, the following dispatch
 * code is needed to pass the appropriate key argument.
 */

/* Multiply the accumulator by the hash key. */
static void polyval_mul(struct polyval_ctx *ctx)
{
#ifdef polyval_mul_arch
	polyval_mul_arch(&ctx->acc, ctx->key);
#elif defined(polyval_blocks_arch)
	/*
	 * No dedicated arch multiply routine, but there is an arch blocks
	 * routine.  Processing one all-zeroes block XORs nothing into the
	 * accumulator and then multiplies by the key, which is exactly the
	 * multiplication we need.
	 */
	static const u8 zeroes[POLYVAL_BLOCK_SIZE];

	polyval_blocks_arch(&ctx->acc, ctx->key, zeroes, 1);
#else
	polyval_mul_generic(&ctx->acc, &ctx->key->h);
#endif
}

/* nblocks is always >= 1. */
static void polyval_blocks(struct polyval_ctx *ctx,
			   const u8 *data, size_t nblocks)
{
#ifdef polyval_blocks_arch
	polyval_blocks_arch(&ctx->acc, ctx->key, data, nblocks);
#else
	polyval_blocks_generic(&ctx->acc, &ctx->key->h, data, nblocks);
#endif
}

/* Feed @len bytes of message into the POLYVAL context. */
void polyval_update(struct polyval_ctx *ctx, const u8 *data, size_t len)
{
	if (unlikely(ctx->partial)) {
		/* Try to complete the partially-buffered block first. */
		size_t n = min(len, POLYVAL_BLOCK_SIZE - ctx->partial);

		len -= n;
		while (n--)
			ctx->acc.bytes[ctx->partial++] ^= *data++;
		if (ctx->partial < POLYVAL_BLOCK_SIZE)
			return;
		/* The buffered block is now complete; multiply it in. */
		polyval_mul(ctx);
	}
	if (len >= POLYVAL_BLOCK_SIZE) {
		/* Process all remaining whole blocks in one call. */
		size_t nblocks = len / POLYVAL_BLOCK_SIZE;

		polyval_blocks(ctx, data, nblocks);
		data += len & ~(POLYVAL_BLOCK_SIZE - 1);
		len &= POLYVAL_BLOCK_SIZE - 1;
	}
	/* Buffer any remaining partial block. */
	for (size_t i = 0; i < len; i++)
		ctx->acc.bytes[i] ^= data[i];
	ctx->partial = len;
}
EXPORT_SYMBOL_GPL(polyval_update);

/* Finish the POLYVAL computation, write out the digest, and wipe the context. */
void polyval_final(struct polyval_ctx *ctx, u8 out[POLYVAL_BLOCK_SIZE])
{
	if (unlikely(ctx->partial))
		polyval_mul(ctx);
	memcpy(out, &ctx->acc, POLYVAL_BLOCK_SIZE);
	/* Zeroize: the context contains key-dependent data. */
	memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL_GPL(polyval_final);

#ifdef gf128hash_mod_init_arch
/* Hook for arch code to run one-time setup (likely CPU feature detection). */
static int __init gf128hash_mod_init(void)
{
	gf128hash_mod_init_arch();
	return 0;
}
subsys_initcall(gf128hash_mod_init);

static void __exit gf128hash_mod_exit(void)
{
}
module_exit(gf128hash_mod_exit);
#endif

MODULE_DESCRIPTION("GF(2^128) polynomial hashing: GHASH and POLYVAL");
MODULE_LICENSE("GPL");