1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0-or-later 3# 4# Script that generates test vectors for the given hash function. 5# 6# Copyright 2025 Google LLC 7 8import hashlib 9import hmac 10import sys 11 12DATA_LENS = [0, 1, 2, 3, 16, 32, 48, 49, 63, 64, 65, 127, 128, 129, 256, 511, 13 513, 1000, 3333, 4096, 4128, 4160, 4224, 16384] 14 15# Generate the given number of random bytes, using the length itself as the seed 16# for a simple linear congruential generator (LCG). The C test code uses the 17# same LCG with the same seeding strategy to reconstruct the data, ensuring 18# reproducibility without explicitly storing the data in the test vectors. 19def rand_bytes(length): 20 seed = length 21 out = [] 22 for _ in range(length): 23 seed = (seed * 25214903917 + 11) % 2**48 24 out.append((seed >> 16) % 256) 25 return bytes(out) 26 27POLY1305_KEY_SIZE = 32 28 29# A straightforward, unoptimized implementation of Poly1305. 30# Reference: https://cr.yp.to/mac/poly1305-20050329.pdf 31class Poly1305: 32 def __init__(self, key): 33 assert len(key) == POLY1305_KEY_SIZE 34 self.h = 0 35 rclamp = 0x0ffffffc0ffffffc0ffffffc0fffffff 36 self.r = int.from_bytes(key[:16], byteorder='little') & rclamp 37 self.s = int.from_bytes(key[16:], byteorder='little') 38 39 # Note: this supports partial blocks only at the end. 40 def update(self, data): 41 for i in range(0, len(data), 16): 42 chunk = data[i:i+16] 43 c = int.from_bytes(chunk, byteorder='little') + 2**(8 * len(chunk)) 44 self.h = ((self.h + c) * self.r) % (2**130 - 5) 45 return self 46 47 # Note: gen_additional_poly1305_testvecs() relies on this being 48 # nondestructive, i.e. not changing any field of self. 49 def digest(self): 50 m = (self.h + self.s) % 2**128 51 return m.to_bytes(16, byteorder='little') 52 53POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0]) 54POLYVAL_BLOCK_SIZE = 16 55 56# A straightforward, unoptimized implementation of POLYVAL. 57# Reference: https://datatracker.ietf.org/doc/html/rfc8452 58class Polyval: 59 def __init__(self, key): 60 assert len(key) == 16 61 self.h = int.from_bytes(key, byteorder='little') 62 self.acc = 0 63 64 # Note: this supports partial blocks only at the end. 65 def update(self, data): 66 for i in range(0, len(data), 16): 67 # acc += block 68 self.acc ^= int.from_bytes(data[i:i+16], byteorder='little') 69 # acc = (acc * h * x^-128) mod POLYVAL_POLY 70 product = 0 71 for j in range(128): 72 if (self.h & (1 << j)) != 0: 73 product ^= self.acc << j 74 if (product & (1 << j)) != 0: 75 product ^= POLYVAL_POLY << j 76 self.acc = product >> 128 77 return self 78 79 def digest(self): 80 return self.acc.to_bytes(16, byteorder='little') 81 82def hash_init(alg): 83 if alg == 'poly1305': 84 # Use a fixed random key here, to present Poly1305 as an unkeyed hash. 85 # This allows all the test cases for unkeyed hashes to work on Poly1305. 86 return Poly1305(rand_bytes(POLY1305_KEY_SIZE)) 87 if alg == 'polyval': 88 return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE)) 89 return hashlib.new(alg) 90 91def hash_update(ctx, data): 92 ctx.update(data) 93 94def hash_final(ctx): 95 return ctx.digest() 96 97def compute_hash(alg, data): 98 ctx = hash_init(alg) 99 hash_update(ctx, data) 100 return hash_final(ctx) 101 102def print_bytes(prefix, value, bytes_per_line): 103 for i in range(0, len(value), bytes_per_line): 104 line = prefix + ''.join(f'0x{b:02x}, ' for b in value[i:i+bytes_per_line]) 105 print(f'{line.rstrip()}') 106 107def print_static_u8_array_definition(name, value): 108 print('') 109 print(f'static const u8 {name} = {{') 110 print_bytes('\t', value, 8) 111 print('};') 112 113def print_c_struct_u8_array_field(name, value): 114 print(f'\t\t.{name} = {{') 115 print_bytes('\t\t\t', value, 8) 116 print('\t\t},') 117 118def alg_digest_size_const(alg): 119 if alg.startswith('blake2'): 120 return f'{alg.upper()}_HASH_SIZE' 121 return f"{alg.upper().replace('-', '_')}_DIGEST_SIZE" 122 123def gen_unkeyed_testvecs(alg): 124 print('') 125 print('static const struct {') 126 print('\tsize_t data_len;') 127 print(f'\tu8 digest[{alg_digest_size_const(alg)}];') 128 print('} hash_testvecs[] = {') 129 for data_len in DATA_LENS: 130 data = rand_bytes(data_len) 131 print('\t{') 132 print(f'\t\t.data_len = {data_len},') 133 print_c_struct_u8_array_field('digest', compute_hash(alg, data)) 134 print('\t},') 135 print('};') 136 137 data = rand_bytes(4096) 138 ctx = hash_init(alg) 139 for data_len in range(len(data) + 1): 140 hash_update(ctx, compute_hash(alg, data[:data_len])) 141 print_static_u8_array_definition( 142 f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]', 143 hash_final(ctx)) 144 145def gen_additional_sha3_testvecs(): 146 max_len = 4096 147 in_data = rand_bytes(max_len) 148 for alg in ['shake128', 'shake256']: 149 ctx = hashlib.new('sha3-256') 150 for in_len in range(max_len + 1): 151 out_len = (in_len * 293) % (max_len + 1) 152 out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len) 153 ctx.update(out) 154 print_static_u8_array_definition(f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]', 155 ctx.digest()) 156 157def gen_hmac_testvecs(alg): 158 ctx = hmac.new(rand_bytes(32), digestmod=alg) 159 data = rand_bytes(4096) 160 for data_len in range(len(data) + 1): 161 ctx.update(data[:data_len]) 162 key_len = data_len % 293 163 key = rand_bytes(key_len) 164 mac = hmac.digest(key, data[:data_len], alg) 165 ctx.update(mac) 166 print_static_u8_array_definition( 167 f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]', 168 ctx.digest()) 169 170def gen_additional_blake2_testvecs(alg): 171 if alg == 'blake2s': 172 (max_key_size, max_hash_size) = (32, 32) 173 elif alg == 'blake2b': 174 (max_key_size, max_hash_size) = (64, 64) 175 else: 176 raise ValueError(f'Unsupported alg: {alg}') 177 hashes = b'' 178 for key_len in range(max_key_size + 1): 179 for out_len in range(1, max_hash_size + 1): 180 h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len)) 181 h.update(rand_bytes(100)) 182 hashes += h.digest() 183 print_static_u8_array_definition( 184 f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]', 185 compute_hash(alg, hashes)) 186 187def nh_extract_int(bytestr, pos, length): 188 assert pos % 8 == 0 and length % 8 == 0 189 return int.from_bytes(bytestr[pos//8 : pos//8 + length//8], byteorder='little') 190 191# The NH "almost-universal hash function" used in Adiantum. This is a 192# straightforward translation of the pseudocode from Section 6.3 of the Adiantum 193# paper (https://eprint.iacr.org/2018/720.pdf), except the outer loop is omitted 194# because we assume len(msg) <= 1024. (The kernel's nh() function is only 195# expected to handle up to 1024 bytes; it's just called repeatedly as needed.) 196def nh(key, msg): 197 (w, s, r, u) = (32, 2, 4, 8192) 198 l = 8 * len(msg) 199 assert l <= u 200 assert l % (2*s*w) == 0 201 h = bytes() 202 for i in range(0, 2*s*w*r, 2*s*w): 203 p = 0 204 for j in range(0, l, 2*s*w): 205 for k in range(0, w*s, w): 206 a0 = nh_extract_int(key, i + j + k, w) 207 a1 = nh_extract_int(key, i + j + k + s*w, w) 208 b0 = nh_extract_int(msg, j + k, w) 209 b1 = nh_extract_int(msg, j + k + s*w, w) 210 p += ((a0 + b0) % 2**w) * ((a1 + b1) % 2**w) 211 h += (p % 2**64).to_bytes(8, byteorder='little') 212 return h 213 214def gen_nh_testvecs(): 215 NH_KEY_BYTES = 1072 216 NH_MESSAGE_BYTES = 1024 217 key = rand_bytes(NH_KEY_BYTES) 218 msg = rand_bytes(NH_MESSAGE_BYTES) 219 print_static_u8_array_definition('nh_test_key[NH_KEY_BYTES]', key) 220 print_static_u8_array_definition('nh_test_msg[NH_MESSAGE_BYTES]', msg) 221 for length in [16, 96, 256, 1024]: 222 print_static_u8_array_definition(f'nh_test_val{length}[NH_HASH_BYTES]', 223 nh(key, msg[:length])) 224 225def gen_additional_poly1305_testvecs(): 226 key = b'\xff' * POLY1305_KEY_SIZE 227 data = b'' 228 ctx = Poly1305(key) 229 for _ in range(32): 230 for j in range(0, 4097, 16): 231 ctx.update(b'\xff' * j) 232 data += ctx.digest() 233 print_static_u8_array_definition( 234 'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]', 235 Poly1305(key).update(data).digest()) 236 237def gen_additional_polyval_testvecs(): 238 key = b'\xff' * POLYVAL_BLOCK_SIZE 239 hashes = b'' 240 for data_len in range(0, 4097, 16): 241 hashes += Polyval(key).update(b'\xff' * data_len).digest() 242 print_static_u8_array_definition( 243 'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]', 244 Polyval(key).update(hashes).digest()) 245 246if len(sys.argv) != 2: 247 sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n') 248 sys.stderr.write('ALGORITHM may be any supported by Python hashlib; or poly1305, polyval, or sha3.\n') 249 sys.stderr.write('Example: gen-hash-testvecs.py sha512\n') 250 sys.exit(1) 251 252alg = sys.argv[1] 253print('/* SPDX-License-Identifier: GPL-2.0-or-later */') 254print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */') 255if alg.startswith('blake2'): 256 gen_unkeyed_testvecs(alg) 257 gen_additional_blake2_testvecs(alg) 258elif alg == 'nh': 259 gen_nh_testvecs() 260elif alg == 'poly1305': 261 gen_unkeyed_testvecs(alg) 262 gen_additional_poly1305_testvecs() 263elif alg == 'polyval': 264 gen_unkeyed_testvecs(alg) 265 gen_additional_polyval_testvecs() 266elif alg == 'sha3': 267 print() 268 print('/* SHA3-256 test vectors */') 269 gen_unkeyed_testvecs('sha3-256') 270 print() 271 print('/* SHAKE test vectors */') 272 gen_additional_sha3_testvecs() 273else: 274 gen_unkeyed_testvecs(alg) 275 gen_hmac_testvecs(alg) 276