#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Script that generates test vectors for the given hash function.
#
# Copyright 2025 Google LLC

import hashlib
import hmac
import sys

DATA_LENS = [0, 1, 2, 3, 16, 32, 48, 49, 63, 64, 65, 127, 128, 129, 256, 511,
             513, 1000, 3333, 4096, 4128, 4160, 4224, 16384]

# Generate the given number of random bytes, using the length itself as the seed
# for a simple linear congruential generator (LCG). The C test code uses the
# same LCG with the same seeding strategy to reconstruct the data, ensuring
# reproducibility without explicitly storing the data in the test vectors.
def rand_bytes(length):
    seed = length
    out = []
    for _ in range(length):
        seed = (seed * 25214903917 + 11) % 2**48
        out.append((seed >> 16) % 256)
    return bytes(out)

POLY1305_KEY_SIZE = 32

# A straightforward, unoptimized implementation of Poly1305.
# Reference: https://cr.yp.to/mac/poly1305-20050329.pdf
class Poly1305:
    def __init__(self, key):
        assert len(key) == POLY1305_KEY_SIZE
        self.h = 0
        rclamp = 0x0ffffffc0ffffffc0ffffffc0fffffff
        self.r = int.from_bytes(key[:16], byteorder='little') & rclamp
        self.s = int.from_bytes(key[16:], byteorder='little')

    # Note: this supports partial blocks only at the end.
    def update(self, data):
        for i in range(0, len(data), 16):
            chunk = data[i:i+16]
            c = int.from_bytes(chunk, byteorder='little') + 2**(8 * len(chunk))
            self.h = ((self.h + c) * self.r) % (2**130 - 5)
        return self

    # Note: gen_additional_poly1305_testvecs() relies on this being
    # nondestructive, i.e. not changing any field of self.
    def digest(self):
        m = (self.h + self.s) % 2**128
        return m.to_bytes(16, byteorder='little')

POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0])
POLYVAL_BLOCK_SIZE = 16

# A straightforward, unoptimized implementation of POLYVAL.
# Reference: https://datatracker.ietf.org/doc/html/rfc8452
class Polyval:
    def __init__(self, key):
        assert len(key) == 16
        self.h = int.from_bytes(key, byteorder='little')
        self.acc = 0

    # Note: this supports partial blocks only at the end.
    def update(self, data):
        for i in range(0, len(data), 16):
            # acc += block
            self.acc ^= int.from_bytes(data[i:i+16], byteorder='little')
            # acc = (acc * h * x^-128) mod POLYVAL_POLY
            product = 0
            for j in range(128):
                if (self.h & (1 << j)) != 0:
                    product ^= self.acc << j
                if (product & (1 << j)) != 0:
                    product ^= POLYVAL_POLY << j
            self.acc = product >> 128
        return self

    def digest(self):
        return self.acc.to_bytes(16, byteorder='little')

def hash_init(alg):
    if alg == 'poly1305':
        # Use a fixed random key here, to present Poly1305 as an unkeyed hash.
        # This allows all the test cases for unkeyed hashes to work on Poly1305.
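        # Note that this "random" key is deterministic, since rand_bytes()
        # seeds its LCG with the requested length (POLY1305_KEY_SIZE here).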
        return Poly1305(rand_bytes(POLY1305_KEY_SIZE))
    if alg == 'polyval':
        return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE))
    return hashlib.new(alg)

def hash_update(ctx, data):
    ctx.update(data)

def hash_final(ctx):
    return ctx.digest()

def compute_hash(alg, data):
    ctx = hash_init(alg)
    hash_update(ctx, data)
    return hash_final(ctx)

def print_bytes(prefix, value, bytes_per_line):
    for i in range(0, len(value), bytes_per_line):
        line = prefix + ''.join(f'0x{b:02x}, ' for b in value[i:i+bytes_per_line])
        print(line.rstrip())

def print_static_u8_array_definition(name, value):
    print('')
    print(f'static const u8 {name} = {{')
    print_bytes('\t', value, 8)
    print('};')

def print_c_struct_u8_array_field(name, value):
    print(f'\t\t.{name} = {{')
    print_bytes('\t\t\t', value, 8)
    print('\t\t},')

def alg_digest_size_const(alg):
    if alg.startswith('blake2'):
        return f'{alg.upper()}_HASH_SIZE'
    return f'{alg.upper().replace("-", "_")}_DIGEST_SIZE'

def gen_unkeyed_testvecs(alg):
    print('')
    print('static const struct {')
    print('\tsize_t data_len;')
    print(f'\tu8 digest[{alg_digest_size_const(alg)}];')
    print('} hash_testvecs[] = {')
    for data_len in DATA_LENS:
        data = rand_bytes(data_len)
        print('\t{')
        print(f'\t\t.data_len = {data_len},')
        print_c_struct_u8_array_field('digest', compute_hash(alg, data))
        print('\t},')
    print('};')

    data = rand_bytes(4096)
    ctx = hash_init(alg)
    for data_len in range(len(data) + 1):
        hash_update(ctx, compute_hash(alg, data[:data_len]))
    print_static_u8_array_definition(
            f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]',
            hash_final(ctx))

def gen_additional_sha3_testvecs():
    max_len = 4096
    in_data = rand_bytes(max_len)
    for alg in ['shake128', 'shake256']:
        ctx = hashlib.new('sha3-256')
        for in_len in range(max_len + 1):
            out_len = (in_len * 293) % (max_len + 1)
            out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len)
            ctx.update(out)
        print_static_u8_array_definition(
                f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]',
                ctx.digest())

def gen_hmac_testvecs(alg):
    ctx = hmac.new(rand_bytes(32), digestmod=alg)
    data = rand_bytes(4096)
    for data_len in range(len(data) + 1):
        ctx.update(data[:data_len])
        key_len = data_len % 293
        key = rand_bytes(key_len)
        mac = hmac.digest(key, data[:data_len], alg)
        ctx.update(mac)
    print_static_u8_array_definition(
            f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]',
            ctx.digest())

def gen_additional_blake2_testvecs(alg):
    if alg == 'blake2s':
        (max_key_size, max_hash_size) = (32, 32)
    elif alg == 'blake2b':
        (max_key_size, max_hash_size) = (64, 64)
    else:
        raise ValueError(f'Unsupported alg: {alg}')
    hashes = b''
    for key_len in range(max_key_size + 1):
        for out_len in range(1, max_hash_size + 1):
            h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len))
            h.update(rand_bytes(100))
            hashes += h.digest()
    print_static_u8_array_definition(
            f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]',
            compute_hash(alg, hashes))

def gen_additional_poly1305_testvecs():
    key = b'\xff' * POLY1305_KEY_SIZE
    data = b''
    ctx = Poly1305(key)
    for _ in range(32):
        for j in range(0, 4097, 16):
            ctx.update(b'\xff' * j)
            data += ctx.digest()
    print_static_u8_array_definition(
            'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]',
            Poly1305(key).update(data).digest())

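# Generate an additional POLYVAL test vector: using an all-ones key, hash an
# all-ones message of each block-aligned length from 0 to 4096 bytes, then
# output the POLYVAL of the concatenated digests ("hash of hashes").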
def gen_additional_polyval_testvecs():
    key = b'\xff' * POLYVAL_BLOCK_SIZE
    hashes = b''
    for data_len in range(0, 4097, 16):
        hashes += Polyval(key).update(b'\xff' * data_len).digest()
    print_static_u8_array_definition(
            'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]',
            Polyval(key).update(hashes).digest())

if len(sys.argv) != 2:
    sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n')
    sys.stderr.write('ALGORITHM may be any algorithm supported by Python hashlib, or poly1305, polyval, or sha3.\n')
    sys.stderr.write('Example: gen-hash-testvecs.py sha512\n')
    sys.exit(1)

alg = sys.argv[1]
print('/* SPDX-License-Identifier: GPL-2.0-or-later */')
print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */')
if alg.startswith('blake2'):
    gen_unkeyed_testvecs(alg)
    gen_additional_blake2_testvecs(alg)
elif alg == 'poly1305':
    gen_unkeyed_testvecs(alg)
    gen_additional_poly1305_testvecs()
elif alg == 'polyval':
    gen_unkeyed_testvecs(alg)
    gen_additional_polyval_testvecs()
elif alg == 'sha3':
    print()
    print('/* SHA3-256 test vectors */')
    gen_unkeyed_testvecs('sha3-256')
    print()
    print('/* SHAKE test vectors */')
    gen_additional_sha3_testvecs()
else:
    gen_unkeyed_testvecs(alg)
    gen_hmac_testvecs(alg)
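
# For example, 'gen-hash-testvecs.py sha512' writes hash_testvecs[],
# hash_testvec_consolidated[SHA512_DIGEST_SIZE], and
# hmac_testvec_consolidated[SHA512_DIGEST_SIZE] to standard output.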