#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
#
# Script that generates test vectors for the given hash function.
#
# Requires that python-cryptography be installed, but only when generating the
# aes-cmac test vectors.  Every other algorithm needs just the Python standard
# library.
#
# Copyright 2025 Google LLC

import hashlib
import hmac
import sys

# Message lengths (in bytes) for which an individual test vector is emitted.
DATA_LENS = [0, 1, 2, 3, 16, 32, 48, 49, 63, 64, 65, 127, 128, 129, 256, 511,
             513, 1000, 3333, 4096, 4128, 4160, 4224, 16384]

# Generate the given number of random bytes, using the length itself as the seed
# for a simple linear congruential generator (LCG).  The C test code uses the
# same LCG with the same seeding strategy to reconstruct the data, ensuring
# reproducibility without explicitly storing the data in the test vectors.
def rand_bytes(length):
    seed = length
    out = []
    for _ in range(length):
        seed = (seed * 25214903917 + 11) % 2**48
        # Each output byte is bits 16..23 of the 48-bit LCG state.
        out.append((seed >> 16) % 256)
    return bytes(out)

AES_256_KEY_SIZE = 32

# AES-CMAC.  Just wraps the implementation from python-cryptography.
class AesCmac:
    def __init__(self, key):
        # Imported lazily so that python-cryptography is required only when
        # AES-CMAC test vectors are actually being generated.
        from cryptography.hazmat.primitives import cmac
        from cryptography.hazmat.primitives.ciphers import algorithms
        self.cmac = cmac.CMAC(algorithms.AES(key))

    def update(self, data):
        self.cmac.update(data)

    def digest(self):
        return self.cmac.finalize()

POLY1305_KEY_SIZE = 32

# A straightforward, unoptimized implementation of Poly1305.
# Reference: https://cr.yp.to/mac/poly1305-20050329.pdf
class Poly1305:
    # The 32-byte key is split into r (first 16 bytes, clamped) and s (last 16
    # bytes), both interpreted as little-endian integers.
    def __init__(self, key):
        assert len(key) == POLY1305_KEY_SIZE
        self.h = 0
        rclamp = 0x0ffffffc0ffffffc0ffffffc0fffffff
        self.r = int.from_bytes(key[:16], byteorder='little') & rclamp
        self.s = int.from_bytes(key[16:], byteorder='little')

    # Absorb data in 16-byte blocks.  Returns self so calls can be chained.
    # Note: this supports partial blocks only at the end.
    def update(self, data):
        for i in range(0, len(data), 16):
            chunk = data[i:i+16]
            # Set the bit just above the chunk, i.e. append a 0x01 byte.
            c = int.from_bytes(chunk, byteorder='little') + 2**(8 * len(chunk))
            self.h = ((self.h + c) * self.r) % (2**130 - 5)
        return self

    # Return the 16-byte tag: (h + s) mod 2^128, little-endian.
    # Note: gen_additional_poly1305_testvecs() relies on this being
    # nondestructive, i.e. not changing any field of self.
    def digest(self):
        m = (self.h + self.s) % 2**128
        return m.to_bytes(16, byteorder='little')

POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0])
POLYVAL_BLOCK_SIZE = 16

# A straightforward, unoptimized implementation of POLYVAL.
# Reference: https://datatracker.ietf.org/doc/html/rfc8452
class Polyval:
    def __init__(self, key):
        assert len(key) == 16
        self.h = int.from_bytes(key, byteorder='little')
        self.acc = 0

    # Absorb data in 16-byte blocks.  Returns self so calls can be chained.
    # Note: this supports partial blocks only at the end.
    def update(self, data):
        for i in range(0, len(data), 16):
            # acc += block
            self.acc ^= int.from_bytes(data[i:i+16], byteorder='little')
            # acc = (acc * h * x^-128) mod POLYVAL_POLY
            product = 0
            for j in range(128):
                if (self.h & (1 << j)) != 0:
                    product ^= self.acc << j
                # Cancel bit j so that the low 128 bits end up zero and the
                # final right shift by 128 is exact.
                if (product & (1 << j)) != 0:
                    product ^= POLYVAL_POLY << j
            self.acc = product >> 128
        return self

    def digest(self):
        return self.acc.to_bytes(16, byteorder='little')

# Create a hashing context for the given algorithm name.  The returned object
# provides update(data) and digest().
def hash_init(alg):
    # The keyed hash functions are assigned a fixed random key here, to present
    # them as unkeyed hash functions.  This allows all the test cases for
    # unkeyed hash functions to work on them.
    if alg == 'aes-cmac':
        return AesCmac(rand_bytes(AES_256_KEY_SIZE))
    if alg == 'poly1305':
        return Poly1305(rand_bytes(POLY1305_KEY_SIZE))
    if alg == 'polyval':
        return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE))
    return hashlib.new(alg)

def hash_update(ctx, data):
    ctx.update(data)

def hash_final(ctx):
    return ctx.digest()

# One-shot convenience wrapper: hash all of 'data' with 'alg'.
def compute_hash(alg, data):
    ctx = hash_init(alg)
    hash_update(ctx, data)
    return hash_final(ctx)

# Print 'value' as C byte initializers ("0x00, 0x01, ..."), with
# 'bytes_per_line' bytes per output line, each line starting with 'prefix'.
def print_bytes(prefix, value, bytes_per_line):
    for i in range(0, len(value), bytes_per_line):
        line = prefix + ''.join(f'0x{b:02x}, ' for b in value[i:i+bytes_per_line])
        # Drop the trailing space left after the last comma.
        print(line.rstrip())

# Print a 'static const u8' array definition.  'name' must already include the
# array declarator, e.g. 'foo[FOO_SIZE]'.
def print_static_u8_array_definition(name, value):
    print('')
    print(f'static const u8 {name} = {{')
    print_bytes('\t', value, 8)
    print('};')

# Print a designated initializer for a u8 array field of a struct that is
# itself an element of an array (hence the two levels of indentation).
def print_c_struct_u8_array_field(name, value):
    print(f'\t\t.{name} = {{')
    print_bytes('\t\t\t', value, 8)
    print('\t\t},')

# Return the name of the C constant that holds the digest size of 'alg'.
def alg_digest_size_const(alg):
    if alg == 'aes-cmac':
        return 'AES_BLOCK_SIZE'
    if alg.startswith('blake2'):
        return f'{alg.upper()}_HASH_SIZE'
    # '-' is not valid in a C identifier, e.g. sha3-256 => SHA3_256_DIGEST_SIZE.
    return f"{alg.upper().replace('-', '_')}_DIGEST_SIZE"

# Print the test vectors that apply to any hash function that can be used
# without a caller-provided key: one digest per length in DATA_LENS, plus a
# single "consolidated" digest.
def gen_unkeyed_testvecs(alg):
    print('')
    print('static const struct {')
    print('\tsize_t data_len;')
    print(f'\tu8 digest[{alg_digest_size_const(alg)}];')
    print('} hash_testvecs[] = {')
    for data_len in DATA_LENS:
        data = rand_bytes(data_len)
        print('\t{')
        print(f'\t\t.data_len = {data_len},')
        print_c_struct_u8_array_field('digest', compute_hash(alg, data))
        print('\t},')
    print('};')

    # The consolidated value is the hash of the hashes of every prefix
    # (lengths 0 through 4096 inclusive) of one 4096-byte random buffer.
    data = rand_bytes(4096)
    ctx = hash_init(alg)
    for data_len in range(len(data) + 1):
        hash_update(ctx, compute_hash(alg, data[:data_len]))
    print_static_u8_array_definition(
            f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]',
            hash_final(ctx))

# Print a consolidated test vector for each of SHAKE128 and SHAKE256: the
# SHA3-256 hash of the SHAKE outputs for every prefix of a random buffer, with
# the requested output length varying along with the input length.
def gen_additional_sha3_testvecs():
    max_len = 4096
    in_data = rand_bytes(max_len)
    for alg in ['shake128', 'shake256']:
        ctx = hashlib.new('sha3-256')
        for in_len in range(max_len + 1):
            out_len = (in_len * 293) % (max_len + 1)
            out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len)
            ctx.update(out)
        print_static_u8_array_definition(f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]',
                                         ctx.digest())

# Print a consolidated HMAC test vector: an outer HMAC computed over every
# prefix of a random buffer followed by that prefix's MAC, where the MAC's key
# length varies with the prefix length.
def gen_hmac_testvecs(alg):
    ctx = hmac.new(rand_bytes(32), digestmod=alg)
    data = rand_bytes(4096)
    for data_len in range(len(data) + 1):
        ctx.update(data[:data_len])
        key_len = data_len % 293
        key = rand_bytes(key_len)
        mac = hmac.digest(key, data[:data_len], alg)
        ctx.update(mac)
    # Use the shared helper so that the constant is a valid C identifier even
    # for algorithm names that contain '-', and matches gen_unkeyed_testvecs().
    print_static_u8_array_definition(
            f'hmac_testvec_consolidated[{alg_digest_size_const(alg)}]',
            ctx.digest())

# Print a consolidated test vector covering every combination of key length
# and output length supported by the given BLAKE2 variant.
def gen_additional_blake2_testvecs(alg):
    if alg == 'blake2s':
        (max_key_size, max_hash_size) = (32, 32)
    elif alg == 'blake2b':
        (max_key_size, max_hash_size) = (64, 64)
    else:
        raise ValueError(f'Unsupported alg: {alg}')
    hashes = b''
    for key_len in range(max_key_size + 1):
        for out_len in range(1, max_hash_size + 1):
            h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len))
            h.update(rand_bytes(100))
            hashes += h.digest()
    print_static_u8_array_definition(
            f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]',
            compute_hash(alg, hashes))

# Extract a little-endian integer of 'length' bits starting at bit offset 'pos'
# of 'bytestr'.  Both 'pos' and 'length' must be multiples of 8.
def nh_extract_int(bytestr, pos, length):
    assert pos % 8 == 0 and length % 8 == 0
    return int.from_bytes(bytestr[pos//8 : pos//8 + length//8], byteorder='little')

# The NH "almost-universal hash function" used in Adiantum.  This is a
# straightforward translation of the pseudocode from Section 6.3 of the Adiantum
# paper (https://eprint.iacr.org/2018/720.pdf), except the outer loop is omitted
# because we assume len(msg) <= 1024.  (The kernel's nh() function is only
# expected to handle up to 1024 bytes; it's just called repeatedly as needed.)
def nh(key, msg):
    # All of w, s, u, l and the loop offsets below are measured in bits.
    (w, s, r, u) = (32, 2, 4, 8192)
    l = 8 * len(msg)
    assert l <= u
    assert l % (2*s*w) == 0
    h = bytes()
    # One 64-bit output word per pass; each pass starts 2*s*w bits further
    # into the key.
    for i in range(0, 2*s*w*r, 2*s*w):
        p = 0
        for j in range(0, l, 2*s*w):
            for k in range(0, w*s, w):
                a0 = nh_extract_int(key, i + j + k, w)
                a1 = nh_extract_int(key, i + j + k + s*w, w)
                b0 = nh_extract_int(msg, j + k, w)
                b1 = nh_extract_int(msg, j + k + s*w, w)
                p += ((a0 + b0) % 2**w) * ((a1 + b1) % 2**w)
        h += (p % 2**64).to_bytes(8, byteorder='little')
    return h

# Print the NH test key and message, plus the expected NH value for several
# message lengths.
def gen_nh_testvecs():
    NH_KEY_BYTES = 1072
    NH_MESSAGE_BYTES = 1024
    key = rand_bytes(NH_KEY_BYTES)
    msg = rand_bytes(NH_MESSAGE_BYTES)
    print_static_u8_array_definition('nh_test_key[NH_KEY_BYTES]', key)
    print_static_u8_array_definition('nh_test_msg[NH_MESSAGE_BYTES]', msg)
    for length in [16, 96, 256, 1024]:
        print_static_u8_array_definition(f'nh_test_val{length}[NH_HASH_BYTES]',
                                         nh(key, msg[:length]))

# Print a Poly1305 test vector that uses an all-ones key and all-ones data: the
# MAC of the intermediate MACs taken while absorbing the data.
def gen_additional_poly1305_testvecs():
    key = b'\xff' * POLY1305_KEY_SIZE
    data = b''
    ctx = Poly1305(key)
    for _ in range(32):
        for j in range(0, 4097, 16):
            ctx.update(b'\xff' * j)
            # digest() does not modify ctx, so ctx keeps accumulating.
            data += ctx.digest()
    print_static_u8_array_definition(
            'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]',
            Poly1305(key).update(data).digest())

# Print a POLYVAL test vector that uses an all-ones key and all-ones data: the
# hash of the hashes of all-ones messages of length 0, 16, ..., 4096 bytes.
def gen_additional_polyval_testvecs():
    key = b'\xff' * POLYVAL_BLOCK_SIZE
    hashes = b''
    for data_len in range(0, 4097, 16):
        hashes += Polyval(key).update(b'\xff' * data_len).digest()
    print_static_u8_array_definition(
            'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]',
            Polyval(key).update(hashes).digest())

# Parse the command line and print the test vectors for the chosen algorithm
# to stdout, as C source code.
def main():
    if len(sys.argv) != 2:
        sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n')
        sys.stderr.write('ALGORITHM may be any supported by Python hashlib; '
                         'or aes-cmac, nh, poly1305, polyval, or sha3.\n')
        sys.stderr.write('Example: gen-hash-testvecs.py sha512\n')
        sys.exit(1)

    alg = sys.argv[1]
    print('/* SPDX-License-Identifier: GPL-2.0-or-later */')
    print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */')
    if alg == 'aes-cmac':
        gen_unkeyed_testvecs(alg)
    elif alg.startswith('blake2'):
        gen_unkeyed_testvecs(alg)
        gen_additional_blake2_testvecs(alg)
    elif alg == 'nh':
        gen_nh_testvecs()
    elif alg == 'poly1305':
        gen_unkeyed_testvecs(alg)
        gen_additional_poly1305_testvecs()
    elif alg == 'polyval':
        gen_unkeyed_testvecs(alg)
        gen_additional_polyval_testvecs()
    elif alg == 'sha3':
        print()
        print('/* SHA3-256 test vectors */')
        gen_unkeyed_testvecs('sha3-256')
        print()
        print('/* SHAKE test vectors */')
        gen_additional_sha3_testvecs()
    else:
        gen_unkeyed_testvecs(alg)
        gen_hmac_testvecs(alg)

if __name__ == '__main__':
    main()