1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0-or-later 3# 4# Script that generates test vectors for the given hash function. 5# 6# Requires that python-cryptography be installed. 7# 8# Copyright 2025 Google LLC 9 10import cryptography.hazmat.primitives.ciphers 11import cryptography.hazmat.primitives.cmac 12import hashlib 13import hmac 14import sys 15 16DATA_LENS = [0, 1, 2, 3, 16, 32, 48, 49, 63, 64, 65, 127, 128, 129, 256, 511, 17 513, 1000, 3333, 4096, 4128, 4160, 4224, 16384] 18 19# Generate the given number of random bytes, using the length itself as the seed 20# for a simple linear congruential generator (LCG). The C test code uses the 21# same LCG with the same seeding strategy to reconstruct the data, ensuring 22# reproducibility without explicitly storing the data in the test vectors. 23def rand_bytes(length): 24 seed = length 25 out = [] 26 for _ in range(length): 27 seed = (seed * 25214903917 + 11) % 2**48 28 out.append((seed >> 16) % 256) 29 return bytes(out) 30 31AES_256_KEY_SIZE = 32 32 33# AES-CMAC. Just wraps the implementation from python-cryptography. 34class AesCmac: 35 def __init__(self, key): 36 aes = cryptography.hazmat.primitives.ciphers.algorithms.AES(key) 37 self.cmac = cryptography.hazmat.primitives.cmac.CMAC(aes) 38 39 def update(self, data): 40 self.cmac.update(data) 41 42 def digest(self): 43 return self.cmac.finalize() 44 45POLY1305_KEY_SIZE = 32 46 47# A straightforward, unoptimized implementation of Poly1305. 48# Reference: https://cr.yp.to/mac/poly1305-20050329.pdf 49class Poly1305: 50 def __init__(self, key): 51 assert len(key) == POLY1305_KEY_SIZE 52 self.h = 0 53 rclamp = 0x0ffffffc0ffffffc0ffffffc0fffffff 54 self.r = int.from_bytes(key[:16], byteorder='little') & rclamp 55 self.s = int.from_bytes(key[16:], byteorder='little') 56 57 # Note: this supports partial blocks only at the end. 58 def update(self, data): 59 for i in range(0, len(data), 16): 60 chunk = data[i:i+16] 61 c = int.from_bytes(chunk, byteorder='little') + 2**(8 * len(chunk)) 62 self.h = ((self.h + c) * self.r) % (2**130 - 5) 63 return self 64 65 # Note: gen_additional_poly1305_testvecs() relies on this being 66 # nondestructive, i.e. not changing any field of self. 67 def digest(self): 68 m = (self.h + self.s) % 2**128 69 return m.to_bytes(16, byteorder='little') 70 71GHASH_POLY = sum((1 << i) for i in [128, 7, 2, 1, 0]) 72GHASH_BLOCK_SIZE = 16 73 74# A straightforward, unoptimized implementation of GHASH. 75class Ghash: 76 77 @staticmethod 78 def reflect_bits_in_bytes(v): 79 res = 0 80 for offs in range(0, 128, 8): 81 for bit in range(8): 82 if (v & (1 << (offs + bit))) != 0: 83 res ^= 1 << (offs + 7 - bit) 84 return res 85 86 @staticmethod 87 def bytes_to_poly(data): 88 return Ghash.reflect_bits_in_bytes(int.from_bytes(data, byteorder='little')) 89 90 @staticmethod 91 def poly_to_bytes(poly): 92 return Ghash.reflect_bits_in_bytes(poly).to_bytes(16, byteorder='little') 93 94 def __init__(self, key): 95 assert len(key) == 16 96 self.h = Ghash.bytes_to_poly(key) 97 self.acc = 0 98 99 # Note: this supports partial blocks only at the end. 100 def update(self, data): 101 for i in range(0, len(data), 16): 102 # acc += block 103 self.acc ^= Ghash.bytes_to_poly(data[i:i+16]) 104 # acc = (acc * h) mod GHASH_POLY 105 product = 0 106 for j in range(127, -1, -1): 107 if (self.h & (1 << j)) != 0: 108 product ^= self.acc << j 109 if (product & (1 << (128 + j))) != 0: 110 product ^= GHASH_POLY << j 111 self.acc = product 112 return self 113 114 def digest(self): 115 return Ghash.poly_to_bytes(self.acc) 116 117POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0]) 118POLYVAL_BLOCK_SIZE = 16 119 120# A straightforward, unoptimized implementation of POLYVAL. 121# Reference: https://datatracker.ietf.org/doc/html/rfc8452 122class Polyval: 123 def __init__(self, key): 124 assert len(key) == 16 125 self.h = int.from_bytes(key, byteorder='little') 126 self.acc = 0 127 128 # Note: this supports partial blocks only at the end. 129 def update(self, data): 130 for i in range(0, len(data), 16): 131 # acc += block 132 self.acc ^= int.from_bytes(data[i:i+16], byteorder='little') 133 # acc = (acc * h * x^-128) mod POLYVAL_POLY 134 product = 0 135 for j in range(128): 136 if (self.h & (1 << j)) != 0: 137 product ^= self.acc << j 138 if (product & (1 << j)) != 0: 139 product ^= POLYVAL_POLY << j 140 self.acc = product >> 128 141 return self 142 143 def digest(self): 144 return self.acc.to_bytes(16, byteorder='little') 145 146def hash_init(alg): 147 # The keyed hash functions are assigned a fixed random key here, to present 148 # them as unkeyed hash functions. This allows all the test cases for 149 # unkeyed hash functions to work on them. 150 if alg == 'aes-cmac': 151 return AesCmac(rand_bytes(AES_256_KEY_SIZE)) 152 if alg == 'ghash': 153 return Ghash(rand_bytes(GHASH_BLOCK_SIZE)) 154 if alg == 'poly1305': 155 return Poly1305(rand_bytes(POLY1305_KEY_SIZE)) 156 if alg == 'polyval': 157 return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE)) 158 return hashlib.new(alg) 159 160def hash_update(ctx, data): 161 ctx.update(data) 162 163def hash_final(ctx): 164 return ctx.digest() 165 166def compute_hash(alg, data): 167 ctx = hash_init(alg) 168 hash_update(ctx, data) 169 return hash_final(ctx) 170 171def print_bytes(prefix, value, bytes_per_line): 172 for i in range(0, len(value), bytes_per_line): 173 line = prefix + ''.join(f'0x{b:02x}, ' for b in value[i:i+bytes_per_line]) 174 print(f'{line.rstrip()}') 175 176def print_static_u8_array_definition(name, value): 177 print('') 178 print(f'static const u8 {name} = {{') 179 print_bytes('\t', value, 8) 180 print('};') 181 182def print_c_struct_u8_array_field(name, value): 183 print(f'\t\t.{name} = {{') 184 print_bytes('\t\t\t', value, 8) 185 print('\t\t},') 186 187def alg_digest_size_const(alg): 188 if alg == 'aes-cmac': 189 return 'AES_BLOCK_SIZE' 190 if alg.startswith('blake2'): 191 return f'{alg.upper()}_HASH_SIZE' 192 return f"{alg.upper().replace('-', '_')}_DIGEST_SIZE" 193 194def gen_unkeyed_testvecs(alg): 195 print('') 196 print('static const struct {') 197 print('\tsize_t data_len;') 198 print(f'\tu8 digest[{alg_digest_size_const(alg)}];') 199 print('} hash_testvecs[] = {') 200 for data_len in DATA_LENS: 201 data = rand_bytes(data_len) 202 print('\t{') 203 print(f'\t\t.data_len = {data_len},') 204 print_c_struct_u8_array_field('digest', compute_hash(alg, data)) 205 print('\t},') 206 print('};') 207 208 data = rand_bytes(4096) 209 ctx = hash_init(alg) 210 for data_len in range(len(data) + 1): 211 hash_update(ctx, compute_hash(alg, data[:data_len])) 212 print_static_u8_array_definition( 213 f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]', 214 hash_final(ctx)) 215 216def gen_additional_sha3_testvecs(): 217 max_len = 4096 218 in_data = rand_bytes(max_len) 219 for alg in ['shake128', 'shake256']: 220 ctx = hashlib.new('sha3-256') 221 for in_len in range(max_len + 1): 222 out_len = (in_len * 293) % (max_len + 1) 223 out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len) 224 ctx.update(out) 225 print_static_u8_array_definition(f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]', 226 ctx.digest()) 227 228def gen_hmac_testvecs(alg): 229 ctx = hmac.new(rand_bytes(32), digestmod=alg) 230 data = rand_bytes(4096) 231 for data_len in range(len(data) + 1): 232 ctx.update(data[:data_len]) 233 key_len = data_len % 293 234 key = rand_bytes(key_len) 235 mac = hmac.digest(key, data[:data_len], alg) 236 ctx.update(mac) 237 print_static_u8_array_definition( 238 f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]', 239 ctx.digest()) 240 241def gen_additional_blake2_testvecs(alg): 242 if alg == 'blake2s': 243 (max_key_size, max_hash_size) = (32, 32) 244 elif alg == 'blake2b': 245 (max_key_size, max_hash_size) = (64, 64) 246 else: 247 raise ValueError(f'Unsupported alg: {alg}') 248 hashes = b'' 249 for key_len in range(max_key_size + 1): 250 for out_len in range(1, max_hash_size + 1): 251 h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len)) 252 h.update(rand_bytes(100)) 253 hashes += h.digest() 254 print_static_u8_array_definition( 255 f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]', 256 compute_hash(alg, hashes)) 257 258def nh_extract_int(bytestr, pos, length): 259 assert pos % 8 == 0 and length % 8 == 0 260 return int.from_bytes(bytestr[pos//8 : pos//8 + length//8], byteorder='little') 261 262# The NH "almost-universal hash function" used in Adiantum. This is a 263# straightforward translation of the pseudocode from Section 6.3 of the Adiantum 264# paper (https://eprint.iacr.org/2018/720.pdf), except the outer loop is omitted 265# because we assume len(msg) <= 1024. (The kernel's nh() function is only 266# expected to handle up to 1024 bytes; it's just called repeatedly as needed.) 267def nh(key, msg): 268 (w, s, r, u) = (32, 2, 4, 8192) 269 l = 8 * len(msg) 270 assert l <= u 271 assert l % (2*s*w) == 0 272 h = bytes() 273 for i in range(0, 2*s*w*r, 2*s*w): 274 p = 0 275 for j in range(0, l, 2*s*w): 276 for k in range(0, w*s, w): 277 a0 = nh_extract_int(key, i + j + k, w) 278 a1 = nh_extract_int(key, i + j + k + s*w, w) 279 b0 = nh_extract_int(msg, j + k, w) 280 b1 = nh_extract_int(msg, j + k + s*w, w) 281 p += ((a0 + b0) % 2**w) * ((a1 + b1) % 2**w) 282 h += (p % 2**64).to_bytes(8, byteorder='little') 283 return h 284 285def gen_nh_testvecs(): 286 NH_KEY_BYTES = 1072 287 NH_MESSAGE_BYTES = 1024 288 key = rand_bytes(NH_KEY_BYTES) 289 msg = rand_bytes(NH_MESSAGE_BYTES) 290 print_static_u8_array_definition('nh_test_key[NH_KEY_BYTES]', key) 291 print_static_u8_array_definition('nh_test_msg[NH_MESSAGE_BYTES]', msg) 292 for length in [16, 96, 256, 1024]: 293 print_static_u8_array_definition(f'nh_test_val{length}[NH_HASH_BYTES]', 294 nh(key, msg[:length])) 295 296def gen_additional_poly1305_testvecs(): 297 key = b'\xff' * POLY1305_KEY_SIZE 298 data = b'' 299 ctx = Poly1305(key) 300 for _ in range(32): 301 for j in range(0, 4097, 16): 302 ctx.update(b'\xff' * j) 303 data += ctx.digest() 304 print_static_u8_array_definition( 305 'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]', 306 Poly1305(key).update(data).digest()) 307 308def gen_additional_ghash_testvecs(): 309 key = b'\xff' * GHASH_BLOCK_SIZE 310 hashes = b'' 311 for data_len in range(0, 4097, 16): 312 hashes += Ghash(key).update(b'\xff' * data_len).digest() 313 print_static_u8_array_definition( 314 'ghash_allones_hashofhashes[GHASH_DIGEST_SIZE]', 315 Ghash(key).update(hashes).digest()) 316 317def gen_additional_polyval_testvecs(): 318 key = b'\xff' * POLYVAL_BLOCK_SIZE 319 hashes = b'' 320 for data_len in range(0, 4097, 16): 321 hashes += Polyval(key).update(b'\xff' * data_len).digest() 322 print_static_u8_array_definition( 323 'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]', 324 Polyval(key).update(hashes).digest()) 325 326if len(sys.argv) != 2: 327 sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n') 328 sys.stderr.write('ALGORITHM may be any supported by Python hashlib;\n') 329 sys.stderr.write(' or aes-cmac, ghash, nh, poly1305, polyval, or sha3.\n') 330 sys.stderr.write('Example: gen-hash-testvecs.py sha512\n') 331 sys.exit(1) 332 333alg = sys.argv[1] 334print('/* SPDX-License-Identifier: GPL-2.0-or-later */') 335print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */') 336if alg == 'aes-cmac': 337 gen_unkeyed_testvecs(alg) 338elif alg.startswith('blake2'): 339 gen_unkeyed_testvecs(alg) 340 gen_additional_blake2_testvecs(alg) 341elif alg == 'ghash': 342 gen_unkeyed_testvecs(alg) 343 gen_additional_ghash_testvecs() 344elif alg == 'nh': 345 gen_nh_testvecs() 346elif alg == 'poly1305': 347 gen_unkeyed_testvecs(alg) 348 gen_additional_poly1305_testvecs() 349elif alg == 'polyval': 350 gen_unkeyed_testvecs(alg) 351 gen_additional_polyval_testvecs() 352elif alg == 'sha3': 353 print() 354 print('/* SHA3-256 test vectors */') 355 gen_unkeyed_testvecs('sha3-256') 356 print() 357 print('/* SHAKE test vectors */') 358 gen_additional_sha3_testvecs() 359elif alg == 'sm3': 360 gen_unkeyed_testvecs(alg) 361 # Kernel doesn't implement HMAC-SM3 library functions yet. 362else: 363 gen_unkeyed_testvecs(alg) 364 gen_hmac_testvecs(alg) 365