xref: /linux/scripts/crypto/gen-hash-testvecs.py (revision b2210f35161d6202fcca4244800a1d54c80e8bc1)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-or-later
3#
4# Script that generates test vectors for the given cryptographic hash function.
5#
6# Copyright 2025 Google LLC
7
8import hashlib
9import hmac
10import sys
11
12DATA_LENS = [0, 1, 2, 3, 16, 32, 48, 49, 63, 64, 65, 127, 128, 129, 256, 511,
13             513, 1000, 3333, 4096, 4128, 4160, 4224, 16384]
14
# Produce `length` pseudorandom bytes from a simple linear congruential
# generator (LCG) that is seeded with `length` itself.  The C test code runs
# the same LCG with the same seeding strategy to regenerate the data, so the
# test vectors stay reproducible without the data being stored in them.
def rand_bytes(length):
    state = length
    buf = bytearray()
    for _ in range(length):
        # Advance the 48-bit LCG state, then emit bits 16..23 of the new state.
        state = (state * 25214903917 + 11) & ((1 << 48) - 1)
        buf.append((state >> 16) & 0xff)
    return bytes(buf)
26
POLY1305_KEY_SIZE = 32

# A plain, unoptimized Poly1305 implementation built on Python integers.
# Reference: https://cr.yp.to/mac/poly1305-20050329.pdf
class Poly1305:
    # Split the key into two little-endian 16-byte halves: the first half,
    # masked with the clamp constant, becomes the multiplier r; the second
    # half becomes s, which digest() adds at the end.  The accumulator h
    # starts at zero.
    def __init__(self, key):
        assert len(key) == POLY1305_KEY_SIZE
        r_half, s_half = key[:16], key[16:]
        rclamp = 0x0ffffffc0ffffffc0ffffffc0fffffff
        self.h = 0
        self.r = int.from_bytes(r_half, byteorder='little') & rclamp
        self.s = int.from_bytes(s_half, byteorder='little')

    # Absorb `data` in 16-byte blocks and return self so that calls can be
    # chained.
    # Note: a partial block is supported only at the very end of the message.
    def update(self, data):
        prime = (1 << 130) - 5
        for start in range(0, len(data), 16):
            block = data[start:start + 16]
            # Set one extra bit just above the block's most significant byte.
            padded = int.from_bytes(block, byteorder='little') + (1 << (8 * len(block)))
            self.h = ((self.h + padded) * self.r) % prime
        return self

    # Return the 16-byte little-endian MAC, (h + s) mod 2^128.
    # Note: gen_additional_poly1305_testvecs() relies on this being
    # nondestructive, i.e. not changing any field of self.
    def digest(self):
        tag = (self.h + self.s) & ((1 << 128) - 1)
        return tag.to_bytes(16, byteorder='little')
52
# Create a fresh hash context for `alg`.  Every name other than 'poly1305' is
# handed straight to hashlib.new().
def hash_init(alg):
    if alg != 'poly1305':
        return hashlib.new(alg)
    # Use a fixed random key here, to present Poly1305 as an unkeyed hash.
    # This allows all the test cases for unkeyed hashes to work on Poly1305.
    fixed_key = rand_bytes(POLY1305_KEY_SIZE)
    return Poly1305(fixed_key)
59
# Feed `data` into the hash context `ctx` by calling its update() method.
# Whatever update() returns is discarded.
def hash_update(ctx, data):
    absorb = ctx.update
    absorb(data)
62
# Return the digest of everything fed into the hash context `ctx` so far, as
# produced by its digest() method.
def hash_final(ctx):
    result = ctx.digest()
    return result
65
# One-shot helper: hash `data` with algorithm `alg` and return the digest,
# going through the same hash_init/hash_update/hash_final steps as
# incremental users do.
def compute_hash(alg, data):
    state = hash_init(alg)
    hash_update(state, data)
    digest = hash_final(state)
    return digest
70
# Print the bytes of `value` as C initializer elements ("0x%02x, "), at most
# `bytes_per_line` per output line.  Each line begins with `prefix`, and
# trailing whitespace is stripped before the line is printed.
def print_bytes(prefix, value, bytes_per_line):
    for start in range(0, len(value), bytes_per_line):
        row = value[start:start + bytes_per_line]
        cells = [f'0x{b:02x}, ' for b in row]
        text = prefix + ''.join(cells)
        print(text.rstrip())
75
# Print a blank line followed by a C definition of the form
# "static const u8 <name> = { ... };", with the bytes of `value` laid out 8
# per line and indented by one tab.  `name` is emitted verbatim, so callers
# include the array bound in it.
def print_static_u8_array_definition(name, value):
    opening = f'static const u8 {name} = {{'
    print('')
    print(opening)
    print_bytes('\t', value, 8)
    print('};')
81
# Print a designated initializer ".<name> = { ... }," for a u8 array field,
# indented for use inside an array-of-structs initializer.  The bytes of
# `value` are laid out 8 per line.
def print_c_struct_u8_array_field(name, value):
    opening = f'\t\t.{name} = {{'
    print(opening)
    print_bytes('\t\t\t', value, 8)
    print('\t\t},')
86
# Return the name of the C constant used as the digest array bound for `alg`.
# Names starting with 'blake2' map to "<ALG>_HASH_SIZE"; every other name maps
# to "<ALG>_DIGEST_SIZE".  In both cases <ALG> is the upper-cased algorithm
# name; in the non-BLAKE2 case any '-' is also replaced by '_' so that the
# result is a valid C identifier (e.g. 'sha3-256' -> 'SHA3_256_DIGEST_SIZE').
def alg_digest_size_const(alg):
    if alg.startswith('blake2'):
        return f'{alg.upper()}_HASH_SIZE'
    # Build the identifier outside the f-string: reusing the f-string's own
    # quote character inside a replacement field is a SyntaxError before
    # Python 3.12, which would prevent the whole script from loading.
    c_name = alg.upper().replace('-', '_')
    return f'{c_name}_DIGEST_SIZE'
91
# Print the test vectors that apply to `alg` when it is used as an unkeyed
# hash:
#
# 1. hash_testvecs[]: for each length in DATA_LENS, the digest of
#    rand_bytes(length).  Only the length and the digest are emitted.
# 2. hash_testvec_consolidated[]: a single digest computed over the
#    concatenated digests of every prefix (lengths 0 through 4096) of
#    rand_bytes(4096).
def gen_unkeyed_testvecs(alg):
    size_const = alg_digest_size_const(alg)

    print('')
    print('static const struct {')
    print('\tsize_t data_len;')
    print(f'\tu8 digest[{size_const}];')
    print('} hash_testvecs[] = {')
    for length in DATA_LENS:
        message = rand_bytes(length)
        print('\t{')
        print(f'\t\t.data_len = {length},')
        print_c_struct_u8_array_field('digest', compute_hash(alg, message))
        print('\t},')
    print('};')

    message = rand_bytes(4096)
    outer = hash_init(alg)
    for prefix_len in range(len(message) + 1):
        inner_digest = compute_hash(alg, message[:prefix_len])
        hash_update(outer, inner_digest)
    print_static_u8_array_definition(
            f'hash_testvec_consolidated[{size_const}]',
            hash_final(outer))
113
# Print one consolidated test vector for each of SHAKE128 and SHAKE256.  For
# every input length from 0 through 4096, the corresponding prefix of
# rand_bytes(4096) is hashed with the SHAKE function, requesting an output
# length of (in_len * 293) % 4097 bytes.  All of those outputs are fed, in
# order, into a single SHA3-256 hash whose digest is what gets printed.
def gen_additional_sha3_testvecs():
    max_len = 4096
    in_data = rand_bytes(max_len)
    for xof_name in ('shake128', 'shake256'):
        outer = hashlib.new('sha3-256')
        for in_len in range(max_len + 1):
            out_len = (in_len * 293) % (max_len + 1)
            xof = hashlib.new(xof_name, data=in_data[:in_len])
            outer.update(xof.digest(out_len))
        array_name = f'{xof_name}_testvec_consolidated[SHA3_256_DIGEST_SIZE]'
        print_static_u8_array_definition(array_name, outer.digest())
125
# Print a single consolidated HMAC test vector for `alg`.  An outer HMAC,
# keyed with rand_bytes(32), absorbs for every length from 0 through 4096:
# the corresponding prefix of rand_bytes(4096), followed by the HMAC of that
# prefix computed with the key rand_bytes(data_len % 293).  The outer HMAC's
# digest is what gets printed.
def gen_hmac_testvecs(alg):
    ctx = hmac.new(rand_bytes(32), digestmod=alg)
    data = rand_bytes(4096)
    for data_len in range(len(data) + 1):
        # Slice once; the same prefix feeds both the outer and inner HMAC.
        prefix = data[:data_len]
        ctx.update(prefix)
        key_len = data_len % 293
        key = rand_bytes(key_len)
        mac = hmac.digest(key, prefix, alg)
        ctx.update(mac)
    # Name the array bound via alg_digest_size_const(), like the other
    # generators do, so that algorithm names containing '-' still produce a
    # valid C identifier.
    print_static_u8_array_definition(
            f'hmac_testvec_consolidated[{alg_digest_size_const(alg)}]',
            ctx.digest())
138
# Print a consolidated test vector covering keyed and variable-length BLAKE2
# hashing.  For every key length from 0 through the maximum and every output
# length from 1 through the maximum, rand_bytes(100) is hashed with the key
# rand_bytes(key_len).  The concatenation of all those hashes is then hashed
# with unkeyed `alg`, and that digest is what gets printed.
#
# Raises ValueError if `alg` is neither 'blake2s' nor 'blake2b'.
def gen_additional_blake2_testvecs(alg):
    if alg == 'blake2s':
        (max_key_size, max_hash_size) = (32, 32)
    elif alg == 'blake2b':
        (max_key_size, max_hash_size) = (64, 64)
    else:
        raise ValueError(f'Unsupported alg: {alg}')
    # The message is the same for every (key, output length) combination.
    msg = rand_bytes(100)
    # Collect the hashes in a list and join once at the end; repeated
    # "bytes += ..." copies the whole accumulated buffer on every iteration.
    hashes = []
    for key_len in range(max_key_size + 1):
        # The key depends only on key_len, so generate it once per key length.
        key = rand_bytes(key_len)
        for out_len in range(1, max_hash_size + 1):
            h = hashlib.new(alg, digest_size=out_len, key=key)
            h.update(msg)
            hashes.append(h.digest())
    print_static_u8_array_definition(
            f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]',
            compute_hash(alg, b''.join(hashes)))
155
# Print a Poly1305 "MAC of MACs" test vector that uses an all-ones key and
# all-ones data.  A single Poly1305 context keeps absorbing runs of 0xff bytes
# (lengths 0, 16, ..., 4096, repeated 32 times), and its digest is sampled
# after each run; this relies on Poly1305.digest() not modifying the context.
# The concatenation of all sampled digests is then MACed with the same key,
# and that final MAC is what gets printed.
def gen_additional_poly1305_testvecs():
    key = b'\xff' * POLY1305_KEY_SIZE
    # Collect the MACs in a list and join once at the end; repeated
    # "bytes += ..." copies the whole accumulated buffer on every iteration.
    macs = []
    ctx = Poly1305(key)
    for _ in range(32):
        # Every length is a multiple of 16, so update() never sees a partial
        # block in the middle of the stream.
        for j in range(0, 4097, 16):
            ctx.update(b'\xff' * j)
            macs.append(ctx.digest())
    print_static_u8_array_definition(
            'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]',
            Poly1305(key).update(b''.join(macs)).digest())
167
# Script entry point: expect exactly one argument naming the algorithm, print
# the generated-file header, then emit the test vectors for that algorithm.
if len(sys.argv) != 2:
    sys.stderr.writelines((
        'Usage: gen-hash-testvecs.py ALGORITHM\n',
        'ALGORITHM may be any supported by Python hashlib, or poly1305 or sha3.\n',
        'Example: gen-hash-testvecs.py sha512\n',
    ))
    sys.exit(1)

alg = sys.argv[1]
print('/* SPDX-License-Identifier: GPL-2.0-or-later */')
print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */')
# The special cases below are mutually exclusive, so their order is irrelevant.
if alg == 'sha3':
    # 'sha3' is a pseudo-algorithm: SHA3-256 vectors plus the SHAKE vectors.
    print()
    print('/* SHA3-256 test vectors */')
    gen_unkeyed_testvecs('sha3-256')
    print()
    print('/* SHAKE test vectors */')
    gen_additional_sha3_testvecs()
elif alg == 'poly1305':
    gen_unkeyed_testvecs(alg)
    gen_additional_poly1305_testvecs()
elif alg.startswith('blake2'):
    gen_unkeyed_testvecs(alg)
    gen_additional_blake2_testvecs(alg)
else:
    # Any other name is treated as a hashlib algorithm that also gets HMAC
    # test vectors.
    gen_unkeyed_testvecs(alg)
    gen_hmac_testvecs(alg)
193