xref: /linux/scripts/crypto/gen-hash-testvecs.py (revision d6781b8ba33ae9f6ab2e88c1158e989a24847c4b)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-or-later
3#
4# Script that generates test vectors for the given hash function.
5#
6# Requires that python-cryptography be installed.
7#
8# Copyright 2025 Google LLC
9
10import cryptography.hazmat.primitives.ciphers
11import cryptography.hazmat.primitives.cmac
12import hashlib
13import hmac
14import sys
15
16DATA_LENS = [0, 1, 2, 3, 16, 32, 48, 49, 63, 64, 65, 127, 128, 129, 256, 511,
17             513, 1000, 3333, 4096, 4128, 4160, 4224, 16384]
18
19# Generate the given number of random bytes, using the length itself as the seed
20# for a simple linear congruential generator (LCG).  The C test code uses the
21# same LCG with the same seeding strategy to reconstruct the data, ensuring
22# reproducibility without explicitly storing the data in the test vectors.
23def rand_bytes(length):
24    seed = length
25    out = []
26    for _ in range(length):
27        seed = (seed * 25214903917 + 11) % 2**48
28        out.append((seed >> 16) % 256)
29    return bytes(out)
30
31AES_256_KEY_SIZE = 32
32
33# AES-CMAC.  Just wraps the implementation from python-cryptography.
34class AesCmac:
35    def __init__(self, key):
36        aes = cryptography.hazmat.primitives.ciphers.algorithms.AES(key)
37        self.cmac = cryptography.hazmat.primitives.cmac.CMAC(aes)
38
39    def update(self, data):
40        self.cmac.update(data)
41
42    def digest(self):
43        return self.cmac.finalize()
44
45POLY1305_KEY_SIZE = 32
46
47# A straightforward, unoptimized implementation of Poly1305.
48# Reference: https://cr.yp.to/mac/poly1305-20050329.pdf
49class Poly1305:
50    def __init__(self, key):
51        assert len(key) == POLY1305_KEY_SIZE
52        self.h = 0
53        rclamp = 0x0ffffffc0ffffffc0ffffffc0fffffff
54        self.r = int.from_bytes(key[:16], byteorder='little') & rclamp
55        self.s = int.from_bytes(key[16:], byteorder='little')
56
57    # Note: this supports partial blocks only at the end.
58    def update(self, data):
59        for i in range(0, len(data), 16):
60            chunk = data[i:i+16]
61            c = int.from_bytes(chunk, byteorder='little') + 2**(8 * len(chunk))
62            self.h = ((self.h + c) * self.r) % (2**130 - 5)
63        return self
64
65    # Note: gen_additional_poly1305_testvecs() relies on this being
66    # nondestructive, i.e. not changing any field of self.
67    def digest(self):
68        m = (self.h + self.s) % 2**128
69        return m.to_bytes(16, byteorder='little')
70
71GHASH_POLY = sum((1 << i) for i in [128, 7, 2, 1, 0])
72GHASH_BLOCK_SIZE = 16
73
74# A straightforward, unoptimized implementation of GHASH.
75class Ghash:
76
77    @staticmethod
78    def reflect_bits_in_bytes(v):
79        res = 0
80        for offs in range(0, 128, 8):
81            for bit in range(8):
82                if (v & (1 << (offs + bit))) != 0:
83                    res ^= 1 << (offs + 7 - bit)
84        return res
85
86    @staticmethod
87    def bytes_to_poly(data):
88        return Ghash.reflect_bits_in_bytes(int.from_bytes(data, byteorder='little'))
89
90    @staticmethod
91    def poly_to_bytes(poly):
92        return Ghash.reflect_bits_in_bytes(poly).to_bytes(16, byteorder='little')
93
94    def __init__(self, key):
95        assert len(key) == 16
96        self.h = Ghash.bytes_to_poly(key)
97        self.acc = 0
98
99    # Note: this supports partial blocks only at the end.
100    def update(self, data):
101        for i in range(0, len(data), 16):
102            # acc += block
103            self.acc ^= Ghash.bytes_to_poly(data[i:i+16])
104            # acc = (acc * h) mod GHASH_POLY
105            product = 0
106            for j in range(127, -1, -1):
107                if (self.h & (1 << j)) != 0:
108                    product ^= self.acc << j
109                if (product & (1 << (128 + j))) != 0:
110                    product ^= GHASH_POLY << j
111            self.acc = product
112        return self
113
114    def digest(self):
115        return Ghash.poly_to_bytes(self.acc)
116
117POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0])
118POLYVAL_BLOCK_SIZE = 16
119
120# A straightforward, unoptimized implementation of POLYVAL.
121# Reference: https://datatracker.ietf.org/doc/html/rfc8452
122class Polyval:
123    def __init__(self, key):
124        assert len(key) == 16
125        self.h = int.from_bytes(key, byteorder='little')
126        self.acc = 0
127
128    # Note: this supports partial blocks only at the end.
129    def update(self, data):
130        for i in range(0, len(data), 16):
131            # acc += block
132            self.acc ^= int.from_bytes(data[i:i+16], byteorder='little')
133            # acc = (acc * h * x^-128) mod POLYVAL_POLY
134            product = 0
135            for j in range(128):
136                if (self.h & (1 << j)) != 0:
137                    product ^= self.acc << j
138                if (product & (1 << j)) != 0:
139                    product ^= POLYVAL_POLY << j
140            self.acc = product >> 128
141        return self
142
143    def digest(self):
144        return self.acc.to_bytes(16, byteorder='little')
145
146def hash_init(alg):
147    # The keyed hash functions are assigned a fixed random key here, to present
148    # them as unkeyed hash functions.  This allows all the test cases for
149    # unkeyed hash functions to work on them.
150    if alg == 'aes-cmac':
151        return AesCmac(rand_bytes(AES_256_KEY_SIZE))
152    if alg == 'ghash':
153        return Ghash(rand_bytes(GHASH_BLOCK_SIZE))
154    if alg == 'poly1305':
155        return Poly1305(rand_bytes(POLY1305_KEY_SIZE))
156    if alg == 'polyval':
157        return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE))
158    return hashlib.new(alg)
159
160def hash_update(ctx, data):
161    ctx.update(data)
162
163def hash_final(ctx):
164    return ctx.digest()
165
166def compute_hash(alg, data):
167    ctx = hash_init(alg)
168    hash_update(ctx, data)
169    return hash_final(ctx)
170
171def print_bytes(prefix, value, bytes_per_line):
172    for i in range(0, len(value), bytes_per_line):
173        line = prefix + ''.join(f'0x{b:02x}, ' for b in value[i:i+bytes_per_line])
174        print(f'{line.rstrip()}')
175
176def print_static_u8_array_definition(name, value):
177    print('')
178    print(f'static const u8 {name} = {{')
179    print_bytes('\t', value, 8)
180    print('};')
181
182def print_c_struct_u8_array_field(name, value):
183    print(f'\t\t.{name} = {{')
184    print_bytes('\t\t\t', value, 8)
185    print('\t\t},')
186
187def alg_digest_size_const(alg):
188    if alg == 'aes-cmac':
189        return 'AES_BLOCK_SIZE'
190    if alg.startswith('blake2'):
191        return f'{alg.upper()}_HASH_SIZE'
192    return f"{alg.upper().replace('-', '_')}_DIGEST_SIZE"
193
194def gen_unkeyed_testvecs(alg):
195    print('')
196    print('static const struct {')
197    print('\tsize_t data_len;')
198    print(f'\tu8 digest[{alg_digest_size_const(alg)}];')
199    print('} hash_testvecs[] = {')
200    for data_len in DATA_LENS:
201        data = rand_bytes(data_len)
202        print('\t{')
203        print(f'\t\t.data_len = {data_len},')
204        print_c_struct_u8_array_field('digest', compute_hash(alg, data))
205        print('\t},')
206    print('};')
207
208    data = rand_bytes(4096)
209    ctx = hash_init(alg)
210    for data_len in range(len(data) + 1):
211        hash_update(ctx, compute_hash(alg, data[:data_len]))
212    print_static_u8_array_definition(
213            f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]',
214            hash_final(ctx))
215
216def gen_additional_sha3_testvecs():
217    max_len = 4096
218    in_data = rand_bytes(max_len)
219    for alg in ['shake128', 'shake256']:
220        ctx = hashlib.new('sha3-256')
221        for in_len in range(max_len + 1):
222            out_len = (in_len * 293) % (max_len + 1)
223            out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len)
224            ctx.update(out)
225        print_static_u8_array_definition(f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]',
226                                         ctx.digest())
227
228def gen_hmac_testvecs(alg):
229    ctx = hmac.new(rand_bytes(32), digestmod=alg)
230    data = rand_bytes(4096)
231    for data_len in range(len(data) + 1):
232        ctx.update(data[:data_len])
233        key_len = data_len % 293
234        key = rand_bytes(key_len)
235        mac = hmac.digest(key, data[:data_len], alg)
236        ctx.update(mac)
237    print_static_u8_array_definition(
238            f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]',
239            ctx.digest())
240
241def gen_additional_blake2_testvecs(alg):
242    if alg == 'blake2s':
243        (max_key_size, max_hash_size) = (32, 32)
244    elif alg == 'blake2b':
245        (max_key_size, max_hash_size) = (64, 64)
246    else:
247        raise ValueError(f'Unsupported alg: {alg}')
248    hashes = b''
249    for key_len in range(max_key_size + 1):
250        for out_len in range(1, max_hash_size + 1):
251            h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len))
252            h.update(rand_bytes(100))
253            hashes += h.digest()
254    print_static_u8_array_definition(
255            f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]',
256            compute_hash(alg, hashes))
257
258def nh_extract_int(bytestr, pos, length):
259    assert pos % 8 == 0 and length % 8 == 0
260    return int.from_bytes(bytestr[pos//8 : pos//8 + length//8], byteorder='little')
261
262# The NH "almost-universal hash function" used in Adiantum.  This is a
263# straightforward translation of the pseudocode from Section 6.3 of the Adiantum
264# paper (https://eprint.iacr.org/2018/720.pdf), except the outer loop is omitted
265# because we assume len(msg) <= 1024.  (The kernel's nh() function is only
266# expected to handle up to 1024 bytes; it's just called repeatedly as needed.)
267def nh(key, msg):
268    (w, s, r, u) = (32, 2, 4, 8192)
269    l = 8 * len(msg)
270    assert l <= u
271    assert l % (2*s*w) == 0
272    h = bytes()
273    for i in range(0, 2*s*w*r, 2*s*w):
274        p = 0
275        for j in range(0, l, 2*s*w):
276            for k in range(0, w*s, w):
277                a0 = nh_extract_int(key, i + j + k, w)
278                a1 = nh_extract_int(key, i + j + k + s*w, w)
279                b0 = nh_extract_int(msg, j + k, w)
280                b1 = nh_extract_int(msg, j + k + s*w, w)
281                p += ((a0 + b0) % 2**w) * ((a1 + b1) % 2**w)
282        h += (p % 2**64).to_bytes(8, byteorder='little')
283    return h
284
285def gen_nh_testvecs():
286    NH_KEY_BYTES = 1072
287    NH_MESSAGE_BYTES = 1024
288    key = rand_bytes(NH_KEY_BYTES)
289    msg = rand_bytes(NH_MESSAGE_BYTES)
290    print_static_u8_array_definition('nh_test_key[NH_KEY_BYTES]', key)
291    print_static_u8_array_definition('nh_test_msg[NH_MESSAGE_BYTES]', msg)
292    for length in [16, 96, 256, 1024]:
293        print_static_u8_array_definition(f'nh_test_val{length}[NH_HASH_BYTES]',
294                                         nh(key, msg[:length]))
295
296def gen_additional_poly1305_testvecs():
297    key = b'\xff' * POLY1305_KEY_SIZE
298    data = b''
299    ctx = Poly1305(key)
300    for _ in range(32):
301        for j in range(0, 4097, 16):
302            ctx.update(b'\xff' * j)
303            data += ctx.digest()
304    print_static_u8_array_definition(
305            'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]',
306            Poly1305(key).update(data).digest())
307
308def gen_additional_ghash_testvecs():
309    key = b'\xff' * GHASH_BLOCK_SIZE
310    hashes = b''
311    for data_len in range(0, 4097, 16):
312        hashes += Ghash(key).update(b'\xff' * data_len).digest()
313    print_static_u8_array_definition(
314            'ghash_allones_hashofhashes[GHASH_DIGEST_SIZE]',
315            Ghash(key).update(hashes).digest())
316
317def gen_additional_polyval_testvecs():
318    key = b'\xff' * POLYVAL_BLOCK_SIZE
319    hashes = b''
320    for data_len in range(0, 4097, 16):
321        hashes += Polyval(key).update(b'\xff' * data_len).digest()
322    print_static_u8_array_definition(
323            'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]',
324            Polyval(key).update(hashes).digest())
325
326if len(sys.argv) != 2:
327    sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n')
328    sys.stderr.write('ALGORITHM may be any supported by Python hashlib;\n')
329    sys.stderr.write('  or aes-cmac, ghash, nh, poly1305, polyval, or sha3.\n')
330    sys.stderr.write('Example: gen-hash-testvecs.py sha512\n')
331    sys.exit(1)
332
333alg = sys.argv[1]
334print('/* SPDX-License-Identifier: GPL-2.0-or-later */')
335print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */')
336if alg == 'aes-cmac':
337    gen_unkeyed_testvecs(alg)
338elif alg.startswith('blake2'):
339    gen_unkeyed_testvecs(alg)
340    gen_additional_blake2_testvecs(alg)
341elif alg == 'ghash':
342    gen_unkeyed_testvecs(alg)
343    gen_additional_ghash_testvecs()
344elif alg == 'nh':
345    gen_nh_testvecs()
346elif alg == 'poly1305':
347    gen_unkeyed_testvecs(alg)
348    gen_additional_poly1305_testvecs()
349elif alg == 'polyval':
350    gen_unkeyed_testvecs(alg)
351    gen_additional_polyval_testvecs()
352elif alg == 'sha3':
353    print()
354    print('/* SHA3-256 test vectors */')
355    gen_unkeyed_testvecs('sha3-256')
356    print()
357    print('/* SHAKE test vectors */')
358    gen_additional_sha3_testvecs()
359elif alg == 'sm3':
360    gen_unkeyed_testvecs(alg)
361    # Kernel doesn't implement HMAC-SM3 library functions yet.
362else:
363    gen_unkeyed_testvecs(alg)
364    gen_hmac_testvecs(alg)
365