xref: /freebsd/contrib/llvm-project/llvm/lib/Support/BLAKE3/README.md (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1Implementation of BLAKE3, originating from https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c
2
3# Example
4
5An example program that hashes bytes from standard input and prints the
6result:
7
8Using the C++ API:
9
10```c++
11#include "llvm/Support/BLAKE3.h"
12#include <errno.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <unistd.h>
17
18int main() {
19  // Initialize the hasher.
20  llvm::BLAKE3 hasher;
21
22  // Read input bytes from stdin.
23  char buf[65536];
24  while (1) {
25    ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
26    if (n > 0) {
27      hasher.update(llvm::StringRef(buf, n));
28    } else if (n == 0) {
29      break; // end of file
30    } else {
31      fprintf(stderr, "read failed: %s\n", strerror(errno));
32      exit(1);
33    }
34  }
35
36  // Finalize the hash. Default output length is 32 bytes.
37  auto output = hasher.final();
38
39  // Print the hash as hexadecimal.
40  for (uint8_t byte : output) {
41    printf("%02x", byte);
42  }
43  printf("\n");
44  return 0;
45}
46```
47
48Using the C API:
49
50```c
51#include "llvm-c/blake3.h"
52#include <errno.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <unistd.h>
57
58int main() {
59  // Initialize the hasher.
60  llvm_blake3_hasher hasher;
61  llvm_blake3_hasher_init(&hasher);
62
63  // Read input bytes from stdin.
64  unsigned char buf[65536];
65  while (1) {
66    ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
67    if (n > 0) {
68      llvm_blake3_hasher_update(&hasher, buf, n);
69    } else if (n == 0) {
70      break; // end of file
71    } else {
72      fprintf(stderr, "read failed: %s\n", strerror(errno));
73      exit(1);
74    }
75  }
76
77  // Finalize the hash. LLVM_BLAKE3_OUT_LEN is the default output length, 32 bytes.
78  uint8_t output[LLVM_BLAKE3_OUT_LEN];
79  llvm_blake3_hasher_finalize(&hasher, output, LLVM_BLAKE3_OUT_LEN);
80
81  // Print the hash as hexadecimal.
82  for (size_t i = 0; i < LLVM_BLAKE3_OUT_LEN; i++) {
83    printf("%02x", output[i]);
84  }
85  printf("\n");
86  return 0;
87}
88```
89
90# API
91
92## The Class/Struct
93
94```c++
95class BLAKE3 {
96  // API
97private:
98  llvm_blake3_hasher Hasher;
99};
100```
101```c
102typedef struct {
103  // private fields
104} llvm_blake3_hasher;
105```
106
107An incremental BLAKE3 hashing state, which can accept any number of
108updates. This implementation doesn't allocate any heap memory, but
109`sizeof(llvm_blake3_hasher)` itself is relatively large, currently 1912 bytes
110on x86-64. This size can be reduced by restricting the maximum input
111length, as described in Section 5.4 of [the BLAKE3
112spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf),
113but this implementation doesn't currently support that strategy.
114
115## Common API Functions
116
117```c++
118BLAKE3::BLAKE3();
119
120void BLAKE3::init();
121```
122```c
123void llvm_blake3_hasher_init(
124  llvm_blake3_hasher *self);
125```
126
127Initialize a `llvm_blake3_hasher` in the default hashing mode.
128
129---
130
131```c++
132void BLAKE3::update(ArrayRef<uint8_t> Data);
133
134void BLAKE3::update(StringRef Str);
135```
136```c
137void llvm_blake3_hasher_update(
138  llvm_blake3_hasher *self,
139  const void *input,
140  size_t input_len);
141```
142
143Add input to the hasher. This can be called any number of times.
144
145---
146
147```c++
148template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
149using BLAKE3Result = std::array<uint8_t, NumBytes>;
150
151template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
152void BLAKE3::final(BLAKE3Result<NumBytes> &Result);
153
154template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
155BLAKE3Result<NumBytes> BLAKE3::final();
156```
157```c
158void llvm_blake3_hasher_finalize(
159  const llvm_blake3_hasher *self,
160  uint8_t *out,
161  size_t out_len);
162```
163
164Finalize the hasher and return an output of any length, given in bytes.
165This doesn't modify the hasher itself, and it's possible to finalize
166again after adding more input. The constant `LLVM_BLAKE3_OUT_LEN` provides
167the default output length, 32 bytes, which is recommended for most
168callers.
169
170Outputs shorter than the default length of 32 bytes (256 bits) provide
171less security. An N-bit BLAKE3 output is intended to provide N bits of
172first and second preimage resistance and N/2 bits of collision
173resistance, for any N up to 256. Longer outputs don't provide any
174additional security.
175
176Shorter BLAKE3 outputs are prefixes of longer ones. Explicitly
177requesting a short output is equivalent to truncating the default-length
178output. (Note that this differs from BLAKE2, where a shorter output is not a prefix of a longer one.)
179
180## Less Common API Functions
181
182```c
183void llvm_blake3_hasher_init_keyed(
184  llvm_blake3_hasher *self,
185  const uint8_t key[LLVM_BLAKE3_KEY_LEN]);
186```
187
188Initialize a `llvm_blake3_hasher` in the keyed hashing mode. The key must be
189exactly 32 bytes.
190
191---
192
193```c
194void llvm_blake3_hasher_init_derive_key(
195  llvm_blake3_hasher *self,
196  const char *context);
197```
198
199Initialize a `llvm_blake3_hasher` in the key derivation mode. The context
200string is given as an initialization parameter, and afterwards input key
201material should be given with `llvm_blake3_hasher_update`. The context string
202is a null-terminated C string which should be **hardcoded, globally
203unique, and application-specific**. The context string should not
204include any dynamic input like salts, nonces, or identifiers read from a
205database at runtime. A good default format for the context string is
206`"[application] [commit timestamp] [purpose]"`, e.g., `"example.com
2072019-12-25 16:18:03 session tokens v1"`.
208
209This function is intended for application code written in C. For
210language bindings, see `llvm_blake3_hasher_init_derive_key_raw` below.
211
212---
213
214```c
215void llvm_blake3_hasher_init_derive_key_raw(
216  llvm_blake3_hasher *self,
217  const void *context,
218  size_t context_len);
219```
220
221As `llvm_blake3_hasher_init_derive_key` above, except that the context string
222is given as a pointer to an array of arbitrary bytes with a provided
223length. This is intended for writing language bindings, where C string
224conversion would add unnecessary overhead and new error cases. Unicode
225strings should be encoded as UTF-8.
226
227Application code in C should prefer `llvm_blake3_hasher_init_derive_key`,
228which takes the context as a C string. If you need to use arbitrary
229bytes as a context string in application code, consider whether you're
230violating the requirement that context strings should be hardcoded.
231
232---
233
234```c
235void llvm_blake3_hasher_finalize_seek(
236  const llvm_blake3_hasher *self,
237  uint64_t seek,
238  uint8_t *out,
239  size_t out_len);
240```
241
242The same as `llvm_blake3_hasher_finalize`, but with an additional `seek`
243parameter for the starting byte position in the output stream. To
244efficiently stream a large output without allocating memory, call this
245function in a loop, incrementing `seek` by the output length each time.
246
247---
248
249```c
250void llvm_blake3_hasher_reset(
251  llvm_blake3_hasher *self);
252```
253
254Reset the hasher to its initial state, prior to any calls to
255`llvm_blake3_hasher_update`. Currently this is no different from calling
256`llvm_blake3_hasher_init` or similar again. However, if this implementation gains
257multithreading support in the future, and if `llvm_blake3_hasher` holds (optional)
258threading resources, this function will reuse those resources.
259
260
261# Building
262
263This implementation is just C and assembly files.
264
265## x86
266
267Dynamic dispatch is enabled by default on x86. The implementation will
268query the CPU at runtime to detect SIMD support, and it will use the
269widest instruction set available. By default, `blake3_dispatch.c`
270expects to be linked with code for five different instruction sets:
271portable C, SSE2, SSE4.1, AVX2, and AVX-512.
272
273For each of the x86 SIMD instruction sets, four versions are available:
274three flavors of assembly (Unix, Windows MSVC, and Windows GNU) and one
275version using C intrinsics. The assembly versions are generally
276preferred. They perform better, they perform more consistently across
277different compilers, and they build more quickly. On the other hand, the
278assembly versions are x86\_64-only, and you need to select the right
279flavor for your target platform.
280
281## ARM NEON
282
283The NEON implementation is enabled by default on AArch64, but not on
284other ARM targets, since not all of them support it. To enable it, set
285`BLAKE3_USE_NEON=1`.
286
287To explicitly disable using NEON instructions on AArch64, set
288`BLAKE3_USE_NEON=0`.
289
290## Other Platforms
291
292The portable implementation should work on most other architectures.
293
294# Multithreading
295
296The implementation doesn't currently support multithreading.
297