1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * This random algorithm is derived in part from the "Windows 10 random number
30 * generation infrastructure" whitepaper published by Niels Ferguson and
31 * Microsoft: https://aka.ms/win10rng
32 *
33 * It is also inspired by DJB's writing on buffered key-erasure PRNGs:
34 * https://blog.cr.yp.to/20170723-random.html
35 *
36 * The Windows 10 RNG bears some similarity to Fortuna, which Ferguson was also
37 * involved with. Notable differences include:
38 * - Extended to multi-CPU design
39 * - Extended to pre-buffer some PRNG output
40 * - Pool-based reseeding is solely time-based (rather than on-access w/
41 * pacing)
42 * - Extended to specify efficient userspace design
43 * - Always-available design (requires the equivalent of loader(8) for all
44 * boots; probably relatively easy given the limited platforms Windows 10
45 * supports)
46 *
47 * Some aspects of the design document I found confusing and may have
48 * misinterpreted:
49 * - Relationship between root PRNG seed version and periodic reseed pool use.
50 * I interpreted these as separate sequences. The root PRNG seed version is
51 * bumped both by the periodic pool based reseed, and also special
52 * conditions such as the first time an entropy source provides entropy. I
53 * don't think first-time entropy sources should cause us to skip an entropy
54 * pool reseed.
55 * - Initial seeding. The paper is pretty terse on the subject. My
56 * interpretation of the document is that the Windows RNG infrastructure
57 * relies on the loader(8)-provided material for initial seeding and either
58 * ignores or doesn't start entropy sources until after that time. So when
59 * the paper says that first-time entropy source material "bypasses the
60 * pools," the root PRNG state already has been keyed for the first time and
61 * can generate 256 bits, mix it with the first-time entropy, and reseed
62 * immediately.
63 *
64 * Some notable design choices in this implementation divergent from that
65 * specified in the document above:
66 * - Blake2b instead of SHA-2 512 for entropy pooling
67 * - Chacha20 instead of AES-CTR DRBG for PRF
68 * - Initial seeding. We treat the 0->1 seed version (brng_generation) edge
69 * as the transition from blocked to unblocked. That edge is also the first
70 * time the key of the root BRNG's PRF is set. We perform initial seeding
71 * when the first request for entropy arrives.
72 * • As a result: Entropy callbacks prior to this edge do not have a keyed
73 * root PRNG, so bypassing the pools is kind of meaningless. Instead,
74 * they feed into pool0. (They also do not set the root PRNG key or bump
75 * the root PRNG seed version.)
76 * • Entropy callbacks after the edge behave like the specification.
77 * • All one-off sources are fed into pool0 and the result used to seed the
78 * root BRNG during the initial seed step.
79 * • All memory needed for initial seeding must be preallocated or static or
80 * fit on the stack; random reads can occur in nonsleepable contexts and
81 * we cannot allocate M_WAITOK. (We also cannot fail to incorporate any
82 * present one-off source, to the extent it is in the control of
83 * software.)
84 * - Timer interval reseeding. We also start the timer-based reseeding at
85 * initial seed, but unlike the design, our initial seed is some time after
86 * load (usually within the order of micro- or milliseconds due to
87 * stack_guard on x86, but conceivably later if nothing reads from random for
88 * a while).
89 *
90 * Not yet implemented, not in scope, or todo:
91 * - Various initial seeding sources we don't have yet
92 * - In particular, VM migration/copy detection
93 */
94
95 #include <sys/param.h>
96 #include <sys/domainset.h>
97 #include <sys/fail.h>
98 #include <sys/limits.h>
99 #include <sys/lock.h>
100 #include <sys/kernel.h>
101 #include <sys/malloc.h>
102 #include <sys/mutex.h>
103 #include <sys/random.h>
104 #include <sys/sdt.h>
105 #include <sys/smp.h>
106 #include <sys/sysctl.h>
107 #include <sys/systm.h>
108
109 #include <machine/cpu.h>
110
111 #include <vm/vm.h>
112 #include <vm/vm_param.h>
113 #include <vm/vm_page.h>
114 #include <vm/vm_phys.h>
115 #include <vm/vm_pagequeue.h>
116
117 #include <dev/random/randomdev.h>
118 #include <dev/random/random_harvestq.h>
119 #include <dev/random/uint128.h>
120
121 #include <dev/random/fenestrasX/fx_brng.h>
122 #include <dev/random/fenestrasX/fx_hash.h>
123 #include <dev/random/fenestrasX/fx_pool.h>
124 #include <dev/random/fenestrasX/fx_priv.h>
125 #include <dev/random/fenestrasX/fx_pub.h>
126 #include <dev/random/fenestrasX/fx_rng.h>
127
128 struct fxrng_buffered_rng fxrng_root;
129 uint64_t __read_mostly fxrng_root_generation;
130 DPCPU_DEFINE_STATIC(struct fxrng_buffered_rng *, fxrng_brng);
131
132 /*
133 * Top-level read API from randomdev. Responsible for NOWAIT-allocating
134 * per-cpu NUMA-local BRNGs, if needed and satisfiable; subroutines handle
135 * reseeding if the local BRNG is stale and rekeying when necessary. In
136 * low-memory conditions when a local BRNG cannot be allocated, the request is
137 * simply forwarded to the root BRNG.
138 *
139 * It is a precondition is that the root BRNG initial seeding has completed and
140 * the root generation number >0.
141 */
142 static void
_fxrng_alg_read(uint8_t * output,size_t nbytes,uint64_t * seed_version_out)143 _fxrng_alg_read(uint8_t *output, size_t nbytes, uint64_t *seed_version_out)
144 {
145 struct fxrng_buffered_rng **pcpu_brng_p, *rng, *tmp;
146 struct pcpu *pcpu;
147
148 pcpu = get_pcpu();
149
150 /*
151 * The following statement directly accesses an implementation detail
152 * of DPCPU, but the macros cater only to pinned threads; we want to
153 * operate on our initial CPU, without pinning, *even if* we migrate.
154 */
155 pcpu_brng_p = _DPCPU_PTR(pcpu->pc_dynamic, fxrng_brng);
156
157 rng = (void *)atomic_load_acq_ptr((uintptr_t *)pcpu_brng_p);
158
159 /*
160 * Usually the pcpu BRNG has already been allocated, but we do it
161 * on-demand and need to check first. BRNGs are never deallocated and
162 * are valid as soon as the pointer is initialized.
163 */
164 if (__predict_false(rng == NULL)) {
165 uint8_t newkey[FX_CHACHA20_KEYSIZE];
166 struct domainset *ds;
167 int domain;
168
169 domain = pcpu->pc_domain;
170
171 /*
172 * Allocate pcpu BRNGs off-domain on weird NUMA machines like
173 * AMD Threadripper 2990WX, which has 2 NUMA nodes without
174 * local memory controllers. The PREF policy is automatically
175 * converted to something appropriate when domains are empty.
176 * (FIXED is not.)
177 *
178 * Otherwise, allocate strictly CPU-local memory. The
179 * rationale is this: if there is a memory shortage such that
180 * PREF policy would fallback to RR, we have no business
181 * wasting memory on a faster BRNG. So, use a FIXED domainset
182 * policy. If we cannot allocate, that's fine! We fall back
183 * to invoking the root BRNG.
184 */
185 if (VM_DOMAIN_EMPTY(domain))
186 ds = DOMAINSET_PREF(domain);
187 else
188 ds = DOMAINSET_FIXED(domain);
189
190 rng = malloc_domainset(sizeof(*rng), M_ENTROPY, ds,
191 M_NOWAIT | M_ZERO);
192 if (rng == NULL) {
193 /* Relatively easy case: fall back to root BRNG. */
194 rng = &fxrng_root;
195 goto have_valid_rng;
196 }
197
198 fxrng_brng_init(rng);
199
200 /*
201 * The root BRNG is always up and available. Requests are
202 * always satisfiable. This is a design invariant.
203 */
204 ASSERT_DEBUG(atomic_load_acq_64(&fxrng_root_generation) != 0,
205 "%s: attempting to seed child BRNG when root hasn't "
206 "been initialized yet.", __func__);
207
208 FXRNG_BRNG_LOCK(&fxrng_root);
209 #ifdef WITNESS
210 /* Establish lock order root->pcpu for WITNESS. */
211 FXRNG_BRNG_LOCK(rng);
212 FXRNG_BRNG_UNLOCK(rng);
213 #endif
214 fxrng_brng_produce_seed_data_internal(&fxrng_root, newkey,
215 sizeof(newkey), &rng->brng_generation);
216 FXRNG_BRNG_ASSERT_NOT(&fxrng_root);
217
218 fxrng_rng_setkey(&rng->brng_rng, newkey, sizeof(newkey));
219 explicit_bzero(newkey, sizeof(newkey));
220
221 /*
222 * We have a valid RNG. Try to install it, or grab the other
223 * one if we lost the race.
224 */
225 tmp = NULL;
226 while (tmp == NULL)
227 if (atomic_fcmpset_ptr((uintptr_t *)pcpu_brng_p,
228 (uintptr_t *)&tmp, (uintptr_t)rng))
229 goto have_valid_rng;
230
231 /*
232 * We lost the race. There's nothing sensitive about
233 * our BRNG's PRF state, because it will never be used
234 * for anything and the key doesn't expose any
235 * information about the parent (root) generator's
236 * state -- it has already rekeyed. The generation
237 * number is public, and a zero counter isn't sensitive.
238 */
239 free(rng, M_ENTROPY);
240 /*
241 * Use the winner's PCPU BRNG.
242 */
243 rng = tmp;
244 }
245
246 have_valid_rng:
247 /* At this point we have a valid, initialized and seeded rng pointer. */
248 FXRNG_BRNG_LOCK(rng);
249 if (seed_version_out != NULL)
250 *seed_version_out = rng->brng_generation;
251 fxrng_brng_read(rng, output, nbytes);
252 FXRNG_BRNG_ASSERT_NOT(rng);
253 }
254
255 static void
fxrng_alg_read(uint8_t * output,size_t nbytes)256 fxrng_alg_read(uint8_t *output, size_t nbytes)
257 {
258 _fxrng_alg_read(output, nbytes, NULL);
259 }
260
261 /*
262 * External API for arc4random(9) to fetch new key material and associated seed
263 * version in chacha20_randomstir().
264 */
265 void
read_random_key(void * output,size_t nbytes,uint64_t * seed_version_out)266 read_random_key(void *output, size_t nbytes, uint64_t *seed_version_out)
267 {
268 /* Ensure _fxrng_alg_read invariant. */
269 if (__predict_false(atomic_load_acq_64(&fxrng_root_generation) == 0))
270 (void)fxrng_alg_seeded();
271
272 _fxrng_alg_read(output, nbytes, seed_version_out);
273 }
274
275 static void
fxrng_init_alg(void * dummy __unused)276 fxrng_init_alg(void *dummy __unused)
277 {
278 DPCPU_ZERO(fxrng_brng);
279 fxrng_brng_init(&fxrng_root);
280 fxrng_pools_init();
281 }
282 SYSINIT(random_alg, SI_SUB_RANDOM, SI_ORDER_SECOND, fxrng_init_alg, NULL);
283
284 /*
285 * Public visibility struct referenced directly by other parts of randomdev.
286 */
287 const struct random_algorithm random_alg_context = {
288 .ra_ident = "fenestrasX",
289 .ra_pre_read = (void (*)(void))nullop,
290 .ra_read = fxrng_alg_read,
291 .ra_seeded = fxrng_alg_seeded,
292 .ra_event_processor = fxrng_event_processor,
293 .ra_poolcount = FXRNG_NPOOLS,
294 };
295