1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * This random algorithm is derived in part from the "Windows 10 random number 30 * generation infrastructure" whitepaper published by Niels Ferguson and 31 * Microsoft: https://aka.ms/win10rng 32 * 33 * It is also inspired by DJB's writing on buffered key-erasure PRNGs: 34 * https://blog.cr.yp.to/20170723-random.html 35 * 36 * The Windows 10 RNG bears some similarity to Fortuna, which Ferguson was also 37 * involved with. Notable differences include: 38 * - Extended to multi-CPU design 39 * - Extended to pre-buffer some PRNG output 40 * - Pool-based reseeding is solely time-based (rather than on-access w/ 41 * pacing) 42 * - Extended to specify efficient userspace design 43 * - Always-available design (requires the equivalent of loader(8) for all 44 * boots; probably relatively easy given the limited platforms Windows 10 45 * supports) 46 * 47 * Some aspects of the design document I found confusing and may have 48 * misinterpreted: 49 * - Relationship between root PRNG seed version and periodic reseed pool use. 50 * I interpreted these as separate sequences. The root PRNG seed version is 51 * bumped both by the periodic pool based reseed, and also special 52 * conditions such as the first time an entropy source provides entropy. I 53 * don't think first-time entropy sources should cause us to skip an entropy 54 * pool reseed. 55 * - Initial seeding. The paper is pretty terse on the subject. My 56 * interpretation of the document is that the Windows RNG infrastructure 57 * relies on the loader(8)-provided material for initial seeding and either 58 * ignores or doesn't start entropy sources until after that time. So when 59 * the paper says that first-time entropy source material "bypasses the 60 * pools," the root PRNG state already has been keyed for the first time and 61 * can generate 256 bits, mix it with the first-time entropy, and reseed 62 * immediately. 63 * 64 * Some notable design choices in this implementation divergent from that 65 * specified in the document above: 66 * - Blake2b instead of SHA-2 512 for entropy pooling 67 * - Chacha20 instead of AES-CTR DRBG for PRF 68 * - Initial seeding. We treat the 0->1 seed version (brng_generation) edge 69 * as the transition from blocked to unblocked. That edge is also the first 70 * time the key of the root BRNG's PRF is set. We perform initial seeding 71 * when the first request for entropy arrives. 72 * • As a result: Entropy callbacks prior to this edge do not have a keyed 73 * root PRNG, so bypassing the pools is kind of meaningless. Instead, 74 * they feed into pool0. (They also do not set the root PRNG key or bump 75 * the root PRNG seed version.) 76 * • Entropy callbacks after the edge behave like the specification. 77 * • All one-off sources are fed into pool0 and the result used to seed the 78 * root BRNG during the initial seed step. 79 * • All memory needed for initial seeding must be preallocated or static or 80 * fit on the stack; random reads can occur in nonsleepable contexts and 81 * we cannot allocate M_WAITOK. (We also cannot fail to incorporate any 82 * present one-off source, to the extent it is in the control of 83 * software.) 84 * - Timer interval reseeding. We also start the timer-based reseeding at 85 * initial seed, but unlike the design, our initial seed is some time after 86 * load (usually within the order of micro- or milliseconds due to 87 * stack_guard on x86, but conceivably later if nothing reads from random for 88 * a while). 89 * 90 * Not yet implemented, not in scope, or todo: 91 * - Various initial seeding sources we don't have yet 92 * - In particular, VM migration/copy detection 93 */ 94 95 #include <sys/cdefs.h> 96 __FBSDID("$FreeBSD$"); 97 98 #include <sys/param.h> 99 #include <sys/domainset.h> 100 #include <sys/fail.h> 101 #include <sys/limits.h> 102 #include <sys/lock.h> 103 #include <sys/kernel.h> 104 #include <sys/malloc.h> 105 #include <sys/mutex.h> 106 #include <sys/random.h> 107 #include <sys/sdt.h> 108 #include <sys/smp.h> 109 #include <sys/sysctl.h> 110 #include <sys/systm.h> 111 112 #include <machine/cpu.h> 113 114 #include <vm/vm.h> 115 #include <vm/vm_param.h> 116 #include <vm/vm_page.h> 117 #include <vm/vm_phys.h> 118 #include <vm/vm_pagequeue.h> 119 120 #include <dev/random/randomdev.h> 121 #include <dev/random/random_harvestq.h> 122 #include <dev/random/uint128.h> 123 124 #include <dev/random/fenestrasX/fx_brng.h> 125 #include <dev/random/fenestrasX/fx_hash.h> 126 #include <dev/random/fenestrasX/fx_pool.h> 127 #include <dev/random/fenestrasX/fx_priv.h> 128 #include <dev/random/fenestrasX/fx_pub.h> 129 #include <dev/random/fenestrasX/fx_rng.h> 130 131 struct fxrng_buffered_rng fxrng_root; 132 uint64_t __read_mostly fxrng_root_generation; 133 DPCPU_DEFINE_STATIC(struct fxrng_buffered_rng *, fxrng_brng); 134 135 /* 136 * Top-level read API from randomdev. Responsible for NOWAIT-allocating 137 * per-cpu NUMA-local BRNGs, if needed and satisfiable; subroutines handle 138 * reseeding if the local BRNG is stale and rekeying when necessary. In 139 * low-memory conditions when a local BRNG cannot be allocated, the request is 140 * simply forwarded to the root BRNG. 141 * 142 * It is a precondition is that the root BRNG initial seeding has completed and 143 * the root generation number >0. 144 */ 145 static void 146 _fxrng_alg_read(uint8_t *output, size_t nbytes, uint64_t *seed_version_out) 147 { 148 struct fxrng_buffered_rng **pcpu_brng_p, *rng, *tmp; 149 struct pcpu *pcpu; 150 151 pcpu = get_pcpu(); 152 153 /* 154 * The following statement directly accesses an implementation detail 155 * of DPCPU, but the macros cater only to pinned threads; we want to 156 * operate on our initial CPU, without pinning, *even if* we migrate. 157 */ 158 pcpu_brng_p = _DPCPU_PTR(pcpu->pc_dynamic, fxrng_brng); 159 160 rng = (void *)atomic_load_acq_ptr((uintptr_t *)pcpu_brng_p); 161 162 /* 163 * Usually the pcpu BRNG has already been allocated, but we do it 164 * on-demand and need to check first. BRNGs are never deallocated and 165 * are valid as soon as the pointer is initialized. 166 */ 167 if (__predict_false(rng == NULL)) { 168 uint8_t newkey[FX_CHACHA20_KEYSIZE]; 169 struct domainset *ds; 170 int domain; 171 172 domain = pcpu->pc_domain; 173 174 /* 175 * Allocate pcpu BRNGs off-domain on weird NUMA machines like 176 * AMD Threadripper 2990WX, which has 2 NUMA nodes without 177 * local memory controllers. The PREF policy is automatically 178 * converted to something appropriate when domains are empty. 179 * (FIXED is not.) 180 * 181 * Otherwise, allocate strictly CPU-local memory. The 182 * rationale is this: if there is a memory shortage such that 183 * PREF policy would fallback to RR, we have no business 184 * wasting memory on a faster BRNG. So, use a FIXED domainset 185 * policy. If we cannot allocate, that's fine! We fall back 186 * to invoking the root BRNG. 187 */ 188 if (VM_DOMAIN_EMPTY(domain)) 189 ds = DOMAINSET_PREF(domain); 190 else 191 ds = DOMAINSET_FIXED(domain); 192 193 rng = malloc_domainset(sizeof(*rng), M_ENTROPY, ds, 194 M_NOWAIT | M_ZERO); 195 if (rng == NULL) { 196 /* Relatively easy case: fall back to root BRNG. */ 197 rng = &fxrng_root; 198 goto have_valid_rng; 199 } 200 201 fxrng_brng_init(rng); 202 203 /* 204 * The root BRNG is always up and available. Requests are 205 * always satisfiable. This is a design invariant. 206 */ 207 ASSERT_DEBUG(atomic_load_acq_64(&fxrng_root_generation) != 0, 208 "%s: attempting to seed child BRNG when root hasn't " 209 "been initialized yet.", __func__); 210 211 FXRNG_BRNG_LOCK(&fxrng_root); 212 #ifdef WITNESS 213 /* Establish lock order root->pcpu for WITNESS. */ 214 FXRNG_BRNG_LOCK(rng); 215 FXRNG_BRNG_UNLOCK(rng); 216 #endif 217 fxrng_brng_produce_seed_data_internal(&fxrng_root, newkey, 218 sizeof(newkey), &rng->brng_generation); 219 FXRNG_BRNG_ASSERT_NOT(&fxrng_root); 220 221 fxrng_rng_setkey(&rng->brng_rng, newkey, sizeof(newkey)); 222 explicit_bzero(newkey, sizeof(newkey)); 223 224 /* 225 * We have a valid RNG. Try to install it, or grab the other 226 * one if we lost the race. 227 */ 228 tmp = NULL; 229 while (tmp == NULL) 230 if (atomic_fcmpset_ptr((uintptr_t *)pcpu_brng_p, 231 (uintptr_t *)&tmp, (uintptr_t)rng)) 232 goto have_valid_rng; 233 234 /* 235 * We lost the race. There's nothing sensitive about 236 * our BRNG's PRF state, because it will never be used 237 * for anything and the key doesn't expose any 238 * information about the parent (root) generator's 239 * state -- it has already rekeyed. The generation 240 * number is public, and a zero counter isn't sensitive. 241 */ 242 free(rng, M_ENTROPY); 243 /* 244 * Use the winner's PCPU BRNG. 245 */ 246 rng = tmp; 247 } 248 249 have_valid_rng: 250 /* At this point we have a valid, initialized and seeded rng pointer. */ 251 FXRNG_BRNG_LOCK(rng); 252 if (seed_version_out != NULL) 253 *seed_version_out = rng->brng_generation; 254 fxrng_brng_read(rng, output, nbytes); 255 FXRNG_BRNG_ASSERT_NOT(rng); 256 } 257 258 static void 259 fxrng_alg_read(uint8_t *output, size_t nbytes) 260 { 261 _fxrng_alg_read(output, nbytes, NULL); 262 } 263 264 /* 265 * External API for arc4random(9) to fetch new key material and associated seed 266 * version in chacha20_randomstir(). 267 */ 268 void 269 read_random_key(void *output, size_t nbytes, uint64_t *seed_version_out) 270 { 271 /* Ensure _fxrng_alg_read invariant. */ 272 if (__predict_false(atomic_load_acq_64(&fxrng_root_generation) == 0)) 273 (void)fxrng_alg_seeded(); 274 275 _fxrng_alg_read(output, nbytes, seed_version_out); 276 } 277 278 static void 279 fxrng_init_alg(void *dummy __unused) 280 { 281 DPCPU_ZERO(fxrng_brng); 282 fxrng_brng_init(&fxrng_root); 283 fxrng_pools_init(); 284 } 285 SYSINIT(random_alg, SI_SUB_RANDOM, SI_ORDER_SECOND, fxrng_init_alg, NULL); 286 287 /* 288 * Public visibility struct referenced directly by other parts of randomdev. 289 */ 290 const struct random_algorithm random_alg_context = { 291 .ra_ident = "fenestrasX", 292 .ra_pre_read = (void (*)(void))nullop, 293 .ra_read = fxrng_alg_read, 294 .ra_seeded = fxrng_alg_seeded, 295 .ra_event_processor = fxrng_event_processor, 296 .ra_poolcount = FXRNG_NPOOLS, 297 }; 298