1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * This random algorithm is derived in part from the "Windows 10 random number 30 * generation infrastructure" whitepaper published by Niels Ferguson and 31 * Microsoft: https://aka.ms/win10rng 32 * 33 * It is also inspired by DJB's writing on buffered key-erasure PRNGs: 34 * https://blog.cr.yp.to/20170723-random.html 35 * 36 * The Windows 10 RNG bears some similarity to Fortuna, which Ferguson was also 37 * involved with. Notable differences include: 38 * - Extended to multi-CPU design 39 * - Extended to pre-buffer some PRNG output 40 * - Pool-based reseeding is solely time-based (rather than on-access w/ 41 * pacing) 42 * - Extended to specify efficient userspace design 43 * - Always-available design (requires the equivalent of loader(8) for all 44 * boots; probably relatively easy given the limited platforms Windows 10 45 * supports) 46 * 47 * Some aspects of the design document I found confusing and may have 48 * misinterpreted: 49 * - Relationship between root PRNG seed version and periodic reseed pool use. 50 * I interpreted these as separate sequences. The root PRNG seed version is 51 * bumped both by the periodic pool based reseed, and also special 52 * conditions such as the first time an entropy source provides entropy. I 53 * don't think first-time entropy sources should cause us to skip an entropy 54 * pool reseed. 55 * - Initial seeding. The paper is pretty terse on the subject. My 56 * interpretation of the document is that the Windows RNG infrastructure 57 * relies on the loader(8)-provided material for initial seeding and either 58 * ignores or doesn't start entropy sources until after that time. So when 59 * the paper says that first-time entropy source material "bypasses the 60 * pools," the root PRNG state already has been keyed for the first time and 61 * can generate 256 bits, mix it with the first-time entropy, and reseed 62 * immediately. 63 * 64 * Some notable design choices in this implementation divergent from that 65 * specified in the document above: 66 * - Blake2b instead of SHA-2 512 for entropy pooling 67 * - Chacha20 instead of AES-CTR DRBG for PRF 68 * - Initial seeding. We treat the 0->1 seed version (brng_generation) edge 69 * as the transition from blocked to unblocked. That edge is also the first 70 * time the key of the root BRNG's PRF is set. We perform initial seeding 71 * when the first request for entropy arrives. 72 * • As a result: Entropy callbacks prior to this edge do not have a keyed 73 * root PRNG, so bypassing the pools is kind of meaningless. Instead, 74 * they feed into pool0. (They also do not set the root PRNG key or bump 75 * the root PRNG seed version.) 76 * • Entropy callbacks after the edge behave like the specification. 77 * • All one-off sources are fed into pool0 and the result used to seed the 78 * root BRNG during the initial seed step. 79 * • All memory needed for initial seeding must be preallocated or static or 80 * fit on the stack; random reads can occur in nonsleepable contexts and 81 * we cannot allocate M_WAITOK. (We also cannot fail to incorporate any 82 * present one-off source, to the extent it is in the control of 83 * software.) 84 * - Timer interval reseeding. We also start the timer-based reseeding at 85 * initial seed, but unlike the design, our initial seed is some time after 86 * load (usually within the order of micro- or milliseconds due to 87 * stack_guard on x86, but conceivably later if nothing reads from random for 88 * a while). 89 * 90 * Not yet implemented, not in scope, or todo: 91 * - Various initial seeding sources we don't have yet 92 * - In particular, VM migration/copy detection 93 */ 94 95 #include <sys/cdefs.h> 96 #include <sys/param.h> 97 #include <sys/domainset.h> 98 #include <sys/fail.h> 99 #include <sys/limits.h> 100 #include <sys/lock.h> 101 #include <sys/kernel.h> 102 #include <sys/malloc.h> 103 #include <sys/mutex.h> 104 #include <sys/random.h> 105 #include <sys/sdt.h> 106 #include <sys/smp.h> 107 #include <sys/sysctl.h> 108 #include <sys/systm.h> 109 110 #include <machine/cpu.h> 111 112 #include <vm/vm.h> 113 #include <vm/vm_param.h> 114 #include <vm/vm_page.h> 115 #include <vm/vm_phys.h> 116 #include <vm/vm_pagequeue.h> 117 118 #include <dev/random/randomdev.h> 119 #include <dev/random/random_harvestq.h> 120 #include <dev/random/uint128.h> 121 122 #include <dev/random/fenestrasX/fx_brng.h> 123 #include <dev/random/fenestrasX/fx_hash.h> 124 #include <dev/random/fenestrasX/fx_pool.h> 125 #include <dev/random/fenestrasX/fx_priv.h> 126 #include <dev/random/fenestrasX/fx_pub.h> 127 #include <dev/random/fenestrasX/fx_rng.h> 128 129 struct fxrng_buffered_rng fxrng_root; 130 uint64_t __read_mostly fxrng_root_generation; 131 DPCPU_DEFINE_STATIC(struct fxrng_buffered_rng *, fxrng_brng); 132 133 /* 134 * Top-level read API from randomdev. Responsible for NOWAIT-allocating 135 * per-cpu NUMA-local BRNGs, if needed and satisfiable; subroutines handle 136 * reseeding if the local BRNG is stale and rekeying when necessary. In 137 * low-memory conditions when a local BRNG cannot be allocated, the request is 138 * simply forwarded to the root BRNG. 139 * 140 * It is a precondition is that the root BRNG initial seeding has completed and 141 * the root generation number >0. 142 */ 143 static void 144 _fxrng_alg_read(uint8_t *output, size_t nbytes, uint64_t *seed_version_out) 145 { 146 struct fxrng_buffered_rng **pcpu_brng_p, *rng, *tmp; 147 struct pcpu *pcpu; 148 149 pcpu = get_pcpu(); 150 151 /* 152 * The following statement directly accesses an implementation detail 153 * of DPCPU, but the macros cater only to pinned threads; we want to 154 * operate on our initial CPU, without pinning, *even if* we migrate. 155 */ 156 pcpu_brng_p = _DPCPU_PTR(pcpu->pc_dynamic, fxrng_brng); 157 158 rng = (void *)atomic_load_acq_ptr((uintptr_t *)pcpu_brng_p); 159 160 /* 161 * Usually the pcpu BRNG has already been allocated, but we do it 162 * on-demand and need to check first. BRNGs are never deallocated and 163 * are valid as soon as the pointer is initialized. 164 */ 165 if (__predict_false(rng == NULL)) { 166 uint8_t newkey[FX_CHACHA20_KEYSIZE]; 167 struct domainset *ds; 168 int domain; 169 170 domain = pcpu->pc_domain; 171 172 /* 173 * Allocate pcpu BRNGs off-domain on weird NUMA machines like 174 * AMD Threadripper 2990WX, which has 2 NUMA nodes without 175 * local memory controllers. The PREF policy is automatically 176 * converted to something appropriate when domains are empty. 177 * (FIXED is not.) 178 * 179 * Otherwise, allocate strictly CPU-local memory. The 180 * rationale is this: if there is a memory shortage such that 181 * PREF policy would fallback to RR, we have no business 182 * wasting memory on a faster BRNG. So, use a FIXED domainset 183 * policy. If we cannot allocate, that's fine! We fall back 184 * to invoking the root BRNG. 185 */ 186 if (VM_DOMAIN_EMPTY(domain)) 187 ds = DOMAINSET_PREF(domain); 188 else 189 ds = DOMAINSET_FIXED(domain); 190 191 rng = malloc_domainset(sizeof(*rng), M_ENTROPY, ds, 192 M_NOWAIT | M_ZERO); 193 if (rng == NULL) { 194 /* Relatively easy case: fall back to root BRNG. */ 195 rng = &fxrng_root; 196 goto have_valid_rng; 197 } 198 199 fxrng_brng_init(rng); 200 201 /* 202 * The root BRNG is always up and available. Requests are 203 * always satisfiable. This is a design invariant. 204 */ 205 ASSERT_DEBUG(atomic_load_acq_64(&fxrng_root_generation) != 0, 206 "%s: attempting to seed child BRNG when root hasn't " 207 "been initialized yet.", __func__); 208 209 FXRNG_BRNG_LOCK(&fxrng_root); 210 #ifdef WITNESS 211 /* Establish lock order root->pcpu for WITNESS. */ 212 FXRNG_BRNG_LOCK(rng); 213 FXRNG_BRNG_UNLOCK(rng); 214 #endif 215 fxrng_brng_produce_seed_data_internal(&fxrng_root, newkey, 216 sizeof(newkey), &rng->brng_generation); 217 FXRNG_BRNG_ASSERT_NOT(&fxrng_root); 218 219 fxrng_rng_setkey(&rng->brng_rng, newkey, sizeof(newkey)); 220 explicit_bzero(newkey, sizeof(newkey)); 221 222 /* 223 * We have a valid RNG. Try to install it, or grab the other 224 * one if we lost the race. 225 */ 226 tmp = NULL; 227 while (tmp == NULL) 228 if (atomic_fcmpset_ptr((uintptr_t *)pcpu_brng_p, 229 (uintptr_t *)&tmp, (uintptr_t)rng)) 230 goto have_valid_rng; 231 232 /* 233 * We lost the race. There's nothing sensitive about 234 * our BRNG's PRF state, because it will never be used 235 * for anything and the key doesn't expose any 236 * information about the parent (root) generator's 237 * state -- it has already rekeyed. The generation 238 * number is public, and a zero counter isn't sensitive. 239 */ 240 free(rng, M_ENTROPY); 241 /* 242 * Use the winner's PCPU BRNG. 243 */ 244 rng = tmp; 245 } 246 247 have_valid_rng: 248 /* At this point we have a valid, initialized and seeded rng pointer. */ 249 FXRNG_BRNG_LOCK(rng); 250 if (seed_version_out != NULL) 251 *seed_version_out = rng->brng_generation; 252 fxrng_brng_read(rng, output, nbytes); 253 FXRNG_BRNG_ASSERT_NOT(rng); 254 } 255 256 static void 257 fxrng_alg_read(uint8_t *output, size_t nbytes) 258 { 259 _fxrng_alg_read(output, nbytes, NULL); 260 } 261 262 /* 263 * External API for arc4random(9) to fetch new key material and associated seed 264 * version in chacha20_randomstir(). 265 */ 266 void 267 read_random_key(void *output, size_t nbytes, uint64_t *seed_version_out) 268 { 269 /* Ensure _fxrng_alg_read invariant. */ 270 if (__predict_false(atomic_load_acq_64(&fxrng_root_generation) == 0)) 271 (void)fxrng_alg_seeded(); 272 273 _fxrng_alg_read(output, nbytes, seed_version_out); 274 } 275 276 static void 277 fxrng_init_alg(void *dummy __unused) 278 { 279 DPCPU_ZERO(fxrng_brng); 280 fxrng_brng_init(&fxrng_root); 281 fxrng_pools_init(); 282 } 283 SYSINIT(random_alg, SI_SUB_RANDOM, SI_ORDER_SECOND, fxrng_init_alg, NULL); 284 285 /* 286 * Public visibility struct referenced directly by other parts of randomdev. 287 */ 288 const struct random_algorithm random_alg_context = { 289 .ra_ident = "fenestrasX", 290 .ra_pre_read = (void (*)(void))nullop, 291 .ra_read = fxrng_alg_read, 292 .ra_seeded = fxrng_alg_seeded, 293 .ra_event_processor = fxrng_event_processor, 294 .ra_poolcount = FXRNG_NPOOLS, 295 }; 296