1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2019 Conrad Meyer <cem@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * This random algorithm is derived in part from the "Windows 10 random number 30 * generation infrastructure" whitepaper published by Niels Ferguson and 31 * Microsoft: https://aka.ms/win10rng 32 * 33 * It is also inspired by DJB's writing on buffered key-erasure PRNGs: 34 * https://blog.cr.yp.to/20170723-random.html 35 * 36 * The Windows 10 RNG bears some similarity to Fortuna, which Ferguson was also 37 * involved with. Notable differences include: 38 * - Extended to multi-CPU design 39 * - Extended to pre-buffer some PRNG output 40 * - Pool-based reseeding is solely time-based (rather than on-access w/ 41 * pacing) 42 * - Extended to specify efficient userspace design 43 * - Always-available design (requires the equivalent of loader(8) for all 44 * boots; probably relatively easy given the limited platforms Windows 10 45 * supports) 46 * 47 * Some aspects of the design document I found confusing and may have 48 * misinterpreted: 49 * - Relationship between root PRNG seed version and periodic reseed pool use. 50 * I interpreted these as separate sequences. The root PRNG seed version is 51 * bumped both by the periodic pool based reseed, and also special 52 * conditions such as the first time an entropy source provides entropy. I 53 * don't think first-time entropy sources should cause us to skip an entropy 54 * pool reseed. 55 * - Initial seeding. The paper is pretty terse on the subject. My 56 * interpretation of the document is that the Windows RNG infrastructure 57 * relies on the loader(8)-provided material for initial seeding and either 58 * ignores or doesn't start entropy sources until after that time. So when 59 * the paper says that first-time entropy source material "bypasses the 60 * pools," the root PRNG state already has been keyed for the first time and 61 * can generate 256 bits, mix it with the first-time entropy, and reseed 62 * immediately. 63 * 64 * Some notable design choices in this implementation divergent from that 65 * specified in the document above: 66 * - Blake2b instead of SHA-2 512 for entropy pooling 67 * - Chacha20 instead of AES-CTR DRBG for PRF 68 * - Initial seeding. We treat the 0->1 seed version (brng_generation) edge 69 * as the transition from blocked to unblocked. That edge is also the first 70 * time the key of the root BRNG's PRF is set. We perform initial seeding 71 * when the first request for entropy arrives. 72 * • As a result: Entropy callbacks prior to this edge do not have a keyed 73 * root PRNG, so bypassing the pools is kind of meaningless. Instead, 74 * they feed into pool0. (They also do not set the root PRNG key or bump 75 * the root PRNG seed version.) 76 * • Entropy callbacks after the edge behave like the specification. 77 * • All one-off sources are fed into pool0 and the result used to seed the 78 * root BRNG during the initial seed step. 79 * • All memory needed for initial seeding must be preallocated or static or 80 * fit on the stack; random reads can occur in nonsleepable contexts and 81 * we cannot allocate M_WAITOK. (We also cannot fail to incorporate any 82 * present one-off source, to the extent it is in the control of 83 * software.) 84 * - Timer interval reseeding. We also start the timer-based reseeding at 85 * initial seed, but unlike the design, our initial seed is some time after 86 * load (usually within the order of micro- or milliseconds due to 87 * stack_guard on x86, but conceivably later if nothing reads from random for 88 * a while). 89 * 90 * Not yet implemented, not in scope, or todo: 91 * - Various initial seeding sources we don't have yet 92 * - In particular, VM migration/copy detection 93 */ 94 95 #include <sys/param.h> 96 #include <sys/domainset.h> 97 #include <sys/fail.h> 98 #include <sys/limits.h> 99 #include <sys/lock.h> 100 #include <sys/kernel.h> 101 #include <sys/malloc.h> 102 #include <sys/mutex.h> 103 #include <sys/random.h> 104 #include <sys/sdt.h> 105 #include <sys/smp.h> 106 #include <sys/sysctl.h> 107 #include <sys/systm.h> 108 109 #include <machine/cpu.h> 110 111 #include <vm/vm.h> 112 #include <vm/vm_param.h> 113 #include <vm/vm_page.h> 114 #include <vm/vm_phys.h> 115 #include <vm/vm_pagequeue.h> 116 117 #include <dev/random/randomdev.h> 118 #include <dev/random/random_harvestq.h> 119 #include <dev/random/uint128.h> 120 121 #include <dev/random/fenestrasX/fx_brng.h> 122 #include <dev/random/fenestrasX/fx_hash.h> 123 #include <dev/random/fenestrasX/fx_pool.h> 124 #include <dev/random/fenestrasX/fx_priv.h> 125 #include <dev/random/fenestrasX/fx_pub.h> 126 #include <dev/random/fenestrasX/fx_rng.h> 127 128 struct fxrng_buffered_rng fxrng_root; 129 uint64_t __read_mostly fxrng_root_generation; 130 DPCPU_DEFINE_STATIC(struct fxrng_buffered_rng *, fxrng_brng); 131 132 /* 133 * Top-level read API from randomdev. Responsible for NOWAIT-allocating 134 * per-cpu NUMA-local BRNGs, if needed and satisfiable; subroutines handle 135 * reseeding if the local BRNG is stale and rekeying when necessary. In 136 * low-memory conditions when a local BRNG cannot be allocated, the request is 137 * simply forwarded to the root BRNG. 138 * 139 * It is a precondition is that the root BRNG initial seeding has completed and 140 * the root generation number >0. 141 */ 142 static void 143 _fxrng_alg_read(uint8_t *output, size_t nbytes, uint64_t *seed_version_out) 144 { 145 struct fxrng_buffered_rng **pcpu_brng_p, *rng, *tmp; 146 struct pcpu *pcpu; 147 148 pcpu = get_pcpu(); 149 150 /* 151 * The following statement directly accesses an implementation detail 152 * of DPCPU, but the macros cater only to pinned threads; we want to 153 * operate on our initial CPU, without pinning, *even if* we migrate. 154 */ 155 pcpu_brng_p = _DPCPU_PTR(pcpu->pc_dynamic, fxrng_brng); 156 157 rng = (void *)atomic_load_acq_ptr((uintptr_t *)pcpu_brng_p); 158 159 /* 160 * Usually the pcpu BRNG has already been allocated, but we do it 161 * on-demand and need to check first. BRNGs are never deallocated and 162 * are valid as soon as the pointer is initialized. 163 */ 164 if (__predict_false(rng == NULL)) { 165 uint8_t newkey[FX_CHACHA20_KEYSIZE]; 166 struct domainset *ds; 167 int domain; 168 169 domain = pcpu->pc_domain; 170 171 /* 172 * Allocate pcpu BRNGs off-domain on weird NUMA machines like 173 * AMD Threadripper 2990WX, which has 2 NUMA nodes without 174 * local memory controllers. The PREF policy is automatically 175 * converted to something appropriate when domains are empty. 176 * (FIXED is not.) 177 * 178 * Otherwise, allocate strictly CPU-local memory. The 179 * rationale is this: if there is a memory shortage such that 180 * PREF policy would fallback to RR, we have no business 181 * wasting memory on a faster BRNG. So, use a FIXED domainset 182 * policy. If we cannot allocate, that's fine! We fall back 183 * to invoking the root BRNG. 184 */ 185 if (VM_DOMAIN_EMPTY(domain)) 186 ds = DOMAINSET_PREF(domain); 187 else 188 ds = DOMAINSET_FIXED(domain); 189 190 rng = malloc_domainset(sizeof(*rng), M_ENTROPY, ds, 191 M_NOWAIT | M_ZERO); 192 if (rng == NULL) { 193 /* Relatively easy case: fall back to root BRNG. */ 194 rng = &fxrng_root; 195 goto have_valid_rng; 196 } 197 198 fxrng_brng_init(rng); 199 200 /* 201 * The root BRNG is always up and available. Requests are 202 * always satisfiable. This is a design invariant. 203 */ 204 ASSERT_DEBUG(atomic_load_acq_64(&fxrng_root_generation) != 0, 205 "%s: attempting to seed child BRNG when root hasn't " 206 "been initialized yet.", __func__); 207 208 FXRNG_BRNG_LOCK(&fxrng_root); 209 #ifdef WITNESS 210 /* Establish lock order root->pcpu for WITNESS. */ 211 FXRNG_BRNG_LOCK(rng); 212 FXRNG_BRNG_UNLOCK(rng); 213 #endif 214 fxrng_brng_produce_seed_data_internal(&fxrng_root, newkey, 215 sizeof(newkey), &rng->brng_generation); 216 FXRNG_BRNG_ASSERT_NOT(&fxrng_root); 217 218 fxrng_rng_setkey(&rng->brng_rng, newkey, sizeof(newkey)); 219 explicit_bzero(newkey, sizeof(newkey)); 220 221 /* 222 * We have a valid RNG. Try to install it, or grab the other 223 * one if we lost the race. 224 */ 225 tmp = NULL; 226 while (tmp == NULL) 227 if (atomic_fcmpset_ptr((uintptr_t *)pcpu_brng_p, 228 (uintptr_t *)&tmp, (uintptr_t)rng)) 229 goto have_valid_rng; 230 231 /* 232 * We lost the race. There's nothing sensitive about 233 * our BRNG's PRF state, because it will never be used 234 * for anything and the key doesn't expose any 235 * information about the parent (root) generator's 236 * state -- it has already rekeyed. The generation 237 * number is public, and a zero counter isn't sensitive. 238 */ 239 free(rng, M_ENTROPY); 240 /* 241 * Use the winner's PCPU BRNG. 242 */ 243 rng = tmp; 244 } 245 246 have_valid_rng: 247 /* At this point we have a valid, initialized and seeded rng pointer. */ 248 FXRNG_BRNG_LOCK(rng); 249 if (seed_version_out != NULL) 250 *seed_version_out = rng->brng_generation; 251 fxrng_brng_read(rng, output, nbytes); 252 FXRNG_BRNG_ASSERT_NOT(rng); 253 } 254 255 static void 256 fxrng_alg_read(uint8_t *output, size_t nbytes) 257 { 258 _fxrng_alg_read(output, nbytes, NULL); 259 } 260 261 /* 262 * External API for arc4random(9) to fetch new key material and associated seed 263 * version in chacha20_randomstir(). 264 */ 265 void 266 read_random_key(void *output, size_t nbytes, uint64_t *seed_version_out) 267 { 268 /* Ensure _fxrng_alg_read invariant. */ 269 if (__predict_false(atomic_load_acq_64(&fxrng_root_generation) == 0)) 270 (void)fxrng_alg_seeded(); 271 272 _fxrng_alg_read(output, nbytes, seed_version_out); 273 } 274 275 static void 276 fxrng_init_alg(void *dummy __unused) 277 { 278 DPCPU_ZERO(fxrng_brng); 279 fxrng_brng_init(&fxrng_root); 280 fxrng_pools_init(); 281 } 282 SYSINIT(random_alg, SI_SUB_RANDOM, SI_ORDER_SECOND, fxrng_init_alg, NULL); 283 284 /* 285 * Public visibility struct referenced directly by other parts of randomdev. 286 */ 287 const struct random_algorithm random_alg_context = { 288 .ra_ident = "fenestrasX", 289 .ra_pre_read = (void (*)(void))nullop, 290 .ra_read = fxrng_alg_read, 291 .ra_seeded = fxrng_alg_seeded, 292 .ra_event_processor = fxrng_event_processor, 293 .ra_poolcount = FXRNG_NPOOLS, 294 }; 295