// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <linux/array_size.h>
#include <linux/minmax.h>
#include <vdso/datapage.h>
#include <vdso/getrandom.h>
#include <vdso/limits.h>
#include <vdso/unaligned.h>
#include <asm/barrier.h>
#include <asm/vdso/getrandom.h>
#include <uapi/linux/errno.h>
#include <uapi/linux/mman.h>
#include <uapi/linux/random.h>

/* Bring in default accessors */
#include <vdso/vsyscall.h>

#undef PAGE_SIZE
#undef PAGE_MASK
#define PAGE_SIZE (1UL << CONFIG_PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))

/*
 * Copy @len bytes from @src to @dst one @type-sized chunk at a time, zeroing
 * each chunk of @src immediately after it has been copied, so that consumed
 * bytes never linger in the source buffer.
 */
#define MEMCPY_AND_ZERO_SRC(type, dst, src, len) do {				\
	while (len >= sizeof(type)) {						\
		__put_unaligned_t(type, __get_unaligned_t(type, src), dst);	\
		__put_unaligned_t(type, 0, src);				\
		dst += sizeof(type);						\
		src += sizeof(type);						\
		len -= sizeof(type);						\
	}									\
} while (0)

/*
 * Copy @len bytes from @src to @dst while simultaneously zeroing @src, using
 * the widest chunk size the platform can handle unaligned (falling back to
 * byte-at-a-time for the remainder or when unaligned access is inefficient).
 */
static void memcpy_and_zero_src(void *dst, void *src, size_t len)
{
	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
		if (IS_ENABLED(CONFIG_64BIT))
			MEMCPY_AND_ZERO_SRC(u64, dst, src, len);
		MEMCPY_AND_ZERO_SRC(u32, dst, src, len);
		MEMCPY_AND_ZERO_SRC(u16, dst, src, len);
	}
	MEMCPY_AND_ZERO_SRC(u8, dst, src, len);
}

/**
 * __cvdso_getrandom_data - Generic vDSO implementation of getrandom() syscall.
 * @rng_info: Describes state of kernel RNG, memory shared with kernel.
 * @buffer: Destination buffer to fill with random bytes.
 * @len: Size of @buffer in bytes.
 * @flags: Zero or more GRND_* flags.
 * @opaque_state: Pointer to an opaque state area.
 * @opaque_len: Length of opaque state area.
 *
 * This implements a "fast key erasure" RNG using ChaCha20, in the same way that the kernel's
 * getrandom() syscall does. It periodically reseeds its key from the kernel's RNG, at the same
 * schedule that the kernel's RNG is reseeded. If the kernel's RNG is not ready, then this always
 * calls into the syscall.
 *
 * If @buffer, @len, and @flags are 0, and @opaque_len is ~0UL, then @opaque_state is populated
 * with a struct vgetrandom_opaque_params and the function returns 0; if it does not return 0,
 * this function should not be used.
 *
 * @opaque_state *must* be allocated by calling mmap(2) using the mmap_prot and mmap_flags fields
 * from the struct vgetrandom_opaque_params, and states must not straddle pages. Unless external
 * locking is used, one state must be allocated per thread, as it is not safe to call this function
 * concurrently with the same @opaque_state. However, it is safe to call this using the same
 * @opaque_state that is shared between main code and signal handling code, within the same thread.
 *
 * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
 */
static __always_inline ssize_t
__cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_t len,
		       unsigned int flags, void *opaque_state, size_t opaque_len)
{
	ssize_t ret = min_t(size_t, INT_MAX & PAGE_MASK /* = MAX_RW_COUNT */, len);
	struct vgetrandom_state *state = opaque_state;
	size_t batch_len, nblocks, orig_len = len;
	bool in_use, have_retried = false;
	void *orig_buffer = buffer;
	u64 current_generation;
	/*
	 * ChaCha block counter, passed to both __arch_chacha20_blocks_nostack()
	 * calls below, so that the direct-to-buffer blocks and the batch refill
	 * never reuse a counter value under the same key.
	 */
	u32 counter[2] = { 0 };

	/* Special "query parameters" calling convention: report the required state layout. */
	if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) {
		struct vgetrandom_opaque_params *params = opaque_state;
		params->size_of_opaque_state = sizeof(*state);
		params->mmap_prot = PROT_READ | PROT_WRITE;
		params->mmap_flags = MAP_DROPPABLE | MAP_ANONYMOUS;
		for (size_t i = 0; i < ARRAY_SIZE(params->reserved); ++i)
			params->reserved[i] = 0;
		return 0;
	}

	/* The state must not straddle a page, since pages can be zeroed at any time. */
	if (unlikely(((unsigned long)opaque_state & ~PAGE_MASK) + sizeof(*state) > PAGE_SIZE))
		return -EFAULT;

	/* Handle unexpected flags by falling back to the kernel. */
	if (unlikely(flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)))
		goto fallback_syscall;

	/* If the caller passes the wrong size, which might happen due to CRIU, fallback. */
	if (unlikely(opaque_len != sizeof(*state)))
		goto fallback_syscall;

	/*
	 * If the kernel's RNG is not yet ready, then it's not possible to provide random bytes from
	 * userspace, because A) the various @flags require this to block, or not, depending on
	 * various factors unavailable to userspace, and B) the kernel's behavior before the RNG is
	 * ready is to reseed from the entropy pool at every invocation.
	 */
	if (unlikely(!READ_ONCE(rng_info->is_ready)))
		goto fallback_syscall;

	/*
	 * This condition is checked after @rng_info->is_ready, because before the kernel's RNG is
	 * initialized, the @flags parameter may require this to block or return an error, even when
	 * len is zero.
	 */
	if (unlikely(!len))
		return 0;

	/*
	 * @state->in_use is basic reentrancy protection against this running in a signal handler
	 * with the same @opaque_state, but obviously not atomic wrt multiple CPUs or more than one
	 * level of reentrancy. If a signal interrupts this after reading @state->in_use, but before
	 * writing @state->in_use, there is still no race, because the signal handler will run to
	 * its completion before returning execution.
	 */
	in_use = READ_ONCE(state->in_use);
	if (unlikely(in_use))
		/* The syscall simply fills the buffer and does not touch @state, so fallback. */
		goto fallback_syscall;
	WRITE_ONCE(state->in_use, true);

retry_generation:
	/*
	 * @rng_info->generation must always be read here, as it serializes @state->key with the
	 * kernel's RNG reseeding schedule.
	 */
	current_generation = READ_ONCE(rng_info->generation);

	/*
	 * If @state->generation doesn't match the kernel RNG's generation, then it means the
	 * kernel's RNG has reseeded, and so @state->key is reseeded as well.
	 */
	if (unlikely(state->generation != current_generation)) {
		/*
		 * Write the generation before filling the key, in case of fork. If there is a fork
		 * just after this line, the parent and child will get different random bytes from
		 * the syscall, which is good. However, were this line to occur after the getrandom
		 * syscall, then both child and parent could have the same bytes and the same
		 * generation counter, so the fork would not be detected. Therefore, write
		 * @state->generation before the call to the getrandom syscall.
		 */
		WRITE_ONCE(state->generation, current_generation);

		/*
		 * Prevent the syscall from being reordered wrt current_generation. Pairs with the
		 * smp_store_release(&vdso_k_rng_data->generation) in random.c.
		 */
		smp_rmb();

		/* Reseed @state->key using fresh bytes from the kernel. */
		if (getrandom_syscall(state->key, sizeof(state->key), 0) != sizeof(state->key)) {
			/*
			 * If the syscall failed to refresh the key, then @state->key is now
			 * invalid, so invalidate the generation so that it is not used again, and
			 * fallback to using the syscall entirely.
			 */
			WRITE_ONCE(state->generation, 0);

			/*
			 * Set @state->in_use to false only after the last write to @state in the
			 * line above.
			 */
			WRITE_ONCE(state->in_use, false);

			goto fallback_syscall;
		}

		/*
		 * Set @state->pos to beyond the end of the batch, so that the batch is refilled
		 * using the new key.
		 */
		state->pos = sizeof(state->batch);
	}

	/* Set len to the total amount of bytes that this function is allowed to read, ret. */
	len = ret;
more_batch:
	/*
	 * First use bytes out of @state->batch, which may have been filled by the last call to this
	 * function.
	 */
	batch_len = min_t(size_t, sizeof(state->batch) - state->pos, len);
	if (batch_len) {
		/* Zeroing at the same time as memcpying helps preserve forward secrecy. */
		memcpy_and_zero_src(buffer, state->batch + state->pos, batch_len);
		state->pos += batch_len;
		buffer += batch_len;
		len -= batch_len;
	}

	if (!len) {
		/* Prevent the loop from being reordered wrt ->generation. */
		barrier();

		/*
		 * Since @rng_info->generation will never be 0, re-read @state->generation, rather
		 * than using the local current_generation variable, to learn whether a fork
		 * occurred or if @state was zeroed due to memory pressure. Primarily, though, this
		 * indicates whether the kernel's RNG has reseeded, in which case generate a new key
		 * and start over.
		 */
		if (unlikely(READ_ONCE(state->generation) != READ_ONCE(rng_info->generation))) {
			/*
			 * Prevent this from looping forever in case of low memory or racing with a
			 * user force-reseeding the kernel's RNG using the ioctl.
			 */
			if (have_retried) {
				WRITE_ONCE(state->in_use, false);
				goto fallback_syscall;
			}

			have_retried = true;
			buffer = orig_buffer;
			goto retry_generation;
		}

		/*
		 * Set @state->in_use to false only when there will be no more reads or writes of
		 * @state.
		 */
		WRITE_ONCE(state->in_use, false);
		return ret;
	}

	/* Generate blocks of RNG output directly into @buffer while there's enough room left. */
	nblocks = len / CHACHA_BLOCK_SIZE;
	if (nblocks) {
		__arch_chacha20_blocks_nostack(buffer, state->key, counter, nblocks);
		buffer += nblocks * CHACHA_BLOCK_SIZE;
		len -= nblocks * CHACHA_BLOCK_SIZE;
	}

	BUILD_BUG_ON(sizeof(state->batch_key) % CHACHA_BLOCK_SIZE != 0);

	/* Refill the batch and overwrite the key, in order to preserve forward secrecy. */
	__arch_chacha20_blocks_nostack(state->batch_key, state->key, counter,
				       sizeof(state->batch_key) / CHACHA_BLOCK_SIZE);

	/* Since the batch was just refilled, set the position back to 0 to indicate a full batch. */
	state->pos = 0;
	goto more_batch;

fallback_syscall:
	return getrandom_syscall(orig_buffer, orig_len, flags);
}

/*
 * Thin wrapper used by architecture vDSO entry points: binds the shared
 * vdso_rng_data page obtained via __arch_get_vdso_u_rng_data() to the
 * generic implementation above.
 */
static __always_inline ssize_t
__cvdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
{
	return __cvdso_getrandom_data(__arch_get_vdso_u_rng_data(), buffer, len, flags,
				      opaque_state, opaque_len);
}