// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024 Rivos, Inc.
 * Deepak Gupta <debug@rivosinc.com>
 *
 * User-mode control-flow integrity (CFI) support for RISC-V: shadow
 * stacks (backward-edge CFI, Zicfiss) and indirect-branch landing pads
 * (forward-edge CFI, Zicfilp). Per-task state lives in
 * thread_info.user_cfi_state; the hardware enable bits live in
 * thread.envcfg and are mirrored into CSR_ENVCFG.
 */

#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/user.h>
#include <linux/syscalls.h>
#include <linux/prctl.h>
#include <asm/csr.h>
#include <asm/usercfi.h>

/* A shadow stack entry is one XLEN-sized word (return address or token) */
#define SHSTK_ENTRY_SIZE sizeof(void *)

/* Is backward-edge CFI (shadow stack) enabled for @task? */
bool is_shstk_enabled(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ubcfi_en;
}

/* Does @task have a recorded shadow stack allocation (non-zero base)? */
bool is_shstk_allocated(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.shdw_stk_base;
}

/* Has @task's shadow stack status been locked against further changes? */
bool is_shstk_locked(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ubcfi_locked;
}

/* Record the base address and size of @task's shadow stack mapping */
void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size)
{
	task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr;
	task->thread_info.user_cfi_state.shdw_stk_size = size;
}

/*
 * Return @task's shadow stack base address. If @size is non-NULL, the
 * mapping size is also reported through it.
 */
unsigned long get_shstk_base(struct task_struct *task, unsigned long *size)
{
	if (size)
		*size = task->thread_info.user_cfi_state.shdw_stk_size;
	return task->thread_info.user_cfi_state.shdw_stk_base;
}

/* Set the active shadow stack pointer tracked for @task */
void set_active_shstk(struct task_struct *task, unsigned long shstk_addr)
{
	task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr;
}

/*
 * Enable/disable shadow stack for @task: update the per-task flag and the
 * SSE bit in the task's saved envcfg, then write CSR_ENVCFG.
 *
 * NOTE(review): csr_write() programs the CPU this code runs on; presumably
 * callers invoke this for current (or for a child that has not yet been
 * scheduled, with envcfg restored on context switch) — confirm.
 */
void set_shstk_status(struct task_struct *task, bool enable)
{
	if (!cpu_supports_shadow_stack())
		return;

	task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0;

	if (enable)
		task->thread.envcfg |= ENVCFG_SSE;
	else
		task->thread.envcfg &= ~ENVCFG_SSE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}

/* Lock @task's shadow stack status so userspace can't modify it further */
void set_shstk_lock(struct task_struct *task)
{
	task->thread_info.user_cfi_state.ubcfi_locked = 1;
}

/* Is forward-edge CFI (indirect-branch landing pads) enabled for @task? */
bool is_indir_lp_enabled(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ufcfi_en;
}

/* Has @task's landing-pad status been locked against further changes? */
bool is_indir_lp_locked(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ufcfi_locked;
}

/*
 * Enable/disable indirect-branch landing pads for @task: update the per-task
 * flag and the LPE bit in the task's saved envcfg, then write CSR_ENVCFG
 * (current CPU — same caveat as set_shstk_status()).
 */
void set_indir_lp_status(struct task_struct *task, bool enable)
{
	if (!cpu_supports_indirect_br_lp_instr())
		return;

	task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0;

	if (enable)
		task->thread.envcfg |= ENVCFG_LPE;
	else
		task->thread.envcfg &= ~ENVCFG_LPE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}

/* Lock @task's landing-pad status so userspace can't modify it further */
void set_indir_lp_lock(struct task_struct *task)
{
	task->thread_info.user_cfi_state.ufcfi_locked = 1;
}

/*
 * If size is 0, then to be compatible with regular stack we want it to be as big as
 * regular stack. Else PAGE_ALIGN it and return back
 */
static unsigned long calc_shstk_size(unsigned long size)
{
	if (size)
		return PAGE_ALIGN(size);

	/* Default: match RLIMIT_STACK, capped at 4G */
	return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
}

/*
 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen
 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to
 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow
 * stack.
 *
 * Returns the value previously stored at @addr, or -1 if the access faulted
 * (-1 is never a legitimate shadow stack entry; see comment below).
 */
static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val)
{
	/*
	 * Never expect -1 on shadow stack. Expect return addresses and zero
	 */
	unsigned long swap = -1;

	/* Bracket the user access; the extable entry routes faults to 'fault' */
	__enable_user_access();
	asm goto(".option push\n"
		 ".option arch, +zicfiss\n"
		 "1: ssamoswap.d %[swap], %[val], %[addr]\n"
		 _ASM_EXTABLE(1b, %l[fault])
		 ".option pop\n"
		 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr)
		 : [val] "r" (val)
		 : "memory"
		 : fault
		);
	__disable_user_access();
	return swap;
fault:
	__disable_user_access();
	return -1;
}

/*
 * Create a restore token on the shadow stack. A token is always XLEN wide
 * and aligned to XLEN.
 *
 * @ssp:        shadow stack pointer the token restores to; must be
 *              SHSTK_ENTRY_SIZE aligned
 * @token_addr: if non-NULL, receives the address the token was written at
 *
 * Returns 0 on success, -EINVAL on misalignment, -EFAULT if the shadow
 * stack write faulted.
 */
static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
{
	unsigned long addr;

	/* Token must be aligned */
	if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
		return -EINVAL;

	/* On RISC-V we're constructing token to be function of address itself */
	addr = ssp - SHSTK_ENTRY_SIZE;

	if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1)
		return -EFAULT;

	if (token_addr)
		*token_addr = addr;

	return 0;
}

/*
 * Map a shadow stack region of @size bytes. @addr is a hint; if non-zero the
 * mapping uses MAP_FIXED_NOREPLACE at that address. If @set_tok, a restore
 * token is planted at @addr + @token_offset (and the mapping is torn down if
 * that fails). Returns the mapped address or a negative error value
 * (check with IS_ERR_VALUE()).
 */
static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
					   unsigned long token_offset, bool set_tok)
{
	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
	struct mm_struct *mm = current->mm;
	unsigned long populate;

	if (addr)
		flags |= MAP_FIXED_NOREPLACE;

	mmap_write_lock(mm);
	/* PROT_READ only; writability comes from VM_SHADOW_STACK | VM_WRITE */
	addr = do_mmap(NULL, addr, size, PROT_READ, flags,
		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
	mmap_write_unlock(mm);

	if (!set_tok || IS_ERR_VALUE(addr))
		goto out;

	if (create_rstor_token(addr + token_offset, NULL)) {
		/* Token placement failed: undo the mapping */
		vm_munmap(addr, size);
		return -EINVAL;
	}

out:
	return addr;
}

/*
 * map_shadow_stack syscall: allocate a shadow stack mapping for userspace.
 * @addr:  page-aligned fixed address, or 0 to let the kernel choose
 * @size:  requested size in bytes (page-aligned internally)
 * @flags: only SHADOW_STACK_SET_TOKEN is accepted
 */
SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
	unsigned long aligned_size = 0;

	if (!cpu_supports_shadow_stack())
		return -EOPNOTSUPP;

	/* Anything other than set token should result in invalid param */
	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	/*
	 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available
	 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction
	 * itself. This provides static property on register programming and writes to CSR can't
	 * be unintentional from programmer's perspective. As long as programmer has guarded areas
	 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since
	 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent
	 * to allocation. Although in order to provide portability with other architectures (because
	 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token
	 * flag in flags and if provided in flags, will setup a token at the base.
	 */

	/* If there isn't space for a token */
	if (set_tok && size < SHSTK_ENTRY_SIZE)
		return -ENOSPC;

	/* A fixed address must be page aligned */
	if (addr && (addr & (PAGE_SIZE - 1)))
		return -EINVAL;

	aligned_size = PAGE_ALIGN(size);
	if (aligned_size < size)
		return -EOVERFLOW;

	/* Token offset is the caller-requested @size (top of requested area) */
	return allocate_shadow_stack(addr, aligned_size, size, set_tok);
}

/*
 * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for
 * cases where CLONE_VM is specified and thus a different stack is specified by user. We
 * thus need a separate shadow stack too. How a separate shadow stack is specified by
 * user is still being debated. Once that's settled, remove this part of the comment.
 * This function simply returns 0 if shadow stacks are not supported or if separate shadow
 * stack allocation is not needed (like in case of !CLONE_VM)
 *
 * Returns the new thread's initial shadow stack pointer (top of the new
 * mapping), 0 when no new stack is needed, or a negative error value.
 */
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
				       const struct kernel_clone_args *args)
{
	unsigned long addr, size;

	/* If shadow stack is not supported, return 0 */
	if (!cpu_supports_shadow_stack())
		return 0;

	/*
	 * If shadow stack is not enabled on the new thread, skip any
	 * switch to a new shadow stack.
	 */
	if (!is_shstk_enabled(tsk))
		return 0;

	/*
	 * For CLONE_VFORK the child will share the parent's shadow stack.
	 * Set base = 0 and size = 0, this is special means to track this state
	 * so the freeing logic run for child knows to leave it alone.
	 */
	if (args->flags & CLONE_VFORK) {
		set_shstk_base(tsk, 0, 0);
		return 0;
	}

	/*
	 * For !CLONE_VM the child will use a copy of the parent's shadow
	 * stack.
	 */
	if (!(args->flags & CLONE_VM))
		return 0;

	/*
	 * reaching here means, CLONE_VM was specified and thus a separate shadow
	 * stack is needed for new cloned thread. Note: below allocation is happening
	 * using current mm.
	 */
	size = calc_shstk_size(args->stack_size);
	addr = allocate_shadow_stack(0, size, 0, false);
	if (IS_ERR_VALUE(addr))
		return addr;

	set_shstk_base(tsk, addr, size);

	/* Shadow stack grows down: the initial pointer is base + size */
	return addr + size;
}

/*
 * Release @tsk's shadow stack mapping, if @tsk owns one and shares the
 * current mm (see the CLONE_VM comment below).
 */
void shstk_release(struct task_struct *tsk)
{
	unsigned long base = 0, size = 0;

	/* If shadow stack is not supported or not enabled, nothing to release */
	if (!cpu_supports_shadow_stack() || !is_shstk_enabled(tsk))
		return;

	/*
	 * When fork() with CLONE_VM fails, the child (tsk) already has a
	 * shadow stack allocated, and exit_thread() calls this function to
	 * free it. In this case the parent (current) and the child share
	 * the same mm struct. Move forward only when they're same.
	 */
	if (!tsk->mm || tsk->mm != current->mm)
		return;

	/*
	 * We know shadow stack is enabled but if base is NULL, then
	 * this task is not managing its own shadow stack (CLONE_VFORK). So
	 * skip freeing it.
	 */
	base = get_shstk_base(tsk, &size);
	if (!base)
		return;

	vm_munmap(base, size);
	/* Clear the record so a second release is a no-op */
	set_shstk_base(tsk, 0, 0);
}

/*
 * PR_GET_SHADOW_STACK_STATUS backend: report @t's shadow stack enable bit
 * to userspace via @status. Returns 0 or -EFAULT/-EINVAL.
 */
int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long bcfi_status = 0;

	if (!cpu_supports_shadow_stack())
		return -EINVAL;

	/* this means shadow stack is enabled on the task */
	bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0);

	return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0;
}

/*
 * PR_SET_SHADOW_STACK_STATUS backend: enable or disable shadow stack for @t.
 * Enabling allocates a default-sized stack and makes it active; disabling
 * releases the task-owned mapping (see comment below for the VFORK case).
 */
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	unsigned long size = 0, addr = 0;
	bool enable_shstk = false;

	if (!cpu_supports_shadow_stack())
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	/* bcfi status is locked and further can't be modified by user */
	if (is_shstk_locked(t))
		return -EINVAL;

	enable_shstk = status & PR_SHADOW_STACK_ENABLE;
	/* Request is to enable shadow stack and shadow stack is not enabled already */
	if (enable_shstk && !is_shstk_enabled(t)) {
		/*
		 * shadow stack was allocated and enable request again
		 * no need to support such usecase and return EINVAL.
		 */
		if (is_shstk_allocated(t))
			return -EINVAL;

		size = calc_shstk_size(0);
		addr = allocate_shadow_stack(0, size, 0, false);
		if (IS_ERR_VALUE(addr))
			return -ENOMEM;
		set_shstk_base(t, addr, size);
		/* Stack grows down from base + size */
		set_active_shstk(t, addr + size);
	}

	/*
	 * If a request to disable shadow stack happens, let's go ahead and release it
	 * Although, if CLONE_VFORKed child did this, then in that case we will end up
	 * not releasing the shadow stack (because it might be needed in parent). Although
	 * we will disable it for VFORKed child. And if VFORKed child tries to enable again
	 * then in that case, it'll get entirely new shadow stack because following condition
	 * are true
	 * - shadow stack was not enabled for vforked child
	 * - shadow stack base was anyways pointing to 0
	 * This shouldn't be a big issue because we want parent to have availability of shadow
	 * stack whenever VFORKed child releases resources via exit or exec but at the same
	 * time we want VFORKed child to break away and establish new shadow stack if it desires
	 *
	 */
	if (!enable_shstk)
		shstk_release(t);

	set_shstk_status(t, enable_shstk);
	return 0;
}

/*
 * PR_LOCK_SHADOW_STACK_STATUS backend: lock @task's shadow stack state.
 * @arg must be 0; locking requires shadow stack to be enabled already.
 */
int arch_lock_shadow_stack_status(struct task_struct *task,
				  unsigned long arg)
{
	/* If shstk not supported or not enabled on task, nothing to lock here */
	if (!cpu_supports_shadow_stack() ||
	    !is_shstk_enabled(task) || arg != 0)
		return -EINVAL;

	set_shstk_lock(task);

	return 0;
}

/*
 * PR_GET_INDIR_BR_LP_STATUS backend: report @t's landing-pad enable bit
 * to userspace via @status. Returns 0 or -EFAULT/-EINVAL.
 */
int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long fcfi_status = 0;

	if (!cpu_supports_indirect_br_lp_instr())
		return -EINVAL;

	/* indirect branch tracking is enabled on the task or not */
	fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0);

	return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0;
}

/*
 * PR_SET_INDIR_BR_LP_STATUS backend: enable or disable indirect-branch
 * landing pads for @t. Unlike shadow stack, no allocation is needed.
 */
int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status)
{
	bool enable_indir_lp = false;

	if (!cpu_supports_indirect_br_lp_instr())
		return -EINVAL;

	/* indirect branch tracking is locked and further can't be modified by user */
	if (is_indir_lp_locked(t))
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_INDIR_BR_LP_ENABLE)
		return -EINVAL;

	enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE);
	set_indir_lp_status(t, enable_indir_lp);

	return 0;
}

/*
 * PR_LOCK_INDIR_BR_LP_STATUS backend: lock @task's landing-pad state.
 * @arg must be 0; locking requires landing pads to be enabled already.
 */
int arch_lock_indir_br_lp_status(struct task_struct *task,
				 unsigned long arg)
{
	/*
	 * If indirect branch tracking is not supported or not enabled on task,
	 * nothing to lock here
	 */
	if (!cpu_supports_indirect_br_lp_instr() ||
	    !is_indir_lp_enabled(task) || arg != 0)
		return -EINVAL;

	set_indir_lp_lock(task);

	return 0;
}