1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2024 Rivos, Inc. 4 * Deepak Gupta <debug@rivosinc.com> 5 */ 6 7 #include <linux/sched.h> 8 #include <linux/bitops.h> 9 #include <linux/types.h> 10 #include <linux/mm.h> 11 #include <linux/mman.h> 12 #include <linux/uaccess.h> 13 #include <linux/sizes.h> 14 #include <linux/user.h> 15 #include <linux/syscalls.h> 16 #include <linux/prctl.h> 17 #include <asm/csr.h> 18 #include <asm/usercfi.h> 19 20 unsigned long riscv_nousercfi __read_mostly; 21 22 #define SHSTK_ENTRY_SIZE sizeof(void *) 23 24 bool is_shstk_enabled(struct task_struct *task) 25 { 26 return task->thread_info.user_cfi_state.ubcfi_en; 27 } 28 29 bool is_shstk_allocated(struct task_struct *task) 30 { 31 return task->thread_info.user_cfi_state.shdw_stk_base; 32 } 33 34 bool is_shstk_locked(struct task_struct *task) 35 { 36 return task->thread_info.user_cfi_state.ubcfi_locked; 37 } 38 39 void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size) 40 { 41 task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr; 42 task->thread_info.user_cfi_state.shdw_stk_size = size; 43 } 44 45 unsigned long get_shstk_base(struct task_struct *task, unsigned long *size) 46 { 47 if (size) 48 *size = task->thread_info.user_cfi_state.shdw_stk_size; 49 return task->thread_info.user_cfi_state.shdw_stk_base; 50 } 51 52 void set_active_shstk(struct task_struct *task, unsigned long shstk_addr) 53 { 54 task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr; 55 } 56 57 unsigned long get_active_shstk(struct task_struct *task) 58 { 59 return task->thread_info.user_cfi_state.user_shdw_stk; 60 } 61 62 void set_shstk_status(struct task_struct *task, bool enable) 63 { 64 if (!is_user_shstk_enabled()) 65 return; 66 67 task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0; 68 69 if (enable) 70 task->thread.envcfg |= ENVCFG_SSE; 71 else 72 task->thread.envcfg &= ~ENVCFG_SSE; 73 74 csr_write(CSR_ENVCFG, task->thread.envcfg); 75 } 76 77 void set_shstk_lock(struct task_struct *task, bool lock) 78 { 79 task->thread_info.user_cfi_state.ubcfi_locked = lock; 80 } 81 82 bool is_indir_lp_enabled(struct task_struct *task) 83 { 84 return task->thread_info.user_cfi_state.ufcfi_en; 85 } 86 87 bool is_indir_lp_locked(struct task_struct *task) 88 { 89 return task->thread_info.user_cfi_state.ufcfi_locked; 90 } 91 92 void set_indir_lp_status(struct task_struct *task, bool enable) 93 { 94 if (!is_user_lpad_enabled()) 95 return; 96 97 task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0; 98 99 if (enable) 100 task->thread.envcfg |= ENVCFG_LPE; 101 else 102 task->thread.envcfg &= ~ENVCFG_LPE; 103 104 csr_write(CSR_ENVCFG, task->thread.envcfg); 105 } 106 107 void set_indir_lp_lock(struct task_struct *task, bool lock) 108 { 109 task->thread_info.user_cfi_state.ufcfi_locked = lock; 110 } 111 /* 112 * The shadow stack only stores the return address and not any variables 113 * this should be more than sufficient for most applications. 114 * Else PAGE_ALIGN it and return back 115 */ 116 static unsigned long calc_shstk_size(unsigned long size) 117 { 118 if (size) 119 return PAGE_ALIGN(size); 120 121 return PAGE_ALIGN(min(rlimit(RLIMIT_STACK) / 2, SZ_2G)); 122 } 123 124 /* 125 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen 126 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to 127 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow 128 * stack. 129 */ 130 static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val) 131 { 132 /* 133 * Never expect -1 on shadow stack. Expect return addresses and zero 134 */ 135 unsigned long swap = -1; 136 137 __enable_user_access(); 138 asm goto(".option push\n" 139 ".option arch, +zicfiss\n" 140 "1: ssamoswap.d %[swap], %[val], %[addr]\n" 141 _ASM_EXTABLE(1b, %l[fault]) 142 ".option pop\n" 143 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr) 144 : [val] "r" (val) 145 : "memory" 146 : fault 147 ); 148 __disable_user_access(); 149 return swap; 150 fault: 151 __disable_user_access(); 152 return -1; 153 } 154 155 /* 156 * Create a restore token on the shadow stack. A token is always XLEN wide 157 * and aligned to XLEN. 158 */ 159 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr) 160 { 161 unsigned long addr; 162 163 /* Token must be aligned */ 164 if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE)) 165 return -EINVAL; 166 167 /* On RISC-V we're constructing token to be function of address itself */ 168 addr = ssp - SHSTK_ENTRY_SIZE; 169 170 if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1) 171 return -EFAULT; 172 173 if (token_addr) 174 *token_addr = addr; 175 176 return 0; 177 } 178 179 /* 180 * Save user shadow stack pointer on the shadow stack itself and return a pointer to saved location. 181 * Returns -EFAULT if unsuccessful. 182 */ 183 int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr) 184 { 185 unsigned long ss_ptr = 0; 186 unsigned long token_loc = 0; 187 int ret = 0; 188 189 if (!saved_shstk_ptr) 190 return -EINVAL; 191 192 ss_ptr = get_active_shstk(tsk); 193 ret = create_rstor_token(ss_ptr, &token_loc); 194 195 if (!ret) { 196 *saved_shstk_ptr = token_loc; 197 set_active_shstk(tsk, token_loc); 198 } 199 200 return ret; 201 } 202 203 /* 204 * Restores the user shadow stack pointer from the token on the shadow stack for task 'tsk'. 205 * Returns -EFAULT if unsuccessful. 206 */ 207 int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr) 208 { 209 unsigned long token = 0; 210 211 token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0); 212 213 if (token == -1) 214 return -EFAULT; 215 216 /* invalid token, return EINVAL */ 217 if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) { 218 pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n", 219 tsk->comm, task_pid_nr(tsk), __func__, 220 (void *)(task_pt_regs(tsk)->epc), 221 (void *)(task_pt_regs(tsk)->sp), 222 (void *)token, (void *)shstk_ptr); 223 return -EINVAL; 224 } 225 226 /* all checks passed, set active shstk and return success */ 227 set_active_shstk(tsk, token); 228 return 0; 229 } 230 231 static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size, 232 unsigned long token_offset, bool set_tok) 233 { 234 addr = vm_mmap_shadow_stack(addr, size, 0); 235 236 if (!set_tok || IS_ERR_VALUE(addr)) 237 goto out; 238 239 if (create_rstor_token(addr + token_offset, NULL)) { 240 vm_munmap(addr, size); 241 return -EINVAL; 242 } 243 244 out: 245 return addr; 246 } 247 248 SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags) 249 { 250 bool set_tok = flags & SHADOW_STACK_SET_TOKEN; 251 unsigned long aligned_size = 0; 252 253 if (!is_user_shstk_enabled()) 254 return -EOPNOTSUPP; 255 256 /* Anything other than set token should result in invalid param */ 257 if (flags & ~SHADOW_STACK_SET_TOKEN) 258 return -EINVAL; 259 260 /* 261 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available 262 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction 263 * itself. This provides static property on register programming and writes to CSR can't 264 * be unintentional from programmer's perspective. As long as programmer has guarded areas 265 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since 266 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent 267 * to allocation. Although in order to provide portablity with other architectures (because 268 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token 269 * flag in flags and if provided in flags, will setup a token at the base. 270 */ 271 272 /* If there isn't space for a token */ 273 if (set_tok && size < SHSTK_ENTRY_SIZE) 274 return -ENOSPC; 275 276 if (addr && (addr & (PAGE_SIZE - 1))) 277 return -EINVAL; 278 279 aligned_size = PAGE_ALIGN(size); 280 if (aligned_size < size) 281 return -EOVERFLOW; 282 283 return allocate_shadow_stack(addr, aligned_size, size, set_tok); 284 } 285 286 /* 287 * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for 288 * cases where CLONE_VM is specified and thus a different stack is specified by user. We 289 * thus need a separate shadow stack too. How a separate shadow stack is specified by 290 * user is still being debated. Once that's settled, remove this part of the comment. 291 * This function simply returns 0 if shadow stacks are not supported or if separate shadow 292 * stack allocation is not needed (like in case of !CLONE_VM) 293 */ 294 unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, 295 const struct kernel_clone_args *args) 296 { 297 unsigned long addr, size; 298 299 /* If shadow stack is not supported, return 0 */ 300 if (!is_user_shstk_enabled()) 301 return 0; 302 303 /* 304 * If shadow stack is not enabled on the new thread, skip any 305 * switch to a new shadow stack. 306 */ 307 if (!is_shstk_enabled(tsk)) 308 return 0; 309 310 /* 311 * For CLONE_VFORK the child will share the parents shadow stack. 312 * Set base = 0 and size = 0, this is special means to track this state 313 * so the freeing logic run for child knows to leave it alone. 314 */ 315 if (args->flags & CLONE_VFORK) { 316 set_shstk_base(tsk, 0, 0); 317 return 0; 318 } 319 320 /* 321 * For !CLONE_VM the child will use a copy of the parents shadow 322 * stack. 323 */ 324 if (!(args->flags & CLONE_VM)) 325 return 0; 326 327 /* 328 * reaching here means, CLONE_VM was specified and thus a separate shadow 329 * stack is needed for new cloned thread. Note: below allocation is happening 330 * using current mm. 331 */ 332 size = calc_shstk_size(args->stack_size); 333 addr = allocate_shadow_stack(0, size, 0, false); 334 if (IS_ERR_VALUE(addr)) 335 return addr; 336 337 set_shstk_base(tsk, addr, size); 338 339 return addr + size; 340 } 341 342 void shstk_release(struct task_struct *tsk) 343 { 344 unsigned long base = 0, size = 0; 345 /* If shadow stack is not supported or not enabled, nothing to release */ 346 if (!is_user_shstk_enabled() || !is_shstk_enabled(tsk)) 347 return; 348 349 /* 350 * When fork() with CLONE_VM fails, the child (tsk) already has a 351 * shadow stack allocated, and exit_thread() calls this function to 352 * free it. In this case the parent (current) and the child share 353 * the same mm struct. Move forward only when they're same. 354 */ 355 if (!tsk->mm || tsk->mm != current->mm) 356 return; 357 358 /* 359 * We know shadow stack is enabled but if base is NULL, then 360 * this task is not managing its own shadow stack (CLONE_VFORK). So 361 * skip freeing it. 362 */ 363 base = get_shstk_base(tsk, &size); 364 if (!base) 365 return; 366 367 vm_munmap(base, size); 368 set_shstk_base(tsk, 0, 0); 369 } 370 371 int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status) 372 { 373 unsigned long bcfi_status = 0; 374 375 if (!is_user_shstk_enabled()) 376 return -EINVAL; 377 378 /* this means shadow stack is enabled on the task */ 379 bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0); 380 381 return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0; 382 } 383 384 int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status) 385 { 386 unsigned long size = 0, addr = 0; 387 bool enable_shstk = false; 388 389 if (!is_user_shstk_enabled()) 390 return -EINVAL; 391 392 /* Reject unknown flags */ 393 if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) 394 return -EINVAL; 395 396 /* bcfi status is locked and further can't be modified by user */ 397 if (is_shstk_locked(t)) 398 return -EINVAL; 399 400 enable_shstk = status & PR_SHADOW_STACK_ENABLE; 401 /* Request is to enable shadow stack and shadow stack is not enabled already */ 402 if (enable_shstk && !is_shstk_enabled(t)) { 403 /* shadow stack was allocated and enable request again 404 * no need to support such usecase and return EINVAL. 405 */ 406 if (is_shstk_allocated(t)) 407 return -EINVAL; 408 409 size = calc_shstk_size(0); 410 addr = allocate_shadow_stack(0, size, 0, false); 411 if (IS_ERR_VALUE(addr)) 412 return -ENOMEM; 413 set_shstk_base(t, addr, size); 414 set_active_shstk(t, addr + size); 415 } 416 417 /* 418 * If a request to disable shadow stack happens, let's go ahead and release it 419 * Although, if CLONE_VFORKed child did this, then in that case we will end up 420 * not releasing the shadow stack (because it might be needed in parent). Although 421 * we will disable it for VFORKed child. And if VFORKed child tries to enable again 422 * then in that case, it'll get entirely new shadow stack because following condition 423 * are true 424 * - shadow stack was not enabled for vforked child 425 * - shadow stack base was anyways pointing to 0 426 * This shouldn't be a big issue because we want parent to have availability of shadow 427 * stack whenever VFORKed child releases resources via exit or exec but at the same 428 * time we want VFORKed child to break away and establish new shadow stack if it desires 429 * 430 */ 431 if (!enable_shstk) 432 shstk_release(t); 433 434 set_shstk_status(t, enable_shstk); 435 return 0; 436 } 437 438 int arch_lock_shadow_stack_status(struct task_struct *task, 439 unsigned long arg) 440 { 441 /* If shtstk not supported or not enabled on task, nothing to lock here */ 442 if (!is_user_shstk_enabled() || 443 !is_shstk_enabled(task) || arg != 0) 444 return -EINVAL; 445 446 set_shstk_lock(task, true); 447 448 return 0; 449 } 450 451 int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, 452 unsigned long __user *state) 453 { 454 unsigned long fcfi_status = 0; 455 456 if (!is_user_lpad_enabled()) 457 return -EINVAL; 458 459 fcfi_status = (is_indir_lp_enabled(t) ? PR_CFI_ENABLE : PR_CFI_DISABLE); 460 fcfi_status |= (is_indir_lp_locked(t) ? PR_CFI_LOCK : 0); 461 462 return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; 463 } 464 465 int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) 466 { 467 if (!is_user_lpad_enabled()) 468 return -EINVAL; 469 470 /* indirect branch tracking is locked and further can't be modified by user */ 471 if (is_indir_lp_locked(t)) 472 return -EINVAL; 473 474 if (!(state & (PR_CFI_ENABLE | PR_CFI_DISABLE))) 475 return -EINVAL; 476 477 if (state & PR_CFI_ENABLE && state & PR_CFI_DISABLE) 478 return -EINVAL; 479 480 set_indir_lp_status(t, !!(state & PR_CFI_ENABLE)); 481 482 return 0; 483 } 484 485 int arch_prctl_lock_branch_landing_pad_state(struct task_struct *task) 486 { 487 /* 488 * If indirect branch tracking is not supported or not enabled on task, 489 * nothing to lock here 490 */ 491 if (!is_user_lpad_enabled() || 492 !is_indir_lp_enabled(task)) 493 return -EINVAL; 494 495 set_indir_lp_lock(task, true); 496 497 return 0; 498 } 499 500 bool is_user_shstk_enabled(void) 501 { 502 return (cpu_supports_shadow_stack() && 503 !(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI)); 504 } 505 506 bool is_user_lpad_enabled(void) 507 { 508 return (cpu_supports_indirect_br_lp_instr() && 509 !(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI)); 510 } 511 512 static int __init setup_global_riscv_enable(char *str) 513 { 514 if (strcmp(str, "all") == 0) 515 riscv_nousercfi = CMDLINE_DISABLE_RISCV_USERCFI; 516 517 if (strcmp(str, "fcfi") == 0) 518 riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_FCFI; 519 520 if (strcmp(str, "bcfi") == 0) 521 riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_BCFI; 522 523 if (riscv_nousercfi) 524 pr_info("RISC-V user CFI disabled via cmdline - shadow stack status : %s, landing pad status : %s\n", 525 (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI) ? "disabled" : 526 "enabled", (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI) ? 527 "disabled" : "enabled"); 528 529 return 1; 530 } 531 532 __setup("riscv_nousercfi=", setup_global_riscv_enable); 533