// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024 Rivos, Inc.
 * Deepak Gupta <debug@rivosinc.com>
 */

#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/user.h>
#include <linux/syscalls.h>
#include <linux/prctl.h>
#include <asm/csr.h>
#include <asm/usercfi.h>

/*
 * Bitmask of user CFI features disabled on the kernel command line via
 * "riscv_nousercfi=" (see setup_global_riscv_enable() at the bottom).
 */
unsigned long riscv_nousercfi __read_mostly;

/* One shadow stack entry holds a single XLEN-wide return address. */
#define SHSTK_ENTRY_SIZE sizeof(void *)

/* Is backward-edge CFI (shadow stack) currently enabled on @task? */
bool is_shstk_enabled(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ubcfi_en;
}

/* Does @task own a shadow stack allocation (non-zero recorded base)? */
bool is_shstk_allocated(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.shdw_stk_base;
}

/* Has the shadow stack state of @task been locked against further prctl changes? */
bool is_shstk_locked(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ubcfi_locked;
}

/* Record the base address and size of @task's shadow stack allocation. */
void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size)
{
	task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr;
	task->thread_info.user_cfi_state.shdw_stk_size = size;
}

/*
 * Return the recorded shadow stack base of @task; if @size is non-NULL,
 * also report the recorded size through it.
 */
unsigned long get_shstk_base(struct task_struct *task, unsigned long *size)
{
	if (size)
		*size = task->thread_info.user_cfi_state.shdw_stk_size;
	return task->thread_info.user_cfi_state.shdw_stk_base;
}

/* Record the active (current) user shadow stack pointer for @task. */
void set_active_shstk(struct task_struct *task, unsigned long shstk_addr)
{
	task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr;
}

/* Return the recorded active user shadow stack pointer of @task. */
unsigned long get_active_shstk(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.user_shdw_stk;
}

/*
 * Enable or disable shadow stack for @task: flip the per-task flag, mirror
 * it into the ENVCFG_SSE bit of the thread's saved envcfg, and write the
 * result to CSR_ENVCFG.
 *
 * NOTE(review): csr_write() updates the current hart's CSR; this assumes
 * either task == current or that thread.envcfg is restored on the next
 * context switch — confirm against the arch switch path.
 */
void set_shstk_status(struct task_struct *task, bool enable)
{
	/* No-op if shadow stack is unsupported or disabled on the cmdline. */
	if (!is_user_shstk_enabled())
		return;

	task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0;

	if (enable)
		task->thread.envcfg |= ENVCFG_SSE;
	else
		task->thread.envcfg &= ~ENVCFG_SSE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}

/* Lock (or unlock) @task's shadow stack state against user modification. */
void set_shstk_lock(struct task_struct *task, bool lock)
{
	task->thread_info.user_cfi_state.ubcfi_locked = lock;
}

/* Is forward-edge CFI (indirect landing pads) currently enabled on @task? */
bool is_indir_lp_enabled(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ufcfi_en;
}

/* Has @task's landing pad state been locked against further prctl changes? */
bool is_indir_lp_locked(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ufcfi_locked;
}

/*
 * Enable or disable indirect landing pads for @task: flip the per-task flag,
 * mirror it into the ENVCFG_LPE bit of the thread's saved envcfg, and write
 * the result to CSR_ENVCFG (same hart caveat as set_shstk_status()).
 */
void set_indir_lp_status(struct task_struct *task, bool enable)
{
	/* No-op if landing pads are unsupported or disabled on the cmdline. */
	if (!is_user_lpad_enabled())
		return;

	task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0;

	if (enable)
		task->thread.envcfg |= ENVCFG_LPE;
	else
		task->thread.envcfg &= ~ENVCFG_LPE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}

/* Lock (or unlock) @task's landing pad state against user modification. */
void set_indir_lp_lock(struct task_struct *task, bool lock)
{
	task->thread_info.user_cfi_state.ufcfi_locked = lock;
}

/*
 * If size is 0, then to be compatible with regular stack we want it to be as big as
 * regular stack. Else PAGE_ALIGN it and return back
 */
static unsigned long calc_shstk_size(unsigned long size)
{
	if (size)
		return PAGE_ALIGN(size);

	/* Default: RLIMIT_STACK, capped at 4 GiB, page-aligned. */
	return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
}

/*
 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen
 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to
 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow
 * stack.
 *
 * Atomically swaps @val into the shadow stack slot at @addr and returns the
 * previous value, or -1 if the access faulted (the exception table entry
 * redirects a fault in the ssamoswap to the `fault` label).
 */
static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val)
{
	/*
	 * Never expect -1 on shadow stack. Expect return addresses and zero
	 */
	unsigned long swap = -1;

	__enable_user_access();
	asm goto(".option push\n"
		 ".option arch, +zicfiss\n"
		 "1: ssamoswap.d %[swap], %[val], %[addr]\n"
		 _ASM_EXTABLE(1b, %l[fault])
		 ".option pop\n"
		 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr)
		 : [val] "r" (val)
		 : "memory"
		 : fault
		 );
	__disable_user_access();
	return swap;
fault:
	__disable_user_access();
	return -1;
}

/*
 * Create a restore token on the shadow stack. A token is always XLEN wide
 * and aligned to XLEN.
 *
 * Writes the token into the slot just below @ssp; if @token_addr is non-NULL,
 * the token's location is reported through it. Returns 0 on success,
 * -EINVAL on misalignment, -EFAULT if the shadow stack write faulted.
 */
static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
{
	unsigned long addr;

	/* Token must be aligned */
	if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
		return -EINVAL;

	/* On RISC-V we're constructing token to be function of address itself */
	addr = ssp - SHSTK_ENTRY_SIZE;

	if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1)
		return -EFAULT;

	if (token_addr)
		*token_addr = addr;

	return 0;
}

/*
 * Save user shadow stack pointer on the shadow stack itself and return a pointer to saved location.
 * Returns -EFAULT if unsuccessful.
 */
int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr)
{
	unsigned long ss_ptr = 0;
	unsigned long token_loc = 0;
	int ret = 0;

	if (!saved_shstk_ptr)
		return -EINVAL;

	ss_ptr = get_active_shstk(tsk);
	ret = create_rstor_token(ss_ptr, &token_loc);

	if (!ret) {
		/* The active shadow stack pointer now sits below the token. */
		*saved_shstk_ptr = token_loc;
		set_active_shstk(tsk, token_loc);
	}

	return ret;
}

/*
 * Restores the user shadow stack pointer from the token on the shadow stack for task 'tsk'.
 * Returns -EFAULT if unsuccessful.
 */
int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr)
{
	unsigned long token = 0;

	/* Swap 0 into the token slot: consumes the token while reading it. */
	token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0);

	if (token == -1)
		return -EFAULT;

	/* invalid token, return EINVAL */
	if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) {
		pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n",
				    tsk->comm, task_pid_nr(tsk), __func__,
				    (void *)(task_pt_regs(tsk)->epc),
				    (void *)(task_pt_regs(tsk)->sp),
				    (void *)token, (void *)shstk_ptr);
		return -EINVAL;
	}

	/* all checks passed, set active shstk and return success */
	set_active_shstk(tsk, token);
	return 0;
}

/*
 * Map an anonymous VM_SHADOW_STACK region of @size bytes (at @addr if
 * non-zero, via MAP_FIXED_NOREPLACE). If @set_tok, plant a restore token
 * just below @addr + @token_offset; on token failure the mapping is torn
 * down and -EINVAL returned. Returns the mapped address or an error value.
 */
static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
					   unsigned long token_offset, bool set_tok)
{
	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
	struct mm_struct *mm = current->mm;
	unsigned long populate;

	if (addr)
		flags |= MAP_FIXED_NOREPLACE;

	mmap_write_lock(mm);
	/* VM_WRITE without PROT_WRITE: writable only via shadow stack ops. */
	addr = do_mmap(NULL, addr, size, PROT_READ, flags,
		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
	mmap_write_unlock(mm);

	if (!set_tok || IS_ERR_VALUE(addr))
		goto out;

	if (create_rstor_token(addr + token_offset, NULL)) {
		vm_munmap(addr, size);
		return -EINVAL;
	}

out:
	return addr;
}

SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
	unsigned long aligned_size = 0;

	if (!is_user_shstk_enabled())
		return -EOPNOTSUPP;

	/* Anything other than set token should result in invalid param */
	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	/*
	 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available
	 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction
	 * itself. This provides static property on register programming and writes to CSR can't
	 * be unintentional from programmer's perspective. As long as programmer has guarded areas
	 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since
	 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent
	 * to allocation. Although in order to provide portability with other architectures (because
	 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token
	 * flag in flags and if provided in flags, will setup a token at the base.
	 */

	/* If there isn't space for a token */
	if (set_tok && size < SHSTK_ENTRY_SIZE)
		return -ENOSPC;

	/* A fixed address must be page-aligned. */
	if (addr && (addr & (PAGE_SIZE - 1)))
		return -EINVAL;

	aligned_size = PAGE_ALIGN(size);
	if (aligned_size < size)
		return -EOVERFLOW;

	/* Token goes at the requested (unaligned) top: offset = size. */
	return allocate_shadow_stack(addr, aligned_size, size, set_tok);
}

/*
 * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for
 * cases where CLONE_VM is specified and thus a different stack is specified by user. We
 * thus need a separate shadow stack too. How a separate shadow stack is specified by
 * user is still being debated. Once that's settled, remove this part of the comment.
 * This function simply returns 0 if shadow stacks are not supported or if separate shadow
 * stack allocation is not needed (like in case of !CLONE_VM)
 */
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
				       const struct kernel_clone_args *args)
{
	unsigned long addr, size;

	/* If shadow stack is not supported, return 0 */
	if (!is_user_shstk_enabled())
		return 0;

	/*
	 * If shadow stack is not enabled on the new thread, skip any
	 * switch to a new shadow stack.
	 */
	if (!is_shstk_enabled(tsk))
		return 0;

	/*
	 * For CLONE_VFORK the child will share the parent's shadow stack.
	 * Set base = 0 and size = 0, this is special means to track this state
	 * so the freeing logic run for child knows to leave it alone.
	 */
	if (args->flags & CLONE_VFORK) {
		set_shstk_base(tsk, 0, 0);
		return 0;
	}

	/*
	 * For !CLONE_VM the child will use a copy of the parent's shadow
	 * stack.
	 */
	if (!(args->flags & CLONE_VM))
		return 0;

	/*
	 * reaching here means, CLONE_VM was specified and thus a separate shadow
	 * stack is needed for new cloned thread. Note: below allocation is happening
	 * using current mm.
	 */
	size = calc_shstk_size(args->stack_size);
	addr = allocate_shadow_stack(0, size, 0, false);
	if (IS_ERR_VALUE(addr))
		return addr;

	set_shstk_base(tsk, addr, size);

	/* Shadow stack grows downward: the new thread starts at the top. */
	return addr + size;
}

/*
 * Free @tsk's shadow stack mapping, if it owns one and shares an mm with
 * current. Called from task teardown (see mm-sharing comment below).
 */
void shstk_release(struct task_struct *tsk)
{
	unsigned long base = 0, size = 0;

	/* If shadow stack is not supported or not enabled, nothing to release */
	if (!is_user_shstk_enabled() || !is_shstk_enabled(tsk))
		return;

	/*
	 * When fork() with CLONE_VM fails, the child (tsk) already has a
	 * shadow stack allocated, and exit_thread() calls this function to
	 * free it. In this case the parent (current) and the child share
	 * the same mm struct. Move forward only when they're same.
	 */
	if (!tsk->mm || tsk->mm != current->mm)
		return;

	/*
	 * We know shadow stack is enabled but if base is NULL, then
	 * this task is not managing its own shadow stack (CLONE_VFORK). So
	 * skip freeing it.
	 */
	base = get_shstk_base(tsk, &size);
	if (!base)
		return;

	vm_munmap(base, size);
	set_shstk_base(tsk, 0, 0);
}

/*
 * prctl(PR_GET_SHADOW_STACK_STATUS) backend: report @t's shadow stack
 * enable bit to the user pointer @status.
 */
int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long bcfi_status = 0;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* this means shadow stack is enabled on the task */
	bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0);

	return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0;
}

/*
 * prctl(PR_SET_SHADOW_STACK_STATUS) backend: enable (allocating a fresh
 * shadow stack) or disable (releasing it) shadow stack for @t, unless the
 * state has been locked.
 */
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	unsigned long size = 0, addr = 0;
	bool enable_shstk = false;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	/* bcfi status is locked and further can't be modified by user */
	if (is_shstk_locked(t))
		return -EINVAL;

	enable_shstk = status & PR_SHADOW_STACK_ENABLE;
	/* Request is to enable shadow stack and shadow stack is not enabled already */
	if (enable_shstk && !is_shstk_enabled(t)) {
		/* shadow stack was allocated and enable request again
		 * no need to support such usecase and return EINVAL.
		 */
		if (is_shstk_allocated(t))
			return -EINVAL;

		size = calc_shstk_size(0);
		addr = allocate_shadow_stack(0, size, 0, false);
		if (IS_ERR_VALUE(addr))
			return -ENOMEM;
		set_shstk_base(t, addr, size);
		set_active_shstk(t, addr + size);
	}

	/*
	 * If a request to disable shadow stack happens, let's go ahead and release it
	 * Although, if CLONE_VFORKed child did this, then in that case we will end up
	 * not releasing the shadow stack (because it might be needed in parent). Although
	 * we will disable it for VFORKed child. And if VFORKed child tries to enable again
	 * then in that case, it'll get entirely new shadow stack because following condition
	 * are true
	 * - shadow stack was not enabled for vforked child
	 * - shadow stack base was anyways pointing to 0
	 * This shouldn't be a big issue because we want parent to have availability of shadow
	 * stack whenever VFORKed child releases resources via exit or exec but at the same
	 * time we want VFORKed child to break away and establish new shadow stack if it desires
	 *
	 */
	if (!enable_shstk)
		shstk_release(t);

	set_shstk_status(t, enable_shstk);
	return 0;
}

/*
 * prctl(PR_LOCK_SHADOW_STACK_STATUS) backend: lock @task's shadow stack
 * state. @arg must be 0 (no lock flags defined).
 */
int arch_lock_shadow_stack_status(struct task_struct *task,
				  unsigned long arg)
{
	/* If shstk not supported or not enabled on task, nothing to lock here */
	if (!is_user_shstk_enabled() ||
	    !is_shstk_enabled(task) || arg != 0)
		return -EINVAL;

	set_shstk_lock(task, true);

	return 0;
}

/*
 * prctl backend: report @t's landing pad enable/lock state to the user
 * pointer @state.
 */
int arch_prctl_get_branch_landing_pad_state(struct task_struct *t,
					    unsigned long __user *state)
{
	unsigned long fcfi_status = 0;

	if (!is_user_lpad_enabled())
		return -EINVAL;

	fcfi_status = (is_indir_lp_enabled(t) ? PR_CFI_ENABLE : PR_CFI_DISABLE);
	fcfi_status |= (is_indir_lp_locked(t) ? PR_CFI_LOCK : 0);

	return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0;
}

/*
 * prctl backend: enable or disable landing pads for @t. Exactly one of
 * PR_CFI_ENABLE / PR_CFI_DISABLE must be set, and the state must not be
 * locked.
 */
int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state)
{
	if (!is_user_lpad_enabled())
		return -EINVAL;

	/* indirect branch tracking is locked and further can't be modified by user */
	if (is_indir_lp_locked(t))
		return -EINVAL;

	if (!(state & (PR_CFI_ENABLE | PR_CFI_DISABLE)))
		return -EINVAL;

	if (state & PR_CFI_ENABLE && state & PR_CFI_DISABLE)
		return -EINVAL;

	set_indir_lp_status(t, !!(state & PR_CFI_ENABLE));

	return 0;
}

/* prctl backend: lock @task's landing pad state against user modification. */
int arch_prctl_lock_branch_landing_pad_state(struct task_struct *task)
{
	/*
	 * If indirect branch tracking is not supported or not enabled on task,
	 * nothing to lock here
	 */
	if (!is_user_lpad_enabled() ||
	    !is_indir_lp_enabled(task))
		return -EINVAL;

	set_indir_lp_lock(task, true);

	return 0;
}

/* Shadow stack available: hardware supports it and cmdline didn't disable it. */
bool is_user_shstk_enabled(void)
{
	return (cpu_supports_shadow_stack() &&
		!(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI));
}

/* Landing pads available: hardware supports them and cmdline didn't disable them. */
bool is_user_lpad_enabled(void)
{
	return (cpu_supports_indirect_br_lp_instr() &&
		!(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI));
}

/*
 * Parse "riscv_nousercfi=": "all" disables both CFI features, "fcfi"
 * disables landing pads only, "bcfi" disables shadow stack only.
 */
static int __init setup_global_riscv_enable(char *str)
{
	if (strcmp(str, "all") == 0)
		riscv_nousercfi = CMDLINE_DISABLE_RISCV_USERCFI;

	if (strcmp(str, "fcfi") == 0)
		riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_FCFI;

	if (strcmp(str, "bcfi") == 0)
		riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_BCFI;

	if (riscv_nousercfi)
		pr_info("RISC-V user CFI disabled via cmdline - shadow stack status : %s, landing pad status : %s\n",
			(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI) ? "disabled" :
			"enabled", (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI) ?
			"disabled" : "enabled");

	return 1;
}

__setup("riscv_nousercfi=", setup_global_riscv_enable);