1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2024 Rivos, Inc. 4 * Deepak Gupta <debug@rivosinc.com> 5 */ 6 7 #include <linux/sched.h> 8 #include <linux/bitops.h> 9 #include <linux/types.h> 10 #include <linux/mm.h> 11 #include <linux/mman.h> 12 #include <linux/uaccess.h> 13 #include <linux/sizes.h> 14 #include <linux/user.h> 15 #include <linux/syscalls.h> 16 #include <linux/prctl.h> 17 #include <asm/csr.h> 18 #include <asm/usercfi.h> 19 20 unsigned long riscv_nousercfi __read_mostly; 21 22 #define SHSTK_ENTRY_SIZE sizeof(void *) 23 24 bool is_shstk_enabled(struct task_struct *task) 25 { 26 return task->thread_info.user_cfi_state.ubcfi_en; 27 } 28 29 bool is_shstk_allocated(struct task_struct *task) 30 { 31 return task->thread_info.user_cfi_state.shdw_stk_base; 32 } 33 34 bool is_shstk_locked(struct task_struct *task) 35 { 36 return task->thread_info.user_cfi_state.ubcfi_locked; 37 } 38 39 void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size) 40 { 41 task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr; 42 task->thread_info.user_cfi_state.shdw_stk_size = size; 43 } 44 45 unsigned long get_shstk_base(struct task_struct *task, unsigned long *size) 46 { 47 if (size) 48 *size = task->thread_info.user_cfi_state.shdw_stk_size; 49 return task->thread_info.user_cfi_state.shdw_stk_base; 50 } 51 52 void set_active_shstk(struct task_struct *task, unsigned long shstk_addr) 53 { 54 task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr; 55 } 56 57 unsigned long get_active_shstk(struct task_struct *task) 58 { 59 return task->thread_info.user_cfi_state.user_shdw_stk; 60 } 61 62 void set_shstk_status(struct task_struct *task, bool enable) 63 { 64 if (!is_user_shstk_enabled()) 65 return; 66 67 task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0; 68 69 if (enable) 70 task->thread.envcfg |= ENVCFG_SSE; 71 else 72 task->thread.envcfg &= ~ENVCFG_SSE; 73 74 csr_write(CSR_ENVCFG, task->thread.envcfg); 75 } 76 77 void set_shstk_lock(struct task_struct *task, bool lock) 78 { 79 task->thread_info.user_cfi_state.ubcfi_locked = lock; 80 } 81 82 bool is_indir_lp_enabled(struct task_struct *task) 83 { 84 return task->thread_info.user_cfi_state.ufcfi_en; 85 } 86 87 bool is_indir_lp_locked(struct task_struct *task) 88 { 89 return task->thread_info.user_cfi_state.ufcfi_locked; 90 } 91 92 void set_indir_lp_status(struct task_struct *task, bool enable) 93 { 94 if (!is_user_lpad_enabled()) 95 return; 96 97 task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0; 98 99 if (enable) 100 task->thread.envcfg |= ENVCFG_LPE; 101 else 102 task->thread.envcfg &= ~ENVCFG_LPE; 103 104 csr_write(CSR_ENVCFG, task->thread.envcfg); 105 } 106 107 void set_indir_lp_lock(struct task_struct *task, bool lock) 108 { 109 task->thread_info.user_cfi_state.ufcfi_locked = lock; 110 } 111 /* 112 * If size is 0, then to be compatible with regular stack we want it to be as big as 113 * regular stack. Else PAGE_ALIGN it and return back 114 */ 115 static unsigned long calc_shstk_size(unsigned long size) 116 { 117 if (size) 118 return PAGE_ALIGN(size); 119 120 return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G)); 121 } 122 123 /* 124 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen 125 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to 126 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow 127 * stack. 128 */ 129 static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val) 130 { 131 /* 132 * Never expect -1 on shadow stack. Expect return addresses and zero 133 */ 134 unsigned long swap = -1; 135 136 __enable_user_access(); 137 asm goto(".option push\n" 138 ".option arch, +zicfiss\n" 139 "1: ssamoswap.d %[swap], %[val], %[addr]\n" 140 _ASM_EXTABLE(1b, %l[fault]) 141 ".option pop\n" 142 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr) 143 : [val] "r" (val) 144 : "memory" 145 : fault 146 ); 147 __disable_user_access(); 148 return swap; 149 fault: 150 __disable_user_access(); 151 return -1; 152 } 153 154 /* 155 * Create a restore token on the shadow stack. A token is always XLEN wide 156 * and aligned to XLEN. 157 */ 158 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr) 159 { 160 unsigned long addr; 161 162 /* Token must be aligned */ 163 if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE)) 164 return -EINVAL; 165 166 /* On RISC-V we're constructing token to be function of address itself */ 167 addr = ssp - SHSTK_ENTRY_SIZE; 168 169 if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1) 170 return -EFAULT; 171 172 if (token_addr) 173 *token_addr = addr; 174 175 return 0; 176 } 177 178 /* 179 * Save user shadow stack pointer on the shadow stack itself and return a pointer to saved location. 180 * Returns -EFAULT if unsuccessful. 181 */ 182 int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr) 183 { 184 unsigned long ss_ptr = 0; 185 unsigned long token_loc = 0; 186 int ret = 0; 187 188 if (!saved_shstk_ptr) 189 return -EINVAL; 190 191 ss_ptr = get_active_shstk(tsk); 192 ret = create_rstor_token(ss_ptr, &token_loc); 193 194 if (!ret) { 195 *saved_shstk_ptr = token_loc; 196 set_active_shstk(tsk, token_loc); 197 } 198 199 return ret; 200 } 201 202 /* 203 * Restores the user shadow stack pointer from the token on the shadow stack for task 'tsk'. 204 * Returns -EFAULT if unsuccessful. 205 */ 206 int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr) 207 { 208 unsigned long token = 0; 209 210 token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0); 211 212 if (token == -1) 213 return -EFAULT; 214 215 /* invalid token, return EINVAL */ 216 if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) { 217 pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n", 218 tsk->comm, task_pid_nr(tsk), __func__, 219 (void *)(task_pt_regs(tsk)->epc), 220 (void *)(task_pt_regs(tsk)->sp), 221 (void *)token, (void *)shstk_ptr); 222 return -EINVAL; 223 } 224 225 /* all checks passed, set active shstk and return success */ 226 set_active_shstk(tsk, token); 227 return 0; 228 } 229 230 static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size, 231 unsigned long token_offset, bool set_tok) 232 { 233 addr = vm_mmap_shadow_stack(addr, size, 0); 234 235 if (!set_tok || IS_ERR_VALUE(addr)) 236 goto out; 237 238 if (create_rstor_token(addr + token_offset, NULL)) { 239 vm_munmap(addr, size); 240 return -EINVAL; 241 } 242 243 out: 244 return addr; 245 } 246 247 SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags) 248 { 249 bool set_tok = flags & SHADOW_STACK_SET_TOKEN; 250 unsigned long aligned_size = 0; 251 252 if (!is_user_shstk_enabled()) 253 return -EOPNOTSUPP; 254 255 /* Anything other than set token should result in invalid param */ 256 if (flags & ~SHADOW_STACK_SET_TOKEN) 257 return -EINVAL; 258 259 /* 260 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available 261 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction 262 * itself. This provides static property on register programming and writes to CSR can't 263 * be unintentional from programmer's perspective. As long as programmer has guarded areas 264 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since 265 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent 266 * to allocation. Although in order to provide portablity with other architectures (because 267 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token 268 * flag in flags and if provided in flags, will setup a token at the base. 269 */ 270 271 /* If there isn't space for a token */ 272 if (set_tok && size < SHSTK_ENTRY_SIZE) 273 return -ENOSPC; 274 275 if (addr && (addr & (PAGE_SIZE - 1))) 276 return -EINVAL; 277 278 aligned_size = PAGE_ALIGN(size); 279 if (aligned_size < size) 280 return -EOVERFLOW; 281 282 return allocate_shadow_stack(addr, aligned_size, size, set_tok); 283 } 284 285 /* 286 * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for 287 * cases where CLONE_VM is specified and thus a different stack is specified by user. We 288 * thus need a separate shadow stack too. How a separate shadow stack is specified by 289 * user is still being debated. Once that's settled, remove this part of the comment. 290 * This function simply returns 0 if shadow stacks are not supported or if separate shadow 291 * stack allocation is not needed (like in case of !CLONE_VM) 292 */ 293 unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, 294 const struct kernel_clone_args *args) 295 { 296 unsigned long addr, size; 297 298 /* If shadow stack is not supported, return 0 */ 299 if (!is_user_shstk_enabled()) 300 return 0; 301 302 /* 303 * If shadow stack is not enabled on the new thread, skip any 304 * switch to a new shadow stack. 305 */ 306 if (!is_shstk_enabled(tsk)) 307 return 0; 308 309 /* 310 * For CLONE_VFORK the child will share the parents shadow stack. 311 * Set base = 0 and size = 0, this is special means to track this state 312 * so the freeing logic run for child knows to leave it alone. 313 */ 314 if (args->flags & CLONE_VFORK) { 315 set_shstk_base(tsk, 0, 0); 316 return 0; 317 } 318 319 /* 320 * For !CLONE_VM the child will use a copy of the parents shadow 321 * stack. 322 */ 323 if (!(args->flags & CLONE_VM)) 324 return 0; 325 326 /* 327 * reaching here means, CLONE_VM was specified and thus a separate shadow 328 * stack is needed for new cloned thread. Note: below allocation is happening 329 * using current mm. 330 */ 331 size = calc_shstk_size(args->stack_size); 332 addr = allocate_shadow_stack(0, size, 0, false); 333 if (IS_ERR_VALUE(addr)) 334 return addr; 335 336 set_shstk_base(tsk, addr, size); 337 338 return addr + size; 339 } 340 341 void shstk_release(struct task_struct *tsk) 342 { 343 unsigned long base = 0, size = 0; 344 /* If shadow stack is not supported or not enabled, nothing to release */ 345 if (!is_user_shstk_enabled() || !is_shstk_enabled(tsk)) 346 return; 347 348 /* 349 * When fork() with CLONE_VM fails, the child (tsk) already has a 350 * shadow stack allocated, and exit_thread() calls this function to 351 * free it. In this case the parent (current) and the child share 352 * the same mm struct. Move forward only when they're same. 353 */ 354 if (!tsk->mm || tsk->mm != current->mm) 355 return; 356 357 /* 358 * We know shadow stack is enabled but if base is NULL, then 359 * this task is not managing its own shadow stack (CLONE_VFORK). So 360 * skip freeing it. 361 */ 362 base = get_shstk_base(tsk, &size); 363 if (!base) 364 return; 365 366 vm_munmap(base, size); 367 set_shstk_base(tsk, 0, 0); 368 } 369 370 int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status) 371 { 372 unsigned long bcfi_status = 0; 373 374 if (!is_user_shstk_enabled()) 375 return -EINVAL; 376 377 /* this means shadow stack is enabled on the task */ 378 bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0); 379 380 return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0; 381 } 382 383 int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status) 384 { 385 unsigned long size = 0, addr = 0; 386 bool enable_shstk = false; 387 388 if (!is_user_shstk_enabled()) 389 return -EINVAL; 390 391 /* Reject unknown flags */ 392 if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) 393 return -EINVAL; 394 395 /* bcfi status is locked and further can't be modified by user */ 396 if (is_shstk_locked(t)) 397 return -EINVAL; 398 399 enable_shstk = status & PR_SHADOW_STACK_ENABLE; 400 /* Request is to enable shadow stack and shadow stack is not enabled already */ 401 if (enable_shstk && !is_shstk_enabled(t)) { 402 /* shadow stack was allocated and enable request again 403 * no need to support such usecase and return EINVAL. 404 */ 405 if (is_shstk_allocated(t)) 406 return -EINVAL; 407 408 size = calc_shstk_size(0); 409 addr = allocate_shadow_stack(0, size, 0, false); 410 if (IS_ERR_VALUE(addr)) 411 return -ENOMEM; 412 set_shstk_base(t, addr, size); 413 set_active_shstk(t, addr + size); 414 } 415 416 /* 417 * If a request to disable shadow stack happens, let's go ahead and release it 418 * Although, if CLONE_VFORKed child did this, then in that case we will end up 419 * not releasing the shadow stack (because it might be needed in parent). Although 420 * we will disable it for VFORKed child. And if VFORKed child tries to enable again 421 * then in that case, it'll get entirely new shadow stack because following condition 422 * are true 423 * - shadow stack was not enabled for vforked child 424 * - shadow stack base was anyways pointing to 0 425 * This shouldn't be a big issue because we want parent to have availability of shadow 426 * stack whenever VFORKed child releases resources via exit or exec but at the same 427 * time we want VFORKed child to break away and establish new shadow stack if it desires 428 * 429 */ 430 if (!enable_shstk) 431 shstk_release(t); 432 433 set_shstk_status(t, enable_shstk); 434 return 0; 435 } 436 437 int arch_lock_shadow_stack_status(struct task_struct *task, 438 unsigned long arg) 439 { 440 /* If shtstk not supported or not enabled on task, nothing to lock here */ 441 if (!is_user_shstk_enabled() || 442 !is_shstk_enabled(task) || arg != 0) 443 return -EINVAL; 444 445 set_shstk_lock(task, true); 446 447 return 0; 448 } 449 450 int arch_prctl_get_branch_landing_pad_state(struct task_struct *t, 451 unsigned long __user *state) 452 { 453 unsigned long fcfi_status = 0; 454 455 if (!is_user_lpad_enabled()) 456 return -EINVAL; 457 458 fcfi_status = (is_indir_lp_enabled(t) ? PR_CFI_ENABLE : PR_CFI_DISABLE); 459 fcfi_status |= (is_indir_lp_locked(t) ? PR_CFI_LOCK : 0); 460 461 return copy_to_user(state, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; 462 } 463 464 int arch_prctl_set_branch_landing_pad_state(struct task_struct *t, unsigned long state) 465 { 466 if (!is_user_lpad_enabled()) 467 return -EINVAL; 468 469 /* indirect branch tracking is locked and further can't be modified by user */ 470 if (is_indir_lp_locked(t)) 471 return -EINVAL; 472 473 if (!(state & (PR_CFI_ENABLE | PR_CFI_DISABLE))) 474 return -EINVAL; 475 476 if (state & PR_CFI_ENABLE && state & PR_CFI_DISABLE) 477 return -EINVAL; 478 479 set_indir_lp_status(t, !!(state & PR_CFI_ENABLE)); 480 481 return 0; 482 } 483 484 int arch_prctl_lock_branch_landing_pad_state(struct task_struct *task) 485 { 486 /* 487 * If indirect branch tracking is not supported or not enabled on task, 488 * nothing to lock here 489 */ 490 if (!is_user_lpad_enabled() || 491 !is_indir_lp_enabled(task)) 492 return -EINVAL; 493 494 set_indir_lp_lock(task, true); 495 496 return 0; 497 } 498 499 bool is_user_shstk_enabled(void) 500 { 501 return (cpu_supports_shadow_stack() && 502 !(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI)); 503 } 504 505 bool is_user_lpad_enabled(void) 506 { 507 return (cpu_supports_indirect_br_lp_instr() && 508 !(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI)); 509 } 510 511 static int __init setup_global_riscv_enable(char *str) 512 { 513 if (strcmp(str, "all") == 0) 514 riscv_nousercfi = CMDLINE_DISABLE_RISCV_USERCFI; 515 516 if (strcmp(str, "fcfi") == 0) 517 riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_FCFI; 518 519 if (strcmp(str, "bcfi") == 0) 520 riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_BCFI; 521 522 if (riscv_nousercfi) 523 pr_info("RISC-V user CFI disabled via cmdline - shadow stack status : %s, landing pad status : %s\n", 524 (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI) ? "disabled" : 525 "enabled", (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI) ? 526 "disabled" : "enabled"); 527 528 return 1; 529 } 530 531 __setup("riscv_nousercfi=", setup_global_riscv_enable); 532