// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024 Rivos, Inc.
 * Deepak Gupta <debug@rivosinc.com>
 */

#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/sizes.h>
#include <linux/user.h>
#include <linux/syscalls.h>
#include <linux/prctl.h>
#include <asm/csr.h>
#include <asm/usercfi.h>

unsigned long riscv_nousercfi __read_mostly;

#define SHSTK_ENTRY_SIZE sizeof(void *)

bool is_shstk_enabled(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ubcfi_en;
}

bool is_shstk_allocated(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.shdw_stk_base;
}

bool is_shstk_locked(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ubcfi_locked;
}

void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size)
{
	task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr;
	task->thread_info.user_cfi_state.shdw_stk_size = size;
}

unsigned long get_shstk_base(struct task_struct *task, unsigned long *size)
{
	if (size)
		*size = task->thread_info.user_cfi_state.shdw_stk_size;
	return task->thread_info.user_cfi_state.shdw_stk_base;
}

void set_active_shstk(struct task_struct *task, unsigned long shstk_addr)
{
	task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr;
}

unsigned long get_active_shstk(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.user_shdw_stk;
}

void set_shstk_status(struct task_struct *task, bool enable)
{
	if (!is_user_shstk_enabled())
		return;

	task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0;

	if (enable)
		task->thread.envcfg |= ENVCFG_SSE;
	else
		task->thread.envcfg &= ~ENVCFG_SSE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}

void set_shstk_lock(struct task_struct *task)
{
	task->thread_info.user_cfi_state.ubcfi_locked = 1;
}

bool is_indir_lp_enabled(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ufcfi_en;
}

bool is_indir_lp_locked(struct task_struct *task)
{
	return task->thread_info.user_cfi_state.ufcfi_locked;
}

void set_indir_lp_status(struct task_struct *task, bool enable)
{
	if (!is_user_lpad_enabled())
		return;

	task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0;

	if (enable)
		task->thread.envcfg |= ENVCFG_LPE;
	else
		task->thread.envcfg &= ~ENVCFG_LPE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}

void set_indir_lp_lock(struct task_struct *task)
{
	task->thread_info.user_cfi_state.ufcfi_locked = 1;
}

/*
 * If size is 0, then to be compatible with the regular stack we want it to be
 * as big as the regular stack. Else PAGE_ALIGN it and return.
 */
static unsigned long calc_shstk_size(unsigned long size)
{
	if (size)
		return PAGE_ALIGN(size);

	return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
}

/*
 * Writes on the shadow stack can be done with either `sspush` or `ssamoswap`.
 * `sspush` implicitly writes to the current shadow stack pointed to by CSR_SSP,
 * while `ssamoswap` takes a pointer to a shadow stack. To keep it simple, we
 * use `ssamoswap` to perform writes on the shadow stack.
 */
static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val)
{
	/*
	 * Never expect -1 on the shadow stack. Expect return addresses and zero.
	 */
	unsigned long swap = -1;

	__enable_user_access();
	asm goto(".option push\n"
		 ".option arch, +zicfiss\n"
		 "1: ssamoswap.d %[swap], %[val], %[addr]\n"
		 _ASM_EXTABLE(1b, %l[fault])
		 ".option pop\n"
		 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr)
		 : [val] "r" (val)
		 : "memory"
		 : fault
		 );
	__disable_user_access();
	return swap;
fault:
	__disable_user_access();
	return -1;
}

/*
 * Create a restore token on the shadow stack. A token is always XLEN wide
 * and aligned to XLEN.
 */
static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
{
	unsigned long addr;

	/* Token must be aligned */
	if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
		return -EINVAL;

	/* On RISC-V we construct the token as a function of the address itself */
	addr = ssp - SHSTK_ENTRY_SIZE;

	if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1)
		return -EFAULT;

	if (token_addr)
		*token_addr = addr;

	return 0;
}
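
/*
 * Illustrative token layout for the scheme above (values are hypothetical,
 * assuming SHSTK_ENTRY_SIZE == 8): with ssp == 0x4000, create_rstor_token()
 * stores the value 0x4000 at address 0x3ff8 and reports token_addr == 0x3ff8.
 * restore_user_shstk() later verifies that the stored value minus the slot
 * address equals SHSTK_ENTRY_SIZE before switching the active shadow stack
 * to that value.
 */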

/*
 * Save the user shadow stack pointer on the shadow stack itself and return a
 * pointer to the saved location. Returns 0 on success and a negative error
 * code on failure.
 */
int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr)
{
	unsigned long ss_ptr = 0;
	unsigned long token_loc = 0;
	int ret = 0;

	if (!saved_shstk_ptr)
		return -EINVAL;

	ss_ptr = get_active_shstk(tsk);
	ret = create_rstor_token(ss_ptr, &token_loc);

	if (!ret) {
		*saved_shstk_ptr = token_loc;
		set_active_shstk(tsk, token_loc);
	}

	return ret;
}

/*
 * Restore the user shadow stack pointer from the token on the shadow stack
 * for task 'tsk'. Returns -EFAULT on a faulting access and -EINVAL on a bad
 * token.
 */
int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr)
{
	unsigned long token = 0;

	token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0);

	if (token == -1)
		return -EFAULT;

	/* Invalid token, return -EINVAL */
	if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) {
		pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n",
				    tsk->comm, task_pid_nr(tsk), __func__,
				    (void *)(task_pt_regs(tsk)->epc),
				    (void *)(task_pt_regs(tsk)->sp),
				    (void *)token, (void *)shstk_ptr);
		return -EINVAL;
	}

	/* All checks passed, set the active shstk and return success */
	set_active_shstk(tsk, token);
	return 0;
}

static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
					   unsigned long token_offset, bool set_tok)
{
	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
	struct mm_struct *mm = current->mm;
	unsigned long populate;

	if (addr)
		flags |= MAP_FIXED_NOREPLACE;

	mmap_write_lock(mm);
	addr = do_mmap(NULL, addr, size, PROT_READ, flags,
		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
	mmap_write_unlock(mm);

	if (!set_tok || IS_ERR_VALUE(addr))
		goto out;

	if (create_rstor_token(addr + token_offset, NULL)) {
		vm_munmap(addr, size);
		return -EINVAL;
	}

out:
	return addr;
}

SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
	unsigned long aligned_size = 0;

	if (!is_user_shstk_enabled())
		return -EOPNOTSUPP;

	/* Anything other than the set-token flag is an invalid parameter */
	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	/*
	 * Unlike other architectures, on RISC-V the SSP is held in CSR_SSP,
	 * a CSR available in all modes. CSR accesses are performed using a
	 * 12-bit index encoded in the instruction itself. This makes register
	 * selection static, so writes to the CSR can't be unintentional from
	 * the programmer's perspective. As long as the programmer has properly
	 * guarded the areas which write to CSR_SSP, shadow stack pivoting is
	 * not possible. Since CSR_SSP is writable by user mode, user mode can
	 * set up a shadow stack token itself after allocation. However, to stay
	 * portable with other architectures (because `map_shadow_stack` is an
	 * arch-agnostic syscall), RISC-V honors the token flag and, if it is
	 * set, places a token at the base.
	 */

	/* If there isn't space for a token */
	if (set_tok && size < SHSTK_ENTRY_SIZE)
		return -ENOSPC;

	if (addr && (addr & (PAGE_SIZE - 1)))
		return -EINVAL;

	aligned_size = PAGE_ALIGN(size);
	if (aligned_size < size)
		return -EOVERFLOW;

	return allocate_shadow_stack(addr, aligned_size, size, set_tok);
}
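
/*
 * Illustrative userspace usage of the syscall above (a sketch, not kernel
 * code; it assumes the generic __NR_map_shadow_stack number and the
 * SHADOW_STACK_SET_TOKEN uapi flag are visible to the program):
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *
 *	unsigned long ss = syscall(__NR_map_shadow_stack, 0UL, 4096UL,
 *				   SHADOW_STACK_SET_TOKEN);
 *	// On success, 'ss' is the mapping base; the restore token sits at
 *	// ss + 4096 - sizeof(void *) and holds the value ss + 4096.
 */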

/*
 * This gets called during clone/clone3/fork. It is needed to allocate a
 * shadow stack for the case where CLONE_VM is specified and thus a different
 * stack is provided by the user, so a separate shadow stack is needed too.
 * How a separate shadow stack is specified by the user is still being
 * debated. Once that's settled, remove this part of the comment.
 * This function simply returns 0 if shadow stacks are not supported or if a
 * separate shadow stack allocation is not needed (as in the !CLONE_VM case).
 */
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
				       const struct kernel_clone_args *args)
{
	unsigned long addr, size;

	/* If shadow stack is not supported, return 0 */
	if (!is_user_shstk_enabled())
		return 0;

	/*
	 * If shadow stack is not enabled on the new thread, skip any
	 * switch to a new shadow stack.
	 */
	if (!is_shstk_enabled(tsk))
		return 0;

	/*
	 * For CLONE_VFORK the child will share the parent's shadow stack.
	 * Set base = 0 and size = 0; this is a special way to track this state
	 * so the freeing logic run for the child knows to leave it alone.
	 */
	if (args->flags & CLONE_VFORK) {
		set_shstk_base(tsk, 0, 0);
		return 0;
	}

	/*
	 * For !CLONE_VM the child will use a copy of the parent's shadow
	 * stack.
	 */
	if (!(args->flags & CLONE_VM))
		return 0;

	/*
	 * Reaching here means CLONE_VM was specified and thus a separate
	 * shadow stack is needed for the new cloned thread. Note: the
	 * allocation below happens using the current mm.
	 */
	size = calc_shstk_size(args->stack_size);
	addr = allocate_shadow_stack(0, size, 0, false);
	if (IS_ERR_VALUE(addr))
		return addr;

	set_shstk_base(tsk, addr, size);

	return addr + size;
}

void shstk_release(struct task_struct *tsk)
{
	unsigned long base = 0, size = 0;

	/* If shadow stack is not supported or not enabled, nothing to release */
	if (!is_user_shstk_enabled() || !is_shstk_enabled(tsk))
		return;

	/*
	 * When fork() with CLONE_VM fails, the child (tsk) already has a
	 * shadow stack allocated, and exit_thread() calls this function to
	 * free it. In this case the parent (current) and the child share
	 * the same mm struct. Move forward only when they're the same.
	 */
	if (!tsk->mm || tsk->mm != current->mm)
		return;

	/*
	 * We know shadow stack is enabled but if base is NULL, then
	 * this task is not managing its own shadow stack (CLONE_VFORK). So
	 * skip freeing it.
	 */
	base = get_shstk_base(tsk, &size);
	if (!base)
		return;

	vm_munmap(base, size);
	set_shstk_base(tsk, 0, 0);
}
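
/*
 * Lifecycle sketch for the two helpers above (illustrative only, assuming a
 * pthread_create()-style clone with CLONE_VM and a 64KiB stack_size):
 * shstk_alloc_thread_stack() maps a 64KiB shadow stack via
 * allocate_shadow_stack() and returns addr + size (the top of the new
 * mapping) for the clone path to use as the initial shadow stack pointer.
 * When the thread exits, shstk_release() unmaps [base, base + size) and
 * clears the base/size bookkeeping with set_shstk_base(tsk, 0, 0).
 */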

int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long bcfi_status = 0;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* This reports whether shadow stack is enabled on the task */
	bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0);

	return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0;
}

int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	unsigned long size = 0, addr = 0;
	bool enable_shstk = false;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	/* bcfi status is locked and can't be modified by user any further */
	if (is_shstk_locked(t))
		return -EINVAL;

	enable_shstk = status & PR_SHADOW_STACK_ENABLE;
	/* Request is to enable shadow stack and shadow stack is not enabled already */
	if (enable_shstk && !is_shstk_enabled(t)) {
		/*
		 * A shadow stack was already allocated and an enable request
		 * came in again; no need to support such a use case, return
		 * -EINVAL.
		 */
		if (is_shstk_allocated(t))
			return -EINVAL;

		size = calc_shstk_size(0);
		addr = allocate_shadow_stack(0, size, 0, false);
		if (IS_ERR_VALUE(addr))
			return -ENOMEM;
		set_shstk_base(t, addr, size);
		set_active_shstk(t, addr + size);
	}

	/*
	 * If a request to disable shadow stack comes in, go ahead and release
	 * it. If a CLONE_VFORKed child did this, we will end up not releasing
	 * the shadow stack (because it might be needed in the parent), but we
	 * will still disable it for the VFORKed child. If the VFORKed child
	 * then tries to enable it again, it will get an entirely new shadow
	 * stack because the following conditions are true:
	 * - shadow stack was not enabled for the vforked child
	 * - shadow stack base was pointing to 0 anyway
	 * This shouldn't be a big issue: we want the parent to retain its
	 * shadow stack whenever the VFORKed child releases resources via exit
	 * or exec, but at the same time we want the VFORKed child to be able
	 * to break away and establish a new shadow stack if it desires.
	 */
	if (!enable_shstk)
		shstk_release(t);

	set_shstk_status(t, enable_shstk);
	return 0;
}

int arch_lock_shadow_stack_status(struct task_struct *task,
				  unsigned long arg)
{
	/* If shstk is not supported or not enabled on the task, nothing to lock here */
	if (!is_user_shstk_enabled() ||
	    !is_shstk_enabled(task) || arg != 0)
		return -EINVAL;

	set_shstk_lock(task);

	return 0;
}
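
/*
 * Illustrative userspace flow for the prctl hooks above (a sketch; it assumes
 * PR_SET_SHADOW_STACK_STATUS, PR_LOCK_SHADOW_STACK_STATUS and
 * PR_SHADOW_STACK_ENABLE are exposed via <sys/prctl.h> / <linux/prctl.h>):
 *
 *	#include <sys/prctl.h>
 *	#include <linux/prctl.h>
 *
 *	// Enable: allocates an RLIMIT_STACK-sized (capped at 4G) shadow stack
 *	// and sets ENVCFG_SSE for the task.
 *	prctl(PR_SET_SHADOW_STACK_STATUS, PR_SHADOW_STACK_ENABLE, 0, 0, 0);
 *	// Lock: later attempts to flip the status return -EINVAL.
 *	prctl(PR_LOCK_SHADOW_STACK_STATUS, 0, 0, 0, 0);
 */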

int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long fcfi_status = 0;

	if (!is_user_lpad_enabled())
		return -EINVAL;

	/* This reports whether indirect branch tracking is enabled on the task */
	fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0);

	return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0;
}

int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status)
{
	bool enable_indir_lp = false;

	if (!is_user_lpad_enabled())
		return -EINVAL;

	/* Indirect branch tracking is locked and can't be modified by user any further */
	if (is_indir_lp_locked(t))
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_INDIR_BR_LP_ENABLE)
		return -EINVAL;

	enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE);
	set_indir_lp_status(t, enable_indir_lp);

	return 0;
}

int arch_lock_indir_br_lp_status(struct task_struct *task,
				 unsigned long arg)
{
	/*
	 * If indirect branch tracking is not supported or not enabled on the
	 * task, nothing to lock here
	 */
	if (!is_user_lpad_enabled() ||
	    !is_indir_lp_enabled(task) || arg != 0)
		return -EINVAL;

	set_indir_lp_lock(task);

	return 0;
}

bool is_user_shstk_enabled(void)
{
	return (cpu_supports_shadow_stack() &&
		!(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI));
}

bool is_user_lpad_enabled(void)
{
	return (cpu_supports_indirect_br_lp_instr() &&
		!(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI));
}

static int __init setup_global_riscv_enable(char *str)
{
	if (strcmp(str, "all") == 0)
		riscv_nousercfi = CMDLINE_DISABLE_RISCV_USERCFI;

	if (strcmp(str, "fcfi") == 0)
		riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_FCFI;

	if (strcmp(str, "bcfi") == 0)
		riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_BCFI;

	if (riscv_nousercfi)
		pr_info("RISC-V user CFI disabled via cmdline - shadow stack status : %s, landing pad status : %s\n",
			(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI) ? "disabled" :
			"enabled", (riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI) ?
			"disabled" : "enabled");

	return 1;
}

__setup("riscv_nousercfi=", setup_global_riscv_enable);
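
/*
 * Illustrative command line usage of the parameter parsed above (values taken
 * from the strcmp() checks in setup_global_riscv_enable()):
 *
 *	riscv_nousercfi=all	- disable both shadow stack and landing pads
 *	riscv_nousercfi=bcfi	- disable shadow stack only; map_shadow_stack()
 *				  then returns -EOPNOTSUPP and the shadow stack
 *				  prctls return -EINVAL
 *	riscv_nousercfi=fcfi	- disable landing pads only
 */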