1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2024 Rivos, Inc. 4 * Deepak Gupta <debug@rivosinc.com> 5 */ 6 7 #include <linux/sched.h> 8 #include <linux/bitops.h> 9 #include <linux/types.h> 10 #include <linux/mm.h> 11 #include <linux/mman.h> 12 #include <linux/uaccess.h> 13 #include <linux/sizes.h> 14 #include <linux/user.h> 15 #include <linux/syscalls.h> 16 #include <linux/prctl.h> 17 #include <asm/csr.h> 18 #include <asm/usercfi.h> 19 20 #define SHSTK_ENTRY_SIZE sizeof(void *) 21 22 bool is_shstk_enabled(struct task_struct *task) 23 { 24 return task->thread_info.user_cfi_state.ubcfi_en; 25 } 26 27 bool is_shstk_allocated(struct task_struct *task) 28 { 29 return task->thread_info.user_cfi_state.shdw_stk_base; 30 } 31 32 bool is_shstk_locked(struct task_struct *task) 33 { 34 return task->thread_info.user_cfi_state.ubcfi_locked; 35 } 36 37 void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size) 38 { 39 task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr; 40 task->thread_info.user_cfi_state.shdw_stk_size = size; 41 } 42 43 unsigned long get_shstk_base(struct task_struct *task, unsigned long *size) 44 { 45 if (size) 46 *size = task->thread_info.user_cfi_state.shdw_stk_size; 47 return task->thread_info.user_cfi_state.shdw_stk_base; 48 } 49 50 void set_active_shstk(struct task_struct *task, unsigned long shstk_addr) 51 { 52 task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr; 53 } 54 55 unsigned long get_active_shstk(struct task_struct *task) 56 { 57 return task->thread_info.user_cfi_state.user_shdw_stk; 58 } 59 60 void set_shstk_status(struct task_struct *task, bool enable) 61 { 62 if (!cpu_supports_shadow_stack()) 63 return; 64 65 task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0; 66 67 if (enable) 68 task->thread.envcfg |= ENVCFG_SSE; 69 else 70 task->thread.envcfg &= ~ENVCFG_SSE; 71 72 csr_write(CSR_ENVCFG, task->thread.envcfg); 73 } 74 75 void set_shstk_lock(struct task_struct *task) 76 { 77 task->thread_info.user_cfi_state.ubcfi_locked = 1; 78 } 79 80 bool is_indir_lp_enabled(struct task_struct *task) 81 { 82 return task->thread_info.user_cfi_state.ufcfi_en; 83 } 84 85 bool is_indir_lp_locked(struct task_struct *task) 86 { 87 return task->thread_info.user_cfi_state.ufcfi_locked; 88 } 89 90 void set_indir_lp_status(struct task_struct *task, bool enable) 91 { 92 if (!cpu_supports_indirect_br_lp_instr()) 93 return; 94 95 task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0; 96 97 if (enable) 98 task->thread.envcfg |= ENVCFG_LPE; 99 else 100 task->thread.envcfg &= ~ENVCFG_LPE; 101 102 csr_write(CSR_ENVCFG, task->thread.envcfg); 103 } 104 105 void set_indir_lp_lock(struct task_struct *task) 106 { 107 task->thread_info.user_cfi_state.ufcfi_locked = 1; 108 } 109 /* 110 * If size is 0, then to be compatible with regular stack we want it to be as big as 111 * regular stack. Else PAGE_ALIGN it and return back 112 */ 113 static unsigned long calc_shstk_size(unsigned long size) 114 { 115 if (size) 116 return PAGE_ALIGN(size); 117 118 return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G)); 119 } 120 121 /* 122 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen 123 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to 124 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow 125 * stack. 126 */ 127 static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val) 128 { 129 /* 130 * Never expect -1 on shadow stack. Expect return addresses and zero 131 */ 132 unsigned long swap = -1; 133 134 __enable_user_access(); 135 asm goto(".option push\n" 136 ".option arch, +zicfiss\n" 137 "1: ssamoswap.d %[swap], %[val], %[addr]\n" 138 _ASM_EXTABLE(1b, %l[fault]) 139 ".option pop\n" 140 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr) 141 : [val] "r" (val) 142 : "memory" 143 : fault 144 ); 145 __disable_user_access(); 146 return swap; 147 fault: 148 __disable_user_access(); 149 return -1; 150 } 151 152 /* 153 * Create a restore token on the shadow stack. A token is always XLEN wide 154 * and aligned to XLEN. 155 */ 156 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr) 157 { 158 unsigned long addr; 159 160 /* Token must be aligned */ 161 if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE)) 162 return -EINVAL; 163 164 /* On RISC-V we're constructing token to be function of address itself */ 165 addr = ssp - SHSTK_ENTRY_SIZE; 166 167 if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1) 168 return -EFAULT; 169 170 if (token_addr) 171 *token_addr = addr; 172 173 return 0; 174 } 175 176 /* 177 * Save user shadow stack pointer on the shadow stack itself and return a pointer to saved location. 178 * Returns -EFAULT if unsuccessful. 179 */ 180 int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr) 181 { 182 unsigned long ss_ptr = 0; 183 unsigned long token_loc = 0; 184 int ret = 0; 185 186 if (!saved_shstk_ptr) 187 return -EINVAL; 188 189 ss_ptr = get_active_shstk(tsk); 190 ret = create_rstor_token(ss_ptr, &token_loc); 191 192 if (!ret) { 193 *saved_shstk_ptr = token_loc; 194 set_active_shstk(tsk, token_loc); 195 } 196 197 return ret; 198 } 199 200 /* 201 * Restores the user shadow stack pointer from the token on the shadow stack for task 'tsk'. 202 * Returns -EFAULT if unsuccessful. 203 */ 204 int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr) 205 { 206 unsigned long token = 0; 207 208 token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0); 209 210 if (token == -1) 211 return -EFAULT; 212 213 /* invalid token, return EINVAL */ 214 if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) { 215 pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n", 216 tsk->comm, task_pid_nr(tsk), __func__, 217 (void *)(task_pt_regs(tsk)->epc), 218 (void *)(task_pt_regs(tsk)->sp), 219 (void *)token, (void *)shstk_ptr); 220 return -EINVAL; 221 } 222 223 /* all checks passed, set active shstk and return success */ 224 set_active_shstk(tsk, token); 225 return 0; 226 } 227 228 static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size, 229 unsigned long token_offset, bool set_tok) 230 { 231 int flags = MAP_ANONYMOUS | MAP_PRIVATE; 232 struct mm_struct *mm = current->mm; 233 unsigned long populate; 234 235 if (addr) 236 flags |= MAP_FIXED_NOREPLACE; 237 238 mmap_write_lock(mm); 239 addr = do_mmap(NULL, addr, size, PROT_READ, flags, 240 VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL); 241 mmap_write_unlock(mm); 242 243 if (!set_tok || IS_ERR_VALUE(addr)) 244 goto out; 245 246 if (create_rstor_token(addr + token_offset, NULL)) { 247 vm_munmap(addr, size); 248 return -EINVAL; 249 } 250 251 out: 252 return addr; 253 } 254 255 SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags) 256 { 257 bool set_tok = flags & SHADOW_STACK_SET_TOKEN; 258 unsigned long aligned_size = 0; 259 260 if (!cpu_supports_shadow_stack()) 261 return -EOPNOTSUPP; 262 263 /* Anything other than set token should result in invalid param */ 264 if (flags & ~SHADOW_STACK_SET_TOKEN) 265 return -EINVAL; 266 267 /* 268 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available 269 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction 270 * itself. This provides static property on register programming and writes to CSR can't 271 * be unintentional from programmer's perspective. As long as programmer has guarded areas 272 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since 273 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent 274 * to allocation. Although in order to provide portablity with other architectures (because 275 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token 276 * flag in flags and if provided in flags, will setup a token at the base. 277 */ 278 279 /* If there isn't space for a token */ 280 if (set_tok && size < SHSTK_ENTRY_SIZE) 281 return -ENOSPC; 282 283 if (addr && (addr & (PAGE_SIZE - 1))) 284 return -EINVAL; 285 286 aligned_size = PAGE_ALIGN(size); 287 if (aligned_size < size) 288 return -EOVERFLOW; 289 290 return allocate_shadow_stack(addr, aligned_size, size, set_tok); 291 } 292 293 /* 294 * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for 295 * cases where CLONE_VM is specified and thus a different stack is specified by user. We 296 * thus need a separate shadow stack too. How a separate shadow stack is specified by 297 * user is still being debated. Once that's settled, remove this part of the comment. 298 * This function simply returns 0 if shadow stacks are not supported or if separate shadow 299 * stack allocation is not needed (like in case of !CLONE_VM) 300 */ 301 unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, 302 const struct kernel_clone_args *args) 303 { 304 unsigned long addr, size; 305 306 /* If shadow stack is not supported, return 0 */ 307 if (!cpu_supports_shadow_stack()) 308 return 0; 309 310 /* 311 * If shadow stack is not enabled on the new thread, skip any 312 * switch to a new shadow stack. 313 */ 314 if (!is_shstk_enabled(tsk)) 315 return 0; 316 317 /* 318 * For CLONE_VFORK the child will share the parents shadow stack. 319 * Set base = 0 and size = 0, this is special means to track this state 320 * so the freeing logic run for child knows to leave it alone. 321 */ 322 if (args->flags & CLONE_VFORK) { 323 set_shstk_base(tsk, 0, 0); 324 return 0; 325 } 326 327 /* 328 * For !CLONE_VM the child will use a copy of the parents shadow 329 * stack. 330 */ 331 if (!(args->flags & CLONE_VM)) 332 return 0; 333 334 /* 335 * reaching here means, CLONE_VM was specified and thus a separate shadow 336 * stack is needed for new cloned thread. Note: below allocation is happening 337 * using current mm. 338 */ 339 size = calc_shstk_size(args->stack_size); 340 addr = allocate_shadow_stack(0, size, 0, false); 341 if (IS_ERR_VALUE(addr)) 342 return addr; 343 344 set_shstk_base(tsk, addr, size); 345 346 return addr + size; 347 } 348 349 void shstk_release(struct task_struct *tsk) 350 { 351 unsigned long base = 0, size = 0; 352 /* If shadow stack is not supported or not enabled, nothing to release */ 353 if (!cpu_supports_shadow_stack() || !is_shstk_enabled(tsk)) 354 return; 355 356 /* 357 * When fork() with CLONE_VM fails, the child (tsk) already has a 358 * shadow stack allocated, and exit_thread() calls this function to 359 * free it. In this case the parent (current) and the child share 360 * the same mm struct. Move forward only when they're same. 361 */ 362 if (!tsk->mm || tsk->mm != current->mm) 363 return; 364 365 /* 366 * We know shadow stack is enabled but if base is NULL, then 367 * this task is not managing its own shadow stack (CLONE_VFORK). So 368 * skip freeing it. 369 */ 370 base = get_shstk_base(tsk, &size); 371 if (!base) 372 return; 373 374 vm_munmap(base, size); 375 set_shstk_base(tsk, 0, 0); 376 } 377 378 int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status) 379 { 380 unsigned long bcfi_status = 0; 381 382 if (!cpu_supports_shadow_stack()) 383 return -EINVAL; 384 385 /* this means shadow stack is enabled on the task */ 386 bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0); 387 388 return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0; 389 } 390 391 int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status) 392 { 393 unsigned long size = 0, addr = 0; 394 bool enable_shstk = false; 395 396 if (!cpu_supports_shadow_stack()) 397 return -EINVAL; 398 399 /* Reject unknown flags */ 400 if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) 401 return -EINVAL; 402 403 /* bcfi status is locked and further can't be modified by user */ 404 if (is_shstk_locked(t)) 405 return -EINVAL; 406 407 enable_shstk = status & PR_SHADOW_STACK_ENABLE; 408 /* Request is to enable shadow stack and shadow stack is not enabled already */ 409 if (enable_shstk && !is_shstk_enabled(t)) { 410 /* shadow stack was allocated and enable request again 411 * no need to support such usecase and return EINVAL. 412 */ 413 if (is_shstk_allocated(t)) 414 return -EINVAL; 415 416 size = calc_shstk_size(0); 417 addr = allocate_shadow_stack(0, size, 0, false); 418 if (IS_ERR_VALUE(addr)) 419 return -ENOMEM; 420 set_shstk_base(t, addr, size); 421 set_active_shstk(t, addr + size); 422 } 423 424 /* 425 * If a request to disable shadow stack happens, let's go ahead and release it 426 * Although, if CLONE_VFORKed child did this, then in that case we will end up 427 * not releasing the shadow stack (because it might be needed in parent). Although 428 * we will disable it for VFORKed child. And if VFORKed child tries to enable again 429 * then in that case, it'll get entirely new shadow stack because following condition 430 * are true 431 * - shadow stack was not enabled for vforked child 432 * - shadow stack base was anyways pointing to 0 433 * This shouldn't be a big issue because we want parent to have availability of shadow 434 * stack whenever VFORKed child releases resources via exit or exec but at the same 435 * time we want VFORKed child to break away and establish new shadow stack if it desires 436 * 437 */ 438 if (!enable_shstk) 439 shstk_release(t); 440 441 set_shstk_status(t, enable_shstk); 442 return 0; 443 } 444 445 int arch_lock_shadow_stack_status(struct task_struct *task, 446 unsigned long arg) 447 { 448 /* If shtstk not supported or not enabled on task, nothing to lock here */ 449 if (!cpu_supports_shadow_stack() || 450 !is_shstk_enabled(task) || arg != 0) 451 return -EINVAL; 452 453 set_shstk_lock(task); 454 455 return 0; 456 } 457 458 int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) 459 { 460 unsigned long fcfi_status = 0; 461 462 if (!cpu_supports_indirect_br_lp_instr()) 463 return -EINVAL; 464 465 /* indirect branch tracking is enabled on the task or not */ 466 fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); 467 468 return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0; 469 } 470 471 int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) 472 { 473 bool enable_indir_lp = false; 474 475 if (!cpu_supports_indirect_br_lp_instr()) 476 return -EINVAL; 477 478 /* indirect branch tracking is locked and further can't be modified by user */ 479 if (is_indir_lp_locked(t)) 480 return -EINVAL; 481 482 /* Reject unknown flags */ 483 if (status & ~PR_INDIR_BR_LP_ENABLE) 484 return -EINVAL; 485 486 enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE); 487 set_indir_lp_status(t, enable_indir_lp); 488 489 return 0; 490 } 491 492 int arch_lock_indir_br_lp_status(struct task_struct *task, 493 unsigned long arg) 494 { 495 /* 496 * If indirect branch tracking is not supported or not enabled on task, 497 * nothing to lock here 498 */ 499 if (!cpu_supports_indirect_br_lp_instr() || 500 !is_indir_lp_enabled(task) || arg != 0) 501 return -EINVAL; 502 503 set_indir_lp_lock(task); 504 505 return 0; 506 } 507