// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2024 Rivos, Inc. * Deepak Gupta */ #include #include #include #include #include #include #include #include #include #include #include #include unsigned long riscv_nousercfi __read_mostly; #define SHSTK_ENTRY_SIZE sizeof(void *) bool is_shstk_enabled(struct task_struct *task) { return task->thread_info.user_cfi_state.ubcfi_en; } bool is_shstk_allocated(struct task_struct *task) { return task->thread_info.user_cfi_state.shdw_stk_base; } bool is_shstk_locked(struct task_struct *task) { return task->thread_info.user_cfi_state.ubcfi_locked; } void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size) { task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr; task->thread_info.user_cfi_state.shdw_stk_size = size; } unsigned long get_shstk_base(struct task_struct *task, unsigned long *size) { if (size) *size = task->thread_info.user_cfi_state.shdw_stk_size; return task->thread_info.user_cfi_state.shdw_stk_base; } void set_active_shstk(struct task_struct *task, unsigned long shstk_addr) { task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr; } unsigned long get_active_shstk(struct task_struct *task) { return task->thread_info.user_cfi_state.user_shdw_stk; } void set_shstk_status(struct task_struct *task, bool enable) { if (!is_user_shstk_enabled()) return; task->thread_info.user_cfi_state.ubcfi_en = enable ? 
1 : 0; if (enable) task->thread.envcfg |= ENVCFG_SSE; else task->thread.envcfg &= ~ENVCFG_SSE; csr_write(CSR_ENVCFG, task->thread.envcfg); } void set_shstk_lock(struct task_struct *task) { task->thread_info.user_cfi_state.ubcfi_locked = 1; } bool is_indir_lp_enabled(struct task_struct *task) { return task->thread_info.user_cfi_state.ufcfi_en; } bool is_indir_lp_locked(struct task_struct *task) { return task->thread_info.user_cfi_state.ufcfi_locked; } void set_indir_lp_status(struct task_struct *task, bool enable) { if (!is_user_lpad_enabled()) return; task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0; if (enable) task->thread.envcfg |= ENVCFG_LPE; else task->thread.envcfg &= ~ENVCFG_LPE; csr_write(CSR_ENVCFG, task->thread.envcfg); } void set_indir_lp_lock(struct task_struct *task) { task->thread_info.user_cfi_state.ufcfi_locked = 1; } /* * If size is 0, then to be compatible with regular stack we want it to be as big as * regular stack. Else PAGE_ALIGN it and return back */ static unsigned long calc_shstk_size(unsigned long size) { if (size) return PAGE_ALIGN(size); return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G)); } /* * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow * stack. */ static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val) { /* * Never expect -1 on shadow stack. 
Expect return addresses and zero */ unsigned long swap = -1; __enable_user_access(); asm goto(".option push\n" ".option arch, +zicfiss\n" "1: ssamoswap.d %[swap], %[val], %[addr]\n" _ASM_EXTABLE(1b, %l[fault]) ".option pop\n" : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr) : [val] "r" (val) : "memory" : fault ); __disable_user_access(); return swap; fault: __disable_user_access(); return -1; } /* * Create a restore token on the shadow stack. A token is always XLEN wide * and aligned to XLEN. */ static int create_rstor_token(unsigned long ssp, unsigned long *token_addr) { unsigned long addr; /* Token must be aligned */ if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE)) return -EINVAL; /* On RISC-V we're constructing token to be function of address itself */ addr = ssp - SHSTK_ENTRY_SIZE; if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1) return -EFAULT; if (token_addr) *token_addr = addr; return 0; } /* * Save user shadow stack pointer on the shadow stack itself and return a pointer to saved location. * Returns -EFAULT if unsuccessful. */ int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr) { unsigned long ss_ptr = 0; unsigned long token_loc = 0; int ret = 0; if (!saved_shstk_ptr) return -EINVAL; ss_ptr = get_active_shstk(tsk); ret = create_rstor_token(ss_ptr, &token_loc); if (!ret) { *saved_shstk_ptr = token_loc; set_active_shstk(tsk, token_loc); } return ret; } /* * Restores the user shadow stack pointer from the token on the shadow stack for task 'tsk'. * Returns -EFAULT if unsuccessful. 
*/
int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr)
{
	unsigned long token = 0;

	/* Read (and clear) the token word; -1 means the access faulted */
	token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0);

	if (token == -1)
		return -EFAULT;

	/*
	 * invalid token, return EINVAL. A valid token's value is the address
	 * one entry above its own location (see create_rstor_token()).
	 */
	if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) {
		pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n",
				    tsk->comm, task_pid_nr(tsk), __func__,
				    (void *)(task_pt_regs(tsk)->epc), (void *)(task_pt_regs(tsk)->sp),
				    (void *)token, (void *)shstk_ptr);
		return -EINVAL;
	}

	/* all checks passed, set active shstk and return success */
	set_active_shstk(tsk, token);
	return 0;
}

/*
 * Map @size bytes of shadow stack at @addr (a hint; MAP_FIXED_NOREPLACE is
 * used when non-zero) and, if @set_tok, seed a restore token at
 * @addr + @token_offset. Returns the mapped address or a negative error
 * value (callers check with IS_ERR_VALUE()).
 */
static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
					   unsigned long token_offset, bool set_tok)
{
	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
	struct mm_struct *mm = current->mm;
	unsigned long populate;

	if (addr)
		flags |= MAP_FIXED_NOREPLACE;

	mmap_write_lock(mm);
	/*
	 * PROT_READ plus VM_SHADOW_STACK | VM_WRITE vm_flags; presumably this
	 * leaves the range writable only via shadow-stack accesses — confirm
	 * against the arch's VM_SHADOW_STACK pgprot handling.
	 */
	addr = do_mmap(NULL, addr, size, PROT_READ, flags,
		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
	mmap_write_unlock(mm);

	if (!set_tok || IS_ERR_VALUE(addr))
		goto out;

	/* Token write failed: undo the mapping rather than leak it */
	if (create_rstor_token(addr + token_offset, NULL)) {
		vm_munmap(addr, size);
		return -EINVAL;
	}

out:
	return addr;
}

/*
 * map_shadow_stack() syscall: allocate a shadow-stack mapping of @size bytes
 * (page-aligned up) at optional hint @addr. The only valid flag is
 * SHADOW_STACK_SET_TOKEN, which asks for a restore token at the top.
 */
SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
	unsigned long aligned_size = 0;

	if (!is_user_shstk_enabled())
		return -EOPNOTSUPP;

	/* Anything other than set token should result in invalid param */
	if (flags & ~SHADOW_STACK_SET_TOKEN)
		return -EINVAL;

	/*
	 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available
	 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction
	 * itself. This provides static property on register programming and writes to CSR can't
	 * be unintentional from programmer's perspective. As long as programmer has guarded areas
	 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since
	 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent
	 * to allocation. Although in order to provide portability with other architectures (because
	 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token
	 * flag in flags and if provided in flags, will setup a token at the base.
	 */

	/* If there isn't space for a token */
	if (set_tok && size < SHSTK_ENTRY_SIZE)
		return -ENOSPC;

	/* Hint address, when given, must be page aligned */
	if (addr && (addr & (PAGE_SIZE - 1)))
		return -EINVAL;

	aligned_size = PAGE_ALIGN(size);
	if (aligned_size < size)
		return -EOVERFLOW;

	/* Token (if requested) goes at the original, unaligned size offset */
	return allocate_shadow_stack(addr, aligned_size, size, set_tok);
}

/*
 * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for
 * cases where CLONE_VM is specified and thus a different stack is specified by user. We
 * thus need a separate shadow stack too. How a separate shadow stack is specified by
 * user is still being debated. Once that's settled, remove this part of the comment.
 * This function simply returns 0 if shadow stacks are not supported or if separate shadow
 * stack allocation is not needed (like in case of !CLONE_VM).
 * On success for CLONE_VM, returns the new shadow stack's top (base + size).
 */
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
				       const struct kernel_clone_args *args)
{
	unsigned long addr, size;

	/* If shadow stack is not supported, return 0 */
	if (!is_user_shstk_enabled())
		return 0;

	/*
	 * If shadow stack is not enabled on the new thread, skip any
	 * switch to a new shadow stack.
	 */
	if (!is_shstk_enabled(tsk))
		return 0;

	/*
	 * For CLONE_VFORK the child will share the parents shadow stack.
	 * Set base = 0 and size = 0, this is special means to track this state
	 * so the freeing logic run for child knows to leave it alone.
	 */
	if (args->flags & CLONE_VFORK) {
		set_shstk_base(tsk, 0, 0);
		return 0;
	}

	/*
	 * For !CLONE_VM the child will use a copy of the parents shadow
	 * stack.
	 */
	if (!(args->flags & CLONE_VM))
		return 0;

	/*
	 * reaching here means, CLONE_VM was specified and thus a separate shadow
	 * stack is needed for new cloned thread. Note: below allocation is happening
	 * using current mm.
	 */
	size = calc_shstk_size(args->stack_size);
	addr = allocate_shadow_stack(0, size, 0, false);
	if (IS_ERR_VALUE(addr))
		return addr;

	set_shstk_base(tsk, addr, size);

	return addr + size;
}

/*
 * Free @tsk's shadow stack mapping (if it owns one) and clear the recorded
 * base/size. Called from thread teardown paths.
 */
void shstk_release(struct task_struct *tsk)
{
	unsigned long base = 0, size = 0;

	/* If shadow stack is not supported or not enabled, nothing to release */
	if (!is_user_shstk_enabled() || !is_shstk_enabled(tsk))
		return;

	/*
	 * When fork() with CLONE_VM fails, the child (tsk) already has a
	 * shadow stack allocated, and exit_thread() calls this function to
	 * free it. In this case the parent (current) and the child share
	 * the same mm struct. Move forward only when they're same.
	 */
	if (!tsk->mm || tsk->mm != current->mm)
		return;

	/*
	 * We know shadow stack is enabled but if base is NULL, then
	 * this task is not managing its own shadow stack (CLONE_VFORK). So
	 * skip freeing it.
	 */
	base = get_shstk_base(tsk, &size);
	if (!base)
		return;

	vm_munmap(base, size);
	set_shstk_base(tsk, 0, 0);
}

/*
 * PR_GET_SHADOW_STACK_STATUS: report @t's shadow stack state to user memory
 * at @status. Returns -EINVAL when unsupported, -EFAULT on bad user pointer.
 */
int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long bcfi_status = 0;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* this means shadow stack is enabled on the task */
	bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0);

	return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0;
}

/*
 * PR_SET_SHADOW_STACK_STATUS: enable (allocating a fresh shadow stack on
 * first enable) or disable (releasing the mapping) shadow stack for @t.
 * Rejected once the state has been locked.
 */
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	unsigned long size = 0, addr = 0;
	bool enable_shstk = false;

	if (!is_user_shstk_enabled())
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	/* bcfi status is locked and further can't be modified by user */
	if (is_shstk_locked(t))
		return -EINVAL;

	enable_shstk = status & PR_SHADOW_STACK_ENABLE;

	/* Request is to enable shadow stack and shadow stack is not enabled already */
	if (enable_shstk && !is_shstk_enabled(t)) {
		/*
		 * shadow stack was allocated and enable request again
		 * no need to support such usecase and return EINVAL.
		 */
		if (is_shstk_allocated(t))
			return -EINVAL;

		size = calc_shstk_size(0);
		addr = allocate_shadow_stack(0, size, 0, false);
		if (IS_ERR_VALUE(addr))
			return -ENOMEM;
		set_shstk_base(t, addr, size);
		/* Shadow stacks grow down: start at the top of the mapping */
		set_active_shstk(t, addr + size);
	}

	/*
	 * If a request to disable shadow stack happens, let's go ahead and release it
	 * Although, if CLONE_VFORKed child did this, then in that case we will end up
	 * not releasing the shadow stack (because it might be needed in parent). Although
	 * we will disable it for VFORKed child. And if VFORKed child tries to enable again
	 * then in that case, it'll get entirely new shadow stack because following condition
	 * are true
	 * - shadow stack was not enabled for vforked child
	 * - shadow stack base was anyways pointing to 0
	 * This shouldn't be a big issue because we want parent to have availability of shadow
	 * stack whenever VFORKed child releases resources via exit or exec but at the same
	 * time we want VFORKed child to break away and establish new shadow stack if it desires
	 */
	if (!enable_shstk)
		shstk_release(t);

	set_shstk_status(t, enable_shstk);
	return 0;
}

/*
 * PR_LOCK_SHADOW_STACK_STATUS: make @task's shadow stack state immutable.
 * @arg must be 0 (no lock sub-flags defined).
 */
int arch_lock_shadow_stack_status(struct task_struct *task, unsigned long arg)
{
	/* If shstk not supported or not enabled on task, nothing to lock here */
	if (!is_user_shstk_enabled() || !is_shstk_enabled(task) || arg != 0)
		return -EINVAL;

	set_shstk_lock(task);

	return 0;
}

/*
 * PR_GET_INDIR_BR_LP_STATUS: report @t's landing-pad (forward-edge CFI)
 * state to user memory at @status.
 */
int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status)
{
	unsigned long fcfi_status = 0;

	if (!is_user_lpad_enabled())
		return -EINVAL;

	/* indirect branch tracking is enabled on the task or not */
	fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0);

	return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0;
}

/*
 * PR_SET_INDIR_BR_LP_STATUS: enable/disable landing-pad enforcement for @t.
 * Rejected once the state has been locked; only PR_INDIR_BR_LP_ENABLE is a
 * valid flag.
 */
int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status)
{
	bool enable_indir_lp = false;

	if (!is_user_lpad_enabled())
		return -EINVAL;

	/* indirect branch tracking is locked and further can't be modified by user */
	if (is_indir_lp_locked(t))
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_INDIR_BR_LP_ENABLE)
		return -EINVAL;

	enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE);
	set_indir_lp_status(t, enable_indir_lp);

	return 0;
}

/*
 * PR_LOCK_INDIR_BR_LP_STATUS: make @task's landing-pad state immutable.
 * @arg must be 0 (no lock sub-flags defined).
 */
int arch_lock_indir_br_lp_status(struct task_struct *task, unsigned long arg)
{
	/*
	 * If indirect branch tracking is not supported or not enabled on task,
	 * nothing to lock here
	 */
	if (!is_user_lpad_enabled() || !is_indir_lp_enabled(task) || arg != 0)
		return -EINVAL;

	set_indir_lp_lock(task);

	return 0;
}

/* Shadow stack usable: CPU supports Zicfiss and not disabled on cmdline. */
bool is_user_shstk_enabled(void)
{
	return (cpu_supports_shadow_stack() &&
		!(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI));
}

/* Landing pads usable: CPU supports Zicfilp and not disabled on cmdline. */
bool is_user_lpad_enabled(void)
{
	return (cpu_supports_indirect_br_lp_instr() &&
		!(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI));
}

/*
 * Parse "riscv_nousercfi=": "all" disables both CFI features, "fcfi" only
 * landing pads, "bcfi" only shadow stacks. Always returns 1 (option handled).
 */
static int __init setup_global_riscv_enable(char *str)
{
	/* "all" overwrites; "fcfi"/"bcfi" OR-in their individual bit */
	if (strcmp(str, "all") == 0)
		riscv_nousercfi = CMDLINE_DISABLE_RISCV_USERCFI;

	if (strcmp(str, "fcfi") == 0)
		riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_FCFI;

	if (strcmp(str, "bcfi") == 0)
		riscv_nousercfi |= CMDLINE_DISABLE_RISCV_USERCFI_BCFI;

	if (riscv_nousercfi)
		pr_info("RISC-V user CFI disabled via cmdline - shadow stack status : %s, landing pad status : %s\n",
			(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_BCFI) ? "disabled" : "enabled",
			(riscv_nousercfi & CMDLINE_DISABLE_RISCV_USERCFI_FCFI) ? "disabled" : "enabled");

	return 1;
}

__setup("riscv_nousercfi=", setup_global_riscv_enable);