// SPDX-License-Identifier: GPL-2.0-only

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/syscalls.h>
#include <linux/types.h>

#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
#include <asm/gcs.h>
#include <asm/page.h>

static unsigned long alloc_gcs(unsigned long addr, unsigned long size)
{
	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
	struct mm_struct *mm = current->mm;
	unsigned long mapped_addr, unused;

	if (addr)
		flags |= MAP_FIXED_NOREPLACE;

	mmap_write_lock(mm);
	mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
			      VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
	mmap_write_unlock(mm);

	return mapped_addr;
}

static unsigned long gcs_size(unsigned long size)
{
	if (size)
		return PAGE_ALIGN(size);

	/* Allocate RLIMIT_STACK/2 with limits of PAGE_SIZE..2G */
	size = PAGE_ALIGN(min_t(unsigned long long,
				rlimit(RLIMIT_STACK) / 2, SZ_2G));
	return max(PAGE_SIZE, size);
}
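/*
 * Illustrative sizing note (numbers assume a 4K PAGE_SIZE): a
 * clone3() caller requesting a 1MiB stack gets a 512KiB GCS from
 * gcs_alloc_thread_stack() below, while passing size 0 to gcs_size()
 * with an 8MiB RLIMIT_STACK yields RLIMIT_STACK / 2 = 4MiB, clamped
 * to the PAGE_SIZE..2G range. Each GCS entry is a single u64 return
 * address, so half the normal stack size is generous in practice.
 */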
unsigned long gcs_alloc_thread_stack(struct task_struct *tsk,
				     const struct kernel_clone_args *args)
{
	unsigned long addr, size;

	if (!system_supports_gcs())
		return 0;

	if (!task_gcs_el0_enabled(tsk))
		return 0;

	if ((args->flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM) {
		tsk->thread.gcspr_el0 = read_sysreg_s(SYS_GCSPR_EL0);
		return 0;
	}

	size = args->stack_size / 2;

	size = gcs_size(size);
	addr = alloc_gcs(0, size);
	if (IS_ERR_VALUE(addr))
		return addr;

	tsk->thread.gcs_base = addr;
	tsk->thread.gcs_size = size;
	tsk->thread.gcspr_el0 = addr + size - sizeof(u64);

	return addr;
}

SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
{
	unsigned long alloc_size;
	unsigned long __user *cap_ptr;
	unsigned long cap_val;
	int ret = 0;
	int cap_offset;

	if (!system_supports_gcs())
		return -EOPNOTSUPP;

	if (flags & ~(SHADOW_STACK_SET_TOKEN | SHADOW_STACK_SET_MARKER))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr))
		return -EINVAL;

	if (size == 8 || !IS_ALIGNED(size, 8))
		return -EINVAL;

	/*
	 * An overflow would result in attempting to write the restore token
	 * to the wrong location. Not catastrophic, but just return the right
	 * error code and block it.
	 */
	alloc_size = PAGE_ALIGN(size);
	if (alloc_size < size)
		return -EOVERFLOW;

	addr = alloc_gcs(addr, alloc_size);
	if (IS_ERR_VALUE(addr))
		return addr;

	/*
	 * Put a cap token at the end of the allocated region so it
	 * can be switched to.
	 */
	if (flags & SHADOW_STACK_SET_TOKEN) {
		/* Leave an extra empty frame as a top of stack marker? */
		if (flags & SHADOW_STACK_SET_MARKER)
			cap_offset = 2;
		else
			cap_offset = 1;

		cap_ptr = (unsigned long __user *)(addr + size -
						   (cap_offset * sizeof(unsigned long)));
		cap_val = GCS_CAP(cap_ptr);

		put_user_gcs(cap_val, cap_ptr, &ret);
		if (ret != 0) {
			vm_munmap(addr, size);
			return -EFAULT;
		}

		/*
		 * Ensure the new cap is ordered before standard
		 * memory accesses to the same location.
		 */
		gcsb_dsync();
	}

	return addr;
}
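/*
 * Illustrative userspace sketch (not part of the kernel build;
 * assumes a libc exposing syscall() and __NR_map_shadow_stack):
 *
 *	size_t size = 65536;
 *	void *gcs = (void *)syscall(__NR_map_shadow_stack, 0, size,
 *				    SHADOW_STACK_SET_TOKEN |
 *				    SHADOW_STACK_SET_MARKER);
 *
 * With both flags set the top of the new stack is laid out as:
 *
 *	gcs + size -  8: 0 (top of stack marker)
 *	gcs + size - 16: GCS_CAP(gcs + size - 16) (cap token)
 *
 * and the cap token is what a later stack switch validates and
 * consumes.
 */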
/*
 * Apply the GCS mode configured for the specified task to the
 * hardware.
 */
void gcs_set_el0_mode(struct task_struct *task)
{
	u64 gcscre0_el1 = GCSCRE0_EL1_nTR;

	if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_ENABLE)
		gcscre0_el1 |= GCSCRE0_EL1_RVCHKEN | GCSCRE0_EL1_PCRSEL;

	if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_WRITE)
		gcscre0_el1 |= GCSCRE0_EL1_STREn;

	if (task->thread.gcs_el0_mode & PR_SHADOW_STACK_PUSH)
		gcscre0_el1 |= GCSCRE0_EL1_PUSHMEn;

	write_sysreg_s(gcscre0_el1, SYS_GCSCRE0_EL1);
}

void gcs_free(struct task_struct *task)
{
	if (!system_supports_gcs())
		return;

	/*
	 * When fork() with CLONE_VM fails, the child (tsk) already
	 * has a GCS allocated, and exit_thread() calls this function
	 * to free it. In this case the parent (current) and the
	 * child share the same mm struct.
	 */
	if (!task->mm || task->mm != current->mm)
		return;

	if (task->thread.gcs_base)
		vm_munmap(task->thread.gcs_base, task->thread.gcs_size);

	task->thread.gcspr_el0 = 0;
	task->thread.gcs_base = 0;
	task->thread.gcs_size = 0;
}
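/*
 * Illustrative userspace sketch of enabling GCS for the current
 * thread (assumes <sys/prctl.h> definitions; error handling
 * omitted):
 *
 *	prctl(PR_SET_SHADOW_STACK_STATUS, PR_SHADOW_STACK_ENABLE,
 *	      0, 0, 0);
 *
 * arch_set_shadow_stack_status() below implements this: on first
 * enable it allocates a GCS for the thread and points GCSPR_EL0 at
 * the top of the new stack.
 */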
int arch_set_shadow_stack_status(struct task_struct *task, unsigned long arg)
{
	unsigned long gcs, size;
	int ret;

	if (!system_supports_gcs())
		return -EINVAL;

	if (is_compat_thread(task_thread_info(task)))
		return -EINVAL;

	/* Reject unknown flags */
	if (arg & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	ret = gcs_check_locked(task, arg);
	if (ret != 0)
		return ret;

	/* If we are enabling GCS then make sure we have a stack */
	if (arg & PR_SHADOW_STACK_ENABLE &&
	    !task_gcs_el0_enabled(task)) {
		/* Do not allow GCS to be reenabled */
		if (task->thread.gcs_base || task->thread.gcspr_el0)
			return -EINVAL;

		if (task != current)
			return -EBUSY;

		size = gcs_size(0);
		gcs = alloc_gcs(0, size);
		if (!gcs)
			return -ENOMEM;

		task->thread.gcspr_el0 = gcs + size - sizeof(u64);
		task->thread.gcs_base = gcs;
		task->thread.gcs_size = size;
		if (task == current)
			write_sysreg_s(task->thread.gcspr_el0,
				       SYS_GCSPR_EL0);
	}

	task->thread.gcs_el0_mode = arg;
	if (task == current)
		gcs_set_el0_mode(task);

	return 0;
}

int arch_get_shadow_stack_status(struct task_struct *task,
				 unsigned long __user *arg)
{
	if (!system_supports_gcs())
		return -EINVAL;

	if (is_compat_thread(task_thread_info(task)))
		return -EINVAL;

	return put_user(task->thread.gcs_el0_mode, arg);
}

int arch_lock_shadow_stack_status(struct task_struct *task,
				  unsigned long arg)
{
	if (!system_supports_gcs())
		return -EINVAL;

	if (is_compat_thread(task_thread_info(task)))
		return -EINVAL;

	/*
	 * We support locking unknown bits so applications can prevent
	 * any changes in a future proof manner.
	 */
	task->thread.gcs_el0_locked |= arg;

	return 0;
}
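/*
 * Illustrative userspace sketch of locking the configuration via
 * arch_lock_shadow_stack_status() above (assumes <sys/prctl.h>
 * definitions): a runtime that has committed to a policy can block
 * any further changes with:
 *
 *	prctl(PR_LOCK_SHADOW_STACK_STATUS, ~0UL, 0, 0, 0);
 *
 * Locking every bit, including currently unknown ones, is permitted
 * so the lock also covers modes added by future kernels.
 */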