xref: /linux/arch/riscv/kernel/usercfi.c (revision 66c9c713de597f9b40a319ebda4d3466ce2cdff0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2024 Rivos, Inc.
4  * Deepak Gupta <debug@rivosinc.com>
5  */
6 
7 #include <linux/sched.h>
8 #include <linux/bitops.h>
9 #include <linux/types.h>
10 #include <linux/mm.h>
11 #include <linux/mman.h>
12 #include <linux/uaccess.h>
13 #include <linux/sizes.h>
14 #include <linux/user.h>
15 #include <linux/syscalls.h>
16 #include <linux/prctl.h>
17 #include <asm/csr.h>
18 #include <asm/usercfi.h>
19 
20 #define SHSTK_ENTRY_SIZE sizeof(void *)
21 
22 bool is_shstk_enabled(struct task_struct *task)
23 {
24 	return task->thread_info.user_cfi_state.ubcfi_en;
25 }
26 
27 bool is_shstk_allocated(struct task_struct *task)
28 {
29 	return task->thread_info.user_cfi_state.shdw_stk_base;
30 }
31 
32 bool is_shstk_locked(struct task_struct *task)
33 {
34 	return task->thread_info.user_cfi_state.ubcfi_locked;
35 }
36 
37 void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size)
38 {
39 	task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr;
40 	task->thread_info.user_cfi_state.shdw_stk_size = size;
41 }
42 
43 unsigned long get_shstk_base(struct task_struct *task, unsigned long *size)
44 {
45 	if (size)
46 		*size = task->thread_info.user_cfi_state.shdw_stk_size;
47 	return task->thread_info.user_cfi_state.shdw_stk_base;
48 }
49 
/* Record @shstk_addr as @task's current (active) shadow stack pointer. */
void set_active_shstk(struct task_struct *task, unsigned long shstk_addr)
{
	task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr;
}
54 
55 unsigned long get_active_shstk(struct task_struct *task)
56 {
57 	return task->thread_info.user_cfi_state.user_shdw_stk;
58 }
59 
60 void set_shstk_status(struct task_struct *task, bool enable)
61 {
62 	if (!cpu_supports_shadow_stack())
63 		return;
64 
65 	task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0;
66 
67 	if (enable)
68 		task->thread.envcfg |= ENVCFG_SSE;
69 	else
70 		task->thread.envcfg &= ~ENVCFG_SSE;
71 
72 	csr_write(CSR_ENVCFG, task->thread.envcfg);
73 }
74 
/*
 * Lock @task's shadow stack state: once set, further enable/disable
 * requests from user space are rejected (see arch_set_shadow_stack_status()).
 */
void set_shstk_lock(struct task_struct *task)
{
	task->thread_info.user_cfi_state.ubcfi_locked = 1;
}
79 
80 bool is_indir_lp_enabled(struct task_struct *task)
81 {
82 	return task->thread_info.user_cfi_state.ufcfi_en;
83 }
84 
85 bool is_indir_lp_locked(struct task_struct *task)
86 {
87 	return task->thread_info.user_cfi_state.ufcfi_locked;
88 }
89 
90 void set_indir_lp_status(struct task_struct *task, bool enable)
91 {
92 	if (!cpu_supports_indirect_br_lp_instr())
93 		return;
94 
95 	task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0;
96 
97 	if (enable)
98 		task->thread.envcfg |= ENVCFG_LPE;
99 	else
100 		task->thread.envcfg &= ~ENVCFG_LPE;
101 
102 	csr_write(CSR_ENVCFG, task->thread.envcfg);
103 }
104 
/*
 * Lock @task's landing pad state: once set, further enable/disable
 * requests from user space are rejected (see arch_set_indir_br_lp_status()).
 */
void set_indir_lp_lock(struct task_struct *task)
{
	task->thread_info.user_cfi_state.ufcfi_locked = 1;
}
109 /*
110  * If size is 0, then to be compatible with regular stack we want it to be as big as
111  * regular stack. Else PAGE_ALIGN it and return back
112  */
113 static unsigned long calc_shstk_size(unsigned long size)
114 {
115 	if (size)
116 		return PAGE_ALIGN(size);
117 
118 	return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
119 }
120 
/*
 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen
 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to
 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow
 * stack.
 *
 * Atomically store @val at user shadow stack slot @addr and return the
 * previous value.  Returns -1 on fault; -1 is usable as an error sentinel
 * because shadow stacks only ever hold return addresses, tokens, and zero.
 * A faulting access is redirected to the `fault` label via the exception
 * table entry, so the user-access window is always closed on both paths.
 */
static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val)
{
	/*
	 * Never expect -1 on shadow stack. Expect return addresses and zero
	 */
	unsigned long swap = -1;

	__enable_user_access();
	asm goto(".option push\n"
		".option arch, +zicfiss\n"
		"1: ssamoswap.d %[swap], %[val], %[addr]\n"
		_ASM_EXTABLE(1b, %l[fault])
		".option pop\n"
		 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr)
		: [val] "r" (val)
		: "memory"
		: fault
		);
	__disable_user_access();
	return swap;
fault:
	__disable_user_access();
	return -1;
}
151 
152 /*
153  * Create a restore token on the shadow stack.  A token is always XLEN wide
154  * and aligned to XLEN.
155  */
156 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
157 {
158 	unsigned long addr;
159 
160 	/* Token must be aligned */
161 	if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
162 		return -EINVAL;
163 
164 	/* On RISC-V we're constructing token to be function of address itself */
165 	addr = ssp - SHSTK_ENTRY_SIZE;
166 
167 	if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1)
168 		return -EFAULT;
169 
170 	if (token_addr)
171 		*token_addr = addr;
172 
173 	return 0;
174 }
175 
176 /*
177  * Save user shadow stack pointer on the shadow stack itself and return a pointer to saved location.
178  * Returns -EFAULT if unsuccessful.
179  */
180 int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr)
181 {
182 	unsigned long ss_ptr = 0;
183 	unsigned long token_loc = 0;
184 	int ret = 0;
185 
186 	if (!saved_shstk_ptr)
187 		return -EINVAL;
188 
189 	ss_ptr = get_active_shstk(tsk);
190 	ret = create_rstor_token(ss_ptr, &token_loc);
191 
192 	if (!ret) {
193 		*saved_shstk_ptr = token_loc;
194 		set_active_shstk(tsk, token_loc);
195 	}
196 
197 	return ret;
198 }
199 
200 /*
201  * Restores the user shadow stack pointer from the token on the shadow stack for task 'tsk'.
202  * Returns -EFAULT if unsuccessful.
203  */
204 int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr)
205 {
206 	unsigned long token = 0;
207 
208 	token = amo_user_shstk((unsigned long __user *)shstk_ptr, 0);
209 
210 	if (token == -1)
211 		return -EFAULT;
212 
213 	/* invalid token, return EINVAL */
214 	if ((token - shstk_ptr) != SHSTK_ENTRY_SIZE) {
215 		pr_info_ratelimited("%s[%d]: bad restore token in %s: pc=%p sp=%p, token=%p, shstk_ptr=%p\n",
216 				    tsk->comm, task_pid_nr(tsk), __func__,
217 				    (void *)(task_pt_regs(tsk)->epc),
218 				    (void *)(task_pt_regs(tsk)->sp),
219 				    (void *)token, (void *)shstk_ptr);
220 		return -EINVAL;
221 	}
222 
223 	/* all checks passed, set active shstk and return success */
224 	set_active_shstk(tsk, token);
225 	return 0;
226 }
227 
228 static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
229 					   unsigned long token_offset, bool set_tok)
230 {
231 	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
232 	struct mm_struct *mm = current->mm;
233 	unsigned long populate;
234 
235 	if (addr)
236 		flags |= MAP_FIXED_NOREPLACE;
237 
238 	mmap_write_lock(mm);
239 	addr = do_mmap(NULL, addr, size, PROT_READ, flags,
240 		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
241 	mmap_write_unlock(mm);
242 
243 	if (!set_tok || IS_ERR_VALUE(addr))
244 		goto out;
245 
246 	if (create_rstor_token(addr + token_offset, NULL)) {
247 		vm_munmap(addr, size);
248 		return -EINVAL;
249 	}
250 
251 out:
252 	return addr;
253 }
254 
255 SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
256 {
257 	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
258 	unsigned long aligned_size = 0;
259 
260 	if (!cpu_supports_shadow_stack())
261 		return -EOPNOTSUPP;
262 
263 	/* Anything other than set token should result in invalid param */
264 	if (flags & ~SHADOW_STACK_SET_TOKEN)
265 		return -EINVAL;
266 
267 	/*
268 	 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available
269 	 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction
270 	 * itself. This provides static property on register programming and writes to CSR can't
271 	 * be unintentional from programmer's perspective. As long as programmer has guarded areas
272 	 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since
273 	 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent
274 	 * to allocation. Although in order to provide portablity with other architectures (because
275 	 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token
276 	 * flag in flags and if provided in flags, will setup a token at the base.
277 	 */
278 
279 	/* If there isn't space for a token */
280 	if (set_tok && size < SHSTK_ENTRY_SIZE)
281 		return -ENOSPC;
282 
283 	if (addr && (addr & (PAGE_SIZE - 1)))
284 		return -EINVAL;
285 
286 	aligned_size = PAGE_ALIGN(size);
287 	if (aligned_size < size)
288 		return -EOVERFLOW;
289 
290 	return allocate_shadow_stack(addr, aligned_size, size, set_tok);
291 }
292 
293 /*
294  * This gets called during clone/clone3/fork. And is needed to allocate a shadow stack for
295  * cases where CLONE_VM is specified and thus a different stack is specified by user. We
296  * thus need a separate shadow stack too. How a separate shadow stack is specified by
297  * user is still being debated. Once that's settled, remove this part of the comment.
298  * This function simply returns 0 if shadow stacks are not supported or if separate shadow
299  * stack allocation is not needed (like in case of !CLONE_VM)
300  */
301 unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
302 				       const struct kernel_clone_args *args)
303 {
304 	unsigned long addr, size;
305 
306 	/* If shadow stack is not supported, return 0 */
307 	if (!cpu_supports_shadow_stack())
308 		return 0;
309 
310 	/*
311 	 * If shadow stack is not enabled on the new thread, skip any
312 	 * switch to a new shadow stack.
313 	 */
314 	if (!is_shstk_enabled(tsk))
315 		return 0;
316 
317 	/*
318 	 * For CLONE_VFORK the child will share the parents shadow stack.
319 	 * Set base = 0 and size = 0, this is special means to track this state
320 	 * so the freeing logic run for child knows to leave it alone.
321 	 */
322 	if (args->flags & CLONE_VFORK) {
323 		set_shstk_base(tsk, 0, 0);
324 		return 0;
325 	}
326 
327 	/*
328 	 * For !CLONE_VM the child will use a copy of the parents shadow
329 	 * stack.
330 	 */
331 	if (!(args->flags & CLONE_VM))
332 		return 0;
333 
334 	/*
335 	 * reaching here means, CLONE_VM was specified and thus a separate shadow
336 	 * stack is needed for new cloned thread. Note: below allocation is happening
337 	 * using current mm.
338 	 */
339 	size = calc_shstk_size(args->stack_size);
340 	addr = allocate_shadow_stack(0, size, 0, false);
341 	if (IS_ERR_VALUE(addr))
342 		return addr;
343 
344 	set_shstk_base(tsk, addr, size);
345 
346 	return addr + size;
347 }
348 
349 void shstk_release(struct task_struct *tsk)
350 {
351 	unsigned long base = 0, size = 0;
352 	/* If shadow stack is not supported or not enabled, nothing to release */
353 	if (!cpu_supports_shadow_stack() || !is_shstk_enabled(tsk))
354 		return;
355 
356 	/*
357 	 * When fork() with CLONE_VM fails, the child (tsk) already has a
358 	 * shadow stack allocated, and exit_thread() calls this function to
359 	 * free it.  In this case the parent (current) and the child share
360 	 * the same mm struct. Move forward only when they're same.
361 	 */
362 	if (!tsk->mm || tsk->mm != current->mm)
363 		return;
364 
365 	/*
366 	 * We know shadow stack is enabled but if base is NULL, then
367 	 * this task is not managing its own shadow stack (CLONE_VFORK). So
368 	 * skip freeing it.
369 	 */
370 	base = get_shstk_base(tsk, &size);
371 	if (!base)
372 		return;
373 
374 	vm_munmap(base, size);
375 	set_shstk_base(tsk, 0, 0);
376 }
377 
378 int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
379 {
380 	unsigned long bcfi_status = 0;
381 
382 	if (!cpu_supports_shadow_stack())
383 		return -EINVAL;
384 
385 	/* this means shadow stack is enabled on the task */
386 	bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0);
387 
388 	return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0;
389 }
390 
/*
 * prctl(PR_SET_SHADOW_STACK_STATUS): enable or disable shadow stack for @t.
 * Enabling allocates a fresh RLIMIT_STACK-sized (clamped to 4G) shadow stack
 * and points the active SSP at its top; disabling releases the mapping.
 * Returns 0 on success, -EINVAL (unsupported hardware, unknown flags, locked
 * state, or already-allocated stack), or -ENOMEM on allocation failure.
 */
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	unsigned long size = 0, addr = 0;
	bool enable_shstk = false;

	if (!cpu_supports_shadow_stack())
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	/* bcfi status is locked and further can't be modified by user */
	if (is_shstk_locked(t))
		return -EINVAL;

	enable_shstk = status & PR_SHADOW_STACK_ENABLE;
	/* Request is to enable shadow stack and shadow stack is not enabled already */
	if (enable_shstk && !is_shstk_enabled(t)) {
		/*
		 * A shadow stack is already allocated (but disabled); an
		 * enable request in that state is not a supported use case.
		 */
		if (is_shstk_allocated(t))
			return -EINVAL;

		/* 0 => size it like the regular stack (see calc_shstk_size()). */
		size = calc_shstk_size(0);
		addr = allocate_shadow_stack(0, size, 0, false);
		if (IS_ERR_VALUE(addr))
			return -ENOMEM;
		set_shstk_base(t, addr, size);
		/* Shadow stack grows down, so the active SSP starts at the top. */
		set_active_shstk(t, addr + size);
	}

	/*
	 * If a request to disable shadow stack happens, let's go ahead and release it
	 * Although, if CLONE_VFORKed child did this, then in that case we will end up
	 * not releasing the shadow stack (because it might be needed in parent). Although
	 * we will disable it for VFORKed child. And if VFORKed child tries to enable again
	 * then in that case, it'll get entirely new shadow stack because following condition
	 * are true
	 *  - shadow stack was not enabled for vforked child
	 *  - shadow stack base was anyways pointing to 0
	 * This shouldn't be a big issue because we want parent to have availability of shadow
	 * stack whenever VFORKed child releases resources via exit or exec but at the same
	 * time we want VFORKed child to break away and establish new shadow stack if it desires
	 *
	 */
	if (!enable_shstk)
		shstk_release(t);

	set_shstk_status(t, enable_shstk);
	return 0;
}
444 
445 int arch_lock_shadow_stack_status(struct task_struct *task,
446 				  unsigned long arg)
447 {
448 	/* If shtstk not supported or not enabled on task, nothing to lock here */
449 	if (!cpu_supports_shadow_stack() ||
450 	    !is_shstk_enabled(task) || arg != 0)
451 		return -EINVAL;
452 
453 	set_shstk_lock(task);
454 
455 	return 0;
456 }
457 
458 int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status)
459 {
460 	unsigned long fcfi_status = 0;
461 
462 	if (!cpu_supports_indirect_br_lp_instr())
463 		return -EINVAL;
464 
465 	/* indirect branch tracking is enabled on the task or not */
466 	fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0);
467 
468 	return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0;
469 }
470 
471 int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status)
472 {
473 	bool enable_indir_lp = false;
474 
475 	if (!cpu_supports_indirect_br_lp_instr())
476 		return -EINVAL;
477 
478 	/* indirect branch tracking is locked and further can't be modified by user */
479 	if (is_indir_lp_locked(t))
480 		return -EINVAL;
481 
482 	/* Reject unknown flags */
483 	if (status & ~PR_INDIR_BR_LP_ENABLE)
484 		return -EINVAL;
485 
486 	enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE);
487 	set_indir_lp_status(t, enable_indir_lp);
488 
489 	return 0;
490 }
491 
492 int arch_lock_indir_br_lp_status(struct task_struct *task,
493 				 unsigned long arg)
494 {
495 	/*
496 	 * If indirect branch tracking is not supported or not enabled on task,
497 	 * nothing to lock here
498 	 */
499 	if (!cpu_supports_indirect_br_lp_instr() ||
500 	    !is_indir_lp_enabled(task) || arg != 0)
501 		return -EINVAL;
502 
503 	set_indir_lp_lock(task);
504 
505 	return 0;
506 }
507