xref: /linux/arch/riscv/kernel/usercfi.c (revision 8a9e22d2ca5855263d6e3f83509eabf16d7b8a0a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2024 Rivos, Inc.
4  * Deepak Gupta <debug@rivosinc.com>
5  */
6 
7 #include <linux/sched.h>
8 #include <linux/bitops.h>
9 #include <linux/types.h>
10 #include <linux/mm.h>
11 #include <linux/mman.h>
12 #include <linux/uaccess.h>
13 #include <linux/sizes.h>
14 #include <linux/user.h>
15 #include <linux/syscalls.h>
16 #include <linux/prctl.h>
17 #include <asm/csr.h>
18 #include <asm/usercfi.h>
19 
20 #define SHSTK_ENTRY_SIZE sizeof(void *)
21 
22 bool is_shstk_enabled(struct task_struct *task)
23 {
24 	return task->thread_info.user_cfi_state.ubcfi_en;
25 }
26 
27 bool is_shstk_allocated(struct task_struct *task)
28 {
29 	return task->thread_info.user_cfi_state.shdw_stk_base;
30 }
31 
32 bool is_shstk_locked(struct task_struct *task)
33 {
34 	return task->thread_info.user_cfi_state.ubcfi_locked;
35 }
36 
37 void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size)
38 {
39 	task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr;
40 	task->thread_info.user_cfi_state.shdw_stk_size = size;
41 }
42 
43 unsigned long get_shstk_base(struct task_struct *task, unsigned long *size)
44 {
45 	if (size)
46 		*size = task->thread_info.user_cfi_state.shdw_stk_size;
47 	return task->thread_info.user_cfi_state.shdw_stk_base;
48 }
49 
/* Record @shstk_addr as the task's current (active) shadow stack pointer */
void set_active_shstk(struct task_struct *task, unsigned long shstk_addr)
{
	task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr;
}
54 
/*
 * Enable or disable shadow stack for @task: update the per-task software
 * state, mirror it into the task's saved envcfg image, and push the new
 * value into the hardware CSR.
 *
 * NOTE(review): CSR_ENVCFG is written unconditionally, even when @task is
 * not the current task — presumably callers only pass current here; confirm,
 * since otherwise this may clobber the running CPU's envcfg.
 */
void set_shstk_status(struct task_struct *task, bool enable)
{
	if (!cpu_supports_shadow_stack())
		return;

	task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0;

	/* Keep the saved envcfg image in sync with the enable state */
	if (enable)
		task->thread.envcfg |= ENVCFG_SSE;
	else
		task->thread.envcfg &= ~ENVCFG_SSE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}
69 
/*
 * Lock @task's shadow stack state; arch_set_shadow_stack_status() refuses
 * further changes once this is set.
 */
void set_shstk_lock(struct task_struct *task)
{
	task->thread_info.user_cfi_state.ubcfi_locked = 1;
}
74 
75 bool is_indir_lp_enabled(struct task_struct *task)
76 {
77 	return task->thread_info.user_cfi_state.ufcfi_en;
78 }
79 
80 bool is_indir_lp_locked(struct task_struct *task)
81 {
82 	return task->thread_info.user_cfi_state.ufcfi_locked;
83 }
84 
/*
 * Enable or disable indirect branch landing pads for @task: update the
 * per-task software state, mirror it into the task's saved envcfg image,
 * and push the new value into the hardware CSR.
 *
 * NOTE(review): like set_shstk_status(), CSR_ENVCFG is written even when
 * @task != current — presumably callers only pass current; confirm.
 */
void set_indir_lp_status(struct task_struct *task, bool enable)
{
	if (!cpu_supports_indirect_br_lp_instr())
		return;

	task->thread_info.user_cfi_state.ufcfi_en = enable ? 1 : 0;

	/* Keep the saved envcfg image in sync with the enable state */
	if (enable)
		task->thread.envcfg |= ENVCFG_LPE;
	else
		task->thread.envcfg &= ~ENVCFG_LPE;

	csr_write(CSR_ENVCFG, task->thread.envcfg);
}
99 
/*
 * Lock @task's landing pad state; arch_set_indir_br_lp_status() refuses
 * further changes once this is set.
 */
void set_indir_lp_lock(struct task_struct *task)
{
	task->thread_info.user_cfi_state.ufcfi_locked = 1;
}
104 /*
105  * If size is 0, then to be compatible with regular stack we want it to be as big as
106  * regular stack. Else PAGE_ALIGN it and return back
107  */
108 static unsigned long calc_shstk_size(unsigned long size)
109 {
110 	if (size)
111 		return PAGE_ALIGN(size);
112 
113 	return PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
114 }
115 
/*
 * Atomically swap @val into the user shadow stack slot at @addr and return
 * the previous value, or -1 if the access faulted.
 *
 * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen
 * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to
 * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow
 * stack.
 */
static noinline unsigned long amo_user_shstk(unsigned long __user *addr, unsigned long val)
{
	/*
	 * -1 doubles as the fault sentinel: a shadow stack is only expected to
	 * hold return addresses and zero, never -1.
	 */
	unsigned long swap = -1;

	/* Faults inside the asm are redirected to the `fault` label via extable */
	__enable_user_access();
	asm goto(".option push\n"
		".option arch, +zicfiss\n"
		"1: ssamoswap.d %[swap], %[val], %[addr]\n"
		_ASM_EXTABLE(1b, %l[fault])
		".option pop\n"
		 : [swap] "=r" (swap), [addr] "+A" (*(__force unsigned long *)addr)
		: [val] "r" (val)
		: "memory"
		: fault
		);
	__disable_user_access();
	return swap;
fault:
	__disable_user_access();
	return -1;
}
146 
147 /*
148  * Create a restore token on the shadow stack.  A token is always XLEN wide
149  * and aligned to XLEN.
150  */
151 static int create_rstor_token(unsigned long ssp, unsigned long *token_addr)
152 {
153 	unsigned long addr;
154 
155 	/* Token must be aligned */
156 	if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE))
157 		return -EINVAL;
158 
159 	/* On RISC-V we're constructing token to be function of address itself */
160 	addr = ssp - SHSTK_ENTRY_SIZE;
161 
162 	if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1)
163 		return -EFAULT;
164 
165 	if (token_addr)
166 		*token_addr = addr;
167 
168 	return 0;
169 }
170 
171 static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
172 					   unsigned long token_offset, bool set_tok)
173 {
174 	int flags = MAP_ANONYMOUS | MAP_PRIVATE;
175 	struct mm_struct *mm = current->mm;
176 	unsigned long populate;
177 
178 	if (addr)
179 		flags |= MAP_FIXED_NOREPLACE;
180 
181 	mmap_write_lock(mm);
182 	addr = do_mmap(NULL, addr, size, PROT_READ, flags,
183 		       VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
184 	mmap_write_unlock(mm);
185 
186 	if (!set_tok || IS_ERR_VALUE(addr))
187 		goto out;
188 
189 	if (create_rstor_token(addr + token_offset, NULL)) {
190 		vm_munmap(addr, size);
191 		return -EINVAL;
192 	}
193 
194 out:
195 	return addr;
196 }
197 
198 SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags)
199 {
200 	bool set_tok = flags & SHADOW_STACK_SET_TOKEN;
201 	unsigned long aligned_size = 0;
202 
203 	if (!cpu_supports_shadow_stack())
204 		return -EOPNOTSUPP;
205 
206 	/* Anything other than set token should result in invalid param */
207 	if (flags & ~SHADOW_STACK_SET_TOKEN)
208 		return -EINVAL;
209 
210 	/*
211 	 * Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is an available
212 	 * CSR in all modes. CSR accesses are performed using 12bit index programmed in instruction
213 	 * itself. This provides static property on register programming and writes to CSR can't
214 	 * be unintentional from programmer's perspective. As long as programmer has guarded areas
215 	 * which perform writes to CSR_SSP properly, shadow stack pivoting is not possible. Since
216 	 * CSR_SSP is writable by user mode, it itself can setup a shadow stack token subsequent
217 	 * to allocation. Although in order to provide portablity with other architectures (because
218 	 * `map_shadow_stack` is arch agnostic syscall), RISC-V will follow expectation of a token
219 	 * flag in flags and if provided in flags, will setup a token at the base.
220 	 */
221 
222 	/* If there isn't space for a token */
223 	if (set_tok && size < SHSTK_ENTRY_SIZE)
224 		return -ENOSPC;
225 
226 	if (addr && (addr & (PAGE_SIZE - 1)))
227 		return -EINVAL;
228 
229 	aligned_size = PAGE_ALIGN(size);
230 	if (aligned_size < size)
231 		return -EOVERFLOW;
232 
233 	return allocate_shadow_stack(addr, aligned_size, size, set_tok);
234 }
235 
/*
 * Allocate a shadow stack for a new thread during clone/clone3/fork.
 *
 * A separate shadow stack is needed only when CLONE_VM is specified, since
 * the child then runs on a different regular stack supplied by the user.
 * How a user-specified shadow stack would be passed in is still being
 * debated; once that's settled, remove this part of the comment.
 *
 * Returns the new shadow stack top (base + size) on success, 0 when no
 * separate shadow stack is needed (unsupported hardware, shstk disabled,
 * CLONE_VFORK, or !CLONE_VM), or a negative errno encoded in the value on
 * allocation failure.
 */
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk,
				       const struct kernel_clone_args *args)
{
	unsigned long addr, size;

	/* If shadow stack is not supported, return 0 */
	if (!cpu_supports_shadow_stack())
		return 0;

	/*
	 * If shadow stack is not enabled on the new thread, skip any
	 * switch to a new shadow stack.
	 */
	if (!is_shstk_enabled(tsk))
		return 0;

	/*
	 * For CLONE_VFORK the child will share the parents shadow stack.
	 * Set base = 0 and size = 0, this is special means to track this state
	 * so the freeing logic run for child knows to leave it alone.
	 */
	if (args->flags & CLONE_VFORK) {
		set_shstk_base(tsk, 0, 0);
		return 0;
	}

	/*
	 * For !CLONE_VM the child will use a copy of the parents shadow
	 * stack.
	 */
	if (!(args->flags & CLONE_VM))
		return 0;

	/*
	 * reaching here means, CLONE_VM was specified and thus a separate shadow
	 * stack is needed for new cloned thread. Note: below allocation is happening
	 * using current mm.
	 */
	size = calc_shstk_size(args->stack_size);
	addr = allocate_shadow_stack(0, size, 0, false);
	if (IS_ERR_VALUE(addr))
		return addr;

	set_shstk_base(tsk, addr, size);

	/* Shadow stacks grow down: hand back the top of the new region */
	return addr + size;
}
291 
292 void shstk_release(struct task_struct *tsk)
293 {
294 	unsigned long base = 0, size = 0;
295 	/* If shadow stack is not supported or not enabled, nothing to release */
296 	if (!cpu_supports_shadow_stack() || !is_shstk_enabled(tsk))
297 		return;
298 
299 	/*
300 	 * When fork() with CLONE_VM fails, the child (tsk) already has a
301 	 * shadow stack allocated, and exit_thread() calls this function to
302 	 * free it.  In this case the parent (current) and the child share
303 	 * the same mm struct. Move forward only when they're same.
304 	 */
305 	if (!tsk->mm || tsk->mm != current->mm)
306 		return;
307 
308 	/*
309 	 * We know shadow stack is enabled but if base is NULL, then
310 	 * this task is not managing its own shadow stack (CLONE_VFORK). So
311 	 * skip freeing it.
312 	 */
313 	base = get_shstk_base(tsk, &size);
314 	if (!base)
315 		return;
316 
317 	vm_munmap(base, size);
318 	set_shstk_base(tsk, 0, 0);
319 }
320 
321 int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status)
322 {
323 	unsigned long bcfi_status = 0;
324 
325 	if (!cpu_supports_shadow_stack())
326 		return -EINVAL;
327 
328 	/* this means shadow stack is enabled on the task */
329 	bcfi_status |= (is_shstk_enabled(t) ? PR_SHADOW_STACK_ENABLE : 0);
330 
331 	return copy_to_user(status, &bcfi_status, sizeof(bcfi_status)) ? -EFAULT : 0;
332 }
333 
/*
 * prctl() back-end: enable or disable shadow stack for @t according to
 * @status (PR_SHADOW_STACK_* bits). Enabling allocates a fresh shadow
 * stack and makes it active; disabling releases any owned shadow stack.
 */
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status)
{
	unsigned long size = 0, addr = 0;
	bool enable_shstk = false;

	if (!cpu_supports_shadow_stack())
		return -EINVAL;

	/* Reject unknown flags */
	if (status & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK)
		return -EINVAL;

	/* bcfi status is locked and further can't be modified by user */
	if (is_shstk_locked(t))
		return -EINVAL;

	enable_shstk = status & PR_SHADOW_STACK_ENABLE;
	/* Request is to enable shadow stack and shadow stack is not enabled already */
	if (enable_shstk && !is_shstk_enabled(t)) {
		/* shadow stack was allocated and enable request again
		 * no need to support such usecase and return EINVAL.
		 */
		if (is_shstk_allocated(t))
			return -EINVAL;

		size = calc_shstk_size(0);
		/*
		 * NOTE(review): any allocation failure is reported as -ENOMEM,
		 * masking the specific errno from allocate_shadow_stack() —
		 * presumably intentional simplification; confirm.
		 */
		addr = allocate_shadow_stack(0, size, 0, false);
		if (IS_ERR_VALUE(addr))
			return -ENOMEM;
		set_shstk_base(t, addr, size);
		set_active_shstk(t, addr + size);
	}

	/*
	 * If a request to disable shadow stack happens, let's go ahead and release it
	 * Although, if CLONE_VFORKed child did this, then in that case we will end up
	 * not releasing the shadow stack (because it might be needed in parent). Although
	 * we will disable it for VFORKed child. And if VFORKed child tries to enable again
	 * then in that case, it'll get entirely new shadow stack because following condition
	 * are true
	 *  - shadow stack was not enabled for vforked child
	 *  - shadow stack base was anyways pointing to 0
	 * This shouldn't be a big issue because we want parent to have availability of shadow
	 * stack whenever VFORKed child releases resources via exit or exec but at the same
	 * time we want VFORKed child to break away and establish new shadow stack if it desires
	 *
	 */
	if (!enable_shstk)
		shstk_release(t);

	set_shstk_status(t, enable_shstk);
	return 0;
}
387 
388 int arch_lock_shadow_stack_status(struct task_struct *task,
389 				  unsigned long arg)
390 {
391 	/* If shtstk not supported or not enabled on task, nothing to lock here */
392 	if (!cpu_supports_shadow_stack() ||
393 	    !is_shstk_enabled(task) || arg != 0)
394 		return -EINVAL;
395 
396 	set_shstk_lock(task);
397 
398 	return 0;
399 }
400 
401 int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status)
402 {
403 	unsigned long fcfi_status = 0;
404 
405 	if (!cpu_supports_indirect_br_lp_instr())
406 		return -EINVAL;
407 
408 	/* indirect branch tracking is enabled on the task or not */
409 	fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0);
410 
411 	return copy_to_user(status, &fcfi_status, sizeof(fcfi_status)) ? -EFAULT : 0;
412 }
413 
414 int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status)
415 {
416 	bool enable_indir_lp = false;
417 
418 	if (!cpu_supports_indirect_br_lp_instr())
419 		return -EINVAL;
420 
421 	/* indirect branch tracking is locked and further can't be modified by user */
422 	if (is_indir_lp_locked(t))
423 		return -EINVAL;
424 
425 	/* Reject unknown flags */
426 	if (status & ~PR_INDIR_BR_LP_ENABLE)
427 		return -EINVAL;
428 
429 	enable_indir_lp = (status & PR_INDIR_BR_LP_ENABLE);
430 	set_indir_lp_status(t, enable_indir_lp);
431 
432 	return 0;
433 }
434 
435 int arch_lock_indir_br_lp_status(struct task_struct *task,
436 				 unsigned long arg)
437 {
438 	/*
439 	 * If indirect branch tracking is not supported or not enabled on task,
440 	 * nothing to lock here
441 	 */
442 	if (!cpu_supports_indirect_br_lp_instr() ||
443 	    !is_indir_lp_enabled(task) || arg != 0)
444 		return -EINVAL;
445 
446 	set_indir_lp_lock(task);
447 
448 	return 0;
449 }
450