xref: /linux/include/linux/randomize_kstack.h (revision 1ff297584fad2eef390f212b860e0fbb7363e0e8)
139218ff4SKees Cook /* SPDX-License-Identifier: GPL-2.0-only */
239218ff4SKees Cook #ifndef _LINUX_RANDOMIZE_KSTACK_H
339218ff4SKees Cook #define _LINUX_RANDOMIZE_KSTACK_H
439218ff4SKees Cook 
58cb37a59SMarco Elver #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
639218ff4SKees Cook #include <linux/kernel.h>
739218ff4SKees Cook #include <linux/jump_label.h>
839218ff4SKees Cook #include <linux/percpu-defs.h>
939218ff4SKees Cook 
1039218ff4SKees Cook DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
1139218ff4SKees Cook 			 randomize_kstack_offset);
1239218ff4SKees Cook DECLARE_PER_CPU(u32, kstack_offset);
1339218ff4SKees Cook 
/*
 * __kstack_alloca is private to this header; do not use it anywhere else
 * in the kernel. It exists because alloca gives an arch-agnostic way to
 * grow the stack while keeping correct alignment. The requested size is
 * explicitly masked to at most 10 bits, so stack-clash style attacks are
 * unlikely. See "VLAs" in Documentation/process/deprecated.rst for more
 * background.
 *
 * With INIT_STACK_ALL the plain __builtin_alloca() result gets initialized
 * (currently only by Clang, not GCC). Paying for that initialization on
 * every syscall entry is expensive, and the implicit memset() call it may
 * generate can itself be a problem (for example in noinstr functions).
 * So whenever the compiler offers the "uninitialized" flavour of the
 * builtin (which it should if it initializes allocas), that one is used;
 * the plain builtin is only the fallback.
 */
#if !__has_builtin(__builtin_alloca_uninitialized)
#define __kstack_alloca __builtin_alloca
#else
#define __kstack_alloca __builtin_alloca_uninitialized
#endif
33efa90c11SMarco Elver 
/*
 * Keep only the low 10 bits of @x, i.e. use at most 10 bits of entropy.
 * The cap is explicit so that the "VLA" above can never be unbounded.
 * Ten bits still leaves headroom on both ends: per-arch offset masks may
 * strip higher bits (high entropy may overly constrain usable stack
 * space), and compiler/arch-specific stack alignment may strip the
 * lower bits.
 */
#define KSTACK_OFFSET_MAX(x)	((x) & ((1 << 10) - 1))
4239218ff4SKees Cook 
/**
 * add_random_kstack_offset - Increase stack utilization by previously
 *			      chosen random offset
 *
 * Intended for the syscall entry path, with interrupts and preemption
 * disabled, and only after the user registers have been saved to the
 * stack. When the randomize_kstack_offset static key is enabled, the
 * per-CPU kstack_offset value is read, capped by KSTACK_OFFSET_MAX() and
 * that many bytes are allocated on the stack via __kstack_alloca().
 *
 * To test the resulting entropy, see:
 * tools/testing/selftests/lkdtm/stack-entropy.sh
 */
#define add_random_kstack_offset() do {					\
	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
				&randomize_kstack_offset)) {		\
		u32 __kstack_cur = raw_cpu_read(kstack_offset);		\
		u8 *__kstack_pad =					\
			__kstack_alloca(KSTACK_OFFSET_MAX(__kstack_cur)); \
		/* Pin the allocation: it must outlive the pointer. */	\
		asm volatile("" :: "r"(__kstack_pad) : "memory");	\
	}								\
} while (0)
6139218ff4SKees Cook 
/**
 * choose_random_kstack_offset - Choose the random offset for the next
 *				 add_random_kstack_offset()
 * @rand: value XORed into the per-CPU kstack_offset; evaluated at most
 *	  once, and only when the randomize_kstack_offset static key is
 *	  enabled.
 *
 * This should only be used during syscall exit when interrupts and
 * preempt are disabled. This position in the syscall flow is done to
 * frustrate attacks from userspace attempting to learn the next offset:
 * - Maximize the timing uncertainty visible from userspace: if the
 *   offset is chosen at syscall entry, userspace has much more control
 *   over the timing between choosing offsets. "How long will we be in
 *   kernel mode?" tends to be more difficult to predict than "how long
 *   will we be in user mode?"
 * - Reduce the lifetime of the new offset sitting in memory during
 *   kernel mode execution. Exposure of "thread-local" memory content
 *   (e.g. current, percpu, etc) tends to be easier than arbitrary
 *   location memory exposure.
 *
 * The macro-local temporary deliberately uses a reserved-style name:
 * @rand is expanded after the temporary is declared, so a plain name
 * such as "offset" would capture a caller expression that mentions a
 * variable of the same name and silently XOR the value with itself.
 */
#define choose_random_kstack_offset(rand) do {				\
	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
				&randomize_kstack_offset)) {		\
		u32 __kstack_next = raw_cpu_read(kstack_offset);	\
		/* Mix the new randomness into the stored value. */	\
		__kstack_next ^= (rand);				\
		raw_cpu_write(kstack_offset, __kstack_next);		\
	}								\
} while (0)
878cb37a59SMarco Elver #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
/*
 * Stack offset randomization is compiled out: both hooks expand to an
 * empty statement, and the argument of choose_random_kstack_offset() is
 * never evaluated.
 */
#define add_random_kstack_offset()		do { /* nothing */ } while (0)
#define choose_random_kstack_offset(rand)	do { /* nothing */ } while (0)
908cb37a59SMarco Elver #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
9139218ff4SKees Cook 
9239218ff4SKees Cook #endif
93