xref: /linux/include/uapi/linux/rseq.h (revision 61706251492eff650e91c58507bc77e1a12c7fbb)
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2 #ifndef _UAPI_LINUX_RSEQ_H
3 #define _UAPI_LINUX_RSEQ_H
4 
/*
 * linux/rseq.h
 *
 * Restartable sequences system call API
 *
 * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 */
12 
13 #include <linux/types.h>
14 #include <asm/byteorder.h>
15 
/*
 * Special values that the cpu_id field of struct rseq can hold in place
 * of a CPU number.
 */
enum rseq_cpu_id_state {
	/* rseq uninitialized */
	RSEQ_CPU_ID_UNINITIALIZED		= -1,
	/* rseq initialization failed */
	RSEQ_CPU_ID_REGISTRATION_FAILED		= -2,
};
20 
/*
 * Bit flags of the restartable sequences system call API. Each value
 * occupies its own bit so that flags can be combined with bitwise OR.
 *
 * NOTE(review): presumably these are passed in the flags argument of the
 * rseq system call - confirm against the system call implementation.
 */
enum rseq_flags {
	RSEQ_FLAG_UNREGISTER			= (1 << 0),
	RSEQ_FLAG_SLICE_EXT_DEFAULT_ON		= (1 << 1),
};
25 
/*
 * Bit positions from which the enum rseq_cs_flags masks are derived
 * (mask = 1U << position).
 */
enum rseq_cs_flags_bit {
	/* Historical and unsupported bits */
	RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT	= 0,
	RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT	= 1,
	RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT	= 2,
	/* (3) Intentional gap to put new bits into a separate byte */

	/* User read only feature flags */
	RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT	= 4,
	RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT	= 5,
};
37 
38 enum rseq_cs_flags {
39 	RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT	=
40 		(1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
41 	RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL	=
42 		(1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
43 	RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE	=
44 		(1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
45 
46 	RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE	=
47 		(1U << RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT),
48 	RSEQ_CS_FLAG_SLICE_EXT_ENABLED		=
49 		(1U << RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT),
50 };
51 
/*
 * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always
 * contained within a single cache-line. It is usually declared as
 * link-time constant data.
 *
 * The fields add up to 4 + 4 + 8 + 8 + 8 = 32 bytes, i.e. exactly the
 * alignment, so the structure carries no tail padding.
 */
struct rseq_cs {
	/* Version of this structure. */
	__u32 version;
	/* enum rseq_cs_flags */
	__u32 flags;
	/*
	 * NOTE(review): presumably the address of the first instruction of
	 * the critical section - confirm against the kernel implementation.
	 */
	__u64 start_ip;
	/* Offset from start_ip. */
	__u64 post_commit_offset;
	/*
	 * NOTE(review): presumably the address execution is redirected to
	 * when the critical section is aborted - confirm against the kernel
	 * implementation.
	 */
	__u64 abort_ip;
} __attribute__((aligned(4 * sizeof(__u64))));
67 
/**
 * struct rseq_slice_ctrl - Time slice extension control structure
 * @all:	Compound value
 * @request:	Request for a time slice extension
 * @granted:	Granted time slice extension
 * @__reserved:	Reserved; fills the remaining 16 bits covered by @all
 *
 * @all overlays @request, @granted and @__reserved, so the whole control
 * word can be accessed as a single 32-bit value.
 *
 * @request is set by user space and can be cleared by user space or kernel
 * space.  @granted is set and cleared by the kernel and must only be read
 * by user space.
 */
struct rseq_slice_ctrl {
	union {
		__u32		all;
		struct {
			__u8	request;
			__u8	granted;
			__u16	__reserved;
		};
	};
};
88 
89 /*
90  * The original size and alignment of the allocation for struct rseq is
91  * 32 bytes.
92  *
93  * The allocation size needs to be greater or equal to
94  * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
95  * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
96  *
97  * As an alternative, userspace is allowed to use both the original size
98  * and alignment of 32 bytes for backward compatibility.
99  *
100  * A single active struct rseq registration per thread is allowed.
101  */
102 struct rseq {
103 	/*
104 	 * Restartable sequences cpu_id_start field. Updated by the
105 	 * kernel. Read by user-space with single-copy atomicity
106 	 * semantics. This field should only be read by the thread which
107 	 * registered this data structure. Aligned on 32-bit. Always
108 	 * contains a value in the range of possible CPUs, although the
109 	 * value may not be the actual current CPU (e.g. if rseq is not
110 	 * initialized). This CPU number value should always be compared
111 	 * against the value of the cpu_id field before performing a rseq
112 	 * commit or returning a value read from a data structure indexed
113 	 * using the cpu_id_start value.
114 	 */
115 	__u32 cpu_id_start;
116 	/*
117 	 * Restartable sequences cpu_id field. Updated by the kernel.
118 	 * Read by user-space with single-copy atomicity semantics. This
119 	 * field should only be read by the thread which registered this
120 	 * data structure. Aligned on 32-bit. Values
121 	 * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
122 	 * have a special semantic: the former means "rseq uninitialized",
123 	 * and latter means "rseq initialization failed". This value is
124 	 * meant to be read within rseq critical sections and compared
125 	 * with the cpu_id_start value previously read, before performing
126 	 * the commit instruction, or read and compared with the
127 	 * cpu_id_start value before returning a value loaded from a data
128 	 * structure indexed using the cpu_id_start value.
129 	 */
130 	__u32 cpu_id;
131 	/*
132 	 * Restartable sequences rseq_cs field.
133 	 *
134 	 * Contains NULL when no critical section is active for the current
135 	 * thread, or holds a pointer to the currently active struct rseq_cs.
136 	 *
137 	 * Updated by user-space, which sets the address of the currently
138 	 * active rseq_cs at the beginning of assembly instruction sequence
139 	 * block, and set to NULL by the kernel when it restarts an assembly
140 	 * instruction sequence block, as well as when the kernel detects that
141 	 * it is preempting or delivering a signal outside of the range
142 	 * targeted by the rseq_cs. Also needs to be set to NULL by user-space
143 	 * before reclaiming memory that contains the targeted struct rseq_cs.
144 	 *
145 	 * Read and set by the kernel. Set by user-space with single-copy
146 	 * atomicity semantics. This field should only be updated by the
147 	 * thread which registered this data structure. Aligned on 64-bit.
148 	 *
149 	 * 32-bit architectures should update the low order bits of the
150 	 * rseq_cs field, leaving the high order bits initialized to 0.
151 	 */
152 	__u64 rseq_cs;
153 
154 	/*
155 	 * Restartable sequences flags field.
156 	 *
157 	 * This field was initially intended to allow event masking for
158 	 * single-stepping through rseq critical sections with debuggers.
159 	 * The kernel does not support this anymore and the relevant bits
160 	 * are checked for being always false:
161 	 *	- RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
162 	 *	- RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
163 	 *	- RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
164 	 */
165 	__u32 flags;
166 
167 	/*
168 	 * Restartable sequences node_id field. Updated by the kernel. Read by
169 	 * user-space with single-copy atomicity semantics. This field should
170 	 * only be read by the thread which registered this data structure.
171 	 * Aligned on 32-bit. Contains the current NUMA node ID.
172 	 */
173 	__u32 node_id;
174 
175 	/*
176 	 * Restartable sequences mm_cid field. Updated by the kernel. Read by
177 	 * user-space with single-copy atomicity semantics. This field should
178 	 * only be read by the thread which registered this data structure.
179 	 * Aligned on 32-bit. Contains the current thread's concurrency ID
180 	 * (allocated uniquely within a memory map).
181 	 */
182 	__u32 mm_cid;
183 
184 	/*
185 	 * Time slice extension control structure. CPU local updates from
186 	 * kernel and user space.
187 	 */
188 	struct rseq_slice_ctrl slice_ctrl;
189 
190 	/*
191 	 * Before rseq became extensible, its original size was 32 bytes even
192 	 * though the active rseq area was only 20 bytes.
193 	 * Exposing a 32 bytes feature size would make life needlessly painful
194 	 * for userspace. Therefore, add a reserved byte after byte 32
195 	 * to bump the rseq feature size from 32 to 33.
196 	 * The next field to be added to the rseq area will be larger
197 	 * than one byte, and will replace this reserved byte.
198 	 */
199 	__u8 __reserved;
200 
201 	/*
202 	 * Flexible array member at end of structure, after last feature field.
203 	 */
204 	char end[];
205 } __attribute__((aligned(32)));
206 
207 #endif /* _UAPI_LINUX_RSEQ_H */
208