xref: /linux/kernel/unwind/deferred.c (revision 2dffa355f6c279e7d2e574abf9446c41a631c9e5)
// SPDX-License-Identifier: GPL-2.0
/*
 * Deferred user space unwinding
 */
#include <linux/sched/task_stack.h>
#include <linux/unwind_deferred.h>
#include <linux/sched/clock.h>
#include <linux/task_work.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/mm.h>

/* Make the cache fit in a 4K page */
#define UNWIND_MAX_ENTRIES					\
	((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))
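
/*
 * For a sense of scale (illustrative only): assuming struct unwind_cache
 * is just a small header (an entry count) in front of its flexible
 * entries[] array, a 64-bit kernel with 8-byte longs gets roughly
 * (4096 - 8) / 8 = 511 cached entries out of the 4K page.
 */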

/* Guards adding to and reading the list of callbacks */
static DEFINE_MUTEX(callback_mutex);
static LIST_HEAD(callbacks);

/*
 * This is a unique percpu identifier for a given task entry context.
 * Conceptually, it's incremented every time the CPU enters the kernel from
 * user space, so that each "entry context" on the CPU gets a unique ID.  In
 * reality, as an optimization, it's only incremented on demand for the first
 * deferred unwind request after a given entry-from-user.
 *
 * It's combined with the CPU id to make a systemwide-unique "context cookie".
 */
static DEFINE_PER_CPU(u32, unwind_ctx_ctr);
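
/*
 * The per-task id used below (info->id.cpu, info->id.cnt, info->id.id) is
 * defined in <linux/unwind_deferred_types.h>.  As a rough sketch (the
 * header is authoritative), it looks something like:
 *
 *	union unwind_task_id {
 *		struct {
 *			u32	cpu;
 *			u32	cnt;
 *		};
 *		u64	id;
 *	};
 *
 * The u64 overlay of the non-zero CPU number and the odd per-CPU count is
 * what get_cookie() hands out as the "context cookie".
 */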

/*
 * The context cookie is a unique identifier that is assigned to a user
 * space stacktrace. As the user space stacktrace remains the same while
 * the task is in the kernel, the cookie is an identifier for that
 * stacktrace. The stacktrace may still get a new cookie if another
 * request is made after the cookie was cleared and before the task
 * reenters user space.
 */
static u64 get_cookie(struct unwind_task_info *info)
{
	u32 cnt = 1;
	u32 old = 0;

	if (info->id.cpu)
		return info->id.id;

	/* LSB is always set to ensure 0 is an invalid value */
	cnt |= __this_cpu_read(unwind_ctx_ctr) + 2;
	if (try_cmpxchg(&info->id.cnt, &old, cnt)) {
		/* Update the per cpu counter */
		__this_cpu_write(unwind_ctx_ctr, cnt);
	}
	/* Interrupts are disabled, the CPU will always be the same */
	info->id.cpu = smp_processor_id() + 1; /* Must be non zero */

	return info->id.id;
}
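
/*
 * The counter math above, worked through for illustration: if the per-CPU
 * counter last read 4, the first request after entering the kernel stores
 * (4 + 2) | 1 = 7 in both the task id and the per-CPU counter; the first
 * request of the next entry context then gets (7 + 2) | 1 = 9, and so on.
 * The count is always odd and therefore never zero.
 */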

/**
 * unwind_user_faultable - Produce a user stacktrace in faultable context
 * @trace: The descriptor that will store the user stacktrace
 *
 * This must be called in a known faultable context (usually when entering
 * or exiting user space). Depending on the available implementations,
 * @trace will be loaded with the addresses of the user space stacktrace
 * if it can be found.
 *
 * Return: 0 on success and negative on error
 *         On success @trace will contain the user space stacktrace
 */
int unwind_user_faultable(struct unwind_stacktrace *trace)
{
	struct unwind_task_info *info = &current->unwind_info;
	struct unwind_cache *cache;

	/* Should always be called from faultable context */
	might_fault();

	if (current->flags & PF_EXITING)
		return -EINVAL;

	if (!info->cache) {
		info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES),
				      GFP_KERNEL);
		if (!info->cache)
			return -ENOMEM;
	}

	cache = info->cache;
	trace->entries = cache->entries;

	if (cache->nr_entries) {
		/*
		 * The user stack has already been unwound in this entry
		 * context.  Skip the unwind and use the cache.
		 */
		trace->nr = cache->nr_entries;
		return 0;
	}

	trace->nr = 0;
	unwind_user(trace, UNWIND_MAX_ENTRIES);

	cache->nr_entries = trace->nr;

	return 0;
}
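
/*
 * Minimal usage sketch (illustrative only, not part of this file): a caller
 * known to be in faultable context could dump the current task's user stack
 * with something like:
 *
 *	struct unwind_stacktrace trace;
 *
 *	if (!unwind_user_faultable(&trace)) {
 *		for (unsigned int i = 0; i < trace.nr; i++)
 *			pr_debug("user frame %u: %lx\n", i, trace.entries[i]);
 *	}
 *
 * The entries point into the per-task cache above, which is reused for
 * every request within the same entry context.
 */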

static void unwind_deferred_task_work(struct callback_head *head)
{
	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
	struct unwind_stacktrace trace;
	struct unwind_work *work;
	u64 cookie;

	if (WARN_ON_ONCE(!info->pending))
		return;

	/* Allow work to come in again */
	WRITE_ONCE(info->pending, 0);

	/*
	 * From here on out, the callback must always be called, even if it's
	 * just an empty trace.
	 */
	trace.nr = 0;
	trace.entries = NULL;

	unwind_user_faultable(&trace);

	cookie = info->id.id;

	guard(mutex)(&callback_mutex);
	list_for_each_entry(work, &callbacks, list) {
		work->func(work, &trace, cookie);
	}
}

/**
 * unwind_deferred_request - Request a user stacktrace on task kernel exit
 * @work: Unwind descriptor requesting the trace
 * @cookie: The cookie of the first request made for this task
 *
 * Schedule a user space unwind to be done in task work before exiting the
 * kernel.
 *
 * The returned @cookie output is the generated cookie of the very first
 * request for a user space stacktrace for this task since it entered the
 * kernel. It can be from a request by any caller of this infrastructure.
 * Its value will also be passed to the callback function.  It can be
 * used to stitch kernel and user stack traces together in post-processing.
 *
 * It's valid to call this function multiple times for the same @work within
 * the same task entry context.  Each call will return the same cookie
 * while the task hasn't left the kernel.  If the callback is not pending
 * because it has already been called for the same entry context, it will
 * be called again with the same stack trace and cookie.
 *
 * Return: 1 if the callback was already queued.
 *         0 if the callback was successfully queued.
 *         Negative if there's an error.
 *         @cookie holds the cookie of the first request by any user
 */
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
	struct unwind_task_info *info = &current->unwind_info;
	int ret;

	*cookie = 0;

	if (WARN_ON_ONCE(in_nmi()))
		return -EINVAL;

	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
	    !user_mode(task_pt_regs(current)))
		return -EINVAL;

	guard(irqsave)();

	*cookie = get_cookie(info);

	/* callback already pending? */
	if (info->pending)
		return 1;

	/* The work has been claimed, now schedule it. */
	ret = task_work_add(current, &info->work, TWA_RESUME);
	if (WARN_ON_ONCE(ret))
		return ret;

	info->pending = 1;
	return 0;
}
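
/*
 * Putting the pieces together (a hedged sketch, not code from this file;
 * "my_unwind_cb" and "my_work" are hypothetical names): a tracer registers
 * a callback once, requests deferred unwinds from its events, and
 * unregisters on teardown.
 *
 *	static void my_unwind_cb(struct unwind_work *work,
 *				 struct unwind_stacktrace *trace, u64 cookie)
 *	{
 *		// Runs in task work on return to user space; record
 *		// trace->entries[0..trace->nr) keyed by @cookie.
 *	}
 *
 *	static struct unwind_work my_work;
 *
 *	// Registration, e.g. at tracer init:
 *	unwind_deferred_init(&my_work, my_unwind_cb);
 *
 *	// From a (non-NMI) event while the task is in the kernel:
 *	u64 cookie;
 *	int ret = unwind_deferred_request(&my_work, &cookie);
 *	// ret == 0: queued; ret == 1: already pending for this entry
 *	// context; ret < 0: error.  @cookie ties the later user stack
 *	// trace back to the kernel-side event.
 *
 *	// Teardown:
 *	unwind_deferred_cancel(&my_work);
 */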

void unwind_deferred_cancel(struct unwind_work *work)
{
	if (!work)
		return;

	guard(mutex)(&callback_mutex);
	list_del(&work->list);
}

int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
{
	memset(work, 0, sizeof(*work));

	guard(mutex)(&callback_mutex);
	list_add(&work->list, &callbacks);
	work->func = func;
	return 0;
}

void unwind_task_init(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;

	memset(info, 0, sizeof(*info));
	init_task_work(&info->work, unwind_deferred_task_work);
}

void unwind_task_free(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;

	kfree(info->cache);
	task_work_cancel(task, &info->work);
}