// SPDX-License-Identifier: GPL-2.0
/*
 * Deferred user space unwinding
 */
#include <linux/sched/task_stack.h>
#include <linux/unwind_deferred.h>
#include <linux/sched/clock.h>
#include <linux/task_work.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/mm.h>

/* Make the cache fit in a 4K page */
#define UNWIND_MAX_ENTRIES					\
	((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))
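
/*
 * As a worked example (an assumption for illustration, not something this
 * file relies on): with 8-byte longs and sizeof(struct unwind_cache)
 * rounding up to a single long, UNWIND_MAX_ENTRIES comes out to
 * (4096 - 8) / 8 = 511 cached entries per task.
 */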

/* Guards adding to and reading the list of callbacks */
static DEFINE_MUTEX(callback_mutex);
static LIST_HEAD(callbacks);

/*
 * This is a unique percpu identifier for a given task entry context.
 * Conceptually, it's incremented every time the CPU enters the kernel from
 * user space, so that each "entry context" on the CPU gets a unique ID.  In
 * reality, as an optimization, it's only incremented on demand for the first
 * deferred unwind request after a given entry-from-user.
 *
 * It's combined with the CPU id to make a systemwide-unique "context cookie".
 */
static DEFINE_PER_CPU(u32, unwind_ctx_ctr);

/*
 * The context cookie is a unique identifier that is assigned to a user
 * space stacktrace. As the user space stacktrace remains the same while
 * the task is in the kernel, the cookie identifies that stacktrace.
 * The stacktrace can, however, get another cookie if a new request is made
 * after the cookie was cleared and before the task reenters user space.
 */
static u64 get_cookie(struct unwind_task_info *info)
{
	u32 cnt = 1;
	u32 old = 0;

	if (info->id.cpu)
		return info->id.id;

	/* LSB is always set to ensure 0 is an invalid value */
	cnt |= __this_cpu_read(unwind_ctx_ctr) + 2;
	if (try_cmpxchg(&info->id.cnt, &old, cnt)) {
		/* Update the per cpu counter */
		__this_cpu_write(unwind_ctx_ctr, cnt);
	}
	/* Interrupts are disabled, so the CPU will always be the same */
	info->id.cpu = smp_processor_id() + 1; /* Must be non-zero */

	return info->id.id;
}
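
/*
 * For reference, get_cookie() assumes a per-task id laid out roughly like
 * the sketch below (the real definition lives in
 * <linux/unwind_deferred_types.h>; this is only an illustration):
 *
 *	union unwind_task_id {
 *		struct {
 *			u32	cpu;
 *			u32	cnt;
 *		};
 *		u64	id;
 *	};
 *
 * The 64-bit @id is the "context cookie": one half holds the CPU number
 * plus one (so it is never zero) and the other half holds the odd-valued
 * per-CPU counter, which makes the cookie unique system wide for this
 * entry context.
 */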
61*2dffa355SJosh Poimboeuf 
625e32d0f1SSteven Rostedt /**
635e32d0f1SSteven Rostedt  * unwind_user_faultable - Produce a user stacktrace in faultable context
645e32d0f1SSteven Rostedt  * @trace: The descriptor that will store the user stacktrace
655e32d0f1SSteven Rostedt  *
665e32d0f1SSteven Rostedt  * This must be called in a known faultable context (usually when entering
675e32d0f1SSteven Rostedt  * or exiting user space). Depending on the available implementations
685e32d0f1SSteven Rostedt  * the @trace will be loaded with the addresses of the user space stacktrace
695e32d0f1SSteven Rostedt  * if it can be found.
705e32d0f1SSteven Rostedt  *
715e32d0f1SSteven Rostedt  * Return: 0 on success and negative on error
725e32d0f1SSteven Rostedt  *         On success @trace will contain the user space stacktrace
735e32d0f1SSteven Rostedt  */
745e32d0f1SSteven Rostedt int unwind_user_faultable(struct unwind_stacktrace *trace)
755e32d0f1SSteven Rostedt {
765e32d0f1SSteven Rostedt 	struct unwind_task_info *info = &current->unwind_info;
77b9c73524SJosh Poimboeuf 	struct unwind_cache *cache;
785e32d0f1SSteven Rostedt 
795e32d0f1SSteven Rostedt 	/* Should always be called from faultable context */
805e32d0f1SSteven Rostedt 	might_fault();
815e32d0f1SSteven Rostedt 
825e32d0f1SSteven Rostedt 	if (current->flags & PF_EXITING)
835e32d0f1SSteven Rostedt 		return -EINVAL;
845e32d0f1SSteven Rostedt 
85b9c73524SJosh Poimboeuf 	if (!info->cache) {
86b9c73524SJosh Poimboeuf 		info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES),
875e32d0f1SSteven Rostedt 				      GFP_KERNEL);
88b9c73524SJosh Poimboeuf 		if (!info->cache)
895e32d0f1SSteven Rostedt 			return -ENOMEM;
905e32d0f1SSteven Rostedt 	}
915e32d0f1SSteven Rostedt 
92b9c73524SJosh Poimboeuf 	cache = info->cache;
93b9c73524SJosh Poimboeuf 	trace->entries = cache->entries;
94b9c73524SJosh Poimboeuf 
95b9c73524SJosh Poimboeuf 	if (cache->nr_entries) {
96b9c73524SJosh Poimboeuf 		/*
97b9c73524SJosh Poimboeuf 		 * The user stack has already been previously unwound in this
98b9c73524SJosh Poimboeuf 		 * entry context.  Skip the unwind and use the cache.
99b9c73524SJosh Poimboeuf 		 */
100b9c73524SJosh Poimboeuf 		trace->nr = cache->nr_entries;
101b9c73524SJosh Poimboeuf 		return 0;
102b9c73524SJosh Poimboeuf 	}
103b9c73524SJosh Poimboeuf 
1045e32d0f1SSteven Rostedt 	trace->nr = 0;
1055e32d0f1SSteven Rostedt 	unwind_user(trace, UNWIND_MAX_ENTRIES);
1065e32d0f1SSteven Rostedt 
107b9c73524SJosh Poimboeuf 	cache->nr_entries = trace->nr;
108b9c73524SJosh Poimboeuf 
1095e32d0f1SSteven Rostedt 	return 0;
1105e32d0f1SSteven Rostedt }
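
/*
 * A minimal usage sketch (the caller and record_user_frame() are
 * hypothetical, for illustration only): from a context where faulting is
 * allowed, such as task work run on the way back to user space, the trace
 * can be collected and consumed directly:
 *
 *	struct unwind_stacktrace trace;
 *	unsigned int i;
 *
 *	if (!unwind_user_faultable(&trace)) {
 *		for (i = 0; i < trace.nr; i++)
 *			record_user_frame(trace.entries[i]);
 *	}
 *
 * where record_user_frame() stands in for whatever the tracer does with
 * each user space return address.
 */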

static void unwind_deferred_task_work(struct callback_head *head)
{
	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
	struct unwind_stacktrace trace;
	struct unwind_work *work;
	u64 cookie;

	if (WARN_ON_ONCE(!info->pending))
		return;

	/* Allow work to come in again */
	WRITE_ONCE(info->pending, 0);

	/*
	 * From here on out, the callback must always be called, even if it's
	 * just an empty trace.
	 */
	trace.nr = 0;
	trace.entries = NULL;

	unwind_user_faultable(&trace);

	cookie = info->id.id;

	guard(mutex)(&callback_mutex);
	list_for_each_entry(work, &callbacks, list) {
		work->func(work, &trace, cookie);
	}
}

/**
 * unwind_deferred_request - Request a user stacktrace on task kernel exit
 * @work: Unwind descriptor requesting the trace
 * @cookie: The cookie of the first request made for this task
 *
 * Schedule a user space unwind to be done in task work before exiting the
 * kernel.
 *
 * The returned @cookie output is the generated cookie of the very first
 * request for a user space stacktrace for this task since it entered the
 * kernel. It can be from a request by any caller of this infrastructure.
 * Its value will also be passed to the callback function.  It can be
 * used to stitch kernel and user stack traces together in post-processing.
 *
 * It's valid to call this function multiple times for the same @work within
 * the same task entry context.  Each call will return the same cookie
 * while the task hasn't left the kernel. If the callback is not pending
 * because it has already run for the same entry context, it will be called
 * again with the same stack trace and cookie.
 *
 * Return: 1 if the callback was already queued.
 *         0 if the callback was successfully queued.
 *         Negative if there's an error.
 *         @cookie holds the cookie of the first request by any user
 */
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
	struct unwind_task_info *info = &current->unwind_info;
	int ret;

	*cookie = 0;

	if (WARN_ON_ONCE(in_nmi()))
		return -EINVAL;

	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
	    !user_mode(task_pt_regs(current)))
		return -EINVAL;

	guard(irqsave)();

	*cookie = get_cookie(info);

	/* callback already pending? */
	if (info->pending)
		return 1;

	/* The work has been claimed, now schedule it. */
	ret = task_work_add(current, &info->work, TWA_RESUME);
	if (WARN_ON_ONCE(ret))
		return ret;

	info->pending = 1;
	return 0;
}
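
/*
 * A minimal end-to-end sketch of how a tracer might wire this up (the
 * my_*() names are hypothetical, not part of this API). At init time the
 * tracer registers a callback:
 *
 *	static void my_unwind_cb(struct unwind_work *work,
 *				 struct unwind_stacktrace *trace, u64 cookie)
 *	{
 *		my_emit_user_stack(cookie, trace->entries, trace->nr);
 *	}
 *
 *	static struct unwind_work my_work;
 *
 *	unwind_deferred_init(&my_work, my_unwind_cb);
 *
 * Then, when an event fires in task context (for example an interrupt that
 * landed while the task was running user space code), the tracer tags its
 * kernel-side record with the cookie and defers the user unwind:
 *
 *	u64 cookie;
 *
 *	if (unwind_deferred_request(&my_work, &cookie) >= 0)
 *		my_emit_kernel_event(cookie);
 *
 * The callback later runs from task work with the same cookie, letting the
 * kernel and user parts of the trace be stitched together in post-processing.
 */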

void unwind_deferred_cancel(struct unwind_work *work)
{
	if (!work)
		return;

	guard(mutex)(&callback_mutex);
	list_del(&work->list);
}

int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
{
	memset(work, 0, sizeof(*work));

	guard(mutex)(&callback_mutex);
	list_add(&work->list, &callbacks);
	work->func = func;
	return 0;
}

void unwind_task_init(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;

	memset(info, 0, sizeof(*info));
	init_task_work(&info->work, unwind_deferred_task_work);
}

void unwind_task_free(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;

	kfree(info->cache);
	task_work_cancel(task, &info->work);
}