xref: /linux/kernel/unwind/deferred.c (revision 357eda2d745054eb737397368bc9b0f84814b0a5)
15e32d0f1SSteven Rostedt // SPDX-License-Identifier: GPL-2.0
25e32d0f1SSteven Rostedt /*
35e32d0f1SSteven Rostedt  * Deferred user space unwinding
45e32d0f1SSteven Rostedt  */
52dffa355SJosh Poimboeuf #include <linux/sched/task_stack.h>
62dffa355SJosh Poimboeuf #include <linux/unwind_deferred.h>
72dffa355SJosh Poimboeuf #include <linux/sched/clock.h>
82dffa355SJosh Poimboeuf #include <linux/task_work.h>
95e32d0f1SSteven Rostedt #include <linux/kernel.h>
105e32d0f1SSteven Rostedt #include <linux/sched.h>
11b9c73524SJosh Poimboeuf #include <linux/sizes.h>
125e32d0f1SSteven Rostedt #include <linux/slab.h>
132dffa355SJosh Poimboeuf #include <linux/mm.h>
145e32d0f1SSteven Rostedt 
15055c7060SSteven Rostedt /*
16055c7060SSteven Rostedt  * For requesting a deferred user space stack trace from NMI context
17055c7060SSteven Rostedt  * the architecture must support a safe cmpxchg in NMI context.
18055c7060SSteven Rostedt  * For those architectures that do not have that, then it cannot ask
19055c7060SSteven Rostedt  * for a deferred user space stack trace from an NMI context. If it
20055c7060SSteven Rostedt  * does, then it will get -EINVAL.
21055c7060SSteven Rostedt  */
22055c7060SSteven Rostedt #if defined(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG)
23055c7060SSteven Rostedt # define CAN_USE_IN_NMI		1
24055c7060SSteven Rostedt static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
25055c7060SSteven Rostedt {
26055c7060SSteven Rostedt 	u32 old = 0;
27055c7060SSteven Rostedt 
28055c7060SSteven Rostedt 	return try_cmpxchg(&info->id.cnt, &old, cnt);
29055c7060SSteven Rostedt }
30055c7060SSteven Rostedt #else
31055c7060SSteven Rostedt # define CAN_USE_IN_NMI		0
32055c7060SSteven Rostedt /* When NMIs are not allowed, this always succeeds */
33055c7060SSteven Rostedt static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
34055c7060SSteven Rostedt {
35055c7060SSteven Rostedt 	info->id.cnt = cnt;
36055c7060SSteven Rostedt 	return true;
37055c7060SSteven Rostedt }
38055c7060SSteven Rostedt #endif
39055c7060SSteven Rostedt 
40b9c73524SJosh Poimboeuf /* Make the cache fit in a 4K page */
41b9c73524SJosh Poimboeuf #define UNWIND_MAX_ENTRIES					\
42b9c73524SJosh Poimboeuf 	((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))
435e32d0f1SSteven Rostedt 
44*357eda2dSSteven Rostedt /* Guards adding to or removing from the list of callbacks */
452dffa355SJosh Poimboeuf static DEFINE_MUTEX(callback_mutex);
462dffa355SJosh Poimboeuf static LIST_HEAD(callbacks);
472dffa355SJosh Poimboeuf 
48858fa8a3SSteven Rostedt #define RESERVED_BITS	(UNWIND_PENDING | UNWIND_USED)
49be3d526aSSteven Rostedt 
50be3d526aSSteven Rostedt /* Zero'd bits are available for assigning callback users */
51be3d526aSSteven Rostedt static unsigned long unwind_mask = RESERVED_BITS;
52*357eda2dSSteven Rostedt DEFINE_STATIC_SRCU(unwind_srcu);
53be3d526aSSteven Rostedt 
54be3d526aSSteven Rostedt static inline bool unwind_pending(struct unwind_task_info *info)
55be3d526aSSteven Rostedt {
56be3d526aSSteven Rostedt 	return test_bit(UNWIND_PENDING_BIT, &info->unwind_mask);
57be3d526aSSteven Rostedt }
58be3d526aSSteven Rostedt 
592dffa355SJosh Poimboeuf /*
602dffa355SJosh Poimboeuf  * This is a unique percpu identifier for a given task entry context.
612dffa355SJosh Poimboeuf  * Conceptually, it's incremented every time the CPU enters the kernel from
622dffa355SJosh Poimboeuf  * user space, so that each "entry context" on the CPU gets a unique ID.  In
632dffa355SJosh Poimboeuf  * reality, as an optimization, it's only incremented on demand for the first
642dffa355SJosh Poimboeuf  * deferred unwind request after a given entry-from-user.
652dffa355SJosh Poimboeuf  *
662dffa355SJosh Poimboeuf  * It's combined with the CPU id to make a systemwide-unique "context cookie".
672dffa355SJosh Poimboeuf  */
682dffa355SJosh Poimboeuf static DEFINE_PER_CPU(u32, unwind_ctx_ctr);
692dffa355SJosh Poimboeuf 
702dffa355SJosh Poimboeuf /*
712dffa355SJosh Poimboeuf  * The context cookie is a unique identifier that is assigned to a user
722dffa355SJosh Poimboeuf  * space stacktrace. As the user space stacktrace remains the same while
732dffa355SJosh Poimboeuf  * the task is in the kernel, the cookie is an identifier for the stacktrace.
742dffa355SJosh Poimboeuf  * Although it is possible for the stacktrace to get another cookie if another
752dffa355SJosh Poimboeuf  * request is made after the cookie was cleared and before reentering user
762dffa355SJosh Poimboeuf  * space.
772dffa355SJosh Poimboeuf  */
782dffa355SJosh Poimboeuf static u64 get_cookie(struct unwind_task_info *info)
792dffa355SJosh Poimboeuf {
802dffa355SJosh Poimboeuf 	u32 cnt = 1;
812dffa355SJosh Poimboeuf 
822dffa355SJosh Poimboeuf 	if (info->id.cpu)
832dffa355SJosh Poimboeuf 		return info->id.id;
842dffa355SJosh Poimboeuf 
852dffa355SJosh Poimboeuf 	/* LSB is always set to ensure 0 is an invalid value */
862dffa355SJosh Poimboeuf 	cnt |= __this_cpu_read(unwind_ctx_ctr) + 2;
87055c7060SSteven Rostedt 	if (try_assign_cnt(info, cnt)) {
882dffa355SJosh Poimboeuf 		/* Update the per cpu counter */
892dffa355SJosh Poimboeuf 		__this_cpu_write(unwind_ctx_ctr, cnt);
902dffa355SJosh Poimboeuf 	}
912dffa355SJosh Poimboeuf 	/* Interrupts are disabled, the CPU will always be same */
922dffa355SJosh Poimboeuf 	info->id.cpu = smp_processor_id() + 1; /* Must be non zero */
932dffa355SJosh Poimboeuf 
942dffa355SJosh Poimboeuf 	return info->id.id;
952dffa355SJosh Poimboeuf }
962dffa355SJosh Poimboeuf 
975e32d0f1SSteven Rostedt /**
985e32d0f1SSteven Rostedt  * unwind_user_faultable - Produce a user stacktrace in faultable context
995e32d0f1SSteven Rostedt  * @trace: The descriptor that will store the user stacktrace
1005e32d0f1SSteven Rostedt  *
1015e32d0f1SSteven Rostedt  * This must be called in a known faultable context (usually when entering
1025e32d0f1SSteven Rostedt  * or exiting user space). Depending on the available implementations
1035e32d0f1SSteven Rostedt  * the @trace will be loaded with the addresses of the user space stacktrace
1045e32d0f1SSteven Rostedt  * if it can be found.
1055e32d0f1SSteven Rostedt  *
1065e32d0f1SSteven Rostedt  * Return: 0 on success and negative on error
1075e32d0f1SSteven Rostedt  *         On success @trace will contain the user space stacktrace
1085e32d0f1SSteven Rostedt  */
1095e32d0f1SSteven Rostedt int unwind_user_faultable(struct unwind_stacktrace *trace)
1105e32d0f1SSteven Rostedt {
1115e32d0f1SSteven Rostedt 	struct unwind_task_info *info = &current->unwind_info;
112b9c73524SJosh Poimboeuf 	struct unwind_cache *cache;
1135e32d0f1SSteven Rostedt 
1145e32d0f1SSteven Rostedt 	/* Should always be called from faultable context */
1155e32d0f1SSteven Rostedt 	might_fault();
1165e32d0f1SSteven Rostedt 
1175e32d0f1SSteven Rostedt 	if (current->flags & PF_EXITING)
1185e32d0f1SSteven Rostedt 		return -EINVAL;
1195e32d0f1SSteven Rostedt 
120b9c73524SJosh Poimboeuf 	if (!info->cache) {
121b9c73524SJosh Poimboeuf 		info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES),
1225e32d0f1SSteven Rostedt 				      GFP_KERNEL);
123b9c73524SJosh Poimboeuf 		if (!info->cache)
1245e32d0f1SSteven Rostedt 			return -ENOMEM;
1255e32d0f1SSteven Rostedt 	}
1265e32d0f1SSteven Rostedt 
127b9c73524SJosh Poimboeuf 	cache = info->cache;
128b9c73524SJosh Poimboeuf 	trace->entries = cache->entries;
129b9c73524SJosh Poimboeuf 
130b9c73524SJosh Poimboeuf 	if (cache->nr_entries) {
131b9c73524SJosh Poimboeuf 		/*
132b9c73524SJosh Poimboeuf 		 * The user stack has already been previously unwound in this
133b9c73524SJosh Poimboeuf 		 * entry context.  Skip the unwind and use the cache.
134b9c73524SJosh Poimboeuf 		 */
135b9c73524SJosh Poimboeuf 		trace->nr = cache->nr_entries;
136b9c73524SJosh Poimboeuf 		return 0;
137b9c73524SJosh Poimboeuf 	}
138b9c73524SJosh Poimboeuf 
1395e32d0f1SSteven Rostedt 	trace->nr = 0;
1405e32d0f1SSteven Rostedt 	unwind_user(trace, UNWIND_MAX_ENTRIES);
1415e32d0f1SSteven Rostedt 
142b9c73524SJosh Poimboeuf 	cache->nr_entries = trace->nr;
143b9c73524SJosh Poimboeuf 
144858fa8a3SSteven Rostedt 	/* Clear nr_entries on way back to user space */
145858fa8a3SSteven Rostedt 	set_bit(UNWIND_USED_BIT, &info->unwind_mask);
146858fa8a3SSteven Rostedt 
1475e32d0f1SSteven Rostedt 	return 0;
1485e32d0f1SSteven Rostedt }
1495e32d0f1SSteven Rostedt 
1502dffa355SJosh Poimboeuf static void unwind_deferred_task_work(struct callback_head *head)
1512dffa355SJosh Poimboeuf {
1522dffa355SJosh Poimboeuf 	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
1532dffa355SJosh Poimboeuf 	struct unwind_stacktrace trace;
1542dffa355SJosh Poimboeuf 	struct unwind_work *work;
155be3d526aSSteven Rostedt 	unsigned long bits;
1562dffa355SJosh Poimboeuf 	u64 cookie;
1572dffa355SJosh Poimboeuf 
158be3d526aSSteven Rostedt 	if (WARN_ON_ONCE(!unwind_pending(info)))
1592dffa355SJosh Poimboeuf 		return;
1602dffa355SJosh Poimboeuf 
161be3d526aSSteven Rostedt 	/* Clear pending bit but make sure to have the current bits */
162be3d526aSSteven Rostedt 	bits = atomic_long_fetch_andnot(UNWIND_PENDING,
163be3d526aSSteven Rostedt 				  (atomic_long_t *)&info->unwind_mask);
1642dffa355SJosh Poimboeuf 	/*
1652dffa355SJosh Poimboeuf 	 * From here on out, the callback must always be called, even if it's
1662dffa355SJosh Poimboeuf 	 * just an empty trace.
1672dffa355SJosh Poimboeuf 	 */
1682dffa355SJosh Poimboeuf 	trace.nr = 0;
1692dffa355SJosh Poimboeuf 	trace.entries = NULL;
1702dffa355SJosh Poimboeuf 
1712dffa355SJosh Poimboeuf 	unwind_user_faultable(&trace);
1722dffa355SJosh Poimboeuf 
1734c75133eSSteven Rostedt 	if (info->cache)
1744c75133eSSteven Rostedt 		bits &= ~(info->cache->unwind_completed);
1754c75133eSSteven Rostedt 
1762dffa355SJosh Poimboeuf 	cookie = info->id.id;
1772dffa355SJosh Poimboeuf 
178*357eda2dSSteven Rostedt 	guard(srcu)(&unwind_srcu);
179*357eda2dSSteven Rostedt 	list_for_each_entry_srcu(work, &callbacks, list,
180*357eda2dSSteven Rostedt 				 srcu_read_lock_held(&unwind_srcu)) {
1814c75133eSSteven Rostedt 		if (test_bit(work->bit, &bits)) {
1822dffa355SJosh Poimboeuf 			work->func(work, &trace, cookie);
1834c75133eSSteven Rostedt 			if (info->cache)
1844c75133eSSteven Rostedt 				info->cache->unwind_completed |= BIT(work->bit);
1854c75133eSSteven Rostedt 		}
1862dffa355SJosh Poimboeuf 	}
1872dffa355SJosh Poimboeuf }
1882dffa355SJosh Poimboeuf 
1892dffa355SJosh Poimboeuf /**
1902dffa355SJosh Poimboeuf  * unwind_deferred_request - Request a user stacktrace on task kernel exit
1912dffa355SJosh Poimboeuf  * @work: Unwind descriptor requesting the trace
1922dffa355SJosh Poimboeuf  * @cookie: The cookie of the first request made for this task
1932dffa355SJosh Poimboeuf  *
1942dffa355SJosh Poimboeuf  * Schedule a user space unwind to be done in task work before exiting the
1952dffa355SJosh Poimboeuf  * kernel.
1962dffa355SJosh Poimboeuf  *
1972dffa355SJosh Poimboeuf  * The returned @cookie output is the generated cookie of the very first
1982dffa355SJosh Poimboeuf  * request for a user space stacktrace for this task since it entered the
1992dffa355SJosh Poimboeuf  * kernel. It can be from a request by any caller of this infrastructure.
2002dffa355SJosh Poimboeuf  * Its value will also be passed to the callback function.  It can be
2012dffa355SJosh Poimboeuf  * used to stitch kernel and user stack traces together in post-processing.
2022dffa355SJosh Poimboeuf  *
2032dffa355SJosh Poimboeuf  * It's valid to call this function multiple times for the same @work within
2042dffa355SJosh Poimboeuf  * the same task entry context.  Each call will return the same cookie
2052dffa355SJosh Poimboeuf  * while the task hasn't left the kernel. If the callback is not pending
2062dffa355SJosh Poimboeuf  * because it has already been previously called for the same entry context,
2072dffa355SJosh Poimboeuf  * it will be called again with the same stack trace and cookie.
2082dffa355SJosh Poimboeuf  *
209be3d526aSSteven Rostedt  * Return: 0 if the callback successfully was queued.
210be3d526aSSteven Rostedt  *         1 if the callback is pending or was already executed.
2112dffa355SJosh Poimboeuf  *         Negative if there's an error.
2122dffa355SJosh Poimboeuf  *         @cookie holds the cookie of the first request by any user
2132dffa355SJosh Poimboeuf  */
2142dffa355SJosh Poimboeuf int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
2152dffa355SJosh Poimboeuf {
2162dffa355SJosh Poimboeuf 	struct unwind_task_info *info = &current->unwind_info;
217be3d526aSSteven Rostedt 	unsigned long old, bits;
218*357eda2dSSteven Rostedt 	unsigned long bit;
2192dffa355SJosh Poimboeuf 	int ret;
2202dffa355SJosh Poimboeuf 
2212dffa355SJosh Poimboeuf 	*cookie = 0;
2222dffa355SJosh Poimboeuf 
2232dffa355SJosh Poimboeuf 	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
2242dffa355SJosh Poimboeuf 	    !user_mode(task_pt_regs(current)))
2252dffa355SJosh Poimboeuf 		return -EINVAL;
2262dffa355SJosh Poimboeuf 
227055c7060SSteven Rostedt 	/*
228055c7060SSteven Rostedt 	 * NMI requires having safe cmpxchg operations.
229055c7060SSteven Rostedt 	 * Trigger a warning to make it obvious that an architecture
230055c7060SSteven Rostedt 	 * is using this in NMI when it should not be.
231055c7060SSteven Rostedt 	 */
232055c7060SSteven Rostedt 	if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi()))
233055c7060SSteven Rostedt 		return -EINVAL;
234055c7060SSteven Rostedt 
235*357eda2dSSteven Rostedt 	/* Do not allow cancelled works to request again */
236*357eda2dSSteven Rostedt 	bit = READ_ONCE(work->bit);
237*357eda2dSSteven Rostedt 	if (WARN_ON_ONCE(bit < 0))
238*357eda2dSSteven Rostedt 		return -EINVAL;
239*357eda2dSSteven Rostedt 
240*357eda2dSSteven Rostedt 	/* Only need the mask now */
241*357eda2dSSteven Rostedt 	bit = BIT(bit);
242*357eda2dSSteven Rostedt 
2432dffa355SJosh Poimboeuf 	guard(irqsave)();
2442dffa355SJosh Poimboeuf 
2452dffa355SJosh Poimboeuf 	*cookie = get_cookie(info);
2462dffa355SJosh Poimboeuf 
247be3d526aSSteven Rostedt 	old = READ_ONCE(info->unwind_mask);
248be3d526aSSteven Rostedt 
249be3d526aSSteven Rostedt 	/* Is this already queued or executed */
250be3d526aSSteven Rostedt 	if (old & bit)
251055c7060SSteven Rostedt 		return 1;
252055c7060SSteven Rostedt 
253be3d526aSSteven Rostedt 	/*
254be3d526aSSteven Rostedt 	 * This work's bit hasn't been set yet. Now set it with the PENDING
255be3d526aSSteven Rostedt 	 * bit and fetch the current value of unwind_mask. If ether the
256be3d526aSSteven Rostedt 	 * work's bit or PENDING was already set, then this is already queued
257be3d526aSSteven Rostedt 	 * to have a callback.
258be3d526aSSteven Rostedt 	 */
259be3d526aSSteven Rostedt 	bits = UNWIND_PENDING | bit;
260be3d526aSSteven Rostedt 	old = atomic_long_fetch_or(bits, (atomic_long_t *)&info->unwind_mask);
261be3d526aSSteven Rostedt 	if (old & bits) {
262be3d526aSSteven Rostedt 		/*
263be3d526aSSteven Rostedt 		 * If the work's bit was set, whatever set it had better
264be3d526aSSteven Rostedt 		 * have also set pending and queued a callback.
265be3d526aSSteven Rostedt 		 */
266be3d526aSSteven Rostedt 		WARN_ON_ONCE(!(old & UNWIND_PENDING));
267be3d526aSSteven Rostedt 		return old & bit;
268be3d526aSSteven Rostedt 	}
2692dffa355SJosh Poimboeuf 
2702dffa355SJosh Poimboeuf 	/* The work has been claimed, now schedule it. */
2712dffa355SJosh Poimboeuf 	ret = task_work_add(current, &info->work, TWA_RESUME);
2722dffa355SJosh Poimboeuf 
273be3d526aSSteven Rostedt 	if (WARN_ON_ONCE(ret))
274be3d526aSSteven Rostedt 		WRITE_ONCE(info->unwind_mask, 0);
275be3d526aSSteven Rostedt 
276be3d526aSSteven Rostedt 	return ret;
2772dffa355SJosh Poimboeuf }
2782dffa355SJosh Poimboeuf 
2792dffa355SJosh Poimboeuf void unwind_deferred_cancel(struct unwind_work *work)
2802dffa355SJosh Poimboeuf {
281be3d526aSSteven Rostedt 	struct task_struct *g, *t;
2824c75133eSSteven Rostedt 	int bit;
283be3d526aSSteven Rostedt 
2842dffa355SJosh Poimboeuf 	if (!work)
2852dffa355SJosh Poimboeuf 		return;
2862dffa355SJosh Poimboeuf 
2874c75133eSSteven Rostedt 	bit = work->bit;
2884c75133eSSteven Rostedt 
289be3d526aSSteven Rostedt 	/* No work should be using a reserved bit */
2904c75133eSSteven Rostedt 	if (WARN_ON_ONCE(BIT(bit) & RESERVED_BITS))
291be3d526aSSteven Rostedt 		return;
292be3d526aSSteven Rostedt 
2932dffa355SJosh Poimboeuf 	guard(mutex)(&callback_mutex);
294*357eda2dSSteven Rostedt 	list_del_rcu(&work->list);
295*357eda2dSSteven Rostedt 
296*357eda2dSSteven Rostedt 	/* Do not allow any more requests and prevent callbacks */
297*357eda2dSSteven Rostedt 	work->bit = -1;
298be3d526aSSteven Rostedt 
2994c75133eSSteven Rostedt 	__clear_bit(bit, &unwind_mask);
300be3d526aSSteven Rostedt 
301*357eda2dSSteven Rostedt 	synchronize_srcu(&unwind_srcu);
302*357eda2dSSteven Rostedt 
303be3d526aSSteven Rostedt 	guard(rcu)();
304be3d526aSSteven Rostedt 	/* Clear this bit from all threads */
305be3d526aSSteven Rostedt 	for_each_process_thread(g, t) {
3064c75133eSSteven Rostedt 		clear_bit(bit, &t->unwind_info.unwind_mask);
3074c75133eSSteven Rostedt 		if (t->unwind_info.cache)
3084c75133eSSteven Rostedt 			clear_bit(bit, &t->unwind_info.cache->unwind_completed);
309be3d526aSSteven Rostedt 	}
3102dffa355SJosh Poimboeuf }
3112dffa355SJosh Poimboeuf 
3122dffa355SJosh Poimboeuf int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
3132dffa355SJosh Poimboeuf {
3142dffa355SJosh Poimboeuf 	memset(work, 0, sizeof(*work));
3152dffa355SJosh Poimboeuf 
3162dffa355SJosh Poimboeuf 	guard(mutex)(&callback_mutex);
317be3d526aSSteven Rostedt 
318be3d526aSSteven Rostedt 	/* See if there's a bit in the mask available */
319be3d526aSSteven Rostedt 	if (unwind_mask == ~0UL)
320be3d526aSSteven Rostedt 		return -EBUSY;
321be3d526aSSteven Rostedt 
322be3d526aSSteven Rostedt 	work->bit = ffz(unwind_mask);
323be3d526aSSteven Rostedt 	__set_bit(work->bit, &unwind_mask);
324be3d526aSSteven Rostedt 
325*357eda2dSSteven Rostedt 	list_add_rcu(&work->list, &callbacks);
3262dffa355SJosh Poimboeuf 	work->func = func;
3272dffa355SJosh Poimboeuf 	return 0;
3282dffa355SJosh Poimboeuf }
3292dffa355SJosh Poimboeuf 
3305e32d0f1SSteven Rostedt void unwind_task_init(struct task_struct *task)
3315e32d0f1SSteven Rostedt {
3325e32d0f1SSteven Rostedt 	struct unwind_task_info *info = &task->unwind_info;
3335e32d0f1SSteven Rostedt 
3345e32d0f1SSteven Rostedt 	memset(info, 0, sizeof(*info));
3352dffa355SJosh Poimboeuf 	init_task_work(&info->work, unwind_deferred_task_work);
336be3d526aSSteven Rostedt 	info->unwind_mask = 0;
3375e32d0f1SSteven Rostedt }
3385e32d0f1SSteven Rostedt 
3395e32d0f1SSteven Rostedt void unwind_task_free(struct task_struct *task)
3405e32d0f1SSteven Rostedt {
3415e32d0f1SSteven Rostedt 	struct unwind_task_info *info = &task->unwind_info;
3425e32d0f1SSteven Rostedt 
343b9c73524SJosh Poimboeuf 	kfree(info->cache);
3442dffa355SJosh Poimboeuf 	task_work_cancel(task, &info->work);
3455e32d0f1SSteven Rostedt }
346