// SPDX-License-Identifier: GPL-2.0
/*
 * Deferred user space unwinding
 */
#include <linux/sched/task_stack.h>
#include <linux/unwind_deferred.h>
#include <linux/sched/clock.h>
#include <linux/task_work.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/mm.h>

/* Make the cache fit in a 4K page */
#define UNWIND_MAX_ENTRIES					\
	((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))

/* Guards adding to and reading the list of callbacks */
static DEFINE_MUTEX(callback_mutex);
static LIST_HEAD(callbacks);

/*
 * This is a unique percpu identifier for a given task entry context.
 * Conceptually, it's incremented every time the CPU enters the kernel from
 * user space, so that each "entry context" on the CPU gets a unique ID. In
 * reality, as an optimization, it's only incremented on demand for the first
 * deferred unwind request after a given entry-from-user.
 *
 * It's combined with the CPU id to make a systemwide-unique "context cookie".
 */
static DEFINE_PER_CPU(u32, unwind_ctx_ctr);

/*
 * The context cookie is a unique identifier that is assigned to a user
 * space stacktrace. As the user space stacktrace remains the same while
 * the task is in the kernel, the cookie is an identifier for the stacktrace.
 * It is possible, however, for the stacktrace to get another cookie if
 * another request is made after the cookie was cleared and before
 * reentering user space.
 */
static u64 get_cookie(struct unwind_task_info *info)
{
	u32 cnt = 1;
	u32 old = 0;

	if (info->id.cpu)
		return info->id.id;

	/* LSB is always set to ensure 0 is an invalid value */
	cnt |= __this_cpu_read(unwind_ctx_ctr) + 2;
	if (try_cmpxchg(&info->id.cnt, &old, cnt)) {
		/* Update the per cpu counter */
		__this_cpu_write(unwind_ctx_ctr, cnt);
	}
	/* Interrupts are disabled, so the CPU will always be the same */
	info->id.cpu = smp_processor_id() + 1; /* Must be non-zero */

	return info->id.id;
}

/**
 * unwind_user_faultable - Produce a user stacktrace in faultable context
 * @trace: The descriptor that will store the user stacktrace
 *
 * This must be called in a known faultable context (usually when entering
 * or exiting user space). Depending on the available implementations,
 * the @trace will be loaded with the addresses of the user space stacktrace
 * if it can be found.
 *
 * Return: 0 on success and negative on error
 *         On success @trace will contain the user space stacktrace
 */
int unwind_user_faultable(struct unwind_stacktrace *trace)
{
	struct unwind_task_info *info = &current->unwind_info;
	struct unwind_cache *cache;

	/* Should always be called from faultable context */
	might_fault();

	if (current->flags & PF_EXITING)
		return -EINVAL;

	if (!info->cache) {
		info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES),
				      GFP_KERNEL);
		if (!info->cache)
			return -ENOMEM;
	}

	cache = info->cache;
	trace->entries = cache->entries;

	if (cache->nr_entries) {
		/*
		 * The user stack has already been previously unwound in this
		 * entry context. Skip the unwind and use the cache.
		 */
		trace->nr = cache->nr_entries;
		return 0;
	}

	trace->nr = 0;
	unwind_user(trace, UNWIND_MAX_ENTRIES);

	cache->nr_entries = trace->nr;

	return 0;
}
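
/*
 * Example (illustrative sketch, not part of the upstream file): a caller
 * that is already in a faultable context, such as a hook run while a task
 * is on its way back to user space, could use unwind_user_faultable()
 * directly. The function name example_dump_user_stack() is hypothetical
 * and only shows the intended calling pattern.
 *
 *	static void example_dump_user_stack(void)
 *	{
 *		struct unwind_stacktrace trace;
 *		unsigned int i;
 *
 *		if (unwind_user_faultable(&trace))
 *			return;
 *
 *		for (i = 0; i < trace.nr; i++)
 *			pr_info("user frame %u: %lx\n", i, trace.entries[i]);
 *	}
 *
 * Repeated calls within the same entry context are cheap, as the trace is
 * served from the per-task cache after the first unwind.
 */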
static void unwind_deferred_task_work(struct callback_head *head)
{
	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
	struct unwind_stacktrace trace;
	struct unwind_work *work;
	u64 cookie;

	if (WARN_ON_ONCE(!info->pending))
		return;

	/* Allow work to come in again */
	WRITE_ONCE(info->pending, 0);

	/*
	 * From here on out, the callback must always be called, even if it's
	 * just an empty trace.
	 */
	trace.nr = 0;
	trace.entries = NULL;

	unwind_user_faultable(&trace);

	cookie = info->id.id;

	guard(mutex)(&callback_mutex);
	list_for_each_entry(work, &callbacks, list) {
		work->func(work, &trace, cookie);
	}
}
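
/*
 * Example (illustrative sketch, not part of the upstream file): a callback
 * registered via unwind_deferred_init() is invoked from the task work above
 * with the unwound trace and the context cookie. The name below is
 * hypothetical and only shows the expected shape of such a callback; the
 * void return is an assumption consistent with the dispatch loop above
 * ignoring any result.
 *
 *	static void example_unwind_callback(struct unwind_work *work,
 *					    struct unwind_stacktrace *trace,
 *					    u64 cookie)
 *	{
 *		unsigned int i;
 *
 *		for (i = 0; i < trace->nr; i++)
 *			pr_info("cookie %llx frame %u: %lx\n",
 *				cookie, i, trace->entries[i]);
 *	}
 */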

/**
 * unwind_deferred_request - Request a user stacktrace on task kernel exit
 * @work: Unwind descriptor requesting the trace
 * @cookie: The cookie of the first request made for this task
 *
 * Schedule a user space unwind to be done in task work before exiting the
 * kernel.
 *
 * The returned @cookie output is the generated cookie of the very first
 * request for a user space stacktrace for this task since it entered the
 * kernel. It can be from a request by any caller of this infrastructure.
 * Its value will also be passed to the callback function. It can be
 * used to stitch kernel and user stack traces together in post-processing.
 *
 * It's valid to call this function multiple times for the same @work within
 * the same task entry context. Each call will return the same cookie
 * while the task hasn't left the kernel. If the callback is not pending
 * because it has already been previously called for the same entry context,
 * it will be called again with the same stack trace and cookie.
 *
 * Return: 1 if the callback was already queued.
 *         0 if the callback was successfully queued.
 *         Negative if there's an error.
 *         @cookie holds the cookie of the first request by any user
 */
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
	struct unwind_task_info *info = &current->unwind_info;
	int ret;

	*cookie = 0;

	if (WARN_ON_ONCE(in_nmi()))
		return -EINVAL;

	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
	    !user_mode(task_pt_regs(current)))
		return -EINVAL;

	guard(irqsave)();

	*cookie = get_cookie(info);

	/* callback already pending? */
	if (info->pending)
		return 1;

	/* The work has been claimed, now schedule it. */
	ret = task_work_add(current, &info->work, TWA_RESUME);
	if (WARN_ON_ONCE(ret))
		return ret;

	info->pending = 1;
	return 0;
}

void unwind_deferred_cancel(struct unwind_work *work)
{
	if (!work)
		return;

	guard(mutex)(&callback_mutex);
	list_del(&work->list);
}

int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
{
	memset(work, 0, sizeof(*work));

	guard(mutex)(&callback_mutex);
	list_add(&work->list, &callbacks);
	work->func = func;
	return 0;
}

void unwind_task_init(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;

	memset(info, 0, sizeof(*info));
	init_task_work(&info->work, unwind_deferred_task_work);
}

void unwind_task_free(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;

	kfree(info->cache);
	task_work_cancel(task, &info->work);
}
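
/*
 * Example (illustrative sketch, not part of the upstream file): a
 * hypothetical tracer could tie the pieces together as shown below. All
 * example_* names are made up for illustration; only the
 * unwind_deferred_*() calls are the API implemented above, and
 * example_unwind_callback() refers to the callback sketched earlier in
 * this file.
 *
 *	static struct unwind_work example_unwind_work;
 *
 *	static int __init example_tracer_init(void)
 *	{
 *		// Register the callback that will receive deferred traces.
 *		return unwind_deferred_init(&example_unwind_work,
 *					    example_unwind_callback);
 *	}
 *
 *	// Called from some event context (e.g. an interrupt that hit user
 *	// space, but not NMI): defer the user unwind to return-to-user.
 *	static void example_tracer_event(void)
 *	{
 *		u64 cookie;
 *
 *		if (unwind_deferred_request(&example_unwind_work, &cookie) < 0)
 *			return;
 *		// Record @cookie with the kernel-side sample; the user trace
 *		// arrives later via example_unwind_callback() with the same
 *		// cookie, allowing the two to be stitched together.
 *	}
 *
 *	static void __exit example_tracer_exit(void)
 *	{
 *		unwind_deferred_cancel(&example_unwind_work);
 *	}
 */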