// SPDX-License-Identifier: GPL-2.0
/*
 * Deferred user space unwinding
 */
#include <linux/sched/task_stack.h>
#include <linux/unwind_deferred.h>
#include <linux/sched/clock.h>
#include <linux/task_work.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/mm.h>

/* Make the cache fit in a 4K page */
#define UNWIND_MAX_ENTRIES					\
	((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))

/* Guards adding to and reading the list of callbacks */
static DEFINE_MUTEX(callback_mutex);
static LIST_HEAD(callbacks);

/*
 * This is a unique percpu identifier for a given task entry context.
 * Conceptually, it's incremented every time the CPU enters the kernel from
 * user space, so that each "entry context" on the CPU gets a unique ID. In
 * reality, as an optimization, it's only incremented on demand for the first
 * deferred unwind request after a given entry-from-user.
 *
 * It's combined with the CPU id to make a systemwide-unique "context cookie".
 */
static DEFINE_PER_CPU(u32, unwind_ctx_ctr);

/*
 * The context cookie is a unique identifier that is assigned to a user
 * space stacktrace. As the user space stacktrace remains the same while
 * the task is in the kernel, the cookie is an identifier for the stacktrace.
 * It is possible, however, for the stacktrace to get another cookie if
 * another request is made after the cookie was cleared and before
 * reentering user space.
 */
static u64 get_cookie(struct unwind_task_info *info)
{
	u32 cnt = 1;
	u32 old = 0;

	if (info->id.cpu)
		return info->id.id;

	/* LSB is always set to ensure 0 is an invalid value */
	cnt |= __this_cpu_read(unwind_ctx_ctr) + 2;
	if (try_cmpxchg(&info->id.cnt, &old, cnt)) {
		/* Update the per cpu counter */
		__this_cpu_write(unwind_ctx_ctr, cnt);
	}
	/* Interrupts are disabled, the CPU will always be the same */
	info->id.cpu = smp_processor_id() + 1; /* Must be non zero */

	return info->id.id;
}

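/*
 * For reference, a minimal sketch of the layout that get_cookie() above
 * assumes for info->id. The real definition lives in the unwind_deferred
 * type headers and the exact field order there may differ; this is only an
 * illustration of how the two 32-bit halves form one 64-bit cookie:
 *
 *	union unwind_task_id {
 *		struct {
 *			u32	cpu;	// CPU of the first request, +1 so 0 means "unset"
 *			u32	cnt;	// snapshot of that CPU's entry-context counter
 *		};
 *		u64	id;		// both halves read together as the cookie
 *	};
 *
 * Writing id.cpu and id.cnt and returning id.id is what makes the cookie
 * systemwide unique: no two CPUs share a cpu value, and a given CPU hands
 * out a new odd cnt value for each entry context until the 32-bit counter
 * wraps.
 */
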
/**
 * unwind_user_faultable - Produce a user stacktrace in faultable context
 * @trace: The descriptor that will store the user stacktrace
 *
 * This must be called in a known faultable context (usually when entering
 * or exiting user space). Depending on the available implementations,
 * the @trace will be loaded with the addresses of the user space stacktrace
 * if it can be found.
 *
 * Return: 0 on success and negative on error
 *         On success @trace will contain the user space stacktrace
 */
int unwind_user_faultable(struct unwind_stacktrace *trace)
{
	struct unwind_task_info *info = &current->unwind_info;
	struct unwind_cache *cache;

	/* Should always be called from faultable context */
	might_fault();

	if (current->flags & PF_EXITING)
		return -EINVAL;

	if (!info->cache) {
		info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES),
				      GFP_KERNEL);
		if (!info->cache)
			return -ENOMEM;
	}

	cache = info->cache;
	trace->entries = cache->entries;

	if (cache->nr_entries) {
		/*
		 * The user stack has already been previously unwound in this
		 * entry context. Skip the unwind and use the cache.
		 */
		trace->nr = cache->nr_entries;
		return 0;
	}

	trace->nr = 0;
	unwind_user(trace, UNWIND_MAX_ENTRIES);

	cache->nr_entries = trace->nr;

	return 0;
}

static void unwind_deferred_task_work(struct callback_head *head)
{
	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
	struct unwind_stacktrace trace;
	struct unwind_work *work;
	u64 cookie;

	if (WARN_ON_ONCE(!info->pending))
		return;

	/* Allow work to come in again */
	WRITE_ONCE(info->pending, 0);

	/*
	 * From here on out, the callback must always be called, even if it's
	 * just an empty trace.
	 */
	trace.nr = 0;
	trace.entries = NULL;

	unwind_user_faultable(&trace);

	cookie = info->id.id;

	guard(mutex)(&callback_mutex);
	list_for_each_entry(work, &callbacks, list) {
		work->func(work, &trace, cookie);
	}
}

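/*
 * The loop above is the only place the registered callbacks run. As an
 * illustration only (the function and helper names below are made up, not
 * part of this file), a tracer's callback could look roughly like:
 *
 *	static void my_unwind_callback(struct unwind_work *work,
 *				       struct unwind_stacktrace *trace,
 *				       u64 cookie)
 *	{
 *		// Runs in faultable task context, right before the task
 *		// returns to user space. trace->nr may be 0 if the unwind
 *		// produced nothing; cookie matches what
 *		// unwind_deferred_request() handed back earlier in this
 *		// entry context.
 *		my_tracer_emit_user_stack(cookie, trace->entries, trace->nr);
 *	}
 *
 * Note that every registered callback sees every deferred trace, whether or
 * not its own unwind_work was the one that requested it.
 */
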
/**
 * unwind_deferred_request - Request a user stacktrace on task kernel exit
 * @work: Unwind descriptor requesting the trace
 * @cookie: The cookie of the first request made for this task
 *
 * Schedule a user space unwind to be done in task work before exiting the
 * kernel.
 *
 * The returned @cookie output is the generated cookie of the very first
 * request for a user space stacktrace for this task since it entered the
 * kernel. It can be from a request by any caller of this infrastructure.
 * Its value will also be passed to the callback function. It can be
 * used to stitch kernel and user stack traces together in post-processing.
 *
 * It's valid to call this function multiple times for the same @work within
 * the same task entry context. Each call will return the same cookie
 * while the task hasn't left the kernel. If the callback is not pending
 * because it has already been previously called for the same entry context,
 * it will be called again with the same stack trace and cookie.
 *
 * Return: 1 if the callback was already queued.
 *         0 if the callback was successfully queued.
 *         Negative if there's an error.
 *         @cookie holds the cookie of the first request by any user
 */
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
	struct unwind_task_info *info = &current->unwind_info;
	int ret;

	*cookie = 0;

	if (WARN_ON_ONCE(in_nmi()))
		return -EINVAL;

	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
	    !user_mode(task_pt_regs(current)))
		return -EINVAL;

	guard(irqsave)();

	*cookie = get_cookie(info);

	/* callback already pending? */
	if (info->pending)
		return 1;

	/* The work has been claimed, now schedule it. */
	ret = task_work_add(current, &info->work, TWA_RESUME);
	if (WARN_ON_ONCE(ret))
		return ret;

	info->pending = 1;
	return 0;
}

void unwind_deferred_cancel(struct unwind_work *work)
{
	if (!work)
		return;

	guard(mutex)(&callback_mutex);
	list_del(&work->list);
}

int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
{
	memset(work, 0, sizeof(*work));

	guard(mutex)(&callback_mutex);
	list_add(&work->list, &callbacks);
	work->func = func;
	return 0;
}

void unwind_task_init(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;

	memset(info, 0, sizeof(*info));
	init_task_work(&info->work, unwind_deferred_task_work);
}

void unwind_task_free(struct task_struct *task)
{
	struct unwind_task_info *info = &task->unwind_info;

	kfree(info->cache);
	task_work_cancel(task, &info->work);
}

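/*
 * Typical caller-side flow, sketched here as a comment only. The tracer
 * names below are hypothetical; only unwind_deferred_init(),
 * unwind_deferred_request() and unwind_deferred_cancel() come from this
 * file:
 *
 *	static struct unwind_work my_unwind_work;
 *
 *	// once, at tracer init time
 *	unwind_deferred_init(&my_unwind_work, my_unwind_callback);
 *
 *	// from an interrupt or event that just sampled a kernel stack
 *	// (not from NMI, which is rejected above)
 *	u64 cookie;
 *	if (unwind_deferred_request(&my_unwind_work, &cookie) >= 0)
 *		my_tracer_tag_kernel_sample(cookie);
 *
 *	// the user stacktrace arrives later in my_unwind_callback(), with
 *	// the same cookie, just before the task returns to user space
 *
 *	// once, at tracer teardown
 *	unwind_deferred_cancel(&my_unwind_work);
 *
 * The cookie is what lets a post-processor join the kernel-side sample with
 * the user stacktrace that the callback delivers later.
 */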