// SPDX-License-Identifier: GPL-2.0
/*
 * Performance events callchain code, extracted from core.c:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
 *  Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/perf_event.h>
#include <linux/slab.h>
#include <linux/sched/task_stack.h>
#include <linux/uprobes.h>

#include "internal.h"

struct callchain_cpus_entries {
	struct rcu_head			rcu_head;
	struct perf_callchain_entry	*cpu_entries[];
};

int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
int sysctl_perf_event_max_contexts_per_stack __read_mostly = PERF_MAX_CONTEXTS_PER_STACK;
static const int six_hundred_forty_kb = 640 * 1024;

static inline size_t perf_callchain_entry__sizeof(void)
{
	return (sizeof(struct perf_callchain_entry) +
		sizeof(__u64) * (sysctl_perf_event_max_stack +
				 sysctl_perf_event_max_contexts_per_stack));
}

static DEFINE_PER_CPU(u8, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
static struct callchain_cpus_entries *callchain_cpus_entries;


__weak void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
				  struct pt_regs *regs)
{
}

__weak void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
				struct pt_regs *regs)
{
}

static void release_callchain_buffers_rcu(struct rcu_head *head)
{
	struct callchain_cpus_entries *entries;
	int cpu;

	entries = container_of(head, struct callchain_cpus_entries, rcu_head);

	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);

	kfree(entries);
}

static void release_callchain_buffers(void)
{
	struct callchain_cpus_entries *entries;

	entries = callchain_cpus_entries;
	RCU_INIT_POINTER(callchain_cpus_entries, NULL);
	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
}

static int alloc_callchain_buffers(void)
{
	int cpu;
	int size;
	struct callchain_cpus_entries *entries;

	/*
	 * We can't use the percpu allocation API for data that can be
	 * accessed from NMI. Use a temporary manual per cpu allocation
	 * until that gets sorted out.
	 */
	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);

	entries = kzalloc(size, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;

	for_each_possible_cpu(cpu) {
		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
							 cpu_to_node(cpu));
		if (!entries->cpu_entries[cpu])
			goto fail;
	}

	rcu_assign_pointer(callchain_cpus_entries, entries);

	return 0;

fail:
	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);
	kfree(entries);

	return -ENOMEM;
}

int get_callchain_buffers(int event_max_stack)
{
	int err = 0;
	int count;

	mutex_lock(&callchain_mutex);

	count = atomic_inc_return(&nr_callchain_events);
	if (WARN_ON_ONCE(count < 1)) {
		err = -EINVAL;
		goto exit;
	}

	/*
	 * If requesting per event more than the global cap,
	 * return a different error to help userspace figure
	 * this out.
	 *
	 * And also do it here so that we have &callchain_mutex held.
	 */
	if (event_max_stack > sysctl_perf_event_max_stack) {
		err = -EOVERFLOW;
		goto exit;
	}

	if (count == 1)
		err = alloc_callchain_buffers();
exit:
	if (err)
		atomic_dec(&nr_callchain_events);

	mutex_unlock(&callchain_mutex);

	return err;
}

void put_callchain_buffers(void)
{
	if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
		release_callchain_buffers();
		mutex_unlock(&callchain_mutex);
	}
}

struct perf_callchain_entry *get_callchain_entry(int *rctx)
{
	int cpu;
	struct callchain_cpus_entries *entries;

	*rctx = get_recursion_context(this_cpu_ptr(callchain_recursion));
	if (*rctx == -1)
		return NULL;

	entries = rcu_dereference(callchain_cpus_entries);
	if (!entries) {
		put_recursion_context(this_cpu_ptr(callchain_recursion), *rctx);
		return NULL;
	}

	cpu = smp_processor_id();

	return (((void *)entries->cpu_entries[cpu]) +
		(*rctx * perf_callchain_entry__sizeof()));
}

void
put_callchain_entry(int rctx)
{
	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
}

static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entry,
					       int start_entry_idx)
{
#ifdef CONFIG_UPROBES
	struct uprobe_task *utask = current->utask;
	struct return_instance *ri;
	__u64 *cur_ip, *last_ip, tramp_addr;

	if (likely(!utask || !utask->return_instances))
		return;

	cur_ip = &entry->ip[start_entry_idx];
	last_ip = &entry->ip[entry->nr - 1];
	ri = utask->return_instances;
	tramp_addr = uprobe_get_trampoline_vaddr();

	/*
	 * If there are pending uretprobes for the current thread, they are
	 * recorded in a list inside utask->return_instances; each such
	 * pending uretprobe replaces traced user function's return address on
	 * the stack, so when stack trace is captured, instead of seeing
	 * actual function's return address, we'll have one or many uretprobe
	 * trampoline addresses in the stack trace, which are not helpful and
	 * misleading to users.
	 * So here we go over the pending list of uretprobes, and each
	 * encountered trampoline address is replaced with actual return
	 * address.
	 */
	while (ri && cur_ip <= last_ip) {
		if (*cur_ip == tramp_addr) {
			*cur_ip = ri->orig_ret_vaddr;
			ri = ri->next;
		}
		cur_ip++;
	}
#endif
}

struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
		   u32 max_stack, bool crosstask, bool add_mark, u64 defer_cookie)
{
	struct perf_callchain_entry *entry;
	struct perf_callchain_entry_ctx ctx;
	int rctx, start_entry_idx;

	/* crosstask is not supported for user stacks */
	if (crosstask && user && !kernel)
		return NULL;

	entry = get_callchain_entry(&rctx);
	if (!entry)
		return NULL;

	ctx.entry		= entry;
	ctx.max_stack		= max_stack;
	ctx.nr			= entry->nr = 0;
	ctx.contexts		= 0;
	ctx.contexts_maxed	= false;

	if (kernel && !user_mode(regs)) {
		if (add_mark)
			perf_callchain_store_context(&ctx, PERF_CONTEXT_KERNEL);
		perf_callchain_kernel(&ctx, regs);
	}

	if (user && !crosstask) {
		if (!user_mode(regs)) {
			if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
				goto exit_put;
			regs = task_pt_regs(current);
		}

		if (defer_cookie) {
			/*
			 * Foretell the coming of PERF_RECORD_CALLCHAIN_DEFERRED
			 * which can be stitched to this one, and add
			 * the cookie after it (it will be cut off when the
			 * user stack is copied to the callchain).
			 */
			perf_callchain_store_context(&ctx, PERF_CONTEXT_USER_DEFERRED);
			perf_callchain_store_context(&ctx, defer_cookie);
			goto exit_put;
		}

		if (add_mark)
			perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);

		start_entry_idx = entry->nr;
		perf_callchain_user(&ctx, regs);
		fixup_uretprobe_trampoline_entries(entry, start_entry_idx);
	}

exit_put:
	put_callchain_entry(rctx);

	return entry;
}

static int perf_event_max_stack_handler(const struct ctl_table *table, int write,
					void *buffer, size_t *lenp, loff_t *ppos)
{
	int *value = table->data;
	int new_value = *value, ret;
	struct ctl_table new_table = *table;

	new_table.data = &new_value;
	ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	mutex_lock(&callchain_mutex);
	if (atomic_read(&nr_callchain_events))
		ret = -EBUSY;
	else
		*value = new_value;

	mutex_unlock(&callchain_mutex);

	return ret;
}

static const struct ctl_table callchain_sysctl_table[] = {
	{
		.procname	= "perf_event_max_stack",
		.data		= &sysctl_perf_event_max_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= (void *)&six_hundred_forty_kb,
	},
	{
		.procname	= "perf_event_max_contexts_per_stack",
		.data		= &sysctl_perf_event_max_contexts_per_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE_THOUSAND,
	},
};

static int __init init_callchain_sysctls(void)
{
	register_sysctl_init("kernel", callchain_sysctl_table);
	return 0;
}
core_initcall(init_callchain_sysctls);