// SPDX-License-Identifier: GPL-2.0
/*
 * Performance events callchain code, extracted from core.c:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
 *  Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/perf_event.h>
#include <linux/slab.h>
#include <linux/sched/task_stack.h>
#include <linux/uprobes.h>

#include "internal.h"

struct callchain_cpus_entries {
	struct rcu_head			rcu_head;
	struct perf_callchain_entry	*cpu_entries[];
};

int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
int sysctl_perf_event_max_contexts_per_stack __read_mostly = PERF_MAX_CONTEXTS_PER_STACK;
static const int six_hundred_forty_kb = 640 * 1024;

static inline size_t perf_callchain_entry__sizeof(void)
{
	return (sizeof(struct perf_callchain_entry) +
		sizeof(__u64) * (sysctl_perf_event_max_stack +
				 sysctl_perf_event_max_contexts_per_stack));
}

static DEFINE_PER_CPU(u8, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
static struct callchain_cpus_entries *callchain_cpus_entries;


__weak void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
				  struct pt_regs *regs)
{
}

__weak void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
				struct pt_regs *regs)
{
}

static void release_callchain_buffers_rcu(struct rcu_head *head)
{
	struct callchain_cpus_entries *entries;
	int cpu;

	entries = container_of(head, struct callchain_cpus_entries, rcu_head);

	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);

	kfree(entries);
}

static void release_callchain_buffers(void)
{
	struct callchain_cpus_entries *entries;

	entries = callchain_cpus_entries;
	RCU_INIT_POINTER(callchain_cpus_entries, NULL);
	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
}

static int alloc_callchain_buffers(void)
{
	int cpu;
	int size;
	struct callchain_cpus_entries *entries;

	/*
	 * We can't use the percpu allocation API for data that can be
	 * accessed from NMI. Use a temporary manual per cpu allocation
	 * until that gets sorted out.
	 */
	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);

	entries = kzalloc(size, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;

	for_each_possible_cpu(cpu) {
		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
							 cpu_to_node(cpu));
		if (!entries->cpu_entries[cpu])
			goto fail;
	}

	rcu_assign_pointer(callchain_cpus_entries, entries);

	return 0;

fail:
	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);
	kfree(entries);

	return -ENOMEM;
}

int get_callchain_buffers(int event_max_stack)
{
	int err = 0;
	int count;

	mutex_lock(&callchain_mutex);

	count = atomic_inc_return(&nr_callchain_events);
	if (WARN_ON_ONCE(count < 1)) {
		err = -EINVAL;
		goto exit;
	}

	/*
	 * If requesting per event more than the global cap,
	 * return a different error to help userspace figure
	 * this out.
	 *
	 * And also do it here so that we have &callchain_mutex held.
	 */
	if (event_max_stack > sysctl_perf_event_max_stack) {
		err = -EOVERFLOW;
		goto exit;
	}

	if (count == 1)
		err = alloc_callchain_buffers();
exit:
	if (err)
		atomic_dec(&nr_callchain_events);

	mutex_unlock(&callchain_mutex);

	return err;
}

void put_callchain_buffers(void)
{
	if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
		release_callchain_buffers();
		mutex_unlock(&callchain_mutex);
	}
}

struct perf_callchain_entry *get_callchain_entry(int *rctx)
{
	int cpu;
	struct callchain_cpus_entries *entries;

	*rctx = get_recursion_context(this_cpu_ptr(callchain_recursion));
	if (*rctx == -1)
		return NULL;

	entries = rcu_dereference(callchain_cpus_entries);
	if (!entries) {
		put_recursion_context(this_cpu_ptr(callchain_recursion), *rctx);
		return NULL;
	}

	cpu = smp_processor_id();

	return (((void *)entries->cpu_entries[cpu]) +
		(*rctx * perf_callchain_entry__sizeof()));
}

void
put_callchain_entry(int rctx)
{
	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
}

static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entry,
					       int start_entry_idx)
{
#ifdef CONFIG_UPROBES
	struct uprobe_task *utask = current->utask;
	struct return_instance *ri;
	__u64 *cur_ip, *last_ip, tramp_addr;

	if (likely(!utask || !utask->return_instances))
		return;

	cur_ip = &entry->ip[start_entry_idx];
	last_ip = &entry->ip[entry->nr - 1];
	ri = utask->return_instances;
	tramp_addr = uprobe_get_trampoline_vaddr();

	/*
	 * If there are pending uretprobes for the current thread, they are
	 * recorded in a list inside utask->return_instances; each such
	 * pending uretprobe replaces traced user function's return address on
	 * the stack, so when stack trace is captured, instead of seeing
	 * actual function's return address, we'll have one or many uretprobe
	 * trampoline addresses in the stack trace, which are not helpful and
	 * misleading to users.
	 * So here we go over the pending list of uretprobes, and each
	 * encountered trampoline address is replaced with actual return
	 * address.
	 */
	while (ri && cur_ip <= last_ip) {
		if (*cur_ip == tramp_addr) {
			*cur_ip = ri->orig_ret_vaddr;
			ri = ri->next;
		}
		cur_ip++;
	}
#endif
}

struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
		   u32 max_stack, bool crosstask, bool add_mark)
{
	struct perf_callchain_entry *entry;
	struct perf_callchain_entry_ctx ctx;
	int rctx, start_entry_idx;

	entry = get_callchain_entry(&rctx);
	if (!entry)
		return NULL;

	ctx.entry		= entry;
	ctx.max_stack		= max_stack;
	ctx.nr			= entry->nr = init_nr;
	ctx.contexts		= 0;
	ctx.contexts_maxed	= false;

	if (kernel && !user_mode(regs)) {
		if (add_mark)
			perf_callchain_store_context(&ctx, PERF_CONTEXT_KERNEL);
		perf_callchain_kernel(&ctx, regs);
	}

	if (user) {
		if (!user_mode(regs)) {
			if (current->mm)
				regs = task_pt_regs(current);
			else
				regs = NULL;
		}

		if (regs) {
			if (crosstask)
				goto exit_put;

			if (add_mark)
				perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);

			start_entry_idx = entry->nr;
			perf_callchain_user(&ctx, regs);
			fixup_uretprobe_trampoline_entries(entry, start_entry_idx);
		}
	}

exit_put:
	put_callchain_entry(rctx);

	return entry;
}

static int perf_event_max_stack_handler(const struct ctl_table *table, int write,
					void *buffer, size_t *lenp, loff_t *ppos)
{
	int *value = table->data;
	int new_value = *value, ret;
	struct ctl_table new_table = *table;

	new_table.data = &new_value;
	ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	mutex_lock(&callchain_mutex);
	if (atomic_read(&nr_callchain_events))
		ret = -EBUSY;
	else
		*value = new_value;

	mutex_unlock(&callchain_mutex);

	return ret;
}

static const struct ctl_table callchain_sysctl_table[] = {
	{
		.procname	= "perf_event_max_stack",
		.data		= &sysctl_perf_event_max_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= (void *)&six_hundred_forty_kb,
	},
	{
		.procname	= "perf_event_max_contexts_per_stack",
		.data		= &sysctl_perf_event_max_contexts_per_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE_THOUSAND,
	},
};

static int __init init_callchain_sysctls(void)
{
	register_sysctl_init("kernel", callchain_sysctl_table);
	return 0;
}
core_initcall(init_callchain_sysctls);
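/*
 * Illustrative sketch only, not part of this file: how callers are expected
 * to pair the helpers above. The names my_event_init(), my_overflow_handler()
 * and my_event_destroy() are hypothetical; the real callers live elsewhere
 * (e.g. kernel/events/core.c).
 *
 *	static int my_event_init(struct perf_event *event)
 *	{
 *		// First user allocates the per-CPU buffers under
 *		// callchain_mutex; later users only bump the refcount.
 *		// -EOVERFLOW means the requested depth exceeds
 *		// sysctl_perf_event_max_stack.
 *		return get_callchain_buffers(event->attr.sample_max_stack);
 *	}
 *
 *	static void my_overflow_handler(struct perf_event *event,
 *					struct pt_regs *regs)
 *	{
 *		struct perf_callchain_entry *callchain;
 *
 *		// NMI-safe: picks the preallocated per-CPU entry for the
 *		// current recursion context, no allocation on this path.
 *		callchain = get_perf_callchain(regs, 0, true, true,
 *					       event->attr.sample_max_stack,
 *					       false, true);
 *		if (callchain)
 *			;	// copy the entries into the sample record
 *	}
 *
 *	static void my_event_destroy(struct perf_event *event)
 *	{
 *		// Last user schedules the buffers for freeing after an RCU
 *		// grace period via call_rcu().
 *		put_callchain_buffers();
 *	}
 */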