/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

/*
 * Per recursion-context scratch buffers shared by all perf trace events;
 * allocated on first use (see perf_trace_event_reg()) and freed when the
 * last user goes away (see perf_trace_event_unreg()).
 */
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned accesses
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int total_ref_count;

/*
 * Decide whether @p_event may attach to @tp_event: consult the event's
 * own ->perf_perm hook first, then apply the function-trace and raw
 * sample policies below.  Returns 0 if permitted, -EPERM/-EINVAL if not.
 */
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	/* Event-specific permission hook, if any, gets the first say. */
	if (tp_event->perf_perm) {
		int ret = tp_event->perf_perm(tp_event, p_event);
		if (ret)
			return ret;
	}

	/* The ftrace function trace is allowed only for root. */
	if (ftrace_event_is_function(tp_event)) {
		if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
			return -EPERM;

		/*
		 * We don't allow user space callchains for function trace
		 * events, due to issues with page faults while tracing the
		 * page fault handler and its overall trickiness.
		 */
		if (!p_event->attr.exclude_callchain_user)
			return -EINVAL;

		/*
		 * Same reason to disable user stack dump as for user space
		 * callchains above.
		 */
		if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER)
			return -EINVAL;
	}

	/* No tracing, just counting, so no obvious leak */
	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
		return 0;

	/* Some events are ok to be traced by non-root users... */
	if (p_event->attach_state == PERF_ATTACH_TASK) {
		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
			return 0;
	}

	/*
	 * ...otherwise raw tracepoint data can be a severe data leak,
	 * only allow root to have these.
	 */
	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

/*
 * Take a perf reference on @tp_event.  The first reference allocates the
 * event's per-cpu hlist of perf events, and — if this is the first perf
 * trace event system-wide — the shared perf_trace_buf[] scratch buffers,
 * then registers the event with its class via TRACE_REG_PERF_REGISTER.
 * On failure everything allocated here is unwound and the refcount is
 * dropped again.
 */
static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
				struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret = -ENOMEM;
	int cpu;

	p_event->tp_event = tp_event;
	/* Not the first user of this event id: nothing more to set up. */
	if (tp_event->perf_refcount++ > 0)
		return 0;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	/* First perf trace event anywhere: allocate the shared buffers. */
	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	/*
	 * total_ref_count was not bumped yet, so !total_ref_count means
	 * the buffers (possibly partially) allocated above are ours to
	 * free; free_percpu(NULL) is a no-op for the unallocated slots.
	 */
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	/* Drop the reference taken above; free the list if we were first. */
	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}

/*
 * Drop a perf reference on p_event->tp_event, undoing
 * perf_trace_event_reg().  The last reference unregisters the event and
 * frees the per-cpu list and, for the last perf trace event system-wide,
 * the shared buffers.  Always releases the module reference taken in
 * perf_trace_init().
 */
static void perf_trace_event_unreg(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	if (--tp_event->perf_refcount > 0)
		goto out;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
out:
	module_put(tp_event->mod);
}

/* Forward TRACE_REG_PERF_OPEN to the event class; returns its result. */
static int perf_trace_event_open(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
}

/* Forward TRACE_REG_PERF_CLOSE to the event class (no return value). */
static void perf_trace_event_close(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
}

/*
 * Full setup of @p_event on @tp_event: permission check, registration,
 * then open.  If the open fails the registration is rolled back.
 */
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	int ret;

	ret = perf_trace_event_perm(tp_event, p_event);
	if (ret)
		return ret;

	ret = perf_trace_event_reg(tp_event, p_event);
	if (ret)
		return ret;

	ret = perf_trace_event_open(p_event);
	if (ret) {
		perf_trace_event_unreg(p_event);
		return ret;
	}

	return 0;
}

/*
 * Look up, under event_mutex, the ftrace event whose id matches
 * p_event->attr.config and initialize it for perf.  A module reference
 * is taken for the matching event and kept on success (dropped again in
 * perf_trace_event_unreg()); returns -EINVAL if no event matches.
 */
int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event;
	u64 event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}

/* Tear down @p_event: close then unregister, serialized by event_mutex. */
void perf_trace_destroy(struct perf_event *p_event)
{
	mutex_lock(&event_mutex);
	perf_trace_event_close(p_event);
	perf_trace_event_unreg(p_event);
	mutex_unlock(&event_mutex);
}

/*
 * Add @p_event to this cpu's active-event list (RCU) and notify the
 * event class via TRACE_REG_PERF_ADD.  If PERF_EF_START is not set the
 * event is added in the stopped state.
 */
int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head __percpu *pcpu_list;
	struct hlist_head *list;

	pcpu_list = tp_event->perf_events;
	if (WARN_ON_ONCE(!pcpu_list))
		return -EINVAL;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	list = this_cpu_ptr(pcpu_list);
	hlist_add_head_rcu(&p_event->hlist_entry, list);

	return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
}

/* Remove @p_event from the per-cpu list and notify the event class. */
void perf_trace_del(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	hlist_del_rcu(&p_event->hlist_entry);
	tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}

/*
 * Return a pointer to this cpu's scratch buffer for the current
 * recursion context, with the trace_entry header already filled in for
 * @type.  *rctxp receives the recursion context index; the caller is
 * presumably expected to release it when done (release path not visible
 * here — NOTE(review): confirm against perf_trace_buf_submit() callers).
 * Returns NULL if @size exceeds PERF_MAX_TRACE_SIZE or no recursion
 * context is available.  @regs is unused here.
 */
void *perf_trace_buf_prepare(int size, unsigned short type,
			     struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	unsigned long flags;
	char *raw_data;
	int pc;

	/* perf_trace_t carving below relies on this divisibility. */
	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
		      "perf buffer not large enough"))
		return NULL;

	pc = preempt_count();

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		return NULL;

	raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

	/* zero the dead bytes from align to not leak stack to user */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	local_save_flags(flags);
	tracing_generic_entry_update(entry, flags, pc);
	entry->type = type;

	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
NOKPROBE_SYMBOL(perf_trace_buf_prepare);

#ifdef CONFIG_FUNCTION_TRACER
/*
 * ftrace_ops callback for perf function-trace events: record one
 * ftrace_entry (ip, parent_ip) and submit it to the perf events on this
 * cpu's list.  Bails out early when no event is active on this cpu.
 */
static void
perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
			  struct ftrace_ops *ops, struct pt_regs *pt_regs)
{
	struct ftrace_entry *entry;
	struct hlist_head *head;
	struct pt_regs regs;
	int rctx;

	head = this_cpu_ptr(event_function.perf_events);
	if (hlist_empty(head))
		return;

/* Entry size padded to u64, minus the u32 the buffer layout accounts for. */
#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
		    sizeof(u64)) - sizeof(u32))

	BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);

	perf_fetch_caller_regs(&regs);

	entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
	if (!entry)
		return;

	entry->ip = ip;
	entry->parent_ip = parent_ip;
	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
			      1, &regs, head, NULL);

#undef ENTRY_SIZE
}

/* Hook @event's private ftrace_ops to our callback and register it. */
static int perf_ftrace_function_register(struct perf_event *event)
{
	struct ftrace_ops *ops = &event->ftrace_ops;

	ops->flags |= FTRACE_OPS_FL_CONTROL;
	ops->func = perf_ftrace_function_call;
	return register_ftrace_function(ops);
}

/* Unregister @event's ftrace_ops and release its filter. */
static int perf_ftrace_function_unregister(struct perf_event *event)
{
	struct ftrace_ops *ops = &event->ftrace_ops;
	int ret = unregister_ftrace_function(ops);
	ftrace_free_filter(ops);
	return ret;
}

/* Enable @event's ftrace_ops on the local cpu. */
static void perf_ftrace_function_enable(struct perf_event *event)
{
	ftrace_function_local_enable(&event->ftrace_ops);
}

/* Disable @event's ftrace_ops on the local cpu. */
static void perf_ftrace_function_disable(struct perf_event *event)
{
	ftrace_function_local_disable(&event->ftrace_ops);
}

/*
 * class->reg() implementation for the function trace event: dispatch
 * the perf OPEN/CLOSE/ADD/DEL operations to the helpers above.  The
 * non-perf REGISTER/UNREGISTER ops fall through to -EINVAL; the perf
 * REGISTER/UNREGISTER ops are no-ops here.
 */
int perf_ftrace_event_register(struct ftrace_event_call *call,
			       enum trace_reg type, void *data)
{
	switch (type) {
	case TRACE_REG_REGISTER:
	case TRACE_REG_UNREGISTER:
		break;
	case TRACE_REG_PERF_REGISTER:
	case TRACE_REG_PERF_UNREGISTER:
		return 0;
	case TRACE_REG_PERF_OPEN:
		return perf_ftrace_function_register(data);
	case TRACE_REG_PERF_CLOSE:
		return perf_ftrace_function_unregister(data);
	case TRACE_REG_PERF_ADD:
		perf_ftrace_function_enable(data);
		return 0;
	case TRACE_REG_PERF_DEL:
		perf_ftrace_function_disable(data);
		return 0;
	}

	return -EINVAL;
}
#endif /* CONFIG_FUNCTION_TRACER */