xref: /linux/kernel/trace/trace_event_perf.c (revision 092e0e7e520a1fca03e13c9f2d157432a8657ff2)
/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

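/*
 * Per-cpu scratch buffers used to build an event record before it is
 * handed to perf, one buffer per recursion context (task, softirq,
 * hardirq, NMI) so that nested events do not clobber each other.
 */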
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force the buffer to be aligned to unsigned long to avoid misaligned
 * access surprises.
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int	total_ref_count;

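/*
 * First perf event attached to a given trace event: allocate the
 * per-cpu hlist heads, allocate the global scratch buffers if this is
 * the first trace event used by perf at all, then register the
 * tracepoint callback. Later events on the same trace event just take
 * a reference.
 */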
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret = -ENOMEM;
	int cpu;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}

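/*
 * Illustrative userspace sketch (not part of this file): the
 * attr.config value matched below is the id exported in the event's
 * debugfs "id" file, e.g.
 * /sys/kernel/debug/tracing/events/sched/sched_switch/id, so a tool
 * might do roughly:
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_TRACEPOINT,
 *		.size	= sizeof(attr),
 *		.config	= id,
 *	};
 *	fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
 *
 * where "id" holds the number read from the "id" file.
 */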
int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event;
	int event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}

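/*
 * pmu->add() callback: publish the event on this CPU's hlist so the
 * tracepoint fast path can find it. Without PERF_EF_START the event is
 * added in the stopped state. A rough sketch of the consumer side,
 * assuming the perf_tp_event() reader of this kernel, which walks the
 * list under RCU:
 *
 *	hlist_for_each_entry_rcu(event, node, head, hlist_entry)
 *		perf_swevent_event(event, count, 1, &data, regs);
 */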
int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head __percpu *pcpu_list;
	struct hlist_head *list;

	pcpu_list = tp_event->perf_events;
	if (WARN_ON_ONCE(!pcpu_list))
		return -EINVAL;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	list = this_cpu_ptr(pcpu_list);
	hlist_add_head_rcu(&p_event->hlist_entry, list);

	return 0;
}

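/*
 * pmu->del() callback: unpublish the event from this CPU's hlist.
 * Readers may still be traversing the list; actual freeing waits for
 * an RCU grace period (see perf_trace_destroy()).
 */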
void perf_trace_del(struct perf_event *p_event, int flags)
{
	hlist_del_rcu(&p_event->hlist_entry);
}

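/*
 * Drop the references taken in perf_trace_init(). The last user of a
 * trace event unregisters the tracepoint callback, waits for in-flight
 * callbacks to finish, then frees the per-cpu hlist heads; the last
 * user overall also frees the global scratch buffers.
 */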
void perf_trace_destroy(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	mutex_lock(&event_mutex);
	if (--tp_event->perf_refcount > 0)
		goto out;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
out:
	module_put(tp_event->mod);
	mutex_unlock(&event_mutex);
}

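/*
 * Reserve a per-cpu scratch buffer and pre-fill the common trace_entry
 * header. Returns NULL if the recursion context for the current
 * interrupt level is already taken; on success *rctxp holds the
 * recursion context, which must be released when the record is
 * submitted.
 */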
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	unsigned long flags;
	char *raw_data;
	int pc;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		return NULL;

	raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

	/* Zero the alignment padding bytes so we don't leak stack to userspace */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	local_save_flags(flags);
	tracing_generic_entry_update(entry, flags, pc);
	entry->type = type;

	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
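/*
 * Minimal caller sketch, modeled on the perf handlers that the
 * TRACE_EVENT() machinery generates in this kernel; entry_size and
 * event_call stand in for the generated locals:
 *
 *	struct hlist_head *head;
 *	struct trace_entry *entry;
 *	struct pt_regs regs;
 *	int rctx;
 *
 *	perf_fetch_caller_regs(&regs);
 *
 *	entry = perf_trace_buf_prepare(entry_size, event_call->event.type,
 *				       &regs, &rctx);
 *	if (!entry)
 *		return;
 *
 *	... fill in the event-specific fields behind the header ...
 *
 *	head = this_cpu_ptr(event_call->perf_events);
 *	perf_trace_buf_submit(entry, entry_size, rctx, 0, 1, &regs, head);
 *
 * perf_trace_buf_submit() hands the record to perf and releases the
 * recursion context taken by perf_trace_buf_prepare().
 */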