xref: /linux/tools/perf/util/annotate-data.c (revision 257ca10c7317d4a424e48bb95d14ca53a1f1dd6f)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Convert sample address to data type using DWARF debug info.
4  *
5  * Written by Namhyung Kim <namhyung@kernel.org>
6  */
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <inttypes.h>
11 
12 #include "annotate-data.h"
13 #include "debuginfo.h"
14 #include "debug.h"
15 #include "dso.h"
16 #include "evsel.h"
17 #include "evlist.h"
18 #include "map.h"
19 #include "map_symbol.h"
20 #include "strbuf.h"
21 #include "symbol.h"
22 #include "symbol_conf.h"
23 
24 /*
25  * Compare type name and size to maintain them in a tree.
26  * I'm not sure if DWARF would have information of a single type in many
27  * different places (compilation units).  If not, it could compare the
28  * offset of the type entry in the .debug_info section.
29  */
30 static int data_type_cmp(const void *_key, const struct rb_node *node)
31 {
32 	const struct annotated_data_type *key = _key;
33 	struct annotated_data_type *type;
34 
35 	type = rb_entry(node, struct annotated_data_type, node);
36 
37 	if (key->self.size != type->self.size)
38 		return key->self.size - type->self.size;
39 	return strcmp(key->self.type_name, type->self.type_name);
40 }
41 
42 static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b)
43 {
44 	struct annotated_data_type *a, *b;
45 
46 	a = rb_entry(node_a, struct annotated_data_type, node);
47 	b = rb_entry(node_b, struct annotated_data_type, node);
48 
49 	if (a->self.size != b->self.size)
50 		return a->self.size < b->self.size;
51 	return strcmp(a->self.type_name, b->self.type_name) < 0;
52 }
53 
54 /* Recursively add new members for struct/union */
55 static int __add_member_cb(Dwarf_Die *die, void *arg)
56 {
57 	struct annotated_member *parent = arg;
58 	struct annotated_member *member;
59 	Dwarf_Die member_type, die_mem;
60 	Dwarf_Word size, loc;
61 	Dwarf_Attribute attr;
62 	struct strbuf sb;
63 	int tag;
64 
65 	if (dwarf_tag(die) != DW_TAG_member)
66 		return DIE_FIND_CB_SIBLING;
67 
68 	member = zalloc(sizeof(*member));
69 	if (member == NULL)
70 		return DIE_FIND_CB_END;
71 
72 	strbuf_init(&sb, 32);
73 	die_get_typename(die, &sb);
74 
75 	die_get_real_type(die, &member_type);
76 	if (dwarf_aggregate_size(&member_type, &size) < 0)
77 		size = 0;
78 
79 	if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr))
80 		loc = 0;
81 	else
82 		dwarf_formudata(&attr, &loc);
83 
84 	member->type_name = strbuf_detach(&sb, NULL);
85 	/* member->var_name can be NULL */
86 	if (dwarf_diename(die))
87 		member->var_name = strdup(dwarf_diename(die));
88 	member->size = size;
89 	member->offset = loc + parent->offset;
90 	INIT_LIST_HEAD(&member->children);
91 	list_add_tail(&member->node, &parent->children);
92 
93 	tag = dwarf_tag(&member_type);
94 	switch (tag) {
95 	case DW_TAG_structure_type:
96 	case DW_TAG_union_type:
97 		die_find_child(&member_type, __add_member_cb, member, &die_mem);
98 		break;
99 	default:
100 		break;
101 	}
102 	return DIE_FIND_CB_SIBLING;
103 }
104 
105 static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type)
106 {
107 	Dwarf_Die die_mem;
108 
109 	die_find_child(type, __add_member_cb, &parent->self, &die_mem);
110 }
111 
112 static void delete_members(struct annotated_member *member)
113 {
114 	struct annotated_member *child, *tmp;
115 
116 	list_for_each_entry_safe(child, tmp, &member->children, node) {
117 		list_del(&child->node);
118 		delete_members(child);
119 		free(child->type_name);
120 		free(child->var_name);
121 		free(child);
122 	}
123 }
124 
125 static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
126 							  Dwarf_Die *type_die)
127 {
128 	struct annotated_data_type *result = NULL;
129 	struct annotated_data_type key;
130 	struct rb_node *node;
131 	struct strbuf sb;
132 	char *type_name;
133 	Dwarf_Word size;
134 
135 	strbuf_init(&sb, 32);
136 	if (die_get_typename_from_type(type_die, &sb) < 0)
137 		strbuf_add(&sb, "(unknown type)", 14);
138 	type_name = strbuf_detach(&sb, NULL);
139 	dwarf_aggregate_size(type_die, &size);
140 
141 	/* Check existing nodes in dso->data_types tree */
142 	key.self.type_name = type_name;
143 	key.self.size = size;
144 	node = rb_find(&key, &dso->data_types, data_type_cmp);
145 	if (node) {
146 		result = rb_entry(node, struct annotated_data_type, node);
147 		free(type_name);
148 		return result;
149 	}
150 
151 	/* If not, add a new one */
152 	result = zalloc(sizeof(*result));
153 	if (result == NULL) {
154 		free(type_name);
155 		return NULL;
156 	}
157 
158 	result->self.type_name = type_name;
159 	result->self.size = size;
160 	INIT_LIST_HEAD(&result->self.children);
161 
162 	if (symbol_conf.annotate_data_member)
163 		add_member_types(result, type_die);
164 
165 	rb_add(&result->node, &dso->data_types, data_type_less);
166 	return result;
167 }
168 
169 static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
170 {
171 	Dwarf_Off off, next_off;
172 	size_t header_size;
173 
174 	if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL)
175 		return cu_die;
176 
177 	/*
178 	 * There are some kernels don't have full aranges and contain only a few
179 	 * aranges entries.  Fallback to iterate all CU entries in .debug_info
180 	 * in case it's missing.
181 	 */
182 	off = 0;
183 	while (dwarf_nextcu(di->dbg, off, &next_off, &header_size,
184 			    NULL, NULL, NULL) == 0) {
185 		if (dwarf_offdie(di->dbg, off + header_size, cu_die) &&
186 		    dwarf_haspc(cu_die, pc))
187 			return true;
188 
189 		off = next_off;
190 	}
191 	return false;
192 }
193 
194 /* The type info will be saved in @type_die */
195 static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
196 {
197 	Dwarf_Word size;
198 
199 	/* Get the type of the variable */
200 	if (die_get_real_type(var_die, type_die) == NULL) {
201 		pr_debug("variable has no type\n");
202 		ann_data_stat.no_typeinfo++;
203 		return -1;
204 	}
205 
206 	/*
207 	 * It expects a pointer type for a memory access.
208 	 * Convert to a real type it points to.
209 	 */
210 	if (dwarf_tag(type_die) != DW_TAG_pointer_type ||
211 	    die_get_real_type(type_die, type_die) == NULL) {
212 		pr_debug("no pointer or no type\n");
213 		ann_data_stat.no_typeinfo++;
214 		return -1;
215 	}
216 
217 	/* Get the size of the actual type */
218 	if (dwarf_aggregate_size(type_die, &size) < 0) {
219 		pr_debug("type size is unknown\n");
220 		ann_data_stat.invalid_size++;
221 		return -1;
222 	}
223 
224 	/* Minimal sanity check */
225 	if ((unsigned)offset >= size) {
226 		pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size);
227 		ann_data_stat.bad_offset++;
228 		return -1;
229 	}
230 
231 	return 0;
232 }
233 
234 /* The result will be saved in @type_die */
235 static int find_data_type_die(struct debuginfo *di, u64 pc,
236 			      int reg, int offset, Dwarf_Die *type_die)
237 {
238 	Dwarf_Die cu_die, var_die;
239 	Dwarf_Die *scopes = NULL;
240 	int ret = -1;
241 	int i, nr_scopes;
242 
243 	/* Get a compile_unit for this address */
244 	if (!find_cu_die(di, pc, &cu_die)) {
245 		pr_debug("cannot find CU for address %" PRIx64 "\n", pc);
246 		ann_data_stat.no_cuinfo++;
247 		return -1;
248 	}
249 
250 	/* Get a list of nested scopes - i.e. (inlined) functions and blocks. */
251 	nr_scopes = die_get_scopes(&cu_die, pc, &scopes);
252 
253 	/* Search from the inner-most scope to the outer */
254 	for (i = nr_scopes - 1; i >= 0; i--) {
255 		/* Look up variables/parameters in this scope */
256 		if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die))
257 			continue;
258 
259 		/* Found a variable, see if it's correct */
260 		ret = check_variable(&var_die, type_die, offset);
261 		goto out;
262 	}
263 	if (ret < 0)
264 		ann_data_stat.no_var++;
265 
266 out:
267 	free(scopes);
268 	return ret;
269 }
270 
271 /**
272  * find_data_type - Return a data type at the location
273  * @ms: map and symbol at the location
274  * @ip: instruction address of the memory access
275  * @reg: register that holds the base address
276  * @offset: offset from the base address
277  *
278  * This functions searches the debug information of the binary to get the data
279  * type it accesses.  The exact location is expressed by (ip, reg, offset).
280  * It return %NULL if not found.
281  */
282 struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
283 					   int reg, int offset)
284 {
285 	struct annotated_data_type *result = NULL;
286 	struct dso *dso = map__dso(ms->map);
287 	struct debuginfo *di;
288 	Dwarf_Die type_die;
289 	u64 pc;
290 
291 	di = debuginfo__new(dso->long_name);
292 	if (di == NULL) {
293 		pr_debug("cannot get the debug info\n");
294 		return NULL;
295 	}
296 
297 	/*
298 	 * IP is a relative instruction address from the start of the map, as
299 	 * it can be randomized/relocated, it needs to translate to PC which is
300 	 * a file address for DWARF processing.
301 	 */
302 	pc = map__rip_2objdump(ms->map, ip);
303 	if (find_data_type_die(di, pc, reg, offset, &type_die) < 0)
304 		goto out;
305 
306 	result = dso__findnew_data_type(dso, &type_die);
307 
308 out:
309 	debuginfo__delete(di);
310 	return result;
311 }
312 
313 static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries)
314 {
315 	int i;
316 	size_t sz = sizeof(struct type_hist);
317 
318 	sz += sizeof(struct type_hist_entry) * adt->self.size;
319 
320 	/* Allocate a table of pointers for each event */
321 	adt->nr_histograms = nr_entries;
322 	adt->histograms = calloc(nr_entries, sizeof(*adt->histograms));
323 	if (adt->histograms == NULL)
324 		return -ENOMEM;
325 
326 	/*
327 	 * Each histogram is allocated for the whole size of the type.
328 	 * TODO: Probably we can move the histogram to members.
329 	 */
330 	for (i = 0; i < nr_entries; i++) {
331 		adt->histograms[i] = zalloc(sz);
332 		if (adt->histograms[i] == NULL)
333 			goto err;
334 	}
335 	return 0;
336 
337 err:
338 	while (--i >= 0)
339 		free(adt->histograms[i]);
340 	free(adt->histograms);
341 	return -ENOMEM;
342 }
343 
344 static void delete_data_type_histograms(struct annotated_data_type *adt)
345 {
346 	for (int i = 0; i < adt->nr_histograms; i++)
347 		free(adt->histograms[i]);
348 	free(adt->histograms);
349 }
350 
351 void annotated_data_type__tree_delete(struct rb_root *root)
352 {
353 	struct annotated_data_type *pos;
354 
355 	while (!RB_EMPTY_ROOT(root)) {
356 		struct rb_node *node = rb_first(root);
357 
358 		rb_erase(node, root);
359 		pos = rb_entry(node, struct annotated_data_type, node);
360 		delete_members(&pos->self);
361 		delete_data_type_histograms(pos);
362 		free(pos->self.type_name);
363 		free(pos);
364 	}
365 }
366 
367 /**
368  * annotated_data_type__update_samples - Update histogram
369  * @adt: Data type to update
370  * @evsel: Event to update
371  * @offset: Offset in the type
372  * @nr_samples: Number of samples at this offset
373  * @period: Event count at this offset
374  *
375  * This function updates type histogram at @ofs for @evsel.  Samples are
376  * aggregated before calling this function so it can be called with more
377  * than one samples at a certain offset.
378  */
379 int annotated_data_type__update_samples(struct annotated_data_type *adt,
380 					struct evsel *evsel, int offset,
381 					int nr_samples, u64 period)
382 {
383 	struct type_hist *h;
384 
385 	if (adt == NULL)
386 		return 0;
387 
388 	if (adt->histograms == NULL) {
389 		int nr = evsel->evlist->core.nr_entries;
390 
391 		if (alloc_data_type_histograms(adt, nr) < 0)
392 			return -1;
393 	}
394 
395 	if (offset < 0 || offset >= adt->self.size)
396 		return -1;
397 
398 	h = adt->histograms[evsel->core.idx];
399 
400 	h->nr_samples += nr_samples;
401 	h->addr[offset].nr_samples += nr_samples;
402 	h->period += period;
403 	h->addr[offset].period += period;
404 	return 0;
405 }
406