1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Convert sample address to data type using DWARF debug info. 4 * 5 * Written by Namhyung Kim <namhyung@kernel.org> 6 */ 7 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <inttypes.h> 11 12 #include "annotate.h" 13 #include "annotate-data.h" 14 #include "debuginfo.h" 15 #include "debug.h" 16 #include "dso.h" 17 #include "dwarf-regs.h" 18 #include "evsel.h" 19 #include "evlist.h" 20 #include "map.h" 21 #include "map_symbol.h" 22 #include "strbuf.h" 23 #include "symbol.h" 24 #include "symbol_conf.h" 25 26 /* 27 * Compare type name and size to maintain them in a tree. 28 * I'm not sure if DWARF would have information of a single type in many 29 * different places (compilation units). If not, it could compare the 30 * offset of the type entry in the .debug_info section. 31 */ 32 static int data_type_cmp(const void *_key, const struct rb_node *node) 33 { 34 const struct annotated_data_type *key = _key; 35 struct annotated_data_type *type; 36 37 type = rb_entry(node, struct annotated_data_type, node); 38 39 if (key->self.size != type->self.size) 40 return key->self.size - type->self.size; 41 return strcmp(key->self.type_name, type->self.type_name); 42 } 43 44 static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b) 45 { 46 struct annotated_data_type *a, *b; 47 48 a = rb_entry(node_a, struct annotated_data_type, node); 49 b = rb_entry(node_b, struct annotated_data_type, node); 50 51 if (a->self.size != b->self.size) 52 return a->self.size < b->self.size; 53 return strcmp(a->self.type_name, b->self.type_name) < 0; 54 } 55 56 /* Recursively add new members for struct/union */ 57 static int __add_member_cb(Dwarf_Die *die, void *arg) 58 { 59 struct annotated_member *parent = arg; 60 struct annotated_member *member; 61 Dwarf_Die member_type, die_mem; 62 Dwarf_Word size, loc; 63 Dwarf_Attribute attr; 64 struct strbuf sb; 65 int tag; 66 67 if (dwarf_tag(die) != DW_TAG_member) 68 return DIE_FIND_CB_SIBLING; 69 70 member = zalloc(sizeof(*member)); 71 if (member == NULL) 72 return DIE_FIND_CB_END; 73 74 strbuf_init(&sb, 32); 75 die_get_typename(die, &sb); 76 77 die_get_real_type(die, &member_type); 78 if (dwarf_aggregate_size(&member_type, &size) < 0) 79 size = 0; 80 81 if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr)) 82 loc = 0; 83 else 84 dwarf_formudata(&attr, &loc); 85 86 member->type_name = strbuf_detach(&sb, NULL); 87 /* member->var_name can be NULL */ 88 if (dwarf_diename(die)) 89 member->var_name = strdup(dwarf_diename(die)); 90 member->size = size; 91 member->offset = loc + parent->offset; 92 INIT_LIST_HEAD(&member->children); 93 list_add_tail(&member->node, &parent->children); 94 95 tag = dwarf_tag(&member_type); 96 switch (tag) { 97 case DW_TAG_structure_type: 98 case DW_TAG_union_type: 99 die_find_child(&member_type, __add_member_cb, member, &die_mem); 100 break; 101 default: 102 break; 103 } 104 return DIE_FIND_CB_SIBLING; 105 } 106 107 static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type) 108 { 109 Dwarf_Die die_mem; 110 111 die_find_child(type, __add_member_cb, &parent->self, &die_mem); 112 } 113 114 static void delete_members(struct annotated_member *member) 115 { 116 struct annotated_member *child, *tmp; 117 118 list_for_each_entry_safe(child, tmp, &member->children, node) { 119 list_del(&child->node); 120 delete_members(child); 121 free(child->type_name); 122 free(child->var_name); 123 free(child); 124 } 125 } 126 127 static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, 128 Dwarf_Die *type_die) 129 { 130 struct annotated_data_type *result = NULL; 131 struct annotated_data_type key; 132 struct rb_node *node; 133 struct strbuf sb; 134 char *type_name; 135 Dwarf_Word size; 136 137 strbuf_init(&sb, 32); 138 if (die_get_typename_from_type(type_die, &sb) < 0) 139 strbuf_add(&sb, "(unknown type)", 14); 140 type_name = strbuf_detach(&sb, NULL); 141 dwarf_aggregate_size(type_die, &size); 142 143 /* Check existing nodes in dso->data_types tree */ 144 key.self.type_name = type_name; 145 key.self.size = size; 146 node = rb_find(&key, &dso->data_types, data_type_cmp); 147 if (node) { 148 result = rb_entry(node, struct annotated_data_type, node); 149 free(type_name); 150 return result; 151 } 152 153 /* If not, add a new one */ 154 result = zalloc(sizeof(*result)); 155 if (result == NULL) { 156 free(type_name); 157 return NULL; 158 } 159 160 result->self.type_name = type_name; 161 result->self.size = size; 162 INIT_LIST_HEAD(&result->self.children); 163 164 if (symbol_conf.annotate_data_member) 165 add_member_types(result, type_die); 166 167 rb_add(&result->node, &dso->data_types, data_type_less); 168 return result; 169 } 170 171 static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die) 172 { 173 Dwarf_Off off, next_off; 174 size_t header_size; 175 176 if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL) 177 return cu_die; 178 179 /* 180 * There are some kernels don't have full aranges and contain only a few 181 * aranges entries. Fallback to iterate all CU entries in .debug_info 182 * in case it's missing. 183 */ 184 off = 0; 185 while (dwarf_nextcu(di->dbg, off, &next_off, &header_size, 186 NULL, NULL, NULL) == 0) { 187 if (dwarf_offdie(di->dbg, off + header_size, cu_die) && 188 dwarf_haspc(cu_die, pc)) 189 return true; 190 191 off = next_off; 192 } 193 return false; 194 } 195 196 /* The type info will be saved in @type_die */ 197 static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset, 198 bool is_pointer) 199 { 200 Dwarf_Word size; 201 202 /* Get the type of the variable */ 203 if (die_get_real_type(var_die, type_die) == NULL) { 204 pr_debug("variable has no type\n"); 205 ann_data_stat.no_typeinfo++; 206 return -1; 207 } 208 209 /* 210 * Usually it expects a pointer type for a memory access. 211 * Convert to a real type it points to. But global variables 212 * and local variables are accessed directly without a pointer. 213 */ 214 if (is_pointer) { 215 if ((dwarf_tag(type_die) != DW_TAG_pointer_type && 216 dwarf_tag(type_die) != DW_TAG_array_type) || 217 die_get_real_type(type_die, type_die) == NULL) { 218 pr_debug("no pointer or no type\n"); 219 ann_data_stat.no_typeinfo++; 220 return -1; 221 } 222 } 223 224 /* Get the size of the actual type */ 225 if (dwarf_aggregate_size(type_die, &size) < 0) { 226 pr_debug("type size is unknown\n"); 227 ann_data_stat.invalid_size++; 228 return -1; 229 } 230 231 /* Minimal sanity check */ 232 if ((unsigned)offset >= size) { 233 pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size); 234 ann_data_stat.bad_offset++; 235 return -1; 236 } 237 238 return 0; 239 } 240 241 /* The result will be saved in @type_die */ 242 static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, 243 const char *var_name, struct annotated_op_loc *loc, 244 Dwarf_Die *type_die) 245 { 246 Dwarf_Die cu_die, var_die; 247 Dwarf_Die *scopes = NULL; 248 int reg, offset; 249 int ret = -1; 250 int i, nr_scopes; 251 int fbreg = -1; 252 bool is_fbreg = false; 253 int fb_offset = 0; 254 255 /* Get a compile_unit for this address */ 256 if (!find_cu_die(di, pc, &cu_die)) { 257 pr_debug("cannot find CU for address %" PRIx64 "\n", pc); 258 ann_data_stat.no_cuinfo++; 259 return -1; 260 } 261 262 reg = loc->reg1; 263 offset = loc->offset; 264 265 if (reg == DWARF_REG_PC) { 266 if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) { 267 ret = check_variable(&var_die, type_die, offset, 268 /*is_pointer=*/false); 269 loc->offset = offset; 270 goto out; 271 } 272 273 if (var_name && die_find_variable_at(&cu_die, var_name, pc, 274 &var_die)) { 275 ret = check_variable(&var_die, type_die, 0, 276 /*is_pointer=*/false); 277 /* loc->offset will be updated by the caller */ 278 goto out; 279 } 280 } 281 282 /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ 283 nr_scopes = die_get_scopes(&cu_die, pc, &scopes); 284 285 if (reg != DWARF_REG_PC && dwarf_hasattr(&scopes[0], DW_AT_frame_base)) { 286 Dwarf_Attribute attr; 287 Dwarf_Block block; 288 289 /* Check if the 'reg' is assigned as frame base register */ 290 if (dwarf_attr(&scopes[0], DW_AT_frame_base, &attr) != NULL && 291 dwarf_formblock(&attr, &block) == 0 && block.length == 1) { 292 switch (*block.data) { 293 case DW_OP_reg0 ... DW_OP_reg31: 294 fbreg = *block.data - DW_OP_reg0; 295 break; 296 case DW_OP_call_frame_cfa: 297 if (die_get_cfa(di->dbg, pc, &fbreg, 298 &fb_offset) < 0) 299 fbreg = -1; 300 break; 301 default: 302 break; 303 } 304 } 305 } 306 307 retry: 308 is_fbreg = (reg == fbreg); 309 if (is_fbreg) 310 offset = loc->offset - fb_offset; 311 312 /* Search from the inner-most scope to the outer */ 313 for (i = nr_scopes - 1; i >= 0; i--) { 314 if (reg == DWARF_REG_PC) { 315 if (!die_find_variable_by_addr(&scopes[i], pc, addr, 316 &var_die, &offset)) 317 continue; 318 } else { 319 /* Look up variables/parameters in this scope */ 320 if (!die_find_variable_by_reg(&scopes[i], pc, reg, 321 &offset, is_fbreg, &var_die)) 322 continue; 323 } 324 325 /* Found a variable, see if it's correct */ 326 ret = check_variable(&var_die, type_die, offset, 327 reg != DWARF_REG_PC && !is_fbreg); 328 loc->offset = offset; 329 goto out; 330 } 331 332 if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) { 333 reg = loc->reg2; 334 goto retry; 335 } 336 337 if (ret < 0) 338 ann_data_stat.no_var++; 339 340 out: 341 free(scopes); 342 return ret; 343 } 344 345 /** 346 * find_data_type - Return a data type at the location 347 * @ms: map and symbol at the location 348 * @ip: instruction address of the memory access 349 * @loc: instruction operand location 350 * @addr: data address of the memory access 351 * @var_name: global variable name 352 * 353 * This functions searches the debug information of the binary to get the data 354 * type it accesses. The exact location is expressed by (@ip, reg, offset) 355 * for pointer variables or (@ip, @addr) for global variables. Note that global 356 * variables might update the @loc->offset after finding the start of the variable. 357 * If it cannot find a global variable by address, it tried to fine a declaration 358 * of the variable using @var_name. In that case, @loc->offset won't be updated. 359 * 360 * It return %NULL if not found. 361 */ 362 struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, 363 struct annotated_op_loc *loc, u64 addr, 364 const char *var_name) 365 { 366 struct annotated_data_type *result = NULL; 367 struct dso *dso = map__dso(ms->map); 368 struct debuginfo *di; 369 Dwarf_Die type_die; 370 u64 pc; 371 372 di = debuginfo__new(dso->long_name); 373 if (di == NULL) { 374 pr_debug("cannot get the debug info\n"); 375 return NULL; 376 } 377 378 /* 379 * IP is a relative instruction address from the start of the map, as 380 * it can be randomized/relocated, it needs to translate to PC which is 381 * a file address for DWARF processing. 382 */ 383 pc = map__rip_2objdump(ms->map, ip); 384 if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0) 385 goto out; 386 387 result = dso__findnew_data_type(dso, &type_die); 388 389 out: 390 debuginfo__delete(di); 391 return result; 392 } 393 394 static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries) 395 { 396 int i; 397 size_t sz = sizeof(struct type_hist); 398 399 sz += sizeof(struct type_hist_entry) * adt->self.size; 400 401 /* Allocate a table of pointers for each event */ 402 adt->nr_histograms = nr_entries; 403 adt->histograms = calloc(nr_entries, sizeof(*adt->histograms)); 404 if (adt->histograms == NULL) 405 return -ENOMEM; 406 407 /* 408 * Each histogram is allocated for the whole size of the type. 409 * TODO: Probably we can move the histogram to members. 410 */ 411 for (i = 0; i < nr_entries; i++) { 412 adt->histograms[i] = zalloc(sz); 413 if (adt->histograms[i] == NULL) 414 goto err; 415 } 416 return 0; 417 418 err: 419 while (--i >= 0) 420 free(adt->histograms[i]); 421 free(adt->histograms); 422 return -ENOMEM; 423 } 424 425 static void delete_data_type_histograms(struct annotated_data_type *adt) 426 { 427 for (int i = 0; i < adt->nr_histograms; i++) 428 free(adt->histograms[i]); 429 free(adt->histograms); 430 } 431 432 void annotated_data_type__tree_delete(struct rb_root *root) 433 { 434 struct annotated_data_type *pos; 435 436 while (!RB_EMPTY_ROOT(root)) { 437 struct rb_node *node = rb_first(root); 438 439 rb_erase(node, root); 440 pos = rb_entry(node, struct annotated_data_type, node); 441 delete_members(&pos->self); 442 delete_data_type_histograms(pos); 443 free(pos->self.type_name); 444 free(pos); 445 } 446 } 447 448 /** 449 * annotated_data_type__update_samples - Update histogram 450 * @adt: Data type to update 451 * @evsel: Event to update 452 * @offset: Offset in the type 453 * @nr_samples: Number of samples at this offset 454 * @period: Event count at this offset 455 * 456 * This function updates type histogram at @ofs for @evsel. Samples are 457 * aggregated before calling this function so it can be called with more 458 * than one samples at a certain offset. 459 */ 460 int annotated_data_type__update_samples(struct annotated_data_type *adt, 461 struct evsel *evsel, int offset, 462 int nr_samples, u64 period) 463 { 464 struct type_hist *h; 465 466 if (adt == NULL) 467 return 0; 468 469 if (adt->histograms == NULL) { 470 int nr = evsel->evlist->core.nr_entries; 471 472 if (alloc_data_type_histograms(adt, nr) < 0) 473 return -1; 474 } 475 476 if (offset < 0 || offset >= adt->self.size) 477 return -1; 478 479 h = adt->histograms[evsel->core.idx]; 480 481 h->nr_samples += nr_samples; 482 h->addr[offset].nr_samples += nr_samples; 483 h->period += period; 484 h->addr[offset].period += period; 485 return 0; 486 } 487