1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright 2012 Jason King. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 /* 31 * Copyright 2020 Joyent, Inc. 32 * Copyright 2020 Robert Mustacchi 33 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 34 */ 35 36 /* 37 * CTF DWARF conversion theory. 38 * 39 * DWARF data contains a series of compilation units. Each compilation unit 40 * generally refers to an object file or what once was, in the case of linked 41 * binaries and shared objects. Each compilation unit has a series of what DWARF 42 * calls a DIE (Debugging Information Entry). The set of entries that we care 43 * about have type information stored in a series of attributes. Each DIE also 44 * has a tag that identifies the kind of attributes that it has. 45 * 46 * A given DIE may itself have children. For example, a DIE that represents a 47 * structure has children which represent members. Whenever we encounter a DIE 48 * that has children or other values or types associated with it, we recursively 49 * process those children first so that way we can then refer to the generated 50 * CTF type id while processing its parent. This reduces the amount of unknowns 51 * and fixups that we need. It also ensures that we don't accidentally add types 52 * that an overzealous compiler might add to the DWARF data but aren't used by 53 * anything in the system. 54 * 55 * Once we do a conversion, we store a mapping in an AVL tree that goes from the 56 * DWARF's die offset, which is relative to the given compilation unit, to a 57 * ctf_id_t. 58 * 59 * Unfortunately, some compilers actually will emit duplicate entries for a 60 * given type that look similar, but aren't quite. To that end, we go through 61 * and do a variant on a merge once we're done processing a single compilation 62 * unit which deduplicates all of the types that are in the unit. 63 * 64 * Finally, if we encounter an object that has multiple compilation units, then 65 * we'll convert all of the compilation units separately and then do a merge, so 66 * that way we can result in one single ctf_file_t that represents everything 67 * for the object. 68 * 69 * Conversion Steps 70 * ---------------- 71 * 72 * Because a given object we've been given to convert may have multiple 73 * compilation units, we break the work into two halves. The first half 74 * processes each compilation unit (potentially in parallel) and then the second 75 * half optionally merges all of the dies in the first half. First, we'll cover 76 * what's involved in converting a single ctf_cu_t's dwarf to CTF. This covers 77 * the work done in ctf_dwarf_convert_one(). 78 * 79 * An individual ctf_cu_t, which represents a compilation unit, is converted to 80 * CTF in a series of multiple passes. 81 * 82 * Pass 1: During the first pass we walk all of the top-level dies and if we 83 * find a function, variable, struct, union, enum or typedef, we recursively 84 * transform all of its types. We don't recurse or process everything, because 85 * we don't want to add some of the types that compilers may add which are 86 * effectively unused. 87 * 88 * During pass 1, if we encounter any structures or unions we mark them for 89 * fixing up later. This is necessary because we may not be able to determine 90 * the full size of a structure at the beginning of time. This will happen if 91 * the DWARF attribute DW_AT_byte_size is not present for a member. Because of 92 * this possibility we defer adding members to structures or even converting 93 * them during pass 1 and save that for pass 2. Adding all of the base 94 * structures without any of their members helps deal with any circular 95 * dependencies that we might encounter. 96 * 97 * Pass 2: This pass is used to do the first half of fixing up structures and 98 * unions. Rather than walk the entire type space again, we actually walk the 99 * list of structures and unions that we marked for later fixing up. Here, we 100 * iterate over every structure and add members to the underlying ctf_file_t, 101 * but not to the structs themselves. One might wonder why we don't, and the 102 * main reason is that libctf requires a ctf_update() be done before adding the 103 * members to structures or unions. 104 * 105 * Pass 3: This pass is used to do the second half of fixing up structures and 106 * unions. During this part we always go through and add members to structures 107 * and unions that we added to the container in the previous pass. In addition, 108 * we set the structure and union's actual size, which may have additional 109 * padding added by the compiler, it isn't simply the last offset. DWARF always 110 * guarantees an attribute exists for this. Importantly no ctf_id_t's change 111 * during pass 2. 112 * 113 * Pass 4: The next phase is to add CTF entries for all of the symbols and 114 * variables that are present in this die. During pass 1 we added entries to a 115 * map for each variable and function. During this pass, we iterate over the 116 * symbol table and when we encounter a symbol that we have in our lists of 117 * translated information which matches, we then add it to the ctf_file_t. 118 * 119 * Pass 5: Here we go and look for any weak symbols and functions and see if 120 * they match anything that we recognize. If so, then we add type information 121 * for them at this point based on the matching type. 122 * 123 * Pass 6: This pass is actually a variant on a merge. The traditional merge 124 * process expects there to be no duplicate types. As such, at the end of 125 * conversion, we do a dedup on all of the types in the system. The 126 * deduplication process is described in lib/libctf/common/ctf_merge.c. 127 * 128 * Once pass 6 is done, we've finished processing the individual compilation 129 * unit. 130 * 131 * The following steps reflect the general process of doing a conversion. 132 * 133 * 1) Walk the dwarf section and determine the number of compilation units 134 * 2) Create a ctf_cu_t for each compilation unit 135 * 3) Add all ctf_cu_t's to a workq 136 * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself 137 * is comprised of several steps, which were already enumerated. 138 * 5) If we have multiple cu's, we do a ctf merge of all the dies. The mechanics 139 * of the merge are discussed in lib/libctf/common/ctf_merge.c. 140 * 6) Free everything up and return a ctf_file_t to the user. If we only had a 141 * single compilation unit, then we give that to the user. Otherwise, we 142 * return the merged ctf_file_t. 143 * 144 * Threading 145 * --------- 146 * 147 * The process has been designed to be amenable to threading. Each compilation 148 * unit has its own type stream, therefore the logical place to divide and 149 * conquer is at the compilation unit. Each ctf_cu_t has been built to be able 150 * to be processed independently of the others. It has its own libdwarf handle, 151 * as a given libdwarf handle may only be used by a single thread at a time. 152 * This allows the various ctf_cu_t's to be processed in parallel by different 153 * threads. 154 * 155 * All of the ctf_cu_t's are loaded into a workq which allows for a number of 156 * threads to be specified and used as a thread pool to process all of the 157 * queued work. We set the number of threads to use in the workq equal to the 158 * number of threads that the user has specified. 159 * 160 * After all of the compilation units have been drained, we use the same number 161 * of threads when performing a merge of multiple compilation units, if they 162 * exist. 163 * 164 * While all of these different parts do support and allow for multiple threads, 165 * it's important that when only a single thread is specified, that it be the 166 * calling thread. This allows the conversion routines to be used in a context 167 * that doesn't allow additional threads, such as rtld. 168 * 169 * Common DWARF Mechanics and Notes 170 * -------------------------------- 171 * 172 * At this time, we really only support DWARFv2, though support for DWARFv4 is 173 * mostly there. There is no intent to support DWARFv3. 174 * 175 * Generally types for something are stored in the DW_AT_type attribute. For 176 * example, a function's return type will be stored in the local DW_AT_type 177 * attribute while the arguments will be in child DIEs. There are also various 178 * times when we don't have any DW_AT_type. In that case, the lack of a type 179 * implies, at least for C, that its C type is void. Because DWARF doesn't emit 180 * one, we have a synthetic void type that we create and manipulate instead and 181 * pass it off to consumers on an as-needed basis. If nothing has a void type, 182 * it will not be emitted. 183 * 184 * Architecture Specific Parts 185 * --------------------------- 186 * 187 * The CTF tooling encodes various information about the various architectures 188 * in the system. Importantly, the tool assumes that every architecture has a 189 * data model where long and pointer are the same size. This is currently the 190 * case, as the two data models illumos supports are ILP32 and LP64. 191 * 192 * In addition, we encode the mapping of various floating point sizes to various 193 * types for each architecture. If a new architecture is being added, it should 194 * be added to the list. The general design of the ctf conversion tools is to be 195 * architecture independent. eg. any of the tools here should be able to convert 196 * any architecture's DWARF into ctf; however, this has not been rigorously 197 * tested and more importantly, the ctf routines don't currently write out the 198 * data in an endian-aware form, they only use that of the currently running 199 * library. 200 */ 201 202 #include <libctf_impl.h> 203 #include <sys/avl.h> 204 #include <sys/debug.h> 205 #include <gelf.h> 206 #include <libdwarf.h> 207 #include <dwarf.h> 208 #include <libgen.h> 209 #include <workq.h> 210 #include <thread.h> 211 #include <macros.h> 212 #include <errno.h> 213 214 #define DWARF_VERSION_TWO 2 215 #define DWARF_VERSION_FOUR 4 216 #define DWARF_VARARGS_NAME "..." 217 218 /* 219 * Dwarf may refer recursively to other types that we've already processed. To 220 * see if we've already converted them, we look them up in an AVL tree that's 221 * sorted by the DWARF id. 222 */ 223 typedef struct ctf_dwmap { 224 avl_node_t cdm_avl; 225 Dwarf_Off cdm_off; 226 Dwarf_Die cdm_die; 227 ctf_id_t cdm_id; 228 boolean_t cdm_fix; 229 } ctf_dwmap_t; 230 231 typedef struct ctf_dwvar { 232 ctf_list_t cdv_list; 233 char *cdv_name; 234 ctf_id_t cdv_type; 235 boolean_t cdv_global; 236 } ctf_dwvar_t; 237 238 typedef struct ctf_dwfunc { 239 ctf_list_t cdf_list; 240 char *cdf_name; 241 ctf_funcinfo_t cdf_fip; 242 ctf_id_t *cdf_argv; 243 boolean_t cdf_global; 244 } ctf_dwfunc_t; 245 246 typedef struct ctf_dwbitf { 247 ctf_list_t cdb_list; 248 ctf_id_t cdb_base; 249 uint_t cdb_nbits; 250 ctf_id_t cdb_id; 251 } ctf_dwbitf_t; 252 253 /* 254 * The ctf_cu_t represents a single top-level DWARF die unit. While generally, 255 * the typical object file has only a single die, if we're asked to convert 256 * something that's been linked from multiple sources, multiple dies will exist. 257 */ 258 typedef struct ctf_die { 259 Elf *cu_elf; /* shared libelf handle */ 260 int cu_fd; /* shared file descriptor */ 261 char *cu_name; /* basename of the DIE */ 262 ctf_merge_t *cu_cmh; /* merge handle */ 263 ctf_list_t cu_vars; /* List of variables */ 264 ctf_list_t cu_funcs; /* List of functions */ 265 ctf_list_t cu_bitfields; /* Bit field members */ 266 Dwarf_Debug cu_dwarf; /* libdwarf handle */ 267 mutex_t *cu_dwlock; /* libdwarf lock */ 268 Dwarf_Die cu_cu; /* libdwarf compilation unit */ 269 Dwarf_Off cu_cuoff; /* cu's offset */ 270 Dwarf_Off cu_maxoff; /* maximum offset */ 271 Dwarf_Half cu_vers; /* Dwarf Version */ 272 Dwarf_Half cu_addrsz; /* Dwarf Address Size */ 273 ctf_file_t *cu_ctfp; /* output CTF file */ 274 avl_tree_t cu_map; /* map die offsets to CTF types */ 275 char *cu_errbuf; /* error message buffer */ 276 size_t cu_errlen; /* error message buffer length */ 277 size_t cu_ptrsz; /* object's pointer size */ 278 boolean_t cu_bigend; /* is it big endian */ 279 boolean_t cu_doweaks; /* should we convert weak symbols? */ 280 uint_t cu_mach; /* machine type */ 281 ctf_id_t cu_voidtid; /* void pointer */ 282 ctf_id_t cu_longtid; /* id for a 'long' */ 283 } ctf_cu_t; 284 285 static int ctf_dwarf_init_die(ctf_cu_t *); 286 static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *); 287 static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die); 288 static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int); 289 290 static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *, 291 boolean_t); 292 static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *, 293 ctf_id_t *); 294 295 #define DWARF_LOCK(cup) \ 296 if ((cup)->cu_dwlock != NULL) \ 297 mutex_enter((cup)->cu_dwlock) 298 #define DWARF_UNLOCK(cup) \ 299 if ((cup)->cu_dwlock != NULL) \ 300 mutex_exit((cup)->cu_dwlock) 301 302 /* 303 * This is a generic way to set a CTF Conversion backend error depending on what 304 * we were doing. Unless it was one of a specific set of errors that don't 305 * indicate a programming / translation bug, eg. ENOMEM, then we transform it 306 * into a CTF backend error and fill in the error buffer. 307 */ 308 static int 309 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...) 310 { 311 va_list ap; 312 int ret; 313 size_t off = 0; 314 ssize_t rem = cup->cu_errlen; 315 if (cfp != NULL) 316 err = ctf_errno(cfp); 317 318 if (err == ENOMEM) 319 return (err); 320 321 ret = snprintf(cup->cu_errbuf, rem, "die %s: ", 322 cup->cu_name != NULL ? cup->cu_name : "NULL"); 323 if (ret < 0) 324 goto err; 325 off += ret; 326 rem = MAX(rem - ret, 0); 327 328 va_start(ap, fmt); 329 ret = vsnprintf(cup->cu_errbuf + off, rem, fmt, ap); 330 va_end(ap); 331 if (ret < 0) 332 goto err; 333 334 off += ret; 335 rem = MAX(rem - ret, 0); 336 if (fmt[strlen(fmt) - 1] != '\n') { 337 (void) snprintf(cup->cu_errbuf + off, rem, 338 ": %s\n", ctf_errmsg(err)); 339 } 340 va_end(ap); 341 return (ECTF_CONVBKERR); 342 343 err: 344 cup->cu_errbuf[0] = '\0'; 345 return (ECTF_CONVBKERR); 346 } 347 348 /* 349 * DWARF often opts to put no explicit type to describe a void type. eg. if we 350 * have a reference type whose DW_AT_type member doesn't exist, then we should 351 * instead assume it points to void. Because this isn't represented, we 352 * instead cause it to come into existence. 353 */ 354 static ctf_id_t 355 ctf_dwarf_void(ctf_cu_t *cup) 356 { 357 if (cup->cu_voidtid == CTF_ERR) { 358 ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 }; 359 cup->cu_voidtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_ROOT, 360 "void", &enc); 361 if (cup->cu_voidtid == CTF_ERR) { 362 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 363 "failed to create void type: %s\n", 364 ctf_errmsg(ctf_errno(cup->cu_ctfp))); 365 } 366 } 367 368 return (cup->cu_voidtid); 369 } 370 371 /* 372 * There are many different forms that an array index may take. However, we just 373 * always force it to be of a type long no matter what. Therefore we use this to 374 * have a single instance of long across everything. 375 */ 376 static ctf_id_t 377 ctf_dwarf_long(ctf_cu_t *cup) 378 { 379 if (cup->cu_longtid == CTF_ERR) { 380 ctf_encoding_t enc; 381 382 enc.cte_format = CTF_INT_SIGNED; 383 enc.cte_offset = 0; 384 /* All illumos systems are LP */ 385 enc.cte_bits = cup->cu_ptrsz * 8; 386 cup->cu_longtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT, 387 "long", &enc); 388 if (cup->cu_longtid == CTF_ERR) { 389 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 390 "failed to create long type: %s\n", 391 ctf_errmsg(ctf_errno(cup->cu_ctfp))); 392 } 393 394 } 395 396 return (cup->cu_longtid); 397 } 398 399 static int 400 ctf_dwmap_comp(const void *a, const void *b) 401 { 402 const ctf_dwmap_t *ca = a; 403 const ctf_dwmap_t *cb = b; 404 405 if (ca->cdm_off > cb->cdm_off) 406 return (1); 407 if (ca->cdm_off < cb->cdm_off) 408 return (-1); 409 return (0); 410 } 411 412 static int 413 ctf_dwmap_add(ctf_cu_t *cup, ctf_id_t id, Dwarf_Die die, boolean_t fix) 414 { 415 int ret; 416 avl_index_t index; 417 ctf_dwmap_t *dwmap; 418 Dwarf_Off off; 419 420 VERIFY(id > 0 && id < CTF_MAX_TYPE); 421 422 if ((ret = ctf_dwarf_offset(cup, die, &off)) != 0) 423 return (ret); 424 425 if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL) 426 return (ENOMEM); 427 428 dwmap->cdm_die = die; 429 dwmap->cdm_off = off; 430 dwmap->cdm_id = id; 431 dwmap->cdm_fix = fix; 432 433 ctf_dprintf("dwmap: %p %" DW_PR_DUx "->%d\n", dwmap, off, id); 434 VERIFY(avl_find(&cup->cu_map, dwmap, &index) == NULL); 435 avl_insert(&cup->cu_map, dwmap, index); 436 return (0); 437 } 438 439 static int 440 ctf_dwarf_attribute(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, 441 Dwarf_Attribute *attrp) 442 { 443 int ret; 444 Dwarf_Error derr; 445 446 DWARF_LOCK(cup); 447 ret = dwarf_attr(die, name, attrp, &derr); 448 DWARF_UNLOCK(cup); 449 if (ret == DW_DLV_OK) 450 return (0); 451 if (ret == DW_DLV_NO_ENTRY) { 452 *attrp = NULL; 453 return (ENOENT); 454 } 455 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 456 "failed to get attribute for type: %s\n", 457 dwarf_errmsg(derr)); 458 return (ECTF_CONVBKERR); 459 } 460 461 static void 462 ctf_dwarf_dealloc(ctf_cu_t *cup, Dwarf_Ptr ptr, Dwarf_Unsigned type) 463 { 464 DWARF_LOCK(cup); 465 dwarf_dealloc(cup->cu_dwarf, ptr, type); 466 DWARF_UNLOCK(cup); 467 } 468 469 static int 470 ctf_dwarf_ref(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp) 471 { 472 int ret; 473 Dwarf_Attribute attr; 474 Dwarf_Error derr; 475 476 if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) 477 return (ret); 478 479 DWARF_LOCK(cup); 480 ret = dwarf_formref(attr, refp, &derr); 481 DWARF_UNLOCK(cup); 482 if (ret == DW_DLV_OK) { 483 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 484 return (0); 485 } 486 487 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 488 "failed to get unsigned attribute for type: %s\n", 489 dwarf_errmsg(derr)); 490 return (ECTF_CONVBKERR); 491 } 492 493 static int 494 ctf_dwarf_refdie(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, 495 Dwarf_Die *diep) 496 { 497 int ret; 498 Dwarf_Off off; 499 Dwarf_Error derr; 500 501 if ((ret = ctf_dwarf_ref(cup, die, name, &off)) != 0) 502 return (ret); 503 504 off += cup->cu_cuoff; 505 DWARF_LOCK(cup); 506 ret = dwarf_offdie(cup->cu_dwarf, off, diep, &derr); 507 DWARF_UNLOCK(cup); 508 if (ret != DW_DLV_OK) { 509 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 510 "failed to get die from offset %" DW_PR_DUu ": %s\n", 511 off, dwarf_errmsg(derr)); 512 return (ECTF_CONVBKERR); 513 } 514 515 return (0); 516 } 517 518 static int 519 ctf_dwarf_signed(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, 520 Dwarf_Signed *valp) 521 { 522 int ret; 523 Dwarf_Attribute attr; 524 Dwarf_Error derr; 525 526 if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) 527 return (ret); 528 529 DWARF_LOCK(cup); 530 ret = dwarf_formsdata(attr, valp, &derr); 531 DWARF_UNLOCK(cup); 532 if (ret == DW_DLV_OK) { 533 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 534 return (0); 535 } 536 537 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 538 "failed to get unsigned attribute for type: %s\n", 539 dwarf_errmsg(derr)); 540 return (ECTF_CONVBKERR); 541 } 542 543 static int 544 ctf_dwarf_unsigned(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, 545 Dwarf_Unsigned *valp) 546 { 547 int ret; 548 Dwarf_Attribute attr; 549 Dwarf_Error derr; 550 551 if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) 552 return (ret); 553 554 DWARF_LOCK(cup); 555 ret = dwarf_formudata(attr, valp, &derr); 556 DWARF_UNLOCK(cup); 557 if (ret == DW_DLV_OK) { 558 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 559 return (0); 560 } 561 562 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 563 "failed to get unsigned attribute for type: %s\n", 564 dwarf_errmsg(derr)); 565 return (ECTF_CONVBKERR); 566 } 567 568 static int 569 ctf_dwarf_boolean(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, 570 Dwarf_Bool *val) 571 { 572 int ret; 573 Dwarf_Attribute attr; 574 Dwarf_Error derr; 575 576 if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) 577 return (ret); 578 579 DWARF_LOCK(cup); 580 ret = dwarf_formflag(attr, val, &derr); 581 DWARF_UNLOCK(cup); 582 if (ret == DW_DLV_OK) { 583 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 584 return (0); 585 } 586 587 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 588 "failed to get boolean attribute for type: %s\n", 589 dwarf_errmsg(derr)); 590 591 return (ECTF_CONVBKERR); 592 } 593 594 static int 595 ctf_dwarf_string(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, char **strp) 596 { 597 int ret; 598 char *s; 599 Dwarf_Attribute attr; 600 Dwarf_Error derr; 601 602 *strp = NULL; 603 if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0) 604 return (ret); 605 606 DWARF_LOCK(cup); 607 ret = dwarf_formstring(attr, &s, &derr); 608 DWARF_UNLOCK(cup); 609 if (ret == DW_DLV_OK) { 610 if ((*strp = ctf_strdup(s)) == NULL) 611 ret = ENOMEM; 612 else 613 ret = 0; 614 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 615 return (ret); 616 } 617 618 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 619 "failed to get string attribute for type: %s\n", 620 dwarf_errmsg(derr)); 621 return (ECTF_CONVBKERR); 622 } 623 624 /* 625 * The encoding of a DW_AT_data_member_location has changed between different 626 * revisions of the specification. It may be a general udata form or it may be 627 * location data information. In DWARF 2, it is only the latter. In later 628 * revisions of the spec, it may be either. To determine the form, we ask the 629 * class, which will be of type CONSTANT. 630 */ 631 static int 632 ctf_dwarf_member_location(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Unsigned *valp) 633 { 634 int ret; 635 Dwarf_Error derr; 636 Dwarf_Attribute attr; 637 Dwarf_Locdesc *loc; 638 Dwarf_Signed locnum; 639 Dwarf_Half form; 640 enum Dwarf_Form_Class class; 641 642 if ((ret = ctf_dwarf_attribute(cup, die, DW_AT_data_member_location, 643 &attr)) != 0) { 644 return (ret); 645 } 646 647 DWARF_LOCK(cup); 648 ret = dwarf_whatform(attr, &form, &derr); 649 DWARF_UNLOCK(cup); 650 if (ret != DW_DLV_OK) { 651 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 652 "failed to get dwarf attribute for for member location: %s", 653 dwarf_errmsg(derr)); 654 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 655 return (ECTF_CONVBKERR); 656 } 657 658 DWARF_LOCK(cup); 659 class = dwarf_get_form_class(cup->cu_vers, DW_AT_data_member_location, 660 cup->cu_addrsz, form); 661 if (class == DW_FORM_CLASS_CONSTANT) { 662 Dwarf_Signed sign; 663 664 /* 665 * We have a constant. We need to try to get both this as signed 666 * and unsigned data, as unfortunately, DWARF doesn't define the 667 * sign. Which is a joy. We try unsigned first. If neither 668 * match, fall through to the normal path. 669 */ 670 if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) { 671 DWARF_UNLOCK(cup); 672 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 673 return (0); 674 } 675 676 if (dwarf_formsdata(attr, &sign, &derr) == DW_DLV_OK) { 677 DWARF_UNLOCK(cup); 678 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 679 if (sign < 0) { 680 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 681 "encountered negative member data " 682 "location: %d", sign); 683 } 684 *valp = (Dwarf_Unsigned)sign; 685 return (0); 686 } 687 } 688 689 if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) { 690 DWARF_UNLOCK(cup); 691 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 692 "failed to obtain location list for member offset: %s", 693 dwarf_errmsg(derr)); 694 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 695 return (ECTF_CONVBKERR); 696 } 697 DWARF_UNLOCK(cup); 698 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 699 700 if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) { 701 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 702 "failed to parse location structure for member"); 703 ctf_dwarf_dealloc(cup, loc->ld_s, DW_DLA_LOC_BLOCK); 704 ctf_dwarf_dealloc(cup, loc, DW_DLA_LOCDESC); 705 return (ECTF_CONVBKERR); 706 } 707 708 *valp = loc->ld_s->lr_number; 709 710 ctf_dwarf_dealloc(cup, loc->ld_s, DW_DLA_LOC_BLOCK); 711 ctf_dwarf_dealloc(cup, loc, DW_DLA_LOCDESC); 712 return (0); 713 } 714 715 716 static int 717 ctf_dwarf_offset(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Off *offsetp) 718 { 719 Dwarf_Error derr; 720 int ret; 721 722 DWARF_LOCK(cup); 723 ret = dwarf_dieoffset(die, offsetp, &derr); 724 DWARF_UNLOCK(cup); 725 if (ret == DW_DLV_OK) 726 return (0); 727 728 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 729 "failed to get die offset: %s\n", 730 dwarf_errmsg(derr)); 731 return (ECTF_CONVBKERR); 732 } 733 734 /* simpler variant for debugging output */ 735 static Dwarf_Off 736 ctf_die_offset(ctf_cu_t *cup, Dwarf_Die die) 737 { 738 Dwarf_Off off = -1; 739 Dwarf_Error derr; 740 741 DWARF_LOCK(cup); 742 (void) dwarf_dieoffset(die, &off, &derr); 743 DWARF_UNLOCK(cup); 744 return (off); 745 } 746 747 static int 748 ctf_dwarf_tag(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half *tagp) 749 { 750 Dwarf_Error derr; 751 int ret; 752 753 DWARF_LOCK(cup); 754 ret = dwarf_tag(die, tagp, &derr); 755 DWARF_UNLOCK(cup); 756 if (ret == DW_DLV_OK) 757 return (0); 758 759 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 760 "failed to get tag type: %s\n", 761 dwarf_errmsg(derr)); 762 return (ECTF_CONVBKERR); 763 } 764 765 static int 766 ctf_dwarf_sib(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *sibp) 767 { 768 Dwarf_Error derr; 769 int ret; 770 771 *sibp = NULL; 772 DWARF_LOCK(cup); 773 ret = dwarf_siblingof(cup->cu_dwarf, base, sibp, &derr); 774 DWARF_UNLOCK(cup); 775 if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY) 776 return (0); 777 778 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 779 "failed to sibling from die: %s\n", 780 dwarf_errmsg(derr)); 781 return (ECTF_CONVBKERR); 782 } 783 784 static int 785 ctf_dwarf_child(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *childp) 786 { 787 Dwarf_Error derr; 788 int ret; 789 790 *childp = NULL; 791 DWARF_LOCK(cup); 792 ret = dwarf_child(base, childp, &derr); 793 DWARF_UNLOCK(cup); 794 if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY) 795 return (0); 796 797 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 798 "failed to child from die: %s\n", 799 dwarf_errmsg(derr)); 800 return (ECTF_CONVBKERR); 801 } 802 803 /* 804 * Compilers disagree on what to do to determine if something has global 805 * visiblity. Traditionally gcc has used DW_AT_external to indicate this while 806 * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then 807 * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not. 808 */ 809 static int 810 ctf_dwarf_isglobal(ctf_cu_t *cup, Dwarf_Die die, boolean_t *igp) 811 { 812 int ret; 813 Dwarf_Signed vis; 814 Dwarf_Bool ext; 815 816 if ((ret = ctf_dwarf_signed(cup, die, DW_AT_visibility, &vis)) == 0) { 817 *igp = vis == DW_VIS_exported; 818 return (0); 819 } else if (ret != ENOENT) { 820 return (ret); 821 } 822 823 if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_external, &ext)) != 0) { 824 if (ret == ENOENT) { 825 *igp = B_FALSE; 826 return (0); 827 } 828 return (ret); 829 } 830 *igp = ext != 0 ? B_TRUE : B_FALSE; 831 return (0); 832 } 833 834 static int 835 ctf_dwarf_die_elfenc(Elf *elf, ctf_cu_t *cup, char *errbuf, size_t errlen) 836 { 837 GElf_Ehdr ehdr; 838 839 if (gelf_getehdr(elf, &ehdr) == NULL) { 840 (void) snprintf(errbuf, errlen, 841 "failed to get ELF header: %s\n", 842 elf_errmsg(elf_errno())); 843 return (ECTF_CONVBKERR); 844 } 845 846 cup->cu_mach = ehdr.e_machine; 847 848 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 849 cup->cu_ptrsz = 4; 850 VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_ILP32) == 0); 851 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { 852 cup->cu_ptrsz = 8; 853 VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_LP64) == 0); 854 } else { 855 (void) snprintf(errbuf, errlen, 856 "unknown ELF class %d", ehdr.e_ident[EI_CLASS]); 857 return (ECTF_CONVBKERR); 858 } 859 860 if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) { 861 cup->cu_bigend = B_FALSE; 862 } else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) { 863 cup->cu_bigend = B_TRUE; 864 } else { 865 (void) snprintf(errbuf, errlen, 866 "unknown ELF data encoding: %hhu", ehdr.e_ident[EI_DATA]); 867 return (ECTF_CONVBKERR); 868 } 869 870 return (0); 871 } 872 873 typedef struct ctf_dwarf_fpent { 874 size_t cdfe_size; 875 uint_t cdfe_enc[3]; 876 } ctf_dwarf_fpent_t; 877 878 typedef struct ctf_dwarf_fpmap { 879 uint_t cdf_mach; 880 ctf_dwarf_fpent_t cdf_ents[4]; 881 } ctf_dwarf_fpmap_t; 882 883 static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = { 884 { EM_SPARC, { 885 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, 886 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, 887 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, 888 { 0, { 0 } } 889 } }, 890 { EM_SPARC32PLUS, { 891 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, 892 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, 893 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, 894 { 0, { 0 } } 895 } }, 896 { EM_SPARCV9, { 897 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, 898 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, 899 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, 900 { 0, { 0 } } 901 } }, 902 { EM_386, { 903 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, 904 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, 905 { 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, 906 { 0, { 0 } } 907 } }, 908 { EM_X86_64, { 909 { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } }, 910 { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } }, 911 { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } }, 912 { 0, { 0 } } 913 } }, 914 { EM_NONE } 915 }; 916 917 /* 918 * We want to normalize the type names that are used between compilers in the 919 * case of complex. gcc prefixes things with types like 'long complex' where as 920 * clang only calls them 'complex' in the dwarf even if in the C they are long 921 * complex or similar. 922 */ 923 static int 924 ctf_dwarf_fixup_complex(ctf_cu_t *cup, ctf_encoding_t *enc, char **namep) 925 { 926 const char *name; 927 *namep = NULL; 928 929 switch (enc->cte_format) { 930 case CTF_FP_CPLX: 931 name = "complex float"; 932 break; 933 case CTF_FP_DCPLX: 934 name = "complex double"; 935 break; 936 case CTF_FP_LDCPLX: 937 name = "complex long double"; 938 break; 939 default: 940 return (0); 941 } 942 943 *namep = ctf_strdup(name); 944 if (*namep == NULL) { 945 return (ENOMEM); 946 } 947 948 return (0); 949 } 950 951 static int 952 ctf_dwarf_float_base(ctf_cu_t *cup, Dwarf_Signed type, ctf_encoding_t *enc) 953 { 954 const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0]; 955 const ctf_dwarf_fpent_t *ent; 956 uint_t col = 0, mult = 1; 957 958 for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) { 959 if (map->cdf_mach == cup->cu_mach) 960 break; 961 } 962 963 if (map->cdf_mach == EM_NONE) { 964 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 965 "Unsupported machine type: %d\n", cup->cu_mach); 966 return (ENOTSUP); 967 } 968 969 if (type == DW_ATE_complex_float) { 970 mult = 2; 971 col = 1; 972 } else if (type == DW_ATE_imaginary_float || 973 type == DW_ATE_SUN_imaginary_float) { 974 col = 2; 975 } 976 977 ent = &map->cdf_ents[0]; 978 for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) { 979 if (ent->cdfe_size * mult * 8 == enc->cte_bits) { 980 enc->cte_format = ent->cdfe_enc[col]; 981 return (0); 982 } 983 } 984 985 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 986 "failed to find valid fp mapping for encoding %d, size %d bits\n", 987 type, enc->cte_bits); 988 return (EINVAL); 989 } 990 991 static int 992 ctf_dwarf_dwarf_base(ctf_cu_t *cup, Dwarf_Die die, int *kindp, 993 ctf_encoding_t *enc) 994 { 995 int ret; 996 Dwarf_Signed type; 997 998 if ((ret = ctf_dwarf_signed(cup, die, DW_AT_encoding, &type)) != 0) 999 return (ret); 1000 1001 switch (type) { 1002 case DW_ATE_unsigned: 1003 case DW_ATE_address: 1004 *kindp = CTF_K_INTEGER; 1005 enc->cte_format = 0; 1006 break; 1007 case DW_ATE_unsigned_char: 1008 *kindp = CTF_K_INTEGER; 1009 enc->cte_format = CTF_INT_CHAR; 1010 break; 1011 case DW_ATE_signed: 1012 *kindp = CTF_K_INTEGER; 1013 enc->cte_format = CTF_INT_SIGNED; 1014 break; 1015 case DW_ATE_signed_char: 1016 *kindp = CTF_K_INTEGER; 1017 enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR; 1018 break; 1019 case DW_ATE_boolean: 1020 *kindp = CTF_K_INTEGER; 1021 enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL; 1022 break; 1023 case DW_ATE_float: 1024 case DW_ATE_complex_float: 1025 case DW_ATE_imaginary_float: 1026 case DW_ATE_SUN_imaginary_float: 1027 case DW_ATE_SUN_interval_float: 1028 *kindp = CTF_K_FLOAT; 1029 if ((ret = ctf_dwarf_float_base(cup, type, enc)) != 0) 1030 return (ret); 1031 break; 1032 default: 1033 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1034 "encountered unknown DWARF encoding: %d", type); 1035 return (ECTF_CONVBKERR); 1036 } 1037 1038 return (0); 1039 } 1040 1041 /* 1042 * Different compilers (at least GCC and Studio) use different names for types. 1043 * This parses the types and attempts to unify them. If this fails, we just fall 1044 * back to using the DWARF itself. 1045 */ 1046 static int 1047 ctf_dwarf_parse_int(const char *name, int *kindp, ctf_encoding_t *enc, 1048 char **newnamep) 1049 { 1050 char buf[256]; 1051 char *base, *c, *last; 1052 int nlong = 0, nshort = 0, nchar = 0, nint = 0; 1053 int sign = 1; 1054 1055 if (strlen(name) + 1 > sizeof (buf)) 1056 return (EINVAL); 1057 1058 (void) strlcpy(buf, name, sizeof (buf)); 1059 for (c = strtok_r(buf, " ", &last); c != NULL; 1060 c = strtok_r(NULL, " ", &last)) { 1061 if (strcmp(c, "signed") == 0) { 1062 sign = 1; 1063 } else if (strcmp(c, "unsigned") == 0) { 1064 sign = 0; 1065 } else if (strcmp(c, "long") == 0) { 1066 nlong++; 1067 } else if (strcmp(c, "char") == 0) { 1068 nchar++; 1069 } else if (strcmp(c, "short") == 0) { 1070 nshort++; 1071 } else if (strcmp(c, "int") == 0) { 1072 nint++; 1073 } else { 1074 /* 1075 * If we don't recognize any of the tokens, we'll tell 1076 * the caller to fall back to the dwarf-provided 1077 * encoding information. 1078 */ 1079 return (EINVAL); 1080 } 1081 } 1082 1083 if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2) 1084 return (EINVAL); 1085 1086 if (nchar > 0) { 1087 if (nlong > 0 || nshort > 0 || nint > 0) 1088 return (EINVAL); 1089 base = "char"; 1090 } else if (nshort > 0) { 1091 if (nlong > 0) 1092 return (EINVAL); 1093 base = "short"; 1094 } else if (nlong > 0) { 1095 base = "long"; 1096 } else { 1097 base = "int"; 1098 } 1099 1100 if (nchar > 0) 1101 enc->cte_format = CTF_INT_CHAR; 1102 else 1103 enc->cte_format = 0; 1104 1105 if (sign > 0) 1106 enc->cte_format |= CTF_INT_SIGNED; 1107 1108 (void) snprintf(buf, sizeof (buf), "%s%s%s", 1109 (sign ? "" : "unsigned "), 1110 (nlong > 1 ? "long " : ""), 1111 base); 1112 1113 *newnamep = ctf_strdup(buf); 1114 if (*newnamep == NULL) 1115 return (ENOMEM); 1116 *kindp = CTF_K_INTEGER; 1117 return (0); 1118 } 1119 1120 static int 1121 ctf_dwarf_create_base(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot, 1122 Dwarf_Off off) 1123 { 1124 int ret; 1125 char *name, *nname = NULL; 1126 Dwarf_Unsigned sz; 1127 int kind; 1128 ctf_encoding_t enc; 1129 ctf_id_t id; 1130 1131 if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) 1132 return (ret); 1133 if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &sz)) != 0) { 1134 goto out; 1135 } 1136 ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name, 1137 off, sz); 1138 1139 bzero(&enc, sizeof (ctf_encoding_t)); 1140 enc.cte_bits = sz * 8; 1141 if ((ret = ctf_dwarf_parse_int(name, &kind, &enc, &nname)) == 0) { 1142 ctf_free(name, strlen(name) + 1); 1143 name = nname; 1144 } else { 1145 if (ret != EINVAL) { 1146 goto out; 1147 } 1148 ctf_dprintf("falling back to dwarf for base type %s\n", name); 1149 if ((ret = ctf_dwarf_dwarf_base(cup, die, &kind, &enc)) != 0) { 1150 goto out; 1151 } 1152 1153 if (kind == CTF_K_FLOAT && (ret = ctf_dwarf_fixup_complex(cup, 1154 &enc, &nname)) != 0) { 1155 goto out; 1156 } else if (nname != NULL) { 1157 ctf_free(name, strlen(name) + 1); 1158 name = nname; 1159 } 1160 } 1161 1162 id = ctf_add_encoded(cup->cu_ctfp, isroot, name, &enc, kind); 1163 if (id == CTF_ERR) { 1164 ret = ctf_errno(cup->cu_ctfp); 1165 } else { 1166 *idp = id; 1167 ret = ctf_dwmap_add(cup, id, die, B_FALSE); 1168 } 1169 out: 1170 ctf_free(name, strlen(name) + 1); 1171 return (ret); 1172 } 1173 1174 /* 1175 * Getting a member's offset is a surprisingly intricate dance. It works as 1176 * follows: 1177 * 1178 * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we 1179 * have a DW_AT_data_member_location. We won't have both. Thus we check first 1180 * for DW_AT_data_bit_offset, and if it exists, we're set. 1181 * 1182 * Next, if we have a bitfield and we don't have a DW_AT_data_bit_offset, then 1183 * we have to grab the data location and use the following dance: 1184 * 1185 * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size. 1186 * Of course, the DW_AT_byte_size may be omitted, even though it isn't always. 1187 * When it's been omitted, we then have to say that the size is that of the 1188 * underlying type, which forces that to be after a ctf_update(). Here, we have 1189 * to do different things based on whether or not we're using big endian or 1190 * little endian to obtain the proper offset. 1191 */ 1192 static int 1193 ctf_dwarf_member_offset(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t mid, 1194 ulong_t *offp) 1195 { 1196 int ret; 1197 Dwarf_Unsigned loc, bitsz, bytesz; 1198 Dwarf_Signed bitoff; 1199 size_t off; 1200 ssize_t tsz; 1201 1202 if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_data_bit_offset, 1203 &loc)) == 0) { 1204 *offp = loc; 1205 return (0); 1206 } else if (ret != ENOENT) { 1207 return (ret); 1208 } 1209 1210 if ((ret = ctf_dwarf_member_location(cup, die, &loc)) != 0) 1211 return (ret); 1212 off = loc * 8; 1213 1214 if ((ret = ctf_dwarf_signed(cup, die, DW_AT_bit_offset, 1215 &bitoff)) != 0) { 1216 if (ret != ENOENT) 1217 return (ret); 1218 *offp = off; 1219 return (0); 1220 } 1221 1222 /* At this point we have to have DW_AT_bit_size */ 1223 if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) 1224 return (ret); 1225 1226 if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, 1227 &bytesz)) != 0) { 1228 if (ret != ENOENT) 1229 return (ret); 1230 if ((tsz = ctf_type_size(cup->cu_ctfp, mid)) == CTF_ERR) { 1231 int e = ctf_errno(cup->cu_ctfp); 1232 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1233 "failed to get type size: %s", ctf_errmsg(e)); 1234 return (ECTF_CONVBKERR); 1235 } 1236 } else { 1237 tsz = bytesz; 1238 } 1239 tsz *= 8; 1240 if (cup->cu_bigend == B_TRUE) { 1241 *offp = off + bitoff; 1242 } else { 1243 *offp = off + tsz - bitoff - bitsz; 1244 } 1245 1246 return (0); 1247 } 1248 1249 /* 1250 * We need to determine if the member in question is a bitfield. If it is, then 1251 * we need to go through and create a new type that's based on the actual base 1252 * type, but has a different size. We also rename the type as a result to help 1253 * deal with future collisions. 1254 * 1255 * Here we need to look and see if we have a DW_AT_bit_size value. If we have a 1256 * bit size member and it does not equal the byte size member, then we need to 1257 * create a bitfield type based on this. 1258 * 1259 * Note: When we support DWARFv4, there may be a chance that we need to also 1260 * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member. 1261 */ 1262 static int 1263 ctf_dwarf_member_bitfield(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp) 1264 { 1265 int ret; 1266 Dwarf_Unsigned bitsz; 1267 ctf_encoding_t e; 1268 ctf_dwbitf_t *cdb; 1269 ctf_dtdef_t *dtd; 1270 ctf_id_t base = *idp; 1271 int kind; 1272 1273 if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) { 1274 if (ret == ENOENT) 1275 return (0); 1276 return (ret); 1277 } 1278 1279 ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz); 1280 /* 1281 * Given that we now have a bitsize, time to go do something about it. 1282 * We're going to create a new type based on the current one, but first 1283 * we need to find the base type. This means we need to traverse any 1284 * typedef's, consts, and volatiles until we get to what should be 1285 * something of type integer or enumeration. 1286 */ 1287 VERIFY(bitsz < UINT32_MAX); 1288 dtd = ctf_dtd_lookup(cup->cu_ctfp, base); 1289 VERIFY(dtd != NULL); 1290 kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info); 1291 while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST || 1292 kind == CTF_K_VOLATILE) { 1293 dtd = ctf_dtd_lookup(cup->cu_ctfp, dtd->dtd_data.ctt_type); 1294 VERIFY(dtd != NULL); 1295 kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info); 1296 } 1297 ctf_dprintf("got kind %d\n", kind); 1298 VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM); 1299 1300 /* 1301 * As surprising as it may be, it is strictly possible to create a 1302 * bitfield that is based on an enum. Of course, the C standard leaves 1303 * enums sizing as an ABI concern more or less. To that effect, today on 1304 * all illumos platforms the size of an enum is generally that of an 1305 * int as our supported data models and ABIs all agree on that. So what 1306 * we'll do is fake up a CTF encoding here to use. In this case, we'll 1307 * treat it as an unsigned value of whatever size the underlying enum 1308 * currently has (which is in the ctt_size member of its dynamic type 1309 * data). 1310 */ 1311 if (kind == CTF_K_INTEGER) { 1312 e = dtd->dtd_u.dtu_enc; 1313 } else { 1314 bzero(&e, sizeof (ctf_encoding_t)); 1315 e.cte_bits = dtd->dtd_data.ctt_size * NBBY; 1316 } 1317 1318 for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; 1319 cdb = ctf_list_next(cdb)) { 1320 if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz) 1321 break; 1322 } 1323 1324 /* 1325 * Create a new type if none exists. We name all types in a way that is 1326 * guaranteed not to conflict with the corresponding C type. We do this 1327 * by using the ':' operator. 1328 */ 1329 if (cdb == NULL) { 1330 size_t namesz; 1331 char *name; 1332 1333 e.cte_bits = bitsz; 1334 namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name, 1335 (uint32_t)bitsz); 1336 name = ctf_alloc(namesz + 1); 1337 if (name == NULL) 1338 return (ENOMEM); 1339 cdb = ctf_alloc(sizeof (ctf_dwbitf_t)); 1340 if (cdb == NULL) { 1341 ctf_free(name, namesz + 1); 1342 return (ENOMEM); 1343 } 1344 (void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name, 1345 (uint32_t)bitsz); 1346 1347 cdb->cdb_base = base; 1348 cdb->cdb_nbits = bitsz; 1349 cdb->cdb_id = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT, 1350 name, &e); 1351 if (cdb->cdb_id == CTF_ERR) { 1352 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1353 "failed to get add bitfield type %s: %s", name, 1354 ctf_errmsg(ctf_errno(cup->cu_ctfp))); 1355 ctf_free(name, namesz + 1); 1356 ctf_free(cdb, sizeof (ctf_dwbitf_t)); 1357 return (ECTF_CONVBKERR); 1358 } 1359 ctf_free(name, namesz + 1); 1360 ctf_list_append(&cup->cu_bitfields, cdb); 1361 } 1362 1363 *idp = cdb->cdb_id; 1364 1365 return (0); 1366 } 1367 1368 static int 1369 ctf_dwarf_fixup_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t base, boolean_t add) 1370 { 1371 int ret, kind; 1372 Dwarf_Die child, memb; 1373 Dwarf_Unsigned size; 1374 1375 kind = ctf_type_kind(cup->cu_ctfp, base); 1376 VERIFY(kind != CTF_ERR); 1377 VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION); 1378 1379 /* 1380 * Members are in children. However, gcc also allows empty ones. 1381 */ 1382 if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) 1383 return (ret); 1384 if (child == NULL) 1385 return (0); 1386 1387 memb = child; 1388 while (memb != NULL) { 1389 Dwarf_Die sib, tdie; 1390 Dwarf_Half tag; 1391 ctf_id_t mid; 1392 char *mname; 1393 ulong_t memboff = 0; 1394 1395 if ((ret = ctf_dwarf_tag(cup, memb, &tag)) != 0) 1396 return (ret); 1397 1398 if (tag != DW_TAG_member) 1399 goto next; 1400 1401 if ((ret = ctf_dwarf_refdie(cup, memb, DW_AT_type, &tdie)) != 0) 1402 return (ret); 1403 1404 if ((ret = ctf_dwarf_convert_type(cup, tdie, &mid, 1405 CTF_ADD_NONROOT)) != 0) 1406 return (ret); 1407 ctf_dprintf("Got back type id: %d\n", mid); 1408 1409 /* 1410 * If we're not adding a member, just go ahead and return. 1411 */ 1412 if (add == B_FALSE) { 1413 if ((ret = ctf_dwarf_member_bitfield(cup, memb, 1414 &mid)) != 0) 1415 return (ret); 1416 goto next; 1417 } 1418 1419 if ((ret = ctf_dwarf_string(cup, memb, DW_AT_name, 1420 &mname)) != 0 && ret != ENOENT) 1421 return (ret); 1422 if (ret == ENOENT) 1423 mname = NULL; 1424 1425 if (kind == CTF_K_UNION) { 1426 memboff = 0; 1427 } else if ((ret = ctf_dwarf_member_offset(cup, memb, mid, 1428 &memboff)) != 0) { 1429 if (mname != NULL) 1430 ctf_free(mname, strlen(mname) + 1); 1431 return (ret); 1432 } 1433 1434 if ((ret = ctf_dwarf_member_bitfield(cup, memb, &mid)) != 0) 1435 return (ret); 1436 1437 ret = ctf_add_member(cup->cu_ctfp, base, mname, mid, memboff); 1438 if (ret == CTF_ERR) { 1439 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1440 "failed to add member %s: %s", 1441 mname, ctf_errmsg(ctf_errno(cup->cu_ctfp))); 1442 if (mname != NULL) 1443 ctf_free(mname, strlen(mname) + 1); 1444 return (ECTF_CONVBKERR); 1445 } 1446 1447 if (mname != NULL) 1448 ctf_free(mname, strlen(mname) + 1); 1449 1450 next: 1451 if ((ret = ctf_dwarf_sib(cup, memb, &sib)) != 0) 1452 return (ret); 1453 memb = sib; 1454 } 1455 1456 /* 1457 * If we're not adding members, then we don't know the final size of the 1458 * structure, so end here. 1459 */ 1460 if (add == B_FALSE) 1461 return (0); 1462 1463 /* Finally set the size of the structure to the actual byte size */ 1464 if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &size)) != 0) 1465 return (ret); 1466 if ((ctf_set_size(cup->cu_ctfp, base, size)) == CTF_ERR) { 1467 int e = ctf_errno(cup->cu_ctfp); 1468 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1469 "failed to set type size for %d to 0x%x: %s", base, 1470 (uint32_t)size, ctf_errmsg(e)); 1471 return (ECTF_CONVBKERR); 1472 } 1473 1474 return (0); 1475 } 1476 1477 static int 1478 ctf_dwarf_create_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, 1479 int kind, int isroot) 1480 { 1481 int ret; 1482 char *name; 1483 ctf_id_t base; 1484 Dwarf_Die child; 1485 Dwarf_Bool decl; 1486 1487 /* 1488 * Deal with the terribly annoying case of anonymous structs and unions. 1489 * If they don't have a name, set the name to the empty string. 1490 */ 1491 if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 && 1492 ret != ENOENT) 1493 return (ret); 1494 if (ret == ENOENT) 1495 name = NULL; 1496 1497 /* 1498 * We need to check if we just have a declaration here. If we do, then 1499 * instead of creating an actual structure or union, we're just going to 1500 * go ahead and create a forward. During a dedup or merge, the forward 1501 * will be replaced with the real thing. 1502 */ 1503 if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, 1504 &decl)) != 0) { 1505 if (ret != ENOENT) 1506 return (ret); 1507 decl = 0; 1508 } 1509 1510 if (decl == B_TRUE) { 1511 base = ctf_add_forward(cup->cu_ctfp, isroot, name, kind); 1512 } else if (kind == CTF_K_STRUCT) { 1513 base = ctf_add_struct(cup->cu_ctfp, isroot, name); 1514 } else { 1515 base = ctf_add_union(cup->cu_ctfp, isroot, name); 1516 } 1517 ctf_dprintf("added sou %s (%d) (%ld) forward=%d\n", 1518 name, kind, base, decl == B_TRUE); 1519 if (name != NULL) 1520 ctf_free(name, strlen(name) + 1); 1521 if (base == CTF_ERR) 1522 return (ctf_errno(cup->cu_ctfp)); 1523 *idp = base; 1524 1525 /* 1526 * If it's just a declaration, we're not going to mark it for fix up or 1527 * do anything else. 1528 */ 1529 if (decl == B_TRUE) 1530 return (ctf_dwmap_add(cup, base, die, B_FALSE)); 1531 if ((ret = ctf_dwmap_add(cup, base, die, B_TRUE)) != 0) 1532 return (ret); 1533 1534 /* 1535 * The children of a structure or union are generally members. However, 1536 * some compilers actually insert structs and unions there and not as a 1537 * top-level die. Therefore, to make sure we honor our pass 1 contract 1538 * of having all the base types, but not members, we need to walk this 1539 * for instances of a DW_TAG_union_type. 1540 */ 1541 if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) 1542 return (ret); 1543 1544 while (child != NULL) { 1545 Dwarf_Half tag; 1546 Dwarf_Die sib; 1547 1548 if ((ret = ctf_dwarf_tag(cup, child, &tag)) != 0) 1549 return (ret); 1550 1551 switch (tag) { 1552 case DW_TAG_union_type: 1553 case DW_TAG_structure_type: 1554 ret = ctf_dwarf_convert_type(cup, child, NULL, 1555 CTF_ADD_NONROOT); 1556 if (ret != 0) { 1557 return (ret); 1558 } 1559 break; 1560 default: 1561 break; 1562 } 1563 1564 if ((ret = ctf_dwarf_sib(cup, child, &sib)) != 0) 1565 return (ret); 1566 child = sib; 1567 } 1568 1569 return (0); 1570 } 1571 1572 static int 1573 ctf_dwarf_array_upper_bound(ctf_cu_t *cup, Dwarf_Die range, ctf_arinfo_t *ar) 1574 { 1575 Dwarf_Attribute attr; 1576 Dwarf_Unsigned uval; 1577 Dwarf_Signed sval; 1578 Dwarf_Half form; 1579 Dwarf_Error derr; 1580 const char *formstr = NULL; 1581 uint_t adj = 0; 1582 int ret = 0; 1583 1584 ctf_dprintf("setting array upper bound\n"); 1585 1586 ar->ctr_nelems = 0; 1587 1588 /* 1589 * Different compilers use different attributes to indicate the size of 1590 * an array. GCC has traditionally used DW_AT_upper_bound, while Clang 1591 * uses DW_AT_count. They have slightly different semantics. DW_AT_count 1592 * indicates the total number of elements that are present, while 1593 * DW_AT_upper_bound indicates the last index, hence we need to add one 1594 * to that index to get the count. 1595 * 1596 * We first search for DW_AT_count and then for DW_AT_upper_bound. If we 1597 * find neither, then we treat the lack of this as a zero element array. 1598 * Our value is initialized assuming we find a DW_AT_count value. 1599 */ 1600 ret = ctf_dwarf_attribute(cup, range, DW_AT_count, &attr); 1601 if (ret != 0 && ret != ENOENT) { 1602 return (ret); 1603 } else if (ret == ENOENT) { 1604 ret = ctf_dwarf_attribute(cup, range, DW_AT_upper_bound, &attr); 1605 if (ret != 0 && ret != ENOENT) { 1606 return (ret); 1607 } else if (ret == ENOENT) { 1608 return (0); 1609 } else { 1610 adj = 1; 1611 } 1612 } 1613 1614 DWARF_LOCK(cup); 1615 ret = dwarf_whatform(attr, &form, &derr); 1616 if (ret != DW_DLV_OK) { 1617 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1618 "failed to get DW_AT_upper_bound attribute form: %s\n", 1619 dwarf_errmsg(derr)); 1620 ret = ECTF_CONVBKERR; 1621 goto done; 1622 } 1623 1624 /* 1625 * Compilers can indicate array bounds using signed or unsigned values. 1626 * Additionally, some compilers may also store the array bounds 1627 * using as DW_FORM_data{1,2,4,8} (which DWARF treats as raw data and 1628 * expects the caller to understand how to interpret the value). 1629 * 1630 * GCC 4.4.4 appears to always use unsigned values to encode the 1631 * array size (using '(unsigned)-1' to represent a zero-length or 1632 * unknown length array). Later versions of GCC use a signed value of 1633 * -1 for zero/unknown length arrays, and unsigned values to encode 1634 * known array sizes. 1635 * 1636 * Both dwarf_formsdata() and dwarf_formudata() will retrieve values 1637 * as their respective signed/unsigned forms, but both will also 1638 * retreive DW_FORM_data{1,2,4,8} values and treat them as signed or 1639 * unsigned integers (i.e. dwarf_formsdata() treats DW_FORM_dataXX 1640 * as signed integers and dwarf_formudata() treats DW_FORM_dataXX as 1641 * unsigned integers). Both will return an error if the form is not 1642 * their respective signed/unsigned form, or DW_FORM_dataXX. 1643 * 1644 * To obtain the upper bound, we use the appropriate 1645 * dwarf_form[su]data() function based on the form of DW_AT_upper_bound. 1646 * Additionally, we let dwarf_formudata() handle the DW_FORM_dataXX 1647 * forms (via the default option in the switch). If the value is in an 1648 * unexpected form (i.e. not DW_FORM_udata or DW_FORM_dataXX), 1649 * dwarf_formudata() will return failure (i.e. not DW_DLV_OK) and set 1650 * derr with the specific error value. 1651 */ 1652 switch (form) { 1653 case DW_FORM_sdata: 1654 if (dwarf_formsdata(attr, &sval, &derr) == DW_DLV_OK) { 1655 ar->ctr_nelems = sval + adj; 1656 goto done; 1657 } 1658 break; 1659 case DW_FORM_udata: 1660 default: 1661 if (dwarf_formudata(attr, &uval, &derr) == DW_DLV_OK) { 1662 ar->ctr_nelems = uval + adj; 1663 goto done; 1664 } 1665 break; 1666 } 1667 1668 if (dwarf_get_FORM_name(form, &formstr) != DW_DLV_OK) 1669 formstr = "unknown DWARF form"; 1670 1671 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1672 "failed to get %s (%hu) value for DW_AT_upper_bound: %s\n", 1673 formstr, form, dwarf_errmsg(derr)); 1674 ret = ECTF_CONVBKERR; 1675 1676 done: 1677 DWARF_UNLOCK(cup); 1678 ctf_dwarf_dealloc(cup, attr, DW_DLA_ATTR); 1679 return (ret); 1680 } 1681 1682 static int 1683 ctf_dwarf_create_array_range(ctf_cu_t *cup, Dwarf_Die range, ctf_id_t *idp, 1684 ctf_id_t base, int isroot) 1685 { 1686 int ret; 1687 Dwarf_Die sib; 1688 ctf_arinfo_t ar; 1689 1690 ctf_dprintf("creating array range\n"); 1691 1692 if ((ret = ctf_dwarf_sib(cup, range, &sib)) != 0) 1693 return (ret); 1694 if (sib != NULL) { 1695 ctf_id_t id; 1696 if ((ret = ctf_dwarf_create_array_range(cup, sib, &id, 1697 base, CTF_ADD_NONROOT)) != 0) 1698 return (ret); 1699 ar.ctr_contents = id; 1700 } else { 1701 ar.ctr_contents = base; 1702 } 1703 1704 if ((ar.ctr_index = ctf_dwarf_long(cup)) == CTF_ERR) 1705 return (ctf_errno(cup->cu_ctfp)); 1706 1707 if ((ret = ctf_dwarf_array_upper_bound(cup, range, &ar)) != 0) 1708 return (ret); 1709 1710 if ((*idp = ctf_add_array(cup->cu_ctfp, isroot, &ar)) == CTF_ERR) 1711 return (ctf_errno(cup->cu_ctfp)); 1712 1713 return (0); 1714 } 1715 1716 /* 1717 * Try and create an array type. First, the kind of the array is specified in 1718 * the DW_AT_type entry. Next, the number of entries is stored in a more 1719 * complicated form, we should have a child that has the DW_TAG_subrange type. 1720 */ 1721 static int 1722 ctf_dwarf_create_array(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot) 1723 { 1724 int ret; 1725 Dwarf_Die tdie, rdie; 1726 ctf_id_t tid; 1727 Dwarf_Half rtag; 1728 1729 if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) 1730 return (ret); 1731 if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid, 1732 CTF_ADD_NONROOT)) != 0) 1733 return (ret); 1734 1735 if ((ret = ctf_dwarf_child(cup, die, &rdie)) != 0) 1736 return (ret); 1737 if ((ret = ctf_dwarf_tag(cup, rdie, &rtag)) != 0) 1738 return (ret); 1739 if (rtag != DW_TAG_subrange_type) { 1740 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1741 "encountered array without DW_TAG_subrange_type child\n"); 1742 return (ECTF_CONVBKERR); 1743 } 1744 1745 /* 1746 * The compiler may opt to describe a multi-dimensional array as one 1747 * giant array or it may opt to instead encode it as a series of 1748 * subranges. If it's the latter, then for each subrange we introduce a 1749 * type. We can always use the base type. 1750 */ 1751 if ((ret = ctf_dwarf_create_array_range(cup, rdie, idp, tid, 1752 isroot)) != 0) 1753 return (ret); 1754 ctf_dprintf("Got back id %d\n", *idp); 1755 return (ctf_dwmap_add(cup, *idp, die, B_FALSE)); 1756 } 1757 1758 /* 1759 * Given "const int const_array3[11]", GCC7 at least will create a DIE tree of 1760 * DW_TAG_const_type:DW_TAG_array_type:DW_Tag_const_type:<member_type>. 1761 * 1762 * Given C's syntax, this renders out as "const const int const_array3[11]". To 1763 * get closer to round-tripping (and make the unit tests work), we'll peek for 1764 * this case, and avoid adding the extraneous qualifier if we see that the 1765 * underlying array referent already has the same qualifier. 1766 * 1767 * This is unfortunately less trivial than it could be: this issue applies to 1768 * qualifier sets like "const volatile", as well as multi-dimensional arrays, so 1769 * we need to descend down those. 1770 * 1771 * Returns CTF_ERR on error, or a boolean value otherwise. 1772 */ 1773 static int 1774 needed_array_qualifier(ctf_cu_t *cup, int kind, ctf_id_t ref_id) 1775 { 1776 const ctf_type_t *t; 1777 ctf_arinfo_t arinfo; 1778 int akind; 1779 1780 if (kind != CTF_K_CONST && kind != CTF_K_VOLATILE && 1781 kind != CTF_K_RESTRICT) 1782 return (1); 1783 1784 if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, ref_id)) == NULL) 1785 return (CTF_ERR); 1786 1787 if (LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info) != CTF_K_ARRAY) 1788 return (1); 1789 1790 if (ctf_dyn_array_info(cup->cu_ctfp, ref_id, &arinfo) != 0) 1791 return (CTF_ERR); 1792 1793 ctf_id_t id = arinfo.ctr_contents; 1794 1795 for (;;) { 1796 if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, id)) == NULL) 1797 return (CTF_ERR); 1798 1799 akind = LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info); 1800 1801 if (akind == kind) 1802 break; 1803 1804 if (akind == CTF_K_ARRAY) { 1805 if (ctf_dyn_array_info(cup->cu_ctfp, 1806 id, &arinfo) != 0) 1807 return (CTF_ERR); 1808 id = arinfo.ctr_contents; 1809 continue; 1810 } 1811 1812 if (akind != CTF_K_CONST && akind != CTF_K_VOLATILE && 1813 akind != CTF_K_RESTRICT) 1814 break; 1815 1816 id = t->ctt_type; 1817 } 1818 1819 if (kind == akind) { 1820 ctf_dprintf("ignoring extraneous %s qualifier for array %d\n", 1821 ctf_kind_name(cup->cu_ctfp, kind), ref_id); 1822 } 1823 1824 return (kind != akind); 1825 } 1826 1827 static int 1828 ctf_dwarf_create_reference(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, 1829 int kind, int isroot) 1830 { 1831 int ret; 1832 ctf_id_t id; 1833 Dwarf_Die tdie; 1834 char *name; 1835 size_t namelen; 1836 1837 if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 && 1838 ret != ENOENT) 1839 return (ret); 1840 if (ret == ENOENT) { 1841 name = NULL; 1842 namelen = 0; 1843 } else { 1844 namelen = strlen(name); 1845 } 1846 1847 ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>"); 1848 1849 if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) { 1850 if (ret != ENOENT) { 1851 ctf_free(name, namelen); 1852 return (ret); 1853 } 1854 if ((id = ctf_dwarf_void(cup)) == CTF_ERR) { 1855 ctf_free(name, namelen); 1856 return (ctf_errno(cup->cu_ctfp)); 1857 } 1858 } else { 1859 if ((ret = ctf_dwarf_convert_type(cup, tdie, &id, 1860 CTF_ADD_NONROOT)) != 0) { 1861 ctf_free(name, namelen); 1862 return (ret); 1863 } 1864 } 1865 1866 if ((ret = needed_array_qualifier(cup, kind, id)) <= 0) { 1867 if (ret != 0) { 1868 ret = (ctf_errno(cup->cu_ctfp)); 1869 } else { 1870 *idp = id; 1871 } 1872 1873 ctf_free(name, namelen); 1874 return (ret); 1875 } 1876 1877 if ((*idp = ctf_add_reftype(cup->cu_ctfp, isroot, name, id, kind)) == 1878 CTF_ERR) { 1879 ctf_free(name, namelen); 1880 return (ctf_errno(cup->cu_ctfp)); 1881 } 1882 1883 ctf_free(name, namelen); 1884 return (ctf_dwmap_add(cup, *idp, die, B_FALSE)); 1885 } 1886 1887 static int 1888 ctf_dwarf_create_enum(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot) 1889 { 1890 size_t size = 0; 1891 Dwarf_Die child; 1892 Dwarf_Unsigned dw; 1893 ctf_id_t id; 1894 char *name; 1895 int ret; 1896 1897 if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 && 1898 ret != ENOENT) 1899 return (ret); 1900 if (ret == ENOENT) 1901 name = NULL; 1902 1903 /* 1904 * Enumerations may have a size associated with them, particularly if 1905 * they're packed. Note, a Dwarf_Unsigned is larger than a size_t on an 1906 * ILP32 system. 1907 */ 1908 if (ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &dw) == 0 && 1909 dw < SIZE_MAX) { 1910 size = (size_t)dw; 1911 } 1912 1913 id = ctf_add_enum(cup->cu_ctfp, isroot, name, size); 1914 ctf_dprintf("added enum %s (%d)\n", name, id); 1915 if (name != NULL) 1916 ctf_free(name, strlen(name) + 1); 1917 if (id == CTF_ERR) 1918 return (ctf_errno(cup->cu_ctfp)); 1919 *idp = id; 1920 if ((ret = ctf_dwmap_add(cup, id, die, B_FALSE)) != 0) 1921 return (ret); 1922 1923 if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) { 1924 if (ret == ENOENT) 1925 ret = 0; 1926 return (ret); 1927 } 1928 1929 while (child != NULL) { 1930 Dwarf_Half tag; 1931 Dwarf_Signed sval; 1932 Dwarf_Unsigned uval; 1933 Dwarf_Die arg = child; 1934 int eval; 1935 1936 if ((ret = ctf_dwarf_sib(cup, arg, &child)) != 0) 1937 return (ret); 1938 1939 if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0) 1940 return (ret); 1941 1942 if (tag != DW_TAG_enumerator) { 1943 if ((ret = ctf_dwarf_convert_type(cup, arg, NULL, 1944 CTF_ADD_NONROOT)) != 0) 1945 return (ret); 1946 continue; 1947 } 1948 1949 /* 1950 * DWARF v4 section 5.7 tells us we'll always have names. 1951 */ 1952 if ((ret = ctf_dwarf_string(cup, arg, DW_AT_name, &name)) != 0) 1953 return (ret); 1954 1955 /* 1956 * We have to be careful here: newer GCCs generate DWARF where 1957 * an unsigned value will happily pass ctf_dwarf_signed(). 1958 * Since negative values will fail ctf_dwarf_unsigned(), we try 1959 * that first to make sure we get the right value. 1960 */ 1961 if ((ret = ctf_dwarf_unsigned(cup, arg, DW_AT_const_value, 1962 &uval)) == 0) { 1963 eval = (int)uval; 1964 } else if ((ret = ctf_dwarf_signed(cup, arg, DW_AT_const_value, 1965 &sval)) == 0) { 1966 eval = sval; 1967 } 1968 1969 if (ret != 0) { 1970 if (ret != ENOENT) 1971 return (ret); 1972 1973 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1974 "encountered enumeration without constant value\n"); 1975 return (ECTF_CONVBKERR); 1976 } 1977 1978 ret = ctf_add_enumerator(cup->cu_ctfp, id, name, eval); 1979 if (ret == CTF_ERR) { 1980 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 1981 "failed to add enumarator %s (%d) to %d\n", 1982 name, eval, id); 1983 ctf_free(name, strlen(name) + 1); 1984 return (ctf_errno(cup->cu_ctfp)); 1985 } 1986 ctf_free(name, strlen(name) + 1); 1987 } 1988 1989 return (0); 1990 } 1991 1992 /* 1993 * For a function pointer, walk over and process all of its children, unless we 1994 * encounter one that's just a declaration. In which case, we error on it. 1995 */ 1996 static int 1997 ctf_dwarf_create_fptr(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot) 1998 { 1999 int ret; 2000 Dwarf_Bool b; 2001 ctf_funcinfo_t fi; 2002 Dwarf_Die retdie; 2003 ctf_id_t *argv = NULL; 2004 2005 bzero(&fi, sizeof (ctf_funcinfo_t)); 2006 2007 if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) { 2008 if (ret != ENOENT) 2009 return (ret); 2010 } else { 2011 if (b != 0) 2012 return (EPROTOTYPE); 2013 } 2014 2015 /* 2016 * Return type is in DW_AT_type, if none, it returns void. 2017 */ 2018 if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &retdie)) != 0) { 2019 if (ret != ENOENT) 2020 return (ret); 2021 if ((fi.ctc_return = ctf_dwarf_void(cup)) == CTF_ERR) 2022 return (ctf_errno(cup->cu_ctfp)); 2023 } else { 2024 if ((ret = ctf_dwarf_convert_type(cup, retdie, &fi.ctc_return, 2025 CTF_ADD_NONROOT)) != 0) 2026 return (ret); 2027 } 2028 2029 if ((ret = ctf_dwarf_function_count(cup, die, &fi, B_TRUE)) != 0) { 2030 return (ret); 2031 } 2032 2033 if (fi.ctc_argc != 0) { 2034 argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc); 2035 if (argv == NULL) 2036 return (ENOMEM); 2037 2038 if ((ret = ctf_dwarf_convert_fargs(cup, die, &fi, argv)) != 0) { 2039 ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc); 2040 return (ret); 2041 } 2042 } 2043 2044 if ((*idp = ctf_add_funcptr(cup->cu_ctfp, isroot, &fi, argv)) == 2045 CTF_ERR) { 2046 ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc); 2047 return (ctf_errno(cup->cu_ctfp)); 2048 } 2049 2050 ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc); 2051 return (ctf_dwmap_add(cup, *idp, die, B_FALSE)); 2052 } 2053 2054 static int 2055 ctf_dwarf_convert_type(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, 2056 int isroot) 2057 { 2058 int ret; 2059 Dwarf_Off offset; 2060 Dwarf_Half tag; 2061 ctf_dwmap_t lookup, *map; 2062 ctf_id_t id; 2063 2064 if (idp == NULL) 2065 idp = &id; 2066 2067 if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0) 2068 return (ret); 2069 2070 if (offset > cup->cu_maxoff) { 2071 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 2072 "die offset %llu beyond maximum for header %llu\n", 2073 offset, cup->cu_maxoff); 2074 return (ECTF_CONVBKERR); 2075 } 2076 2077 /* 2078 * If we've already added an entry for this offset, then we're done. 2079 */ 2080 lookup.cdm_off = offset; 2081 if ((map = avl_find(&cup->cu_map, &lookup, NULL)) != NULL) { 2082 *idp = map->cdm_id; 2083 return (0); 2084 } 2085 2086 if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0) 2087 return (ret); 2088 2089 ret = ENOTSUP; 2090 switch (tag) { 2091 case DW_TAG_base_type: 2092 ctf_dprintf("base\n"); 2093 ret = ctf_dwarf_create_base(cup, die, idp, isroot, offset); 2094 break; 2095 case DW_TAG_array_type: 2096 ctf_dprintf("array\n"); 2097 ret = ctf_dwarf_create_array(cup, die, idp, isroot); 2098 break; 2099 case DW_TAG_enumeration_type: 2100 ctf_dprintf("enum\n"); 2101 ret = ctf_dwarf_create_enum(cup, die, idp, isroot); 2102 break; 2103 case DW_TAG_pointer_type: 2104 ctf_dprintf("pointer\n"); 2105 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_POINTER, 2106 isroot); 2107 break; 2108 case DW_TAG_structure_type: 2109 ctf_dprintf("struct\n"); 2110 ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_STRUCT, 2111 isroot); 2112 break; 2113 case DW_TAG_subroutine_type: 2114 ctf_dprintf("fptr\n"); 2115 ret = ctf_dwarf_create_fptr(cup, die, idp, isroot); 2116 break; 2117 case DW_TAG_typedef: 2118 ctf_dprintf("typedef\n"); 2119 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_TYPEDEF, 2120 isroot); 2121 break; 2122 case DW_TAG_union_type: 2123 ctf_dprintf("union\n"); 2124 ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_UNION, 2125 isroot); 2126 break; 2127 case DW_TAG_const_type: 2128 ctf_dprintf("const\n"); 2129 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_CONST, 2130 isroot); 2131 break; 2132 case DW_TAG_volatile_type: 2133 ctf_dprintf("volatile\n"); 2134 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_VOLATILE, 2135 isroot); 2136 break; 2137 case DW_TAG_restrict_type: 2138 ctf_dprintf("restrict\n"); 2139 ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_RESTRICT, 2140 isroot); 2141 break; 2142 default: 2143 ctf_dprintf("ignoring tag type %x\n", tag); 2144 *idp = CTF_ERR; 2145 ret = 0; 2146 break; 2147 } 2148 ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n", 2149 ret); 2150 2151 return (ret); 2152 } 2153 2154 static int 2155 ctf_dwarf_walk_lexical(ctf_cu_t *cup, Dwarf_Die die) 2156 { 2157 int ret; 2158 Dwarf_Die child; 2159 2160 if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) 2161 return (ret); 2162 2163 if (child == NULL) 2164 return (0); 2165 2166 return (ctf_dwarf_convert_die(cup, die)); 2167 } 2168 2169 static int 2170 ctf_dwarf_function_count(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip, 2171 boolean_t fptr) 2172 { 2173 int ret; 2174 Dwarf_Die child, sib, arg; 2175 2176 if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) 2177 return (ret); 2178 2179 arg = child; 2180 while (arg != NULL) { 2181 Dwarf_Half tag; 2182 2183 if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0) 2184 return (ret); 2185 2186 /* 2187 * We have to check for a varargs type declaration. This will 2188 * happen in one of two ways. If we have a function pointer 2189 * type, then it'll be done with a tag of type 2190 * DW_TAG_unspecified_parameters. However, it only means we have 2191 * a variable number of arguments, if we have more than one 2192 * argument found so far. Otherwise, when we have a function 2193 * type, it instead uses a formal parameter whose name is '...' 2194 * to indicate a variable arguments member. 2195 * 2196 * Also, if we have a function pointer, then we have to expect 2197 * that we might not get a name at all. 2198 */ 2199 if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) { 2200 char *name; 2201 if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, 2202 &name)) != 0) 2203 return (ret); 2204 if (strcmp(name, DWARF_VARARGS_NAME) == 0) 2205 fip->ctc_flags |= CTF_FUNC_VARARG; 2206 else 2207 fip->ctc_argc++; 2208 ctf_free(name, strlen(name) + 1); 2209 } else if (tag == DW_TAG_formal_parameter) { 2210 fip->ctc_argc++; 2211 } else if (tag == DW_TAG_unspecified_parameters && 2212 fip->ctc_argc > 0) { 2213 fip->ctc_flags |= CTF_FUNC_VARARG; 2214 } 2215 if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0) 2216 return (ret); 2217 arg = sib; 2218 } 2219 2220 return (0); 2221 } 2222 2223 static int 2224 ctf_dwarf_convert_fargs(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip, 2225 ctf_id_t *argv) 2226 { 2227 int ret; 2228 int i = 0; 2229 Dwarf_Die child, sib, arg; 2230 2231 if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) 2232 return (ret); 2233 2234 arg = child; 2235 while (arg != NULL) { 2236 Dwarf_Half tag; 2237 2238 if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0) 2239 return (ret); 2240 if (tag == DW_TAG_formal_parameter) { 2241 Dwarf_Die tdie; 2242 2243 if ((ret = ctf_dwarf_refdie(cup, arg, DW_AT_type, 2244 &tdie)) != 0) 2245 return (ret); 2246 2247 if ((ret = ctf_dwarf_convert_type(cup, tdie, &argv[i], 2248 CTF_ADD_ROOT)) != 0) 2249 return (ret); 2250 i++; 2251 2252 /* 2253 * Once we hit argc entries, we're done. This ensures we 2254 * don't accidentally hit a varargs which should be the 2255 * last entry. 2256 */ 2257 if (i == fip->ctc_argc) 2258 break; 2259 } 2260 2261 if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0) 2262 return (ret); 2263 arg = sib; 2264 } 2265 2266 return (0); 2267 } 2268 2269 static int 2270 ctf_dwarf_convert_function(ctf_cu_t *cup, Dwarf_Die die) 2271 { 2272 ctf_dwfunc_t *cdf; 2273 Dwarf_Die tdie; 2274 Dwarf_Bool b; 2275 char *name; 2276 int ret; 2277 2278 /* 2279 * Functions that don't have a name are generally functions that have 2280 * been inlined and thus most information about them has been lost. If 2281 * we can't get a name, then instead of returning ENOENT, we silently 2282 * swallow the error. 2283 */ 2284 if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) { 2285 if (ret == ENOENT) 2286 return (0); 2287 return (ret); 2288 } 2289 2290 ctf_dprintf("beginning work on function %s (die %llx)\n", 2291 name, ctf_die_offset(cup, die)); 2292 2293 if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) { 2294 if (ret != ENOENT) { 2295 ctf_free(name, strlen(name) + 1); 2296 return (ret); 2297 } 2298 } else if (b != 0) { 2299 /* 2300 * GCC7 at least creates empty DW_AT_declarations for functions 2301 * defined in headers. As they lack details on the function 2302 * prototype, we need to ignore them. If we later actually 2303 * see the relevant function's definition, we will see another 2304 * DW_TAG_subprogram that is more complete. 2305 */ 2306 ctf_dprintf("ignoring declaration of function %s (die %llx)\n", 2307 name, ctf_die_offset(cup, die)); 2308 ctf_free(name, strlen(name) + 1); 2309 return (0); 2310 } 2311 2312 if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) { 2313 ctf_free(name, strlen(name) + 1); 2314 return (ENOMEM); 2315 } 2316 bzero(cdf, sizeof (ctf_dwfunc_t)); 2317 cdf->cdf_name = name; 2318 2319 if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) == 0) { 2320 if ((ret = ctf_dwarf_convert_type(cup, tdie, 2321 &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) { 2322 ctf_free(name, strlen(name) + 1); 2323 ctf_free(cdf, sizeof (ctf_dwfunc_t)); 2324 return (ret); 2325 } 2326 } else if (ret != ENOENT) { 2327 ctf_free(name, strlen(name) + 1); 2328 ctf_free(cdf, sizeof (ctf_dwfunc_t)); 2329 return (ret); 2330 } else { 2331 if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cup)) == 2332 CTF_ERR) { 2333 ctf_free(name, strlen(name) + 1); 2334 ctf_free(cdf, sizeof (ctf_dwfunc_t)); 2335 return (ctf_errno(cup->cu_ctfp)); 2336 } 2337 } 2338 2339 /* 2340 * A function has a number of children, some of which may not be ones we 2341 * care about. Children that we care about have a type of 2342 * DW_TAG_formal_parameter. We're going to do two passes, the first to 2343 * count the arguments, the second to process them. Afterwards, we 2344 * should be good to go ahead and add this function. 2345 * 2346 * Note, we already got the return type by going in and grabbing it out 2347 * of the DW_AT_type. 2348 */ 2349 if ((ret = ctf_dwarf_function_count(cup, die, &cdf->cdf_fip, 2350 B_FALSE)) != 0) { 2351 ctf_free(name, strlen(name) + 1); 2352 ctf_free(cdf, sizeof (ctf_dwfunc_t)); 2353 return (ret); 2354 } 2355 2356 ctf_dprintf("beginning to convert function arguments %s\n", name); 2357 if (cdf->cdf_fip.ctc_argc != 0) { 2358 uint_t argc = cdf->cdf_fip.ctc_argc; 2359 cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc); 2360 if (cdf->cdf_argv == NULL) { 2361 ctf_free(name, strlen(name) + 1); 2362 ctf_free(cdf, sizeof (ctf_dwfunc_t)); 2363 return (ENOMEM); 2364 } 2365 if ((ret = ctf_dwarf_convert_fargs(cup, die, 2366 &cdf->cdf_fip, cdf->cdf_argv)) != 0) { 2367 ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc); 2368 ctf_free(name, strlen(name) + 1); 2369 ctf_free(cdf, sizeof (ctf_dwfunc_t)); 2370 return (ret); 2371 } 2372 } else { 2373 cdf->cdf_argv = NULL; 2374 } 2375 2376 if ((ret = ctf_dwarf_isglobal(cup, die, &cdf->cdf_global)) != 0) { 2377 ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * 2378 cdf->cdf_fip.ctc_argc); 2379 ctf_free(name, strlen(name) + 1); 2380 ctf_free(cdf, sizeof (ctf_dwfunc_t)); 2381 return (ret); 2382 } 2383 2384 ctf_list_append(&cup->cu_funcs, cdf); 2385 return (ret); 2386 } 2387 2388 /* 2389 * Convert variables, but only if they're not prototypes and have names. 2390 */ 2391 static int 2392 ctf_dwarf_convert_variable(ctf_cu_t *cup, Dwarf_Die die) 2393 { 2394 int ret; 2395 char *name; 2396 Dwarf_Bool b; 2397 Dwarf_Die tdie; 2398 ctf_id_t id; 2399 ctf_dwvar_t *cdv; 2400 2401 /* Skip "Non-Defining Declarations" */ 2402 if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) == 0) { 2403 if (b != 0) 2404 return (0); 2405 } else if (ret != ENOENT) { 2406 return (ret); 2407 } 2408 2409 /* 2410 * If we find a DIE of "Declarations Completing Non-Defining 2411 * Declarations", we will use the referenced type's DIE. This isn't 2412 * quite correct, e.g. DW_AT_decl_line will be the forward declaration 2413 * not this site. It's sufficient for what we need, however: in 2414 * particular, we should find DW_AT_external as needed there. 2415 */ 2416 if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_specification, 2417 &tdie)) == 0) { 2418 Dwarf_Off offset; 2419 if ((ret = ctf_dwarf_offset(cup, tdie, &offset)) != 0) 2420 return (ret); 2421 ctf_dprintf("die 0x%llx DW_AT_specification -> die 0x%llx\n", 2422 ctf_die_offset(cup, die), ctf_die_offset(cup, tdie)); 2423 die = tdie; 2424 } else if (ret != ENOENT) { 2425 return (ret); 2426 } 2427 2428 if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 && 2429 ret != ENOENT) 2430 return (ret); 2431 if (ret == ENOENT) 2432 return (0); 2433 2434 if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) { 2435 ctf_free(name, strlen(name) + 1); 2436 return (ret); 2437 } 2438 2439 if ((ret = ctf_dwarf_convert_type(cup, tdie, &id, 2440 CTF_ADD_ROOT)) != 0) 2441 return (ret); 2442 2443 if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) { 2444 ctf_free(name, strlen(name) + 1); 2445 return (ENOMEM); 2446 } 2447 2448 cdv->cdv_name = name; 2449 cdv->cdv_type = id; 2450 2451 if ((ret = ctf_dwarf_isglobal(cup, die, &cdv->cdv_global)) != 0) { 2452 ctf_free(cdv, sizeof (ctf_dwvar_t)); 2453 ctf_free(name, strlen(name) + 1); 2454 return (ret); 2455 } 2456 2457 ctf_list_append(&cup->cu_vars, cdv); 2458 return (0); 2459 } 2460 2461 /* 2462 * Walk through our set of top-level types and process them. 2463 */ 2464 static int 2465 ctf_dwarf_walk_toplevel(ctf_cu_t *cup, Dwarf_Die die) 2466 { 2467 int ret; 2468 Dwarf_Off offset; 2469 Dwarf_Half tag; 2470 2471 if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0) 2472 return (ret); 2473 2474 if (offset > cup->cu_maxoff) { 2475 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 2476 "die offset %llu beyond maximum for header %llu\n", 2477 offset, cup->cu_maxoff); 2478 return (ECTF_CONVBKERR); 2479 } 2480 2481 if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0) 2482 return (ret); 2483 2484 ret = 0; 2485 switch (tag) { 2486 case DW_TAG_subprogram: 2487 ctf_dprintf("top level func\n"); 2488 ret = ctf_dwarf_convert_function(cup, die); 2489 break; 2490 case DW_TAG_variable: 2491 ctf_dprintf("top level var\n"); 2492 ret = ctf_dwarf_convert_variable(cup, die); 2493 break; 2494 case DW_TAG_lexical_block: 2495 ctf_dprintf("top level block\n"); 2496 ret = ctf_dwarf_walk_lexical(cup, die); 2497 break; 2498 case DW_TAG_enumeration_type: 2499 case DW_TAG_structure_type: 2500 case DW_TAG_typedef: 2501 case DW_TAG_union_type: 2502 ctf_dprintf("top level type\n"); 2503 ret = ctf_dwarf_convert_type(cup, die, NULL, B_TRUE); 2504 break; 2505 default: 2506 break; 2507 } 2508 2509 return (ret); 2510 } 2511 2512 2513 /* 2514 * We're given a node. At this node we need to convert it and then proceed to 2515 * convert any siblings that are associaed with this die. 2516 */ 2517 static int 2518 ctf_dwarf_convert_die(ctf_cu_t *cup, Dwarf_Die die) 2519 { 2520 while (die != NULL) { 2521 int ret; 2522 Dwarf_Die sib; 2523 2524 if ((ret = ctf_dwarf_walk_toplevel(cup, die)) != 0) 2525 return (ret); 2526 2527 if ((ret = ctf_dwarf_sib(cup, die, &sib)) != 0) 2528 return (ret); 2529 die = sib; 2530 } 2531 return (0); 2532 } 2533 2534 static int 2535 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass) 2536 { 2537 ctf_dwmap_t *map; 2538 2539 for (map = avl_first(&cup->cu_map); map != NULL; 2540 map = AVL_NEXT(&cup->cu_map, map)) { 2541 int ret; 2542 if (map->cdm_fix == B_FALSE) 2543 continue; 2544 if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id, 2545 addpass)) != 0) 2546 return (ret); 2547 } 2548 2549 return (0); 2550 } 2551 2552 /* 2553 * The DWARF information about a symbol and the information in the symbol table 2554 * may not be the same due to symbol reduction that is performed by ld due to a 2555 * mapfile or other such directive. We process weak symbols at a later time. 2556 * 2557 * The following are the rules that we employ: 2558 * 2559 * 1. A DWARF function that is considered exported matches STB_GLOBAL entries 2560 * with the same name. 2561 * 2562 * 2. A DWARF function that is considered exported matches STB_LOCAL entries 2563 * with the same name and the same file. This case may happen due to mapfile 2564 * reduction. 2565 * 2566 * 3. A DWARF function that is not considered exported matches STB_LOCAL entries 2567 * with the same name and the same file. 2568 * 2569 * 4. A DWARF function that has the same name as the symbol table entry, but the 2570 * files do not match. This is considered a 'fuzzy' match. This may also happen 2571 * due to a mapfile reduction. Fuzzy matching is only used when we know that the 2572 * file in question refers to the primary object. This is because when a symbol 2573 * is reduced in a mapfile, it's always going to be tagged as a local value in 2574 * the generated output and it is considered as to belong to the primary file 2575 * which is the first STT_FILE symbol we see. 2576 */ 2577 static boolean_t 2578 ctf_dwarf_symbol_match(const char *symtab_file, const char *symtab_name, 2579 uint_t symtab_bind, const char *dwarf_file, const char *dwarf_name, 2580 boolean_t dwarf_global, boolean_t *is_fuzzy) 2581 { 2582 *is_fuzzy = B_FALSE; 2583 2584 if (symtab_bind != STB_LOCAL && symtab_bind != STB_GLOBAL) { 2585 return (B_FALSE); 2586 } 2587 2588 if (strcmp(symtab_name, dwarf_name) != 0) { 2589 return (B_FALSE); 2590 } 2591 2592 if (symtab_bind == STB_GLOBAL) { 2593 return (dwarf_global); 2594 } 2595 2596 if (strcmp(symtab_file, dwarf_file) == 0) { 2597 return (B_TRUE); 2598 } 2599 2600 if (dwarf_global) { 2601 *is_fuzzy = B_TRUE; 2602 return (B_TRUE); 2603 } 2604 2605 return (B_FALSE); 2606 } 2607 2608 static ctf_dwfunc_t * 2609 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name, 2610 uint_t bind, boolean_t primary) 2611 { 2612 ctf_dwfunc_t *cdf, *fuzzy = NULL; 2613 2614 if (bind == STB_WEAK) 2615 return (NULL); 2616 2617 if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL)) 2618 return (NULL); 2619 2620 for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; 2621 cdf = ctf_list_next(cdf)) { 2622 boolean_t is_fuzzy = B_FALSE; 2623 2624 if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name, 2625 cdf->cdf_name, cdf->cdf_global, &is_fuzzy)) { 2626 if (is_fuzzy) { 2627 if (primary) { 2628 fuzzy = cdf; 2629 } 2630 continue; 2631 } else { 2632 return (cdf); 2633 } 2634 } 2635 } 2636 2637 return (fuzzy); 2638 } 2639 2640 static ctf_dwvar_t * 2641 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name, 2642 uint_t bind, boolean_t primary) 2643 { 2644 ctf_dwvar_t *cdv, *fuzzy = NULL; 2645 2646 if (bind == STB_WEAK) 2647 return (NULL); 2648 2649 if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL)) 2650 return (NULL); 2651 2652 for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; 2653 cdv = ctf_list_next(cdv)) { 2654 boolean_t is_fuzzy = B_FALSE; 2655 2656 if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name, 2657 cdv->cdv_name, cdv->cdv_global, &is_fuzzy)) { 2658 if (is_fuzzy) { 2659 if (primary) { 2660 fuzzy = cdv; 2661 } 2662 } else { 2663 return (cdv); 2664 } 2665 } 2666 } 2667 2668 return (fuzzy); 2669 } 2670 2671 static int 2672 ctf_dwarf_conv_funcvars_cb(const Elf64_Sym *symp, ulong_t idx, 2673 const char *file, const char *name, boolean_t primary, void *arg) 2674 { 2675 int ret; 2676 uint_t bind, type; 2677 ctf_cu_t *cup = arg; 2678 2679 bind = GELF_ST_BIND(symp->st_info); 2680 type = GELF_ST_TYPE(symp->st_info); 2681 2682 /* 2683 * Come back to weak symbols in another pass 2684 */ 2685 if (bind == STB_WEAK) 2686 return (0); 2687 2688 if (type == STT_OBJECT) { 2689 ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name, 2690 bind, primary); 2691 if (cdv == NULL) 2692 return (0); 2693 ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type); 2694 ctf_dprintf("added object %s->%ld\n", name, cdv->cdv_type); 2695 } else { 2696 ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name, 2697 bind, primary); 2698 if (cdf == NULL) 2699 return (0); 2700 ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip, 2701 cdf->cdf_argv); 2702 ctf_dprintf("added function %s\n", name); 2703 } 2704 2705 if (ret == CTF_ERR) { 2706 return (ctf_errno(cup->cu_ctfp)); 2707 } 2708 2709 return (0); 2710 } 2711 2712 static int 2713 ctf_dwarf_conv_funcvars(ctf_cu_t *cup) 2714 { 2715 return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_funcvars_cb, cup)); 2716 } 2717 2718 /* 2719 * If we have a weak symbol, attempt to find the strong symbol it will resolve 2720 * to. Note: the code where this actually happens is in sym_process() in 2721 * cmd/sgs/libld/common/syms.c 2722 * 2723 * Finding the matching symbol is unfortunately not trivial. For a symbol to be 2724 * a candidate, it must: 2725 * 2726 * - have the same type (function, object) 2727 * - have the same value (address) 2728 * - have the same size 2729 * - not be another weak symbol 2730 * - belong to the same section (checked via section index) 2731 * 2732 * To perform this check, we first iterate over the symbol table. For each weak 2733 * symbol that we encounter, we then do a second walk over the symbol table, 2734 * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's 2735 * either a local or global symbol. If we find a global symbol then we go with 2736 * it and stop searching for additional matches. 2737 * 2738 * If instead, we find a local symbol, things are more complicated. The first 2739 * thing we do is to try and see if we have file information about both symbols 2740 * (STT_FILE). If they both have file information and it matches, then we treat 2741 * that as a good match and stop searching for additional matches. 2742 * 2743 * Otherwise, this means we have a non-matching file and a local symbol. We 2744 * treat this as a candidate and if we find a better match (one of the two cases 2745 * above), use that instead. There are two different ways this can happen. 2746 * Either this is a completely different symbol, or it's a once-global symbol 2747 * that was scoped to local via a mapfile. In the former case, curfile is 2748 * likely inaccurate since the linker does not preserve the needed curfile in 2749 * the order of the symbol table (see the comments about locally scoped symbols 2750 * in libld's update_osym()). As we can't tell this case from the former one, 2751 * we use this symbol iff no other matching symbol is found. 2752 * 2753 * What we really need here is a SUNW section containing weak<->strong mappings 2754 * that we can consume. 2755 */ 2756 typedef struct ctf_dwarf_weak_arg { 2757 const Elf64_Sym *cweak_symp; 2758 const char *cweak_file; 2759 boolean_t cweak_candidate; 2760 ulong_t cweak_idx; 2761 } ctf_dwarf_weak_arg_t; 2762 2763 static int 2764 ctf_dwarf_conv_check_weak(const Elf64_Sym *symp, ulong_t idx, const char *file, 2765 const char *name, boolean_t primary, void *arg) 2766 { 2767 ctf_dwarf_weak_arg_t *cweak = arg; 2768 2769 const Elf64_Sym *wsymp = cweak->cweak_symp; 2770 2771 ctf_dprintf("comparing weak to %s\n", name); 2772 2773 if (GELF_ST_BIND(symp->st_info) == STB_WEAK) { 2774 return (0); 2775 } 2776 2777 if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) { 2778 return (0); 2779 } 2780 2781 if (wsymp->st_value != symp->st_value) { 2782 return (0); 2783 } 2784 2785 if (wsymp->st_size != symp->st_size) { 2786 return (0); 2787 } 2788 2789 if (wsymp->st_shndx != symp->st_shndx) { 2790 return (0); 2791 } 2792 2793 /* 2794 * Check if it's a weak candidate. 2795 */ 2796 if (GELF_ST_BIND(symp->st_info) == STB_LOCAL && 2797 (file == NULL || cweak->cweak_file == NULL || 2798 strcmp(file, cweak->cweak_file) != 0)) { 2799 cweak->cweak_candidate = B_TRUE; 2800 cweak->cweak_idx = idx; 2801 return (0); 2802 } 2803 2804 /* 2805 * Found a match, break. 2806 */ 2807 cweak->cweak_idx = idx; 2808 return (1); 2809 } 2810 2811 static int 2812 ctf_dwarf_duplicate_sym(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx) 2813 { 2814 ctf_id_t id = ctf_lookup_by_symbol(cup->cu_ctfp, matchidx); 2815 2816 /* 2817 * If we matched something that for some reason didn't have type data, 2818 * we don't consider that a fatal error and silently swallow it. 2819 */ 2820 if (id == CTF_ERR) { 2821 if (ctf_errno(cup->cu_ctfp) == ECTF_NOTYPEDAT) 2822 return (0); 2823 else 2824 return (ctf_errno(cup->cu_ctfp)); 2825 } 2826 2827 if (ctf_add_object(cup->cu_ctfp, idx, id) == CTF_ERR) 2828 return (ctf_errno(cup->cu_ctfp)); 2829 2830 return (0); 2831 } 2832 2833 static int 2834 ctf_dwarf_duplicate_func(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx) 2835 { 2836 int ret; 2837 ctf_funcinfo_t fip; 2838 ctf_id_t *args = NULL; 2839 2840 if (ctf_func_info(cup->cu_ctfp, matchidx, &fip) == CTF_ERR) { 2841 if (ctf_errno(cup->cu_ctfp) == ECTF_NOFUNCDAT) 2842 return (0); 2843 else 2844 return (ctf_errno(cup->cu_ctfp)); 2845 } 2846 2847 if (fip.ctc_argc != 0) { 2848 args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc); 2849 if (args == NULL) 2850 return (ENOMEM); 2851 2852 if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) == 2853 CTF_ERR) { 2854 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc); 2855 return (ctf_errno(cup->cu_ctfp)); 2856 } 2857 } 2858 2859 ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args); 2860 if (args != NULL) 2861 ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc); 2862 if (ret == CTF_ERR) 2863 return (ctf_errno(cup->cu_ctfp)); 2864 2865 return (0); 2866 } 2867 2868 static int 2869 ctf_dwarf_conv_weaks_cb(const Elf64_Sym *symp, ulong_t idx, const char *file, 2870 const char *name, boolean_t primary, void *arg) 2871 { 2872 int ret, type; 2873 ctf_dwarf_weak_arg_t cweak; 2874 ctf_cu_t *cup = arg; 2875 2876 /* 2877 * We only care about weak symbols. 2878 */ 2879 if (GELF_ST_BIND(symp->st_info) != STB_WEAK) 2880 return (0); 2881 2882 type = GELF_ST_TYPE(symp->st_info); 2883 ASSERT(type == STT_OBJECT || type == STT_FUNC); 2884 2885 /* 2886 * For each weak symbol we encounter, we need to do a second iteration 2887 * to try and find a match. We should probably think about other 2888 * techniques to try and save us time in the future. 2889 */ 2890 cweak.cweak_symp = symp; 2891 cweak.cweak_file = file; 2892 cweak.cweak_candidate = B_FALSE; 2893 cweak.cweak_idx = 0; 2894 2895 ctf_dprintf("Trying to find weak equiv for %s\n", name); 2896 2897 ret = ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_check_weak, &cweak); 2898 VERIFY(ret == 0 || ret == 1); 2899 2900 /* 2901 * Nothing was ever found, we're not going to add anything for this 2902 * entry. 2903 */ 2904 if (ret == 0 && cweak.cweak_candidate == B_FALSE) { 2905 ctf_dprintf("found no weak match for %s\n", name); 2906 return (0); 2907 } 2908 2909 /* 2910 * Now, finally go and add the type based on the match. 2911 */ 2912 ctf_dprintf("matched weak symbol %lu to %lu\n", idx, cweak.cweak_idx); 2913 if (type == STT_OBJECT) { 2914 ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx); 2915 } else { 2916 ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx); 2917 } 2918 2919 return (ret); 2920 } 2921 2922 static int 2923 ctf_dwarf_conv_weaks(ctf_cu_t *cup) 2924 { 2925 return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_weaks_cb, cup)); 2926 } 2927 2928 static int 2929 ctf_dwarf_convert_one(void *arg, void *unused) 2930 { 2931 int ret; 2932 ctf_file_t *dedup; 2933 ctf_cu_t *cup = arg; 2934 const char *name = cup->cu_name != NULL ? cup->cu_name : "NULL"; 2935 2936 VERIFY(cup != NULL); 2937 2938 if ((ret = ctf_dwarf_init_die(cup)) != 0) 2939 return (ret); 2940 2941 ctf_dprintf("converting die: %s - max offset: %x\n", name, 2942 cup->cu_maxoff); 2943 2944 ret = ctf_dwarf_convert_die(cup, cup->cu_cu); 2945 ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", name, 2946 ret); 2947 if (ret != 0) 2948 return (ret); 2949 2950 if (ctf_update(cup->cu_ctfp) != 0) { 2951 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, 2952 "failed to update output ctf container")); 2953 } 2954 2955 ret = ctf_dwarf_fixup_die(cup, B_FALSE); 2956 ctf_dprintf("ctf_dwarf_fixup_die (%s, FALSE) returned %d\n", name, 2957 ret); 2958 if (ret != 0) 2959 return (ret); 2960 2961 if (ctf_update(cup->cu_ctfp) != 0) { 2962 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, 2963 "failed to update output ctf container")); 2964 } 2965 2966 ret = ctf_dwarf_fixup_die(cup, B_TRUE); 2967 ctf_dprintf("ctf_dwarf_fixup_die (%s, TRUE) returned %d\n", name, 2968 ret); 2969 if (ret != 0) 2970 return (ret); 2971 2972 if (ctf_update(cup->cu_ctfp) != 0) { 2973 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, 2974 "failed to update output ctf container")); 2975 } 2976 2977 if ((ret = ctf_dwarf_conv_funcvars(cup)) != 0) { 2978 ctf_dprintf("ctf_dwarf_conv_funcvars (%s) returned %d\n", 2979 name, ret); 2980 return (ctf_dwarf_error(cup, NULL, ret, 2981 "failed to convert strong functions and variables")); 2982 } 2983 2984 if (ctf_update(cup->cu_ctfp) != 0) { 2985 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, 2986 "failed to update output ctf container")); 2987 } 2988 2989 if (cup->cu_doweaks == B_TRUE) { 2990 if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) { 2991 ctf_dprintf("ctf_dwarf_conv_weaks (%s) returned %d\n", 2992 name, ret); 2993 return (ctf_dwarf_error(cup, NULL, ret, 2994 "failed to convert weak functions and variables")); 2995 } 2996 2997 if (ctf_update(cup->cu_ctfp) != 0) { 2998 return (ctf_dwarf_error(cup, cup->cu_ctfp, 0, 2999 "failed to update output ctf container")); 3000 } 3001 } 3002 3003 ctf_phase_dump(cup->cu_ctfp, "pre-dwarf-dedup", name); 3004 ctf_dprintf("adding inputs for dedup\n"); 3005 if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) { 3006 return (ctf_dwarf_error(cup, NULL, ret, 3007 "failed to add inputs for merge")); 3008 } 3009 3010 ctf_dprintf("starting dedup of %s\n", name); 3011 if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) { 3012 return (ctf_dwarf_error(cup, NULL, ret, 3013 "failed to deduplicate die")); 3014 } 3015 3016 ctf_close(cup->cu_ctfp); 3017 cup->cu_ctfp = dedup; 3018 ctf_phase_dump(cup->cu_ctfp, "post-dwarf-dedup", name); 3019 3020 return (0); 3021 } 3022 3023 static void 3024 ctf_dwarf_free_die(ctf_cu_t *cup) 3025 { 3026 ctf_dwfunc_t *cdf, *ndf; 3027 ctf_dwvar_t *cdv, *ndv; 3028 ctf_dwbitf_t *cdb, *ndb; 3029 ctf_dwmap_t *map; 3030 void *cookie; 3031 3032 ctf_dprintf("Beginning to free die: %p\n", cup); 3033 3034 VERIFY3P(cup->cu_elf, !=, NULL); 3035 cup->cu_elf = NULL; 3036 3037 ctf_dprintf("Trying to free name: %p\n", cup->cu_name); 3038 if (cup->cu_name != NULL) { 3039 ctf_free(cup->cu_name, strlen(cup->cu_name) + 1); 3040 cup->cu_name = NULL; 3041 } 3042 3043 ctf_dprintf("Trying to free merge handle: %p\n", cup->cu_cmh); 3044 if (cup->cu_cmh != NULL) { 3045 ctf_merge_fini(cup->cu_cmh); 3046 cup->cu_cmh = NULL; 3047 } 3048 3049 ctf_dprintf("Trying to free functions\n"); 3050 for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; cdf = ndf) { 3051 ndf = ctf_list_next(cdf); 3052 ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1); 3053 if (cdf->cdf_fip.ctc_argc != 0) { 3054 ctf_free(cdf->cdf_argv, 3055 sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc); 3056 } 3057 ctf_free(cdf, sizeof (ctf_dwfunc_t)); 3058 } 3059 3060 ctf_dprintf("Trying to free variables\n"); 3061 for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; cdv = ndv) { 3062 ndv = ctf_list_next(cdv); 3063 ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1); 3064 ctf_free(cdv, sizeof (ctf_dwvar_t)); 3065 } 3066 3067 ctf_dprintf("Trying to free bitfields\n"); 3068 for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; cdb = ndb) { 3069 ndb = ctf_list_next(cdb); 3070 ctf_free(cdb, sizeof (ctf_dwbitf_t)); 3071 } 3072 3073 if (cup->cu_ctfp != NULL) { 3074 ctf_close(cup->cu_ctfp); 3075 cup->cu_ctfp = NULL; 3076 } 3077 3078 cookie = NULL; 3079 while ((map = avl_destroy_nodes(&cup->cu_map, &cookie)) != NULL) 3080 ctf_free(map, sizeof (ctf_dwmap_t)); 3081 avl_destroy(&cup->cu_map); 3082 cup->cu_errbuf = NULL; 3083 3084 if (cup->cu_cu != NULL) { 3085 ctf_dwarf_dealloc(cup, cup->cu_cu, DW_DLA_DIE); 3086 cup->cu_cu = NULL; 3087 } 3088 } 3089 3090 static int 3091 ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, uint_t *ndies, 3092 char *errbuf, size_t errlen) 3093 { 3094 int ret; 3095 Dwarf_Half vers; 3096 Dwarf_Unsigned nexthdr; 3097 3098 while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL, 3099 &nexthdr, derr)) != DW_DLV_NO_ENTRY) { 3100 if (ret != DW_DLV_OK) { 3101 (void) snprintf(errbuf, errlen, 3102 "file does not contain valid DWARF data: %s\n", 3103 dwarf_errmsg(*derr)); 3104 return (ECTF_CONVBKERR); 3105 } 3106 3107 switch (vers) { 3108 case DWARF_VERSION_TWO: 3109 case DWARF_VERSION_FOUR: 3110 break; 3111 default: 3112 (void) snprintf(errbuf, errlen, 3113 "unsupported DWARF version: %d\n", vers); 3114 return (ECTF_CONVBKERR); 3115 } 3116 *ndies = *ndies + 1; 3117 } 3118 3119 return (0); 3120 } 3121 3122 /* 3123 * Fill out just enough of each ctf_cu_t for the conversion process to 3124 * be able to finish the rest in a (potentially) multithreaded context. 3125 */ 3126 static int 3127 ctf_dwarf_preinit_dies(int fd, Elf *elf, Dwarf_Debug dw, 3128 mutex_t *dwlock, Dwarf_Error *derr, uint_t ndies, ctf_cu_t *cdies, 3129 uint_t flags, char *errbuf, size_t errlen) 3130 { 3131 Dwarf_Unsigned hdrlen, abboff, nexthdr; 3132 Dwarf_Half addrsz, vers; 3133 Dwarf_Unsigned offset = 0; 3134 uint_t added = 0; 3135 int ret, i = 0; 3136 3137 while ((ret = dwarf_next_cu_header(dw, &hdrlen, &vers, &abboff, 3138 &addrsz, &nexthdr, derr)) != DW_DLV_NO_ENTRY) { 3139 Dwarf_Die cu; 3140 ctf_cu_t *cup; 3141 char *name; 3142 3143 VERIFY3U(i, <, ndies); 3144 3145 cup = &cdies[i++]; 3146 3147 cup->cu_fd = fd; 3148 cup->cu_elf = elf; 3149 cup->cu_dwarf = dw; 3150 cup->cu_errbuf = errbuf; 3151 cup->cu_errlen = errlen; 3152 cup->cu_dwarf = dw; 3153 if (ndies > 1) { 3154 /* 3155 * Only need to lock calls into libdwarf if there are 3156 * multiple CUs. 3157 */ 3158 cup->cu_dwlock = dwlock; 3159 cup->cu_doweaks = B_FALSE; 3160 } else { 3161 cup->cu_doweaks = B_TRUE; 3162 } 3163 3164 cup->cu_voidtid = CTF_ERR; 3165 cup->cu_longtid = CTF_ERR; 3166 cup->cu_cuoff = offset; 3167 cup->cu_maxoff = nexthdr - 1; 3168 cup->cu_vers = vers; 3169 cup->cu_addrsz = addrsz; 3170 3171 if ((ret = ctf_dwarf_sib(cup, NULL, &cu)) != 0) { 3172 ctf_dprintf("cu %d - no cu %d\n", i, ret); 3173 return (ret); 3174 } 3175 3176 if (cu == NULL) { 3177 ctf_dprintf("cu %d - no cu data\n", i); 3178 (void) snprintf(cup->cu_errbuf, cup->cu_errlen, 3179 "file does not contain DWARF data"); 3180 return (ECTF_CONVNODEBUG); 3181 } 3182 3183 if (ctf_dwarf_string(cup, cu, DW_AT_name, &name) == 0) { 3184 size_t len = strlen(name) + 1; 3185 char *b = basename(name); 3186 3187 cup->cu_name = strdup(b); 3188 ctf_free(name, len); 3189 if (cup->cu_name == NULL) 3190 return (ENOMEM); 3191 } 3192 3193 ret = ctf_dwarf_child(cup, cu, &cup->cu_cu); 3194 dwarf_dealloc(cup->cu_dwarf, cu, DW_DLA_DIE); 3195 if (ret != 0) { 3196 ctf_dprintf("cu %d - no child '%s' %d\n", 3197 i, cup->cu_name != NULL ? cup->cu_name : "NULL", 3198 ret); 3199 return (ret); 3200 } 3201 3202 if (cup->cu_cu == NULL) { 3203 size_t len; 3204 3205 ctf_dprintf("cu %d - no child data '%s' %d\n", 3206 i, cup->cu_name != NULL ? cup->cu_name : "NULL", 3207 ret); 3208 if (cup->cu_name != NULL && 3209 (len = strlen(cup->cu_name)) > 2 && 3210 strncmp(".c", &cup->cu_name[len - 2], 2) == 0) { 3211 /* 3212 * Missing DEBUG data for a .c file, return an 3213 * error unless this is permitted. 3214 */ 3215 if (!(flags & CTF_ALLOW_MISSING_DEBUG)) { 3216 (void) snprintf( 3217 cup->cu_errbuf, cup->cu_errlen, 3218 "file %s is missing debug info", 3219 cup->cu_name); 3220 return (ECTF_CONVNODEBUG); 3221 } 3222 } 3223 } else { 3224 added++; 3225 } 3226 3227 ctf_dprintf("Pre-initialised cu %d - '%s'\n", i, 3228 cup->cu_name != NULL ? cup->cu_name : "NULL"); 3229 3230 offset = nexthdr; 3231 } 3232 3233 /* 3234 * If none of the CUs had debug data, return an error. 3235 */ 3236 if (added == 0) 3237 return (ECTF_CONVNODEBUG); 3238 3239 return (0); 3240 } 3241 3242 static int 3243 ctf_dwarf_init_die(ctf_cu_t *cup) 3244 { 3245 int ret; 3246 3247 cup->cu_ctfp = ctf_fdcreate(cup->cu_fd, &ret); 3248 if (cup->cu_ctfp == NULL) 3249 return (ret); 3250 3251 avl_create(&cup->cu_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t), 3252 offsetof(ctf_dwmap_t, cdm_avl)); 3253 3254 if ((ret = ctf_dwarf_die_elfenc(cup->cu_elf, cup, 3255 cup->cu_errbuf, cup->cu_errlen)) != 0) { 3256 return (ret); 3257 } 3258 3259 if ((cup->cu_cmh = ctf_merge_init(cup->cu_fd, &ret)) == NULL) 3260 return (ret); 3261 3262 return (0); 3263 } 3264 3265 /* 3266 * This is our only recourse to identify a C source file that is missing debug 3267 * info: it will be mentioned as an STT_FILE, but not have a compile unit entry. 3268 * (A traditional ctfmerge works on individual files, so can identify missing 3269 * DWARF more directly, via ctf_has_c_source() on the .o file.) 3270 * 3271 * As we operate on basenames, this can of course miss some cases, but it's 3272 * better than not checking at all. 3273 * 3274 * We explicitly whitelist some CRT components. Failing that, there's always 3275 * the -m option. 3276 */ 3277 static boolean_t 3278 c_source_has_debug(const char *file, ctf_cu_t *cus, size_t nr_cus) 3279 { 3280 const char *basename = strrchr(file, '/'); 3281 3282 if (basename == NULL) 3283 basename = file; 3284 else 3285 basename++; 3286 3287 if (strcmp(basename, "common-crt.c") == 0 || 3288 strcmp(basename, "gmon.c") == 0 || 3289 strcmp(basename, "dlink_init.c") == 0 || 3290 strcmp(basename, "dlink_common.c") == 0 || 3291 strcmp(basename, "ssp_ns.c") == 0 || 3292 strncmp(basename, "crt", strlen("crt")) == 0 || 3293 strncmp(basename, "values-", strlen("values-")) == 0) 3294 return (B_TRUE); 3295 3296 for (size_t i = 0; i < nr_cus; i++) { 3297 if (cus[i].cu_name != NULL && 3298 strcmp(basename, cus[i].cu_name) == 0) { 3299 return (B_TRUE); 3300 } 3301 } 3302 3303 return (B_FALSE); 3304 } 3305 3306 static int 3307 ctf_dwarf_check_missing(ctf_cu_t *cus, size_t nr_cus, Elf *elf, 3308 char *errmsg, size_t errlen) 3309 { 3310 Elf_Scn *scn, *strscn; 3311 Elf_Data *data, *strdata; 3312 GElf_Shdr shdr; 3313 ulong_t i; 3314 3315 scn = NULL; 3316 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3317 if (gelf_getshdr(scn, &shdr) == NULL) { 3318 (void) snprintf(errmsg, errlen, 3319 "failed to get section header: %s\n", 3320 elf_errmsg(elf_errno())); 3321 return (EINVAL); 3322 } 3323 3324 if (shdr.sh_type == SHT_SYMTAB) 3325 break; 3326 } 3327 3328 if (scn == NULL) 3329 return (0); 3330 3331 if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL) { 3332 (void) snprintf(errmsg, errlen, 3333 "failed to get str section: %s\n", 3334 elf_errmsg(elf_errno())); 3335 return (EINVAL); 3336 } 3337 3338 if ((data = elf_getdata(scn, NULL)) == NULL) { 3339 (void) snprintf(errmsg, errlen, "failed to read section: %s\n", 3340 elf_errmsg(elf_errno())); 3341 return (EINVAL); 3342 } 3343 3344 if ((strdata = elf_getdata(strscn, NULL)) == NULL) { 3345 (void) snprintf(errmsg, errlen, 3346 "failed to read string table: %s\n", 3347 elf_errmsg(elf_errno())); 3348 return (EINVAL); 3349 } 3350 3351 for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) { 3352 GElf_Sym sym; 3353 const char *file; 3354 size_t len; 3355 3356 if (gelf_getsym(data, i, &sym) == NULL) { 3357 (void) snprintf(errmsg, errlen, 3358 "failed to read sym %lu: %s\n", 3359 i, elf_errmsg(elf_errno())); 3360 return (EINVAL); 3361 } 3362 3363 if (GELF_ST_TYPE(sym.st_info) != STT_FILE) 3364 continue; 3365 3366 file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name); 3367 len = strlen(file); 3368 if (len < 2 || strncmp(".c", &file[len - 2], 2) != 0) 3369 continue; 3370 3371 if (!c_source_has_debug(file, cus, nr_cus)) { 3372 (void) snprintf(errmsg, errlen, 3373 "file %s is missing debug info", file); 3374 return (ECTF_CONVNODEBUG); 3375 } 3376 } 3377 3378 return (0); 3379 } 3380 3381 static int 3382 ctf_dwarf_convert_batch(uint_t start, uint_t end, int fd, uint_t nthrs, 3383 workq_t *wqp, ctf_cu_t *cdies, ctf_file_t **fpp) 3384 { 3385 ctf_file_t *fpprev = NULL; 3386 uint_t i, added; 3387 ctf_cu_t *cup; 3388 int ret, err; 3389 3390 ctf_dprintf("Processing CU batch %u - %u\n", start, end - 1); 3391 3392 added = 0; 3393 for (i = start; i < end; i++) { 3394 cup = &cdies[i]; 3395 if (cup->cu_cu == NULL) 3396 continue; 3397 ctf_dprintf("adding cu %s: %p, %x %x\n", 3398 cup->cu_name != NULL ? cup->cu_name : "NULL", 3399 cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff); 3400 if (workq_add(wqp, cup) == -1) { 3401 err = errno; 3402 goto out; 3403 } 3404 added++; 3405 } 3406 3407 /* 3408 * No debug data found in this batch, move on to the next. 3409 * NB: ctf_dwarf_preinit_dies() has already checked that there is at 3410 * least one CU with debug data present. 3411 */ 3412 if (added == 0) { 3413 err = 0; 3414 goto out; 3415 } 3416 3417 ctf_dprintf("Running conversion phase\n"); 3418 3419 /* Run the conversions */ 3420 ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, &err); 3421 if (ret == WORKQ_ERROR) { 3422 err = errno; 3423 goto out; 3424 } else if (ret == WORKQ_UERROR) { 3425 ctf_dprintf("internal convert failed: %s\n", 3426 ctf_errmsg(err)); 3427 goto out; 3428 } 3429 3430 ctf_dprintf("starting merge phase\n"); 3431 3432 ctf_merge_t *cmp = ctf_merge_init(fd, &err); 3433 if (cmp == NULL) 3434 goto out; 3435 3436 if ((err = ctf_merge_set_nthreads(cmp, nthrs)) != 0) { 3437 ctf_merge_fini(cmp); 3438 goto out; 3439 } 3440 3441 /* 3442 * If we have the result of a previous merge then add it as an input to 3443 * the next one. 3444 */ 3445 if (*fpp != NULL) { 3446 ctf_dprintf("adding previous merge CU\n"); 3447 fpprev = *fpp; 3448 *fpp = NULL; 3449 if ((err = ctf_merge_add(cmp, fpprev)) != 0) { 3450 ctf_merge_fini(cmp); 3451 goto out; 3452 } 3453 } 3454 3455 ctf_dprintf("adding CUs to merge\n"); 3456 for (i = start; i < end; i++) { 3457 cup = &cdies[i]; 3458 if (cup->cu_cu == NULL) 3459 continue; 3460 if ((err = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) { 3461 ctf_merge_fini(cmp); 3462 *fpp = NULL; 3463 goto out; 3464 } 3465 } 3466 3467 ctf_dprintf("performing merge\n"); 3468 err = ctf_merge_merge(cmp, fpp); 3469 if (err != 0) { 3470 ctf_dprintf("failed merge!\n"); 3471 *fpp = NULL; 3472 ctf_merge_fini(cmp); 3473 goto out; 3474 } 3475 3476 ctf_merge_fini(cmp); 3477 3478 ctf_dprintf("freeing CUs\n"); 3479 for (i = start; i < end; i++) { 3480 cup = &cdies[i]; 3481 ctf_dprintf("freeing cu %d\n", i); 3482 ctf_dwarf_free_die(cup); 3483 } 3484 3485 out: 3486 ctf_close(fpprev); 3487 return (err); 3488 } 3489 3490 int 3491 ctf_dwarf_convert(int fd, Elf *elf, uint_t bsize, uint_t nthrs, uint_t flags, 3492 ctf_file_t **fpp, char *errbuf, size_t errlen) 3493 { 3494 int err, ret; 3495 uint_t ndies, i; 3496 Dwarf_Debug dw; 3497 Dwarf_Error derr; 3498 ctf_cu_t *cdies = NULL, *cup; 3499 workq_t *wqp = NULL; 3500 mutex_t dwlock = ERRORCHECKMUTEX; 3501 3502 *fpp = NULL; 3503 3504 ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr); 3505 if (ret != DW_DLV_OK) { 3506 if (ret == DW_DLV_NO_ENTRY || 3507 dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) { 3508 (void) snprintf(errbuf, errlen, 3509 "file does not contain DWARF data\n"); 3510 return (ECTF_CONVNODEBUG); 3511 } 3512 3513 (void) snprintf(errbuf, errlen, 3514 "dwarf_elf_init() failed: %s\n", dwarf_errmsg(derr)); 3515 return (ECTF_CONVBKERR); 3516 } 3517 3518 /* 3519 * Iterate over all of the compilation units and create a ctf_cu_t for 3520 * each of them. This is used to determine if we have zero, one, or 3521 * multiple dies to convert. If we have zero, that's an error. If 3522 * there's only one die, that's the simple case. No merge needed and 3523 * only a single Dwarf_Debug as well. 3524 */ 3525 ndies = 0; 3526 err = ctf_dwarf_count_dies(dw, &derr, &ndies, errbuf, errlen); 3527 3528 ctf_dprintf("found %d DWARF CUs\n", ndies); 3529 3530 if (ndies == 0) { 3531 (void) snprintf(errbuf, errlen, 3532 "file does not contain DWARF data\n"); 3533 (void) dwarf_finish(dw, &derr); 3534 return (ECTF_CONVNODEBUG); 3535 } 3536 3537 cdies = ctf_alloc(sizeof (ctf_cu_t) * ndies); 3538 if (cdies == NULL) { 3539 (void) dwarf_finish(dw, &derr); 3540 return (ENOMEM); 3541 } 3542 3543 bzero(cdies, sizeof (ctf_cu_t) * ndies); 3544 3545 if ((err = ctf_dwarf_preinit_dies(fd, elf, dw, &dwlock, &derr, 3546 ndies, cdies, flags, errbuf, errlen)) != 0) { 3547 goto out; 3548 } 3549 3550 if (!(flags & CTF_ALLOW_MISSING_DEBUG) && 3551 (err = ctf_dwarf_check_missing(cdies, ndies, 3552 elf, errbuf, errlen)) != 0) { 3553 goto out; 3554 } 3555 3556 /* Only one cu, no merge required */ 3557 if (ndies == 1) { 3558 cup = cdies; 3559 3560 if ((err = ctf_dwarf_convert_one(cup, NULL)) != 0) 3561 goto out; 3562 3563 *fpp = cup->cu_ctfp; 3564 cup->cu_ctfp = NULL; 3565 ctf_dwarf_free_die(cup); 3566 goto success; 3567 } 3568 3569 /* 3570 * There's no need to have either more threads or a batch size larger 3571 * than the total number of dies, even if the user requested them. 3572 */ 3573 nthrs = min(ndies, nthrs); 3574 bsize = min(ndies, bsize); 3575 3576 if (workq_init(&wqp, nthrs) == -1) { 3577 err = errno; 3578 goto out; 3579 } 3580 3581 /* 3582 * In order to avoid exhausting memory limits when converting files 3583 * with a large number of dies, we process them in batches. 3584 */ 3585 for (i = 0; i < ndies; i += bsize) { 3586 err = ctf_dwarf_convert_batch(i, min(i + bsize, ndies), 3587 fd, nthrs, wqp, cdies, fpp); 3588 if (err != 0) { 3589 *fpp = NULL; 3590 goto out; 3591 } 3592 } 3593 3594 success: 3595 err = 0; 3596 ctf_dprintf("successfully converted!\n"); 3597 3598 out: 3599 (void) dwarf_finish(dw, &derr); 3600 workq_fini(wqp); 3601 ctf_free(cdies, sizeof (ctf_cu_t) * ndies); 3602 return (err); 3603 } 3604