1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2019, Joyent, Inc. 14 */ 15 16 /* 17 * To perform a merge of two CTF containers, we first diff the two containers 18 * types. For every type that's in the src container, but not in the dst 19 * container, we note it and add it to dst container. If there are any objects 20 * or functions associated with src, we go through and update the types that 21 * they refer to such that they all refer to types in the dst container. 22 * 23 * The bulk of the logic for the merge, after we've run the diff, occurs in 24 * ctf_merge_common(). 25 * 26 * In terms of exported APIs, we don't really export a simple merge two 27 * containers, as the general way this is used, in something like ctfmerge(1), 28 * is to add all the containers and then let us figure out the best way to merge 29 * it. 30 */ 31 32 #include <libctf_impl.h> 33 #include <sys/debug.h> 34 #include <sys/list.h> 35 #include <stddef.h> 36 #include <fcntl.h> 37 #include <sys/types.h> 38 #include <sys/stat.h> 39 #include <mergeq.h> 40 #include <errno.h> 41 42 typedef struct ctf_merge_tinfo { 43 uint16_t cmt_map; /* Map to the type in out */ 44 boolean_t cmt_fixup; 45 boolean_t cmt_forward; 46 boolean_t cmt_missing; 47 } ctf_merge_tinfo_t; 48 49 /* 50 * State required for doing an individual merge of two containers. 51 */ 52 typedef struct ctf_merge_types { 53 ctf_file_t *cm_out; /* Output CTF file */ 54 ctf_file_t *cm_src; /* Input CTF file */ 55 ctf_merge_tinfo_t *cm_tmap; /* Type state information */ 56 boolean_t cm_dedup; /* Are we doing a dedup? */ 57 boolean_t cm_unique; /* are we doing a uniquify? */ 58 } ctf_merge_types_t; 59 60 typedef struct ctf_merge_objmap { 61 list_node_t cmo_node; 62 const char *cmo_name; /* Symbol name */ 63 const char *cmo_file; /* Symbol file */ 64 ulong_t cmo_idx; /* Symbol ID */ 65 Elf64_Sym cmo_sym; /* Symbol Entry */ 66 ctf_id_t cmo_tid; /* Type ID */ 67 } ctf_merge_objmap_t; 68 69 typedef struct ctf_merge_funcmap { 70 list_node_t cmf_node; 71 const char *cmf_name; /* Symbol name */ 72 const char *cmf_file; /* Symbol file */ 73 ulong_t cmf_idx; /* Symbol ID */ 74 Elf64_Sym cmf_sym; /* Symbol Entry */ 75 ctf_id_t cmf_rtid; /* Type ID */ 76 uint_t cmf_flags; /* ctf_funcinfo_t ctc_flags */ 77 uint_t cmf_argc; /* Number of arguments */ 78 ctf_id_t cmf_args[]; /* Types of arguments */ 79 } ctf_merge_funcmap_t; 80 81 typedef struct ctf_merge_input { 82 list_node_t cmi_node; 83 ctf_file_t *cmi_input; 84 list_t cmi_omap; 85 list_t cmi_fmap; 86 boolean_t cmi_created; 87 } ctf_merge_input_t; 88 89 struct ctf_merge_handle { 90 list_t cmh_inputs; /* Input list */ 91 uint_t cmh_ninputs; /* Number of inputs */ 92 uint_t cmh_nthreads; /* Number of threads to use */ 93 ctf_file_t *cmh_unique; /* ctf to uniquify against */ 94 boolean_t cmh_msyms; /* Should we merge symbols/funcs? */ 95 int cmh_ofd; /* FD for output file */ 96 int cmh_flags; /* Flags that control merge behavior */ 97 char *cmh_label; /* Optional label */ 98 char *cmh_pname; /* Parent name */ 99 }; 100 101 typedef struct ctf_merge_symbol_arg { 102 list_t *cmsa_objmap; 103 list_t *cmsa_funcmap; 104 ctf_file_t *cmsa_out; 105 boolean_t cmsa_dedup; 106 } ctf_merge_symbol_arg_t; 107 108 static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t); 109 110 static ctf_id_t 111 ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id) 112 { 113 if (cmp->cm_dedup == B_FALSE) { 114 VERIFY(cmp->cm_tmap[id].cmt_map != 0); 115 return (cmp->cm_tmap[id].cmt_map); 116 } 117 118 while (cmp->cm_tmap[id].cmt_missing == B_FALSE) { 119 VERIFY(cmp->cm_tmap[id].cmt_map != 0); 120 id = cmp->cm_tmap[id].cmt_map; 121 } 122 VERIFY(cmp->cm_tmap[id].cmt_map != 0); 123 return (cmp->cm_tmap[id].cmt_map); 124 } 125 126 static void 127 ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp, 128 ctf_id_t oid, void *arg) 129 { 130 ctf_merge_types_t *cmp = arg; 131 ctf_merge_tinfo_t *cmt = cmp->cm_tmap; 132 133 if (same == B_TRUE) { 134 if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD && 135 ctf_type_kind(ofp, oid) != CTF_K_FORWARD) { 136 VERIFY(cmt[oid].cmt_map == 0); 137 138 /* 139 * If we're uniquifying types, it's possible for the 140 * container that we're uniquifying against to have a 141 * forward which exists in the container being reduced. 142 * For example, genunix has the machcpu structure as a 143 * forward which is actually in unix and we uniquify 144 * unix against genunix. In such cases, we explicitly do 145 * not do any mapping of the forward information, lest 146 * we risk losing the real definition. Instead, mark 147 * that it's missing. 148 */ 149 if (cmp->cm_unique == B_TRUE) { 150 cmt[oid].cmt_missing = B_TRUE; 151 return; 152 } 153 154 cmt[oid].cmt_map = iid; 155 cmt[oid].cmt_forward = B_TRUE; 156 ctf_dprintf("merge diff forward mapped %d->%d\n", oid, 157 iid); 158 return; 159 } 160 161 /* 162 * We could have multiple things that a given type ends up 163 * matching in the world of forwards and pointers to forwards. 164 * For now just take the first one... 165 */ 166 if (cmt[oid].cmt_map != 0) 167 return; 168 cmt[oid].cmt_map = iid; 169 ctf_dprintf("merge diff mapped %d->%d\n", oid, iid); 170 } else if (ifp == cmp->cm_src) { 171 VERIFY(cmt[iid].cmt_map == 0); 172 cmt[iid].cmt_missing = B_TRUE; 173 ctf_dprintf("merge diff said %d is missing\n", iid); 174 } 175 } 176 177 static int 178 ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id) 179 { 180 int ret, flags; 181 const ctf_type_t *tp; 182 const char *name; 183 ctf_encoding_t en; 184 185 if (ctf_type_encoding(cmp->cm_src, id, &en) != 0) 186 return (CTF_ERR); 187 188 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 189 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 190 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 191 flags = CTF_ADD_ROOT; 192 else 193 flags = CTF_ADD_NONROOT; 194 195 ret = ctf_add_encoded(cmp->cm_out, flags, name, &en, 196 ctf_type_kind(cmp->cm_src, id)); 197 198 if (ret == CTF_ERR) 199 return (ret); 200 201 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 202 cmp->cm_tmap[id].cmt_map = ret; 203 return (0); 204 } 205 206 static int 207 ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id) 208 { 209 int ret, flags; 210 const ctf_type_t *tp; 211 ctf_arinfo_t ar; 212 213 if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR) 214 return (CTF_ERR); 215 216 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 217 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 218 flags = CTF_ADD_ROOT; 219 else 220 flags = CTF_ADD_NONROOT; 221 222 if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) { 223 ret = ctf_merge_add_type(cmp, ar.ctr_contents); 224 if (ret != 0) 225 return (ret); 226 ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0); 227 } 228 ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents); 229 230 if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) { 231 ret = ctf_merge_add_type(cmp, ar.ctr_index); 232 if (ret != 0) 233 return (ret); 234 ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0); 235 } 236 ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index); 237 238 ret = ctf_add_array(cmp->cm_out, flags, &ar); 239 if (ret == CTF_ERR) 240 return (ret); 241 242 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 243 cmp->cm_tmap[id].cmt_map = ret; 244 245 return (0); 246 } 247 248 static int 249 ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id) 250 { 251 int ret, flags; 252 const ctf_type_t *tp; 253 ctf_id_t reftype; 254 const char *name; 255 256 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 257 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 258 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 259 flags = CTF_ADD_ROOT; 260 else 261 flags = CTF_ADD_NONROOT; 262 263 reftype = ctf_type_reference(cmp->cm_src, id); 264 if (reftype == CTF_ERR) 265 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); 266 267 if (cmp->cm_tmap[reftype].cmt_map == 0) { 268 ret = ctf_merge_add_type(cmp, reftype); 269 if (ret != 0) 270 return (ret); 271 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0); 272 } 273 reftype = ctf_merge_gettype(cmp, reftype); 274 275 ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype, 276 ctf_type_kind(cmp->cm_src, id)); 277 if (ret == CTF_ERR) 278 return (ret); 279 280 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 281 cmp->cm_tmap[id].cmt_map = ret; 282 return (0); 283 } 284 285 static int 286 ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id) 287 { 288 int ret, flags; 289 const ctf_type_t *tp; 290 const char *name; 291 ctf_id_t reftype; 292 293 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 294 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 295 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 296 flags = CTF_ADD_ROOT; 297 else 298 flags = CTF_ADD_NONROOT; 299 300 reftype = ctf_type_reference(cmp->cm_src, id); 301 if (reftype == CTF_ERR) 302 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); 303 304 if (cmp->cm_tmap[reftype].cmt_map == 0) { 305 ret = ctf_merge_add_type(cmp, reftype); 306 if (ret != 0) 307 return (ret); 308 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0); 309 } 310 reftype = ctf_merge_gettype(cmp, reftype); 311 312 ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype); 313 if (ret == CTF_ERR) 314 return (ret); 315 316 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 317 cmp->cm_tmap[id].cmt_map = ret; 318 return (0); 319 } 320 321 typedef struct ctf_merge_enum { 322 ctf_file_t *cme_fp; 323 ctf_id_t cme_id; 324 } ctf_merge_enum_t; 325 326 static int 327 ctf_merge_add_enumerator(const char *name, int value, void *arg) 328 { 329 ctf_merge_enum_t *cmep = arg; 330 331 return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) == 332 CTF_ERR); 333 } 334 335 static int 336 ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id) 337 { 338 int flags; 339 const ctf_type_t *tp; 340 const char *name; 341 ctf_id_t enumid; 342 ctf_merge_enum_t cme; 343 344 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 345 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 346 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 347 flags = CTF_ADD_ROOT; 348 else 349 flags = CTF_ADD_NONROOT; 350 351 enumid = ctf_add_enum(cmp->cm_out, flags, name); 352 if (enumid == CTF_ERR) 353 return (enumid); 354 355 cme.cme_fp = cmp->cm_out; 356 cme.cme_id = enumid; 357 if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator, 358 &cme) != 0) 359 return (CTF_ERR); 360 361 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 362 cmp->cm_tmap[id].cmt_map = enumid; 363 return (0); 364 } 365 366 static int 367 ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id) 368 { 369 int ret, flags, i; 370 const ctf_type_t *tp; 371 ctf_funcinfo_t ctc; 372 ctf_id_t *argv; 373 374 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 375 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 376 flags = CTF_ADD_ROOT; 377 else 378 flags = CTF_ADD_NONROOT; 379 380 if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR) 381 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); 382 383 argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc); 384 if (argv == NULL) 385 return (ctf_set_errno(cmp->cm_out, ENOMEM)); 386 if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) == 387 CTF_ERR) { 388 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc); 389 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); 390 } 391 392 if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) { 393 ret = ctf_merge_add_type(cmp, ctc.ctc_return); 394 if (ret != 0) 395 return (ret); 396 ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0); 397 } 398 ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return); 399 400 for (i = 0; i < ctc.ctc_argc; i++) { 401 if (cmp->cm_tmap[argv[i]].cmt_map == 0) { 402 ret = ctf_merge_add_type(cmp, argv[i]); 403 if (ret != 0) 404 return (ret); 405 ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0); 406 } 407 argv[i] = ctf_merge_gettype(cmp, argv[i]); 408 } 409 410 ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv); 411 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc); 412 if (ret == CTF_ERR) 413 return (ret); 414 415 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 416 cmp->cm_tmap[id].cmt_map = ret; 417 return (0); 418 } 419 420 static int 421 ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id) 422 { 423 int ret, flags; 424 const ctf_type_t *tp; 425 const char *name; 426 427 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 428 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 429 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 430 flags = CTF_ADD_ROOT; 431 else 432 flags = CTF_ADD_NONROOT; 433 434 /* 435 * ctf_add_forward tries to check to see if a given forward already 436 * exists in one of its hash tables. If we're here then we know that we 437 * have a forward in a container that isn't present in another. 438 * Therefore, we choose a token hash table to satisfy the API choice 439 * here. 440 */ 441 ret = ctf_add_forward(cmp->cm_out, flags, name, CTF_K_STRUCT); 442 if (ret == CTF_ERR) 443 return (CTF_ERR); 444 445 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 446 cmp->cm_tmap[id].cmt_map = ret; 447 return (0); 448 } 449 450 typedef struct ctf_merge_su { 451 ctf_merge_types_t *cms_cm; 452 ctf_id_t cms_id; 453 } ctf_merge_su_t; 454 455 static int 456 ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg) 457 { 458 ctf_merge_su_t *cms = arg; 459 460 VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0); 461 type = cms->cms_cm->cm_tmap[type].cmt_map; 462 463 ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id); 464 return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name, 465 type, offset) == CTF_ERR); 466 } 467 468 /* 469 * During the first pass, we always add the generic structure and union but none 470 * of its members as they might not all have been mapped yet. Instead we just 471 * mark all structures and unions as needing to be fixed up. 472 */ 473 static int 474 ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward) 475 { 476 int flags, kind; 477 const ctf_type_t *tp; 478 const char *name; 479 ctf_id_t suid; 480 481 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 482 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 483 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 484 flags = CTF_ADD_ROOT; 485 else 486 flags = CTF_ADD_NONROOT; 487 kind = ctf_type_kind(cmp->cm_src, id); 488 489 if (kind == CTF_K_STRUCT) 490 suid = ctf_add_struct(cmp->cm_out, flags, name); 491 else 492 suid = ctf_add_union(cmp->cm_out, flags, name); 493 494 if (suid == CTF_ERR) 495 return (suid); 496 497 /* 498 * If this is a forward reference then its mapping should already 499 * exist. 500 */ 501 if (forward == B_FALSE) { 502 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 503 cmp->cm_tmap[id].cmt_map = suid; 504 ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id, 505 suid); 506 } else { 507 VERIFY(cmp->cm_tmap[id].cmt_map == suid); 508 } 509 cmp->cm_tmap[id].cmt_fixup = B_TRUE; 510 511 return (0); 512 } 513 514 static int 515 ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id) 516 { 517 int kind, ret; 518 519 /* 520 * We may end up evaluating a type more than once as we may deal with it 521 * as we recursively evaluate some kind of reference and then we may see 522 * it normally. 523 */ 524 if (cmp->cm_tmap[id].cmt_map != 0) 525 return (0); 526 527 kind = ctf_type_kind(cmp->cm_src, id); 528 switch (kind) { 529 case CTF_K_INTEGER: 530 case CTF_K_FLOAT: 531 ret = ctf_merge_add_number(cmp, id); 532 break; 533 case CTF_K_ARRAY: 534 ret = ctf_merge_add_array(cmp, id); 535 break; 536 case CTF_K_POINTER: 537 case CTF_K_VOLATILE: 538 case CTF_K_CONST: 539 case CTF_K_RESTRICT: 540 ret = ctf_merge_add_reftype(cmp, id); 541 break; 542 case CTF_K_TYPEDEF: 543 ret = ctf_merge_add_typedef(cmp, id); 544 break; 545 case CTF_K_ENUM: 546 ret = ctf_merge_add_enum(cmp, id); 547 break; 548 case CTF_K_FUNCTION: 549 ret = ctf_merge_add_func(cmp, id); 550 break; 551 case CTF_K_FORWARD: 552 ret = ctf_merge_add_forward(cmp, id); 553 break; 554 case CTF_K_STRUCT: 555 case CTF_K_UNION: 556 ret = ctf_merge_add_sou(cmp, id, B_FALSE); 557 break; 558 case CTF_K_UNKNOWN: 559 /* 560 * We don't add unknown types, and we later assert that nothing 561 * should reference them. 562 */ 563 return (0); 564 default: 565 abort(); 566 } 567 568 return (ret); 569 } 570 571 static int 572 ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id) 573 { 574 ctf_dtdef_t *dtd; 575 ctf_merge_su_t cms; 576 ctf_id_t mapid; 577 ssize_t size; 578 579 mapid = cmp->cm_tmap[id].cmt_map; 580 VERIFY(mapid != 0); 581 dtd = ctf_dtd_lookup(cmp->cm_out, mapid); 582 VERIFY(dtd != NULL); 583 584 ctf_dprintf("Trying to fix up sou %d\n", id); 585 cms.cms_cm = cmp; 586 cms.cms_id = mapid; 587 if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0) 588 return (CTF_ERR); 589 590 if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR) 591 return (CTF_ERR); 592 if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR) 593 return (CTF_ERR); 594 595 return (0); 596 } 597 598 static int 599 ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id) 600 { 601 int kind, ret; 602 603 kind = ctf_type_kind(cmp->cm_src, id); 604 switch (kind) { 605 case CTF_K_STRUCT: 606 case CTF_K_UNION: 607 ret = ctf_merge_fixup_sou(cmp, id); 608 break; 609 default: 610 VERIFY(0); 611 ret = CTF_ERR; 612 } 613 614 return (ret); 615 } 616 617 /* 618 * Now that we've successfully merged everything, we're going to remap the type 619 * table. 620 * 621 * Remember we have two containers: ->cm_src is what we're working from, and 622 * ->cm_out is where we are building the de-duplicated CTF. 623 * 624 * The index of this table is always the type IDs in ->cm_src. 625 * 626 * When we built this table originally in ctf_diff_self(), if we found a novel 627 * type, we marked it as .cmt_missing to indicate it needs adding to ->cm_out. 628 * Otherwise, .cmt_map indicated the ->cm_src type ID that this type duplicates. 629 * 630 * Then, in ctf_merge_common(), we walked through and added all "cmt_missing" 631 * types to ->cm_out with ctf_merge_add_type(). These routines update cmt_map 632 * to be the *new* type ID in ->cm_out. In this function, you can read 633 * "cmt_missing" as meaning "added to ->cm_out, and cmt_map updated". 634 * 635 * So at this point, we need to mop up all types where .cmt_missing == B_FALSE, 636 * making sure *their* .cmt_map values also point to the ->cm_out container. 637 */ 638 static void 639 ctf_merge_dedup_remap(ctf_merge_types_t *cmp) 640 { 641 int i; 642 643 for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) { 644 ctf_id_t tid; 645 646 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) { 647 VERIFY(cmp->cm_tmap[i].cmt_map != 0); 648 continue; 649 } 650 651 tid = i; 652 while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) { 653 VERIFY(cmp->cm_tmap[tid].cmt_map != 0); 654 tid = cmp->cm_tmap[tid].cmt_map; 655 } 656 VERIFY(cmp->cm_tmap[tid].cmt_map != 0); 657 cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map; 658 } 659 } 660 661 662 /* 663 * We're going to do three passes over the containers. 664 * 665 * Pass 1 checks for forward references in the output container that we know 666 * exist in the source container. 667 * 668 * Pass 2 adds all the missing types from the source container. As part of this 669 * we may be adding a type as a forward reference that doesn't exist yet. 670 * Any types that we encounter in this form, we need to add to a third pass. 671 * 672 * Pass 3 is the fixup pass. Here we go through and find all the types that were 673 * missing in the first. 674 * 675 * Importantly, we *must* call ctf_update between the second and third pass, 676 * otherwise several of the libctf functions will not properly find the data in 677 * the container. If we're doing a dedup we also fix up the type mapping. 678 */ 679 static int 680 ctf_merge_common(ctf_merge_types_t *cmp) 681 { 682 int ret, i; 683 684 ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL); 685 ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL); 686 687 /* Pass 1 */ 688 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 689 if (cmp->cm_tmap[i].cmt_forward == B_TRUE) { 690 ret = ctf_merge_add_sou(cmp, i, B_TRUE); 691 if (ret != 0) { 692 return (ret); 693 } 694 } 695 } 696 697 /* Pass 2 */ 698 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 699 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) { 700 ret = ctf_merge_add_type(cmp, i); 701 if (ret != 0) { 702 ctf_dprintf("Failed to merge type %d\n", i); 703 return (ret); 704 } 705 } 706 } 707 708 ret = ctf_update(cmp->cm_out); 709 if (ret != 0) 710 return (ret); 711 712 if (cmp->cm_dedup == B_TRUE) { 713 ctf_merge_dedup_remap(cmp); 714 } 715 716 ctf_dprintf("Beginning merge pass 3\n"); 717 /* Pass 3 */ 718 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 719 if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) { 720 ret = ctf_merge_fixup_type(cmp, i); 721 if (ret != 0) 722 return (ret); 723 } 724 } 725 726 return (0); 727 } 728 729 /* 730 * Uniquification is slightly different from a stock merge. For starters, we 731 * don't need to replace any forward references in the output. In this case 732 * though, the types that already exist are in a parent container to the empty 733 * output container. 734 */ 735 static int 736 ctf_merge_uniquify_types(ctf_merge_types_t *cmp) 737 { 738 int i, ret; 739 740 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 741 if (cmp->cm_tmap[i].cmt_missing == B_FALSE) 742 continue; 743 ret = ctf_merge_add_type(cmp, i); 744 if (ret != 0) 745 return (ret); 746 } 747 748 ret = ctf_update(cmp->cm_out); 749 if (ret != 0) 750 return (ret); 751 752 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 753 if (cmp->cm_tmap[i].cmt_fixup == B_FALSE) 754 continue; 755 ret = ctf_merge_fixup_type(cmp, i); 756 if (ret != 0) 757 return (ret); 758 } 759 760 return (0); 761 } 762 763 static int 764 ctf_merge_types_init(ctf_merge_types_t *cmp) 765 { 766 cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) * 767 (cmp->cm_src->ctf_typemax + 1)); 768 if (cmp->cm_tmap == NULL) 769 return (ctf_set_errno(cmp->cm_out, ENOMEM)); 770 bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) * 771 (cmp->cm_src->ctf_typemax + 1)); 772 return (0); 773 } 774 775 static void 776 ctf_merge_types_fini(ctf_merge_types_t *cmp) 777 { 778 ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) * 779 (cmp->cm_src->ctf_typemax + 1)); 780 } 781 782 /* 783 * After performing a pass, we need to go through the object and function type 784 * maps and potentially fix them up based on the new maps that we have. 785 */ 786 static void 787 ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi) 788 { 789 ctf_merge_objmap_t *cmo; 790 ctf_merge_funcmap_t *cmf; 791 792 for (cmo = list_head(&cmi->cmi_omap); cmo != NULL; 793 cmo = list_next(&cmi->cmi_omap, cmo)) { 794 VERIFY3S(cmo->cmo_tid, !=, 0); 795 VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0); 796 cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map; 797 } 798 799 for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL; 800 cmf = list_next(&cmi->cmi_fmap, cmf)) { 801 int i; 802 803 VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0); 804 cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map; 805 for (i = 0; i < cmf->cmf_argc; i++) { 806 VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0); 807 cmf->cmf_args[i] = 808 cmp->cm_tmap[cmf->cmf_args[i]].cmt_map; 809 } 810 } 811 } 812 813 /* 814 * Merge the types contained inside of two input files. The second input file is 815 * always going to be the destination. We're guaranteed that it's always 816 * writeable. 817 */ 818 static int 819 ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued) 820 { 821 int ret; 822 ctf_merge_types_t cm; 823 ctf_diff_t *cdp; 824 ctf_merge_input_t *scmi = arg; 825 ctf_merge_input_t *dcmi = arg2; 826 ctf_file_t *out = dcmi->cmi_input; 827 ctf_file_t *source = scmi->cmi_input; 828 829 ctf_dprintf("merging %p->%p\n", source, out); 830 831 if (!(out->ctf_flags & LCTF_RDWR)) 832 return (ctf_set_errno(out, ECTF_RDONLY)); 833 834 if (ctf_getmodel(out) != ctf_getmodel(source)) 835 return (ctf_set_errno(out, ECTF_DMODEL)); 836 837 if ((ret = ctf_diff_init(out, source, &cdp)) != 0) 838 return (ret); 839 840 cm.cm_out = out; 841 cm.cm_src = source; 842 cm.cm_dedup = B_FALSE; 843 cm.cm_unique = B_FALSE; 844 ret = ctf_merge_types_init(&cm); 845 if (ret != 0) { 846 ctf_diff_fini(cdp); 847 return (ctf_set_errno(out, ret)); 848 } 849 850 ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm); 851 if (ret != 0) 852 goto cleanup; 853 ret = ctf_merge_common(&cm); 854 ctf_dprintf("merge common returned with %d\n", ret); 855 if (ret == 0) { 856 ret = ctf_update(out); 857 ctf_dprintf("update returned with %d\n", ret); 858 } else { 859 goto cleanup; 860 } 861 862 /* 863 * Now we need to fix up the object and function maps. 864 */ 865 ctf_merge_fixup_symmaps(&cm, scmi); 866 867 /* 868 * Now that we've fixed things up, we need to give our function and 869 * object maps to the destination, such that it can continue to update 870 * them going forward. 871 */ 872 list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap); 873 list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap); 874 875 cleanup: 876 if (ret == 0) 877 *outp = dcmi; 878 ctf_merge_types_fini(&cm); 879 ctf_diff_fini(cdp); 880 if (ret != 0) 881 return (ctf_errno(out)); 882 ctf_phase_bump(); 883 return (0); 884 } 885 886 static int 887 ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp) 888 { 889 int err, ret; 890 ctf_file_t *out; 891 ctf_merge_types_t cm; 892 ctf_diff_t *cdp; 893 ctf_merge_input_t *cmi; 894 ctf_file_t *parent = cmh->cmh_unique; 895 896 *outp = NULL; 897 out = ctf_fdcreate(cmh->cmh_ofd, &err); 898 if (out == NULL) 899 return (ctf_set_errno(src, err)); 900 901 out->ctf_parname = cmh->cmh_pname; 902 if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) { 903 (void) ctf_set_errno(src, ctf_errno(out)); 904 ctf_close(out); 905 return (CTF_ERR); 906 } 907 908 if (ctf_import(out, parent) != 0) { 909 (void) ctf_set_errno(src, ctf_errno(out)); 910 ctf_close(out); 911 return (CTF_ERR); 912 } 913 914 if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) { 915 ctf_close(out); 916 return (ctf_set_errno(src, ctf_errno(parent))); 917 } 918 919 cm.cm_out = parent; 920 cm.cm_src = src; 921 cm.cm_dedup = B_FALSE; 922 cm.cm_unique = B_TRUE; 923 ret = ctf_merge_types_init(&cm); 924 if (ret != 0) { 925 ctf_close(out); 926 ctf_diff_fini(cdp); 927 return (ctf_set_errno(src, ret)); 928 } 929 930 ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm); 931 if (ret == 0) { 932 cm.cm_out = out; 933 ret = ctf_merge_uniquify_types(&cm); 934 if (ret == 0) 935 ret = ctf_update(out); 936 } 937 938 if (ret != 0) { 939 ctf_merge_types_fini(&cm); 940 ctf_diff_fini(cdp); 941 return (ctf_set_errno(src, ctf_errno(cm.cm_out))); 942 } 943 944 for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL; 945 cmi = list_next(&cmh->cmh_inputs, cmi)) { 946 ctf_merge_fixup_symmaps(&cm, cmi); 947 } 948 949 ctf_merge_types_fini(&cm); 950 ctf_diff_fini(cdp); 951 *outp = out; 952 return (0); 953 } 954 955 static void 956 ctf_merge_fini_input(ctf_merge_input_t *cmi) 957 { 958 ctf_merge_objmap_t *cmo; 959 ctf_merge_funcmap_t *cmf; 960 961 while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL) 962 ctf_free(cmo, sizeof (ctf_merge_objmap_t)); 963 964 while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL) 965 ctf_free(cmf, sizeof (ctf_merge_funcmap_t) + 966 sizeof (ctf_id_t) * cmf->cmf_argc); 967 968 if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL) 969 ctf_close(cmi->cmi_input); 970 971 ctf_free(cmi, sizeof (ctf_merge_input_t)); 972 } 973 974 void 975 ctf_merge_fini(ctf_merge_t *cmh) 976 { 977 size_t len; 978 ctf_merge_input_t *cmi; 979 980 if (cmh->cmh_label != NULL) { 981 len = strlen(cmh->cmh_label) + 1; 982 ctf_free(cmh->cmh_label, len); 983 } 984 985 if (cmh->cmh_pname != NULL) { 986 len = strlen(cmh->cmh_pname) + 1; 987 ctf_free(cmh->cmh_pname, len); 988 } 989 990 while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL) 991 ctf_merge_fini_input(cmi); 992 993 ctf_free(cmh, sizeof (ctf_merge_t)); 994 } 995 996 ctf_merge_t * 997 ctf_merge_init(int fd, int *errp) 998 { 999 int err; 1000 ctf_merge_t *out; 1001 struct stat st; 1002 1003 if (errp == NULL) 1004 errp = &err; 1005 1006 if (fd != -1 && fstat(fd, &st) != 0) { 1007 *errp = EINVAL; 1008 return (NULL); 1009 } 1010 1011 out = ctf_alloc(sizeof (ctf_merge_t)); 1012 if (out == NULL) { 1013 *errp = ENOMEM; 1014 return (NULL); 1015 } 1016 1017 if (fd == -1) { 1018 out->cmh_msyms = B_FALSE; 1019 } else { 1020 out->cmh_msyms = B_TRUE; 1021 } 1022 1023 list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t), 1024 offsetof(ctf_merge_input_t, cmi_node)); 1025 out->cmh_ninputs = 0; 1026 out->cmh_nthreads = 1; 1027 out->cmh_unique = NULL; 1028 out->cmh_ofd = fd; 1029 out->cmh_flags = 0; 1030 out->cmh_label = NULL; 1031 out->cmh_pname = NULL; 1032 1033 return (out); 1034 } 1035 1036 int 1037 ctf_merge_label(ctf_merge_t *cmh, const char *label) 1038 { 1039 char *dup; 1040 1041 if (label == NULL) 1042 return (EINVAL); 1043 1044 dup = ctf_strdup(label); 1045 if (dup == NULL) 1046 return (EAGAIN); 1047 1048 if (cmh->cmh_label != NULL) { 1049 size_t len = strlen(cmh->cmh_label) + 1; 1050 ctf_free(cmh->cmh_label, len); 1051 } 1052 1053 cmh->cmh_label = dup; 1054 return (0); 1055 } 1056 1057 static int 1058 ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx, 1059 const char *file, const char *name, const Elf64_Sym *symp) 1060 { 1061 ctf_merge_funcmap_t *fmap; 1062 1063 fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) + 1064 sizeof (ctf_id_t) * fip->ctc_argc); 1065 if (fmap == NULL) 1066 return (ENOMEM); 1067 1068 fmap->cmf_idx = idx; 1069 fmap->cmf_sym = *symp; 1070 fmap->cmf_rtid = fip->ctc_return; 1071 fmap->cmf_flags = fip->ctc_flags; 1072 fmap->cmf_argc = fip->ctc_argc; 1073 fmap->cmf_name = name; 1074 if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) { 1075 fmap->cmf_file = file; 1076 } else { 1077 fmap->cmf_file = NULL; 1078 } 1079 1080 if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc, 1081 fmap->cmf_args) != 0) { 1082 ctf_free(fmap, sizeof (ctf_merge_funcmap_t) + 1083 sizeof (ctf_id_t) * fip->ctc_argc); 1084 return (ctf_errno(cmi->cmi_input)); 1085 } 1086 1087 ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx, 1088 fmap->cmf_file != NULL ? fmap->cmf_file : "global", 1089 ELF64_ST_BIND(symp->st_info)); 1090 list_insert_tail(&cmi->cmi_fmap, fmap); 1091 return (0); 1092 } 1093 1094 static int 1095 ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx, 1096 const char *file, const char *name, const Elf64_Sym *symp) 1097 { 1098 ctf_merge_objmap_t *cmo; 1099 1100 cmo = ctf_alloc(sizeof (ctf_merge_objmap_t)); 1101 if (cmo == NULL) 1102 return (ENOMEM); 1103 1104 cmo->cmo_name = name; 1105 if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) { 1106 cmo->cmo_file = file; 1107 } else { 1108 cmo->cmo_file = NULL; 1109 } 1110 cmo->cmo_idx = idx; 1111 cmo->cmo_tid = id; 1112 cmo->cmo_sym = *symp; 1113 list_insert_tail(&cmi->cmi_omap, cmo); 1114 1115 ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id, 1116 cmo->cmo_file != NULL ? cmo->cmo_file : "global"); 1117 1118 return (0); 1119 } 1120 1121 static int 1122 ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file, 1123 const char *name, boolean_t primary, void *arg) 1124 { 1125 ctf_merge_input_t *cmi = arg; 1126 ctf_file_t *fp = cmi->cmi_input; 1127 ushort_t *data, funcbase; 1128 uint_t type; 1129 ctf_funcinfo_t fi; 1130 1131 /* 1132 * See if there is type information for this. If there is no 1133 * type information for this entry or no translation, then we 1134 * will find the value zero. This indicates no type ID for 1135 * objects and encodes unknown information for functions. 1136 */ 1137 if (fp->ctf_sxlate[idx] == -1u) 1138 return (0); 1139 data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]); 1140 if (*data == 0) 1141 return (0); 1142 1143 type = ELF64_ST_TYPE(symp->st_info); 1144 1145 switch (type) { 1146 case STT_FUNC: 1147 funcbase = *data; 1148 if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION) 1149 return (0); 1150 data++; 1151 fi.ctc_return = *data; 1152 data++; 1153 fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase); 1154 fi.ctc_flags = 0; 1155 1156 if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) { 1157 fi.ctc_flags |= CTF_FUNC_VARARG; 1158 fi.ctc_argc--; 1159 } 1160 return (ctf_merge_add_function(cmi, &fi, idx, file, name, 1161 symp)); 1162 case STT_OBJECT: 1163 return (ctf_merge_add_object(cmi, *data, idx, file, name, 1164 symp)); 1165 default: 1166 return (0); 1167 } 1168 } 1169 1170 /* 1171 * Whenever we create an entry to merge, we then go and add a second empty 1172 * ctf_file_t which we use for the purposes of our merging. It's not the best, 1173 * but it's the best that we've got at the moment. 1174 */ 1175 int 1176 ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input) 1177 { 1178 int ret; 1179 ctf_merge_input_t *cmi; 1180 ctf_file_t *empty; 1181 1182 ctf_dprintf("adding input %p\n", input); 1183 1184 if (input->ctf_flags & LCTF_CHILD) 1185 return (ECTF_MCHILD); 1186 1187 cmi = ctf_alloc(sizeof (ctf_merge_input_t)); 1188 if (cmi == NULL) 1189 return (ENOMEM); 1190 1191 cmi->cmi_created = B_FALSE; 1192 cmi->cmi_input = input; 1193 list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t), 1194 offsetof(ctf_merge_funcmap_t, cmf_node)); 1195 list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t), 1196 offsetof(ctf_merge_objmap_t, cmo_node)); 1197 1198 if (cmh->cmh_msyms == B_TRUE) { 1199 if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol, 1200 cmi)) != 0) { 1201 ctf_merge_fini_input(cmi); 1202 return (ret); 1203 } 1204 } 1205 1206 list_insert_tail(&cmh->cmh_inputs, cmi); 1207 cmh->cmh_ninputs++; 1208 1209 /* And now the empty one to merge into this */ 1210 cmi = ctf_alloc(sizeof (ctf_merge_input_t)); 1211 if (cmi == NULL) 1212 return (ENOMEM); 1213 list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t), 1214 offsetof(ctf_merge_funcmap_t, cmf_node)); 1215 list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t), 1216 offsetof(ctf_merge_objmap_t, cmo_node)); 1217 1218 empty = ctf_fdcreate(cmh->cmh_ofd, &ret); 1219 if (empty == NULL) 1220 return (ret); 1221 cmi->cmi_input = empty; 1222 cmi->cmi_created = B_TRUE; 1223 1224 if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) { 1225 return (ctf_errno(empty)); 1226 } 1227 1228 list_insert_tail(&cmh->cmh_inputs, cmi); 1229 cmh->cmh_ninputs++; 1230 ctf_dprintf("added containers %p and %p\n", input, empty); 1231 return (0); 1232 } 1233 1234 int 1235 ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname) 1236 { 1237 char *dup; 1238 1239 if (u->ctf_flags & LCTF_CHILD) 1240 return (ECTF_MCHILD); 1241 if (pname == NULL) 1242 return (EINVAL); 1243 dup = ctf_strdup(pname); 1244 if (dup == NULL) 1245 return (EINVAL); 1246 if (cmh->cmh_pname != NULL) { 1247 size_t len = strlen(cmh->cmh_pname) + 1; 1248 ctf_free(cmh->cmh_pname, len); 1249 } 1250 cmh->cmh_pname = dup; 1251 cmh->cmh_unique = u; 1252 return (0); 1253 } 1254 1255 /* 1256 * Symbol matching rules: the purpose of this is to verify that the type 1257 * information that we have for a given symbol actually matches the output 1258 * symbol. This is unfortunately complicated by several different factors: 1259 * 1260 * 1. When merging multiple .o's into a single item, the symbol table index will 1261 * not match. 1262 * 1263 * 2. Visibility of a symbol may not be identical to the object file or the 1264 * DWARF information due to symbol reduction via a mapfile. 1265 * 1266 * As such, we have to employ the following rules: 1267 * 1268 * 1. A global symbol table entry always matches a global CTF symbol with the 1269 * same name. 1270 * 1271 * 2. A local symbol table entry always matches a local CTF symbol if they have 1272 * the same name and they belong to the same file. 1273 * 1274 * 3. A weak symbol matches a non-weak symbol. This happens if we find that the 1275 * types match, the values match, the sizes match, and the section indexes 1276 * match. This happens when we do a conversion in one pass, it almost never 1277 * happens when we're merging multiple object files. If we match a CTF global 1278 * symbol, that's a fixed match, otherwise it's a fuzzy match. 1279 * 1280 * 4. A local symbol table entry matches a global CTF entry if the 1281 * other pieces fail, but they have the same name. This is considered a fuzzy 1282 * match and is not used unless we have no other options. 1283 * 1284 * 5. A weak symbol table entry matches a weak CTF entry if the other pieces 1285 * fail, but they have the same name. This is considered a fuzzy match and is 1286 * not used unless we have no other options. When merging independent .o files, 1287 * this is often the only recourse we have to matching weak symbols. 1288 * 1289 * In the end, this would all be much simpler if we were able to do this as part 1290 * of libld which would be able to do all the symbol transformations. 1291 */ 1292 static boolean_t 1293 ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name, 1294 const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name, 1295 const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy) 1296 { 1297 *is_fuzzy = B_FALSE; 1298 uint_t symtab_bind, ctf_bind; 1299 1300 symtab_bind = ELF64_ST_BIND(symtab_symp->st_info); 1301 ctf_bind = ELF64_ST_BIND(ctf_symp->st_info); 1302 1303 ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n", 1304 symtab_file, symtab_name, symtab_bind, 1305 ctf_file, ctf_name, ctf_bind); 1306 if (strcmp(ctf_name, symtab_name) != 0) { 1307 return (B_FALSE); 1308 } 1309 1310 if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) { 1311 return (B_TRUE); 1312 } else if (symtab_bind == STB_GLOBAL) { 1313 return (B_FALSE); 1314 } 1315 1316 if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind && 1317 ctf_file != NULL && symtab_file != NULL && 1318 strcmp(ctf_file, symtab_file) == 0) { 1319 return (B_TRUE); 1320 } 1321 1322 if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK && 1323 ELF64_ST_TYPE(symtab_symp->st_info) == 1324 ELF64_ST_TYPE(ctf_symp->st_info) && 1325 symtab_symp->st_value == ctf_symp->st_value && 1326 symtab_symp->st_size == ctf_symp->st_size && 1327 symtab_symp->st_shndx == ctf_symp->st_shndx) { 1328 if (ctf_bind == STB_GLOBAL) { 1329 return (B_TRUE); 1330 } 1331 1332 if (ctf_bind == STB_LOCAL && ctf_file != NULL && 1333 symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) { 1334 *is_fuzzy = B_TRUE; 1335 return (B_TRUE); 1336 } 1337 } 1338 1339 if (ctf_bind == STB_GLOBAL || 1340 (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) { 1341 *is_fuzzy = B_TRUE; 1342 return (B_TRUE); 1343 } 1344 1345 return (B_FALSE); 1346 } 1347 1348 /* 1349 * For each symbol, try and find a match. We will attempt to find an exact 1350 * match; however, we will settle for a fuzzy match in general. There is one 1351 * case where we will not opt to use a fuzzy match, which is when performing the 1352 * deduplication of a container. In such a case we are trying to reduce common 1353 * types and a fuzzy match would be inappropriate as if we're in the context of 1354 * a single container, the conversion process should have identified any exact 1355 * or fuzzy matches that were required. 1356 */ 1357 static int 1358 ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file, 1359 const char *name, boolean_t primary, void *arg) 1360 { 1361 int err; 1362 uint_t type, bind; 1363 ctf_merge_symbol_arg_t *csa = arg; 1364 ctf_file_t *fp = csa->cmsa_out; 1365 1366 type = ELF64_ST_TYPE(symp->st_info); 1367 bind = ELF64_ST_BIND(symp->st_info); 1368 1369 ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name, 1370 ELF64_ST_BIND(symp->st_info)); 1371 1372 if (type == STT_OBJECT) { 1373 ctf_merge_objmap_t *cmo, *match = NULL; 1374 1375 for (cmo = list_head(csa->cmsa_objmap); cmo != NULL; 1376 cmo = list_next(csa->cmsa_objmap, cmo)) { 1377 boolean_t is_fuzzy = B_FALSE; 1378 if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name, 1379 &cmo->cmo_sym, file, name, symp, &is_fuzzy)) { 1380 if (is_fuzzy && csa->cmsa_dedup && 1381 bind != STB_WEAK) { 1382 continue; 1383 } 1384 match = cmo; 1385 if (is_fuzzy) { 1386 continue; 1387 } 1388 break; 1389 } 1390 } 1391 1392 if (match == NULL) { 1393 return (0); 1394 } 1395 1396 if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) { 1397 ctf_dprintf("Failed to add symbol %s->%d: %s\n", name, 1398 match->cmo_tid, ctf_errmsg(ctf_errno(fp))); 1399 return (ctf_errno(fp)); 1400 } 1401 ctf_dprintf("mapped object into output %s/%s->%ld\n", file, 1402 name, match->cmo_tid); 1403 } else { 1404 ctf_merge_funcmap_t *cmf, *match = NULL; 1405 ctf_funcinfo_t fi; 1406 1407 for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL; 1408 cmf = list_next(csa->cmsa_funcmap, cmf)) { 1409 boolean_t is_fuzzy = B_FALSE; 1410 if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name, 1411 &cmf->cmf_sym, file, name, symp, &is_fuzzy)) { 1412 if (is_fuzzy && csa->cmsa_dedup && 1413 bind != STB_WEAK) { 1414 continue; 1415 } 1416 match = cmf; 1417 if (is_fuzzy) { 1418 continue; 1419 } 1420 break; 1421 } 1422 } 1423 1424 if (match == NULL) { 1425 return (0); 1426 } 1427 1428 fi.ctc_return = match->cmf_rtid; 1429 fi.ctc_argc = match->cmf_argc; 1430 fi.ctc_flags = match->cmf_flags; 1431 if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) != 1432 0) { 1433 ctf_dprintf("Failed to add function %s: %s\n", name, 1434 ctf_errmsg(ctf_errno(fp))); 1435 return (ctf_errno(fp)); 1436 } 1437 ctf_dprintf("mapped function into output %s/%s\n", file, 1438 name); 1439 } 1440 1441 return (0); 1442 } 1443 1444 int 1445 ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp) 1446 { 1447 int err, merr; 1448 ctf_merge_input_t *cmi; 1449 ctf_id_t ltype; 1450 mergeq_t *mqp; 1451 ctf_merge_input_t *final; 1452 ctf_file_t *out; 1453 1454 ctf_dprintf("Beginning ctf_merge_merge()\n"); 1455 if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) { 1456 const char *label = ctf_label_topmost(cmh->cmh_unique); 1457 if (label == NULL) 1458 return (ECTF_NOLABEL); 1459 if (strcmp(label, cmh->cmh_label) != 0) 1460 return (ECTF_LCONFLICT); 1461 } 1462 1463 if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) { 1464 return (errno); 1465 } 1466 1467 VERIFY(cmh->cmh_ninputs % 2 == 0); 1468 for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL; 1469 cmi = list_next(&cmh->cmh_inputs, cmi)) { 1470 if (mergeq_add(mqp, cmi) == -1) { 1471 err = errno; 1472 mergeq_fini(mqp); 1473 } 1474 } 1475 1476 err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr); 1477 mergeq_fini(mqp); 1478 1479 if (err == MERGEQ_ERROR) { 1480 return (errno); 1481 } else if (err == MERGEQ_UERROR) { 1482 return (merr); 1483 } 1484 1485 /* 1486 * Disassociate the generated ctf_file_t from the original input. That 1487 * way when the input gets cleaned up, we don't accidentally kill the 1488 * final reference to the ctf_file_t. If it gets uniquified then we'll 1489 * kill it. 1490 */ 1491 VERIFY(final->cmi_input != NULL); 1492 out = final->cmi_input; 1493 final->cmi_input = NULL; 1494 1495 ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique); 1496 if (cmh->cmh_unique != NULL) { 1497 ctf_file_t *u; 1498 err = ctf_uniquify_types(cmh, out, &u); 1499 if (err != 0) { 1500 err = ctf_errno(out); 1501 ctf_close(out); 1502 return (err); 1503 } 1504 ctf_close(out); 1505 out = u; 1506 } 1507 1508 ltype = out->ctf_typemax; 1509 if ((out->ctf_flags & LCTF_CHILD) && ltype != 0) 1510 ltype += CTF_CHILD_START; 1511 ctf_dprintf("trying to add the label\n"); 1512 if (cmh->cmh_label != NULL && 1513 ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) { 1514 ctf_close(out); 1515 return (ctf_errno(out)); 1516 } 1517 1518 ctf_dprintf("merging symbols and the like\n"); 1519 if (cmh->cmh_msyms == B_TRUE) { 1520 ctf_merge_symbol_arg_t arg; 1521 arg.cmsa_objmap = &final->cmi_omap; 1522 arg.cmsa_funcmap = &final->cmi_fmap; 1523 arg.cmsa_out = out; 1524 arg.cmsa_dedup = B_FALSE; 1525 err = ctf_symtab_iter(out, ctf_merge_symbols, &arg); 1526 if (err != 0) { 1527 ctf_close(out); 1528 return (err); 1529 } 1530 } 1531 1532 err = ctf_update(out); 1533 if (err != 0) { 1534 err = ctf_errno(out); 1535 ctf_close(out); 1536 return (err); 1537 } 1538 1539 *outp = out; 1540 return (0); 1541 } 1542 1543 /* 1544 * When we get told that something is unique, eg. same is B_FALSE, then that 1545 * tells us that we need to add it to the output. If same is B_TRUE, then we'll 1546 * want to record it in the mapping table so that we know how to redirect types 1547 * to the extant ones. 1548 */ 1549 static void 1550 ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp, 1551 ctf_id_t oid, void *arg) 1552 { 1553 ctf_merge_types_t *cmp = arg; 1554 ctf_merge_tinfo_t *cmt = cmp->cm_tmap; 1555 1556 if (same == B_TRUE) { 1557 /* 1558 * The output id here may itself map to something else. 1559 * Therefore, we need to basically walk a chain and see what it 1560 * points to until it itself points to a base type, eg. -1. 1561 * Otherwise we'll dedup to something which no longer exists. 1562 */ 1563 while (cmt[oid].cmt_missing == B_FALSE) 1564 oid = cmt[oid].cmt_map; 1565 cmt[iid].cmt_map = oid; 1566 ctf_dprintf("%d->%d \n", iid, oid); 1567 } else { 1568 VERIFY(cmt[iid].cmt_map == 0); 1569 cmt[iid].cmt_missing = B_TRUE; 1570 ctf_dprintf("%d is missing\n", iid); 1571 } 1572 } 1573 1574 /* 1575 * Dedup a CTF container. 1576 * 1577 * DWARF and other encoding formats that we use to create CTF data may create 1578 * multiple copies of a given type. However, after doing a conversion, and 1579 * before doing a merge, we'd prefer, if possible, to have every input container 1580 * to be unique. 1581 * 1582 * Doing a deduplication is like a normal merge. However, when we diff the types 1583 * in the container, rather than doing a normal diff, we instead want to diff 1584 * against any already processed types. eg, for a given type i in a container, 1585 * we want to diff it from 0 to i - 1. 1586 */ 1587 int 1588 ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp) 1589 { 1590 int ret; 1591 ctf_diff_t *cdp = NULL; 1592 ctf_merge_input_t *cmi, *cmc; 1593 ctf_file_t *ifp, *ofp; 1594 ctf_merge_types_t cm; 1595 1596 if (cmp == NULL || outp == NULL) 1597 return (EINVAL); 1598 1599 ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs); 1600 if (cmp->cmh_ninputs != 2) 1601 return (EINVAL); 1602 1603 ctf_dprintf("passed argument sanity check\n"); 1604 1605 cmi = list_head(&cmp->cmh_inputs); 1606 VERIFY(cmi != NULL); 1607 cmc = list_next(&cmp->cmh_inputs, cmi); 1608 VERIFY(cmc != NULL); 1609 ifp = cmi->cmi_input; 1610 ofp = cmc->cmi_input; 1611 VERIFY(ifp != NULL); 1612 VERIFY(ofp != NULL); 1613 cm.cm_src = ifp; 1614 cm.cm_out = ofp; 1615 cm.cm_dedup = B_TRUE; 1616 cm.cm_unique = B_FALSE; 1617 1618 if ((ret = ctf_merge_types_init(&cm)) != 0) { 1619 return (ret); 1620 } 1621 1622 if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0) 1623 goto err; 1624 1625 ctf_dprintf("Successfully initialized dedup\n"); 1626 if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0) 1627 goto err; 1628 1629 ctf_dprintf("Successfully diffed types\n"); 1630 ret = ctf_merge_common(&cm); 1631 ctf_dprintf("deduping types result: %d\n", ret); 1632 if (ret == 0) 1633 ret = ctf_update(cm.cm_out); 1634 if (ret != 0) 1635 goto err; 1636 1637 ctf_dprintf("Successfully deduped types\n"); 1638 ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL); 1639 1640 /* 1641 * Now we need to fix up the object and function maps. 1642 */ 1643 ctf_merge_fixup_symmaps(&cm, cmi); 1644 1645 if (cmp->cmh_msyms == B_TRUE) { 1646 ctf_merge_symbol_arg_t arg; 1647 arg.cmsa_objmap = &cmi->cmi_omap; 1648 arg.cmsa_funcmap = &cmi->cmi_fmap; 1649 arg.cmsa_out = cm.cm_out; 1650 arg.cmsa_dedup = B_TRUE; 1651 ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg); 1652 if (ret != 0) { 1653 ctf_dprintf("failed to dedup symbols: %s\n", 1654 ctf_errmsg(ret)); 1655 goto err; 1656 } 1657 } 1658 1659 ret = ctf_update(cm.cm_out); 1660 if (ret == 0) { 1661 cmc->cmi_input = NULL; 1662 *outp = cm.cm_out; 1663 } 1664 ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL); 1665 err: 1666 ctf_merge_types_fini(&cm); 1667 ctf_diff_fini(cdp); 1668 return (ret); 1669 } 1670 1671 int 1672 ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs) 1673 { 1674 if (nthrs == 0) 1675 return (EINVAL); 1676 cmp->cmh_nthreads = nthrs; 1677 return (0); 1678 } 1679