1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2020 Joyent, Inc. 14 */ 15 16 /* 17 * To perform a merge of two CTF containers, we first diff the two containers 18 * types. For every type that's in the src container, but not in the dst 19 * container, we note it and add it to dst container. If there are any objects 20 * or functions associated with src, we go through and update the types that 21 * they refer to such that they all refer to types in the dst container. 22 * 23 * The bulk of the logic for the merge, after we've run the diff, occurs in 24 * ctf_merge_common(). 25 * 26 * In terms of exported APIs, we don't really export a simple merge two 27 * containers, as the general way this is used, in something like ctfmerge(1), 28 * is to add all the containers and then let us figure out the best way to merge 29 * it. 30 */ 31 32 #include <libctf_impl.h> 33 #include <sys/debug.h> 34 #include <sys/list.h> 35 #include <stddef.h> 36 #include <fcntl.h> 37 #include <sys/types.h> 38 #include <sys/stat.h> 39 #include <mergeq.h> 40 #include <errno.h> 41 42 typedef struct ctf_merge_tinfo { 43 uint16_t cmt_map; /* Map to the type in out */ 44 boolean_t cmt_fixup; 45 boolean_t cmt_forward; 46 boolean_t cmt_missing; 47 } ctf_merge_tinfo_t; 48 49 /* 50 * State required for doing an individual merge of two containers. 51 */ 52 typedef struct ctf_merge_types { 53 ctf_file_t *cm_out; /* Output CTF file */ 54 ctf_file_t *cm_src; /* Input CTF file */ 55 ctf_merge_tinfo_t *cm_tmap; /* Type state information */ 56 boolean_t cm_dedup; /* Are we doing a dedup? 
*/ 57 boolean_t cm_unique; /* are we doing a uniquify? */ 58 } ctf_merge_types_t; 59 60 typedef struct ctf_merge_objmap { 61 list_node_t cmo_node; 62 const char *cmo_name; /* Symbol name */ 63 const char *cmo_file; /* Symbol file */ 64 ulong_t cmo_idx; /* Symbol ID */ 65 Elf64_Sym cmo_sym; /* Symbol Entry */ 66 ctf_id_t cmo_tid; /* Type ID */ 67 } ctf_merge_objmap_t; 68 69 typedef struct ctf_merge_funcmap { 70 list_node_t cmf_node; 71 const char *cmf_name; /* Symbol name */ 72 const char *cmf_file; /* Symbol file */ 73 ulong_t cmf_idx; /* Symbol ID */ 74 Elf64_Sym cmf_sym; /* Symbol Entry */ 75 ctf_id_t cmf_rtid; /* Type ID */ 76 uint_t cmf_flags; /* ctf_funcinfo_t ctc_flags */ 77 uint_t cmf_argc; /* Number of arguments */ 78 ctf_id_t cmf_args[]; /* Types of arguments */ 79 } ctf_merge_funcmap_t; 80 81 typedef struct ctf_merge_input { 82 list_node_t cmi_node; 83 ctf_file_t *cmi_input; 84 list_t cmi_omap; 85 list_t cmi_fmap; 86 boolean_t cmi_created; 87 } ctf_merge_input_t; 88 89 struct ctf_merge_handle { 90 list_t cmh_inputs; /* Input list */ 91 uint_t cmh_ninputs; /* Number of inputs */ 92 uint_t cmh_nthreads; /* Number of threads to use */ 93 ctf_file_t *cmh_unique; /* ctf to uniquify against */ 94 boolean_t cmh_msyms; /* Should we merge symbols/funcs? 
*/ 95 int cmh_ofd; /* FD for output file */ 96 int cmh_flags; /* Flags that control merge behavior */ 97 char *cmh_label; /* Optional label */ 98 char *cmh_pname; /* Parent name */ 99 }; 100 101 typedef struct ctf_merge_symbol_arg { 102 list_t *cmsa_objmap; 103 list_t *cmsa_funcmap; 104 ctf_file_t *cmsa_out; 105 boolean_t cmsa_dedup; 106 } ctf_merge_symbol_arg_t; 107 108 static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t); 109 110 static ctf_id_t 111 ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id) 112 { 113 if (cmp->cm_dedup == B_FALSE) { 114 VERIFY(cmp->cm_tmap[id].cmt_map != 0); 115 return (cmp->cm_tmap[id].cmt_map); 116 } 117 118 while (cmp->cm_tmap[id].cmt_missing == B_FALSE) { 119 VERIFY(cmp->cm_tmap[id].cmt_map != 0); 120 id = cmp->cm_tmap[id].cmt_map; 121 } 122 VERIFY(cmp->cm_tmap[id].cmt_map != 0); 123 return (cmp->cm_tmap[id].cmt_map); 124 } 125 126 static void 127 ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp, 128 ctf_id_t oid, void *arg) 129 { 130 ctf_merge_types_t *cmp = arg; 131 ctf_merge_tinfo_t *cmt = cmp->cm_tmap; 132 133 if (same == B_TRUE) { 134 if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD && 135 ctf_type_kind(ofp, oid) != CTF_K_FORWARD) { 136 VERIFY(cmt[oid].cmt_map == 0); 137 138 /* 139 * If we're uniquifying types, it's possible for the 140 * container that we're uniquifying against to have a 141 * forward which exists in the container being reduced. 142 * For example, genunix has the machcpu structure as a 143 * forward which is actually in unix and we uniquify 144 * unix against genunix. In such cases, we explicitly do 145 * not do any mapping of the forward information, lest 146 * we risk losing the real definition. Instead, mark 147 * that it's missing. 
148 */ 149 if (cmp->cm_unique == B_TRUE) { 150 cmt[oid].cmt_missing = B_TRUE; 151 return; 152 } 153 154 cmt[oid].cmt_map = iid; 155 cmt[oid].cmt_forward = B_TRUE; 156 ctf_dprintf("merge diff forward mapped %d->%d\n", oid, 157 iid); 158 return; 159 } 160 161 /* 162 * We could have multiple things that a given type ends up 163 * matching in the world of forwards and pointers to forwards. 164 * For now just take the first one... 165 */ 166 if (cmt[oid].cmt_map != 0) 167 return; 168 cmt[oid].cmt_map = iid; 169 ctf_dprintf("merge diff mapped %d->%d\n", oid, iid); 170 } else if (ifp == cmp->cm_src) { 171 VERIFY(cmt[iid].cmt_map == 0); 172 cmt[iid].cmt_missing = B_TRUE; 173 ctf_dprintf("merge diff said %d is missing\n", iid); 174 } 175 } 176 177 static int 178 ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id) 179 { 180 int ret, flags; 181 const ctf_type_t *tp; 182 const char *name; 183 ctf_encoding_t en; 184 185 if (ctf_type_encoding(cmp->cm_src, id, &en) != 0) 186 return (CTF_ERR); 187 188 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 189 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 190 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 191 flags = CTF_ADD_ROOT; 192 else 193 flags = CTF_ADD_NONROOT; 194 195 ret = ctf_add_encoded(cmp->cm_out, flags, name, &en, 196 ctf_type_kind(cmp->cm_src, id)); 197 198 if (ret == CTF_ERR) 199 return (ret); 200 201 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 202 cmp->cm_tmap[id].cmt_map = ret; 203 return (0); 204 } 205 206 static int 207 ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id) 208 { 209 int ret, flags; 210 const ctf_type_t *tp; 211 ctf_arinfo_t ar; 212 213 if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR) 214 return (CTF_ERR); 215 216 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 217 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 218 flags = CTF_ADD_ROOT; 219 else 220 flags = CTF_ADD_NONROOT; 221 222 if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) { 223 ret = ctf_merge_add_type(cmp, ar.ctr_contents); 224 if (ret != 0) 225 return 
(ret); 226 ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0); 227 } 228 ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents); 229 230 if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) { 231 ret = ctf_merge_add_type(cmp, ar.ctr_index); 232 if (ret != 0) 233 return (ret); 234 ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0); 235 } 236 ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index); 237 238 ret = ctf_add_array(cmp->cm_out, flags, &ar); 239 if (ret == CTF_ERR) 240 return (ret); 241 242 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 243 cmp->cm_tmap[id].cmt_map = ret; 244 245 return (0); 246 } 247 248 static int 249 ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id) 250 { 251 int ret, flags; 252 const ctf_type_t *tp; 253 ctf_id_t reftype; 254 const char *name; 255 256 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 257 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 258 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 259 flags = CTF_ADD_ROOT; 260 else 261 flags = CTF_ADD_NONROOT; 262 263 reftype = ctf_type_reference(cmp->cm_src, id); 264 if (reftype == CTF_ERR) 265 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); 266 267 if (cmp->cm_tmap[reftype].cmt_map == 0) { 268 ret = ctf_merge_add_type(cmp, reftype); 269 if (ret != 0) 270 return (ret); 271 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0); 272 } 273 reftype = ctf_merge_gettype(cmp, reftype); 274 275 ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype, 276 ctf_type_kind(cmp->cm_src, id)); 277 if (ret == CTF_ERR) 278 return (ret); 279 280 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 281 cmp->cm_tmap[id].cmt_map = ret; 282 return (0); 283 } 284 285 static int 286 ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id) 287 { 288 int ret, flags; 289 const ctf_type_t *tp; 290 const char *name; 291 ctf_id_t reftype; 292 293 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 294 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 295 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 296 flags = CTF_ADD_ROOT; 297 else 298 flags = CTF_ADD_NONROOT; 
299 300 reftype = ctf_type_reference(cmp->cm_src, id); 301 if (reftype == CTF_ERR) 302 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); 303 304 if (cmp->cm_tmap[reftype].cmt_map == 0) { 305 ret = ctf_merge_add_type(cmp, reftype); 306 if (ret != 0) 307 return (ret); 308 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0); 309 } 310 reftype = ctf_merge_gettype(cmp, reftype); 311 312 ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype); 313 if (ret == CTF_ERR) 314 return (ret); 315 316 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 317 cmp->cm_tmap[id].cmt_map = ret; 318 return (0); 319 } 320 321 typedef struct ctf_merge_enum { 322 ctf_file_t *cme_fp; 323 ctf_id_t cme_id; 324 } ctf_merge_enum_t; 325 326 static int 327 ctf_merge_add_enumerator(const char *name, int value, void *arg) 328 { 329 ctf_merge_enum_t *cmep = arg; 330 331 return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) == 332 CTF_ERR); 333 } 334 335 static int 336 ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id) 337 { 338 int flags; 339 const ctf_type_t *tp; 340 const char *name; 341 ctf_id_t enumid; 342 ctf_merge_enum_t cme; 343 size_t size; 344 345 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 346 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 347 flags = CTF_ADD_ROOT; 348 else 349 flags = CTF_ADD_NONROOT; 350 351 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 352 size = ctf_get_ctt_size(cmp->cm_src, tp, NULL, NULL); 353 354 enumid = ctf_add_enum(cmp->cm_out, flags, name, size); 355 if (enumid == CTF_ERR) 356 return (enumid); 357 358 cme.cme_fp = cmp->cm_out; 359 cme.cme_id = enumid; 360 if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator, 361 &cme) != 0) 362 return (CTF_ERR); 363 364 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 365 cmp->cm_tmap[id].cmt_map = enumid; 366 return (0); 367 } 368 369 static int 370 ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id) 371 { 372 int ret, flags, i; 373 const ctf_type_t *tp; 374 ctf_funcinfo_t ctc; 375 ctf_id_t *argv; 376 377 tp = 
LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 378 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 379 flags = CTF_ADD_ROOT; 380 else 381 flags = CTF_ADD_NONROOT; 382 383 if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR) 384 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); 385 386 argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc); 387 if (argv == NULL) 388 return (ctf_set_errno(cmp->cm_out, ENOMEM)); 389 if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) == 390 CTF_ERR) { 391 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc); 392 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src))); 393 } 394 395 if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) { 396 ret = ctf_merge_add_type(cmp, ctc.ctc_return); 397 if (ret != 0) 398 return (ret); 399 ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0); 400 } 401 ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return); 402 403 for (i = 0; i < ctc.ctc_argc; i++) { 404 if (cmp->cm_tmap[argv[i]].cmt_map == 0) { 405 ret = ctf_merge_add_type(cmp, argv[i]); 406 if (ret != 0) 407 return (ret); 408 ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0); 409 } 410 argv[i] = ctf_merge_gettype(cmp, argv[i]); 411 } 412 413 ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv); 414 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc); 415 if (ret == CTF_ERR) 416 return (ret); 417 418 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 419 cmp->cm_tmap[id].cmt_map = ret; 420 return (0); 421 } 422 423 static int 424 ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id) 425 { 426 int ret, flags; 427 const ctf_type_t *tp; 428 const char *name; 429 430 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 431 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 432 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 433 flags = CTF_ADD_ROOT; 434 else 435 flags = CTF_ADD_NONROOT; 436 437 /* 438 * ctf_add_forward tries to check to see if a given forward already 439 * exists in one of its hash tables. 
If we're here then we know that we 440 * have a forward in a container that isn't present in another. 441 * Therefore, we choose a token hash table to satisfy the API choice 442 * here. 443 */ 444 ret = ctf_add_forward(cmp->cm_out, flags, name, CTF_K_STRUCT); 445 if (ret == CTF_ERR) 446 return (CTF_ERR); 447 448 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 449 cmp->cm_tmap[id].cmt_map = ret; 450 return (0); 451 } 452 453 typedef struct ctf_merge_su { 454 ctf_merge_types_t *cms_cm; 455 ctf_id_t cms_id; 456 } ctf_merge_su_t; 457 458 static int 459 ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg) 460 { 461 ctf_merge_su_t *cms = arg; 462 463 VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0); 464 type = cms->cms_cm->cm_tmap[type].cmt_map; 465 466 ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id); 467 return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name, 468 type, offset) == CTF_ERR); 469 } 470 471 /* 472 * During the first pass, we always add the generic structure and union but none 473 * of its members as they might not all have been mapped yet. Instead we just 474 * mark all structures and unions as needing to be fixed up. 475 */ 476 static int 477 ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward) 478 { 479 int flags, kind; 480 const ctf_type_t *tp; 481 const char *name; 482 ctf_id_t suid; 483 484 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id); 485 name = ctf_strraw(cmp->cm_src, tp->ctt_name); 486 if (CTF_INFO_ISROOT(tp->ctt_info) != 0) 487 flags = CTF_ADD_ROOT; 488 else 489 flags = CTF_ADD_NONROOT; 490 kind = ctf_type_kind(cmp->cm_src, id); 491 492 if (kind == CTF_K_STRUCT) 493 suid = ctf_add_struct(cmp->cm_out, flags, name); 494 else 495 suid = ctf_add_union(cmp->cm_out, flags, name); 496 497 if (suid == CTF_ERR) 498 return (suid); 499 500 /* 501 * If this is a forward reference then its mapping should already 502 * exist. 
503 */ 504 if (forward == B_FALSE) { 505 VERIFY(cmp->cm_tmap[id].cmt_map == 0); 506 cmp->cm_tmap[id].cmt_map = suid; 507 ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id, 508 suid); 509 } else { 510 VERIFY(cmp->cm_tmap[id].cmt_map == suid); 511 } 512 cmp->cm_tmap[id].cmt_fixup = B_TRUE; 513 514 return (0); 515 } 516 517 static int 518 ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id) 519 { 520 int kind, ret; 521 522 /* 523 * We may end up evaluating a type more than once as we may deal with it 524 * as we recursively evaluate some kind of reference and then we may see 525 * it normally. 526 */ 527 if (cmp->cm_tmap[id].cmt_map != 0) 528 return (0); 529 530 kind = ctf_type_kind(cmp->cm_src, id); 531 switch (kind) { 532 case CTF_K_INTEGER: 533 case CTF_K_FLOAT: 534 ret = ctf_merge_add_number(cmp, id); 535 break; 536 case CTF_K_ARRAY: 537 ret = ctf_merge_add_array(cmp, id); 538 break; 539 case CTF_K_POINTER: 540 case CTF_K_VOLATILE: 541 case CTF_K_CONST: 542 case CTF_K_RESTRICT: 543 ret = ctf_merge_add_reftype(cmp, id); 544 break; 545 case CTF_K_TYPEDEF: 546 ret = ctf_merge_add_typedef(cmp, id); 547 break; 548 case CTF_K_ENUM: 549 ret = ctf_merge_add_enum(cmp, id); 550 break; 551 case CTF_K_FUNCTION: 552 ret = ctf_merge_add_func(cmp, id); 553 break; 554 case CTF_K_FORWARD: 555 ret = ctf_merge_add_forward(cmp, id); 556 break; 557 case CTF_K_STRUCT: 558 case CTF_K_UNION: 559 ret = ctf_merge_add_sou(cmp, id, B_FALSE); 560 break; 561 case CTF_K_UNKNOWN: 562 /* 563 * We don't add unknown types, and we later assert that nothing 564 * should reference them. 
565 */ 566 return (0); 567 default: 568 abort(); 569 } 570 571 return (ret); 572 } 573 574 static int 575 ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id) 576 { 577 ctf_dtdef_t *dtd; 578 ctf_merge_su_t cms; 579 ctf_id_t mapid; 580 ssize_t size; 581 582 mapid = cmp->cm_tmap[id].cmt_map; 583 VERIFY(mapid != 0); 584 dtd = ctf_dtd_lookup(cmp->cm_out, mapid); 585 VERIFY(dtd != NULL); 586 587 ctf_dprintf("Trying to fix up sou %d\n", id); 588 cms.cms_cm = cmp; 589 cms.cms_id = mapid; 590 if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0) 591 return (CTF_ERR); 592 593 if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR) 594 return (CTF_ERR); 595 if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR) 596 return (CTF_ERR); 597 598 return (0); 599 } 600 601 static int 602 ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id) 603 { 604 int kind, ret; 605 606 kind = ctf_type_kind(cmp->cm_src, id); 607 switch (kind) { 608 case CTF_K_STRUCT: 609 case CTF_K_UNION: 610 ret = ctf_merge_fixup_sou(cmp, id); 611 break; 612 default: 613 VERIFY(0); 614 ret = CTF_ERR; 615 } 616 617 return (ret); 618 } 619 620 /* 621 * Now that we've successfully merged everything, we're going to remap the type 622 * table. 623 * 624 * Remember we have two containers: ->cm_src is what we're working from, and 625 * ->cm_out is where we are building the de-duplicated CTF. 626 * 627 * The index of this table is always the type IDs in ->cm_src. 628 * 629 * When we built this table originally in ctf_diff_self(), if we found a novel 630 * type, we marked it as .cmt_missing to indicate it needs adding to ->cm_out. 631 * Otherwise, .cmt_map indicated the ->cm_src type ID that this type duplicates. 632 * 633 * Then, in ctf_merge_common(), we walked through and added all "cmt_missing" 634 * types to ->cm_out with ctf_merge_add_type(). These routines update cmt_map 635 * to be the *new* type ID in ->cm_out. 
In this function, you can read
 * "cmt_missing" as meaning "added to ->cm_out, and cmt_map updated".
 *
 * So at this point, we need to mop up all types where .cmt_missing == B_FALSE,
 * making sure *their* .cmt_map values also point to the ->cm_out container.
 */
static void
ctf_merge_dedup_remap(ctf_merge_types_t *cmp)
{
	ctf_merge_tinfo_t *tmap = cmp->cm_tmap;
	int idx;

	for (idx = 1; idx < cmp->cm_src->ctf_typemax + 1; idx++) {
		ctf_id_t cur;

		/* Entries that were added already hold an output type ID. */
		if (tmap[idx].cmt_missing == B_TRUE) {
			VERIFY(tmap[idx].cmt_map != 0);
			continue;
		}

		/*
		 * Follow the chain of duplicates until we land on an entry
		 * that was added, then take over its output type ID.
		 */
		for (cur = idx; tmap[cur].cmt_missing == B_FALSE;
		    cur = tmap[cur].cmt_map) {
			VERIFY(tmap[cur].cmt_map != 0);
		}
		VERIFY(tmap[cur].cmt_map != 0);
		tmap[idx].cmt_map = tmap[cur].cmt_map;
	}
}


/*
 * We're going to do three passes over the containers.
 *
 * Pass 1 checks for forward references in the output container that we know
 * exist in the source container.
 *
 * Pass 2 adds all the missing types from the source container. As part of this
 * we may be adding a type as a forward reference that doesn't exist yet.
 * Any types that we encounter in this form, we need to add to a third pass.
 *
 * Pass 3 is the fixup pass. Here we go through and find all the types that were
 * missing in the first.
 *
 * Importantly, we *must* call ctf_update between the second and third pass,
 * otherwise several of the libctf functions will not properly find the data in
 * the container. If we're doing a dedup we also fix up the type mapping.
681 */ 682 static int 683 ctf_merge_common(ctf_merge_types_t *cmp) 684 { 685 int ret, i; 686 687 ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL); 688 ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL); 689 690 /* Pass 1 */ 691 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 692 if (cmp->cm_tmap[i].cmt_forward == B_TRUE) { 693 ret = ctf_merge_add_sou(cmp, i, B_TRUE); 694 if (ret != 0) { 695 return (ret); 696 } 697 } 698 } 699 700 /* Pass 2 */ 701 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 702 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) { 703 ret = ctf_merge_add_type(cmp, i); 704 if (ret != 0) { 705 ctf_dprintf("Failed to merge type %d\n", i); 706 return (ret); 707 } 708 } 709 } 710 711 ret = ctf_update(cmp->cm_out); 712 if (ret != 0) 713 return (ret); 714 715 if (cmp->cm_dedup == B_TRUE) { 716 ctf_merge_dedup_remap(cmp); 717 } 718 719 ctf_dprintf("Beginning merge pass 3\n"); 720 /* Pass 3 */ 721 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 722 if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) { 723 ret = ctf_merge_fixup_type(cmp, i); 724 if (ret != 0) 725 return (ret); 726 } 727 } 728 729 return (0); 730 } 731 732 /* 733 * Uniquification is slightly different from a stock merge. For starters, we 734 * don't need to replace any forward references in the output. In this case 735 * though, the types that already exist are in a parent container to the empty 736 * output container. 
737 */ 738 static int 739 ctf_merge_uniquify_types(ctf_merge_types_t *cmp) 740 { 741 int i, ret; 742 743 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 744 if (cmp->cm_tmap[i].cmt_missing == B_FALSE) 745 continue; 746 ret = ctf_merge_add_type(cmp, i); 747 if (ret != 0) 748 return (ret); 749 } 750 751 ret = ctf_update(cmp->cm_out); 752 if (ret != 0) 753 return (ret); 754 755 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) { 756 if (cmp->cm_tmap[i].cmt_fixup == B_FALSE) 757 continue; 758 ret = ctf_merge_fixup_type(cmp, i); 759 if (ret != 0) 760 return (ret); 761 } 762 763 return (0); 764 } 765 766 static int 767 ctf_merge_types_init(ctf_merge_types_t *cmp) 768 { 769 cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) * 770 (cmp->cm_src->ctf_typemax + 1)); 771 if (cmp->cm_tmap == NULL) 772 return (ctf_set_errno(cmp->cm_out, ENOMEM)); 773 bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) * 774 (cmp->cm_src->ctf_typemax + 1)); 775 return (0); 776 } 777 778 static void 779 ctf_merge_types_fini(ctf_merge_types_t *cmp) 780 { 781 ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) * 782 (cmp->cm_src->ctf_typemax + 1)); 783 } 784 785 /* 786 * After performing a pass, we need to go through the object and function type 787 * maps and potentially fix them up based on the new maps that we have. 
788 */ 789 static void 790 ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi) 791 { 792 ctf_merge_objmap_t *cmo; 793 ctf_merge_funcmap_t *cmf; 794 795 for (cmo = list_head(&cmi->cmi_omap); cmo != NULL; 796 cmo = list_next(&cmi->cmi_omap, cmo)) { 797 VERIFY3S(cmo->cmo_tid, !=, 0); 798 VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0); 799 cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map; 800 } 801 802 for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL; 803 cmf = list_next(&cmi->cmi_fmap, cmf)) { 804 int i; 805 806 VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0); 807 cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map; 808 for (i = 0; i < cmf->cmf_argc; i++) { 809 VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0); 810 cmf->cmf_args[i] = 811 cmp->cm_tmap[cmf->cmf_args[i]].cmt_map; 812 } 813 } 814 } 815 816 /* 817 * Merge the types contained inside of two input files. The second input file is 818 * always going to be the destination. We're guaranteed that it's always 819 * writeable. 
820 */ 821 static int 822 ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued) 823 { 824 int ret; 825 ctf_merge_types_t cm; 826 ctf_diff_t *cdp; 827 ctf_merge_input_t *scmi = arg; 828 ctf_merge_input_t *dcmi = arg2; 829 ctf_file_t *out = dcmi->cmi_input; 830 ctf_file_t *source = scmi->cmi_input; 831 832 ctf_dprintf("merging %p->%p\n", source, out); 833 834 if (!(out->ctf_flags & LCTF_RDWR)) 835 return (ctf_set_errno(out, ECTF_RDONLY)); 836 837 if (ctf_getmodel(out) != ctf_getmodel(source)) 838 return (ctf_set_errno(out, ECTF_DMODEL)); 839 840 if ((ret = ctf_diff_init(out, source, &cdp)) != 0) 841 return (ret); 842 843 cm.cm_out = out; 844 cm.cm_src = source; 845 cm.cm_dedup = B_FALSE; 846 cm.cm_unique = B_FALSE; 847 ret = ctf_merge_types_init(&cm); 848 if (ret != 0) { 849 ctf_diff_fini(cdp); 850 return (ctf_set_errno(out, ret)); 851 } 852 853 ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm); 854 if (ret != 0) 855 goto cleanup; 856 ret = ctf_merge_common(&cm); 857 ctf_dprintf("merge common returned with %d\n", ret); 858 if (ret == 0) { 859 ret = ctf_update(out); 860 ctf_dprintf("update returned with %d\n", ret); 861 } else { 862 goto cleanup; 863 } 864 865 /* 866 * Now we need to fix up the object and function maps. 867 */ 868 ctf_merge_fixup_symmaps(&cm, scmi); 869 870 /* 871 * Now that we've fixed things up, we need to give our function and 872 * object maps to the destination, such that it can continue to update 873 * them going forward. 
874 */ 875 list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap); 876 list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap); 877 878 cleanup: 879 if (ret == 0) 880 *outp = dcmi; 881 ctf_merge_types_fini(&cm); 882 ctf_diff_fini(cdp); 883 if (ret != 0) 884 return (ctf_errno(out)); 885 ctf_phase_bump(); 886 return (0); 887 } 888 889 static int 890 ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp) 891 { 892 int err, ret; 893 ctf_file_t *out; 894 ctf_merge_types_t cm; 895 ctf_diff_t *cdp; 896 ctf_merge_input_t *cmi; 897 ctf_file_t *parent = cmh->cmh_unique; 898 899 *outp = NULL; 900 out = ctf_fdcreate(cmh->cmh_ofd, &err); 901 if (out == NULL) 902 return (ctf_set_errno(src, err)); 903 904 out->ctf_parname = cmh->cmh_pname; 905 if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) { 906 (void) ctf_set_errno(src, ctf_errno(out)); 907 ctf_close(out); 908 return (CTF_ERR); 909 } 910 911 if (ctf_import(out, parent) != 0) { 912 (void) ctf_set_errno(src, ctf_errno(out)); 913 ctf_close(out); 914 return (CTF_ERR); 915 } 916 917 if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) { 918 ctf_close(out); 919 return (ctf_set_errno(src, ctf_errno(parent))); 920 } 921 922 cm.cm_out = parent; 923 cm.cm_src = src; 924 cm.cm_dedup = B_FALSE; 925 cm.cm_unique = B_TRUE; 926 ret = ctf_merge_types_init(&cm); 927 if (ret != 0) { 928 ctf_close(out); 929 ctf_diff_fini(cdp); 930 return (ctf_set_errno(src, ret)); 931 } 932 933 ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm); 934 if (ret == 0) { 935 cm.cm_out = out; 936 ret = ctf_merge_uniquify_types(&cm); 937 if (ret == 0) 938 ret = ctf_update(out); 939 } 940 941 if (ret != 0) { 942 ctf_merge_types_fini(&cm); 943 ctf_diff_fini(cdp); 944 return (ctf_set_errno(src, ctf_errno(cm.cm_out))); 945 } 946 947 for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL; 948 cmi = list_next(&cmh->cmh_inputs, cmi)) { 949 ctf_merge_fixup_symmaps(&cm, cmi); 950 } 951 952 ctf_merge_types_fini(&cm); 953 ctf_diff_fini(cdp); 954 *outp = out; 955 return (0); 956 } 
957 958 static void 959 ctf_merge_fini_input(ctf_merge_input_t *cmi) 960 { 961 ctf_merge_objmap_t *cmo; 962 ctf_merge_funcmap_t *cmf; 963 964 while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL) 965 ctf_free(cmo, sizeof (ctf_merge_objmap_t)); 966 967 while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL) 968 ctf_free(cmf, sizeof (ctf_merge_funcmap_t) + 969 sizeof (ctf_id_t) * cmf->cmf_argc); 970 971 if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL) 972 ctf_close(cmi->cmi_input); 973 974 ctf_free(cmi, sizeof (ctf_merge_input_t)); 975 } 976 977 void 978 ctf_merge_fini(ctf_merge_t *cmh) 979 { 980 size_t len; 981 ctf_merge_input_t *cmi; 982 983 if (cmh->cmh_label != NULL) { 984 len = strlen(cmh->cmh_label) + 1; 985 ctf_free(cmh->cmh_label, len); 986 } 987 988 if (cmh->cmh_pname != NULL) { 989 len = strlen(cmh->cmh_pname) + 1; 990 ctf_free(cmh->cmh_pname, len); 991 } 992 993 while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL) 994 ctf_merge_fini_input(cmi); 995 996 ctf_free(cmh, sizeof (ctf_merge_t)); 997 } 998 999 ctf_merge_t * 1000 ctf_merge_init(int fd, int *errp) 1001 { 1002 int err; 1003 ctf_merge_t *out; 1004 struct stat st; 1005 1006 if (errp == NULL) 1007 errp = &err; 1008 1009 if (fd != -1 && fstat(fd, &st) != 0) { 1010 *errp = EINVAL; 1011 return (NULL); 1012 } 1013 1014 out = ctf_alloc(sizeof (ctf_merge_t)); 1015 if (out == NULL) { 1016 *errp = ENOMEM; 1017 return (NULL); 1018 } 1019 1020 if (fd == -1) { 1021 out->cmh_msyms = B_FALSE; 1022 } else { 1023 out->cmh_msyms = B_TRUE; 1024 } 1025 1026 list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t), 1027 offsetof(ctf_merge_input_t, cmi_node)); 1028 out->cmh_ninputs = 0; 1029 out->cmh_nthreads = 1; 1030 out->cmh_unique = NULL; 1031 out->cmh_ofd = fd; 1032 out->cmh_flags = 0; 1033 out->cmh_label = NULL; 1034 out->cmh_pname = NULL; 1035 1036 return (out); 1037 } 1038 1039 int 1040 ctf_merge_label(ctf_merge_t *cmh, const char *label) 1041 { 1042 char *dup; 1043 1044 if (label == NULL) 1045 
return (EINVAL); 1046 1047 dup = ctf_strdup(label); 1048 if (dup == NULL) 1049 return (EAGAIN); 1050 1051 if (cmh->cmh_label != NULL) { 1052 size_t len = strlen(cmh->cmh_label) + 1; 1053 ctf_free(cmh->cmh_label, len); 1054 } 1055 1056 cmh->cmh_label = dup; 1057 return (0); 1058 } 1059 1060 static int 1061 ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx, 1062 const char *file, const char *name, const Elf64_Sym *symp) 1063 { 1064 ctf_merge_funcmap_t *fmap; 1065 1066 fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) + 1067 sizeof (ctf_id_t) * fip->ctc_argc); 1068 if (fmap == NULL) 1069 return (ENOMEM); 1070 1071 fmap->cmf_idx = idx; 1072 fmap->cmf_sym = *symp; 1073 fmap->cmf_rtid = fip->ctc_return; 1074 fmap->cmf_flags = fip->ctc_flags; 1075 fmap->cmf_argc = fip->ctc_argc; 1076 fmap->cmf_name = name; 1077 if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) { 1078 fmap->cmf_file = file; 1079 } else { 1080 fmap->cmf_file = NULL; 1081 } 1082 1083 if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc, 1084 fmap->cmf_args) != 0) { 1085 ctf_free(fmap, sizeof (ctf_merge_funcmap_t) + 1086 sizeof (ctf_id_t) * fip->ctc_argc); 1087 return (ctf_errno(cmi->cmi_input)); 1088 } 1089 1090 ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx, 1091 fmap->cmf_file != NULL ? 
fmap->cmf_file : "global", 1092 ELF64_ST_BIND(symp->st_info)); 1093 list_insert_tail(&cmi->cmi_fmap, fmap); 1094 return (0); 1095 } 1096 1097 static int 1098 ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx, 1099 const char *file, const char *name, const Elf64_Sym *symp) 1100 { 1101 ctf_merge_objmap_t *cmo; 1102 1103 cmo = ctf_alloc(sizeof (ctf_merge_objmap_t)); 1104 if (cmo == NULL) 1105 return (ENOMEM); 1106 1107 cmo->cmo_name = name; 1108 if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) { 1109 cmo->cmo_file = file; 1110 } else { 1111 cmo->cmo_file = NULL; 1112 } 1113 cmo->cmo_idx = idx; 1114 cmo->cmo_tid = id; 1115 cmo->cmo_sym = *symp; 1116 list_insert_tail(&cmi->cmi_omap, cmo); 1117 1118 ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id, 1119 cmo->cmo_file != NULL ? cmo->cmo_file : "global"); 1120 1121 return (0); 1122 } 1123 1124 static int 1125 ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file, 1126 const char *name, boolean_t primary, void *arg) 1127 { 1128 ctf_merge_input_t *cmi = arg; 1129 ctf_file_t *fp = cmi->cmi_input; 1130 ushort_t *data, funcbase; 1131 uint_t type; 1132 ctf_funcinfo_t fi; 1133 1134 /* 1135 * See if there is type information for this. If there is no 1136 * type information for this entry or no translation, then we 1137 * will find the value zero. This indicates no type ID for 1138 * objects and encodes unknown information for functions. 
1139 */ 1140 if (fp->ctf_sxlate[idx] == -1u) 1141 return (0); 1142 data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]); 1143 if (*data == 0) 1144 return (0); 1145 1146 type = ELF64_ST_TYPE(symp->st_info); 1147 1148 switch (type) { 1149 case STT_FUNC: 1150 funcbase = *data; 1151 if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION) 1152 return (0); 1153 data++; 1154 fi.ctc_return = *data; 1155 data++; 1156 fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase); 1157 fi.ctc_flags = 0; 1158 1159 if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) { 1160 fi.ctc_flags |= CTF_FUNC_VARARG; 1161 fi.ctc_argc--; 1162 } 1163 return (ctf_merge_add_function(cmi, &fi, idx, file, name, 1164 symp)); 1165 case STT_OBJECT: 1166 return (ctf_merge_add_object(cmi, *data, idx, file, name, 1167 symp)); 1168 default: 1169 return (0); 1170 } 1171 } 1172 1173 /* 1174 * Whenever we create an entry to merge, we then go and add a second empty 1175 * ctf_file_t which we use for the purposes of our merging. It's not the best, 1176 * but it's the best that we've got at the moment. 
1177 */ 1178 int 1179 ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input) 1180 { 1181 int ret; 1182 ctf_merge_input_t *cmi; 1183 ctf_file_t *empty; 1184 1185 ctf_dprintf("adding input %p\n", input); 1186 1187 if (input->ctf_flags & LCTF_CHILD) 1188 return (ECTF_MCHILD); 1189 1190 cmi = ctf_alloc(sizeof (ctf_merge_input_t)); 1191 if (cmi == NULL) 1192 return (ENOMEM); 1193 1194 cmi->cmi_created = B_FALSE; 1195 cmi->cmi_input = input; 1196 list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t), 1197 offsetof(ctf_merge_funcmap_t, cmf_node)); 1198 list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t), 1199 offsetof(ctf_merge_objmap_t, cmo_node)); 1200 1201 if (cmh->cmh_msyms == B_TRUE) { 1202 if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol, 1203 cmi)) != 0) { 1204 ctf_merge_fini_input(cmi); 1205 return (ret); 1206 } 1207 } 1208 1209 list_insert_tail(&cmh->cmh_inputs, cmi); 1210 cmh->cmh_ninputs++; 1211 1212 /* And now the empty one to merge into this */ 1213 cmi = ctf_alloc(sizeof (ctf_merge_input_t)); 1214 if (cmi == NULL) 1215 return (ENOMEM); 1216 list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t), 1217 offsetof(ctf_merge_funcmap_t, cmf_node)); 1218 list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t), 1219 offsetof(ctf_merge_objmap_t, cmo_node)); 1220 1221 empty = ctf_fdcreate(cmh->cmh_ofd, &ret); 1222 if (empty == NULL) 1223 return (ret); 1224 cmi->cmi_input = empty; 1225 cmi->cmi_created = B_TRUE; 1226 1227 if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) { 1228 return (ctf_errno(empty)); 1229 } 1230 1231 list_insert_tail(&cmh->cmh_inputs, cmi); 1232 cmh->cmh_ninputs++; 1233 ctf_dprintf("added containers %p and %p\n", input, empty); 1234 return (0); 1235 } 1236 1237 int 1238 ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname) 1239 { 1240 char *dup; 1241 1242 if (u->ctf_flags & LCTF_CHILD) 1243 return (ECTF_MCHILD); 1244 if (pname == NULL) 1245 return (EINVAL); 1246 dup = ctf_strdup(pname); 1247 if (dup == NULL) 1248 
return (EINVAL); 1249 if (cmh->cmh_pname != NULL) { 1250 size_t len = strlen(cmh->cmh_pname) + 1; 1251 ctf_free(cmh->cmh_pname, len); 1252 } 1253 cmh->cmh_pname = dup; 1254 cmh->cmh_unique = u; 1255 return (0); 1256 } 1257 1258 /* 1259 * Symbol matching rules: the purpose of this is to verify that the type 1260 * information that we have for a given symbol actually matches the output 1261 * symbol. This is unfortunately complicated by several different factors: 1262 * 1263 * 1. When merging multiple .o's into a single item, the symbol table index will 1264 * not match. 1265 * 1266 * 2. Visibility of a symbol may not be identical to the object file or the 1267 * DWARF information due to symbol reduction via a mapfile. 1268 * 1269 * As such, we have to employ the following rules: 1270 * 1271 * 1. A global symbol table entry always matches a global CTF symbol with the 1272 * same name. 1273 * 1274 * 2. A local symbol table entry always matches a local CTF symbol if they have 1275 * the same name and they belong to the same file. 1276 * 1277 * 3. A weak symbol matches a non-weak symbol. This happens if we find that the 1278 * types match, the values match, the sizes match, and the section indexes 1279 * match. This happens when we do a conversion in one pass, it almost never 1280 * happens when we're merging multiple object files. If we match a CTF global 1281 * symbol, that's a fixed match, otherwise it's a fuzzy match. 1282 * 1283 * 4. A local symbol table entry matches a global CTF entry if the 1284 * other pieces fail, but they have the same name. This is considered a fuzzy 1285 * match and is not used unless we have no other options. 1286 * 1287 * 5. A weak symbol table entry matches a weak CTF entry if the other pieces 1288 * fail, but they have the same name. This is considered a fuzzy match and is 1289 * not used unless we have no other options. When merging independent .o files, 1290 * this is often the only recourse we have to matching weak symbols. 
1291 * 1292 * In the end, this would all be much simpler if we were able to do this as part 1293 * of libld which would be able to do all the symbol transformations. 1294 */ 1295 static boolean_t 1296 ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name, 1297 const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name, 1298 const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy) 1299 { 1300 *is_fuzzy = B_FALSE; 1301 uint_t symtab_bind, ctf_bind; 1302 1303 symtab_bind = ELF64_ST_BIND(symtab_symp->st_info); 1304 ctf_bind = ELF64_ST_BIND(ctf_symp->st_info); 1305 1306 ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n", 1307 symtab_file, symtab_name, symtab_bind, 1308 ctf_file, ctf_name, ctf_bind); 1309 if (strcmp(ctf_name, symtab_name) != 0) { 1310 return (B_FALSE); 1311 } 1312 1313 if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) { 1314 return (B_TRUE); 1315 } else if (symtab_bind == STB_GLOBAL) { 1316 return (B_FALSE); 1317 } 1318 1319 if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind && 1320 ctf_file != NULL && symtab_file != NULL && 1321 strcmp(ctf_file, symtab_file) == 0) { 1322 return (B_TRUE); 1323 } 1324 1325 if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK && 1326 ELF64_ST_TYPE(symtab_symp->st_info) == 1327 ELF64_ST_TYPE(ctf_symp->st_info) && 1328 symtab_symp->st_value == ctf_symp->st_value && 1329 symtab_symp->st_size == ctf_symp->st_size && 1330 symtab_symp->st_shndx == ctf_symp->st_shndx) { 1331 if (ctf_bind == STB_GLOBAL) { 1332 return (B_TRUE); 1333 } 1334 1335 if (ctf_bind == STB_LOCAL && ctf_file != NULL && 1336 symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) { 1337 *is_fuzzy = B_TRUE; 1338 return (B_TRUE); 1339 } 1340 } 1341 1342 if (ctf_bind == STB_GLOBAL || 1343 (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) { 1344 *is_fuzzy = B_TRUE; 1345 return (B_TRUE); 1346 } 1347 1348 return (B_FALSE); 1349 } 1350 1351 /* 1352 * For each symbol, try and find a match. 
We will attempt to find an exact 1353 * match; however, we will settle for a fuzzy match in general. There is one 1354 * case where we will not opt to use a fuzzy match, which is when performing the 1355 * deduplication of a container. In such a case we are trying to reduce common 1356 * types and a fuzzy match would be inappropriate as if we're in the context of 1357 * a single container, the conversion process should have identified any exact 1358 * or fuzzy matches that were required. 1359 */ 1360 static int 1361 ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file, 1362 const char *name, boolean_t primary, void *arg) 1363 { 1364 int err; 1365 uint_t type, bind; 1366 ctf_merge_symbol_arg_t *csa = arg; 1367 ctf_file_t *fp = csa->cmsa_out; 1368 1369 type = ELF64_ST_TYPE(symp->st_info); 1370 bind = ELF64_ST_BIND(symp->st_info); 1371 1372 ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name, 1373 ELF64_ST_BIND(symp->st_info)); 1374 1375 if (type == STT_OBJECT) { 1376 ctf_merge_objmap_t *cmo, *match = NULL; 1377 1378 for (cmo = list_head(csa->cmsa_objmap); cmo != NULL; 1379 cmo = list_next(csa->cmsa_objmap, cmo)) { 1380 boolean_t is_fuzzy = B_FALSE; 1381 if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name, 1382 &cmo->cmo_sym, file, name, symp, &is_fuzzy)) { 1383 if (is_fuzzy && csa->cmsa_dedup && 1384 bind != STB_WEAK) { 1385 continue; 1386 } 1387 match = cmo; 1388 if (is_fuzzy) { 1389 continue; 1390 } 1391 break; 1392 } 1393 } 1394 1395 if (match == NULL) { 1396 return (0); 1397 } 1398 1399 if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) { 1400 ctf_dprintf("Failed to add symbol %s->%d: %s\n", name, 1401 match->cmo_tid, ctf_errmsg(ctf_errno(fp))); 1402 return (ctf_errno(fp)); 1403 } 1404 ctf_dprintf("mapped object into output %s/%s->%ld\n", file, 1405 name, match->cmo_tid); 1406 } else { 1407 ctf_merge_funcmap_t *cmf, *match = NULL; 1408 ctf_funcinfo_t fi; 1409 1410 for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL; 1411 
cmf = list_next(csa->cmsa_funcmap, cmf)) { 1412 boolean_t is_fuzzy = B_FALSE; 1413 if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name, 1414 &cmf->cmf_sym, file, name, symp, &is_fuzzy)) { 1415 if (is_fuzzy && csa->cmsa_dedup && 1416 bind != STB_WEAK) { 1417 continue; 1418 } 1419 match = cmf; 1420 if (is_fuzzy) { 1421 continue; 1422 } 1423 break; 1424 } 1425 } 1426 1427 if (match == NULL) { 1428 return (0); 1429 } 1430 1431 fi.ctc_return = match->cmf_rtid; 1432 fi.ctc_argc = match->cmf_argc; 1433 fi.ctc_flags = match->cmf_flags; 1434 if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) != 1435 0) { 1436 ctf_dprintf("Failed to add function %s: %s\n", name, 1437 ctf_errmsg(ctf_errno(fp))); 1438 return (ctf_errno(fp)); 1439 } 1440 ctf_dprintf("mapped function into output %s/%s\n", file, 1441 name); 1442 } 1443 1444 return (0); 1445 } 1446 1447 int 1448 ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp) 1449 { 1450 int err, merr; 1451 ctf_merge_input_t *cmi; 1452 ctf_id_t ltype; 1453 mergeq_t *mqp; 1454 ctf_merge_input_t *final; 1455 ctf_file_t *out; 1456 1457 ctf_dprintf("Beginning ctf_merge_merge()\n"); 1458 if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) { 1459 const char *label = ctf_label_topmost(cmh->cmh_unique); 1460 if (label == NULL) 1461 return (ECTF_NOLABEL); 1462 if (strcmp(label, cmh->cmh_label) != 0) 1463 return (ECTF_LCONFLICT); 1464 } 1465 1466 if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) { 1467 return (errno); 1468 } 1469 1470 VERIFY(cmh->cmh_ninputs % 2 == 0); 1471 for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL; 1472 cmi = list_next(&cmh->cmh_inputs, cmi)) { 1473 if (mergeq_add(mqp, cmi) == -1) { 1474 err = errno; 1475 mergeq_fini(mqp); 1476 } 1477 } 1478 1479 err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr); 1480 mergeq_fini(mqp); 1481 1482 if (err == MERGEQ_ERROR) { 1483 return (errno); 1484 } else if (err == MERGEQ_UERROR) { 1485 return (merr); 1486 } 1487 1488 /* 1489 * Disassociate the generated 
ctf_file_t from the original input. That 1490 * way when the input gets cleaned up, we don't accidentally kill the 1491 * final reference to the ctf_file_t. If it gets uniquified then we'll 1492 * kill it. 1493 */ 1494 VERIFY(final->cmi_input != NULL); 1495 out = final->cmi_input; 1496 final->cmi_input = NULL; 1497 1498 ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique); 1499 if (cmh->cmh_unique != NULL) { 1500 ctf_file_t *u; 1501 err = ctf_uniquify_types(cmh, out, &u); 1502 if (err != 0) { 1503 err = ctf_errno(out); 1504 ctf_close(out); 1505 return (err); 1506 } 1507 ctf_close(out); 1508 out = u; 1509 } 1510 1511 ltype = out->ctf_typemax; 1512 if ((out->ctf_flags & LCTF_CHILD) && ltype != 0) 1513 ltype += CTF_CHILD_START; 1514 ctf_dprintf("trying to add the label\n"); 1515 if (cmh->cmh_label != NULL && 1516 ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) { 1517 ctf_close(out); 1518 return (ctf_errno(out)); 1519 } 1520 1521 ctf_dprintf("merging symbols and the like\n"); 1522 if (cmh->cmh_msyms == B_TRUE) { 1523 ctf_merge_symbol_arg_t arg; 1524 arg.cmsa_objmap = &final->cmi_omap; 1525 arg.cmsa_funcmap = &final->cmi_fmap; 1526 arg.cmsa_out = out; 1527 arg.cmsa_dedup = B_FALSE; 1528 err = ctf_symtab_iter(out, ctf_merge_symbols, &arg); 1529 if (err != 0) { 1530 ctf_close(out); 1531 return (err); 1532 } 1533 } 1534 1535 err = ctf_update(out); 1536 if (err != 0) { 1537 err = ctf_errno(out); 1538 ctf_close(out); 1539 return (err); 1540 } 1541 1542 *outp = out; 1543 return (0); 1544 } 1545 1546 /* 1547 * When we get told that something is unique, eg. same is B_FALSE, then that 1548 * tells us that we need to add it to the output. If same is B_TRUE, then we'll 1549 * want to record it in the mapping table so that we know how to redirect types 1550 * to the extant ones. 
1551 */ 1552 static void 1553 ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp, 1554 ctf_id_t oid, void *arg) 1555 { 1556 ctf_merge_types_t *cmp = arg; 1557 ctf_merge_tinfo_t *cmt = cmp->cm_tmap; 1558 1559 if (same == B_TRUE) { 1560 /* 1561 * The output id here may itself map to something else. 1562 * Therefore, we need to basically walk a chain and see what it 1563 * points to until it itself points to a base type, eg. -1. 1564 * Otherwise we'll dedup to something which no longer exists. 1565 */ 1566 while (cmt[oid].cmt_missing == B_FALSE) 1567 oid = cmt[oid].cmt_map; 1568 cmt[iid].cmt_map = oid; 1569 ctf_dprintf("%d->%d \n", iid, oid); 1570 } else { 1571 VERIFY(cmt[iid].cmt_map == 0); 1572 cmt[iid].cmt_missing = B_TRUE; 1573 ctf_dprintf("%d is missing\n", iid); 1574 } 1575 } 1576 1577 /* 1578 * Dedup a CTF container. 1579 * 1580 * DWARF and other encoding formats that we use to create CTF data may create 1581 * multiple copies of a given type. However, after doing a conversion, and 1582 * before doing a merge, we'd prefer, if possible, to have every input container 1583 * to be unique. 1584 * 1585 * Doing a deduplication is like a normal merge. However, when we diff the types 1586 * in the container, rather than doing a normal diff, we instead want to diff 1587 * against any already processed types. eg, for a given type i in a container, 1588 * we want to diff it from 0 to i - 1. 
1589 */ 1590 int 1591 ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp) 1592 { 1593 int ret; 1594 ctf_diff_t *cdp = NULL; 1595 ctf_merge_input_t *cmi, *cmc; 1596 ctf_file_t *ifp, *ofp; 1597 ctf_merge_types_t cm; 1598 1599 if (cmp == NULL || outp == NULL) 1600 return (EINVAL); 1601 1602 ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs); 1603 if (cmp->cmh_ninputs != 2) 1604 return (EINVAL); 1605 1606 ctf_dprintf("passed argument sanity check\n"); 1607 1608 cmi = list_head(&cmp->cmh_inputs); 1609 VERIFY(cmi != NULL); 1610 cmc = list_next(&cmp->cmh_inputs, cmi); 1611 VERIFY(cmc != NULL); 1612 ifp = cmi->cmi_input; 1613 ofp = cmc->cmi_input; 1614 VERIFY(ifp != NULL); 1615 VERIFY(ofp != NULL); 1616 cm.cm_src = ifp; 1617 cm.cm_out = ofp; 1618 cm.cm_dedup = B_TRUE; 1619 cm.cm_unique = B_FALSE; 1620 1621 if ((ret = ctf_merge_types_init(&cm)) != 0) { 1622 return (ret); 1623 } 1624 1625 if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0) 1626 goto err; 1627 1628 ctf_dprintf("Successfully initialized dedup\n"); 1629 if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0) 1630 goto err; 1631 1632 ctf_dprintf("Successfully diffed types\n"); 1633 ret = ctf_merge_common(&cm); 1634 ctf_dprintf("deduping types result: %d\n", ret); 1635 if (ret == 0) 1636 ret = ctf_update(cm.cm_out); 1637 if (ret != 0) 1638 goto err; 1639 1640 ctf_dprintf("Successfully deduped types\n"); 1641 ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL); 1642 1643 /* 1644 * Now we need to fix up the object and function maps. 
1645 */ 1646 ctf_merge_fixup_symmaps(&cm, cmi); 1647 1648 if (cmp->cmh_msyms == B_TRUE) { 1649 ctf_merge_symbol_arg_t arg; 1650 arg.cmsa_objmap = &cmi->cmi_omap; 1651 arg.cmsa_funcmap = &cmi->cmi_fmap; 1652 arg.cmsa_out = cm.cm_out; 1653 arg.cmsa_dedup = B_TRUE; 1654 ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg); 1655 if (ret != 0) { 1656 ctf_dprintf("failed to dedup symbols: %s\n", 1657 ctf_errmsg(ret)); 1658 goto err; 1659 } 1660 } 1661 1662 ret = ctf_update(cm.cm_out); 1663 if (ret == 0) { 1664 cmc->cmi_input = NULL; 1665 *outp = cm.cm_out; 1666 } 1667 ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL); 1668 err: 1669 ctf_merge_types_fini(&cm); 1670 ctf_diff_fini(cdp); 1671 return (ret); 1672 } 1673 1674 int 1675 ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs) 1676 { 1677 if (nthrs == 0) 1678 return (EINVAL); 1679 cmp->cmh_nthreads = nthrs; 1680 return (0); 1681 } 1682