1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2020 Joyent, Inc.
14 */
15
16 /*
 * To perform a merge of two CTF containers, we first diff the two containers'
 * types. For every type that's in the src container, but not in the dst
 * container, we note it and add it to the dst container. If there are any
 * objects or functions associated with src, we go through and update the types
 * that they refer to such that they all refer to types in the dst container.
22 *
23 * The bulk of the logic for the merge, after we've run the diff, occurs in
24 * ctf_merge_common().
25 *
 * In terms of exported APIs, we don't really export a simple "merge two
 * containers" operation, as the general way this is used, in something like
 * ctfmerge(1), is to add all the containers and then let us figure out the best
 * way to merge them.
30 */
31
32 #include <libctf_impl.h>
33 #include <sys/debug.h>
34 #include <sys/list.h>
35 #include <stddef.h>
36 #include <fcntl.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <mergeq.h>
40 #include <errno.h>
41
42 typedef struct ctf_merge_tinfo {
43 uint16_t cmt_map; /* Map to the type in out */
44 boolean_t cmt_fixup;
45 boolean_t cmt_forward;
46 boolean_t cmt_missing;
47 } ctf_merge_tinfo_t;
48
49 /*
50 * State required for doing an individual merge of two containers.
51 */
52 typedef struct ctf_merge_types {
53 ctf_file_t *cm_out; /* Output CTF file */
54 ctf_file_t *cm_src; /* Input CTF file */
55 ctf_merge_tinfo_t *cm_tmap; /* Type state information */
56 boolean_t cm_dedup; /* Are we doing a dedup? */
57 boolean_t cm_unique; /* are we doing a uniquify? */
58 } ctf_merge_types_t;
59
60 typedef struct ctf_merge_objmap {
61 list_node_t cmo_node;
62 const char *cmo_name; /* Symbol name */
63 const char *cmo_file; /* Symbol file */
64 ulong_t cmo_idx; /* Symbol ID */
65 Elf64_Sym cmo_sym; /* Symbol Entry */
66 ctf_id_t cmo_tid; /* Type ID */
67 } ctf_merge_objmap_t;
68
69 typedef struct ctf_merge_funcmap {
70 list_node_t cmf_node;
71 const char *cmf_name; /* Symbol name */
72 const char *cmf_file; /* Symbol file */
73 ulong_t cmf_idx; /* Symbol ID */
74 Elf64_Sym cmf_sym; /* Symbol Entry */
75 ctf_id_t cmf_rtid; /* Type ID */
76 uint_t cmf_flags; /* ctf_funcinfo_t ctc_flags */
77 uint_t cmf_argc; /* Number of arguments */
78 ctf_id_t cmf_args[]; /* Types of arguments */
79 } ctf_merge_funcmap_t;
80
81 typedef struct ctf_merge_input {
82 list_node_t cmi_node;
83 ctf_file_t *cmi_input;
84 list_t cmi_omap;
85 list_t cmi_fmap;
86 boolean_t cmi_created;
87 } ctf_merge_input_t;
88
89 struct ctf_merge_handle {
90 list_t cmh_inputs; /* Input list */
91 uint_t cmh_ninputs; /* Number of inputs */
92 uint_t cmh_nthreads; /* Number of threads to use */
93 ctf_file_t *cmh_unique; /* ctf to uniquify against */
94 boolean_t cmh_msyms; /* Should we merge symbols/funcs? */
95 int cmh_ofd; /* FD for output file */
96 int cmh_flags; /* Flags that control merge behavior */
97 char *cmh_label; /* Optional label */
98 char *cmh_pname; /* Parent name */
99 };
100
101 typedef struct ctf_merge_symbol_arg {
102 list_t *cmsa_objmap;
103 list_t *cmsa_funcmap;
104 ctf_file_t *cmsa_out;
105 boolean_t cmsa_dedup;
106 } ctf_merge_symbol_arg_t;
107
108 static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t);
109
110 static ctf_id_t
ctf_merge_gettype(ctf_merge_types_t * cmp,ctf_id_t id)111 ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id)
112 {
113 if (cmp->cm_dedup == B_FALSE) {
114 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
115 return (cmp->cm_tmap[id].cmt_map);
116 }
117
118 while (cmp->cm_tmap[id].cmt_missing == B_FALSE) {
119 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
120 id = cmp->cm_tmap[id].cmt_map;
121 }
122 VERIFY(cmp->cm_tmap[id].cmt_map != 0);
123 return (cmp->cm_tmap[id].cmt_map);
124 }
125
126 static void
ctf_merge_diffcb(ctf_file_t * ifp,ctf_id_t iid,boolean_t same,ctf_file_t * ofp,ctf_id_t oid,void * arg)127 ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
128 ctf_id_t oid, void *arg)
129 {
130 ctf_merge_types_t *cmp = arg;
131 ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
132 uint_t kind;
133
134 if (same == B_TRUE) {
135 if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD &&
136 (kind = ctf_type_kind(ofp, oid)) != CTF_K_FORWARD) {
137 VERIFY(cmt[oid].cmt_map == 0);
138
139 /*
140 * If we're uniquifying types, it's possible for the
141 * container that we're uniquifying against to have a
142 * forward which exists in the container being reduced.
143 * For example, genunix has the machcpu structure as a
144 * forward which is actually in unix and we uniquify
145 * unix against genunix. In such cases, we explicitly do
146 * not do any mapping of the forward information, lest
147 * we risk losing the real definition. Instead, mark
148 * that it's missing.
149 */
150 if (cmp->cm_unique == B_TRUE) {
151 cmt[oid].cmt_missing = B_TRUE;
152 return;
153 }
154
155 cmt[oid].cmt_map = iid;
156 cmt[oid].cmt_forward = B_TRUE;
157 ctf_dprintf("merge diff forward mapped %ld->%ld (%u)\n",
158 oid, iid, kind);
159 return;
160 }
161
162 /*
163 * We could have multiple things that a given type ends up
164 * matching in the world of forwards and pointers to forwards.
165 * For now just take the first one...
166 */
167 if (cmt[oid].cmt_map != 0)
168 return;
169 cmt[oid].cmt_map = iid;
170 ctf_dprintf("merge diff mapped %d->%d\n", oid, iid);
171 } else if (ifp == cmp->cm_src) {
172 VERIFY(cmt[iid].cmt_map == 0);
173 cmt[iid].cmt_missing = B_TRUE;
174 ctf_dprintf("merge diff said %d is missing\n", iid);
175 }
176 }
177
178 static int
ctf_merge_add_number(ctf_merge_types_t * cmp,ctf_id_t id)179 ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id)
180 {
181 int ret, flags;
182 const ctf_type_t *tp;
183 const char *name;
184 ctf_encoding_t en;
185
186 if (ctf_type_encoding(cmp->cm_src, id, &en) != 0)
187 return (CTF_ERR);
188
189 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
190 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
191 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
192 flags = CTF_ADD_ROOT;
193 else
194 flags = CTF_ADD_NONROOT;
195
196 ret = ctf_add_encoded(cmp->cm_out, flags, name, &en,
197 ctf_type_kind(cmp->cm_src, id));
198
199 if (ret == CTF_ERR)
200 return (ret);
201
202 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
203 cmp->cm_tmap[id].cmt_map = ret;
204 return (0);
205 }
206
207 static int
ctf_merge_add_array(ctf_merge_types_t * cmp,ctf_id_t id)208 ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id)
209 {
210 int ret, flags;
211 const ctf_type_t *tp;
212 ctf_arinfo_t ar;
213
214 if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR)
215 return (CTF_ERR);
216
217 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
218 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
219 flags = CTF_ADD_ROOT;
220 else
221 flags = CTF_ADD_NONROOT;
222
223 if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) {
224 ret = ctf_merge_add_type(cmp, ar.ctr_contents);
225 if (ret != 0)
226 return (ret);
227 ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0);
228 }
229 ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents);
230
231 if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) {
232 ret = ctf_merge_add_type(cmp, ar.ctr_index);
233 if (ret != 0)
234 return (ret);
235 ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0);
236 }
237 ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index);
238
239 ret = ctf_add_array(cmp->cm_out, flags, &ar);
240 if (ret == CTF_ERR)
241 return (ret);
242
243 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
244 cmp->cm_tmap[id].cmt_map = ret;
245
246 return (0);
247 }
248
249 static int
ctf_merge_add_reftype(ctf_merge_types_t * cmp,ctf_id_t id)250 ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id)
251 {
252 int ret, flags;
253 const ctf_type_t *tp;
254 ctf_id_t reftype;
255 const char *name;
256
257 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
258 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
259 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
260 flags = CTF_ADD_ROOT;
261 else
262 flags = CTF_ADD_NONROOT;
263
264 reftype = ctf_type_reference(cmp->cm_src, id);
265 if (reftype == CTF_ERR)
266 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
267
268 if (cmp->cm_tmap[reftype].cmt_map == 0) {
269 ret = ctf_merge_add_type(cmp, reftype);
270 if (ret != 0)
271 return (ret);
272 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
273 }
274 reftype = ctf_merge_gettype(cmp, reftype);
275
276 ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype,
277 ctf_type_kind(cmp->cm_src, id));
278 if (ret == CTF_ERR)
279 return (ret);
280
281 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
282 cmp->cm_tmap[id].cmt_map = ret;
283 return (0);
284 }
285
286 static int
ctf_merge_add_typedef(ctf_merge_types_t * cmp,ctf_id_t id)287 ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id)
288 {
289 int ret, flags;
290 const ctf_type_t *tp;
291 const char *name;
292 ctf_id_t reftype;
293
294 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
295 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
296 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
297 flags = CTF_ADD_ROOT;
298 else
299 flags = CTF_ADD_NONROOT;
300
301 reftype = ctf_type_reference(cmp->cm_src, id);
302 if (reftype == CTF_ERR)
303 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
304
305 if (cmp->cm_tmap[reftype].cmt_map == 0) {
306 ret = ctf_merge_add_type(cmp, reftype);
307 if (ret != 0)
308 return (ret);
309 ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
310 }
311 reftype = ctf_merge_gettype(cmp, reftype);
312
313 ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype);
314 if (ret == CTF_ERR)
315 return (ret);
316
317 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
318 cmp->cm_tmap[id].cmt_map = ret;
319 return (0);
320 }
321
322 typedef struct ctf_merge_enum {
323 ctf_file_t *cme_fp;
324 ctf_id_t cme_id;
325 } ctf_merge_enum_t;
326
327 static int
ctf_merge_add_enumerator(const char * name,int value,void * arg)328 ctf_merge_add_enumerator(const char *name, int value, void *arg)
329 {
330 ctf_merge_enum_t *cmep = arg;
331
332 return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) ==
333 CTF_ERR);
334 }
335
336 static int
ctf_merge_add_enum(ctf_merge_types_t * cmp,ctf_id_t id)337 ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id)
338 {
339 int flags;
340 const ctf_type_t *tp;
341 const char *name;
342 ctf_id_t enumid;
343 ctf_merge_enum_t cme;
344 size_t size;
345
346 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
347 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
348 flags = CTF_ADD_ROOT;
349 else
350 flags = CTF_ADD_NONROOT;
351
352 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
353 size = ctf_get_ctt_size(cmp->cm_src, tp, NULL, NULL);
354
355 enumid = ctf_add_enum(cmp->cm_out, flags, name, size);
356 if (enumid == CTF_ERR)
357 return (enumid);
358
359 cme.cme_fp = cmp->cm_out;
360 cme.cme_id = enumid;
361 if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator,
362 &cme) != 0)
363 return (CTF_ERR);
364
365 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
366 cmp->cm_tmap[id].cmt_map = enumid;
367 return (0);
368 }
369
370 static int
ctf_merge_add_func(ctf_merge_types_t * cmp,ctf_id_t id)371 ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id)
372 {
373 int ret, flags, i;
374 const ctf_type_t *tp;
375 ctf_funcinfo_t ctc;
376 ctf_id_t *argv;
377
378 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
379 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
380 flags = CTF_ADD_ROOT;
381 else
382 flags = CTF_ADD_NONROOT;
383
384 if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR)
385 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
386
387 argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc);
388 if (argv == NULL)
389 return (ctf_set_errno(cmp->cm_out, ENOMEM));
390 if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) ==
391 CTF_ERR) {
392 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
393 return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
394 }
395
396 if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) {
397 ret = ctf_merge_add_type(cmp, ctc.ctc_return);
398 if (ret != 0)
399 return (ret);
400 ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0);
401 }
402 ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return);
403
404 for (i = 0; i < ctc.ctc_argc; i++) {
405 if (cmp->cm_tmap[argv[i]].cmt_map == 0) {
406 ret = ctf_merge_add_type(cmp, argv[i]);
407 if (ret != 0)
408 return (ret);
409 ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0);
410 }
411 argv[i] = ctf_merge_gettype(cmp, argv[i]);
412 }
413
414 ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv);
415 ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
416 if (ret == CTF_ERR)
417 return (ret);
418
419 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
420 cmp->cm_tmap[id].cmt_map = ret;
421 return (0);
422 }
423
424 static int
ctf_merge_add_forward(ctf_merge_types_t * cmp,ctf_id_t id,uint_t kind)425 ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id, uint_t kind)
426 {
427 int ret, flags;
428 const ctf_type_t *tp;
429 const char *name;
430
431 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
432 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
433 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
434 flags = CTF_ADD_ROOT;
435 else
436 flags = CTF_ADD_NONROOT;
437
438 ret = ctf_add_forward(cmp->cm_out, flags, name, kind);
439 if (ret == CTF_ERR)
440 return (CTF_ERR);
441
442 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
443 cmp->cm_tmap[id].cmt_map = ret;
444 return (0);
445 }
446
447 typedef struct ctf_merge_su {
448 ctf_merge_types_t *cms_cm;
449 ctf_id_t cms_id;
450 } ctf_merge_su_t;
451
452 static int
ctf_merge_add_member(const char * name,ctf_id_t type,ulong_t offset,void * arg)453 ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg)
454 {
455 ctf_merge_su_t *cms = arg;
456
457 VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0);
458 type = cms->cms_cm->cm_tmap[type].cmt_map;
459
460 ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id);
461 return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name,
462 type, offset) == CTF_ERR);
463 }
464
465 /*
466 * During the first pass, we always add the generic structure and union but none
467 * of its members as they might not all have been mapped yet. Instead we just
468 * mark all structures and unions as needing to be fixed up.
469 */
470 static int
ctf_merge_add_sou(ctf_merge_types_t * cmp,ctf_id_t id,boolean_t forward)471 ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward)
472 {
473 int flags, kind;
474 const ctf_type_t *tp;
475 const char *name;
476 ctf_id_t suid;
477
478 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
479 name = ctf_strraw(cmp->cm_src, tp->ctt_name);
480 if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
481 flags = CTF_ADD_ROOT;
482 else
483 flags = CTF_ADD_NONROOT;
484 kind = ctf_type_kind(cmp->cm_src, id);
485
486 if (kind == CTF_K_STRUCT)
487 suid = ctf_add_struct(cmp->cm_out, flags, name);
488 else
489 suid = ctf_add_union(cmp->cm_out, flags, name);
490
491 ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id, suid);
492
493 if (suid == CTF_ERR)
494 return (suid);
495
496 if (forward == B_FALSE) {
497 VERIFY(cmp->cm_tmap[id].cmt_map == 0);
498 cmp->cm_tmap[id].cmt_map = suid;
499 } else {
500 /*
501 * If this is a forward reference then its mapping should
502 * already exist.
503 */
504 if (cmp->cm_tmap[id].cmt_map != suid) {
505 ctf_dprintf(
506 "mismatch sou \"%s\" as (%d) %d->%d (exp %d)\n",
507 name, kind, id, suid, cmp->cm_tmap[id].cmt_map);
508 ctf_hash_dump("src structs",
509 &cmp->cm_src->ctf_structs, cmp->cm_src);
510 ctf_hash_dump("src unions",
511 &cmp->cm_src->ctf_unions, cmp->cm_src);
512 ctf_hash_dump("out structs",
513 &cmp->cm_out->ctf_structs, cmp->cm_out);
514 ctf_hash_dump("out unions",
515 &cmp->cm_out->ctf_unions, cmp->cm_out);
516 }
517 VERIFY(cmp->cm_tmap[id].cmt_map == suid);
518 }
519 cmp->cm_tmap[id].cmt_fixup = B_TRUE;
520
521 return (0);
522 }
523
524 static int
ctf_merge_add_type(ctf_merge_types_t * cmp,ctf_id_t id)525 ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id)
526 {
527 int kind, ret;
528
529 /*
530 * We may end up evaluating a type more than once as we may deal with it
531 * as we recursively evaluate some kind of reference and then we may see
532 * it normally.
533 */
534 if (cmp->cm_tmap[id].cmt_map != 0)
535 return (0);
536
537 kind = ctf_type_kind(cmp->cm_src, id);
538 switch (kind) {
539 case CTF_K_INTEGER:
540 case CTF_K_FLOAT:
541 ret = ctf_merge_add_number(cmp, id);
542 break;
543 case CTF_K_ARRAY:
544 ret = ctf_merge_add_array(cmp, id);
545 break;
546 case CTF_K_POINTER:
547 case CTF_K_VOLATILE:
548 case CTF_K_CONST:
549 case CTF_K_RESTRICT:
550 ret = ctf_merge_add_reftype(cmp, id);
551 break;
552 case CTF_K_TYPEDEF:
553 ret = ctf_merge_add_typedef(cmp, id);
554 break;
555 case CTF_K_ENUM:
556 ret = ctf_merge_add_enum(cmp, id);
557 break;
558 case CTF_K_FUNCTION:
559 ret = ctf_merge_add_func(cmp, id);
560 break;
561 case CTF_K_FORWARD: {
562 const ctf_type_t *tp;
563 uint_t kind;
564
565 tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
566
567 /*
568 * For forward declarations, ctt_type is the CTF_K_*
569 * kind for the tag. Older versions of the CTF tools may
570 * not have filled this in so if ctt_type is unknown or
571 * invalid, treat it as a struct. This mirrors the logic in
572 * ctf_bufopen().
573 */
574
575 kind = tp->ctt_type;
576 if (kind == CTF_K_UNKNOWN || kind >= CTF_K_MAX)
577 kind = CTF_K_STRUCT;
578
579 ret = ctf_merge_add_forward(cmp, id, kind);
580 break;
581 }
582 case CTF_K_STRUCT:
583 case CTF_K_UNION:
584 ret = ctf_merge_add_sou(cmp, id, B_FALSE);
585 break;
586 case CTF_K_UNKNOWN:
587 /*
588 * We don't add unknown types, and we later assert that nothing
589 * should reference them.
590 */
591 return (0);
592 default:
593 abort();
594 }
595
596 return (ret);
597 }
598
599 static int
ctf_merge_fixup_sou(ctf_merge_types_t * cmp,ctf_id_t id)600 ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id)
601 {
602 ctf_dtdef_t *dtd;
603 ctf_merge_su_t cms;
604 ctf_id_t mapid;
605 ssize_t size;
606
607 mapid = cmp->cm_tmap[id].cmt_map;
608 VERIFY(mapid != 0);
609 dtd = ctf_dtd_lookup(cmp->cm_out, mapid);
610 VERIFY(dtd != NULL);
611
612 ctf_dprintf("Trying to fix up sou %d\n", id);
613 cms.cms_cm = cmp;
614 cms.cms_id = mapid;
615 if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0)
616 return (CTF_ERR);
617
618 if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR)
619 return (CTF_ERR);
620 if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR)
621 return (CTF_ERR);
622
623 return (0);
624 }
625
626 static int
ctf_merge_fixup_type(ctf_merge_types_t * cmp,ctf_id_t id)627 ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id)
628 {
629 int kind, ret;
630
631 kind = ctf_type_kind(cmp->cm_src, id);
632 switch (kind) {
633 case CTF_K_STRUCT:
634 case CTF_K_UNION:
635 ret = ctf_merge_fixup_sou(cmp, id);
636 break;
637 default:
638 VERIFY(0);
639 ret = CTF_ERR;
640 }
641
642 return (ret);
643 }
644
645 /*
646 * Now that we've successfully merged everything, we're going to remap the type
647 * table.
648 *
649 * Remember we have two containers: ->cm_src is what we're working from, and
650 * ->cm_out is where we are building the de-duplicated CTF.
651 *
652 * The index of this table is always the type IDs in ->cm_src.
653 *
654 * When we built this table originally in ctf_diff_self(), if we found a novel
655 * type, we marked it as .cmt_missing to indicate it needs adding to ->cm_out.
656 * Otherwise, .cmt_map indicated the ->cm_src type ID that this type duplicates.
657 *
658 * Then, in ctf_merge_common(), we walked through and added all "cmt_missing"
659 * types to ->cm_out with ctf_merge_add_type(). These routines update cmt_map
660 * to be the *new* type ID in ->cm_out. In this function, you can read
661 * "cmt_missing" as meaning "added to ->cm_out, and cmt_map updated".
662 *
663 * So at this point, we need to mop up all types where .cmt_missing == B_FALSE,
664 * making sure *their* .cmt_map values also point to the ->cm_out container.
665 */
666 static void
ctf_merge_dedup_remap(ctf_merge_types_t * cmp)667 ctf_merge_dedup_remap(ctf_merge_types_t *cmp)
668 {
669 int i;
670
671 for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) {
672 ctf_id_t tid;
673
674 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
675 VERIFY(cmp->cm_tmap[i].cmt_map != 0);
676 continue;
677 }
678
679 tid = i;
680 while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) {
681 VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
682 tid = cmp->cm_tmap[tid].cmt_map;
683 }
684 VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
685 cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map;
686 }
687 }
688
689
690 /*
691 * We're going to do three passes over the containers.
692 *
693 * Pass 1 checks for forward references in the output container that we know
694 * exist in the source container.
695 *
696 * Pass 2 adds all the missing types from the source container. As part of this
697 * we may be adding a type as a forward reference that doesn't exist yet.
698 * Any types that we encounter in this form, we need to add to a third pass.
699 *
700 * Pass 3 is the fixup pass. Here we go through and find all the types that were
701 * missing in the first.
702 *
703 * Importantly, we *must* call ctf_update between the second and third pass,
704 * otherwise several of the libctf functions will not properly find the data in
705 * the container. If we're doing a dedup we also fix up the type mapping.
706 */
707 static int
ctf_merge_common(ctf_merge_types_t * cmp)708 ctf_merge_common(ctf_merge_types_t *cmp)
709 {
710 int ret, i;
711
712 ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL);
713 ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL);
714
715 /* Pass 1 */
716 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
717 if (cmp->cm_tmap[i].cmt_forward == B_TRUE) {
718 ctf_dprintf("Forward %d\n", i);
719 ret = ctf_merge_add_sou(cmp, i, B_TRUE);
720 if (ret != 0) {
721 return (ret);
722 }
723 }
724 }
725
726 /* Pass 2 */
727 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
728 if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
729 ret = ctf_merge_add_type(cmp, i);
730 if (ret != 0) {
731 ctf_dprintf("Failed to merge type %d\n", i);
732 return (ret);
733 }
734 }
735 }
736
737 ret = ctf_update(cmp->cm_out);
738 if (ret != 0)
739 return (ret);
740
741 if (cmp->cm_dedup == B_TRUE) {
742 ctf_merge_dedup_remap(cmp);
743 }
744
745 ctf_dprintf("Beginning merge pass 3\n");
746 /* Pass 3 */
747 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
748 if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) {
749 ret = ctf_merge_fixup_type(cmp, i);
750 if (ret != 0)
751 return (ret);
752 }
753 }
754
755 return (0);
756 }
757
758 /*
759 * Uniquification is slightly different from a stock merge. For starters, we
760 * don't need to replace any forward references in the output. In this case
761 * though, the types that already exist are in a parent container to the empty
762 * output container.
763 */
764 static int
ctf_merge_uniquify_types(ctf_merge_types_t * cmp)765 ctf_merge_uniquify_types(ctf_merge_types_t *cmp)
766 {
767 int i, ret;
768
769 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
770 if (cmp->cm_tmap[i].cmt_missing == B_FALSE)
771 continue;
772 ret = ctf_merge_add_type(cmp, i);
773 if (ret != 0)
774 return (ret);
775 }
776
777 ret = ctf_update(cmp->cm_out);
778 if (ret != 0)
779 return (ret);
780
781 for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
782 if (cmp->cm_tmap[i].cmt_fixup == B_FALSE)
783 continue;
784 ret = ctf_merge_fixup_type(cmp, i);
785 if (ret != 0)
786 return (ret);
787 }
788
789 return (0);
790 }
791
792 static int
ctf_merge_types_init(ctf_merge_types_t * cmp)793 ctf_merge_types_init(ctf_merge_types_t *cmp)
794 {
795 cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) *
796 (cmp->cm_src->ctf_typemax + 1));
797 if (cmp->cm_tmap == NULL)
798 return (ctf_set_errno(cmp->cm_out, ENOMEM));
799 bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
800 (cmp->cm_src->ctf_typemax + 1));
801 return (0);
802 }
803
804 static void
ctf_merge_types_fini(ctf_merge_types_t * cmp)805 ctf_merge_types_fini(ctf_merge_types_t *cmp)
806 {
807 ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
808 (cmp->cm_src->ctf_typemax + 1));
809 }
810
811 /*
812 * After performing a pass, we need to go through the object and function type
813 * maps and potentially fix them up based on the new maps that we have.
814 */
815 static void
ctf_merge_fixup_symmaps(ctf_merge_types_t * cmp,ctf_merge_input_t * cmi)816 ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi)
817 {
818 ctf_merge_objmap_t *cmo;
819 ctf_merge_funcmap_t *cmf;
820
821 for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
822 cmo = list_next(&cmi->cmi_omap, cmo)) {
823 VERIFY3S(cmo->cmo_tid, !=, 0);
824 VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0);
825 cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map;
826 }
827
828 for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
829 cmf = list_next(&cmi->cmi_fmap, cmf)) {
830 int i;
831
832 VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0);
833 cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map;
834 for (i = 0; i < cmf->cmf_argc; i++) {
835 VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
836 cmf->cmf_args[i] =
837 cmp->cm_tmap[cmf->cmf_args[i]].cmt_map;
838 }
839 }
840 }
841
842 /*
843 * Merge the types contained inside of two input files. The second input file is
844 * always going to be the destination. We're guaranteed that it's always
845 * writeable.
846 */
847 static int
ctf_merge_types(void * arg,void * arg2,void ** outp,void * unsued)848 ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued)
849 {
850 int ret;
851 ctf_merge_types_t cm;
852 ctf_diff_t *cdp;
853 ctf_merge_input_t *scmi = arg;
854 ctf_merge_input_t *dcmi = arg2;
855 ctf_file_t *out = dcmi->cmi_input;
856 ctf_file_t *source = scmi->cmi_input;
857
858 ctf_dprintf("merging %p->%p\n", source, out);
859
860 if (!(out->ctf_flags & LCTF_RDWR))
861 return (ctf_set_errno(out, ECTF_RDONLY));
862
863 if (ctf_getmodel(out) != ctf_getmodel(source))
864 return (ctf_set_errno(out, ECTF_DMODEL));
865
866 if ((ret = ctf_diff_init(out, source, &cdp)) != 0)
867 return (ret);
868
869 cm.cm_out = out;
870 cm.cm_src = source;
871 cm.cm_dedup = B_FALSE;
872 cm.cm_unique = B_FALSE;
873 ret = ctf_merge_types_init(&cm);
874 if (ret != 0) {
875 ctf_diff_fini(cdp);
876 return (ctf_set_errno(out, ret));
877 }
878
879 ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
880 if (ret != 0)
881 goto cleanup;
882 ret = ctf_merge_common(&cm);
883 ctf_dprintf("merge common returned with %d\n", ret);
884 if (ret == 0) {
885 ret = ctf_update(out);
886 ctf_dprintf("update returned with %d\n", ret);
887 } else {
888 goto cleanup;
889 }
890
891 /*
892 * Now we need to fix up the object and function maps.
893 */
894 ctf_merge_fixup_symmaps(&cm, scmi);
895
896 /*
897 * Now that we've fixed things up, we need to give our function and
898 * object maps to the destination, such that it can continue to update
899 * them going forward.
900 */
901 list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap);
902 list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap);
903
904 cleanup:
905 if (ret == 0)
906 *outp = dcmi;
907 ctf_merge_types_fini(&cm);
908 ctf_diff_fini(cdp);
909 if (ret != 0)
910 return (ctf_errno(out));
911 ctf_phase_bump();
912 return (0);
913 }
914
915 static int
ctf_uniquify_types(ctf_merge_t * cmh,ctf_file_t * src,ctf_file_t ** outp)916 ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp)
917 {
918 int err, ret;
919 ctf_file_t *out;
920 ctf_merge_types_t cm;
921 ctf_diff_t *cdp;
922 ctf_merge_input_t *cmi;
923 ctf_file_t *parent = cmh->cmh_unique;
924
925 *outp = NULL;
926 out = ctf_fdcreate(cmh->cmh_ofd, &err);
927 if (out == NULL)
928 return (ctf_set_errno(src, err));
929
930 out->ctf_parname = cmh->cmh_pname;
931 if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) {
932 (void) ctf_set_errno(src, ctf_errno(out));
933 ctf_close(out);
934 return (CTF_ERR);
935 }
936
937 if (ctf_import(out, parent) != 0) {
938 (void) ctf_set_errno(src, ctf_errno(out));
939 ctf_close(out);
940 return (CTF_ERR);
941 }
942
943 if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) {
944 ctf_close(out);
945 return (ctf_set_errno(src, ctf_errno(parent)));
946 }
947
948 cm.cm_out = parent;
949 cm.cm_src = src;
950 cm.cm_dedup = B_FALSE;
951 cm.cm_unique = B_TRUE;
952 ret = ctf_merge_types_init(&cm);
953 if (ret != 0) {
954 ctf_close(out);
955 ctf_diff_fini(cdp);
956 return (ctf_set_errno(src, ret));
957 }
958
959 ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
960 if (ret == 0) {
961 cm.cm_out = out;
962 ret = ctf_merge_uniquify_types(&cm);
963 if (ret == 0)
964 ret = ctf_update(out);
965 }
966
967 if (ret != 0) {
968 ctf_merge_types_fini(&cm);
969 ctf_diff_fini(cdp);
970 return (ctf_set_errno(src, ctf_errno(cm.cm_out)));
971 }
972
973 for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
974 cmi = list_next(&cmh->cmh_inputs, cmi)) {
975 ctf_merge_fixup_symmaps(&cm, cmi);
976 }
977
978 ctf_merge_types_fini(&cm);
979 ctf_diff_fini(cdp);
980 *outp = out;
981 return (0);
982 }
983
984 static void
ctf_merge_fini_input(ctf_merge_input_t * cmi)985 ctf_merge_fini_input(ctf_merge_input_t *cmi)
986 {
987 ctf_merge_objmap_t *cmo;
988 ctf_merge_funcmap_t *cmf;
989
990 while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL)
991 ctf_free(cmo, sizeof (ctf_merge_objmap_t));
992
993 while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL)
994 ctf_free(cmf, sizeof (ctf_merge_funcmap_t) +
995 sizeof (ctf_id_t) * cmf->cmf_argc);
996
997 if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL)
998 ctf_close(cmi->cmi_input);
999
1000 ctf_free(cmi, sizeof (ctf_merge_input_t));
1001 }
1002
1003 void
ctf_merge_fini(ctf_merge_t * cmh)1004 ctf_merge_fini(ctf_merge_t *cmh)
1005 {
1006 ctf_merge_input_t *cmi;
1007
1008 ctf_strfree(cmh->cmh_label);
1009 ctf_strfree(cmh->cmh_pname);
1010
1011 while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL)
1012 ctf_merge_fini_input(cmi);
1013
1014 ctf_free(cmh, sizeof (ctf_merge_t));
1015 }
1016
1017 ctf_merge_t *
ctf_merge_init(int fd,int * errp)1018 ctf_merge_init(int fd, int *errp)
1019 {
1020 int err;
1021 ctf_merge_t *out;
1022 struct stat st;
1023
1024 if (errp == NULL)
1025 errp = &err;
1026
1027 if (fd != -1 && fstat(fd, &st) != 0) {
1028 *errp = EINVAL;
1029 return (NULL);
1030 }
1031
1032 out = ctf_alloc(sizeof (ctf_merge_t));
1033 if (out == NULL) {
1034 *errp = ENOMEM;
1035 return (NULL);
1036 }
1037
1038 if (fd == -1) {
1039 out->cmh_msyms = B_FALSE;
1040 } else {
1041 out->cmh_msyms = B_TRUE;
1042 }
1043
1044 list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t),
1045 offsetof(ctf_merge_input_t, cmi_node));
1046 out->cmh_ninputs = 0;
1047 out->cmh_nthreads = 1;
1048 out->cmh_unique = NULL;
1049 out->cmh_ofd = fd;
1050 out->cmh_flags = 0;
1051 out->cmh_label = NULL;
1052 out->cmh_pname = NULL;
1053
1054 return (out);
1055 }
1056
1057 int
ctf_merge_label(ctf_merge_t * cmh,const char * label)1058 ctf_merge_label(ctf_merge_t *cmh, const char *label)
1059 {
1060 char *dup;
1061
1062 if (label == NULL)
1063 return (EINVAL);
1064
1065 dup = ctf_strdup(label);
1066 if (dup == NULL)
1067 return (EAGAIN);
1068
1069 ctf_strfree(cmh->cmh_label);
1070 cmh->cmh_label = dup;
1071 return (0);
1072 }
1073
1074 static int
ctf_merge_add_function(ctf_merge_input_t * cmi,ctf_funcinfo_t * fip,ulong_t idx,const char * file,const char * name,const Elf64_Sym * symp)1075 ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx,
1076 const char *file, const char *name, const Elf64_Sym *symp)
1077 {
1078 ctf_merge_funcmap_t *fmap;
1079
1080 fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) +
1081 sizeof (ctf_id_t) * fip->ctc_argc);
1082 if (fmap == NULL)
1083 return (ENOMEM);
1084
1085 fmap->cmf_idx = idx;
1086 fmap->cmf_sym = *symp;
1087 fmap->cmf_rtid = fip->ctc_return;
1088 fmap->cmf_flags = fip->ctc_flags;
1089 fmap->cmf_argc = fip->ctc_argc;
1090 fmap->cmf_name = name;
1091 if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1092 fmap->cmf_file = file;
1093 } else {
1094 fmap->cmf_file = NULL;
1095 }
1096
1097 if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc,
1098 fmap->cmf_args) != 0) {
1099 ctf_free(fmap, sizeof (ctf_merge_funcmap_t) +
1100 sizeof (ctf_id_t) * fip->ctc_argc);
1101 return (ctf_errno(cmi->cmi_input));
1102 }
1103
1104 ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx,
1105 fmap->cmf_file != NULL ? fmap->cmf_file : "global",
1106 ELF64_ST_BIND(symp->st_info));
1107 list_insert_tail(&cmi->cmi_fmap, fmap);
1108 return (0);
1109 }
1110
1111 static int
ctf_merge_add_object(ctf_merge_input_t * cmi,ctf_id_t id,ulong_t idx,const char * file,const char * name,const Elf64_Sym * symp)1112 ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx,
1113 const char *file, const char *name, const Elf64_Sym *symp)
1114 {
1115 ctf_merge_objmap_t *cmo;
1116
1117 cmo = ctf_alloc(sizeof (ctf_merge_objmap_t));
1118 if (cmo == NULL)
1119 return (ENOMEM);
1120
1121 cmo->cmo_name = name;
1122 if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1123 cmo->cmo_file = file;
1124 } else {
1125 cmo->cmo_file = NULL;
1126 }
1127 cmo->cmo_idx = idx;
1128 cmo->cmo_tid = id;
1129 cmo->cmo_sym = *symp;
1130 list_insert_tail(&cmi->cmi_omap, cmo);
1131
1132 ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id,
1133 cmo->cmo_file != NULL ? cmo->cmo_file : "global");
1134
1135 return (0);
1136 }
1137
1138 static int
ctf_merge_add_symbol(const Elf64_Sym * symp,ulong_t idx,const char * file,const char * name,boolean_t primary,void * arg)1139 ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file,
1140 const char *name, boolean_t primary, void *arg)
1141 {
1142 ctf_merge_input_t *cmi = arg;
1143 ctf_file_t *fp = cmi->cmi_input;
1144 ushort_t *data, funcbase;
1145 uint_t type;
1146 ctf_funcinfo_t fi;
1147
1148 /*
1149 * See if there is type information for this. If there is no
1150 * type information for this entry or no translation, then we
1151 * will find the value zero. This indicates no type ID for
1152 * objects and encodes unknown information for functions.
1153 */
1154 if (fp->ctf_sxlate[idx] == -1u)
1155 return (0);
1156 data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]);
1157 if (*data == 0)
1158 return (0);
1159
1160 type = ELF64_ST_TYPE(symp->st_info);
1161
1162 switch (type) {
1163 case STT_FUNC:
1164 funcbase = *data;
1165 if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION)
1166 return (0);
1167 data++;
1168 fi.ctc_return = *data;
1169 data++;
1170 fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase);
1171 fi.ctc_flags = 0;
1172
1173 if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) {
1174 fi.ctc_flags |= CTF_FUNC_VARARG;
1175 fi.ctc_argc--;
1176 }
1177 return (ctf_merge_add_function(cmi, &fi, idx, file, name,
1178 symp));
1179 case STT_OBJECT:
1180 return (ctf_merge_add_object(cmi, *data, idx, file, name,
1181 symp));
1182 default:
1183 return (0);
1184 }
1185 }
1186
1187 /*
1188 * Whenever we create an entry to merge, we then go and add a second empty
1189 * ctf_file_t which we use for the purposes of our merging. It's not the best,
1190 * but it's the best that we've got at the moment.
1191 */
1192 int
ctf_merge_add(ctf_merge_t * cmh,ctf_file_t * input)1193 ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input)
1194 {
1195 int ret;
1196 ctf_merge_input_t *cmi;
1197 ctf_file_t *empty;
1198
1199 ctf_dprintf("adding input %p\n", input);
1200
1201 if (input->ctf_flags & LCTF_CHILD)
1202 return (ECTF_MCHILD);
1203
1204 cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1205 if (cmi == NULL)
1206 return (ENOMEM);
1207
1208 cmi->cmi_created = B_FALSE;
1209 cmi->cmi_input = input;
1210 list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1211 offsetof(ctf_merge_funcmap_t, cmf_node));
1212 list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1213 offsetof(ctf_merge_objmap_t, cmo_node));
1214
1215 if (cmh->cmh_msyms == B_TRUE) {
1216 if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol,
1217 cmi)) != 0) {
1218 ctf_merge_fini_input(cmi);
1219 return (ret);
1220 }
1221 }
1222
1223 list_insert_tail(&cmh->cmh_inputs, cmi);
1224 cmh->cmh_ninputs++;
1225
1226 /* And now the empty one to merge into this */
1227 cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1228 if (cmi == NULL)
1229 return (ENOMEM);
1230 list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1231 offsetof(ctf_merge_funcmap_t, cmf_node));
1232 list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1233 offsetof(ctf_merge_objmap_t, cmo_node));
1234
1235 empty = ctf_fdcreate(cmh->cmh_ofd, &ret);
1236 if (empty == NULL)
1237 return (ret);
1238 cmi->cmi_input = empty;
1239 cmi->cmi_created = B_TRUE;
1240
1241 if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) {
1242 return (ctf_errno(empty));
1243 }
1244
1245 list_insert_tail(&cmh->cmh_inputs, cmi);
1246 cmh->cmh_ninputs++;
1247 ctf_dprintf("added containers %p and %p\n", input, empty);
1248 return (0);
1249 }
1250
1251 int
ctf_merge_uniquify(ctf_merge_t * cmh,ctf_file_t * u,const char * pname)1252 ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname)
1253 {
1254 char *dup;
1255
1256 if (u->ctf_flags & LCTF_CHILD)
1257 return (ECTF_MCHILD);
1258 if (pname == NULL)
1259 return (EINVAL);
1260 dup = ctf_strdup(pname);
1261 if (dup == NULL)
1262 return (EINVAL);
1263 ctf_strfree(cmh->cmh_pname);
1264 cmh->cmh_pname = dup;
1265 cmh->cmh_unique = u;
1266 return (0);
1267 }
1268
1269 /*
1270 * Symbol matching rules: the purpose of this is to verify that the type
1271 * information that we have for a given symbol actually matches the output
1272 * symbol. This is unfortunately complicated by several different factors:
1273 *
1274 * 1. When merging multiple .o's into a single item, the symbol table index will
1275 * not match.
1276 *
1277 * 2. Visibility of a symbol may not be identical to the object file or the
1278 * DWARF information due to symbol reduction via a mapfile.
1279 *
1280 * As such, we have to employ the following rules:
1281 *
1282 * 1. A global symbol table entry always matches a global CTF symbol with the
1283 * same name.
1284 *
1285 * 2. A local symbol table entry always matches a local CTF symbol if they have
1286 * the same name and they belong to the same file.
1287 *
1288 * 3. A weak symbol matches a non-weak symbol. This happens if we find that the
1289 * types match, the values match, the sizes match, and the section indexes
1290 * match. This happens when we do a conversion in one pass, it almost never
1291 * happens when we're merging multiple object files. If we match a CTF global
1292 * symbol, that's a fixed match, otherwise it's a fuzzy match.
1293 *
1294 * 4. A local symbol table entry matches a global CTF entry if the
1295 * other pieces fail, but they have the same name. This is considered a fuzzy
1296 * match and is not used unless we have no other options.
1297 *
1298 * 5. A weak symbol table entry matches a weak CTF entry if the other pieces
1299 * fail, but they have the same name. This is considered a fuzzy match and is
1300 * not used unless we have no other options. When merging independent .o files,
1301 * this is often the only recourse we have to matching weak symbols.
1302 *
1303 * In the end, this would all be much simpler if we were able to do this as part
1304 * of libld which would be able to do all the symbol transformations.
1305 */
1306 static boolean_t
ctf_merge_symbol_match(const char * ctf_file,const char * ctf_name,const Elf64_Sym * ctf_symp,const char * symtab_file,const char * symtab_name,const Elf64_Sym * symtab_symp,boolean_t * is_fuzzy)1307 ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name,
1308 const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name,
1309 const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy)
1310 {
1311 *is_fuzzy = B_FALSE;
1312 uint_t symtab_bind, ctf_bind;
1313
1314 symtab_bind = ELF64_ST_BIND(symtab_symp->st_info);
1315 ctf_bind = ELF64_ST_BIND(ctf_symp->st_info);
1316
1317 ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n",
1318 symtab_file, symtab_name, symtab_bind,
1319 ctf_file, ctf_name, ctf_bind);
1320 if (strcmp(ctf_name, symtab_name) != 0) {
1321 return (B_FALSE);
1322 }
1323
1324 if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) {
1325 return (B_TRUE);
1326 } else if (symtab_bind == STB_GLOBAL) {
1327 return (B_FALSE);
1328 }
1329
1330 if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind &&
1331 ctf_file != NULL && symtab_file != NULL &&
1332 strcmp(ctf_file, symtab_file) == 0) {
1333 return (B_TRUE);
1334 }
1335
1336 if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK &&
1337 ELF64_ST_TYPE(symtab_symp->st_info) ==
1338 ELF64_ST_TYPE(ctf_symp->st_info) &&
1339 symtab_symp->st_value == ctf_symp->st_value &&
1340 symtab_symp->st_size == ctf_symp->st_size &&
1341 symtab_symp->st_shndx == ctf_symp->st_shndx) {
1342 if (ctf_bind == STB_GLOBAL) {
1343 return (B_TRUE);
1344 }
1345
1346 if (ctf_bind == STB_LOCAL && ctf_file != NULL &&
1347 symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) {
1348 *is_fuzzy = B_TRUE;
1349 return (B_TRUE);
1350 }
1351 }
1352
1353 if (ctf_bind == STB_GLOBAL ||
1354 (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) {
1355 *is_fuzzy = B_TRUE;
1356 return (B_TRUE);
1357 }
1358
1359 return (B_FALSE);
1360 }
1361
1362 /*
1363 * For each symbol, try and find a match. We will attempt to find an exact
1364 * match; however, we will settle for a fuzzy match in general. There is one
1365 * case where we will not opt to use a fuzzy match, which is when performing the
1366 * deduplication of a container. In such a case we are trying to reduce common
1367 * types and a fuzzy match would be inappropriate as if we're in the context of
1368 * a single container, the conversion process should have identified any exact
1369 * or fuzzy matches that were required.
1370 */
1371 static int
ctf_merge_symbols(const Elf64_Sym * symp,ulong_t idx,const char * file,const char * name,boolean_t primary,void * arg)1372 ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file,
1373 const char *name, boolean_t primary, void *arg)
1374 {
1375 int err;
1376 uint_t type, bind;
1377 ctf_merge_symbol_arg_t *csa = arg;
1378 ctf_file_t *fp = csa->cmsa_out;
1379
1380 type = ELF64_ST_TYPE(symp->st_info);
1381 bind = ELF64_ST_BIND(symp->st_info);
1382
1383 ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name,
1384 ELF64_ST_BIND(symp->st_info));
1385
1386 if (type == STT_OBJECT) {
1387 ctf_merge_objmap_t *cmo, *match = NULL;
1388
1389 for (cmo = list_head(csa->cmsa_objmap); cmo != NULL;
1390 cmo = list_next(csa->cmsa_objmap, cmo)) {
1391 boolean_t is_fuzzy = B_FALSE;
1392 if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name,
1393 &cmo->cmo_sym, file, name, symp, &is_fuzzy)) {
1394 if (is_fuzzy && csa->cmsa_dedup &&
1395 bind != STB_WEAK) {
1396 continue;
1397 }
1398 match = cmo;
1399 if (is_fuzzy) {
1400 continue;
1401 }
1402 break;
1403 }
1404 }
1405
1406 if (match == NULL) {
1407 return (0);
1408 }
1409
1410 if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) {
1411 ctf_dprintf("Failed to add symbol %s->%d: %s\n", name,
1412 match->cmo_tid, ctf_errmsg(ctf_errno(fp)));
1413 return (ctf_errno(fp));
1414 }
1415 ctf_dprintf("mapped object into output %s/%s->%ld\n", file,
1416 name, match->cmo_tid);
1417 } else {
1418 ctf_merge_funcmap_t *cmf, *match = NULL;
1419 ctf_funcinfo_t fi;
1420
1421 for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL;
1422 cmf = list_next(csa->cmsa_funcmap, cmf)) {
1423 boolean_t is_fuzzy = B_FALSE;
1424 if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name,
1425 &cmf->cmf_sym, file, name, symp, &is_fuzzy)) {
1426 if (is_fuzzy && csa->cmsa_dedup &&
1427 bind != STB_WEAK) {
1428 continue;
1429 }
1430 match = cmf;
1431 if (is_fuzzy) {
1432 continue;
1433 }
1434 break;
1435 }
1436 }
1437
1438 if (match == NULL) {
1439 return (0);
1440 }
1441
1442 fi.ctc_return = match->cmf_rtid;
1443 fi.ctc_argc = match->cmf_argc;
1444 fi.ctc_flags = match->cmf_flags;
1445 if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) !=
1446 0) {
1447 ctf_dprintf("Failed to add function %s: %s\n", name,
1448 ctf_errmsg(ctf_errno(fp)));
1449 return (ctf_errno(fp));
1450 }
1451 ctf_dprintf("mapped function into output %s/%s\n", file,
1452 name);
1453 }
1454
1455 return (0);
1456 }
1457
1458 int
ctf_merge_merge(ctf_merge_t * cmh,ctf_file_t ** outp)1459 ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp)
1460 {
1461 int err, merr;
1462 ctf_merge_input_t *cmi;
1463 ctf_id_t ltype;
1464 mergeq_t *mqp;
1465 ctf_merge_input_t *final;
1466 ctf_file_t *out;
1467
1468 ctf_dprintf("Beginning ctf_merge_merge()\n");
1469 if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) {
1470 const char *label = ctf_label_topmost(cmh->cmh_unique);
1471 if (label == NULL)
1472 return (ECTF_NOLABEL);
1473 if (strcmp(label, cmh->cmh_label) != 0)
1474 return (ECTF_LCONFLICT);
1475 }
1476
1477 if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) {
1478 return (errno);
1479 }
1480
1481 VERIFY(cmh->cmh_ninputs % 2 == 0);
1482 for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
1483 cmi = list_next(&cmh->cmh_inputs, cmi)) {
1484 if (mergeq_add(mqp, cmi) == -1) {
1485 err = errno;
1486 mergeq_fini(mqp);
1487 }
1488 }
1489
1490 err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr);
1491 mergeq_fini(mqp);
1492
1493 if (err == MERGEQ_ERROR) {
1494 return (errno);
1495 } else if (err == MERGEQ_UERROR) {
1496 return (merr);
1497 }
1498
1499 /*
1500 * Disassociate the generated ctf_file_t from the original input. That
1501 * way when the input gets cleaned up, we don't accidentally kill the
1502 * final reference to the ctf_file_t. If it gets uniquified then we'll
1503 * kill it.
1504 */
1505 VERIFY(final->cmi_input != NULL);
1506 out = final->cmi_input;
1507 final->cmi_input = NULL;
1508
1509 ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique);
1510 if (cmh->cmh_unique != NULL) {
1511 ctf_file_t *u;
1512 err = ctf_uniquify_types(cmh, out, &u);
1513 if (err != 0) {
1514 err = ctf_errno(out);
1515 ctf_close(out);
1516 return (err);
1517 }
1518 ctf_close(out);
1519 out = u;
1520 }
1521
1522 ltype = out->ctf_typemax;
1523 if ((out->ctf_flags & LCTF_CHILD) && ltype != 0)
1524 ltype += CTF_CHILD_START;
1525 ctf_dprintf("trying to add the label\n");
1526 if (cmh->cmh_label != NULL &&
1527 ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) {
1528 ctf_close(out);
1529 return (ctf_errno(out));
1530 }
1531
1532 ctf_dprintf("merging symbols and the like\n");
1533 if (cmh->cmh_msyms == B_TRUE) {
1534 ctf_merge_symbol_arg_t arg;
1535 arg.cmsa_objmap = &final->cmi_omap;
1536 arg.cmsa_funcmap = &final->cmi_fmap;
1537 arg.cmsa_out = out;
1538 arg.cmsa_dedup = B_FALSE;
1539 err = ctf_symtab_iter(out, ctf_merge_symbols, &arg);
1540 if (err != 0) {
1541 ctf_close(out);
1542 return (err);
1543 }
1544 }
1545
1546 err = ctf_update(out);
1547 if (err != 0) {
1548 err = ctf_errno(out);
1549 ctf_close(out);
1550 return (err);
1551 }
1552
1553 *outp = out;
1554 return (0);
1555 }
1556
1557 /*
1558 * When we get told that something is unique, eg. same is B_FALSE, then that
1559 * tells us that we need to add it to the output. If same is B_TRUE, then we'll
1560 * want to record it in the mapping table so that we know how to redirect types
1561 * to the extant ones.
1562 */
1563 static void
ctf_dedup_cb(ctf_file_t * ifp,ctf_id_t iid,boolean_t same,ctf_file_t * ofp,ctf_id_t oid,void * arg)1564 ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
1565 ctf_id_t oid, void *arg)
1566 {
1567 ctf_merge_types_t *cmp = arg;
1568 ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
1569
1570 if (same == B_TRUE) {
1571 /*
1572 * The output id here may itself map to something else.
1573 * Therefore, we need to basically walk a chain and see what it
1574 * points to until it itself points to a base type, eg. -1.
1575 * Otherwise we'll dedup to something which no longer exists.
1576 */
1577 while (cmt[oid].cmt_missing == B_FALSE)
1578 oid = cmt[oid].cmt_map;
1579 cmt[iid].cmt_map = oid;
1580 ctf_dprintf("dedup %d->%d \n", iid, oid);
1581 } else {
1582 VERIFY(cmt[iid].cmt_map == 0);
1583 cmt[iid].cmt_missing = B_TRUE;
1584 ctf_dprintf("dedup %d is missing\n", iid);
1585 }
1586 }
1587
1588 /*
1589 * Dedup a CTF container.
1590 *
1591 * DWARF and other encoding formats that we use to create CTF data may create
1592 * multiple copies of a given type. However, after doing a conversion, and
1593 * before doing a merge, we'd prefer, if possible, to have every input container
1594 * to be unique.
1595 *
1596 * Doing a deduplication is like a normal merge. However, when we diff the types
1597 * in the container, rather than doing a normal diff, we instead want to diff
1598 * against any already processed types. eg, for a given type i in a container,
1599 * we want to diff it from 0 to i - 1.
1600 */
1601 int
ctf_merge_dedup(ctf_merge_t * cmp,ctf_file_t ** outp)1602 ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp)
1603 {
1604 int ret;
1605 ctf_diff_t *cdp = NULL;
1606 ctf_merge_input_t *cmi, *cmc;
1607 ctf_file_t *ifp, *ofp;
1608 ctf_merge_types_t cm;
1609
1610 if (cmp == NULL || outp == NULL)
1611 return (EINVAL);
1612
1613 ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs);
1614 if (cmp->cmh_ninputs != 2)
1615 return (EINVAL);
1616
1617 ctf_dprintf("passed argument sanity check\n");
1618
1619 cmi = list_head(&cmp->cmh_inputs);
1620 VERIFY(cmi != NULL);
1621 cmc = list_next(&cmp->cmh_inputs, cmi);
1622 VERIFY(cmc != NULL);
1623 ifp = cmi->cmi_input;
1624 ofp = cmc->cmi_input;
1625 VERIFY(ifp != NULL);
1626 VERIFY(ofp != NULL);
1627 cm.cm_src = ifp;
1628 cm.cm_out = ofp;
1629 cm.cm_dedup = B_TRUE;
1630 cm.cm_unique = B_FALSE;
1631
1632 if ((ret = ctf_merge_types_init(&cm)) != 0) {
1633 return (ret);
1634 }
1635
1636 if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0)
1637 goto err;
1638
1639 ctf_dprintf("Successfully initialized dedup\n");
1640 if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0)
1641 goto err;
1642
1643 ctf_dprintf("Successfully diffed types\n");
1644 ret = ctf_merge_common(&cm);
1645 ctf_dprintf("deduping types result: %d\n", ret);
1646 if (ret == 0)
1647 ret = ctf_update(cm.cm_out);
1648 if (ret != 0)
1649 goto err;
1650
1651 ctf_dprintf("Successfully deduped types\n");
1652 ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL);
1653
1654 /*
1655 * Now we need to fix up the object and function maps.
1656 */
1657 ctf_merge_fixup_symmaps(&cm, cmi);
1658
1659 if (cmp->cmh_msyms == B_TRUE) {
1660 ctf_merge_symbol_arg_t arg;
1661 arg.cmsa_objmap = &cmi->cmi_omap;
1662 arg.cmsa_funcmap = &cmi->cmi_fmap;
1663 arg.cmsa_out = cm.cm_out;
1664 arg.cmsa_dedup = B_TRUE;
1665 ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg);
1666 if (ret != 0) {
1667 ctf_dprintf("failed to dedup symbols: %s\n",
1668 ctf_errmsg(ret));
1669 goto err;
1670 }
1671 }
1672
1673 ret = ctf_update(cm.cm_out);
1674 if (ret == 0) {
1675 cmc->cmi_input = NULL;
1676 *outp = cm.cm_out;
1677 }
1678 ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL);
1679 err:
1680 ctf_merge_types_fini(&cm);
1681 ctf_diff_fini(cdp);
1682 return (ret);
1683 }
1684
1685 int
ctf_merge_set_nthreads(ctf_merge_t * cmp,const uint_t nthrs)1686 ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs)
1687 {
1688 if (nthrs == 0)
1689 return (EINVAL);
1690 cmp->cmh_nthreads = nthrs;
1691 return (0);
1692 }
1693