xref: /illumos-gate/usr/src/lib/libctf/common/ctf_merge.c (revision 7b34a9a5df26271af0da06974fc361c468cd48d3)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2020 Joyent, Inc.
14  */
15 
16 /*
17  * To perform a merge of two CTF containers, we first diff the two containers'
18  * types. For every type that's in the src container, but not in the dst
19  * container, we note it and add it to dst container. If there are any objects
20  * or functions associated with src, we go through and update the types that
21  * they refer to such that they all refer to types in the dst container.
22  *
23  * The bulk of the logic for the merge, after we've run the diff, occurs in
24  * ctf_merge_common().
25  *
26  * In terms of exported APIs, we don't really export a simple "merge these two
27  * containers" operation. The general way this is used, in something like
28  * ctfmerge(1), is to add all the containers and then let us figure out the
29  * best way to merge them.
30  */
31 
32 #include <libctf_impl.h>
33 #include <sys/debug.h>
34 #include <sys/list.h>
35 #include <stddef.h>
36 #include <fcntl.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <mergeq.h>
40 #include <errno.h>
41 
42 typedef struct ctf_merge_tinfo {	/* Per-type merge state; the array is indexed by source type ID */
43 	uint16_t cmt_map;	/* Map to the type in out */
44 	boolean_t cmt_fixup;	/* struct/union added without members; filled in by the fixup pass */
45 	boolean_t cmt_forward;	/* diff matched this type against a forward (see ctf_merge_diffcb) */
46 	boolean_t cmt_missing;	/* marked by the diff callback; such types get added to the output */
47 } ctf_merge_tinfo_t;
48 
49 /*
50  * State required for doing an individual merge of two containers.
51  */
52 typedef struct ctf_merge_types {
53 	ctf_file_t *cm_out;		/* Output CTF file */
54 	ctf_file_t *cm_src;		/* Input CTF file */
55 	ctf_merge_tinfo_t *cm_tmap;	/* Type state, ctf_typemax + 1 entries indexed by cm_src type ID */
56 	boolean_t cm_dedup;		/* Are we doing a dedup? */
57 	boolean_t cm_unique;		/* are we doing a uniquify? */
58 } ctf_merge_types_t;
59 
60 typedef struct ctf_merge_objmap {	/* One data object symbol and its type */
61 	list_node_t cmo_node;		/* Linkage on a ctf_merge_input_t's cmi_omap list */
62 	const char *cmo_name;		/* Symbol name */
63 	const char *cmo_file;		/* Symbol file; NULL unless the symbol is STB_LOCAL */
64 	ulong_t cmo_idx;		/* Symbol ID */
65 	Elf64_Sym cmo_sym;		/* Symbol Entry */
66 	ctf_id_t cmo_tid;		/* Type ID; rewritten by ctf_merge_fixup_symmaps() */
67 } ctf_merge_objmap_t;
68 
69 typedef struct ctf_merge_funcmap {	/* One function symbol and its signature */
70 	list_node_t cmf_node;		/* Linkage on a ctf_merge_input_t's cmi_fmap list */
71 	const char *cmf_name;		/* Symbol name */
72 	const char *cmf_file;		/* Symbol file; NULL unless the symbol is STB_LOCAL */
73 	ulong_t cmf_idx;		/* Symbol ID */
74 	Elf64_Sym cmf_sym;		/* Symbol Entry */
75 	ctf_id_t cmf_rtid;		/* Return type ID */
76 	uint_t cmf_flags;		/* ctf_funcinfo_t ctc_flags */
77 	uint_t cmf_argc;		/* Number of arguments */
78 	ctf_id_t cmf_args[];		/* Types of arguments; cmf_argc trailing entries */
79 } ctf_merge_funcmap_t;
80 
81 typedef struct ctf_merge_input {	/* One container taking part in a merge */
82 	list_node_t cmi_node;		/* Linkage on the handle's cmh_inputs list */
83 	ctf_file_t *cmi_input;		/* The CTF container itself */
84 	list_t cmi_omap;		/* List of ctf_merge_objmap_t */
85 	list_t cmi_fmap;		/* List of ctf_merge_funcmap_t */
86 	boolean_t cmi_created;		/* If B_TRUE, ctf_merge_fini_input() closes cmi_input */
87 } ctf_merge_input_t;
88 
89 struct ctf_merge_handle {	/* Merge session state; allocated and freed as a ctf_merge_t */
90 	list_t cmh_inputs;		/* Input list */
91 	uint_t cmh_ninputs;		/* Number of inputs */
92 	uint_t cmh_nthreads;		/* Number of threads to use */
93 	ctf_file_t *cmh_unique;		/* ctf to uniquify against */
94 	boolean_t cmh_msyms;		/* Should we merge symbols/funcs? */
95 	int cmh_ofd;			/* FD for output file */
96 	int cmh_flags;			/* Flags that control merge behavior */
97 	char *cmh_label;		/* Optional label; owned copy, freed in ctf_merge_fini() */
98 	char *cmh_pname;		/* Parent name; freed in ctf_merge_fini() */
99 };
100 
101 typedef struct ctf_merge_symbol_arg {	/* NOTE(review): consumers are outside this view; field notes are inferred from names */
102 	list_t *cmsa_objmap;	/* presumably a list of ctf_merge_objmap_t -- confirm */
103 	list_t *cmsa_funcmap;	/* presumably a list of ctf_merge_funcmap_t -- confirm */
104 	ctf_file_t *cmsa_out;	/* presumably the output container -- confirm */
105 	boolean_t cmsa_dedup;	/* presumably whether a dedup is in progress -- confirm */
106 } ctf_merge_symbol_arg_t;
107 
108 static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t);	/* declared early: the per-kind add routines call back into it */
109 
110 static ctf_id_t
111 ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id)
112 {
113 	if (cmp->cm_dedup == B_FALSE) {
114 		VERIFY(cmp->cm_tmap[id].cmt_map != 0);
115 		return (cmp->cm_tmap[id].cmt_map);
116 	}
117 
118 	while (cmp->cm_tmap[id].cmt_missing == B_FALSE) {
119 		VERIFY(cmp->cm_tmap[id].cmt_map != 0);
120 		id = cmp->cm_tmap[id].cmt_map;
121 	}
122 	VERIFY(cmp->cm_tmap[id].cmt_map != 0);
123 	return (cmp->cm_tmap[id].cmt_map);
124 }
125 
126 static void
127 ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
128     ctf_id_t oid, void *arg)
129 {
130 	ctf_merge_types_t *cmp = arg;
131 	ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
132 
133 	if (same == B_TRUE) {
134 		if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD &&
135 		    ctf_type_kind(ofp, oid) != CTF_K_FORWARD) {
136 			VERIFY(cmt[oid].cmt_map == 0);
137 
138 			/*
139 			 * If we're uniquifying types, it's possible for the
140 			 * container that we're uniquifying against to have a
141 			 * forward which exists in the container being reduced.
142 			 * For example, genunix has the machcpu structure as a
143 			 * forward which is actually in unix and we uniquify
144 			 * unix against genunix. In such cases, we explicitly do
145 			 * not do any mapping of the forward information, lest
146 			 * we risk losing the real definition. Instead, mark
147 			 * that it's missing.
148 			 */
149 			if (cmp->cm_unique == B_TRUE) {
150 				cmt[oid].cmt_missing = B_TRUE;
151 				return;
152 			}
153 
154 			cmt[oid].cmt_map = iid;
155 			cmt[oid].cmt_forward = B_TRUE;
156 			ctf_dprintf("merge diff forward mapped %d->%d\n", oid,
157 			    iid);
158 			return;
159 		}
160 
161 		/*
162 		 * We could have multiple things that a given type ends up
163 		 * matching in the world of forwards and pointers to forwards.
164 		 * For now just take the first one...
165 		 */
166 		if (cmt[oid].cmt_map != 0)
167 			return;
168 		cmt[oid].cmt_map = iid;
169 		ctf_dprintf("merge diff mapped %d->%d\n", oid, iid);
170 	} else if (ifp == cmp->cm_src) {
171 		VERIFY(cmt[iid].cmt_map == 0);
172 		cmt[iid].cmt_missing = B_TRUE;
173 		ctf_dprintf("merge diff said %d is missing\n", iid);
174 	}
175 }
176 
177 static int
178 ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id)
179 {
180 	int ret, flags;
181 	const ctf_type_t *tp;
182 	const char *name;
183 	ctf_encoding_t en;
184 
185 	if (ctf_type_encoding(cmp->cm_src, id, &en) != 0)
186 		return (CTF_ERR);
187 
188 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
189 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
190 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
191 		flags = CTF_ADD_ROOT;
192 	else
193 		flags = CTF_ADD_NONROOT;
194 
195 	ret = ctf_add_encoded(cmp->cm_out, flags, name, &en,
196 	    ctf_type_kind(cmp->cm_src, id));
197 
198 	if (ret == CTF_ERR)
199 		return (ret);
200 
201 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
202 	cmp->cm_tmap[id].cmt_map = ret;
203 	return (0);
204 }
205 
206 static int
207 ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id)
208 {
209 	int ret, flags;
210 	const ctf_type_t *tp;
211 	ctf_arinfo_t ar;
212 
213 	if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR)
214 		return (CTF_ERR);
215 
216 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
217 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
218 		flags = CTF_ADD_ROOT;
219 	else
220 		flags = CTF_ADD_NONROOT;
221 
222 	if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) {
223 		ret = ctf_merge_add_type(cmp, ar.ctr_contents);
224 		if (ret != 0)
225 			return (ret);
226 		ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0);
227 	}
228 	ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents);
229 
230 	if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) {
231 		ret = ctf_merge_add_type(cmp, ar.ctr_index);
232 		if (ret != 0)
233 			return (ret);
234 		ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0);
235 	}
236 	ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index);
237 
238 	ret = ctf_add_array(cmp->cm_out, flags, &ar);
239 	if (ret == CTF_ERR)
240 		return (ret);
241 
242 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
243 	cmp->cm_tmap[id].cmt_map = ret;
244 
245 	return (0);
246 }
247 
248 static int
249 ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id)
250 {
251 	int ret, flags;
252 	const ctf_type_t *tp;
253 	ctf_id_t reftype;
254 	const char *name;
255 
256 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
257 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
258 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
259 		flags = CTF_ADD_ROOT;
260 	else
261 		flags = CTF_ADD_NONROOT;
262 
263 	reftype = ctf_type_reference(cmp->cm_src, id);
264 	if (reftype == CTF_ERR)
265 		return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
266 
267 	if (cmp->cm_tmap[reftype].cmt_map == 0) {
268 		ret = ctf_merge_add_type(cmp, reftype);
269 		if (ret != 0)
270 			return (ret);
271 		ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
272 	}
273 	reftype = ctf_merge_gettype(cmp, reftype);
274 
275 	ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype,
276 	    ctf_type_kind(cmp->cm_src, id));
277 	if (ret == CTF_ERR)
278 		return (ret);
279 
280 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
281 	cmp->cm_tmap[id].cmt_map = ret;
282 	return (0);
283 }
284 
285 static int
286 ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id)
287 {
288 	int ret, flags;
289 	const ctf_type_t *tp;
290 	const char *name;
291 	ctf_id_t reftype;
292 
293 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
294 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
295 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
296 		flags = CTF_ADD_ROOT;
297 	else
298 		flags = CTF_ADD_NONROOT;
299 
300 	reftype = ctf_type_reference(cmp->cm_src, id);
301 	if (reftype == CTF_ERR)
302 		return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
303 
304 	if (cmp->cm_tmap[reftype].cmt_map == 0) {
305 		ret = ctf_merge_add_type(cmp, reftype);
306 		if (ret != 0)
307 			return (ret);
308 		ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
309 	}
310 	reftype = ctf_merge_gettype(cmp, reftype);
311 
312 	ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype);
313 	if (ret == CTF_ERR)
314 		return (ret);
315 
316 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
317 	cmp->cm_tmap[id].cmt_map = ret;
318 	return (0);
319 }
320 
321 typedef struct ctf_merge_enum {	/* Argument passed to ctf_merge_add_enumerator() */
322 	ctf_file_t *cme_fp;	/* Container the enumerators are added to (the merge output) */
323 	ctf_id_t cme_id;	/* ID of the enum in cme_fp that receives the enumerators */
324 } ctf_merge_enum_t;
325 
326 static int
327 ctf_merge_add_enumerator(const char *name, int value, void *arg)
328 {
329 	ctf_merge_enum_t *cmep = arg;
330 
331 	return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) ==
332 	    CTF_ERR);
333 }
334 
335 static int
336 ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id)
337 {
338 	int flags;
339 	const ctf_type_t *tp;
340 	const char *name;
341 	ctf_id_t enumid;
342 	ctf_merge_enum_t cme;
343 	size_t size;
344 
345 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
346 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
347 		flags = CTF_ADD_ROOT;
348 	else
349 		flags = CTF_ADD_NONROOT;
350 
351 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
352 	size = ctf_get_ctt_size(cmp->cm_src, tp, NULL, NULL);
353 
354 	enumid = ctf_add_enum(cmp->cm_out, flags, name, size);
355 	if (enumid == CTF_ERR)
356 		return (enumid);
357 
358 	cme.cme_fp = cmp->cm_out;
359 	cme.cme_id = enumid;
360 	if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator,
361 	    &cme) != 0)
362 		return (CTF_ERR);
363 
364 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
365 	cmp->cm_tmap[id].cmt_map = enumid;
366 	return (0);
367 }
368 
369 static int
370 ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id)
371 {
372 	int ret, flags, i;
373 	const ctf_type_t *tp;
374 	ctf_funcinfo_t ctc;
375 	ctf_id_t *argv;
376 
377 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
378 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
379 		flags = CTF_ADD_ROOT;
380 	else
381 		flags = CTF_ADD_NONROOT;
382 
383 	if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR)
384 		return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
385 
386 	argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc);
387 	if (argv == NULL)
388 		return (ctf_set_errno(cmp->cm_out, ENOMEM));
389 	if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) ==
390 	    CTF_ERR) {
391 		ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
392 		return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
393 	}
394 
395 	if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) {
396 		ret = ctf_merge_add_type(cmp, ctc.ctc_return);
397 		if (ret != 0)
398 			return (ret);
399 		ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0);
400 	}
401 	ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return);
402 
403 	for (i = 0; i < ctc.ctc_argc; i++) {
404 		if (cmp->cm_tmap[argv[i]].cmt_map == 0) {
405 			ret = ctf_merge_add_type(cmp, argv[i]);
406 			if (ret != 0)
407 				return (ret);
408 			ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0);
409 		}
410 		argv[i] = ctf_merge_gettype(cmp, argv[i]);
411 	}
412 
413 	ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv);
414 	ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
415 	if (ret == CTF_ERR)
416 		return (ret);
417 
418 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
419 	cmp->cm_tmap[id].cmt_map = ret;
420 	return (0);
421 }
422 
423 static int
424 ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id)
425 {
426 	int ret, flags;
427 	const ctf_type_t *tp;
428 	const char *name;
429 
430 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
431 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
432 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
433 		flags = CTF_ADD_ROOT;
434 	else
435 		flags = CTF_ADD_NONROOT;
436 
437 	/*
438 	 * ctf_add_forward tries to check to see if a given forward already
439 	 * exists in one of its hash tables.  If we're here then we know that we
440 	 * have a forward in a container that isn't present in another.
441 	 * Therefore, we choose a token hash table to satisfy the API choice
442 	 * here.
443 	 */
444 	ret = ctf_add_forward(cmp->cm_out, flags, name, CTF_K_STRUCT);
445 	if (ret == CTF_ERR)
446 		return (CTF_ERR);
447 
448 	VERIFY(cmp->cm_tmap[id].cmt_map == 0);
449 	cmp->cm_tmap[id].cmt_map = ret;
450 	return (0);
451 }
452 
453 typedef struct ctf_merge_su {	/* Argument passed to ctf_merge_add_member() */
454 	ctf_merge_types_t *cms_cm;	/* Merge state (type map and output container) */
455 	ctf_id_t cms_id;	/* Output ID of the struct/union receiving members */
456 } ctf_merge_su_t;
457 
458 static int
459 ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg)
460 {
461 	ctf_merge_su_t *cms = arg;
462 
463 	VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0);
464 	type = cms->cms_cm->cm_tmap[type].cmt_map;
465 
466 	ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id);
467 	return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name,
468 	    type, offset) == CTF_ERR);
469 }
470 
471 /*
472  * During the first pass, we always add the generic structure and union but none
473  * of its members as they might not all have been mapped yet. Instead we just
474  * mark all structures and unions as needing to be fixed up.
475  */
476 static int
477 ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward)
478 {
479 	int flags, kind;
480 	const ctf_type_t *tp;
481 	const char *name;
482 	ctf_id_t suid;
483 
484 	tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
485 	name = ctf_strraw(cmp->cm_src, tp->ctt_name);
486 	if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
487 		flags = CTF_ADD_ROOT;
488 	else
489 		flags = CTF_ADD_NONROOT;
490 	kind = ctf_type_kind(cmp->cm_src, id);
491 
492 	if (kind == CTF_K_STRUCT)
493 		suid = ctf_add_struct(cmp->cm_out, flags, name);
494 	else
495 		suid = ctf_add_union(cmp->cm_out, flags, name);
496 
497 	if (suid == CTF_ERR)
498 		return (suid);
499 
500 	/*
501 	 * If this is a forward reference then its mapping should already
502 	 * exist.
503 	 */
504 	if (forward == B_FALSE) {
505 		VERIFY(cmp->cm_tmap[id].cmt_map == 0);
506 		cmp->cm_tmap[id].cmt_map = suid;
507 		ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id,
508 		    suid);
509 	} else {
510 		VERIFY(cmp->cm_tmap[id].cmt_map == suid);
511 	}
512 	cmp->cm_tmap[id].cmt_fixup = B_TRUE;
513 
514 	return (0);
515 }
516 
517 static int
518 ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id)
519 {
520 	int kind, ret;
521 
522 	/*
523 	 * We may end up evaluating a type more than once as we may deal with it
524 	 * as we recursively evaluate some kind of reference and then we may see
525 	 * it normally.
526 	 */
527 	if (cmp->cm_tmap[id].cmt_map != 0)
528 		return (0);
529 
530 	kind = ctf_type_kind(cmp->cm_src, id);
531 	switch (kind) {
532 	case CTF_K_INTEGER:
533 	case CTF_K_FLOAT:
534 		ret = ctf_merge_add_number(cmp, id);
535 		break;
536 	case CTF_K_ARRAY:
537 		ret = ctf_merge_add_array(cmp, id);
538 		break;
539 	case CTF_K_POINTER:
540 	case CTF_K_VOLATILE:
541 	case CTF_K_CONST:
542 	case CTF_K_RESTRICT:
543 		ret = ctf_merge_add_reftype(cmp, id);
544 		break;
545 	case CTF_K_TYPEDEF:
546 		ret = ctf_merge_add_typedef(cmp, id);
547 		break;
548 	case CTF_K_ENUM:
549 		ret = ctf_merge_add_enum(cmp, id);
550 		break;
551 	case CTF_K_FUNCTION:
552 		ret = ctf_merge_add_func(cmp, id);
553 		break;
554 	case CTF_K_FORWARD:
555 		ret = ctf_merge_add_forward(cmp, id);
556 		break;
557 	case CTF_K_STRUCT:
558 	case CTF_K_UNION:
559 		ret = ctf_merge_add_sou(cmp, id, B_FALSE);
560 		break;
561 	case CTF_K_UNKNOWN:
562 		/*
563 		 * We don't add unknown types, and we later assert that nothing
564 		 * should reference them.
565 		 */
566 		return (0);
567 	default:
568 		abort();
569 	}
570 
571 	return (ret);
572 }
573 
574 static int
575 ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id)
576 {
577 	ctf_dtdef_t *dtd;
578 	ctf_merge_su_t cms;
579 	ctf_id_t mapid;
580 	ssize_t size;
581 
582 	mapid = cmp->cm_tmap[id].cmt_map;
583 	VERIFY(mapid != 0);
584 	dtd = ctf_dtd_lookup(cmp->cm_out, mapid);
585 	VERIFY(dtd != NULL);
586 
587 	ctf_dprintf("Trying to fix up sou %d\n", id);
588 	cms.cms_cm = cmp;
589 	cms.cms_id = mapid;
590 	if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0)
591 		return (CTF_ERR);
592 
593 	if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR)
594 		return (CTF_ERR);
595 	if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR)
596 		return (CTF_ERR);
597 
598 	return (0);
599 }
600 
601 static int
602 ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id)
603 {
604 	int kind, ret;
605 
606 	kind = ctf_type_kind(cmp->cm_src, id);
607 	switch (kind) {
608 	case CTF_K_STRUCT:
609 	case CTF_K_UNION:
610 		ret = ctf_merge_fixup_sou(cmp, id);
611 		break;
612 	default:
613 		VERIFY(0);
614 		ret = CTF_ERR;
615 	}
616 
617 	return (ret);
618 }
619 
620 /*
621  * Now that we've successfully merged everything, we're going to remap the type
622  * table.
623  *
624  * Remember we have two containers: ->cm_src is what we're working from, and
625  * ->cm_out is where we are building the de-duplicated CTF.
626  *
627  * The index of this table is always the type IDs in ->cm_src.
628  *
629  * When we built this table originally in ctf_diff_self(), if we found a novel
630  * type, we marked it as .cmt_missing to indicate it needs adding to ->cm_out.
631  * Otherwise, .cmt_map indicated the ->cm_src type ID that this type duplicates.
632  *
633  * Then, in ctf_merge_common(), we walked through and added all "cmt_missing"
634  * types to ->cm_out with ctf_merge_add_type(). These routines update cmt_map
635  * to be the *new* type ID in ->cm_out.  In this function, you can read
636  * "cmt_missing" as meaning "added to ->cm_out, and cmt_map updated".
637  *
638  * So at this point, we need to mop up all types where .cmt_missing == B_FALSE,
639  * making sure *their* .cmt_map values also point to the ->cm_out container.
640  */
641 static void
642 ctf_merge_dedup_remap(ctf_merge_types_t *cmp)
643 {
644 	int i;
645 
646 	for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) {
647 		ctf_id_t tid;
648 
649 		if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
650 			VERIFY(cmp->cm_tmap[i].cmt_map != 0);
651 			continue;
652 		}
653 
654 		tid = i;
655 		while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) {
656 			VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
657 			tid = cmp->cm_tmap[tid].cmt_map;
658 		}
659 		VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
660 		cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map;
661 	}
662 }
663 
664 
665 /*
666  * We're going to do three passes over the containers.
667  *
668  * Pass 1 checks for forward references in the output container that we know
669  * exist in the source container.
670  *
671  * Pass 2 adds all the missing types from the source container. As part of this
672  * we may be adding a type as a forward reference that doesn't exist yet.
673  * Any types that we encounter in this form, we need to add to a third pass.
674  *
675  * Pass 3 is the fixup pass. Here we go through and fill in the members of all
676  * the structures and unions that the earlier passes marked as needing fixup.
677  *
678  * Importantly, we *must* call ctf_update between the second and third pass,
679  * otherwise several of the libctf functions will not properly find the data in
680  * the container. If we're doing a dedup we also fix up the type mapping.
681  */
682 static int
683 ctf_merge_common(ctf_merge_types_t *cmp)
684 {
685 	int ret, i;
686 
687 	ctf_phase_dump(cmp->cm_src, "merge-common-src", NULL);
688 	ctf_phase_dump(cmp->cm_out, "merge-common-dest", NULL);
689 
690 	/* Pass 1 */
691 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
692 		if (cmp->cm_tmap[i].cmt_forward == B_TRUE) {
693 			ret = ctf_merge_add_sou(cmp, i, B_TRUE);
694 			if (ret != 0) {
695 				return (ret);
696 			}
697 		}
698 	}
699 
700 	/* Pass 2 */
701 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
702 		if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
703 			ret = ctf_merge_add_type(cmp, i);
704 			if (ret != 0) {
705 				ctf_dprintf("Failed to merge type %d\n", i);
706 				return (ret);
707 			}
708 		}
709 	}
710 
711 	ret = ctf_update(cmp->cm_out);
712 	if (ret != 0)
713 		return (ret);
714 
715 	if (cmp->cm_dedup == B_TRUE) {
716 		ctf_merge_dedup_remap(cmp);
717 	}
718 
719 	ctf_dprintf("Beginning merge pass 3\n");
720 	/* Pass 3 */
721 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
722 		if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) {
723 			ret = ctf_merge_fixup_type(cmp, i);
724 			if (ret != 0)
725 				return (ret);
726 		}
727 	}
728 
729 	return (0);
730 }
731 
732 /*
733  * Uniquification is slightly different from a stock merge. For starters, we
734  * don't need to replace any forward references in the output. In this case
735  * though, the types that already exist are in a parent container to the empty
736  * output container.
737  */
738 static int
739 ctf_merge_uniquify_types(ctf_merge_types_t *cmp)
740 {
741 	int i, ret;
742 
743 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
744 		if (cmp->cm_tmap[i].cmt_missing == B_FALSE)
745 			continue;
746 		ret = ctf_merge_add_type(cmp, i);
747 		if (ret != 0)
748 			return (ret);
749 	}
750 
751 	ret = ctf_update(cmp->cm_out);
752 	if (ret != 0)
753 		return (ret);
754 
755 	for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
756 		if (cmp->cm_tmap[i].cmt_fixup == B_FALSE)
757 			continue;
758 		ret = ctf_merge_fixup_type(cmp, i);
759 		if (ret != 0)
760 			return (ret);
761 	}
762 
763 	return (0);
764 }
765 
766 static int
767 ctf_merge_types_init(ctf_merge_types_t *cmp)
768 {
769 	cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) *
770 	    (cmp->cm_src->ctf_typemax + 1));
771 	if (cmp->cm_tmap == NULL)
772 		return (ctf_set_errno(cmp->cm_out, ENOMEM));
773 	bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
774 	    (cmp->cm_src->ctf_typemax + 1));
775 	return (0);
776 }
777 
778 static void
779 ctf_merge_types_fini(ctf_merge_types_t *cmp)
780 {
781 	ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
782 	    (cmp->cm_src->ctf_typemax + 1));
783 }
784 
785 /*
786  * After performing a pass, we need to go through the object and function type
787  * maps and potentially fix them up based on the new maps that we have.
788  */
789 static void
790 ctf_merge_fixup_symmaps(ctf_merge_types_t *cmp, ctf_merge_input_t *cmi)
791 {
792 	ctf_merge_objmap_t *cmo;
793 	ctf_merge_funcmap_t *cmf;
794 
795 	for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
796 	    cmo = list_next(&cmi->cmi_omap, cmo)) {
797 		VERIFY3S(cmo->cmo_tid, !=, 0);
798 		VERIFY(cmp->cm_tmap[cmo->cmo_tid].cmt_map != 0);
799 		cmo->cmo_tid = cmp->cm_tmap[cmo->cmo_tid].cmt_map;
800 	}
801 
802 	for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
803 	    cmf = list_next(&cmi->cmi_fmap, cmf)) {
804 		int i;
805 
806 		VERIFY(cmp->cm_tmap[cmf->cmf_rtid].cmt_map != 0);
807 		cmf->cmf_rtid = cmp->cm_tmap[cmf->cmf_rtid].cmt_map;
808 		for (i = 0; i < cmf->cmf_argc; i++) {
809 			VERIFY(cmp->cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
810 			cmf->cmf_args[i] =
811 			    cmp->cm_tmap[cmf->cmf_args[i]].cmt_map;
812 		}
813 	}
814 }
815 
816 /*
817  * Merge the types contained inside of two input files. The second input file is
818  * always going to be the destination. We're guaranteed that it's always
819  * writeable.
820  */
821 static int
822 ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued)
823 {
824 	int ret;
825 	ctf_merge_types_t cm;
826 	ctf_diff_t *cdp;
827 	ctf_merge_input_t *scmi = arg;
828 	ctf_merge_input_t *dcmi = arg2;
829 	ctf_file_t *out = dcmi->cmi_input;
830 	ctf_file_t *source = scmi->cmi_input;
831 
832 	ctf_dprintf("merging %p->%p\n", source, out);
833 
834 	if (!(out->ctf_flags & LCTF_RDWR))
835 		return (ctf_set_errno(out, ECTF_RDONLY));
836 
837 	if (ctf_getmodel(out) != ctf_getmodel(source))
838 		return (ctf_set_errno(out, ECTF_DMODEL));
839 
840 	if ((ret = ctf_diff_init(out, source, &cdp)) != 0)
841 		return (ret);
842 
843 	cm.cm_out = out;
844 	cm.cm_src = source;
845 	cm.cm_dedup = B_FALSE;
846 	cm.cm_unique = B_FALSE;
847 	ret = ctf_merge_types_init(&cm);
848 	if (ret != 0) {
849 		ctf_diff_fini(cdp);
850 		return (ctf_set_errno(out, ret));
851 	}
852 
853 	ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
854 	if (ret != 0)
855 		goto cleanup;
856 	ret = ctf_merge_common(&cm);
857 	ctf_dprintf("merge common returned with %d\n", ret);
858 	if (ret == 0) {
859 		ret = ctf_update(out);
860 		ctf_dprintf("update returned with %d\n", ret);
861 	} else {
862 		goto cleanup;
863 	}
864 
865 	/*
866 	 * Now we need to fix up the object and function maps.
867 	 */
868 	ctf_merge_fixup_symmaps(&cm, scmi);
869 
870 	/*
871 	 * Now that we've fixed things up, we need to give our function and
872 	 * object maps to the destination, such that it can continue to update
873 	 * them going forward.
874 	 */
875 	list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap);
876 	list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap);
877 
878 cleanup:
879 	if (ret == 0)
880 		*outp = dcmi;
881 	ctf_merge_types_fini(&cm);
882 	ctf_diff_fini(cdp);
883 	if (ret != 0)
884 		return (ctf_errno(out));
885 	ctf_phase_bump();
886 	return (0);
887 }
888 
889 static int
890 ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp)
891 {
892 	int err, ret;
893 	ctf_file_t *out;
894 	ctf_merge_types_t cm;
895 	ctf_diff_t *cdp;
896 	ctf_merge_input_t *cmi;
897 	ctf_file_t *parent = cmh->cmh_unique;
898 
899 	*outp = NULL;
900 	out = ctf_fdcreate(cmh->cmh_ofd, &err);
901 	if (out == NULL)
902 		return (ctf_set_errno(src, err));
903 
904 	out->ctf_parname = cmh->cmh_pname;
905 	if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) {
906 		(void) ctf_set_errno(src, ctf_errno(out));
907 		ctf_close(out);
908 		return (CTF_ERR);
909 	}
910 
911 	if (ctf_import(out, parent) != 0) {
912 		(void) ctf_set_errno(src, ctf_errno(out));
913 		ctf_close(out);
914 		return (CTF_ERR);
915 	}
916 
917 	if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) {
918 		ctf_close(out);
919 		return (ctf_set_errno(src, ctf_errno(parent)));
920 	}
921 
922 	cm.cm_out = parent;
923 	cm.cm_src = src;
924 	cm.cm_dedup = B_FALSE;
925 	cm.cm_unique = B_TRUE;
926 	ret = ctf_merge_types_init(&cm);
927 	if (ret != 0) {
928 		ctf_close(out);
929 		ctf_diff_fini(cdp);
930 		return (ctf_set_errno(src, ret));
931 	}
932 
933 	ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
934 	if (ret == 0) {
935 		cm.cm_out = out;
936 		ret = ctf_merge_uniquify_types(&cm);
937 		if (ret == 0)
938 			ret = ctf_update(out);
939 	}
940 
941 	if (ret != 0) {
942 		ctf_merge_types_fini(&cm);
943 		ctf_diff_fini(cdp);
944 		return (ctf_set_errno(src, ctf_errno(cm.cm_out)));
945 	}
946 
947 	for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
948 	    cmi = list_next(&cmh->cmh_inputs, cmi)) {
949 		ctf_merge_fixup_symmaps(&cm, cmi);
950 	}
951 
952 	ctf_merge_types_fini(&cm);
953 	ctf_diff_fini(cdp);
954 	*outp = out;
955 	return (0);
956 }
957 
958 static void
959 ctf_merge_fini_input(ctf_merge_input_t *cmi)
960 {
961 	ctf_merge_objmap_t *cmo;
962 	ctf_merge_funcmap_t *cmf;
963 
964 	while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL)
965 		ctf_free(cmo, sizeof (ctf_merge_objmap_t));
966 
967 	while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL)
968 		ctf_free(cmf, sizeof (ctf_merge_funcmap_t) +
969 		    sizeof (ctf_id_t) * cmf->cmf_argc);
970 
971 	if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL)
972 		ctf_close(cmi->cmi_input);
973 
974 	ctf_free(cmi, sizeof (ctf_merge_input_t));
975 }
976 
977 void
978 ctf_merge_fini(ctf_merge_t *cmh)
979 {
980 	size_t len;
981 	ctf_merge_input_t *cmi;
982 
983 	if (cmh->cmh_label != NULL) {
984 		len = strlen(cmh->cmh_label) + 1;
985 		ctf_free(cmh->cmh_label, len);
986 	}
987 
988 	if (cmh->cmh_pname != NULL) {
989 		len = strlen(cmh->cmh_pname) + 1;
990 		ctf_free(cmh->cmh_pname, len);
991 	}
992 
993 	while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL)
994 		ctf_merge_fini_input(cmi);
995 
996 	ctf_free(cmh, sizeof (ctf_merge_t));
997 }
998 
999 ctf_merge_t *
1000 ctf_merge_init(int fd, int *errp)
1001 {
1002 	int err;
1003 	ctf_merge_t *out;
1004 	struct stat st;
1005 
1006 	if (errp == NULL)
1007 		errp = &err;
1008 
1009 	if (fd != -1 && fstat(fd, &st) != 0) {
1010 		*errp = EINVAL;
1011 		return (NULL);
1012 	}
1013 
1014 	out = ctf_alloc(sizeof (ctf_merge_t));
1015 	if (out == NULL) {
1016 		*errp = ENOMEM;
1017 		return (NULL);
1018 	}
1019 
1020 	if (fd == -1) {
1021 		out->cmh_msyms = B_FALSE;
1022 	} else {
1023 		out->cmh_msyms = B_TRUE;
1024 	}
1025 
1026 	list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t),
1027 	    offsetof(ctf_merge_input_t, cmi_node));
1028 	out->cmh_ninputs = 0;
1029 	out->cmh_nthreads = 1;
1030 	out->cmh_unique = NULL;
1031 	out->cmh_ofd = fd;
1032 	out->cmh_flags = 0;
1033 	out->cmh_label = NULL;
1034 	out->cmh_pname = NULL;
1035 
1036 	return (out);
1037 }
1038 
1039 int
1040 ctf_merge_label(ctf_merge_t *cmh, const char *label)
1041 {
1042 	char *dup;
1043 
1044 	if (label == NULL)
1045 		return (EINVAL);
1046 
1047 	dup = ctf_strdup(label);
1048 	if (dup == NULL)
1049 		return (EAGAIN);
1050 
1051 	if (cmh->cmh_label != NULL) {
1052 		size_t len = strlen(cmh->cmh_label) + 1;
1053 		ctf_free(cmh->cmh_label, len);
1054 	}
1055 
1056 	cmh->cmh_label = dup;
1057 	return (0);
1058 }
1059 
1060 static int
1061 ctf_merge_add_function(ctf_merge_input_t *cmi, ctf_funcinfo_t *fip, ulong_t idx,
1062     const char *file, const char *name, const Elf64_Sym *symp)
1063 {
1064 	ctf_merge_funcmap_t *fmap;
1065 
1066 	fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) +
1067 	    sizeof (ctf_id_t) * fip->ctc_argc);
1068 	if (fmap == NULL)
1069 		return (ENOMEM);
1070 
1071 	fmap->cmf_idx = idx;
1072 	fmap->cmf_sym = *symp;
1073 	fmap->cmf_rtid = fip->ctc_return;
1074 	fmap->cmf_flags = fip->ctc_flags;
1075 	fmap->cmf_argc = fip->ctc_argc;
1076 	fmap->cmf_name = name;
1077 	if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1078 		fmap->cmf_file = file;
1079 	} else {
1080 		fmap->cmf_file = NULL;
1081 	}
1082 
1083 	if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc,
1084 	    fmap->cmf_args) != 0) {
1085 		ctf_free(fmap, sizeof (ctf_merge_funcmap_t) +
1086 		    sizeof (ctf_id_t) * fip->ctc_argc);
1087 		return (ctf_errno(cmi->cmi_input));
1088 	}
1089 
1090 	ctf_dprintf("added initial function %s, %lu, %s %u\n", name, idx,
1091 	    fmap->cmf_file != NULL ? fmap->cmf_file : "global",
1092 	    ELF64_ST_BIND(symp->st_info));
1093 	list_insert_tail(&cmi->cmi_fmap, fmap);
1094 	return (0);
1095 }
1096 
1097 static int
1098 ctf_merge_add_object(ctf_merge_input_t *cmi, ctf_id_t id, ulong_t idx,
1099     const char *file, const char *name, const Elf64_Sym *symp)
1100 {
1101 	ctf_merge_objmap_t *cmo;
1102 
1103 	cmo = ctf_alloc(sizeof (ctf_merge_objmap_t));
1104 	if (cmo == NULL)
1105 		return (ENOMEM);
1106 
1107 	cmo->cmo_name = name;
1108 	if (ELF64_ST_BIND(symp->st_info) == STB_LOCAL) {
1109 		cmo->cmo_file = file;
1110 	} else {
1111 		cmo->cmo_file = NULL;
1112 	}
1113 	cmo->cmo_idx = idx;
1114 	cmo->cmo_tid = id;
1115 	cmo->cmo_sym = *symp;
1116 	list_insert_tail(&cmi->cmi_omap, cmo);
1117 
1118 	ctf_dprintf("added initial object %s, %lu, %ld, %s\n", name, idx, id,
1119 	    cmo->cmo_file != NULL ? cmo->cmo_file : "global");
1120 
1121 	return (0);
1122 }
1123 
1124 static int
1125 ctf_merge_add_symbol(const Elf64_Sym *symp, ulong_t idx, const char *file,
1126     const char *name, boolean_t primary, void *arg)
1127 {
1128 	ctf_merge_input_t *cmi = arg;
1129 	ctf_file_t *fp = cmi->cmi_input;
1130 	ushort_t *data, funcbase;
1131 	uint_t type;
1132 	ctf_funcinfo_t fi;
1133 
1134 	/*
1135 	 * See if there is type information for this. If there is no
1136 	 * type information for this entry or no translation, then we
1137 	 * will find the value zero. This indicates no type ID for
1138 	 * objects and encodes unknown information for functions.
1139 	 */
1140 	if (fp->ctf_sxlate[idx] == -1u)
1141 		return (0);
1142 	data = (ushort_t *)((uintptr_t)fp->ctf_buf + fp->ctf_sxlate[idx]);
1143 	if (*data == 0)
1144 		return (0);
1145 
1146 	type = ELF64_ST_TYPE(symp->st_info);
1147 
1148 	switch (type) {
1149 	case STT_FUNC:
1150 		funcbase = *data;
1151 		if (LCTF_INFO_KIND(fp, funcbase) != CTF_K_FUNCTION)
1152 			return (0);
1153 		data++;
1154 		fi.ctc_return = *data;
1155 		data++;
1156 		fi.ctc_argc = LCTF_INFO_VLEN(fp, funcbase);
1157 		fi.ctc_flags = 0;
1158 
1159 		if (fi.ctc_argc != 0 && data[fi.ctc_argc - 1] == 0) {
1160 			fi.ctc_flags |= CTF_FUNC_VARARG;
1161 			fi.ctc_argc--;
1162 		}
1163 		return (ctf_merge_add_function(cmi, &fi, idx, file, name,
1164 		    symp));
1165 	case STT_OBJECT:
1166 		return (ctf_merge_add_object(cmi, *data, idx, file, name,
1167 		    symp));
1168 	default:
1169 		return (0);
1170 	}
1171 }
1172 
1173 /*
1174  * Whenever we create an entry to merge, we then go and add a second empty
1175  * ctf_file_t which we use for the purposes of our merging. It's not the best,
1176  * but it's the best that we've got at the moment.
1177  */
1178 int
1179 ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input)
1180 {
1181 	int ret;
1182 	ctf_merge_input_t *cmi;
1183 	ctf_file_t *empty;
1184 
1185 	ctf_dprintf("adding input %p\n", input);
1186 
1187 	if (input->ctf_flags & LCTF_CHILD)
1188 		return (ECTF_MCHILD);
1189 
1190 	cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1191 	if (cmi == NULL)
1192 		return (ENOMEM);
1193 
1194 	cmi->cmi_created = B_FALSE;
1195 	cmi->cmi_input = input;
1196 	list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1197 	    offsetof(ctf_merge_funcmap_t, cmf_node));
1198 	list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1199 	    offsetof(ctf_merge_objmap_t, cmo_node));
1200 
1201 	if (cmh->cmh_msyms == B_TRUE) {
1202 		if ((ret = ctf_symtab_iter(input, ctf_merge_add_symbol,
1203 		    cmi)) != 0) {
1204 			ctf_merge_fini_input(cmi);
1205 			return (ret);
1206 		}
1207 	}
1208 
1209 	list_insert_tail(&cmh->cmh_inputs, cmi);
1210 	cmh->cmh_ninputs++;
1211 
1212 	/* And now the empty one to merge into this */
1213 	cmi = ctf_alloc(sizeof (ctf_merge_input_t));
1214 	if (cmi == NULL)
1215 		return (ENOMEM);
1216 	list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
1217 	    offsetof(ctf_merge_funcmap_t, cmf_node));
1218 	list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
1219 	    offsetof(ctf_merge_objmap_t, cmo_node));
1220 
1221 	empty = ctf_fdcreate(cmh->cmh_ofd, &ret);
1222 	if (empty == NULL)
1223 		return (ret);
1224 	cmi->cmi_input = empty;
1225 	cmi->cmi_created = B_TRUE;
1226 
1227 	if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) {
1228 		return (ctf_errno(empty));
1229 	}
1230 
1231 	list_insert_tail(&cmh->cmh_inputs, cmi);
1232 	cmh->cmh_ninputs++;
1233 	ctf_dprintf("added containers %p and %p\n", input, empty);
1234 	return (0);
1235 }
1236 
1237 int
1238 ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname)
1239 {
1240 	char *dup;
1241 
1242 	if (u->ctf_flags & LCTF_CHILD)
1243 		return (ECTF_MCHILD);
1244 	if (pname == NULL)
1245 		return (EINVAL);
1246 	dup = ctf_strdup(pname);
1247 	if (dup == NULL)
1248 		return (EINVAL);
1249 	if (cmh->cmh_pname != NULL) {
1250 		size_t len = strlen(cmh->cmh_pname) + 1;
1251 		ctf_free(cmh->cmh_pname, len);
1252 	}
1253 	cmh->cmh_pname = dup;
1254 	cmh->cmh_unique = u;
1255 	return (0);
1256 }
1257 
1258 /*
1259  * Symbol matching rules: the purpose of this is to verify that the type
1260  * information that we have for a given symbol actually matches the output
1261  * symbol. This is unfortunately complicated by several different factors:
1262  *
1263  * 1. When merging multiple .o's into a single item, the symbol table index will
1264  * not match.
1265  *
1266  * 2. Visibility of a symbol may not be identical to the object file or the
1267  * DWARF information due to symbol reduction via a mapfile.
1268  *
1269  * As such, we have to employ the following rules:
1270  *
1271  * 1. A global symbol table entry always matches a global CTF symbol with the
1272  * same name.
1273  *
1274  * 2. A local symbol table entry always matches a local CTF symbol if they have
1275  * the same name and they belong to the same file.
1276  *
1277  * 3. A weak symbol matches a non-weak symbol. This happens if we find that the
1278  * types match, the values match, the sizes match, and the section indexes
1279  * match. This happens when we do a conversion in one pass, it almost never
1280  * happens when we're merging multiple object files. If we match a CTF global
1281  * symbol, that's a fixed match, otherwise it's a fuzzy match.
1282  *
1283  * 4. A local symbol table entry matches a global CTF entry if the
1284  * other pieces fail, but they have the same name. This is considered a fuzzy
1285  * match and is not used unless we have no other options.
1286  *
1287  * 5. A weak symbol table entry matches a weak CTF entry if the other pieces
1288  * fail, but they have the same name. This is considered a fuzzy match and is
1289  * not used unless we have no other options. When merging independent .o files,
1290  * this is often the only recourse we have to matching weak symbols.
1291  *
1292  * In the end, this would all be much simpler if we were able to do this as part
1293  * of libld which would be able to do all the symbol transformations.
1294  */
1295 static boolean_t
1296 ctf_merge_symbol_match(const char *ctf_file, const char *ctf_name,
1297     const Elf64_Sym *ctf_symp, const char *symtab_file, const char *symtab_name,
1298     const Elf64_Sym *symtab_symp, boolean_t *is_fuzzy)
1299 {
1300 	*is_fuzzy = B_FALSE;
1301 	uint_t symtab_bind, ctf_bind;
1302 
1303 	symtab_bind = ELF64_ST_BIND(symtab_symp->st_info);
1304 	ctf_bind = ELF64_ST_BIND(ctf_symp->st_info);
1305 
1306 	ctf_dprintf("comparing merge match for %s/%s/%u->%s/%s/%u\n",
1307 	    symtab_file, symtab_name, symtab_bind,
1308 	    ctf_file, ctf_name, ctf_bind);
1309 	if (strcmp(ctf_name, symtab_name) != 0) {
1310 		return (B_FALSE);
1311 	}
1312 
1313 	if (symtab_bind == STB_GLOBAL && ctf_bind == STB_GLOBAL) {
1314 		return (B_TRUE);
1315 	} else if (symtab_bind == STB_GLOBAL) {
1316 		return (B_FALSE);
1317 	}
1318 
1319 	if (ctf_bind == STB_LOCAL && ctf_bind == symtab_bind &&
1320 	    ctf_file != NULL && symtab_file != NULL &&
1321 	    strcmp(ctf_file, symtab_file) == 0) {
1322 		return (B_TRUE);
1323 	}
1324 
1325 	if (symtab_bind == STB_WEAK && ctf_bind != STB_WEAK &&
1326 	    ELF64_ST_TYPE(symtab_symp->st_info) ==
1327 	    ELF64_ST_TYPE(ctf_symp->st_info) &&
1328 	    symtab_symp->st_value == ctf_symp->st_value &&
1329 	    symtab_symp->st_size == ctf_symp->st_size &&
1330 	    symtab_symp->st_shndx == ctf_symp->st_shndx) {
1331 		if (ctf_bind == STB_GLOBAL) {
1332 			return (B_TRUE);
1333 		}
1334 
1335 		if (ctf_bind == STB_LOCAL && ctf_file != NULL &&
1336 		    symtab_file != NULL && strcmp(ctf_file, symtab_file) == 0) {
1337 			*is_fuzzy = B_TRUE;
1338 			return (B_TRUE);
1339 		}
1340 	}
1341 
1342 	if (ctf_bind == STB_GLOBAL ||
1343 	    (ctf_bind == STB_WEAK && symtab_bind == STB_WEAK)) {
1344 		*is_fuzzy = B_TRUE;
1345 		return (B_TRUE);
1346 	}
1347 
1348 	return (B_FALSE);
1349 }
1350 
1351 /*
1352  * For each symbol, try and find a match. We will attempt to find an exact
1353  * match; however, we will settle for a fuzzy match in general. There is one
1354  * case where we will not opt to use a fuzzy match, which is when performing the
1355  * deduplication of a container. In such a case we are trying to reduce common
1356  * types and a fuzzy match would be inappropriate as if we're in the context of
1357  * a single container, the conversion process should have identified any exact
1358  * or fuzzy matches that were required.
1359  */
1360 static int
1361 ctf_merge_symbols(const Elf64_Sym *symp, ulong_t idx, const char *file,
1362     const char *name, boolean_t primary, void *arg)
1363 {
1364 	int err;
1365 	uint_t type, bind;
1366 	ctf_merge_symbol_arg_t *csa = arg;
1367 	ctf_file_t *fp = csa->cmsa_out;
1368 
1369 	type = ELF64_ST_TYPE(symp->st_info);
1370 	bind = ELF64_ST_BIND(symp->st_info);
1371 
1372 	ctf_dprintf("Trying to find match for %s/%s/%u\n", file, name,
1373 	    ELF64_ST_BIND(symp->st_info));
1374 
1375 	if (type == STT_OBJECT) {
1376 		ctf_merge_objmap_t *cmo, *match = NULL;
1377 
1378 		for (cmo = list_head(csa->cmsa_objmap); cmo != NULL;
1379 		    cmo = list_next(csa->cmsa_objmap, cmo)) {
1380 			boolean_t is_fuzzy = B_FALSE;
1381 			if (ctf_merge_symbol_match(cmo->cmo_file, cmo->cmo_name,
1382 			    &cmo->cmo_sym, file, name, symp, &is_fuzzy)) {
1383 				if (is_fuzzy && csa->cmsa_dedup &&
1384 				    bind != STB_WEAK) {
1385 					continue;
1386 				}
1387 				match = cmo;
1388 				if (is_fuzzy) {
1389 					continue;
1390 				}
1391 				break;
1392 			}
1393 		}
1394 
1395 		if (match == NULL) {
1396 			return (0);
1397 		}
1398 
1399 		if ((err = ctf_add_object(fp, idx, match->cmo_tid)) != 0) {
1400 			ctf_dprintf("Failed to add symbol %s->%d: %s\n", name,
1401 			    match->cmo_tid, ctf_errmsg(ctf_errno(fp)));
1402 			return (ctf_errno(fp));
1403 		}
1404 		ctf_dprintf("mapped object into output %s/%s->%ld\n", file,
1405 		    name, match->cmo_tid);
1406 	} else {
1407 		ctf_merge_funcmap_t *cmf, *match = NULL;
1408 		ctf_funcinfo_t fi;
1409 
1410 		for (cmf = list_head(csa->cmsa_funcmap); cmf != NULL;
1411 		    cmf = list_next(csa->cmsa_funcmap, cmf)) {
1412 			boolean_t is_fuzzy = B_FALSE;
1413 			if (ctf_merge_symbol_match(cmf->cmf_file, cmf->cmf_name,
1414 			    &cmf->cmf_sym, file, name, symp, &is_fuzzy)) {
1415 				if (is_fuzzy && csa->cmsa_dedup &&
1416 				    bind != STB_WEAK) {
1417 					continue;
1418 				}
1419 				match = cmf;
1420 				if (is_fuzzy) {
1421 					continue;
1422 				}
1423 				break;
1424 			}
1425 		}
1426 
1427 		if (match == NULL) {
1428 			return (0);
1429 		}
1430 
1431 		fi.ctc_return = match->cmf_rtid;
1432 		fi.ctc_argc = match->cmf_argc;
1433 		fi.ctc_flags = match->cmf_flags;
1434 		if ((err = ctf_add_function(fp, idx, &fi, match->cmf_args)) !=
1435 		    0) {
1436 			ctf_dprintf("Failed to add function %s: %s\n", name,
1437 			    ctf_errmsg(ctf_errno(fp)));
1438 			return (ctf_errno(fp));
1439 		}
1440 		ctf_dprintf("mapped function into output %s/%s\n", file,
1441 		    name);
1442 	}
1443 
1444 	return (0);
1445 }
1446 
1447 int
1448 ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp)
1449 {
1450 	int err, merr;
1451 	ctf_merge_input_t *cmi;
1452 	ctf_id_t ltype;
1453 	mergeq_t *mqp;
1454 	ctf_merge_input_t *final;
1455 	ctf_file_t *out;
1456 
1457 	ctf_dprintf("Beginning ctf_merge_merge()\n");
1458 	if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) {
1459 		const char *label = ctf_label_topmost(cmh->cmh_unique);
1460 		if (label == NULL)
1461 			return (ECTF_NOLABEL);
1462 		if (strcmp(label, cmh->cmh_label) != 0)
1463 			return (ECTF_LCONFLICT);
1464 	}
1465 
1466 	if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) {
1467 		return (errno);
1468 	}
1469 
1470 	VERIFY(cmh->cmh_ninputs % 2 == 0);
1471 	for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
1472 	    cmi = list_next(&cmh->cmh_inputs, cmi)) {
1473 		if (mergeq_add(mqp, cmi) == -1) {
1474 			err = errno;
1475 			mergeq_fini(mqp);
1476 		}
1477 	}
1478 
1479 	err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr);
1480 	mergeq_fini(mqp);
1481 
1482 	if (err == MERGEQ_ERROR) {
1483 		return (errno);
1484 	} else if (err == MERGEQ_UERROR) {
1485 		return (merr);
1486 	}
1487 
1488 	/*
1489 	 * Disassociate the generated ctf_file_t from the original input. That
1490 	 * way when the input gets cleaned up, we don't accidentally kill the
1491 	 * final reference to the ctf_file_t. If it gets uniquified then we'll
1492 	 * kill it.
1493 	 */
1494 	VERIFY(final->cmi_input != NULL);
1495 	out = final->cmi_input;
1496 	final->cmi_input = NULL;
1497 
1498 	ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique);
1499 	if (cmh->cmh_unique != NULL) {
1500 		ctf_file_t *u;
1501 		err = ctf_uniquify_types(cmh, out, &u);
1502 		if (err != 0) {
1503 			err = ctf_errno(out);
1504 			ctf_close(out);
1505 			return (err);
1506 		}
1507 		ctf_close(out);
1508 		out = u;
1509 	}
1510 
1511 	ltype = out->ctf_typemax;
1512 	if ((out->ctf_flags & LCTF_CHILD) && ltype != 0)
1513 		ltype += CTF_CHILD_START;
1514 	ctf_dprintf("trying to add the label\n");
1515 	if (cmh->cmh_label != NULL &&
1516 	    ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) {
1517 		ctf_close(out);
1518 		return (ctf_errno(out));
1519 	}
1520 
1521 	ctf_dprintf("merging symbols and the like\n");
1522 	if (cmh->cmh_msyms == B_TRUE) {
1523 		ctf_merge_symbol_arg_t arg;
1524 		arg.cmsa_objmap = &final->cmi_omap;
1525 		arg.cmsa_funcmap = &final->cmi_fmap;
1526 		arg.cmsa_out = out;
1527 		arg.cmsa_dedup = B_FALSE;
1528 		err = ctf_symtab_iter(out, ctf_merge_symbols, &arg);
1529 		if (err != 0) {
1530 			ctf_close(out);
1531 			return (err);
1532 		}
1533 	}
1534 
1535 	err = ctf_update(out);
1536 	if (err != 0) {
1537 		err = ctf_errno(out);
1538 		ctf_close(out);
1539 		return (err);
1540 	}
1541 
1542 	*outp = out;
1543 	return (0);
1544 }
1545 
1546 /*
1547  * When we get told that something is unique, eg. same is B_FALSE, then that
1548  * tells us that we need to add it to the output. If same is B_TRUE, then we'll
1549  * want to record it in the mapping table so that we know how to redirect types
1550  * to the extant ones.
1551  */
1552 static void
1553 ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
1554     ctf_id_t oid, void *arg)
1555 {
1556 	ctf_merge_types_t *cmp = arg;
1557 	ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
1558 
1559 	if (same == B_TRUE) {
1560 		/*
1561 		 * The output id here may itself map to something else.
1562 		 * Therefore, we need to basically walk a chain and see what it
1563 		 * points to until it itself points to a base type, eg. -1.
1564 		 * Otherwise we'll dedup to something which no longer exists.
1565 		 */
1566 		while (cmt[oid].cmt_missing == B_FALSE)
1567 			oid = cmt[oid].cmt_map;
1568 		cmt[iid].cmt_map = oid;
1569 		ctf_dprintf("%d->%d \n", iid, oid);
1570 	} else {
1571 		VERIFY(cmt[iid].cmt_map == 0);
1572 		cmt[iid].cmt_missing = B_TRUE;
1573 		ctf_dprintf("%d is missing\n", iid);
1574 	}
1575 }
1576 
1577 /*
1578  * Dedup a CTF container.
1579  *
1580  * DWARF and other encoding formats that we use to create CTF data may create
1581  * multiple copies of a given type. However, after doing a conversion, and
1582  * before doing a merge, we'd prefer, if possible, to have every input container
1583  * to be unique.
1584  *
1585  * Doing a deduplication is like a normal merge. However, when we diff the types
1586  * in the container, rather than doing a normal diff, we instead want to diff
1587  * against any already processed types. eg, for a given type i in a container,
1588  * we want to diff it from 0 to i - 1.
1589  */
1590 int
1591 ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp)
1592 {
1593 	int ret;
1594 	ctf_diff_t *cdp = NULL;
1595 	ctf_merge_input_t *cmi, *cmc;
1596 	ctf_file_t *ifp, *ofp;
1597 	ctf_merge_types_t cm;
1598 
1599 	if (cmp == NULL || outp == NULL)
1600 		return (EINVAL);
1601 
1602 	ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs);
1603 	if (cmp->cmh_ninputs != 2)
1604 		return (EINVAL);
1605 
1606 	ctf_dprintf("passed argument sanity check\n");
1607 
1608 	cmi = list_head(&cmp->cmh_inputs);
1609 	VERIFY(cmi != NULL);
1610 	cmc = list_next(&cmp->cmh_inputs, cmi);
1611 	VERIFY(cmc != NULL);
1612 	ifp = cmi->cmi_input;
1613 	ofp = cmc->cmi_input;
1614 	VERIFY(ifp != NULL);
1615 	VERIFY(ofp != NULL);
1616 	cm.cm_src = ifp;
1617 	cm.cm_out = ofp;
1618 	cm.cm_dedup = B_TRUE;
1619 	cm.cm_unique = B_FALSE;
1620 
1621 	if ((ret = ctf_merge_types_init(&cm)) != 0) {
1622 		return (ret);
1623 	}
1624 
1625 	if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0)
1626 		goto err;
1627 
1628 	ctf_dprintf("Successfully initialized dedup\n");
1629 	if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0)
1630 		goto err;
1631 
1632 	ctf_dprintf("Successfully diffed types\n");
1633 	ret = ctf_merge_common(&cm);
1634 	ctf_dprintf("deduping types result: %d\n", ret);
1635 	if (ret == 0)
1636 		ret = ctf_update(cm.cm_out);
1637 	if (ret != 0)
1638 		goto err;
1639 
1640 	ctf_dprintf("Successfully deduped types\n");
1641 	ctf_phase_dump(cm.cm_out, "dedup-pre-syms", NULL);
1642 
1643 	/*
1644 	 * Now we need to fix up the object and function maps.
1645 	 */
1646 	ctf_merge_fixup_symmaps(&cm, cmi);
1647 
1648 	if (cmp->cmh_msyms == B_TRUE) {
1649 		ctf_merge_symbol_arg_t arg;
1650 		arg.cmsa_objmap = &cmi->cmi_omap;
1651 		arg.cmsa_funcmap = &cmi->cmi_fmap;
1652 		arg.cmsa_out = cm.cm_out;
1653 		arg.cmsa_dedup = B_TRUE;
1654 		ret = ctf_symtab_iter(cm.cm_out, ctf_merge_symbols, &arg);
1655 		if (ret != 0) {
1656 			ctf_dprintf("failed to dedup symbols: %s\n",
1657 			    ctf_errmsg(ret));
1658 			goto err;
1659 		}
1660 	}
1661 
1662 	ret = ctf_update(cm.cm_out);
1663 	if (ret == 0) {
1664 		cmc->cmi_input = NULL;
1665 		*outp = cm.cm_out;
1666 	}
1667 	ctf_phase_dump(cm.cm_out, "dedup-post-syms", NULL);
1668 err:
1669 	ctf_merge_types_fini(&cm);
1670 	ctf_diff_fini(cdp);
1671 	return (ret);
1672 }
1673 
1674 int
1675 ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs)
1676 {
1677 	if (nthrs == 0)
1678 		return (EINVAL);
1679 	cmp->cmh_nthreads = nthrs;
1680 	return (0);
1681 }
1682