xref: /illumos-gate/usr/src/lib/libctf/common/ctf_dwarf.c (revision e0721d5ae1542c80097f6fcd487736fdfe601233)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2012 Jason King.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 /*
31  * Copyright 2020 Joyent, Inc.
32  * Copyright 2020 Robert Mustacchi
33  */
34 
35 /*
36  * CTF DWARF conversion theory.
37  *
38  * DWARF data contains a series of compilation units. Each compilation unit
39  * generally refers to an object file or what once was, in the case of linked
40  * binaries and shared objects. Each compilation unit has a series of what DWARF
41  * calls a DIE (Debugging Information Entry). The set of entries that we care
42  * about have type information stored in a series of attributes. Each DIE also
43  * has a tag that identifies the kind of attributes that it has.
44  *
45  * A given DIE may itself have children. For example, a DIE that represents a
46  * structure has children which represent members. Whenever we encounter a DIE
47  * that has children or other values or types associated with it, we recursively
48  * process those children first so that way we can then refer to the generated
49  * CTF type id while processing its parent. This reduces the amount of unknowns
50  * and fixups that we need. It also ensures that we don't accidentally add types
51  * that an overzealous compiler might add to the DWARF data but aren't used by
52  * anything in the system.
53  *
54  * Once we do a conversion, we store a mapping in an AVL tree that goes from the
55  * DWARF's die offset, which is relative to the given compilation unit, to a
56  * ctf_id_t.
57  *
58  * Unfortunately, some compilers actually will emit duplicate entries for a
59  * given type that look similar, but aren't quite. To that end, we go through
60  * and do a variant on a merge once we're done processing a single compilation
61  * unit which deduplicates all of the types that are in the unit.
62  *
63  * Finally, if we encounter an object that has multiple compilation units, then
64  * we'll convert all of the compilation units separately and then do a merge, so
65  * that way we can result in one single ctf_file_t that represents everything
66  * for the object.
67  *
68  * Conversion Steps
69  * ----------------
70  *
71  * Because a given object we've been given to convert may have multiple
72  * compilation units, we break the work into two halves. The first half
73  * processes each compilation unit (potentially in parallel) and then the second
74  * half optionally merges all of the dies in the first half. First, we'll cover
75  * what's involved in converting a single ctf_cu_t's dwarf to CTF. This covers
76  * the work done in ctf_dwarf_convert_one().
77  *
78  * An individual ctf_cu_t, which represents a compilation unit, is converted to
79  * CTF in a series of multiple passes.
80  *
81  * Pass 1: During the first pass we walk all of the top-level dies and if we
82  * find a function, variable, struct, union, enum or typedef, we recursively
83  * transform all of its types. We don't recurse or process everything, because
84  * we don't want to add some of the types that compilers may add which are
85  * effectively unused.
86  *
87  * During pass 1, if we encounter any structures or unions we mark them for
88  * fixing up later. This is necessary because we may not be able to determine
89  * the full size of a structure at the beginning of time. This will happen if
90  * the DWARF attribute DW_AT_byte_size is not present for a member. Because of
91  * this possibility we defer adding members to structures or even converting
92  * them during pass 1 and save that for pass 2. Adding all of the base
93  * structures without any of their members helps deal with any circular
94  * dependencies that we might encounter.
95  *
96  * Pass 2: This pass is used to do the first half of fixing up structures and
97  * unions. Rather than walk the entire type space again, we actually walk the
98  * list of structures and unions that we marked for later fixing up. Here, we
99  * iterate over every structure and add members to the underlying ctf_file_t,
100  * but not to the structs themselves. One might wonder why we don't, and the
101  * main reason is that libctf requires a ctf_update() be done before adding the
102  * members to structures or unions.
103  *
104  * Pass 3: This pass is used to do the second half of fixing up structures and
105  * unions. During this part we always go through and add members to structures
106  * and unions that we added to the container in the previous pass. In addition,
107  * we set the structure and union's actual size, which may have additional
108  * padding added by the compiler, it isn't simply the last offset. DWARF always
109  * guarantees an attribute exists for this. Importantly no ctf_id_t's change
110  * during pass 2.
111  *
112  * Pass 4: The next phase is to add CTF entries for all of the symbols and
113  * variables that are present in this die. During pass 1 we added entries to a
114  * map for each variable and function. During this pass, we iterate over the
115  * symbol table and when we encounter a symbol that we have in our lists of
116  * translated information which matches, we then add it to the ctf_file_t.
117  *
118  * Pass 5: Here we go and look for any weak symbols and functions and see if
119  * they match anything that we recognize. If so, then we add type information
120  * for them at this point based on the matching type.
121  *
122  * Pass 6: This pass is actually a variant on a merge. The traditional merge
123  * process expects there to be no duplicate types. As such, at the end of
124  * conversion, we do a dedup on all of the types in the system. The
125  * deduplication process is described in lib/libctf/common/ctf_merge.c.
126  *
127  * Once pass 6 is done, we've finished processing the individual compilation
128  * unit.
129  *
130  * The following steps reflect the general process of doing a conversion.
131  *
132  * 1) Walk the dwarf section and determine the number of compilation units
133  * 2) Create a ctf_cu_t for each compilation unit
134  * 3) Add all ctf_cu_t's to a workq
135  * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself
136  *    is comprised of several steps, which were already enumerated.
137  * 5) If we have multiple cu's, we do a ctf merge of all the dies. The mechanics
138  *    of the merge are discussed in lib/libctf/common/ctf_merge.c.
139  * 6) Free everything up and return a ctf_file_t to the user. If we only had a
140  *    single compilation unit, then we give that to the user. Otherwise, we
141  *    return the merged ctf_file_t.
142  *
143  * Threading
144  * ---------
145  *
146  * The process has been designed to be amenable to threading. Each compilation
147  * unit has its own type stream, therefore the logical place to divide and
148  * conquer is at the compilation unit. Each ctf_cu_t has been built to be able
149  * to be processed independently of the others. It has its own libdwarf handle,
150  * as a given libdwarf handle may only be used by a single thread at a time.
151  * This allows the various ctf_cu_t's to be processed in parallel by different
152  * threads.
153  *
154  * All of the ctf_cu_t's are loaded into a workq which allows for a number of
155  * threads to be specified and used as a thread pool to process all of the
156  * queued work. We set the number of threads to use in the workq equal to the
157  * number of threads that the user has specified.
158  *
159  * After all of the compilation units have been drained, we use the same number
160  * of threads when performing a merge of multiple compilation units, if they
161  * exist.
162  *
163  * While all of these different parts do support and allow for multiple threads,
164  * it's important that when only a single thread is specified, that it be the
165  * calling thread. This allows the conversion routines to be used in a context
166  * that doesn't allow additional threads, such as rtld.
167  *
168  * Common DWARF Mechanics and Notes
169  * --------------------------------
170  *
171  * At this time, we really only support DWARFv2, though support for DWARFv4 is
172  * mostly there. There is no intent to support DWARFv3.
173  *
174  * Generally types for something are stored in the DW_AT_type attribute. For
175  * example, a function's return type will be stored in the local DW_AT_type
176  * attribute while the arguments will be in child DIEs. There are also various
177  * times when we don't have any DW_AT_type. In that case, the lack of a type
178  * implies, at least for C, that its C type is void. Because DWARF doesn't emit
179  * one, we have a synthetic void type that we create and manipulate instead and
180  * pass it off to consumers on an as-needed basis. If nothing has a void type,
181  * it will not be emitted.
182  *
183  * Architecture Specific Parts
184  * ---------------------------
185  *
186  * The CTF tooling encodes various information about the various architectures
187  * in the system. Importantly, the tool assumes that every architecture has a
188  * data model where long and pointer are the same size. This is currently the
189  * case, as the two data models illumos supports are ILP32 and LP64.
190  *
191  * In addition, we encode the mapping of various floating point sizes to various
192  * types for each architecture. If a new architecture is being added, it should
193  * be added to the list. The general design of the ctf conversion tools is to be
194  * architecture independent. eg. any of the tools here should be able to convert
195  * any architecture's DWARF into ctf; however, this has not been rigorously
196  * tested and more importantly, the ctf routines don't currently write out the
197  * data in an endian-aware form, they only use that of the currently running
198  * library.
199  */
200 
201 #include <libctf_impl.h>
202 #include <sys/avl.h>
203 #include <sys/debug.h>
204 #include <gelf.h>
205 #include <libdwarf.h>
206 #include <dwarf.h>
207 #include <libgen.h>
208 #include <workq.h>
209 #include <errno.h>
210 
211 #define	DWARF_VERSION_TWO	2
212 #define	DWARF_VERSION_FOUR	4
213 #define	DWARF_VARARGS_NAME	"..."
214 
/*
 * Dwarf may refer recursively to other types that we've already processed. To
 * see if we've already converted them, we look them up in an AVL tree that's
 * sorted by the DWARF id.
 *
 * Each ctf_dwmap_t is one entry in that tree. Entries are created by
 * ctf_dwmap_add(), which inserts them into the cu_map of a ctf_cu_t, and
 * ctf_dwmap_comp() orders them by cdm_off.
 */
typedef struct ctf_dwmap {
	avl_node_t	cdm_avl;	/* AVL tree linkage */
	Dwarf_Off	cdm_off;	/* die offset; the sort key */
	Dwarf_Die	cdm_die;	/* die that this entry describes */
	ctf_id_t	cdm_id;		/* CTF type id recorded for the die */
	boolean_t	cdm_fix;	/* 'fix' argument of ctf_dwmap_add() */
} ctf_dwmap_t;
227 
/*
 * Describes a single variable. NOTE(review): the code that fills this in is
 * outside of this part of the file; presumably these are linked onto cu_vars
 * -- confirm against the users of this structure.
 */
typedef struct ctf_dwvar {
	ctf_list_t	cdv_list;	/* list linkage */
	char		*cdv_name;	/* name of the variable */
	ctf_id_t	cdv_type;	/* CTF type id of the variable */
	boolean_t	cdv_global;	/* presumably: has global visibility */
} ctf_dwvar_t;
234 
/*
 * Describes a single function. NOTE(review): the code that fills this in is
 * outside of this part of the file; presumably these are linked onto cu_funcs
 * -- confirm against the users of this structure.
 */
typedef struct ctf_dwfunc {
	ctf_list_t	cdf_list;	/* list linkage */
	char		*cdf_name;	/* name of the function */
	ctf_funcinfo_t	cdf_fip;	/* CTF function information */
	ctf_id_t	*cdf_argv;	/* presumably: argument type ids */
	boolean_t	cdf_global;	/* presumably: has global visibility */
} ctf_dwfunc_t;
242 
/*
 * Describes a bit field. NOTE(review): the code that fills this in is outside
 * of this part of the file; presumably these are linked onto cu_bitfields and
 * the field meanings below are inferred from their names -- confirm.
 */
typedef struct ctf_dwbitf {
	ctf_list_t	cdb_list;	/* list linkage */
	ctf_id_t	cdb_base;	/* presumably: id of the base type */
	uint_t		cdb_nbits;	/* presumably: width in bits */
	ctf_id_t	cdb_id;		/* presumably: id of the bit field */
} ctf_dwbitf_t;
249 
/*
 * The ctf_cu_t represents a single top-level DWARF die unit. While generally,
 * the typical object file has only a single die, if we're asked to convert
 * something that's been linked from multiple sources, multiple dies will exist.
 *
 * cu_voidtid and cu_longtid are created lazily by ctf_dwarf_void() and
 * ctf_dwarf_long(); both hold CTF_ERR until the corresponding type has been
 * added to cu_ctfp. cu_errbuf / cu_errlen describe the buffer that the helper
 * routines in this file format their error messages into.
 */
typedef struct ctf_die {
	Elf		*cu_elf;	/* shared libelf handle */
	char		*cu_name;	/* basename of the DIE */
	ctf_merge_t	*cu_cmh;	/* merge handle */
	ctf_list_t	cu_vars;	/* List of variables */
	ctf_list_t	cu_funcs;	/* List of functions */
	ctf_list_t	cu_bitfields;	/* Bit field members */
	Dwarf_Debug	cu_dwarf;	/* libdwarf handle */
	Dwarf_Die	cu_cu;		/* libdwarf compilation unit */
	Dwarf_Off	cu_cuoff;	/* cu's offset */
	Dwarf_Off	cu_maxoff;	/* maximum offset */
	Dwarf_Half	cu_vers;	/* Dwarf Version */
	Dwarf_Half	cu_addrsz;	/* Dwarf Address Size */
	ctf_file_t	*cu_ctfp;	/* output CTF file */
	avl_tree_t	cu_map;		/* map die offsets to CTF types */
	char		*cu_errbuf;	/* error message buffer */
	size_t		cu_errlen;	/* error message buffer length */
	size_t		cu_ptrsz;	/* object's pointer size, in bytes */
	boolean_t	cu_bigend;	/* is it big endian */
	boolean_t	cu_doweaks;	/* should we convert weak symbols? */
	uint_t		cu_mach;	/* machine type (ELF e_machine) */
	ctf_id_t	cu_voidtid;	/* id of the synthetic 'void' type */
	ctf_id_t	cu_longtid;	/* id for a 'long' */
} ctf_cu_t;
279 
/*
 * Forward declarations. ctf_dwarf_offset(), for example, is called by
 * ctf_dwmap_add() ahead of its definition.
 */
static int ctf_dwarf_offset(ctf_cu_t *, Dwarf_Die, Dwarf_Off *);
static int ctf_dwarf_convert_die(ctf_cu_t *, Dwarf_Die);
static int ctf_dwarf_convert_type(ctf_cu_t *, Dwarf_Die, ctf_id_t *, int);

static int ctf_dwarf_function_count(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    boolean_t);
static int ctf_dwarf_convert_fargs(ctf_cu_t *, Dwarf_Die, ctf_funcinfo_t *,
    ctf_id_t *);
288 
289 /*
290  * This is a generic way to set a CTF Conversion backend error depending on what
291  * we were doing. Unless it was one of a specific set of errors that don't
292  * indicate a programming / translation bug, eg. ENOMEM, then we transform it
293  * into a CTF backend error and fill in the error buffer.
294  */
295 static int
296 ctf_dwarf_error(ctf_cu_t *cup, ctf_file_t *cfp, int err, const char *fmt, ...)
297 {
298 	va_list ap;
299 	int ret;
300 	size_t off = 0;
301 	ssize_t rem = cup->cu_errlen;
302 	if (cfp != NULL)
303 		err = ctf_errno(cfp);
304 
305 	if (err == ENOMEM)
306 		return (err);
307 
308 	ret = snprintf(cup->cu_errbuf, rem, "die %s: ", cup->cu_name);
309 	if (ret < 0)
310 		goto err;
311 	off += ret;
312 	rem = MAX(rem - ret, 0);
313 
314 	va_start(ap, fmt);
315 	ret = vsnprintf(cup->cu_errbuf + off, rem, fmt, ap);
316 	va_end(ap);
317 	if (ret < 0)
318 		goto err;
319 
320 	off += ret;
321 	rem = MAX(rem - ret, 0);
322 	if (fmt[strlen(fmt) - 1] != '\n') {
323 		(void) snprintf(cup->cu_errbuf + off, rem,
324 		    ": %s\n", ctf_errmsg(err));
325 	}
326 	va_end(ap);
327 	return (ECTF_CONVBKERR);
328 
329 err:
330 	cup->cu_errbuf[0] = '\0';
331 	return (ECTF_CONVBKERR);
332 }
333 
334 /*
335  * DWARF often opts to put no explicit type to describe a void type. eg. if we
336  * have a reference type whose DW_AT_type member doesn't exist, then we should
337  * instead assume it points to void. Because this isn't represented, we
338  * instead cause it to come into existence.
339  */
340 static ctf_id_t
341 ctf_dwarf_void(ctf_cu_t *cup)
342 {
343 	if (cup->cu_voidtid == CTF_ERR) {
344 		ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 };
345 		cup->cu_voidtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_ROOT,
346 		    "void", &enc);
347 		if (cup->cu_voidtid == CTF_ERR) {
348 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
349 			    "failed to create void type: %s\n",
350 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
351 		}
352 	}
353 
354 	return (cup->cu_voidtid);
355 }
356 
357 /*
358  * There are many different forms that an array index may take. However, we just
359  * always force it to be of a type long no matter what. Therefore we use this to
360  * have a single instance of long across everything.
361  */
362 static ctf_id_t
363 ctf_dwarf_long(ctf_cu_t *cup)
364 {
365 	if (cup->cu_longtid == CTF_ERR) {
366 		ctf_encoding_t enc;
367 
368 		enc.cte_format = CTF_INT_SIGNED;
369 		enc.cte_offset = 0;
370 		/* All illumos systems are LP */
371 		enc.cte_bits = cup->cu_ptrsz * 8;
372 		cup->cu_longtid = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
373 		    "long", &enc);
374 		if (cup->cu_longtid == CTF_ERR) {
375 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
376 			    "failed to create long type: %s\n",
377 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
378 		}
379 
380 	}
381 
382 	return (cup->cu_longtid);
383 }
384 
385 static int
386 ctf_dwmap_comp(const void *a, const void *b)
387 {
388 	const ctf_dwmap_t *ca = a;
389 	const ctf_dwmap_t *cb = b;
390 
391 	if (ca->cdm_off > cb->cdm_off)
392 		return (1);
393 	if (ca->cdm_off < cb->cdm_off)
394 		return (-1);
395 	return (0);
396 }
397 
398 static int
399 ctf_dwmap_add(ctf_cu_t *cup, ctf_id_t id, Dwarf_Die die, boolean_t fix)
400 {
401 	int ret;
402 	avl_index_t index;
403 	ctf_dwmap_t *dwmap;
404 	Dwarf_Off off;
405 
406 	VERIFY(id > 0 && id < CTF_MAX_TYPE);
407 
408 	if ((ret = ctf_dwarf_offset(cup, die, &off)) != 0)
409 		return (ret);
410 
411 	if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL)
412 		return (ENOMEM);
413 
414 	dwmap->cdm_die = die;
415 	dwmap->cdm_off = off;
416 	dwmap->cdm_id = id;
417 	dwmap->cdm_fix = fix;
418 
419 	ctf_dprintf("dwmap: %p %" DW_PR_DUx "->%d\n", dwmap, off, id);
420 	VERIFY(avl_find(&cup->cu_map, dwmap, &index) == NULL);
421 	avl_insert(&cup->cu_map, dwmap, index);
422 	return (0);
423 }
424 
425 static int
426 ctf_dwarf_attribute(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
427     Dwarf_Attribute *attrp)
428 {
429 	int ret;
430 	Dwarf_Error derr;
431 
432 	if ((ret = dwarf_attr(die, name, attrp, &derr)) == DW_DLV_OK)
433 		return (0);
434 	if (ret == DW_DLV_NO_ENTRY) {
435 		*attrp = NULL;
436 		return (ENOENT);
437 	}
438 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
439 	    "failed to get attribute for type: %s\n",
440 	    dwarf_errmsg(derr));
441 	return (ECTF_CONVBKERR);
442 }
443 
444 static int
445 ctf_dwarf_ref(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp)
446 {
447 	int ret;
448 	Dwarf_Attribute attr;
449 	Dwarf_Error derr;
450 
451 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
452 		return (ret);
453 
454 	if (dwarf_formref(attr, refp, &derr) == DW_DLV_OK) {
455 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
456 		return (0);
457 	}
458 
459 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
460 	    "failed to get unsigned attribute for type: %s\n",
461 	    dwarf_errmsg(derr));
462 	return (ECTF_CONVBKERR);
463 }
464 
465 static int
466 ctf_dwarf_refdie(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
467     Dwarf_Die *diep)
468 {
469 	int ret;
470 	Dwarf_Off off;
471 	Dwarf_Error derr;
472 
473 	if ((ret = ctf_dwarf_ref(cup, die, name, &off)) != 0)
474 		return (ret);
475 
476 	off += cup->cu_cuoff;
477 	if ((ret = dwarf_offdie(cup->cu_dwarf, off, diep, &derr)) !=
478 	    DW_DLV_OK) {
479 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
480 		    "failed to get die from offset %" DW_PR_DUu ": %s\n",
481 		    off, dwarf_errmsg(derr));
482 		return (ECTF_CONVBKERR);
483 	}
484 
485 	return (0);
486 }
487 
488 static int
489 ctf_dwarf_signed(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
490     Dwarf_Signed *valp)
491 {
492 	int ret;
493 	Dwarf_Attribute attr;
494 	Dwarf_Error derr;
495 
496 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
497 		return (ret);
498 
499 	if (dwarf_formsdata(attr, valp, &derr) == DW_DLV_OK) {
500 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
501 		return (0);
502 	}
503 
504 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
505 	    "failed to get unsigned attribute for type: %s\n",
506 	    dwarf_errmsg(derr));
507 	return (ECTF_CONVBKERR);
508 }
509 
510 static int
511 ctf_dwarf_unsigned(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
512     Dwarf_Unsigned *valp)
513 {
514 	int ret;
515 	Dwarf_Attribute attr;
516 	Dwarf_Error derr;
517 
518 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
519 		return (ret);
520 
521 	if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
522 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
523 		return (0);
524 	}
525 
526 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
527 	    "failed to get unsigned attribute for type: %s\n",
528 	    dwarf_errmsg(derr));
529 	return (ECTF_CONVBKERR);
530 }
531 
532 static int
533 ctf_dwarf_boolean(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name,
534     Dwarf_Bool *val)
535 {
536 	int ret;
537 	Dwarf_Attribute attr;
538 	Dwarf_Error derr;
539 
540 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
541 		return (ret);
542 
543 	if (dwarf_formflag(attr, val, &derr) == DW_DLV_OK) {
544 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
545 		return (0);
546 	}
547 
548 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
549 	    "failed to get boolean attribute for type: %s\n",
550 	    dwarf_errmsg(derr));
551 
552 	return (ECTF_CONVBKERR);
553 }
554 
555 static int
556 ctf_dwarf_string(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half name, char **strp)
557 {
558 	int ret;
559 	char *s;
560 	Dwarf_Attribute attr;
561 	Dwarf_Error derr;
562 
563 	*strp = NULL;
564 	if ((ret = ctf_dwarf_attribute(cup, die, name, &attr)) != 0)
565 		return (ret);
566 
567 	if (dwarf_formstring(attr, &s, &derr) == DW_DLV_OK) {
568 		if ((*strp = ctf_strdup(s)) == NULL)
569 			ret = ENOMEM;
570 		else
571 			ret = 0;
572 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
573 		return (ret);
574 	}
575 
576 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
577 	    "failed to get string attribute for type: %s\n",
578 	    dwarf_errmsg(derr));
579 	return (ECTF_CONVBKERR);
580 }
581 
582 /*
583  * The encoding of a DW_AT_data_member_location has changed between different
584  * revisions of the specification. It may be a general udata form or it may be
585  * location data information. In DWARF 2, it is only the latter. In later
586  * revisions of the spec, it may be either. To determine the form, we ask the
587  * class, which will be of type CONSTANT.
588  */
589 static int
590 ctf_dwarf_member_location(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Unsigned *valp)
591 {
592 	int ret;
593 	Dwarf_Error derr;
594 	Dwarf_Attribute attr;
595 	Dwarf_Locdesc *loc;
596 	Dwarf_Signed locnum;
597 	Dwarf_Half form;
598 	enum Dwarf_Form_Class class;
599 
600 	if ((ret = ctf_dwarf_attribute(cup, die, DW_AT_data_member_location,
601 	    &attr)) != 0)
602 		return (ret);
603 
604 	if (dwarf_whatform(attr, &form, &derr) != DW_DLV_OK) {
605 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
606 		    "failed to get dwarf attribute for for member location: %s",
607 		    dwarf_errmsg(derr));
608 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
609 		return (ECTF_CONVBKERR);
610 	}
611 
612 	class = dwarf_get_form_class(cup->cu_vers, DW_AT_data_member_location,
613 	    cup->cu_addrsz, form);
614 	if (class == DW_FORM_CLASS_CONSTANT) {
615 		Dwarf_Signed sign;
616 
617 		/*
618 		 * We have a constant. We need to try to get both this as signed
619 		 * and unsigned data, as unfortunately, DWARF doesn't define the
620 		 * sign. Which is a joy. We try unsigned first. If neither
621 		 * match, fall through to the normal path.
622 		 */
623 		if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
624 			dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
625 			return (0);
626 		}
627 
628 		if (dwarf_formsdata(attr, &sign, &derr) == DW_DLV_OK) {
629 			dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
630 			if (sign < 0) {
631 				(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
632 				    "encountered negative member data "
633 				    "location: %d", sign);
634 			}
635 			*valp = (Dwarf_Unsigned)sign;
636 			return (0);
637 		}
638 	}
639 
640 	if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) {
641 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
642 		    "failed to obtain location list for member offset: %s",
643 		    dwarf_errmsg(derr));
644 		dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
645 		return (ECTF_CONVBKERR);
646 	}
647 	dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
648 
649 	if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) {
650 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
651 		    "failed to parse location structure for member");
652 		dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
653 		dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
654 		return (ECTF_CONVBKERR);
655 	}
656 
657 	*valp = loc->ld_s->lr_number;
658 
659 	dwarf_dealloc(cup->cu_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
660 	dwarf_dealloc(cup->cu_dwarf, loc, DW_DLA_LOCDESC);
661 	return (0);
662 }
663 
664 
665 static int
666 ctf_dwarf_offset(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Off *offsetp)
667 {
668 	Dwarf_Error derr;
669 
670 	if (dwarf_dieoffset(die, offsetp, &derr) == DW_DLV_OK)
671 		return (0);
672 
673 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
674 	    "failed to get die offset: %s\n",
675 	    dwarf_errmsg(derr));
676 	return (ECTF_CONVBKERR);
677 }
678 
679 /* simpler variant for debugging output */
680 static Dwarf_Off
681 ctf_die_offset(Dwarf_Die die)
682 {
683 	Dwarf_Off off = -1;
684 	Dwarf_Error derr;
685 
686 	(void) dwarf_dieoffset(die, &off, &derr);
687 	return (off);
688 }
689 
690 static int
691 ctf_dwarf_tag(ctf_cu_t *cup, Dwarf_Die die, Dwarf_Half *tagp)
692 {
693 	Dwarf_Error derr;
694 
695 	if (dwarf_tag(die, tagp, &derr) == DW_DLV_OK)
696 		return (0);
697 
698 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
699 	    "failed to get tag type: %s\n",
700 	    dwarf_errmsg(derr));
701 	return (ECTF_CONVBKERR);
702 }
703 
704 static int
705 ctf_dwarf_sib(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *sibp)
706 {
707 	Dwarf_Error derr;
708 	int ret;
709 
710 	*sibp = NULL;
711 	ret = dwarf_siblingof(cup->cu_dwarf, base, sibp, &derr);
712 	if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
713 		return (0);
714 
715 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
716 	    "failed to sibling from die: %s\n",
717 	    dwarf_errmsg(derr));
718 	return (ECTF_CONVBKERR);
719 }
720 
721 static int
722 ctf_dwarf_child(ctf_cu_t *cup, Dwarf_Die base, Dwarf_Die *childp)
723 {
724 	Dwarf_Error derr;
725 	int ret;
726 
727 	*childp = NULL;
728 	ret = dwarf_child(base, childp, &derr);
729 	if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
730 		return (0);
731 
732 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
733 	    "failed to child from die: %s\n",
734 	    dwarf_errmsg(derr));
735 	return (ECTF_CONVBKERR);
736 }
737 
738 /*
739  * Compilers disagree on what to do to determine if something has global
740  * visiblity. Traditionally gcc has used DW_AT_external to indicate this while
741  * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then
742  * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not.
743  */
744 static int
745 ctf_dwarf_isglobal(ctf_cu_t *cup, Dwarf_Die die, boolean_t *igp)
746 {
747 	int ret;
748 	Dwarf_Signed vis;
749 	Dwarf_Bool ext;
750 
751 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_visibility, &vis)) == 0) {
752 		*igp = vis == DW_VIS_exported;
753 		return (0);
754 	} else if (ret != ENOENT) {
755 		return (ret);
756 	}
757 
758 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_external, &ext)) != 0) {
759 		if (ret == ENOENT) {
760 			*igp = B_FALSE;
761 			return (0);
762 		}
763 		return (ret);
764 	}
765 	*igp = ext != 0 ? B_TRUE : B_FALSE;
766 	return (0);
767 }
768 
769 static int
770 ctf_dwarf_die_elfenc(Elf *elf, ctf_cu_t *cup, char *errbuf, size_t errlen)
771 {
772 	GElf_Ehdr ehdr;
773 
774 	if (gelf_getehdr(elf, &ehdr) == NULL) {
775 		(void) snprintf(errbuf, errlen,
776 		    "failed to get ELF header: %s\n",
777 		    elf_errmsg(elf_errno()));
778 		return (ECTF_CONVBKERR);
779 	}
780 
781 	cup->cu_mach = ehdr.e_machine;
782 
783 	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
784 		cup->cu_ptrsz = 4;
785 		VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_ILP32) == 0);
786 	} else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
787 		cup->cu_ptrsz = 8;
788 		VERIFY(ctf_setmodel(cup->cu_ctfp, CTF_MODEL_LP64) == 0);
789 	} else {
790 		(void) snprintf(errbuf, errlen,
791 		    "unknown ELF class %d", ehdr.e_ident[EI_CLASS]);
792 		return (ECTF_CONVBKERR);
793 	}
794 
795 	if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) {
796 		cup->cu_bigend = B_FALSE;
797 	} else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) {
798 		cup->cu_bigend = B_TRUE;
799 	} else {
800 		(void) snprintf(errbuf, errlen,
801 		    "unknown ELF data encoding: %hhu", ehdr.e_ident[EI_DATA]);
802 		return (ECTF_CONVBKERR);
803 	}
804 
805 	return (0);
806 }
807 
/*
 * One row of a floating point map: for a floating point type of cdfe_size
 * bytes, cdfe_enc[] gives the CTF encoding to use. ctf_dwarf_float_base()
 * indexes cdfe_enc[] with 0 by default, 1 for DW_ATE_complex_float and 2 for
 * the imaginary encodings. A cdfe_size of zero terminates a list of rows.
 */
typedef struct ctf_dwarf_fpent {
	size_t	cdfe_size;
	uint_t	cdfe_enc[3];
} ctf_dwarf_fpent_t;
812 
/*
 * The floating point map for a single machine type: cdf_mach is compared
 * against the compilation unit's cu_mach, and cdf_ents[] is a list of rows
 * terminated by an entry whose cdfe_size is zero.
 */
typedef struct ctf_dwarf_fpmap {
	uint_t			cdf_mach;
	ctf_dwarf_fpent_t	cdf_ents[4];
} ctf_dwarf_fpmap_t;
817 
/*
 * The floating point maps for every machine type that we know how to convert.
 * Each row lists a size in bytes followed by the encodings used by
 * ctf_dwarf_float_base() for, in order, the default, complex and imaginary
 * cases of that size. The table is terminated by an EM_NONE entry; a machine
 * type which isn't listed here causes ctf_dwarf_float_base() to fail with
 * ENOTSUP.
 */
static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = {
	{ EM_SPARC, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_SPARC32PLUS, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_SPARCV9, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	/* The 12 byte long double is what sets EM_386 apart from the rest. */
	{ EM_386, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_X86_64, {
		{ 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
		{ 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
		{ 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
		{ 0, { 0 } }
	} },
	{ EM_NONE }
};
851 
852 /*
853  * We want to normalize the type names that are used between compilers in the
854  * case of complex. gcc prefixes things with types like 'long complex' where as
855  * clang only calls them 'complex' in the dwarf even if in the C they are long
856  * complex or similar.
857  */
858 static int
859 ctf_dwarf_fixup_complex(ctf_cu_t *cup, ctf_encoding_t *enc, char **namep)
860 {
861 	const char *name;
862 	*namep = NULL;
863 
864 	switch (enc->cte_format) {
865 	case CTF_FP_CPLX:
866 		name = "complex float";
867 		break;
868 	case CTF_FP_DCPLX:
869 		name = "complex double";
870 		break;
871 	case CTF_FP_LDCPLX:
872 		name = "complex long double";
873 		break;
874 	default:
875 		return (0);
876 	}
877 
878 	*namep = ctf_strdup(name);
879 	if (*namep == NULL) {
880 		return (ENOMEM);
881 	}
882 
883 	return (0);
884 }
885 
886 static int
887 ctf_dwarf_float_base(ctf_cu_t *cup, Dwarf_Signed type, ctf_encoding_t *enc)
888 {
889 	const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0];
890 	const ctf_dwarf_fpent_t *ent;
891 	uint_t col = 0, mult = 1;
892 
893 	for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) {
894 		if (map->cdf_mach == cup->cu_mach)
895 			break;
896 	}
897 
898 	if (map->cdf_mach == EM_NONE) {
899 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
900 		    "Unsupported machine type: %d\n", cup->cu_mach);
901 		return (ENOTSUP);
902 	}
903 
904 	if (type == DW_ATE_complex_float) {
905 		mult = 2;
906 		col = 1;
907 	} else if (type == DW_ATE_imaginary_float ||
908 	    type == DW_ATE_SUN_imaginary_float) {
909 		col = 2;
910 	}
911 
912 	ent = &map->cdf_ents[0];
913 	for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) {
914 		if (ent->cdfe_size * mult * 8 == enc->cte_bits) {
915 			enc->cte_format = ent->cdfe_enc[col];
916 			return (0);
917 		}
918 	}
919 
920 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
921 	    "failed to find valid fp mapping for encoding %d, size %d bits\n",
922 	    type, enc->cte_bits);
923 	return (EINVAL);
924 }
925 
926 static int
927 ctf_dwarf_dwarf_base(ctf_cu_t *cup, Dwarf_Die die, int *kindp,
928     ctf_encoding_t *enc)
929 {
930 	int ret;
931 	Dwarf_Signed type;
932 
933 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_encoding, &type)) != 0)
934 		return (ret);
935 
936 	switch (type) {
937 	case DW_ATE_unsigned:
938 	case DW_ATE_address:
939 		*kindp = CTF_K_INTEGER;
940 		enc->cte_format = 0;
941 		break;
942 	case DW_ATE_unsigned_char:
943 		*kindp = CTF_K_INTEGER;
944 		enc->cte_format = CTF_INT_CHAR;
945 		break;
946 	case DW_ATE_signed:
947 		*kindp = CTF_K_INTEGER;
948 		enc->cte_format = CTF_INT_SIGNED;
949 		break;
950 	case DW_ATE_signed_char:
951 		*kindp = CTF_K_INTEGER;
952 		enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR;
953 		break;
954 	case DW_ATE_boolean:
955 		*kindp = CTF_K_INTEGER;
956 		enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL;
957 		break;
958 	case DW_ATE_float:
959 	case DW_ATE_complex_float:
960 	case DW_ATE_imaginary_float:
961 	case DW_ATE_SUN_imaginary_float:
962 	case DW_ATE_SUN_interval_float:
963 		*kindp = CTF_K_FLOAT;
964 		if ((ret = ctf_dwarf_float_base(cup, type, enc)) != 0)
965 			return (ret);
966 		break;
967 	default:
968 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
969 		    "encountered unknown DWARF encoding: %d", type);
970 		return (ECTF_CONVBKERR);
971 	}
972 
973 	return (0);
974 }
975 
976 /*
977  * Different compilers (at least GCC and Studio) use different names for types.
978  * This parses the types and attempts to unify them. If this fails, we just fall
979  * back to using the DWARF itself.
980  */
981 static int
982 ctf_dwarf_parse_int(const char *name, int *kindp, ctf_encoding_t *enc,
983     char **newnamep)
984 {
985 	char buf[256];
986 	char *base, *c, *last;
987 	int nlong = 0, nshort = 0, nchar = 0, nint = 0;
988 	int sign = 1;
989 
990 	if (strlen(name) + 1 > sizeof (buf))
991 		return (EINVAL);
992 
993 	(void) strlcpy(buf, name, sizeof (buf));
994 	for (c = strtok_r(buf, " ", &last); c != NULL;
995 	    c = strtok_r(NULL, " ", &last)) {
996 		if (strcmp(c, "signed") == 0) {
997 			sign = 1;
998 		} else if (strcmp(c, "unsigned") == 0) {
999 			sign = 0;
1000 		} else if (strcmp(c, "long") == 0) {
1001 			nlong++;
1002 		} else if (strcmp(c, "char") == 0) {
1003 			nchar++;
1004 		} else if (strcmp(c, "short") == 0) {
1005 			nshort++;
1006 		} else if (strcmp(c, "int") == 0) {
1007 			nint++;
1008 		} else {
1009 			/*
1010 			 * If we don't recognize any of the tokens, we'll tell
1011 			 * the caller to fall back to the dwarf-provided
1012 			 * encoding information.
1013 			 */
1014 			return (EINVAL);
1015 		}
1016 	}
1017 
1018 	if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2)
1019 		return (EINVAL);
1020 
1021 	if (nchar > 0) {
1022 		if (nlong > 0 || nshort > 0 || nint > 0)
1023 			return (EINVAL);
1024 		base = "char";
1025 	} else if (nshort > 0) {
1026 		if (nlong > 0)
1027 			return (EINVAL);
1028 		base = "short";
1029 	} else if (nlong > 0) {
1030 		base = "long";
1031 	} else {
1032 		base = "int";
1033 	}
1034 
1035 	if (nchar > 0)
1036 		enc->cte_format = CTF_INT_CHAR;
1037 	else
1038 		enc->cte_format = 0;
1039 
1040 	if (sign > 0)
1041 		enc->cte_format |= CTF_INT_SIGNED;
1042 
1043 	(void) snprintf(buf, sizeof (buf), "%s%s%s",
1044 	    (sign ? "" : "unsigned "),
1045 	    (nlong > 1 ? "long " : ""),
1046 	    base);
1047 
1048 	*newnamep = ctf_strdup(buf);
1049 	if (*newnamep == NULL)
1050 		return (ENOMEM);
1051 	*kindp = CTF_K_INTEGER;
1052 	return (0);
1053 }
1054 
1055 static int
1056 ctf_dwarf_create_base(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot,
1057     Dwarf_Off off)
1058 {
1059 	int ret;
1060 	char *name, *nname = NULL;
1061 	Dwarf_Unsigned sz;
1062 	int kind;
1063 	ctf_encoding_t enc;
1064 	ctf_id_t id;
1065 
1066 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0)
1067 		return (ret);
1068 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &sz)) != 0) {
1069 		goto out;
1070 	}
1071 	ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name,
1072 	    off, sz);
1073 
1074 	bzero(&enc, sizeof (ctf_encoding_t));
1075 	enc.cte_bits = sz * 8;
1076 	if ((ret = ctf_dwarf_parse_int(name, &kind, &enc, &nname)) == 0) {
1077 		ctf_free(name, strlen(name) + 1);
1078 		name = nname;
1079 	} else {
1080 		if (ret != EINVAL) {
1081 			goto out;
1082 		}
1083 		ctf_dprintf("falling back to dwarf for base type %s\n", name);
1084 		if ((ret = ctf_dwarf_dwarf_base(cup, die, &kind, &enc)) != 0) {
1085 			goto out;
1086 		}
1087 
1088 		if (kind == CTF_K_FLOAT && (ret = ctf_dwarf_fixup_complex(cup,
1089 		    &enc, &nname)) != 0) {
1090 			goto out;
1091 		} else if (nname != NULL) {
1092 			ctf_free(name, strlen(name) + 1);
1093 			name = nname;
1094 		}
1095 	}
1096 
1097 	id = ctf_add_encoded(cup->cu_ctfp, isroot, name, &enc, kind);
1098 	if (id == CTF_ERR) {
1099 		ret = ctf_errno(cup->cu_ctfp);
1100 	} else {
1101 		*idp = id;
1102 		ret = ctf_dwmap_add(cup, id, die, B_FALSE);
1103 	}
1104 out:
1105 	ctf_free(name, strlen(name) + 1);
1106 	return (ret);
1107 }
1108 
1109 /*
1110  * Getting a member's offset is a surprisingly intricate dance. It works as
1111  * follows:
1112  *
1113  * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we
1114  * have a DW_AT_data_member_location. We won't have both. Thus we check first
1115  * for DW_AT_data_bit_offset, and if it exists, we're set.
1116  *
1117  * Next, if we have a bitfield and we don't have a DW_AT_data_bit_offset, then
1118  * we have to grab the data location and use the following dance:
1119  *
1120  * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size.
1121  * Of course, the DW_AT_byte_size may be omitted, even though it isn't always.
1122  * When it's been omitted, we then have to say that the size is that of the
1123  * underlying type, which forces that to be after a ctf_update(). Here, we have
1124  * to do different things based on whether or not we're using big endian or
1125  * little endian to obtain the proper offset.
1126  */
1127 static int
1128 ctf_dwarf_member_offset(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t mid,
1129     ulong_t *offp)
1130 {
1131 	int ret;
1132 	Dwarf_Unsigned loc, bitsz, bytesz;
1133 	Dwarf_Signed bitoff;
1134 	size_t off;
1135 	ssize_t tsz;
1136 
1137 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_data_bit_offset,
1138 	    &loc)) == 0) {
1139 		*offp = loc;
1140 		return (0);
1141 	} else if (ret != ENOENT) {
1142 		return (ret);
1143 	}
1144 
1145 	if ((ret = ctf_dwarf_member_location(cup, die, &loc)) != 0)
1146 		return (ret);
1147 	off = loc * 8;
1148 
1149 	if ((ret = ctf_dwarf_signed(cup, die, DW_AT_bit_offset,
1150 	    &bitoff)) != 0) {
1151 		if (ret != ENOENT)
1152 			return (ret);
1153 		*offp = off;
1154 		return (0);
1155 	}
1156 
1157 	/* At this point we have to have DW_AT_bit_size */
1158 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0)
1159 		return (ret);
1160 
1161 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size,
1162 	    &bytesz)) != 0) {
1163 		if (ret != ENOENT)
1164 			return (ret);
1165 		if ((tsz = ctf_type_size(cup->cu_ctfp, mid)) == CTF_ERR) {
1166 			int e = ctf_errno(cup->cu_ctfp);
1167 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1168 			    "failed to get type size: %s", ctf_errmsg(e));
1169 			return (ECTF_CONVBKERR);
1170 		}
1171 	} else {
1172 		tsz = bytesz;
1173 	}
1174 	tsz *= 8;
1175 	if (cup->cu_bigend == B_TRUE) {
1176 		*offp = off + bitoff;
1177 	} else {
1178 		*offp = off + tsz - bitoff - bitsz;
1179 	}
1180 
1181 	return (0);
1182 }
1183 
1184 /*
1185  * We need to determine if the member in question is a bitfield. If it is, then
1186  * we need to go through and create a new type that's based on the actual base
1187  * type, but has a different size. We also rename the type as a result to help
1188  * deal with future collisions.
1189  *
1190  * Here we need to look and see if we have a DW_AT_bit_size value. If we have a
1191  * bit size member and it does not equal the byte size member, then we need to
1192  * create a bitfield type based on this.
1193  *
1194  * Note: When we support DWARFv4, there may be a chance that we need to also
1195  * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member.
1196  */
1197 static int
1198 ctf_dwarf_member_bitfield(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp)
1199 {
1200 	int ret;
1201 	Dwarf_Unsigned bitsz;
1202 	ctf_encoding_t e;
1203 	ctf_dwbitf_t *cdb;
1204 	ctf_dtdef_t *dtd;
1205 	ctf_id_t base = *idp;
1206 	int kind;
1207 
1208 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_bit_size, &bitsz)) != 0) {
1209 		if (ret == ENOENT)
1210 			return (0);
1211 		return (ret);
1212 	}
1213 
1214 	ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz);
1215 	/*
1216 	 * Given that we now have a bitsize, time to go do something about it.
1217 	 * We're going to create a new type based on the current one, but first
1218 	 * we need to find the base type. This means we need to traverse any
1219 	 * typedef's, consts, and volatiles until we get to what should be
1220 	 * something of type integer or enumeration.
1221 	 */
1222 	VERIFY(bitsz < UINT32_MAX);
1223 	dtd = ctf_dtd_lookup(cup->cu_ctfp, base);
1224 	VERIFY(dtd != NULL);
1225 	kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1226 	while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST ||
1227 	    kind == CTF_K_VOLATILE) {
1228 		dtd = ctf_dtd_lookup(cup->cu_ctfp, dtd->dtd_data.ctt_type);
1229 		VERIFY(dtd != NULL);
1230 		kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
1231 	}
1232 	ctf_dprintf("got kind %d\n", kind);
1233 	VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM);
1234 
1235 	/*
1236 	 * As surprising as it may be, it is strictly possible to create a
1237 	 * bitfield that is based on an enum. Of course, the C standard leaves
1238 	 * enums sizing as an ABI concern more or less. To that effect, today on
1239 	 * all illumos platforms the size of an enum is generally that of an
1240 	 * int as our supported data models and ABIs all agree on that. So what
1241 	 * we'll do is fake up a CTF encoding here to use. In this case, we'll
1242 	 * treat it as an unsigned value of whatever size the underlying enum
1243 	 * currently has (which is in the ctt_size member of its dynamic type
1244 	 * data).
1245 	 */
1246 	if (kind == CTF_K_INTEGER) {
1247 		e = dtd->dtd_u.dtu_enc;
1248 	} else {
1249 		bzero(&e, sizeof (ctf_encoding_t));
1250 		e.cte_bits = dtd->dtd_data.ctt_size * NBBY;
1251 	}
1252 
1253 	for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL;
1254 	    cdb = ctf_list_next(cdb)) {
1255 		if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz)
1256 			break;
1257 	}
1258 
1259 	/*
1260 	 * Create a new type if none exists. We name all types in a way that is
1261 	 * guaranteed not to conflict with the corresponding C type. We do this
1262 	 * by using the ':' operator.
1263 	 */
1264 	if (cdb == NULL) {
1265 		size_t namesz;
1266 		char *name;
1267 
1268 		e.cte_bits = bitsz;
1269 		namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name,
1270 		    (uint32_t)bitsz);
1271 		name = ctf_alloc(namesz + 1);
1272 		if (name == NULL)
1273 			return (ENOMEM);
1274 		cdb = ctf_alloc(sizeof (ctf_dwbitf_t));
1275 		if (cdb == NULL) {
1276 			ctf_free(name, namesz + 1);
1277 			return (ENOMEM);
1278 		}
1279 		(void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name,
1280 		    (uint32_t)bitsz);
1281 
1282 		cdb->cdb_base = base;
1283 		cdb->cdb_nbits = bitsz;
1284 		cdb->cdb_id = ctf_add_integer(cup->cu_ctfp, CTF_ADD_NONROOT,
1285 		    name, &e);
1286 		if (cdb->cdb_id == CTF_ERR) {
1287 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1288 			    "failed to get add bitfield type %s: %s", name,
1289 			    ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1290 			ctf_free(name, namesz + 1);
1291 			ctf_free(cdb, sizeof (ctf_dwbitf_t));
1292 			return (ECTF_CONVBKERR);
1293 		}
1294 		ctf_free(name, namesz + 1);
1295 		ctf_list_append(&cup->cu_bitfields, cdb);
1296 	}
1297 
1298 	*idp = cdb->cdb_id;
1299 
1300 	return (0);
1301 }
1302 
1303 static int
1304 ctf_dwarf_fixup_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t base, boolean_t add)
1305 {
1306 	int ret, kind;
1307 	Dwarf_Die child, memb;
1308 	Dwarf_Unsigned size;
1309 
1310 	kind = ctf_type_kind(cup->cu_ctfp, base);
1311 	VERIFY(kind != CTF_ERR);
1312 	VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION);
1313 
1314 	/*
1315 	 * Members are in children. However, gcc also allows empty ones.
1316 	 */
1317 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1318 		return (ret);
1319 	if (child == NULL)
1320 		return (0);
1321 
1322 	memb = child;
1323 	while (memb != NULL) {
1324 		Dwarf_Die sib, tdie;
1325 		Dwarf_Half tag;
1326 		ctf_id_t mid;
1327 		char *mname;
1328 		ulong_t memboff = 0;
1329 
1330 		if ((ret = ctf_dwarf_tag(cup, memb, &tag)) != 0)
1331 			return (ret);
1332 
1333 		if (tag != DW_TAG_member)
1334 			goto next;
1335 
1336 		if ((ret = ctf_dwarf_refdie(cup, memb, DW_AT_type, &tdie)) != 0)
1337 			return (ret);
1338 
1339 		if ((ret = ctf_dwarf_convert_type(cup, tdie, &mid,
1340 		    CTF_ADD_NONROOT)) != 0)
1341 			return (ret);
1342 		ctf_dprintf("Got back type id: %d\n", mid);
1343 
1344 		/*
1345 		 * If we're not adding a member, just go ahead and return.
1346 		 */
1347 		if (add == B_FALSE) {
1348 			if ((ret = ctf_dwarf_member_bitfield(cup, memb,
1349 			    &mid)) != 0)
1350 				return (ret);
1351 			goto next;
1352 		}
1353 
1354 		if ((ret = ctf_dwarf_string(cup, memb, DW_AT_name,
1355 		    &mname)) != 0 && ret != ENOENT)
1356 			return (ret);
1357 		if (ret == ENOENT)
1358 			mname = NULL;
1359 
1360 		if (kind == CTF_K_UNION) {
1361 			memboff = 0;
1362 		} else if ((ret = ctf_dwarf_member_offset(cup, memb, mid,
1363 		    &memboff)) != 0) {
1364 			if (mname != NULL)
1365 				ctf_free(mname, strlen(mname) + 1);
1366 			return (ret);
1367 		}
1368 
1369 		if ((ret = ctf_dwarf_member_bitfield(cup, memb, &mid)) != 0)
1370 			return (ret);
1371 
1372 		ret = ctf_add_member(cup->cu_ctfp, base, mname, mid, memboff);
1373 		if (ret == CTF_ERR) {
1374 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1375 			    "failed to add member %s: %s",
1376 			    mname, ctf_errmsg(ctf_errno(cup->cu_ctfp)));
1377 			if (mname != NULL)
1378 				ctf_free(mname, strlen(mname) + 1);
1379 			return (ECTF_CONVBKERR);
1380 		}
1381 
1382 		if (mname != NULL)
1383 			ctf_free(mname, strlen(mname) + 1);
1384 
1385 next:
1386 		if ((ret = ctf_dwarf_sib(cup, memb, &sib)) != 0)
1387 			return (ret);
1388 		memb = sib;
1389 	}
1390 
1391 	/*
1392 	 * If we're not adding members, then we don't know the final size of the
1393 	 * structure, so end here.
1394 	 */
1395 	if (add == B_FALSE)
1396 		return (0);
1397 
1398 	/* Finally set the size of the structure to the actual byte size */
1399 	if ((ret = ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &size)) != 0)
1400 		return (ret);
1401 	if ((ctf_set_size(cup->cu_ctfp, base, size)) == CTF_ERR) {
1402 		int e = ctf_errno(cup->cu_ctfp);
1403 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1404 		    "failed to set type size for %d to 0x%x: %s", base,
1405 		    (uint32_t)size, ctf_errmsg(e));
1406 		return (ECTF_CONVBKERR);
1407 	}
1408 
1409 	return (0);
1410 }
1411 
1412 static int
1413 ctf_dwarf_create_sou(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1414     int kind, int isroot)
1415 {
1416 	int ret;
1417 	char *name;
1418 	ctf_id_t base;
1419 	Dwarf_Die child;
1420 	Dwarf_Bool decl;
1421 
1422 	/*
1423 	 * Deal with the terribly annoying case of anonymous structs and unions.
1424 	 * If they don't have a name, set the name to the empty string.
1425 	 */
1426 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1427 	    ret != ENOENT)
1428 		return (ret);
1429 	if (ret == ENOENT)
1430 		name = NULL;
1431 
1432 	/*
1433 	 * We need to check if we just have a declaration here. If we do, then
1434 	 * instead of creating an actual structure or union, we're just going to
1435 	 * go ahead and create a forward. During a dedup or merge, the forward
1436 	 * will be replaced with the real thing.
1437 	 */
1438 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration,
1439 	    &decl)) != 0) {
1440 		if (ret != ENOENT)
1441 			return (ret);
1442 		decl = 0;
1443 	}
1444 
1445 	if (decl != 0) {
1446 		base = ctf_add_forward(cup->cu_ctfp, isroot, name, kind);
1447 	} else if (kind == CTF_K_STRUCT) {
1448 		base = ctf_add_struct(cup->cu_ctfp, isroot, name);
1449 	} else {
1450 		base = ctf_add_union(cup->cu_ctfp, isroot, name);
1451 	}
1452 	ctf_dprintf("added sou %s (%d) (%d)\n", name, kind, base);
1453 	if (name != NULL)
1454 		ctf_free(name, strlen(name) + 1);
1455 	if (base == CTF_ERR)
1456 		return (ctf_errno(cup->cu_ctfp));
1457 	*idp = base;
1458 
1459 	/*
1460 	 * If it's just a declaration, we're not going to mark it for fix up or
1461 	 * do anything else.
1462 	 */
1463 	if (decl == B_TRUE)
1464 		return (ctf_dwmap_add(cup, base, die, B_FALSE));
1465 	if ((ret = ctf_dwmap_add(cup, base, die, B_TRUE)) != 0)
1466 		return (ret);
1467 
1468 	/*
1469 	 * The children of a structure or union are generally members. However,
1470 	 * some compilers actually insert structs and unions there and not as a
1471 	 * top-level die. Therefore, to make sure we honor our pass 1 contract
1472 	 * of having all the base types, but not members, we need to walk this
1473 	 * for instances of a DW_TAG_union_type.
1474 	 */
1475 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
1476 		return (ret);
1477 
1478 	while (child != NULL) {
1479 		Dwarf_Half tag;
1480 		Dwarf_Die sib;
1481 
1482 		if ((ret = ctf_dwarf_tag(cup, child, &tag)) != 0)
1483 			return (ret);
1484 
1485 		switch (tag) {
1486 		case DW_TAG_union_type:
1487 		case DW_TAG_structure_type:
1488 			ret = ctf_dwarf_convert_type(cup, child, NULL,
1489 			    CTF_ADD_NONROOT);
1490 			if (ret != 0) {
1491 				return (ret);
1492 			}
1493 			break;
1494 		default:
1495 			break;
1496 		}
1497 
1498 		if ((ret = ctf_dwarf_sib(cup, child, &sib)) != 0)
1499 			return (ret);
1500 		child = sib;
1501 	}
1502 
1503 	return (0);
1504 }
1505 
1506 static int
1507 ctf_dwarf_array_upper_bound(ctf_cu_t *cup, Dwarf_Die range, ctf_arinfo_t *ar)
1508 {
1509 	Dwarf_Attribute attr;
1510 	Dwarf_Unsigned uval;
1511 	Dwarf_Signed sval;
1512 	Dwarf_Half form;
1513 	Dwarf_Error derr;
1514 	const char *formstr = NULL;
1515 	uint_t adj = 0;
1516 	int ret = 0;
1517 
1518 	ctf_dprintf("setting array upper bound\n");
1519 
1520 	ar->ctr_nelems = 0;
1521 
1522 	/*
1523 	 * Different compilers use different attributes to indicate the size of
1524 	 * an array. GCC has traditionally used DW_AT_upper_bound, while Clang
1525 	 * uses DW_AT_count. They have slightly different semantics. DW_AT_count
1526 	 * indicates the total number of elements that are present, while
1527 	 * DW_AT_upper_bound indicates the last index, hence we need to add one
1528 	 * to that index to get the count.
1529 	 *
1530 	 * We first search for DW_AT_count and then for DW_AT_upper_bound. If we
1531 	 * find neither, then we treat the lack of this as a zero element array.
1532 	 * Our value is initialized assuming we find a DW_AT_count value.
1533 	 */
1534 	ret = ctf_dwarf_attribute(cup, range, DW_AT_count, &attr);
1535 	if (ret != 0 && ret != ENOENT) {
1536 		return (ret);
1537 	} else if (ret == ENOENT) {
1538 		ret = ctf_dwarf_attribute(cup, range, DW_AT_upper_bound, &attr);
1539 		if (ret != 0 && ret != ENOENT) {
1540 			return (ret);
1541 		} else if (ret == ENOENT) {
1542 			return (0);
1543 		} else {
1544 			adj = 1;
1545 		}
1546 	}
1547 
1548 	if (dwarf_whatform(attr, &form, &derr) != DW_DLV_OK) {
1549 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1550 		    "failed to get DW_AT_upper_bound attribute form: %s\n",
1551 		    dwarf_errmsg(derr));
1552 		ret = ECTF_CONVBKERR;
1553 		goto done;
1554 	}
1555 
1556 	/*
1557 	 * Compilers can indicate array bounds using signed or unsigned values.
1558 	 * Additionally, some compilers may also store the array bounds
1559 	 * using as DW_FORM_data{1,2,4,8} (which DWARF treats as raw data and
1560 	 * expects the caller to understand how to interpret the value).
1561 	 *
1562 	 * GCC 4.4.4 appears to always use unsigned values to encode the
1563 	 * array size (using '(unsigned)-1' to represent a zero-length or
1564 	 * unknown length array). Later versions of GCC use a signed value of
1565 	 * -1 for zero/unknown length arrays, and unsigned values to encode
1566 	 * known array sizes.
1567 	 *
1568 	 * Both dwarf_formsdata() and dwarf_formudata() will retrieve values
1569 	 * as their respective signed/unsigned forms, but both will also
1570 	 * retreive DW_FORM_data{1,2,4,8} values and treat them as signed or
1571 	 * unsigned integers (i.e. dwarf_formsdata() treats DW_FORM_dataXX
1572 	 * as signed integers and dwarf_formudata() treats DW_FORM_dataXX as
1573 	 * unsigned integers). Both will return an error if the form is not
1574 	 * their respective signed/unsigned form, or DW_FORM_dataXX.
1575 	 *
1576 	 * To obtain the upper bound, we use the appropriate
1577 	 * dwarf_form[su]data() function based on the form of DW_AT_upper_bound.
1578 	 * Additionally, we let dwarf_formudata() handle the DW_FORM_dataXX
1579 	 * forms (via the default option in the switch). If the value is in an
1580 	 * unexpected form (i.e. not DW_FORM_udata or DW_FORM_dataXX),
1581 	 * dwarf_formudata() will return failure (i.e. not DW_DLV_OK) and set
1582 	 * derr with the specific error value.
1583 	 */
1584 	switch (form) {
1585 	case DW_FORM_sdata:
1586 		if (dwarf_formsdata(attr, &sval, &derr) == DW_DLV_OK) {
1587 			ar->ctr_nelems = sval + adj;
1588 			goto done;
1589 		}
1590 		break;
1591 	case DW_FORM_udata:
1592 	default:
1593 		if (dwarf_formudata(attr, &uval, &derr) == DW_DLV_OK) {
1594 			ar->ctr_nelems = uval + adj;
1595 			goto done;
1596 		}
1597 		break;
1598 	}
1599 
1600 	if (dwarf_get_FORM_name(form, &formstr) != DW_DLV_OK)
1601 		formstr = "unknown DWARF form";
1602 
1603 	(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1604 	    "failed to get %s (%hu) value for DW_AT_upper_bound: %s\n",
1605 	    formstr, form, dwarf_errmsg(derr));
1606 	ret = ECTF_CONVBKERR;
1607 
1608 done:
1609 	dwarf_dealloc(cup->cu_dwarf, attr, DW_DLA_ATTR);
1610 	return (ret);
1611 }
1612 
1613 static int
1614 ctf_dwarf_create_array_range(ctf_cu_t *cup, Dwarf_Die range, ctf_id_t *idp,
1615     ctf_id_t base, int isroot)
1616 {
1617 	int ret;
1618 	Dwarf_Die sib;
1619 	ctf_arinfo_t ar;
1620 
1621 	ctf_dprintf("creating array range\n");
1622 
1623 	if ((ret = ctf_dwarf_sib(cup, range, &sib)) != 0)
1624 		return (ret);
1625 	if (sib != NULL) {
1626 		ctf_id_t id;
1627 		if ((ret = ctf_dwarf_create_array_range(cup, sib, &id,
1628 		    base, CTF_ADD_NONROOT)) != 0)
1629 			return (ret);
1630 		ar.ctr_contents = id;
1631 	} else {
1632 		ar.ctr_contents = base;
1633 	}
1634 
1635 	if ((ar.ctr_index = ctf_dwarf_long(cup)) == CTF_ERR)
1636 		return (ctf_errno(cup->cu_ctfp));
1637 
1638 	if ((ret = ctf_dwarf_array_upper_bound(cup, range, &ar)) != 0)
1639 		return (ret);
1640 
1641 	if ((*idp = ctf_add_array(cup->cu_ctfp, isroot, &ar)) == CTF_ERR)
1642 		return (ctf_errno(cup->cu_ctfp));
1643 
1644 	return (0);
1645 }
1646 
1647 /*
1648  * Try and create an array type. First, the kind of the array is specified in
1649  * the DW_AT_type entry. Next, the number of entries is stored in a more
1650  * complicated form, we should have a child that has the DW_TAG_subrange type.
1651  */
1652 static int
1653 ctf_dwarf_create_array(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1654 {
1655 	int ret;
1656 	Dwarf_Die tdie, rdie;
1657 	ctf_id_t tid;
1658 	Dwarf_Half rtag;
1659 
1660 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0)
1661 		return (ret);
1662 	if ((ret = ctf_dwarf_convert_type(cup, tdie, &tid,
1663 	    CTF_ADD_NONROOT)) != 0)
1664 		return (ret);
1665 
1666 	if ((ret = ctf_dwarf_child(cup, die, &rdie)) != 0)
1667 		return (ret);
1668 	if ((ret = ctf_dwarf_tag(cup, rdie, &rtag)) != 0)
1669 		return (ret);
1670 	if (rtag != DW_TAG_subrange_type) {
1671 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1672 		    "encountered array without DW_TAG_subrange_type child\n");
1673 		return (ECTF_CONVBKERR);
1674 	}
1675 
1676 	/*
1677 	 * The compiler may opt to describe a multi-dimensional array as one
1678 	 * giant array or it may opt to instead encode it as a series of
1679 	 * subranges. If it's the latter, then for each subrange we introduce a
1680 	 * type. We can always use the base type.
1681 	 */
1682 	if ((ret = ctf_dwarf_create_array_range(cup, rdie, idp, tid,
1683 	    isroot)) != 0)
1684 		return (ret);
1685 	ctf_dprintf("Got back id %d\n", *idp);
1686 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1687 }
1688 
1689 /*
1690  * Given "const int const_array3[11]", GCC7 at least will create a DIE tree of
1691  * DW_TAG_const_type:DW_TAG_array_type:DW_Tag_const_type:<member_type>.
1692  *
1693  * Given C's syntax, this renders out as "const const int const_array3[11]".  To
1694  * get closer to round-tripping (and make the unit tests work), we'll peek for
1695  * this case, and avoid adding the extraneous qualifier if we see that the
1696  * underlying array referent already has the same qualifier.
1697  *
1698  * This is unfortunately less trivial than it could be: this issue applies to
1699  * qualifier sets like "const volatile", as well as multi-dimensional arrays, so
1700  * we need to descend down those.
1701  *
1702  * Returns CTF_ERR on error, or a boolean value otherwise.
1703  */
1704 static int
1705 needed_array_qualifier(ctf_cu_t *cup, int kind, ctf_id_t ref_id)
1706 {
1707 	const ctf_type_t *t;
1708 	ctf_arinfo_t arinfo;
1709 	int akind;
1710 
1711 	if (kind != CTF_K_CONST && kind != CTF_K_VOLATILE &&
1712 	    kind != CTF_K_RESTRICT)
1713 		return (1);
1714 
1715 	if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, ref_id)) == NULL)
1716 		return (CTF_ERR);
1717 
1718 	if (LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info) != CTF_K_ARRAY)
1719 		return (1);
1720 
1721 	if (ctf_dyn_array_info(cup->cu_ctfp, ref_id, &arinfo) != 0)
1722 		return (CTF_ERR);
1723 
1724 	ctf_id_t id = arinfo.ctr_contents;
1725 
1726 	for (;;) {
1727 		if ((t = ctf_dyn_lookup_by_id(cup->cu_ctfp, id)) == NULL)
1728 			return (CTF_ERR);
1729 
1730 		akind = LCTF_INFO_KIND(cup->cu_ctfp, t->ctt_info);
1731 
1732 		if (akind == kind)
1733 			break;
1734 
1735 		if (akind == CTF_K_ARRAY) {
1736 			if (ctf_dyn_array_info(cup->cu_ctfp,
1737 			    id, &arinfo) != 0)
1738 				return (CTF_ERR);
1739 			id = arinfo.ctr_contents;
1740 			continue;
1741 		}
1742 
1743 		if (akind != CTF_K_CONST && akind != CTF_K_VOLATILE &&
1744 		    akind != CTF_K_RESTRICT)
1745 			break;
1746 
1747 		id = t->ctt_type;
1748 	}
1749 
1750 	if (kind == akind) {
1751 		ctf_dprintf("ignoring extraneous %s qualifier for array %d\n",
1752 		    ctf_kind_name(cup->cu_ctfp, kind), ref_id);
1753 	}
1754 
1755 	return (kind != akind);
1756 }
1757 
1758 static int
1759 ctf_dwarf_create_reference(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1760     int kind, int isroot)
1761 {
1762 	int ret;
1763 	ctf_id_t id;
1764 	Dwarf_Die tdie;
1765 	char *name;
1766 	size_t namelen;
1767 
1768 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1769 	    ret != ENOENT)
1770 		return (ret);
1771 	if (ret == ENOENT) {
1772 		name = NULL;
1773 		namelen = 0;
1774 	} else {
1775 		namelen = strlen(name);
1776 	}
1777 
1778 	ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>");
1779 
1780 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
1781 		if (ret != ENOENT) {
1782 			ctf_free(name, namelen);
1783 			return (ret);
1784 		}
1785 		if ((id = ctf_dwarf_void(cup)) == CTF_ERR) {
1786 			ctf_free(name, namelen);
1787 			return (ctf_errno(cup->cu_ctfp));
1788 		}
1789 	} else {
1790 		if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
1791 		    CTF_ADD_NONROOT)) != 0) {
1792 			ctf_free(name, namelen);
1793 			return (ret);
1794 		}
1795 	}
1796 
1797 	if ((ret = needed_array_qualifier(cup, kind, id)) <= 0) {
1798 		if (ret != 0) {
1799 			ret = (ctf_errno(cup->cu_ctfp));
1800 		} else {
1801 			*idp = id;
1802 		}
1803 
1804 		ctf_free(name, namelen);
1805 		return (ret);
1806 	}
1807 
1808 	if ((*idp = ctf_add_reftype(cup->cu_ctfp, isroot, name, id, kind)) ==
1809 	    CTF_ERR) {
1810 		ctf_free(name, namelen);
1811 		return (ctf_errno(cup->cu_ctfp));
1812 	}
1813 
1814 	ctf_free(name, namelen);
1815 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1816 }
1817 
1818 static int
1819 ctf_dwarf_create_enum(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1820 {
1821 	size_t size = 0;
1822 	Dwarf_Die child;
1823 	Dwarf_Unsigned dw;
1824 	ctf_id_t id;
1825 	char *name;
1826 	int ret;
1827 
1828 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
1829 	    ret != ENOENT)
1830 		return (ret);
1831 	if (ret == ENOENT)
1832 		name = NULL;
1833 
1834 	/*
1835 	 * Enumerations may have a size associated with them, particularly if
1836 	 * they're packed. Note, a Dwarf_Unsigned is larger than a size_t on an
1837 	 * ILP32 system.
1838 	 */
1839 	if (ctf_dwarf_unsigned(cup, die, DW_AT_byte_size, &dw) == 0 &&
1840 	    dw < SIZE_MAX) {
1841 		size = (size_t)dw;
1842 	}
1843 
1844 	id = ctf_add_enum(cup->cu_ctfp, isroot, name, size);
1845 	ctf_dprintf("added enum %s (%d)\n", name, id);
1846 	if (name != NULL)
1847 		ctf_free(name, strlen(name) + 1);
1848 	if (id == CTF_ERR)
1849 		return (ctf_errno(cup->cu_ctfp));
1850 	*idp = id;
1851 	if ((ret = ctf_dwmap_add(cup, id, die, B_FALSE)) != 0)
1852 		return (ret);
1853 
1854 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0) {
1855 		if (ret == ENOENT)
1856 			ret = 0;
1857 		return (ret);
1858 	}
1859 
1860 	while (child != NULL) {
1861 		Dwarf_Half tag;
1862 		Dwarf_Signed sval;
1863 		Dwarf_Unsigned uval;
1864 		Dwarf_Die arg = child;
1865 		int eval;
1866 
1867 		if ((ret = ctf_dwarf_sib(cup, arg, &child)) != 0)
1868 			return (ret);
1869 
1870 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
1871 			return (ret);
1872 
1873 		if (tag != DW_TAG_enumerator) {
1874 			if ((ret = ctf_dwarf_convert_type(cup, arg, NULL,
1875 			    CTF_ADD_NONROOT)) != 0)
1876 				return (ret);
1877 			continue;
1878 		}
1879 
1880 		/*
1881 		 * DWARF v4 section 5.7 tells us we'll always have names.
1882 		 */
1883 		if ((ret = ctf_dwarf_string(cup, arg, DW_AT_name, &name)) != 0)
1884 			return (ret);
1885 
1886 		/*
1887 		 * We have to be careful here: newer GCCs generate DWARF where
1888 		 * an unsigned value will happily pass ctf_dwarf_signed().
1889 		 * Since negative values will fail ctf_dwarf_unsigned(), we try
1890 		 * that first to make sure we get the right value.
1891 		 */
1892 		if ((ret = ctf_dwarf_unsigned(cup, arg, DW_AT_const_value,
1893 		    &uval)) == 0) {
1894 			eval = (int)uval;
1895 		} else if ((ret = ctf_dwarf_signed(cup, arg, DW_AT_const_value,
1896 		    &sval)) == 0) {
1897 			eval = sval;
1898 		}
1899 
1900 		if (ret != 0) {
1901 			if (ret != ENOENT)
1902 				return (ret);
1903 
1904 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1905 			    "encountered enumeration without constant value\n");
1906 			return (ECTF_CONVBKERR);
1907 		}
1908 
1909 		ret = ctf_add_enumerator(cup->cu_ctfp, id, name, eval);
1910 		if (ret == CTF_ERR) {
1911 			(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
1912 			    "failed to add enumarator %s (%d) to %d\n",
1913 			    name, eval, id);
1914 			ctf_free(name, strlen(name) + 1);
1915 			return (ctf_errno(cup->cu_ctfp));
1916 		}
1917 		ctf_free(name, strlen(name) + 1);
1918 	}
1919 
1920 	return (0);
1921 }
1922 
1923 /*
1924  * For a function pointer, walk over and process all of its children, unless we
1925  * encounter one that's just a declaration. In which case, we error on it.
1926  */
1927 static int
1928 ctf_dwarf_create_fptr(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp, int isroot)
1929 {
1930 	int ret;
1931 	Dwarf_Bool b;
1932 	ctf_funcinfo_t fi;
1933 	Dwarf_Die retdie;
1934 	ctf_id_t *argv = NULL;
1935 
1936 	bzero(&fi, sizeof (ctf_funcinfo_t));
1937 
1938 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
1939 		if (ret != ENOENT)
1940 			return (ret);
1941 	} else {
1942 		if (b != 0)
1943 			return (EPROTOTYPE);
1944 	}
1945 
1946 	/*
1947 	 * Return type is in DW_AT_type, if none, it returns void.
1948 	 */
1949 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &retdie)) != 0) {
1950 		if (ret != ENOENT)
1951 			return (ret);
1952 		if ((fi.ctc_return = ctf_dwarf_void(cup)) == CTF_ERR)
1953 			return (ctf_errno(cup->cu_ctfp));
1954 	} else {
1955 		if ((ret = ctf_dwarf_convert_type(cup, retdie, &fi.ctc_return,
1956 		    CTF_ADD_NONROOT)) != 0)
1957 			return (ret);
1958 	}
1959 
1960 	if ((ret = ctf_dwarf_function_count(cup, die, &fi, B_TRUE)) != 0) {
1961 		return (ret);
1962 	}
1963 
1964 	if (fi.ctc_argc != 0) {
1965 		argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc);
1966 		if (argv == NULL)
1967 			return (ENOMEM);
1968 
1969 		if ((ret = ctf_dwarf_convert_fargs(cup, die, &fi, argv)) != 0) {
1970 			ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1971 			return (ret);
1972 		}
1973 	}
1974 
1975 	if ((*idp = ctf_add_funcptr(cup->cu_ctfp, isroot, &fi, argv)) ==
1976 	    CTF_ERR) {
1977 		ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1978 		return (ctf_errno(cup->cu_ctfp));
1979 	}
1980 
1981 	ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
1982 	return (ctf_dwmap_add(cup, *idp, die, B_FALSE));
1983 }
1984 
1985 static int
1986 ctf_dwarf_convert_type(ctf_cu_t *cup, Dwarf_Die die, ctf_id_t *idp,
1987     int isroot)
1988 {
1989 	int ret;
1990 	Dwarf_Off offset;
1991 	Dwarf_Half tag;
1992 	ctf_dwmap_t lookup, *map;
1993 	ctf_id_t id;
1994 
1995 	if (idp == NULL)
1996 		idp = &id;
1997 
1998 	if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
1999 		return (ret);
2000 
2001 	if (offset > cup->cu_maxoff) {
2002 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
2003 		    "die offset %llu beyond maximum for header %llu\n",
2004 		    offset, cup->cu_maxoff);
2005 		return (ECTF_CONVBKERR);
2006 	}
2007 
2008 	/*
2009 	 * If we've already added an entry for this offset, then we're done.
2010 	 */
2011 	lookup.cdm_off = offset;
2012 	if ((map = avl_find(&cup->cu_map, &lookup, NULL)) != NULL) {
2013 		*idp = map->cdm_id;
2014 		return (0);
2015 	}
2016 
2017 	if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
2018 		return (ret);
2019 
2020 	ret = ENOTSUP;
2021 	switch (tag) {
2022 	case DW_TAG_base_type:
2023 		ctf_dprintf("base\n");
2024 		ret = ctf_dwarf_create_base(cup, die, idp, isroot, offset);
2025 		break;
2026 	case DW_TAG_array_type:
2027 		ctf_dprintf("array\n");
2028 		ret = ctf_dwarf_create_array(cup, die, idp, isroot);
2029 		break;
2030 	case DW_TAG_enumeration_type:
2031 		ctf_dprintf("enum\n");
2032 		ret = ctf_dwarf_create_enum(cup, die, idp, isroot);
2033 		break;
2034 	case DW_TAG_pointer_type:
2035 		ctf_dprintf("pointer\n");
2036 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_POINTER,
2037 		    isroot);
2038 		break;
2039 	case DW_TAG_structure_type:
2040 		ctf_dprintf("struct\n");
2041 		ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_STRUCT,
2042 		    isroot);
2043 		break;
2044 	case DW_TAG_subroutine_type:
2045 		ctf_dprintf("fptr\n");
2046 		ret = ctf_dwarf_create_fptr(cup, die, idp, isroot);
2047 		break;
2048 	case DW_TAG_typedef:
2049 		ctf_dprintf("typedef\n");
2050 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_TYPEDEF,
2051 		    isroot);
2052 		break;
2053 	case DW_TAG_union_type:
2054 		ctf_dprintf("union\n");
2055 		ret = ctf_dwarf_create_sou(cup, die, idp, CTF_K_UNION,
2056 		    isroot);
2057 		break;
2058 	case DW_TAG_const_type:
2059 		ctf_dprintf("const\n");
2060 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_CONST,
2061 		    isroot);
2062 		break;
2063 	case DW_TAG_volatile_type:
2064 		ctf_dprintf("volatile\n");
2065 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_VOLATILE,
2066 		    isroot);
2067 		break;
2068 	case DW_TAG_restrict_type:
2069 		ctf_dprintf("restrict\n");
2070 		ret = ctf_dwarf_create_reference(cup, die, idp, CTF_K_RESTRICT,
2071 		    isroot);
2072 		break;
2073 	default:
2074 		ctf_dprintf("ignoring tag type %x\n", tag);
2075 		*idp = CTF_ERR;
2076 		ret = 0;
2077 		break;
2078 	}
2079 	ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n",
2080 	    ret);
2081 
2082 	return (ret);
2083 }
2084 
2085 static int
2086 ctf_dwarf_walk_lexical(ctf_cu_t *cup, Dwarf_Die die)
2087 {
2088 	int ret;
2089 	Dwarf_Die child;
2090 
2091 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2092 		return (ret);
2093 
2094 	if (child == NULL)
2095 		return (0);
2096 
2097 	return (ctf_dwarf_convert_die(cup, die));
2098 }
2099 
2100 static int
2101 ctf_dwarf_function_count(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
2102     boolean_t fptr)
2103 {
2104 	int ret;
2105 	Dwarf_Die child, sib, arg;
2106 
2107 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2108 		return (ret);
2109 
2110 	arg = child;
2111 	while (arg != NULL) {
2112 		Dwarf_Half tag;
2113 
2114 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
2115 			return (ret);
2116 
2117 		/*
2118 		 * We have to check for a varargs type declaration. This will
2119 		 * happen in one of two ways. If we have a function pointer
2120 		 * type, then it'll be done with a tag of type
2121 		 * DW_TAG_unspecified_parameters. However, it only means we have
2122 		 * a variable number of arguments, if we have more than one
2123 		 * argument found so far. Otherwise, when we have a function
2124 		 * type, it instead uses a formal parameter whose name is '...'
2125 		 * to indicate a variable arguments member.
2126 		 *
2127 		 * Also, if we have a function pointer, then we have to expect
2128 		 * that we might not get a name at all.
2129 		 */
2130 		if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) {
2131 			char *name;
2132 			if ((ret = ctf_dwarf_string(cup, die, DW_AT_name,
2133 			    &name)) != 0)
2134 				return (ret);
2135 			if (strcmp(name, DWARF_VARARGS_NAME) == 0)
2136 				fip->ctc_flags |= CTF_FUNC_VARARG;
2137 			else
2138 				fip->ctc_argc++;
2139 			ctf_free(name, strlen(name) + 1);
2140 		} else if (tag == DW_TAG_formal_parameter) {
2141 			fip->ctc_argc++;
2142 		} else if (tag == DW_TAG_unspecified_parameters &&
2143 		    fip->ctc_argc > 0) {
2144 			fip->ctc_flags |= CTF_FUNC_VARARG;
2145 		}
2146 		if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
2147 			return (ret);
2148 		arg = sib;
2149 	}
2150 
2151 	return (0);
2152 }
2153 
2154 static int
2155 ctf_dwarf_convert_fargs(ctf_cu_t *cup, Dwarf_Die die, ctf_funcinfo_t *fip,
2156     ctf_id_t *argv)
2157 {
2158 	int ret;
2159 	int i = 0;
2160 	Dwarf_Die child, sib, arg;
2161 
2162 	if ((ret = ctf_dwarf_child(cup, die, &child)) != 0)
2163 		return (ret);
2164 
2165 	arg = child;
2166 	while (arg != NULL) {
2167 		Dwarf_Half tag;
2168 
2169 		if ((ret = ctf_dwarf_tag(cup, arg, &tag)) != 0)
2170 			return (ret);
2171 		if (tag == DW_TAG_formal_parameter) {
2172 			Dwarf_Die tdie;
2173 
2174 			if ((ret = ctf_dwarf_refdie(cup, arg, DW_AT_type,
2175 			    &tdie)) != 0)
2176 				return (ret);
2177 
2178 			if ((ret = ctf_dwarf_convert_type(cup, tdie, &argv[i],
2179 			    CTF_ADD_ROOT)) != 0)
2180 				return (ret);
2181 			i++;
2182 
2183 			/*
2184 			 * Once we hit argc entries, we're done. This ensures we
2185 			 * don't accidentally hit a varargs which should be the
2186 			 * last entry.
2187 			 */
2188 			if (i == fip->ctc_argc)
2189 				break;
2190 		}
2191 
2192 		if ((ret = ctf_dwarf_sib(cup, arg, &sib)) != 0)
2193 			return (ret);
2194 		arg = sib;
2195 	}
2196 
2197 	return (0);
2198 }
2199 
2200 static int
2201 ctf_dwarf_convert_function(ctf_cu_t *cup, Dwarf_Die die)
2202 {
2203 	ctf_dwfunc_t *cdf;
2204 	Dwarf_Die tdie;
2205 	Dwarf_Bool b;
2206 	char *name;
2207 	int ret;
2208 
2209 	/*
2210 	 * Functions that don't have a name are generally functions that have
2211 	 * been inlined and thus most information about them has been lost. If
2212 	 * we can't get a name, then instead of returning ENOENT, we silently
2213 	 * swallow the error.
2214 	 */
2215 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0) {
2216 		if (ret == ENOENT)
2217 			return (0);
2218 		return (ret);
2219 	}
2220 
2221 	ctf_dprintf("beginning work on function %s (die %llx)\n",
2222 	    name, ctf_die_offset(die));
2223 
2224 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) != 0) {
2225 		if (ret != ENOENT)
2226 			return (ret);
2227 	} else if (b != 0) {
2228 		/*
2229 		 * GCC7 at least creates empty DW_AT_declarations for functions
2230 		 * defined in headers.  As they lack details on the function
2231 		 * prototype, we need to ignore them.  If we later actually
2232 		 * see the relevant function's definition, we will see another
2233 		 * DW_TAG_subprogram that is more complete.
2234 		 */
2235 		ctf_dprintf("ignoring declaration of function %s (die %llx)\n",
2236 		    name, ctf_die_offset(die));
2237 		return (0);
2238 	}
2239 
2240 	if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) {
2241 		ctf_free(name, strlen(name) + 1);
2242 		return (ENOMEM);
2243 	}
2244 	bzero(cdf, sizeof (ctf_dwfunc_t));
2245 	cdf->cdf_name = name;
2246 
2247 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) == 0) {
2248 		if ((ret = ctf_dwarf_convert_type(cup, tdie,
2249 		    &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) {
2250 			ctf_free(name, strlen(name) + 1);
2251 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2252 			return (ret);
2253 		}
2254 	} else if (ret != ENOENT) {
2255 		ctf_free(name, strlen(name) + 1);
2256 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2257 		return (ret);
2258 	} else {
2259 		if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cup)) ==
2260 		    CTF_ERR) {
2261 			ctf_free(name, strlen(name) + 1);
2262 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2263 			return (ctf_errno(cup->cu_ctfp));
2264 		}
2265 	}
2266 
2267 	/*
2268 	 * A function has a number of children, some of which may not be ones we
2269 	 * care about. Children that we care about have a type of
2270 	 * DW_TAG_formal_parameter. We're going to do two passes, the first to
2271 	 * count the arguments, the second to process them. Afterwards, we
2272 	 * should be good to go ahead and add this function.
2273 	 *
2274 	 * Note, we already got the return type by going in and grabbing it out
2275 	 * of the DW_AT_type.
2276 	 */
2277 	if ((ret = ctf_dwarf_function_count(cup, die, &cdf->cdf_fip,
2278 	    B_FALSE)) != 0) {
2279 		ctf_free(name, strlen(name) + 1);
2280 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2281 		return (ret);
2282 	}
2283 
2284 	ctf_dprintf("beginning to convert function arguments %s\n", name);
2285 	if (cdf->cdf_fip.ctc_argc != 0) {
2286 		uint_t argc = cdf->cdf_fip.ctc_argc;
2287 		cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc);
2288 		if (cdf->cdf_argv == NULL) {
2289 			ctf_free(name, strlen(name) + 1);
2290 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2291 			return (ENOMEM);
2292 		}
2293 		if ((ret = ctf_dwarf_convert_fargs(cup, die,
2294 		    &cdf->cdf_fip, cdf->cdf_argv)) != 0) {
2295 			ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc);
2296 			ctf_free(name, strlen(name) + 1);
2297 			ctf_free(cdf, sizeof (ctf_dwfunc_t));
2298 			return (ret);
2299 		}
2300 	} else {
2301 		cdf->cdf_argv = NULL;
2302 	}
2303 
2304 	if ((ret = ctf_dwarf_isglobal(cup, die, &cdf->cdf_global)) != 0) {
2305 		ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) *
2306 		    cdf->cdf_fip.ctc_argc);
2307 		ctf_free(name, strlen(name) + 1);
2308 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2309 		return (ret);
2310 	}
2311 
2312 	ctf_list_append(&cup->cu_funcs, cdf);
2313 	return (ret);
2314 }
2315 
2316 /*
2317  * Convert variables, but only if they're not prototypes and have names.
2318  */
2319 static int
2320 ctf_dwarf_convert_variable(ctf_cu_t *cup, Dwarf_Die die)
2321 {
2322 	int ret;
2323 	char *name;
2324 	Dwarf_Bool b;
2325 	Dwarf_Die tdie;
2326 	ctf_id_t id;
2327 	ctf_dwvar_t *cdv;
2328 
2329 	/* Skip "Non-Defining Declarations" */
2330 	if ((ret = ctf_dwarf_boolean(cup, die, DW_AT_declaration, &b)) == 0) {
2331 		if (b != 0)
2332 			return (0);
2333 	} else if (ret != ENOENT) {
2334 		return (ret);
2335 	}
2336 
2337 	/*
2338 	 * If we find a DIE of "Declarations Completing Non-Defining
2339 	 * Declarations", we will use the referenced type's DIE.  This isn't
2340 	 * quite correct, e.g. DW_AT_decl_line will be the forward declaration
2341 	 * not this site.  It's sufficient for what we need, however: in
2342 	 * particular, we should find DW_AT_external as needed there.
2343 	 */
2344 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_specification,
2345 	    &tdie)) == 0) {
2346 		Dwarf_Off offset;
2347 		if ((ret = ctf_dwarf_offset(cup, tdie, &offset)) != 0)
2348 			return (ret);
2349 		ctf_dprintf("die 0x%llx DW_AT_specification -> die 0x%llx\n",
2350 		    ctf_die_offset(die), ctf_die_offset(tdie));
2351 		die = tdie;
2352 	} else if (ret != ENOENT) {
2353 		return (ret);
2354 	}
2355 
2356 	if ((ret = ctf_dwarf_string(cup, die, DW_AT_name, &name)) != 0 &&
2357 	    ret != ENOENT)
2358 		return (ret);
2359 	if (ret == ENOENT)
2360 		return (0);
2361 
2362 	if ((ret = ctf_dwarf_refdie(cup, die, DW_AT_type, &tdie)) != 0) {
2363 		ctf_free(name, strlen(name) + 1);
2364 		return (ret);
2365 	}
2366 
2367 	if ((ret = ctf_dwarf_convert_type(cup, tdie, &id,
2368 	    CTF_ADD_ROOT)) != 0)
2369 		return (ret);
2370 
2371 	if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) {
2372 		ctf_free(name, strlen(name) + 1);
2373 		return (ENOMEM);
2374 	}
2375 
2376 	cdv->cdv_name = name;
2377 	cdv->cdv_type = id;
2378 
2379 	if ((ret = ctf_dwarf_isglobal(cup, die, &cdv->cdv_global)) != 0) {
2380 		ctf_free(cdv, sizeof (ctf_dwvar_t));
2381 		ctf_free(name, strlen(name) + 1);
2382 		return (ret);
2383 	}
2384 
2385 	ctf_list_append(&cup->cu_vars, cdv);
2386 	return (0);
2387 }
2388 
2389 /*
2390  * Walk through our set of top-level types and process them.
2391  */
2392 static int
2393 ctf_dwarf_walk_toplevel(ctf_cu_t *cup, Dwarf_Die die)
2394 {
2395 	int ret;
2396 	Dwarf_Off offset;
2397 	Dwarf_Half tag;
2398 
2399 	if ((ret = ctf_dwarf_offset(cup, die, &offset)) != 0)
2400 		return (ret);
2401 
2402 	if (offset > cup->cu_maxoff) {
2403 		(void) snprintf(cup->cu_errbuf, cup->cu_errlen,
2404 		    "die offset %llu beyond maximum for header %llu\n",
2405 		    offset, cup->cu_maxoff);
2406 		return (ECTF_CONVBKERR);
2407 	}
2408 
2409 	if ((ret = ctf_dwarf_tag(cup, die, &tag)) != 0)
2410 		return (ret);
2411 
2412 	ret = 0;
2413 	switch (tag) {
2414 	case DW_TAG_subprogram:
2415 		ctf_dprintf("top level func\n");
2416 		ret = ctf_dwarf_convert_function(cup, die);
2417 		break;
2418 	case DW_TAG_variable:
2419 		ctf_dprintf("top level var\n");
2420 		ret = ctf_dwarf_convert_variable(cup, die);
2421 		break;
2422 	case DW_TAG_lexical_block:
2423 		ctf_dprintf("top level block\n");
2424 		ret = ctf_dwarf_walk_lexical(cup, die);
2425 		break;
2426 	case DW_TAG_enumeration_type:
2427 	case DW_TAG_structure_type:
2428 	case DW_TAG_typedef:
2429 	case DW_TAG_union_type:
2430 		ctf_dprintf("top level type\n");
2431 		ret = ctf_dwarf_convert_type(cup, die, NULL, B_TRUE);
2432 		break;
2433 	default:
2434 		break;
2435 	}
2436 
2437 	return (ret);
2438 }
2439 
2440 
2441 /*
2442  * We're given a node. At this node we need to convert it and then proceed to
2443  * convert any siblings that are associaed with this die.
2444  */
2445 static int
2446 ctf_dwarf_convert_die(ctf_cu_t *cup, Dwarf_Die die)
2447 {
2448 	while (die != NULL) {
2449 		int ret;
2450 		Dwarf_Die sib;
2451 
2452 		if ((ret = ctf_dwarf_walk_toplevel(cup, die)) != 0)
2453 			return (ret);
2454 
2455 		if ((ret = ctf_dwarf_sib(cup, die, &sib)) != 0)
2456 			return (ret);
2457 		die = sib;
2458 	}
2459 	return (0);
2460 }
2461 
2462 static int
2463 ctf_dwarf_fixup_die(ctf_cu_t *cup, boolean_t addpass)
2464 {
2465 	ctf_dwmap_t *map;
2466 
2467 	for (map = avl_first(&cup->cu_map); map != NULL;
2468 	    map = AVL_NEXT(&cup->cu_map, map)) {
2469 		int ret;
2470 		if (map->cdm_fix == B_FALSE)
2471 			continue;
2472 		if ((ret = ctf_dwarf_fixup_sou(cup, map->cdm_die, map->cdm_id,
2473 		    addpass)) != 0)
2474 			return (ret);
2475 	}
2476 
2477 	return (0);
2478 }
2479 
2480 /*
2481  * The DWARF information about a symbol and the information in the symbol table
2482  * may not be the same due to symbol reduction that is performed by ld due to a
2483  * mapfile or other such directive. We process weak symbols at a later time.
2484  *
2485  * The following are the rules that we employ:
2486  *
2487  * 1. A DWARF function that is considered exported matches STB_GLOBAL entries
2488  * with the same name.
2489  *
2490  * 2. A DWARF function that is considered exported matches STB_LOCAL entries
2491  * with the same name and the same file. This case may happen due to mapfile
2492  * reduction.
2493  *
2494  * 3. A DWARF function that is not considered exported matches STB_LOCAL entries
2495  * with the same name and the same file.
2496  *
2497  * 4. A DWARF function that has the same name as the symbol table entry, but the
2498  * files do not match. This is considered a 'fuzzy' match. This may also happen
2499  * due to a mapfile reduction. Fuzzy matching is only used when we know that the
2500  * file in question refers to the primary object. This is because when a symbol
2501  * is reduced in a mapfile, it's always going to be tagged as a local value in
2502  * the generated output and it is considered as to belong to the primary file
2503  * which is the first STT_FILE symbol we see.
2504  */
2505 static boolean_t
2506 ctf_dwarf_symbol_match(const char *symtab_file, const char *symtab_name,
2507     uint_t symtab_bind, const char *dwarf_file, const char *dwarf_name,
2508     boolean_t dwarf_global, boolean_t *is_fuzzy)
2509 {
2510 	*is_fuzzy = B_FALSE;
2511 
2512 	if (symtab_bind != STB_LOCAL && symtab_bind != STB_GLOBAL) {
2513 		return (B_FALSE);
2514 	}
2515 
2516 	if (strcmp(symtab_name, dwarf_name) != 0) {
2517 		return (B_FALSE);
2518 	}
2519 
2520 	if (symtab_bind == STB_GLOBAL) {
2521 		return (dwarf_global);
2522 	}
2523 
2524 	if (strcmp(symtab_file, dwarf_file) == 0) {
2525 		return (B_TRUE);
2526 	}
2527 
2528 	if (dwarf_global) {
2529 		*is_fuzzy = B_TRUE;
2530 		return (B_TRUE);
2531 	}
2532 
2533 	return (B_FALSE);
2534 }
2535 
2536 static ctf_dwfunc_t *
2537 ctf_dwarf_match_func(ctf_cu_t *cup, const char *file, const char *name,
2538     uint_t bind, boolean_t primary)
2539 {
2540 	ctf_dwfunc_t *cdf, *fuzzy = NULL;
2541 
2542 	if (bind == STB_WEAK)
2543 		return (NULL);
2544 
2545 	if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2546 		return (NULL);
2547 
2548 	for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL;
2549 	    cdf = ctf_list_next(cdf)) {
2550 		boolean_t is_fuzzy = B_FALSE;
2551 
2552 		if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2553 		    cdf->cdf_name, cdf->cdf_global, &is_fuzzy)) {
2554 			if (is_fuzzy) {
2555 				if (primary) {
2556 					fuzzy = cdf;
2557 				}
2558 				continue;
2559 			} else {
2560 				return (cdf);
2561 			}
2562 		}
2563 	}
2564 
2565 	return (fuzzy);
2566 }
2567 
2568 static ctf_dwvar_t *
2569 ctf_dwarf_match_var(ctf_cu_t *cup, const char *file, const char *name,
2570     uint_t bind, boolean_t primary)
2571 {
2572 	ctf_dwvar_t *cdv, *fuzzy = NULL;
2573 
2574 	if (bind == STB_WEAK)
2575 		return (NULL);
2576 
2577 	if (bind == STB_LOCAL && (file == NULL || cup->cu_name == NULL))
2578 		return (NULL);
2579 
2580 	for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL;
2581 	    cdv = ctf_list_next(cdv)) {
2582 		boolean_t is_fuzzy = B_FALSE;
2583 
2584 		if (ctf_dwarf_symbol_match(file, name, bind, cup->cu_name,
2585 		    cdv->cdv_name, cdv->cdv_global, &is_fuzzy)) {
2586 			if (is_fuzzy) {
2587 				if (primary) {
2588 					fuzzy = cdv;
2589 				}
2590 			} else {
2591 				return (cdv);
2592 			}
2593 		}
2594 	}
2595 
2596 	return (fuzzy);
2597 }
2598 
2599 static int
2600 ctf_dwarf_conv_funcvars_cb(const Elf64_Sym *symp, ulong_t idx,
2601     const char *file, const char *name, boolean_t primary, void *arg)
2602 {
2603 	int ret;
2604 	uint_t bind, type;
2605 	ctf_cu_t *cup = arg;
2606 
2607 	bind = GELF_ST_BIND(symp->st_info);
2608 	type = GELF_ST_TYPE(symp->st_info);
2609 
2610 	/*
2611 	 * Come back to weak symbols in another pass
2612 	 */
2613 	if (bind == STB_WEAK)
2614 		return (0);
2615 
2616 	if (type == STT_OBJECT) {
2617 		ctf_dwvar_t *cdv = ctf_dwarf_match_var(cup, file, name,
2618 		    bind, primary);
2619 		if (cdv == NULL)
2620 			return (0);
2621 		ret = ctf_add_object(cup->cu_ctfp, idx, cdv->cdv_type);
2622 		ctf_dprintf("added object %s->%ld\n", name, cdv->cdv_type);
2623 	} else {
2624 		ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cup, file, name,
2625 		    bind, primary);
2626 		if (cdf == NULL)
2627 			return (0);
2628 		ret = ctf_add_function(cup->cu_ctfp, idx, &cdf->cdf_fip,
2629 		    cdf->cdf_argv);
2630 		ctf_dprintf("added function %s\n", name);
2631 	}
2632 
2633 	if (ret == CTF_ERR) {
2634 		return (ctf_errno(cup->cu_ctfp));
2635 	}
2636 
2637 	return (0);
2638 }
2639 
2640 static int
2641 ctf_dwarf_conv_funcvars(ctf_cu_t *cup)
2642 {
2643 	return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_funcvars_cb, cup));
2644 }
2645 
2646 /*
2647  * If we have a weak symbol, attempt to find the strong symbol it will resolve
2648  * to.  Note: the code where this actually happens is in sym_process() in
2649  * cmd/sgs/libld/common/syms.c
2650  *
2651  * Finding the matching symbol is unfortunately not trivial.  For a symbol to be
2652  * a candidate, it must:
2653  *
2654  * - have the same type (function, object)
2655  * - have the same value (address)
2656  * - have the same size
2657  * - not be another weak symbol
2658  * - belong to the same section (checked via section index)
2659  *
2660  * To perform this check, we first iterate over the symbol table. For each weak
2661  * symbol that we encounter, we then do a second walk over the symbol table,
2662  * calling ctf_dwarf_conv_check_weak(). If a symbol matches the above, then it's
2663  * either a local or global symbol. If we find a global symbol then we go with
2664  * it and stop searching for additional matches.
2665  *
2666  * If instead, we find a local symbol, things are more complicated. The first
2667  * thing we do is to try and see if we have file information about both symbols
2668  * (STT_FILE). If they both have file information and it matches, then we treat
2669  * that as a good match and stop searching for additional matches.
2670  *
2671  * Otherwise, this means we have a non-matching file and a local symbol. We
2672  * treat this as a candidate and if we find a better match (one of the two cases
2673  * above), use that instead. There are two different ways this can happen.
2674  * Either this is a completely different symbol, or it's a once-global symbol
 * that was scoped to local via a mapfile.  In the latter case, curfile is
2676  * likely inaccurate since the linker does not preserve the needed curfile in
2677  * the order of the symbol table (see the comments about locally scoped symbols
2678  * in libld's update_osym()).  As we can't tell this case from the former one,
2679  * we use this symbol iff no other matching symbol is found.
2680  *
2681  * What we really need here is a SUNW section containing weak<->strong mappings
2682  * that we can consume.
2683  */
/*
 * State shared between ctf_dwarf_conv_weaks_cb(), which fills it in for one
 * weak symbol, and ctf_dwarf_conv_check_weak(), which is run against every
 * symbol table entry to look for that weak symbol's strong counterpart.
 */
typedef struct ctf_dwarf_weak_arg {
	const Elf64_Sym *cweak_symp;	/* the weak symbol being resolved */
	const char *cweak_file;		/* file of the weak symbol, may be NULL */
	boolean_t cweak_candidate;	/* a local, other-file match was seen */
	ulong_t cweak_idx;		/* index of the last match or candidate */
} ctf_dwarf_weak_arg_t;
2690 
2691 static int
2692 ctf_dwarf_conv_check_weak(const Elf64_Sym *symp, ulong_t idx, const char *file,
2693     const char *name, boolean_t primary, void *arg)
2694 {
2695 	ctf_dwarf_weak_arg_t *cweak = arg;
2696 
2697 	const Elf64_Sym *wsymp = cweak->cweak_symp;
2698 
2699 	ctf_dprintf("comparing weak to %s\n", name);
2700 
2701 	if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
2702 		return (0);
2703 	}
2704 
2705 	if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
2706 		return (0);
2707 	}
2708 
2709 	if (wsymp->st_value != symp->st_value) {
2710 		return (0);
2711 	}
2712 
2713 	if (wsymp->st_size != symp->st_size) {
2714 		return (0);
2715 	}
2716 
2717 	if (wsymp->st_shndx != symp->st_shndx) {
2718 		return (0);
2719 	}
2720 
2721 	/*
2722 	 * Check if it's a weak candidate.
2723 	 */
2724 	if (GELF_ST_BIND(symp->st_info) == STB_LOCAL &&
2725 	    (file == NULL || cweak->cweak_file == NULL ||
2726 	    strcmp(file, cweak->cweak_file) != 0)) {
2727 		cweak->cweak_candidate = B_TRUE;
2728 		cweak->cweak_idx = idx;
2729 		return (0);
2730 	}
2731 
2732 	/*
2733 	 * Found a match, break.
2734 	 */
2735 	cweak->cweak_idx = idx;
2736 	return (1);
2737 }
2738 
2739 static int
2740 ctf_dwarf_duplicate_sym(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2741 {
2742 	ctf_id_t id = ctf_lookup_by_symbol(cup->cu_ctfp, matchidx);
2743 
2744 	/*
2745 	 * If we matched something that for some reason didn't have type data,
2746 	 * we don't consider that a fatal error and silently swallow it.
2747 	 */
2748 	if (id == CTF_ERR) {
2749 		if (ctf_errno(cup->cu_ctfp) == ECTF_NOTYPEDAT)
2750 			return (0);
2751 		else
2752 			return (ctf_errno(cup->cu_ctfp));
2753 	}
2754 
2755 	if (ctf_add_object(cup->cu_ctfp, idx, id) == CTF_ERR)
2756 		return (ctf_errno(cup->cu_ctfp));
2757 
2758 	return (0);
2759 }
2760 
2761 static int
2762 ctf_dwarf_duplicate_func(ctf_cu_t *cup, ulong_t idx, ulong_t matchidx)
2763 {
2764 	int ret;
2765 	ctf_funcinfo_t fip;
2766 	ctf_id_t *args = NULL;
2767 
2768 	if (ctf_func_info(cup->cu_ctfp, matchidx, &fip) == CTF_ERR) {
2769 		if (ctf_errno(cup->cu_ctfp) == ECTF_NOFUNCDAT)
2770 			return (0);
2771 		else
2772 			return (ctf_errno(cup->cu_ctfp));
2773 	}
2774 
2775 	if (fip.ctc_argc != 0) {
2776 		args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc);
2777 		if (args == NULL)
2778 			return (ENOMEM);
2779 
2780 		if (ctf_func_args(cup->cu_ctfp, matchidx, fip.ctc_argc, args) ==
2781 		    CTF_ERR) {
2782 			ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2783 			return (ctf_errno(cup->cu_ctfp));
2784 		}
2785 	}
2786 
2787 	ret = ctf_add_function(cup->cu_ctfp, idx, &fip, args);
2788 	if (args != NULL)
2789 		ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
2790 	if (ret == CTF_ERR)
2791 		return (ctf_errno(cup->cu_ctfp));
2792 
2793 	return (0);
2794 }
2795 
2796 static int
2797 ctf_dwarf_conv_weaks_cb(const Elf64_Sym *symp, ulong_t idx, const char *file,
2798     const char *name, boolean_t primary, void *arg)
2799 {
2800 	int ret, type;
2801 	ctf_dwarf_weak_arg_t cweak;
2802 	ctf_cu_t *cup = arg;
2803 
2804 	/*
2805 	 * We only care about weak symbols.
2806 	 */
2807 	if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
2808 		return (0);
2809 
2810 	type = GELF_ST_TYPE(symp->st_info);
2811 	ASSERT(type == STT_OBJECT || type == STT_FUNC);
2812 
2813 	/*
2814 	 * For each weak symbol we encounter, we need to do a second iteration
2815 	 * to try and find a match. We should probably think about other
2816 	 * techniques to try and save us time in the future.
2817 	 */
2818 	cweak.cweak_symp = symp;
2819 	cweak.cweak_file = file;
2820 	cweak.cweak_candidate = B_FALSE;
2821 	cweak.cweak_idx = 0;
2822 
2823 	ctf_dprintf("Trying to find weak equiv for %s\n", name);
2824 
2825 	ret = ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_check_weak, &cweak);
2826 	VERIFY(ret == 0 || ret == 1);
2827 
2828 	/*
2829 	 * Nothing was ever found, we're not going to add anything for this
2830 	 * entry.
2831 	 */
2832 	if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
2833 		ctf_dprintf("found no weak match for %s\n", name);
2834 		return (0);
2835 	}
2836 
2837 	/*
2838 	 * Now, finally go and add the type based on the match.
2839 	 */
2840 	ctf_dprintf("matched weak symbol %lu to %lu\n", idx, cweak.cweak_idx);
2841 	if (type == STT_OBJECT) {
2842 		ret = ctf_dwarf_duplicate_sym(cup, idx, cweak.cweak_idx);
2843 	} else {
2844 		ret = ctf_dwarf_duplicate_func(cup, idx, cweak.cweak_idx);
2845 	}
2846 
2847 	return (ret);
2848 }
2849 
2850 static int
2851 ctf_dwarf_conv_weaks(ctf_cu_t *cup)
2852 {
2853 	return (ctf_symtab_iter(cup->cu_ctfp, ctf_dwarf_conv_weaks_cb, cup));
2854 }
2855 
2856 /* ARGSUSED */
2857 static int
2858 ctf_dwarf_convert_one(void *arg, void *unused)
2859 {
2860 	int ret;
2861 	ctf_file_t *dedup;
2862 	ctf_cu_t *cup = arg;
2863 
2864 	ctf_dprintf("converting die: %s\n", cup->cu_name);
2865 	ctf_dprintf("max offset: %x\n", cup->cu_maxoff);
2866 	VERIFY(cup != NULL);
2867 
2868 	ret = ctf_dwarf_convert_die(cup, cup->cu_cu);
2869 	ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cup->cu_name,
2870 	    ret);
2871 	if (ret != 0) {
2872 		return (ret);
2873 	}
2874 	if (ctf_update(cup->cu_ctfp) != 0) {
2875 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2876 		    "failed to update output ctf container"));
2877 	}
2878 
2879 	ret = ctf_dwarf_fixup_die(cup, B_FALSE);
2880 	ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2881 	    ret);
2882 	if (ret != 0) {
2883 		return (ret);
2884 	}
2885 	if (ctf_update(cup->cu_ctfp) != 0) {
2886 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2887 		    "failed to update output ctf container"));
2888 	}
2889 
2890 	ret = ctf_dwarf_fixup_die(cup, B_TRUE);
2891 	ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cup->cu_name,
2892 	    ret);
2893 	if (ret != 0) {
2894 		return (ret);
2895 	}
2896 	if (ctf_update(cup->cu_ctfp) != 0) {
2897 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2898 		    "failed to update output ctf container"));
2899 	}
2900 
2901 
2902 	if ((ret = ctf_dwarf_conv_funcvars(cup)) != 0) {
2903 		return (ctf_dwarf_error(cup, NULL, ret,
2904 		    "failed to convert strong functions and variables"));
2905 	}
2906 
2907 	if (ctf_update(cup->cu_ctfp) != 0) {
2908 		return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2909 		    "failed to update output ctf container"));
2910 	}
2911 
2912 	if (cup->cu_doweaks == B_TRUE) {
2913 		if ((ret = ctf_dwarf_conv_weaks(cup)) != 0) {
2914 			return (ctf_dwarf_error(cup, NULL, ret,
2915 			    "failed to convert weak functions and variables"));
2916 		}
2917 
2918 		if (ctf_update(cup->cu_ctfp) != 0) {
2919 			return (ctf_dwarf_error(cup, cup->cu_ctfp, 0,
2920 			    "failed to update output ctf container"));
2921 		}
2922 	}
2923 
2924 	ctf_phase_dump(cup->cu_ctfp, "pre-dwarf-dedup", cup->cu_name);
2925 	ctf_dprintf("adding inputs for dedup\n");
2926 	if ((ret = ctf_merge_add(cup->cu_cmh, cup->cu_ctfp)) != 0) {
2927 		return (ctf_dwarf_error(cup, NULL, ret,
2928 		    "failed to add inputs for merge"));
2929 	}
2930 
2931 	ctf_dprintf("starting dedup of %s\n", cup->cu_name);
2932 	if ((ret = ctf_merge_dedup(cup->cu_cmh, &dedup)) != 0) {
2933 		return (ctf_dwarf_error(cup, NULL, ret,
2934 		    "failed to deduplicate die"));
2935 	}
2936 	ctf_close(cup->cu_ctfp);
2937 	cup->cu_ctfp = dedup;
2938 	ctf_phase_dump(cup->cu_ctfp, "post-dwarf-dedup", cup->cu_name);
2939 
2940 	return (0);
2941 }
2942 
2943 /*
2944  * Note, we expect that if we're returning a ctf_file_t from one of the dies,
2945  * say in the single node case, it's been saved and the entry here has been set
2946  * to NULL, which ctf_close happily ignores.
2947  */
2948 static void
2949 ctf_dwarf_free_die(ctf_cu_t *cup)
2950 {
2951 	ctf_dwfunc_t *cdf, *ndf;
2952 	ctf_dwvar_t *cdv, *ndv;
2953 	ctf_dwbitf_t *cdb, *ndb;
2954 	ctf_dwmap_t *map;
2955 	void *cookie;
2956 	Dwarf_Error derr;
2957 
2958 	ctf_dprintf("Beginning to free die: %p\n", cup);
2959 	cup->cu_elf = NULL;
2960 	ctf_dprintf("Trying to free name: %p\n", cup->cu_name);
2961 	if (cup->cu_name != NULL)
2962 		ctf_free(cup->cu_name, strlen(cup->cu_name) + 1);
2963 	ctf_dprintf("Trying to free merge handle: %p\n", cup->cu_cmh);
2964 	if (cup->cu_cmh != NULL) {
2965 		ctf_merge_fini(cup->cu_cmh);
2966 		cup->cu_cmh = NULL;
2967 	}
2968 
2969 	ctf_dprintf("Trying to free functions\n");
2970 	for (cdf = ctf_list_next(&cup->cu_funcs); cdf != NULL; cdf = ndf) {
2971 		ndf = ctf_list_next(cdf);
2972 		ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1);
2973 		if (cdf->cdf_fip.ctc_argc != 0) {
2974 			ctf_free(cdf->cdf_argv,
2975 			    sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc);
2976 		}
2977 		ctf_free(cdf, sizeof (ctf_dwfunc_t));
2978 	}
2979 
2980 	ctf_dprintf("Trying to free variables\n");
2981 	for (cdv = ctf_list_next(&cup->cu_vars); cdv != NULL; cdv = ndv) {
2982 		ndv = ctf_list_next(cdv);
2983 		ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1);
2984 		ctf_free(cdv, sizeof (ctf_dwvar_t));
2985 	}
2986 
2987 	ctf_dprintf("Trying to free bitfields\n");
2988 	for (cdb = ctf_list_next(&cup->cu_bitfields); cdb != NULL; cdb = ndb) {
2989 		ndb = ctf_list_next(cdb);
2990 		ctf_free(cdb, sizeof (ctf_dwbitf_t));
2991 	}
2992 
2993 	ctf_dprintf("Trying to clean up dwarf_t: %p\n", cup->cu_dwarf);
2994 	if (cup->cu_dwarf != NULL)
2995 		(void) dwarf_finish(cup->cu_dwarf, &derr);
2996 	cup->cu_dwarf = NULL;
2997 	ctf_close(cup->cu_ctfp);
2998 
2999 	cookie = NULL;
3000 	while ((map = avl_destroy_nodes(&cup->cu_map, &cookie)) != NULL) {
3001 		ctf_free(map, sizeof (ctf_dwmap_t));
3002 	}
3003 	avl_destroy(&cup->cu_map);
3004 	cup->cu_errbuf = NULL;
3005 }
3006 
3007 static void
3008 ctf_dwarf_free_dies(ctf_cu_t *cdies, int ndies)
3009 {
3010 	int i;
3011 
3012 	ctf_dprintf("Beginning to free dies\n");
3013 	for (i = 0; i < ndies; i++) {
3014 		ctf_dwarf_free_die(&cdies[i]);
3015 	}
3016 
3017 	ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
3018 }
3019 
3020 static int
3021 ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, int *ndies,
3022     char *errbuf, size_t errlen)
3023 {
3024 	int ret;
3025 	Dwarf_Half vers;
3026 	Dwarf_Unsigned nexthdr;
3027 
3028 	while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL,
3029 	    &nexthdr, derr)) != DW_DLV_NO_ENTRY) {
3030 		if (ret != DW_DLV_OK) {
3031 			(void) snprintf(errbuf, errlen,
3032 			    "file does not contain valid DWARF data: %s\n",
3033 			    dwarf_errmsg(*derr));
3034 			return (ECTF_CONVBKERR);
3035 		}
3036 
3037 		switch (vers) {
3038 		case DWARF_VERSION_TWO:
3039 		case DWARF_VERSION_FOUR:
3040 			break;
3041 		default:
3042 			(void) snprintf(errbuf, errlen,
3043 			    "unsupported DWARF version: %d\n", vers);
3044 			return (ECTF_CONVBKERR);
3045 		}
3046 		*ndies = *ndies + 1;
3047 	}
3048 
3049 	return (0);
3050 }
3051 
3052 static int
3053 ctf_dwarf_init_die(int fd, Elf *elf, ctf_cu_t *cup, int ndie, char *errbuf,
3054     size_t errlen)
3055 {
3056 	int ret;
3057 	Dwarf_Unsigned hdrlen, abboff, nexthdr;
3058 	Dwarf_Half addrsz, vers;
3059 	Dwarf_Unsigned offset = 0;
3060 	Dwarf_Error derr;
3061 
3062 	while ((ret = dwarf_next_cu_header(cup->cu_dwarf, &hdrlen, &vers,
3063 	    &abboff, &addrsz, &nexthdr, &derr)) != DW_DLV_NO_ENTRY) {
3064 		char *name;
3065 		Dwarf_Die cu, child;
3066 
3067 		/* Based on the counting above, we should be good to go */
3068 		VERIFY(ret == DW_DLV_OK);
3069 		if (ndie > 0) {
3070 			ndie--;
3071 			offset = nexthdr;
3072 			continue;
3073 		}
3074 
3075 		/*
3076 		 * Compilers are apparently inconsistent. Some emit no DWARF for
3077 		 * empty files and others emit empty compilation unit.
3078 		 */
3079 		cup->cu_voidtid = CTF_ERR;
3080 		cup->cu_longtid = CTF_ERR;
3081 		cup->cu_elf = elf;
3082 		cup->cu_maxoff = nexthdr - 1;
3083 		cup->cu_vers = vers;
3084 		cup->cu_addrsz = addrsz;
3085 		cup->cu_ctfp = ctf_fdcreate(fd, &ret);
3086 		if (cup->cu_ctfp == NULL)
3087 			return (ret);
3088 
3089 		avl_create(&cup->cu_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t),
3090 		    offsetof(ctf_dwmap_t, cdm_avl));
3091 		cup->cu_errbuf = errbuf;
3092 		cup->cu_errlen = errlen;
3093 		bzero(&cup->cu_vars, sizeof (ctf_list_t));
3094 		bzero(&cup->cu_funcs, sizeof (ctf_list_t));
3095 		bzero(&cup->cu_bitfields, sizeof (ctf_list_t));
3096 
3097 		if ((ret = ctf_dwarf_die_elfenc(elf, cup, errbuf,
3098 		    errlen)) != 0)
3099 			return (ret);
3100 
3101 		if ((ret = ctf_dwarf_sib(cup, NULL, &cu)) != 0)
3102 			return (ret);
3103 
3104 		if (cu == NULL) {
3105 			(void) snprintf(errbuf, errlen,
3106 			    "file does not contain DWARF data");
3107 			return (ECTF_CONVNODEBUG);
3108 		}
3109 
3110 		if ((ret = ctf_dwarf_child(cup, cu, &child)) != 0)
3111 			return (ret);
3112 
3113 		if (child == NULL) {
3114 			(void) snprintf(errbuf, errlen,
3115 			    "file does not contain DWARF data");
3116 			return (ECTF_CONVNODEBUG);
3117 		}
3118 
3119 		cup->cu_cuoff = offset;
3120 		cup->cu_cu = child;
3121 
3122 		if ((cup->cu_cmh = ctf_merge_init(fd, &ret)) == NULL)
3123 			return (ret);
3124 
3125 		if (ctf_dwarf_string(cup, cu, DW_AT_name, &name) == 0) {
3126 			size_t len = strlen(name) + 1;
3127 			char *b = basename(name);
3128 			cup->cu_name = strdup(b);
3129 			ctf_free(name, len);
3130 		}
3131 		break;
3132 	}
3133 
3134 	return (0);
3135 }
3136 
3137 /*
3138  * This is our only recourse to identify a C source file that is missing debug
3139  * info: it will be mentioned as an STT_FILE, but not have a compile unit entry.
3140  * (A traditional ctfmerge works on individual files, so can identify missing
3141  * DWARF more directly, via ctf_has_c_source() on the .o file.)
3142  *
3143  * As we operate on basenames, this can of course miss some cases, but it's
3144  * better than not checking at all.
3145  *
3146  * We explicitly whitelist some CRT components.  Failing that, there's always
3147  * the -m option.
3148  */
3149 static boolean_t
3150 c_source_has_debug(const char *file, ctf_cu_t *cus, size_t nr_cus)
3151 {
3152 	const char *basename = strrchr(file, '/');
3153 
3154 	if (basename == NULL)
3155 		basename = file;
3156 	else
3157 		basename++;
3158 
3159 	if (strcmp(basename, "common-crt.c") == 0 ||
3160 	    strcmp(basename, "gmon.c") == 0 ||
3161 	    strcmp(basename, "dlink_init.c") == 0 ||
3162 	    strcmp(basename, "dlink_common.c") == 0 ||
3163 	    strncmp(basename, "crt", strlen("crt")) == 0 ||
3164 	    strncmp(basename, "values-", strlen("values-")) == 0)
3165 		return (B_TRUE);
3166 
3167 	for (size_t i = 0; i < nr_cus; i++) {
3168 		if (strcmp(basename, cus[i].cu_name) == 0)
3169 			return (B_TRUE);
3170 	}
3171 
3172 	return (B_FALSE);
3173 }
3174 
3175 static int
3176 ctf_dwarf_check_missing(ctf_cu_t *cus, size_t nr_cus, Elf *elf,
3177     char *errmsg, size_t errlen)
3178 {
3179 	Elf_Scn *scn, *strscn;
3180 	Elf_Data *data, *strdata;
3181 	GElf_Shdr shdr;
3182 	ulong_t i;
3183 
3184 	scn = NULL;
3185 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
3186 		if (gelf_getshdr(scn, &shdr) == NULL) {
3187 			(void) snprintf(errmsg, errlen,
3188 			    "failed to get section header: %s\n",
3189 			    elf_errmsg(elf_errno()));
3190 			return (EINVAL);
3191 		}
3192 
3193 		if (shdr.sh_type == SHT_SYMTAB)
3194 			break;
3195 	}
3196 
3197 	if (scn == NULL)
3198 		return (0);
3199 
3200 	if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL) {
3201 		(void) snprintf(errmsg, errlen,
3202 		    "failed to get str section: %s\n",
3203 		    elf_errmsg(elf_errno()));
3204 		return (EINVAL);
3205 	}
3206 
3207 	if ((data = elf_getdata(scn, NULL)) == NULL) {
3208 		(void) snprintf(errmsg, errlen, "failed to read section: %s\n",
3209 		    elf_errmsg(elf_errno()));
3210 		return (EINVAL);
3211 	}
3212 
3213 	if ((strdata = elf_getdata(strscn, NULL)) == NULL) {
3214 		(void) snprintf(errmsg, errlen,
3215 		    "failed to read string table: %s\n",
3216 		    elf_errmsg(elf_errno()));
3217 		return (EINVAL);
3218 	}
3219 
3220 	for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) {
3221 		GElf_Sym sym;
3222 		const char *file;
3223 		size_t len;
3224 
3225 		if (gelf_getsym(data, i, &sym) == NULL) {
3226 			(void) snprintf(errmsg, errlen,
3227 			    "failed to read sym %lu: %s\n",
3228 			    i, elf_errmsg(elf_errno()));
3229 			return (EINVAL);
3230 		}
3231 
3232 		if (GELF_ST_TYPE(sym.st_info) != STT_FILE)
3233 			continue;
3234 
3235 		file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name);
3236 		len = strlen(file);
3237 		if (len < 2 || strncmp(".c", &file[len - 2], 2) != 0)
3238 			continue;
3239 
3240 		if (!c_source_has_debug(file, cus, nr_cus)) {
3241 			(void) snprintf(errmsg, errlen,
3242 			    "file %s is missing debug info\n", file);
3243 			return (ECTF_CONVNODEBUG);
3244 		}
3245 	}
3246 
3247 	return (0);
3248 }
3249 
3250 int
3251 ctf_dwarf_convert(int fd, Elf *elf, uint_t nthrs, uint_t flags,
3252     ctf_file_t **fpp, char *errbuf, size_t errlen)
3253 {
3254 	int err, ret, ndies, i;
3255 	Dwarf_Debug dw;
3256 	Dwarf_Error derr;
3257 	ctf_cu_t *cdies = NULL, *cup;
3258 	workq_t *wqp = NULL;
3259 
3260 	*fpp = NULL;
3261 
3262 	ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr);
3263 	if (ret != DW_DLV_OK) {
3264 		if (ret == DW_DLV_NO_ENTRY ||
3265 		    dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) {
3266 			(void) snprintf(errbuf, errlen,
3267 			    "file does not contain DWARF data\n");
3268 			return (ECTF_CONVNODEBUG);
3269 		}
3270 
3271 		(void) snprintf(errbuf, errlen,
3272 		    "dwarf_elf_init() failed: %s\n", dwarf_errmsg(derr));
3273 		return (ECTF_CONVBKERR);
3274 	}
3275 
3276 	/*
3277 	 * Iterate over all of the compilation units and create a ctf_cu_t for
3278 	 * each of them.  This is used to determine if we have zero, one, or
3279 	 * multiple dies to convert. If we have zero, that's an error. If
3280 	 * there's only one die, that's the simple case.  No merge needed and
3281 	 * only a single Dwarf_Debug as well.
3282 	 */
3283 	ndies = 0;
3284 	err = ctf_dwarf_count_dies(dw, &derr, &ndies, errbuf, errlen);
3285 
3286 	ctf_dprintf("found %d DWARF CUs\n", ndies);
3287 
3288 	if (ndies == 0) {
3289 		(void) snprintf(errbuf, errlen,
3290 		    "file does not contain DWARF data\n");
3291 		return (ECTF_CONVNODEBUG);
3292 	}
3293 
3294 	(void) dwarf_finish(dw, &derr);
3295 	cdies = ctf_alloc(sizeof (ctf_cu_t) * ndies);
3296 	if (cdies == NULL) {
3297 		return (ENOMEM);
3298 	}
3299 
3300 	bzero(cdies, sizeof (ctf_cu_t) * ndies);
3301 
3302 	for (i = 0; i < ndies; i++) {
3303 		cup = &cdies[i];
3304 		ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
3305 		    &cup->cu_dwarf, &derr);
3306 		if (ret != 0) {
3307 			ctf_free(cdies, sizeof (ctf_cu_t) * ndies);
3308 			(void) snprintf(errbuf, errlen,
3309 			    "failed to initialize DWARF: %s\n",
3310 			    dwarf_errmsg(derr));
3311 			return (ECTF_CONVBKERR);
3312 		}
3313 
3314 		err = ctf_dwarf_init_die(fd, elf, cup, i, errbuf, errlen);
3315 		if (err != 0)
3316 			goto out;
3317 
3318 		cup->cu_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
3319 	}
3320 
3321 	if (!(flags & CTF_ALLOW_MISSING_DEBUG) &&
3322 	    (err = ctf_dwarf_check_missing(cdies, ndies,
3323 	    elf, errbuf, errlen)) != 0)
3324 		goto out;
3325 
3326 	/*
3327 	 * If we only have one compilation unit, there's no reason to use
3328 	 * multiple threads, even if the user requested them. After all, they
3329 	 * just gave us an upper bound.
3330 	 */
3331 	if (ndies == 1)
3332 		nthrs = 1;
3333 
3334 	if (workq_init(&wqp, nthrs) == -1) {
3335 		err = errno;
3336 		goto out;
3337 	}
3338 
3339 	for (i = 0; i < ndies; i++) {
3340 		cup = &cdies[i];
3341 		ctf_dprintf("adding cu %s: %p, %x %x\n", cup->cu_name,
3342 		    cup->cu_cu, cup->cu_cuoff, cup->cu_maxoff);
3343 		if (workq_add(wqp, cup) == -1) {
3344 			err = errno;
3345 			goto out;
3346 		}
3347 	}
3348 
3349 	ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, &err);
3350 	if (ret == WORKQ_ERROR) {
3351 		err = errno;
3352 		goto out;
3353 	} else if (ret == WORKQ_UERROR) {
3354 		ctf_dprintf("internal convert failed: %s\n",
3355 		    ctf_errmsg(err));
3356 		goto out;
3357 	}
3358 
3359 	ctf_dprintf("Determining next phase: have %d CUs\n", ndies);
3360 	if (ndies != 1) {
3361 		ctf_merge_t *cmp;
3362 
3363 		cmp = ctf_merge_init(fd, &err);
3364 		if (cmp == NULL)
3365 			goto out;
3366 
3367 		ctf_dprintf("setting threads\n");
3368 		if ((err = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
3369 			ctf_merge_fini(cmp);
3370 			goto out;
3371 		}
3372 
3373 		for (i = 0; i < ndies; i++) {
3374 			cup = &cdies[i];
3375 			if ((err = ctf_merge_add(cmp, cup->cu_ctfp)) != 0) {
3376 				ctf_merge_fini(cmp);
3377 				goto out;
3378 			}
3379 		}
3380 
3381 		ctf_dprintf("performing merge\n");
3382 		err = ctf_merge_merge(cmp, fpp);
3383 		if (err != 0) {
3384 			ctf_dprintf("failed merge!\n");
3385 			*fpp = NULL;
3386 			ctf_merge_fini(cmp);
3387 			goto out;
3388 		}
3389 		ctf_merge_fini(cmp);
3390 		err = 0;
3391 		ctf_dprintf("successfully converted!\n");
3392 	} else {
3393 		err = 0;
3394 		*fpp = cdies->cu_ctfp;
3395 		cdies->cu_ctfp = NULL;
3396 		ctf_dprintf("successfully converted!\n");
3397 	}
3398 
3399 out:
3400 	workq_fini(wqp);
3401 	ctf_dwarf_free_dies(cdies, ndies);
3402 	return (err);
3403 }
3404