xref: /freebsd/sys/cddl/dev/fbt/fbt.c (revision b78ee15e9f04ae15c3e1200df974473167524d17)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22  *
23  * $FreeBSD$
24  *
25  */
26 
27 /*
28  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29  * Use is subject to license terms.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cpuvar.h>
37 #include <sys/fcntl.h>
38 #include <sys/filio.h>
39 #include <sys/kdb.h>
40 #include <sys/kernel.h>
41 #include <sys/kmem.h>
42 #include <sys/kthread.h>
43 #include <sys/limits.h>
44 #include <sys/linker.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/mutex.h>
49 #include <sys/pcpu.h>
50 #include <sys/poll.h>
51 #include <sys/proc.h>
52 #include <sys/selinfo.h>
53 #include <sys/smp.h>
54 #include <sys/syscall.h>
55 #include <sys/sysent.h>
56 #include <sys/sysproto.h>
57 #include <sys/uio.h>
58 #include <sys/unistd.h>
59 #include <machine/stdarg.h>
60 
61 #include <sys/dtrace.h>
62 #include <sys/dtrace_bsd.h>
63 
64 #include "fbt.h"
65 
/* Malloc type used for the allocations made with M_FBT in this file. */
66 MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");
67 
/*
 * State with external linkage: the provider id filled in by
 * dtrace_register() in fbt_load(), the probe hash table allocated there,
 * and the mask derived from the table size (size - 1).
 */
68 dtrace_provider_id_t	fbt_id;
69 fbt_probe_t		**fbt_probetab;
70 int			fbt_probetab_mask;
71 
/* Forward declarations: device open entry point and provider callbacks. */
72 static d_open_t	fbt_open;
73 static int	fbt_unload(void);
74 static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
75 static void	fbt_provide_module(void *, modctl_t *);
76 static void	fbt_destroy(void *, dtrace_id_t, void *);
77 static void	fbt_enable(void *, dtrace_id_t, void *);
78 static void	fbt_disable(void *, dtrace_id_t, void *);
79 static void	fbt_load(void *);
80 static void	fbt_suspend(void *, dtrace_id_t, void *);
81 static void	fbt_resume(void *, dtrace_id_t, void *);
82 
/* Character device switch for the FBT device node; only open is supplied. */
83 static struct cdevsw fbt_cdevsw = {
84 	.d_version	= D_VERSION,
85 	.d_open		= fbt_open,
86 	.d_name		= "fbt",
87 };
88 
/* Stability and class attributes passed to dtrace_register() in fbt_load(). */
89 static dtrace_pattr_t fbt_attr = {
90 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
91 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
92 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
93 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
94 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
95 };
96 
/*
 * Provider operations vector passed to dtrace_register() in fbt_load().
 * The initializer is positional, so the entries must stay in the member
 * order of dtrace_pops_t; the NULL slots are operations this provider does
 * not supply.
 */
97 static dtrace_pops_t fbt_pops = {
98 	NULL,
99 	fbt_provide_module,
100 	fbt_enable,
101 	fbt_disable,
102 	fbt_suspend,
103 	fbt_resume,
104 	fbt_getargdesc,
105 	NULL,
106 	NULL,
107 	fbt_destroy
108 };
109 
/* Device node created by make_dev() in fbt_load(), destroyed in fbt_unload(). */
110 static struct cdev		*fbt_cdev;
/* Number of probe table buckets; fbt_load() defaults it when it is zero. */
111 static int			fbt_probetab_size;
/* When nonzero, fbt_enable() reports probes it refuses to enable. */
112 static int			fbt_verbose = 0;
113 
/*
 * Decide whether the function called "name" must be left uninstrumented.
 *
 * Returns 1 when the function is excluded from FBT and 0 when it may be
 * instrumented.
 */
int
fbt_excluded(const char *name)
{
	static const char dtrace_pfx[] = "dtrace_";
	static const char safe_pfx[] = "dtrace_safe_";
	int is_dtrace, is_safe;

	/*
	 * A "dtrace_" function may be called from probe context; only the
	 * "dtrace_safe_" prefix states explicitly that it will not be.
	 */
	is_dtrace = strncmp(name, dtrace_pfx, sizeof(dtrace_pfx) - 1) == 0;
	is_safe = strncmp(name, safe_pfx, sizeof(safe_pfx) - 1) == 0;
	if (is_dtrace && !is_safe)
		return (1);

	/* Names starting with a double underscore are internal functions. */
	if (name[0] == '_' && name[1] == '_')
		return (1);

#ifndef _KLD_MODULE
	/*
	 * When DTrace is built into the kernel the FBT functions themselves
	 * must not be instrumented.
	 */
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
144 
145 static void
146 fbt_doubletrap(void)
147 {
148 	fbt_probe_t *fbt;
149 	int i;
150 
151 	for (i = 0; i < fbt_probetab_size; i++) {
152 		fbt = fbt_probetab[i];
153 
154 		for (; fbt != NULL; fbt = fbt->fbtp_next)
155 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
156 	}
157 }
158 
159 static void
160 fbt_provide_module(void *arg, modctl_t *lf)
161 {
162 	char modname[MAXPATHLEN];
163 	int i;
164 	size_t len;
165 
166 	strlcpy(modname, lf->filename, sizeof(modname));
167 	len = strlen(modname);
168 	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
169 		modname[len - 3] = '\0';
170 
171 	/*
172 	 * Employees of dtrace and their families are ineligible.  Void
173 	 * where prohibited.
174 	 */
175 	if (strcmp(modname, "dtrace") == 0)
176 		return;
177 
178 	/*
179 	 * To register with DTrace, a module must list 'dtrace' as a
180 	 * dependency in order for the kernel linker to resolve
181 	 * symbols like dtrace_register(). All modules with such a
182 	 * dependency are ineligible for FBT tracing.
183 	 */
184 	for (i = 0; i < lf->ndeps; i++)
185 		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
186 			return;
187 
188 	if (lf->fbt_nentries) {
189 		/*
190 		 * This module has some FBT entries allocated; we're afraid
191 		 * to screw with it.
192 		 */
193 		return;
194 	}
195 
196 	/*
197 	 * List the functions in the module and the symbol values.
198 	 */
199 	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
200 }
201 
202 static void
203 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
204 {
205 	fbt_probe_t *fbt = parg, *next, *hash, *last;
206 	modctl_t *ctl;
207 	int ndx;
208 
209 	do {
210 		ctl = fbt->fbtp_ctl;
211 
212 		ctl->fbt_nentries--;
213 
214 		/*
215 		 * Now we need to remove this probe from the fbt_probetab.
216 		 */
217 		ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
218 		last = NULL;
219 		hash = fbt_probetab[ndx];
220 
221 		while (hash != fbt) {
222 			ASSERT(hash != NULL);
223 			last = hash;
224 			hash = hash->fbtp_hashnext;
225 		}
226 
227 		if (last != NULL) {
228 			last->fbtp_hashnext = fbt->fbtp_hashnext;
229 		} else {
230 			fbt_probetab[ndx] = fbt->fbtp_hashnext;
231 		}
232 
233 		next = fbt->fbtp_next;
234 		free(fbt, M_FBT);
235 
236 		fbt = next;
237 	} while (fbt != NULL);
238 }
239 
240 static void
241 fbt_enable(void *arg, dtrace_id_t id, void *parg)
242 {
243 	fbt_probe_t *fbt = parg;
244 	modctl_t *ctl = fbt->fbtp_ctl;
245 
246 	ctl->nenabled++;
247 
248 	/*
249 	 * Now check that our modctl has the expected load count.  If it
250 	 * doesn't, this module must have been unloaded and reloaded -- and
251 	 * we're not going to touch it.
252 	 */
253 	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
254 		if (fbt_verbose) {
255 			printf("fbt is failing for probe %s "
256 			    "(module %s reloaded)",
257 			    fbt->fbtp_name, ctl->filename);
258 		}
259 
260 		return;
261 	}
262 
263 	for (; fbt != NULL; fbt = fbt->fbtp_next)
264 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
265 }
266 
267 static void
268 fbt_disable(void *arg, dtrace_id_t id, void *parg)
269 {
270 	fbt_probe_t *fbt = parg;
271 	modctl_t *ctl = fbt->fbtp_ctl;
272 
273 	ASSERT(ctl->nenabled > 0);
274 	ctl->nenabled--;
275 
276 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
277 		return;
278 
279 	for (; fbt != NULL; fbt = fbt->fbtp_next)
280 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
281 }
282 
283 static void
284 fbt_suspend(void *arg, dtrace_id_t id, void *parg)
285 {
286 	fbt_probe_t *fbt = parg;
287 	modctl_t *ctl = fbt->fbtp_ctl;
288 
289 	ASSERT(ctl->nenabled > 0);
290 
291 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
292 		return;
293 
294 	for (; fbt != NULL; fbt = fbt->fbtp_next)
295 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
296 }
297 
298 static void
299 fbt_resume(void *arg, dtrace_id_t id, void *parg)
300 {
301 	fbt_probe_t *fbt = parg;
302 	modctl_t *ctl = fbt->fbtp_ctl;
303 
304 	ASSERT(ctl->nenabled > 0);
305 
306 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
307 		return;
308 
309 	for (; fbt != NULL; fbt = fbt->fbtp_next)
310 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
311 }
312 
313 static int
314 fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
315 {
316 	const Elf_Sym *symp = lc->symtab;;
317 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
318 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
319 	int i;
320 	uint32_t *ctfoff;
321 	uint32_t objtoff = hp->cth_objtoff;
322 	uint32_t funcoff = hp->cth_funcoff;
323 	ushort_t info;
324 	ushort_t vlen;
325 
326 	/* Sanity check. */
327 	if (hp->cth_magic != CTF_MAGIC) {
328 		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
329 		return (EINVAL);
330 	}
331 
332 	if (lc->symtab == NULL) {
333 		printf("No symbol table in '%s'\n",lf->pathname);
334 		return (EINVAL);
335 	}
336 
337 	if ((ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK)) == NULL)
338 		return (ENOMEM);
339 
340 	*lc->ctfoffp = ctfoff;
341 
342 	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
343 		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
344 			*ctfoff = 0xffffffff;
345 			continue;
346 		}
347 
348 		switch (ELF_ST_TYPE(symp->st_info)) {
349 		case STT_OBJECT:
350 			if (objtoff >= hp->cth_funcoff ||
351                             (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
352 				*ctfoff = 0xffffffff;
353                                 break;
354                         }
355 
356                         *ctfoff = objtoff;
357                         objtoff += sizeof (ushort_t);
358 			break;
359 
360 		case STT_FUNC:
361 			if (funcoff >= hp->cth_typeoff) {
362 				*ctfoff = 0xffffffff;
363 				break;
364 			}
365 
366 			*ctfoff = funcoff;
367 
368 			info = *((const ushort_t *)(ctfdata + funcoff));
369 			vlen = CTF_INFO_VLEN(info);
370 
371 			/*
372 			 * If we encounter a zero pad at the end, just skip it.
373 			 * Otherwise skip over the function and its return type
374 			 * (+2) and the argument list (vlen).
375 			 */
376 			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
377 				funcoff += sizeof (ushort_t); /* skip pad */
378 			else
379 				funcoff += sizeof (ushort_t) * (vlen + 2);
380 			break;
381 
382 		default:
383 			*ctfoff = 0xffffffff;
384 			break;
385 		}
386 	}
387 
388 	return (0);
389 }
390 
391 static ssize_t
392 fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
393     ssize_t *incrementp)
394 {
395 	ssize_t size, increment;
396 
397 	if (version > CTF_VERSION_1 &&
398 	    tp->ctt_size == CTF_LSIZE_SENT) {
399 		size = CTF_TYPE_LSIZE(tp);
400 		increment = sizeof (ctf_type_t);
401 	} else {
402 		size = tp->ctt_size;
403 		increment = sizeof (ctf_stype_t);
404 	}
405 
406 	if (sizep)
407 		*sizep = size;
408 	if (incrementp)
409 		*incrementp = increment;
410 
411 	return (size);
412 }
413 
414 static int
415 fbt_typoff_init(linker_ctf_t *lc)
416 {
417 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
418 	const ctf_type_t *tbuf;
419 	const ctf_type_t *tend;
420 	const ctf_type_t *tp;
421 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
422 	int ctf_typemax = 0;
423 	uint32_t *xp;
424 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
425 
426 
427 	/* Sanity check. */
428 	if (hp->cth_magic != CTF_MAGIC)
429 		return (EINVAL);
430 
431 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
432 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
433 
434 	int child = hp->cth_parname != 0;
435 
436 	/*
437 	 * We make two passes through the entire type section.  In this first
438 	 * pass, we count the number of each type and the total number of types.
439 	 */
440 	for (tp = tbuf; tp < tend; ctf_typemax++) {
441 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
442 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
443 		ssize_t size, increment;
444 
445 		size_t vbytes;
446 		uint_t n;
447 
448 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
449 
450 		switch (kind) {
451 		case CTF_K_INTEGER:
452 		case CTF_K_FLOAT:
453 			vbytes = sizeof (uint_t);
454 			break;
455 		case CTF_K_ARRAY:
456 			vbytes = sizeof (ctf_array_t);
457 			break;
458 		case CTF_K_FUNCTION:
459 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
460 			break;
461 		case CTF_K_STRUCT:
462 		case CTF_K_UNION:
463 			if (size < CTF_LSTRUCT_THRESH) {
464 				ctf_member_t *mp = (ctf_member_t *)
465 				    ((uintptr_t)tp + increment);
466 
467 				vbytes = sizeof (ctf_member_t) * vlen;
468 				for (n = vlen; n != 0; n--, mp++)
469 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
470 			} else {
471 				ctf_lmember_t *lmp = (ctf_lmember_t *)
472 				    ((uintptr_t)tp + increment);
473 
474 				vbytes = sizeof (ctf_lmember_t) * vlen;
475 				for (n = vlen; n != 0; n--, lmp++)
476 					child |=
477 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
478 			}
479 			break;
480 		case CTF_K_ENUM:
481 			vbytes = sizeof (ctf_enum_t) * vlen;
482 			break;
483 		case CTF_K_FORWARD:
484 			/*
485 			 * For forward declarations, ctt_type is the CTF_K_*
486 			 * kind for the tag, so bump that population count too.
487 			 * If ctt_type is unknown, treat the tag as a struct.
488 			 */
489 			if (tp->ctt_type == CTF_K_UNKNOWN ||
490 			    tp->ctt_type >= CTF_K_MAX)
491 				pop[CTF_K_STRUCT]++;
492 			else
493 				pop[tp->ctt_type]++;
494 			/*FALLTHRU*/
495 		case CTF_K_UNKNOWN:
496 			vbytes = 0;
497 			break;
498 		case CTF_K_POINTER:
499 		case CTF_K_TYPEDEF:
500 		case CTF_K_VOLATILE:
501 		case CTF_K_CONST:
502 		case CTF_K_RESTRICT:
503 			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
504 			vbytes = 0;
505 			break;
506 		default:
507 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
508 			return (EIO);
509 		}
510 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
511 		pop[kind]++;
512 	}
513 
514 	/* account for a sentinel value below */
515 	ctf_typemax++;
516 	*lc->typlenp = ctf_typemax;
517 
518 	if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER, M_ZERO | M_WAITOK)) == NULL)
519 		return (ENOMEM);
520 
521 	*lc->typoffp = xp;
522 
523 	/* type id 0 is used as a sentinel value */
524 	*xp++ = 0;
525 
526 	/*
527 	 * In the second pass, fill in the type offset.
528 	 */
529 	for (tp = tbuf; tp < tend; xp++) {
530 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
531 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
532 		ssize_t size, increment;
533 
534 		size_t vbytes;
535 		uint_t n;
536 
537 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
538 
539 		switch (kind) {
540 		case CTF_K_INTEGER:
541 		case CTF_K_FLOAT:
542 			vbytes = sizeof (uint_t);
543 			break;
544 		case CTF_K_ARRAY:
545 			vbytes = sizeof (ctf_array_t);
546 			break;
547 		case CTF_K_FUNCTION:
548 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
549 			break;
550 		case CTF_K_STRUCT:
551 		case CTF_K_UNION:
552 			if (size < CTF_LSTRUCT_THRESH) {
553 				ctf_member_t *mp = (ctf_member_t *)
554 				    ((uintptr_t)tp + increment);
555 
556 				vbytes = sizeof (ctf_member_t) * vlen;
557 				for (n = vlen; n != 0; n--, mp++)
558 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
559 			} else {
560 				ctf_lmember_t *lmp = (ctf_lmember_t *)
561 				    ((uintptr_t)tp + increment);
562 
563 				vbytes = sizeof (ctf_lmember_t) * vlen;
564 				for (n = vlen; n != 0; n--, lmp++)
565 					child |=
566 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
567 			}
568 			break;
569 		case CTF_K_ENUM:
570 			vbytes = sizeof (ctf_enum_t) * vlen;
571 			break;
572 		case CTF_K_FORWARD:
573 		case CTF_K_UNKNOWN:
574 			vbytes = 0;
575 			break;
576 		case CTF_K_POINTER:
577 		case CTF_K_TYPEDEF:
578 		case CTF_K_VOLATILE:
579 		case CTF_K_CONST:
580 		case CTF_K_RESTRICT:
581 			vbytes = 0;
582 			break;
583 		default:
584 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
585 			return (EIO);
586 		}
587 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
588 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
589 	}
590 
591 	return (0);
592 }
593 
594 /*
595  * CTF Declaration Stack
596  *
597  * In order to implement ctf_type_name(), we must convert a type graph back
598  * into a C type declaration.  Unfortunately, a type graph represents a storage
599  * class ordering of the type whereas a type declaration must obey the C rules
600  * for operator precedence, and the two orderings are frequently in conflict.
601  * For example, consider these CTF type graphs and their C declarations:
602  *
603  * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
604  * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
605  *
606  * In each case, parentheses are used to raise operator * to higher lexical
607  * precedence, so the string form of the C declaration cannot be constructed by
608  * walking the type graph links and forming the string from left to right.
609  *
610  * The functions in this file build a set of stacks from the type graph nodes
611  * corresponding to the C operator precedence levels in the appropriate order.
612  * The code in ctf_type_name() can then iterate over the levels and nodes in
613  * lexical precedence order and construct the final C declaration string.
614  */
/*
 * List linkage.  Embedded as the first member of a list element it holds the
 * element's neighbours; used stand-alone as a list head it holds the tail
 * (l_prev) and head (l_next) elements.
 */
615 typedef struct ctf_list {
616 	struct ctf_list *l_prev; /* previous pointer or tail pointer */
617 	struct ctf_list *l_next; /* next pointer or head pointer */
618 } ctf_list_t;
619 
/* Fetch a neighbour of "elem" (or the tail/head when "elem" is a list head). */
620 #define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
621 #define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
622 
/* Precedence levels; CTF_PREC_MAX is the number of levels, not a level. */
623 typedef enum {
624 	CTF_PREC_BASE,
625 	CTF_PREC_POINTER,
626 	CTF_PREC_ARRAY,
627 	CTF_PREC_FUNCTION,
628 	CTF_PREC_MAX
629 } ctf_decl_prec_t;
630 
/* One type graph node queued on a precedence stack of a ctf_decl_t. */
631 typedef struct ctf_decl_node {
632 	ctf_list_t cd_list;			/* linked list pointers */
633 	ctf_id_t cd_type;			/* type identifier */
634 	uint_t cd_kind;				/* type kind */
635 	uint_t cd_n;				/* type dimension if array */
636 } ctf_decl_node_t;
637 
/* Working state for turning one type into a C declaration string. */
638 typedef struct ctf_decl {
639 	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
640 	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
641 	ctf_decl_prec_t cd_qualp;		/* qualifier precedence */
642 	ctf_decl_prec_t cd_ordp;		/* ordered precedence */
643 	char *cd_buf;				/* buffer for output */
644 	char *cd_ptr;				/* buffer location */
645 	char *cd_end;				/* buffer limit */
646 	size_t cd_len;				/* buffer space required */
647 	int cd_err;				/* saved error value */
648 } ctf_decl_t;
649 
650 /*
651  * Simple doubly-linked list append routine.  This implementation assumes that
652  * each list element contains an embedded ctf_list_t as the first member.
653  * An additional ctf_list_t is used to store the head (l_next) and tail
654  * (l_prev) pointers.  The current head and tail list elements have their
655  * previous and next pointers set to NULL, respectively.
656  */
657 static void
658 ctf_list_append(ctf_list_t *lp, void *new)
659 {
660 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
661 	ctf_list_t *q = new;		/* q = new list element */
662 
663 	lp->l_prev = q;
664 	q->l_prev = p;
665 	q->l_next = NULL;
666 
667 	if (p != NULL)
668 		p->l_next = q;
669 	else
670 		lp->l_next = q;
671 }
672 
673 /*
674  * Prepend the specified existing element to the given ctf_list_t.  The
675  * existing pointer should be pointing at a struct with embedded ctf_list_t.
676  */
677 static void
678 ctf_list_prepend(ctf_list_t *lp, void *new)
679 {
680 	ctf_list_t *p = new;		/* p = new list element */
681 	ctf_list_t *q = lp->l_next;	/* q = head list element */
682 
683 	lp->l_next = p;
684 	p->l_prev = NULL;
685 	p->l_next = q;
686 
687 	if (q != NULL)
688 		q->l_prev = p;
689 	else
690 		lp->l_prev = p;
691 }
692 
693 static void
694 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
695 {
696 	int i;
697 
698 	bzero(cd, sizeof (ctf_decl_t));
699 
700 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
701 		cd->cd_order[i] = CTF_PREC_BASE - 1;
702 
703 	cd->cd_qualp = CTF_PREC_BASE;
704 	cd->cd_ordp = CTF_PREC_BASE;
705 
706 	cd->cd_buf = buf;
707 	cd->cd_ptr = buf;
708 	cd->cd_end = buf + len;
709 }
710 
711 static void
712 ctf_decl_fini(ctf_decl_t *cd)
713 {
714 	ctf_decl_node_t *cdp, *ndp;
715 	int i;
716 
717 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
718 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
719 		    cdp != NULL; cdp = ndp) {
720 			ndp = ctf_list_next(cdp);
721 			free(cdp, M_FBT);
722 		}
723 	}
724 }
725 
726 static const ctf_type_t *
727 ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
728 {
729 	const ctf_type_t *tp;
730 	uint32_t offset;
731 	uint32_t *typoff = *lc->typoffp;
732 
733 	if (type >= *lc->typlenp) {
734 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
735 		return(NULL);
736 	}
737 
738 	/* Check if the type isn't cross-referenced. */
739 	if ((offset = typoff[type]) == 0) {
740 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
741 		return(NULL);
742 	}
743 
744 	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
745 
746 	return (tp);
747 }
748 
749 static void
750 fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
751 {
752 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
753 	const ctf_type_t *tp;
754 	const ctf_array_t *ap;
755 	ssize_t increment;
756 
757 	bzero(arp, sizeof(*arp));
758 
759 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
760 		return;
761 
762 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
763 		return;
764 
765 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
766 
767 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
768 	arp->ctr_contents = ap->cta_contents;
769 	arp->ctr_index = ap->cta_index;
770 	arp->ctr_nelems = ap->cta_nelems;
771 }
772 
773 static const char *
774 ctf_strptr(linker_ctf_t *lc, int name)
775 {
776 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
777 	const char *strp = "";
778 
779 	if (name < 0 || name >= hp->cth_strlen)
780 		return(strp);
781 
782 	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
783 
784 	return (strp);
785 }
786 
787 static void
788 ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
789 {
790 	ctf_decl_node_t *cdp;
791 	ctf_decl_prec_t prec;
792 	uint_t kind, n = 1;
793 	int is_qual = 0;
794 
795 	const ctf_type_t *tp;
796 	ctf_arinfo_t ar;
797 
798 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
799 		cd->cd_err = ENOENT;
800 		return;
801 	}
802 
803 	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
804 	case CTF_K_ARRAY:
805 		fbt_array_info(lc, type, &ar);
806 		ctf_decl_push(cd, lc, ar.ctr_contents);
807 		n = ar.ctr_nelems;
808 		prec = CTF_PREC_ARRAY;
809 		break;
810 
811 	case CTF_K_TYPEDEF:
812 		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
813 			ctf_decl_push(cd, lc, tp->ctt_type);
814 			return;
815 		}
816 		prec = CTF_PREC_BASE;
817 		break;
818 
819 	case CTF_K_FUNCTION:
820 		ctf_decl_push(cd, lc, tp->ctt_type);
821 		prec = CTF_PREC_FUNCTION;
822 		break;
823 
824 	case CTF_K_POINTER:
825 		ctf_decl_push(cd, lc, tp->ctt_type);
826 		prec = CTF_PREC_POINTER;
827 		break;
828 
829 	case CTF_K_VOLATILE:
830 	case CTF_K_CONST:
831 	case CTF_K_RESTRICT:
832 		ctf_decl_push(cd, lc, tp->ctt_type);
833 		prec = cd->cd_qualp;
834 		is_qual++;
835 		break;
836 
837 	default:
838 		prec = CTF_PREC_BASE;
839 	}
840 
841 	if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) {
842 		cd->cd_err = EAGAIN;
843 		return;
844 	}
845 
846 	cdp->cd_type = type;
847 	cdp->cd_kind = kind;
848 	cdp->cd_n = n;
849 
850 	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
851 		cd->cd_order[prec] = cd->cd_ordp++;
852 
853 	/*
854 	 * Reset cd_qualp to the highest precedence level that we've seen so
855 	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
856 	 */
857 	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
858 		cd->cd_qualp = prec;
859 
860 	/*
861 	 * C array declarators are ordered inside out so prepend them.  Also by
862 	 * convention qualifiers of base types precede the type specifier (e.g.
863 	 * const int vs. int const) even though the two forms are equivalent.
864 	 */
865 	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
866 		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
867 	else
868 		ctf_list_append(&cd->cd_nodes[prec], cdp);
869 }
870 
871 static void
872 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
873 {
874 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
875 	va_list ap;
876 	size_t n;
877 
878 	va_start(ap, format);
879 	n = vsnprintf(cd->cd_ptr, len, format, ap);
880 	va_end(ap);
881 
882 	cd->cd_ptr += MIN(n, len);
883 	cd->cd_len += n;
884 }
885 
886 static ssize_t
887 fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
888 {
889 	ctf_decl_t cd;
890 	ctf_decl_node_t *cdp;
891 	ctf_decl_prec_t prec, lp, rp;
892 	int ptr, arr;
893 	uint_t k;
894 
895 	if (lc == NULL && type == CTF_ERR)
896 		return (-1); /* simplify caller code by permitting CTF_ERR */
897 
898 	ctf_decl_init(&cd, buf, len);
899 	ctf_decl_push(&cd, lc, type);
900 
901 	if (cd.cd_err != 0) {
902 		ctf_decl_fini(&cd);
903 		return (-1);
904 	}
905 
906 	/*
907 	 * If the type graph's order conflicts with lexical precedence order
908 	 * for pointers or arrays, then we need to surround the declarations at
909 	 * the corresponding lexical precedence with parentheses.  This can
910 	 * result in either a parenthesized pointer (*) as in int (*)() or
911 	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
912 	 */
913 	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
914 	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
915 
916 	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
917 	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
918 
919 	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
920 
921 	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
922 		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
923 		    cdp != NULL; cdp = ctf_list_next(cdp)) {
924 
925 			const ctf_type_t *tp =
926 			    ctf_lookup_by_id(lc, cdp->cd_type);
927 			const char *name = ctf_strptr(lc, tp->ctt_name);
928 
929 			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
930 				ctf_decl_sprintf(&cd, " ");
931 
932 			if (lp == prec) {
933 				ctf_decl_sprintf(&cd, "(");
934 				lp = -1;
935 			}
936 
937 			switch (cdp->cd_kind) {
938 			case CTF_K_INTEGER:
939 			case CTF_K_FLOAT:
940 			case CTF_K_TYPEDEF:
941 				ctf_decl_sprintf(&cd, "%s", name);
942 				break;
943 			case CTF_K_POINTER:
944 				ctf_decl_sprintf(&cd, "*");
945 				break;
946 			case CTF_K_ARRAY:
947 				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
948 				break;
949 			case CTF_K_FUNCTION:
950 				ctf_decl_sprintf(&cd, "()");
951 				break;
952 			case CTF_K_STRUCT:
953 			case CTF_K_FORWARD:
954 				ctf_decl_sprintf(&cd, "struct %s", name);
955 				break;
956 			case CTF_K_UNION:
957 				ctf_decl_sprintf(&cd, "union %s", name);
958 				break;
959 			case CTF_K_ENUM:
960 				ctf_decl_sprintf(&cd, "enum %s", name);
961 				break;
962 			case CTF_K_VOLATILE:
963 				ctf_decl_sprintf(&cd, "volatile");
964 				break;
965 			case CTF_K_CONST:
966 				ctf_decl_sprintf(&cd, "const");
967 				break;
968 			case CTF_K_RESTRICT:
969 				ctf_decl_sprintf(&cd, "restrict");
970 				break;
971 			}
972 
973 			k = cdp->cd_kind;
974 		}
975 
976 		if (rp == prec)
977 			ctf_decl_sprintf(&cd, ")");
978 	}
979 
980 	ctf_decl_fini(&cd);
981 	return (cd.cd_len);
982 }
983 
984 static void
985 fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
986 {
987 	const ushort_t *dp;
988 	fbt_probe_t *fbt = parg;
989 	linker_ctf_t lc;
990 	modctl_t *ctl = fbt->fbtp_ctl;
991 	int ndx = desc->dtargd_ndx;
992 	int symindx = fbt->fbtp_symindx;
993 	uint32_t *ctfoff;
994 	uint32_t offset;
995 	ushort_t info, kind, n;
996 
997 	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
998 		(void) strcpy(desc->dtargd_native, "int");
999 		return;
1000 	}
1001 
1002 	desc->dtargd_ndx = DTRACE_ARGNONE;
1003 
1004 	/* Get a pointer to the CTF data and it's length. */
1005 	if (linker_ctf_get(ctl, &lc) != 0)
1006 		/* No CTF data? Something wrong? *shrug* */
1007 		return;
1008 
1009 	/* Check if this module hasn't been initialised yet. */
1010 	if (*lc.ctfoffp == NULL) {
1011 		/*
1012 		 * Initialise the CTF object and function symindx to
1013 		 * byte offset array.
1014 		 */
1015 		if (fbt_ctfoff_init(ctl, &lc) != 0)
1016 			return;
1017 
1018 		/* Initialise the CTF type to byte offset array. */
1019 		if (fbt_typoff_init(&lc) != 0)
1020 			return;
1021 	}
1022 
1023 	ctfoff = *lc.ctfoffp;
1024 
1025 	if (ctfoff == NULL || *lc.typoffp == NULL)
1026 		return;
1027 
1028 	/* Check if the symbol index is out of range. */
1029 	if (symindx >= lc.nsym)
1030 		return;
1031 
1032 	/* Check if the symbol isn't cross-referenced. */
1033 	if ((offset = ctfoff[symindx]) == 0xffffffff)
1034 		return;
1035 
1036 	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1037 
1038 	info = *dp++;
1039 	kind = CTF_INFO_KIND(info);
1040 	n = CTF_INFO_VLEN(info);
1041 
1042 	if (kind == CTF_K_UNKNOWN && n == 0) {
1043 		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1044 		return;
1045 	}
1046 
1047 	if (kind != CTF_K_FUNCTION) {
1048 		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1049 		return;
1050 	}
1051 
1052 	if (fbt->fbtp_roffset != 0) {
1053 		/* Only return type is available for args[1] in return probe. */
1054 		if (ndx > 1)
1055 			return;
1056 		ASSERT(ndx == 1);
1057 	} else {
1058 		/* Check if the requested argument doesn't exist. */
1059 		if (ndx >= n)
1060 			return;
1061 
1062 		/* Skip the return type and arguments up to the one requested. */
1063 		dp += ndx + 1;
1064 	}
1065 
1066 	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1067 		desc->dtargd_ndx = ndx;
1068 
1069 	return;
1070 }
1071 
1072 static int
1073 fbt_linker_file_cb(linker_file_t lf, void *arg)
1074 {
1075 
1076 	fbt_provide_module(arg, lf);
1077 
1078 	return (0);
1079 }
1080 
1081 static void
1082 fbt_load(void *dummy)
1083 {
1084 	/* Create the /dev/dtrace/fbt entry. */
1085 	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1086 	    "dtrace/fbt");
1087 
1088 	/* Default the probe table size if not specified. */
1089 	if (fbt_probetab_size == 0)
1090 		fbt_probetab_size = FBT_PROBETAB_SIZE;
1091 
1092 	/* Choose the hash mask for the probe table. */
1093 	fbt_probetab_mask = fbt_probetab_size - 1;
1094 
1095 	/* Allocate memory for the probe table. */
1096 	fbt_probetab =
1097 	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1098 
1099 	dtrace_doubletrap_func = fbt_doubletrap;
1100 	dtrace_invop_add(fbt_invop);
1101 
1102 	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1103 	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1104 		return;
1105 
1106 	/* Create probes for the kernel and already-loaded modules. */
1107 	linker_file_foreach(fbt_linker_file_cb, NULL);
1108 }
1109 
1110 static int
1111 fbt_unload()
1112 {
1113 	int error = 0;
1114 
1115 	/* De-register the invalid opcode handler. */
1116 	dtrace_invop_remove(fbt_invop);
1117 
1118 	dtrace_doubletrap_func = NULL;
1119 
1120 	/* De-register this DTrace provider. */
1121 	if ((error = dtrace_unregister(fbt_id)) != 0)
1122 		return (error);
1123 
1124 	/* Free the probe table. */
1125 	free(fbt_probetab, M_FBT);
1126 	fbt_probetab = NULL;
1127 	fbt_probetab_mask = 0;
1128 
1129 	destroy_dev(fbt_cdev);
1130 
1131 	return (error);
1132 }
1133 
1134 static int
1135 fbt_modevent(module_t mod __unused, int type, void *data __unused)
1136 {
1137 	int error = 0;
1138 
1139 	switch (type) {
1140 	case MOD_LOAD:
1141 		break;
1142 
1143 	case MOD_UNLOAD:
1144 		break;
1145 
1146 	case MOD_SHUTDOWN:
1147 		break;
1148 
1149 	default:
1150 		error = EOPNOTSUPP;
1151 		break;
1152 
1153 	}
1154 
1155 	return (error);
1156 }
1157 
1158 static int
1159 fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1160 {
1161 	return (0);
1162 }
1163 
/*
 * Hook fbt_load() and fbt_unload() into system initialisation and teardown
 * at the SI_SUB_DTRACE_PROVIDER stage; both are passed NULL.
 */
1164 SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
1165 SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);
1166 
/*
 * Module declaration: "fbt" is version 1, uses fbt_modevent() as its event
 * handler, and depends on version 1 of the dtrace and opensolaris modules.
 */
1167 DEV_MODULE(fbt, fbt_modevent, NULL);
1168 MODULE_VERSION(fbt, 1);
1169 MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
1170 MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1171