xref: /freebsd/sys/cddl/dev/fbt/fbt.c (revision f7c32ed617858bcd22f8d1b03199099d50125721)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22  *
23  * $FreeBSD$
24  *
25  */
26 
27 /*
28  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29  * Use is subject to license terms.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cpuvar.h>
37 #include <sys/endian.h>
38 #include <sys/fcntl.h>
39 #include <sys/filio.h>
40 #include <sys/kdb.h>
41 #include <sys/kernel.h>
42 #include <sys/kmem.h>
43 #include <sys/kthread.h>
44 #include <sys/limits.h>
45 #include <sys/linker.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/pcpu.h>
51 #include <sys/poll.h>
52 #include <sys/proc.h>
53 #include <sys/selinfo.h>
54 #include <sys/smp.h>
55 #include <sys/syscall.h>
56 #include <sys/sysent.h>
57 #include <sys/sysproto.h>
58 #include <sys/uio.h>
59 #include <sys/unistd.h>
60 #include <machine/stdarg.h>
61 
62 #include <sys/dtrace.h>
63 #include <sys/dtrace_bsd.h>
64 
65 #include "fbt.h"
66 
/* Malloc type used for probes, the probe hash table and CTF decl nodes. */
MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/* Provider id handed back by dtrace_register() in fbt_load(). */
dtrace_provider_id_t	fbt_id;
/* Probe hash table; allocated in fbt_load(), indexed with FBT_ADDR2NDX(). */
fbt_probe_t		**fbt_probetab;
/* Set to fbt_probetab_size - 1 in fbt_load(). */
int			fbt_probetab_mask;

/* Forward declarations of the file-local entry points. */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);

/* Character device switch for the "dtrace/fbt" node created in fbt_load(). */
static struct cdevsw fbt_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= fbt_open,
	.d_name		= "fbt",
};

/* Stability attributes passed to dtrace_register() in fbt_load(). */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};

/*
 * Provider operations vector passed to dtrace_register() in fbt_load().
 * Operations this provider does not implement are left NULL.
 */
static dtrace_pops_t fbt_pops = {
	.dtps_provide =		NULL,
	.dtps_provide_module =	fbt_provide_module,
	.dtps_enable =		fbt_enable,
	.dtps_disable =		fbt_disable,
	.dtps_suspend =		fbt_suspend,
	.dtps_resume =		fbt_resume,
	.dtps_getargdesc =	fbt_getargdesc,
	.dtps_getargval =	NULL,
	.dtps_usermode =	NULL,
	.dtps_destroy =		fbt_destroy
};

/* Device node created by make_dev() in fbt_load(). */
static struct cdev		*fbt_cdev;
/* Number of hash buckets; defaulted to FBT_PROBETAB_SIZE in fbt_load(). */
static int			fbt_probetab_size;
/* When non-zero, fbt_enable() reports probes it refuses to enable. */
static int			fbt_verbose = 0;
114 
/*
 * Decide whether the function called `name' must be left uninstrumented.
 *
 * Returns 1 when the function is off limits for FBT and 0 when it may be
 * instrumented.
 */
int
fbt_excluded(const char *name)
{
	/* Lock owner methods may be called from probe context. */
	static const char *const lock_owner_fns[] = {
		"owner_mtx",
		"owner_rm",
		"owner_rw",
		"owner_sx",
	};
	size_t idx;

	/*
	 * Anything beginning with "dtrace_" may be called from probe
	 * context, unless the name opts out of that explicitly by carrying
	 * the longer "dtrace_safe_" prefix.
	 */
	if (strncmp(name, "dtrace_", 7) == 0) {
		if (strncmp(name, "dtrace_safe_", 12) != 0)
			return (1);
	}

	/*
	 * Leave functions that are probably part of DDB alone; instrumenting
	 * them makes it too hard to debug a broken FBT.
	 *
	 * NB: kdb_enter() can be excluded, but its call to printf() can't be.
	 * This is generally OK since we're not yet in debugging context.
	 */
	if (strncmp(name, "db_", 3) == 0)
		return (1);
	if (strncmp(name, "kdb_", 4) == 0)
		return (1);

	for (idx = 0; idx < sizeof(lock_owner_fns) / sizeof(lock_owner_fns[0]);
	    idx++) {
		if (strcmp(name, lock_owner_fns[idx]) == 0)
			return (1);
	}

	/*
	 * Stack unwinders may be called from probe context on some
	 * platforms.
	 */
#if defined(__aarch64__) || defined(__riscv)
	if (strcmp(name, "unwind_frame") == 0)
		return (1);
#endif

	/*
	 * When DTrace is built into the kernel the FBT functions themselves
	 * must not be instrumented.
	 */
#ifndef _KLD_MODULE
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
170 
171 static void
172 fbt_doubletrap(void)
173 {
174 	fbt_probe_t *fbt;
175 	int i;
176 
177 	for (i = 0; i < fbt_probetab_size; i++) {
178 		fbt = fbt_probetab[i];
179 
180 		for (; fbt != NULL; fbt = fbt->fbtp_probenext)
181 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
182 	}
183 }
184 
185 static void
186 fbt_provide_module(void *arg, modctl_t *lf)
187 {
188 	char modname[MAXPATHLEN];
189 	int i;
190 	size_t len;
191 
192 	strlcpy(modname, lf->filename, sizeof(modname));
193 	len = strlen(modname);
194 	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
195 		modname[len - 3] = '\0';
196 
197 	/*
198 	 * Employees of dtrace and their families are ineligible.  Void
199 	 * where prohibited.
200 	 */
201 	if (strcmp(modname, "dtrace") == 0)
202 		return;
203 
204 	/*
205 	 * To register with DTrace, a module must list 'dtrace' as a
206 	 * dependency in order for the kernel linker to resolve
207 	 * symbols like dtrace_register(). All modules with such a
208 	 * dependency are ineligible for FBT tracing.
209 	 */
210 	for (i = 0; i < lf->ndeps; i++)
211 		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
212 			return;
213 
214 	if (lf->fbt_nentries) {
215 		/*
216 		 * This module has some FBT entries allocated; we're afraid
217 		 * to screw with it.
218 		 */
219 		return;
220 	}
221 
222 	/*
223 	 * List the functions in the module and the symbol values.
224 	 */
225 	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
226 }
227 
228 static void
229 fbt_destroy_one(fbt_probe_t *fbt)
230 {
231 	fbt_probe_t *hash, *hashprev, *next;
232 	int ndx;
233 
234 	ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
235 	for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL;
236 	    hashprev = hash, hash = hash->fbtp_hashnext) {
237 		if (hash == fbt) {
238 			if ((next = fbt->fbtp_tracenext) != NULL)
239 				next->fbtp_hashnext = hash->fbtp_hashnext;
240 			else
241 				next = hash->fbtp_hashnext;
242 			if (hashprev != NULL)
243 				hashprev->fbtp_hashnext = next;
244 			else
245 				fbt_probetab[ndx] = next;
246 			goto free;
247 		} else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
248 			for (next = hash; next->fbtp_tracenext != NULL;
249 			    next = next->fbtp_tracenext) {
250 				if (fbt == next->fbtp_tracenext) {
251 					next->fbtp_tracenext =
252 					    fbt->fbtp_tracenext;
253 					goto free;
254 				}
255 			}
256 		}
257 	}
258 	panic("probe %p not found in hash table", fbt);
259 free:
260 	free(fbt, M_FBT);
261 }
262 
263 static void
264 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
265 {
266 	fbt_probe_t *fbt = parg, *next;
267 	modctl_t *ctl;
268 
269 	do {
270 		ctl = fbt->fbtp_ctl;
271 		ctl->fbt_nentries--;
272 
273 		next = fbt->fbtp_probenext;
274 		fbt_destroy_one(fbt);
275 		fbt = next;
276 	} while (fbt != NULL);
277 }
278 
279 static void
280 fbt_enable(void *arg, dtrace_id_t id, void *parg)
281 {
282 	fbt_probe_t *fbt = parg;
283 	modctl_t *ctl = fbt->fbtp_ctl;
284 
285 	ctl->nenabled++;
286 
287 	/*
288 	 * Now check that our modctl has the expected load count.  If it
289 	 * doesn't, this module must have been unloaded and reloaded -- and
290 	 * we're not going to touch it.
291 	 */
292 	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
293 		if (fbt_verbose) {
294 			printf("fbt is failing for probe %s "
295 			    "(module %s reloaded)",
296 			    fbt->fbtp_name, ctl->filename);
297 		}
298 
299 		return;
300 	}
301 
302 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
303 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
304 		fbt->fbtp_enabled++;
305 	}
306 }
307 
308 static void
309 fbt_disable(void *arg, dtrace_id_t id, void *parg)
310 {
311 	fbt_probe_t *fbt = parg, *hash;
312 	modctl_t *ctl = fbt->fbtp_ctl;
313 
314 	ASSERT(ctl->nenabled > 0);
315 	ctl->nenabled--;
316 
317 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
318 		return;
319 
320 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
321 		fbt->fbtp_enabled--;
322 
323 		for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)];
324 		    hash != NULL; hash = hash->fbtp_hashnext) {
325 			if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
326 				for (; hash != NULL; hash = hash->fbtp_tracenext)
327 					if (hash->fbtp_enabled > 0)
328 						break;
329 				break;
330 			}
331 		}
332 		if (hash == NULL)
333 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
334 	}
335 }
336 
337 static void
338 fbt_suspend(void *arg, dtrace_id_t id, void *parg)
339 {
340 	fbt_probe_t *fbt = parg;
341 	modctl_t *ctl = fbt->fbtp_ctl;
342 
343 	ASSERT(ctl->nenabled > 0);
344 
345 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
346 		return;
347 
348 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
349 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
350 }
351 
352 static void
353 fbt_resume(void *arg, dtrace_id_t id, void *parg)
354 {
355 	fbt_probe_t *fbt = parg;
356 	modctl_t *ctl = fbt->fbtp_ctl;
357 
358 	ASSERT(ctl->nenabled > 0);
359 
360 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
361 		return;
362 
363 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
364 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
365 }
366 
367 static int
368 fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
369 {
370 	const Elf_Sym *symp = lc->symtab;
371 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
372 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
373 	int i;
374 	uint32_t *ctfoff;
375 	uint32_t objtoff = hp->cth_objtoff;
376 	uint32_t funcoff = hp->cth_funcoff;
377 	ushort_t info;
378 	ushort_t vlen;
379 
380 	/* Sanity check. */
381 	if (hp->cth_magic != CTF_MAGIC) {
382 		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
383 		return (EINVAL);
384 	}
385 
386 	if (lc->symtab == NULL) {
387 		printf("No symbol table in '%s'\n",lf->pathname);
388 		return (EINVAL);
389 	}
390 
391 	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
392 	*lc->ctfoffp = ctfoff;
393 
394 	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
395 		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
396 			*ctfoff = 0xffffffff;
397 			continue;
398 		}
399 
400 		switch (ELF_ST_TYPE(symp->st_info)) {
401 		case STT_OBJECT:
402 			if (objtoff >= hp->cth_funcoff ||
403                             (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
404 				*ctfoff = 0xffffffff;
405                                 break;
406                         }
407 
408                         *ctfoff = objtoff;
409                         objtoff += sizeof (ushort_t);
410 			break;
411 
412 		case STT_FUNC:
413 			if (funcoff >= hp->cth_typeoff) {
414 				*ctfoff = 0xffffffff;
415 				break;
416 			}
417 
418 			*ctfoff = funcoff;
419 
420 			info = *((const ushort_t *)(ctfdata + funcoff));
421 			vlen = CTF_INFO_VLEN(info);
422 
423 			/*
424 			 * If we encounter a zero pad at the end, just skip it.
425 			 * Otherwise skip over the function and its return type
426 			 * (+2) and the argument list (vlen).
427 			 */
428 			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
429 				funcoff += sizeof (ushort_t); /* skip pad */
430 			else
431 				funcoff += sizeof (ushort_t) * (vlen + 2);
432 			break;
433 
434 		default:
435 			*ctfoff = 0xffffffff;
436 			break;
437 		}
438 	}
439 
440 	return (0);
441 }
442 
443 static ssize_t
444 fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
445     ssize_t *incrementp)
446 {
447 	ssize_t size, increment;
448 
449 	if (version > CTF_VERSION_1 &&
450 	    tp->ctt_size == CTF_LSIZE_SENT) {
451 		size = CTF_TYPE_LSIZE(tp);
452 		increment = sizeof (ctf_type_t);
453 	} else {
454 		size = tp->ctt_size;
455 		increment = sizeof (ctf_stype_t);
456 	}
457 
458 	if (sizep)
459 		*sizep = size;
460 	if (incrementp)
461 		*incrementp = increment;
462 
463 	return (size);
464 }
465 
466 static int
467 fbt_typoff_init(linker_ctf_t *lc)
468 {
469 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
470 	const ctf_type_t *tbuf;
471 	const ctf_type_t *tend;
472 	const ctf_type_t *tp;
473 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
474 	int ctf_typemax = 0;
475 	uint32_t *xp;
476 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
477 	uint8_t version;
478 
479 
480 	/* Sanity check. */
481 	if (hp->cth_magic != CTF_MAGIC)
482 		return (EINVAL);
483 
484 	version = hp->cth_version;
485 
486 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
487 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
488 
489 	/*
490 	 * We make two passes through the entire type section.  In this first
491 	 * pass, we count the number of each type and the total number of types.
492 	 */
493 	for (tp = tbuf; tp < tend; ctf_typemax++) {
494 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
495 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
496 		ssize_t size, increment;
497 
498 		size_t vbytes;
499 
500 		(void) fbt_get_ctt_size(version, tp, &size, &increment);
501 
502 		switch (kind) {
503 		case CTF_K_INTEGER:
504 		case CTF_K_FLOAT:
505 			vbytes = sizeof (uint_t);
506 			break;
507 		case CTF_K_ARRAY:
508 			vbytes = sizeof (ctf_array_t);
509 			break;
510 		case CTF_K_FUNCTION:
511 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
512 			break;
513 		case CTF_K_STRUCT:
514 		case CTF_K_UNION:
515 			if (size < CTF_LSTRUCT_THRESH)
516 				vbytes = sizeof (ctf_member_t) * vlen;
517 			else
518 				vbytes = sizeof (ctf_lmember_t) * vlen;
519 			break;
520 		case CTF_K_ENUM:
521 			vbytes = sizeof (ctf_enum_t) * vlen;
522 			break;
523 		case CTF_K_FORWARD:
524 			/*
525 			 * For forward declarations, ctt_type is the CTF_K_*
526 			 * kind for the tag, so bump that population count too.
527 			 * If ctt_type is unknown, treat the tag as a struct.
528 			 */
529 			if (tp->ctt_type == CTF_K_UNKNOWN ||
530 			    tp->ctt_type >= CTF_K_MAX)
531 				pop[CTF_K_STRUCT]++;
532 			else
533 				pop[tp->ctt_type]++;
534 			/*FALLTHRU*/
535 		case CTF_K_UNKNOWN:
536 			vbytes = 0;
537 			break;
538 		case CTF_K_POINTER:
539 		case CTF_K_TYPEDEF:
540 		case CTF_K_VOLATILE:
541 		case CTF_K_CONST:
542 		case CTF_K_RESTRICT:
543 			vbytes = 0;
544 			break;
545 		default:
546 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
547 			return (EIO);
548 		}
549 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
550 		pop[kind]++;
551 	}
552 
553 	/* account for a sentinel value below */
554 	ctf_typemax++;
555 	*lc->typlenp = ctf_typemax;
556 
557 	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
558 	    M_ZERO | M_WAITOK);
559 
560 	*lc->typoffp = xp;
561 
562 	/* type id 0 is used as a sentinel value */
563 	*xp++ = 0;
564 
565 	/*
566 	 * In the second pass, fill in the type offset.
567 	 */
568 	for (tp = tbuf; tp < tend; xp++) {
569 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
570 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
571 		ssize_t size, increment;
572 
573 		size_t vbytes;
574 
575 		(void) fbt_get_ctt_size(version, tp, &size, &increment);
576 
577 		switch (kind) {
578 		case CTF_K_INTEGER:
579 		case CTF_K_FLOAT:
580 			vbytes = sizeof (uint_t);
581 			break;
582 		case CTF_K_ARRAY:
583 			vbytes = sizeof (ctf_array_t);
584 			break;
585 		case CTF_K_FUNCTION:
586 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
587 			break;
588 		case CTF_K_STRUCT:
589 		case CTF_K_UNION:
590 			if (size < CTF_LSTRUCT_THRESH)
591 				vbytes = sizeof (ctf_member_t) * vlen;
592 			else
593 				vbytes = sizeof (ctf_lmember_t) * vlen;
594 			break;
595 		case CTF_K_ENUM:
596 			vbytes = sizeof (ctf_enum_t) * vlen;
597 			break;
598 		case CTF_K_FORWARD:
599 		case CTF_K_UNKNOWN:
600 			vbytes = 0;
601 			break;
602 		case CTF_K_POINTER:
603 		case CTF_K_TYPEDEF:
604 		case CTF_K_VOLATILE:
605 		case CTF_K_CONST:
606 		case CTF_K_RESTRICT:
607 			vbytes = 0;
608 			break;
609 		default:
610 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
611 			return (EIO);
612 		}
613 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
614 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
615 	}
616 
617 	return (0);
618 }
619 
620 /*
621  * CTF Declaration Stack
622  *
 * In order to implement fbt_type_name(), we must convert a type graph back
 * into a C type declaration.  Unfortunately, a type graph represents a storage
 * class ordering of the type whereas a type declaration must obey the C rules
 * for operator precedence, and the two orderings are frequently in conflict.
 * For example, consider these CTF type graphs and their C declarations:
628  *
629  * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
630  * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
631  *
632  * In each case, parentheses are used to raise operator * to higher lexical
633  * precedence, so the string form of the C declaration cannot be constructed by
634  * walking the type graph links and forming the string from left to right.
635  *
 * The functions below build a set of stacks from the type graph nodes
 * corresponding to the C operator precedence levels in the appropriate order.
 * The code in fbt_type_name() can then iterate over the levels and nodes in
 * lexical precedence order and construct the final C declaration string.
640  */
/*
 * List linkage.  Embedded as the first member of a list element it holds
 * the element's neighbours; used stand-alone it is the list head, where
 * l_next is the first element and l_prev the last.
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

/* Follow a link from a list head or element; the result is a void *. */
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))

/*
 * Declarator precedence levels.  CTF_PREC_MAX is not a level; it sizes the
 * per-level arrays in ctf_decl_t.
 */
typedef enum {
	CTF_PREC_BASE,
	CTF_PREC_POINTER,
	CTF_PREC_ARRAY,
	CTF_PREC_FUNCTION,
	CTF_PREC_MAX
} ctf_decl_prec_t;

/* One type-graph node queued on a precedence stack by ctf_decl_push(). */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;

/*
 * State for rendering one declaration: the per-precedence node stacks plus
 * the output buffer cursor used by ctf_decl_sprintf().
 */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* qualifier precedence */
	ctf_decl_prec_t cd_ordp;		/* ordered precedence */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
675 
676 /*
677  * Simple doubly-linked list append routine.  This implementation assumes that
678  * each list element contains an embedded ctf_list_t as the first member.
679  * An additional ctf_list_t is used to store the head (l_next) and tail
680  * (l_prev) pointers.  The current head and tail list elements have their
681  * previous and next pointers set to NULL, respectively.
682  */
683 static void
684 ctf_list_append(ctf_list_t *lp, void *new)
685 {
686 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
687 	ctf_list_t *q = new;		/* q = new list element */
688 
689 	lp->l_prev = q;
690 	q->l_prev = p;
691 	q->l_next = NULL;
692 
693 	if (p != NULL)
694 		p->l_next = q;
695 	else
696 		lp->l_next = q;
697 }
698 
699 /*
700  * Prepend the specified existing element to the given ctf_list_t.  The
701  * existing pointer should be pointing at a struct with embedded ctf_list_t.
702  */
703 static void
704 ctf_list_prepend(ctf_list_t *lp, void *new)
705 {
706 	ctf_list_t *p = new;		/* p = new list element */
707 	ctf_list_t *q = lp->l_next;	/* q = head list element */
708 
709 	lp->l_next = p;
710 	p->l_prev = NULL;
711 	p->l_next = q;
712 
713 	if (q != NULL)
714 		q->l_prev = p;
715 	else
716 		lp->l_prev = p;
717 }
718 
719 static void
720 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
721 {
722 	int i;
723 
724 	bzero(cd, sizeof (ctf_decl_t));
725 
726 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
727 		cd->cd_order[i] = CTF_PREC_BASE - 1;
728 
729 	cd->cd_qualp = CTF_PREC_BASE;
730 	cd->cd_ordp = CTF_PREC_BASE;
731 
732 	cd->cd_buf = buf;
733 	cd->cd_ptr = buf;
734 	cd->cd_end = buf + len;
735 }
736 
737 static void
738 ctf_decl_fini(ctf_decl_t *cd)
739 {
740 	ctf_decl_node_t *cdp, *ndp;
741 	int i;
742 
743 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
744 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
745 		    cdp != NULL; cdp = ndp) {
746 			ndp = ctf_list_next(cdp);
747 			free(cdp, M_FBT);
748 		}
749 	}
750 }
751 
752 static const ctf_type_t *
753 ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
754 {
755 	const ctf_type_t *tp;
756 	uint32_t offset;
757 	uint32_t *typoff = *lc->typoffp;
758 
759 	if (type >= *lc->typlenp) {
760 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
761 		return(NULL);
762 	}
763 
764 	/* Check if the type isn't cross-referenced. */
765 	if ((offset = typoff[type]) == 0) {
766 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
767 		return(NULL);
768 	}
769 
770 	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
771 
772 	return (tp);
773 }
774 
775 static void
776 fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
777 {
778 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
779 	const ctf_type_t *tp;
780 	const ctf_array_t *ap;
781 	ssize_t increment;
782 
783 	bzero(arp, sizeof(*arp));
784 
785 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
786 		return;
787 
788 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
789 		return;
790 
791 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
792 
793 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
794 	arp->ctr_contents = ap->cta_contents;
795 	arp->ctr_index = ap->cta_index;
796 	arp->ctr_nelems = ap->cta_nelems;
797 }
798 
799 static const char *
800 ctf_strptr(linker_ctf_t *lc, int name)
801 {
802 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
803 	const char *strp = "";
804 
805 	if (name < 0 || name >= hp->cth_strlen)
806 		return(strp);
807 
808 	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
809 
810 	return (strp);
811 }
812 
813 static void
814 ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
815 {
816 	ctf_decl_node_t *cdp;
817 	ctf_decl_prec_t prec;
818 	uint_t kind, n = 1;
819 	int is_qual = 0;
820 
821 	const ctf_type_t *tp;
822 	ctf_arinfo_t ar;
823 
824 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
825 		cd->cd_err = ENOENT;
826 		return;
827 	}
828 
829 	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
830 	case CTF_K_ARRAY:
831 		fbt_array_info(lc, type, &ar);
832 		ctf_decl_push(cd, lc, ar.ctr_contents);
833 		n = ar.ctr_nelems;
834 		prec = CTF_PREC_ARRAY;
835 		break;
836 
837 	case CTF_K_TYPEDEF:
838 		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
839 			ctf_decl_push(cd, lc, tp->ctt_type);
840 			return;
841 		}
842 		prec = CTF_PREC_BASE;
843 		break;
844 
845 	case CTF_K_FUNCTION:
846 		ctf_decl_push(cd, lc, tp->ctt_type);
847 		prec = CTF_PREC_FUNCTION;
848 		break;
849 
850 	case CTF_K_POINTER:
851 		ctf_decl_push(cd, lc, tp->ctt_type);
852 		prec = CTF_PREC_POINTER;
853 		break;
854 
855 	case CTF_K_VOLATILE:
856 	case CTF_K_CONST:
857 	case CTF_K_RESTRICT:
858 		ctf_decl_push(cd, lc, tp->ctt_type);
859 		prec = cd->cd_qualp;
860 		is_qual++;
861 		break;
862 
863 	default:
864 		prec = CTF_PREC_BASE;
865 	}
866 
867 	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
868 	cdp->cd_type = type;
869 	cdp->cd_kind = kind;
870 	cdp->cd_n = n;
871 
872 	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
873 		cd->cd_order[prec] = cd->cd_ordp++;
874 
875 	/*
876 	 * Reset cd_qualp to the highest precedence level that we've seen so
877 	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
878 	 */
879 	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
880 		cd->cd_qualp = prec;
881 
882 	/*
883 	 * C array declarators are ordered inside out so prepend them.  Also by
884 	 * convention qualifiers of base types precede the type specifier (e.g.
885 	 * const int vs. int const) even though the two forms are equivalent.
886 	 */
887 	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
888 		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
889 	else
890 		ctf_list_append(&cd->cd_nodes[prec], cdp);
891 }
892 
893 static void
894 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
895 {
896 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
897 	va_list ap;
898 	size_t n;
899 
900 	va_start(ap, format);
901 	n = vsnprintf(cd->cd_ptr, len, format, ap);
902 	va_end(ap);
903 
904 	cd->cd_ptr += MIN(n, len);
905 	cd->cd_len += n;
906 }
907 
908 static ssize_t
909 fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
910 {
911 	ctf_decl_t cd;
912 	ctf_decl_node_t *cdp;
913 	ctf_decl_prec_t prec, lp, rp;
914 	int ptr, arr;
915 	uint_t k;
916 
917 	if (lc == NULL && type == CTF_ERR)
918 		return (-1); /* simplify caller code by permitting CTF_ERR */
919 
920 	ctf_decl_init(&cd, buf, len);
921 	ctf_decl_push(&cd, lc, type);
922 
923 	if (cd.cd_err != 0) {
924 		ctf_decl_fini(&cd);
925 		return (-1);
926 	}
927 
928 	/*
929 	 * If the type graph's order conflicts with lexical precedence order
930 	 * for pointers or arrays, then we need to surround the declarations at
931 	 * the corresponding lexical precedence with parentheses.  This can
932 	 * result in either a parenthesized pointer (*) as in int (*)() or
933 	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
934 	 */
935 	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
936 	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
937 
938 	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
939 	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
940 
941 	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
942 
943 	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
944 		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
945 		    cdp != NULL; cdp = ctf_list_next(cdp)) {
946 
947 			const ctf_type_t *tp =
948 			    ctf_lookup_by_id(lc, cdp->cd_type);
949 			const char *name = ctf_strptr(lc, tp->ctt_name);
950 
951 			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
952 				ctf_decl_sprintf(&cd, " ");
953 
954 			if (lp == prec) {
955 				ctf_decl_sprintf(&cd, "(");
956 				lp = -1;
957 			}
958 
959 			switch (cdp->cd_kind) {
960 			case CTF_K_INTEGER:
961 			case CTF_K_FLOAT:
962 			case CTF_K_TYPEDEF:
963 				ctf_decl_sprintf(&cd, "%s", name);
964 				break;
965 			case CTF_K_POINTER:
966 				ctf_decl_sprintf(&cd, "*");
967 				break;
968 			case CTF_K_ARRAY:
969 				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
970 				break;
971 			case CTF_K_FUNCTION:
972 				ctf_decl_sprintf(&cd, "()");
973 				break;
974 			case CTF_K_STRUCT:
975 			case CTF_K_FORWARD:
976 				ctf_decl_sprintf(&cd, "struct %s", name);
977 				break;
978 			case CTF_K_UNION:
979 				ctf_decl_sprintf(&cd, "union %s", name);
980 				break;
981 			case CTF_K_ENUM:
982 				ctf_decl_sprintf(&cd, "enum %s", name);
983 				break;
984 			case CTF_K_VOLATILE:
985 				ctf_decl_sprintf(&cd, "volatile");
986 				break;
987 			case CTF_K_CONST:
988 				ctf_decl_sprintf(&cd, "const");
989 				break;
990 			case CTF_K_RESTRICT:
991 				ctf_decl_sprintf(&cd, "restrict");
992 				break;
993 			}
994 
995 			k = cdp->cd_kind;
996 		}
997 
998 		if (rp == prec)
999 			ctf_decl_sprintf(&cd, ")");
1000 	}
1001 
1002 	ctf_decl_fini(&cd);
1003 	return (cd.cd_len);
1004 }
1005 
1006 static void
1007 fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1008 {
1009 	const ushort_t *dp;
1010 	fbt_probe_t *fbt = parg;
1011 	linker_ctf_t lc;
1012 	modctl_t *ctl = fbt->fbtp_ctl;
1013 	int ndx = desc->dtargd_ndx;
1014 	int symindx = fbt->fbtp_symindx;
1015 	uint32_t *ctfoff;
1016 	uint32_t offset;
1017 	ushort_t info, kind, n;
1018 
1019 	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1020 		(void) strcpy(desc->dtargd_native, "int");
1021 		return;
1022 	}
1023 
1024 	desc->dtargd_ndx = DTRACE_ARGNONE;
1025 
1026 	/* Get a pointer to the CTF data and it's length. */
1027 	if (linker_ctf_get(ctl, &lc) != 0)
1028 		/* No CTF data? Something wrong? *shrug* */
1029 		return;
1030 
1031 	/* Check if this module hasn't been initialised yet. */
1032 	if (*lc.ctfoffp == NULL) {
1033 		/*
1034 		 * Initialise the CTF object and function symindx to
1035 		 * byte offset array.
1036 		 */
1037 		if (fbt_ctfoff_init(ctl, &lc) != 0)
1038 			return;
1039 
1040 		/* Initialise the CTF type to byte offset array. */
1041 		if (fbt_typoff_init(&lc) != 0)
1042 			return;
1043 	}
1044 
1045 	ctfoff = *lc.ctfoffp;
1046 
1047 	if (ctfoff == NULL || *lc.typoffp == NULL)
1048 		return;
1049 
1050 	/* Check if the symbol index is out of range. */
1051 	if (symindx >= lc.nsym)
1052 		return;
1053 
1054 	/* Check if the symbol isn't cross-referenced. */
1055 	if ((offset = ctfoff[symindx]) == 0xffffffff)
1056 		return;
1057 
1058 	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1059 
1060 	info = *dp++;
1061 	kind = CTF_INFO_KIND(info);
1062 	n = CTF_INFO_VLEN(info);
1063 
1064 	if (kind == CTF_K_UNKNOWN && n == 0) {
1065 		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1066 		return;
1067 	}
1068 
1069 	if (kind != CTF_K_FUNCTION) {
1070 		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1071 		return;
1072 	}
1073 
1074 	if (fbt->fbtp_roffset != 0) {
1075 		/* Only return type is available for args[1] in return probe. */
1076 		if (ndx > 1)
1077 			return;
1078 		ASSERT(ndx == 1);
1079 	} else {
1080 		/* Check if the requested argument doesn't exist. */
1081 		if (ndx >= n)
1082 			return;
1083 
1084 		/* Skip the return type and arguments up to the one requested. */
1085 		dp += ndx + 1;
1086 	}
1087 
1088 	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1089 		desc->dtargd_ndx = ndx;
1090 
1091 	return;
1092 }
1093 
1094 static int
1095 fbt_linker_file_cb(linker_file_t lf, void *arg)
1096 {
1097 
1098 	fbt_provide_module(arg, lf);
1099 
1100 	return (0);
1101 }
1102 
1103 static void
1104 fbt_load(void *dummy)
1105 {
1106 	/* Create the /dev/dtrace/fbt entry. */
1107 	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1108 	    "dtrace/fbt");
1109 
1110 	/* Default the probe table size if not specified. */
1111 	if (fbt_probetab_size == 0)
1112 		fbt_probetab_size = FBT_PROBETAB_SIZE;
1113 
1114 	/* Choose the hash mask for the probe table. */
1115 	fbt_probetab_mask = fbt_probetab_size - 1;
1116 
1117 	/* Allocate memory for the probe table. */
1118 	fbt_probetab =
1119 	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1120 
1121 	dtrace_doubletrap_func = fbt_doubletrap;
1122 	dtrace_invop_add(fbt_invop);
1123 
1124 	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1125 	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1126 		return;
1127 
1128 	/* Create probes for the kernel and already-loaded modules. */
1129 	linker_file_foreach(fbt_linker_file_cb, NULL);
1130 }
1131 
1132 static int
1133 fbt_unload()
1134 {
1135 	int error = 0;
1136 
1137 	/* De-register the invalid opcode handler. */
1138 	dtrace_invop_remove(fbt_invop);
1139 
1140 	dtrace_doubletrap_func = NULL;
1141 
1142 	/* De-register this DTrace provider. */
1143 	if ((error = dtrace_unregister(fbt_id)) != 0)
1144 		return (error);
1145 
1146 	/* Free the probe table. */
1147 	free(fbt_probetab, M_FBT);
1148 	fbt_probetab = NULL;
1149 	fbt_probetab_mask = 0;
1150 
1151 	destroy_dev(fbt_cdev);
1152 
1153 	return (error);
1154 }
1155 
1156 static int
1157 fbt_modevent(module_t mod __unused, int type, void *data __unused)
1158 {
1159 	int error = 0;
1160 
1161 	switch (type) {
1162 	case MOD_LOAD:
1163 		break;
1164 
1165 	case MOD_UNLOAD:
1166 		break;
1167 
1168 	case MOD_SHUTDOWN:
1169 		break;
1170 
1171 	default:
1172 		error = EOPNOTSUPP;
1173 		break;
1174 
1175 	}
1176 
1177 	return (error);
1178 }
1179 
1180 static int
1181 fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1182 {
1183 	return (0);
1184 }
1185 
/*
 * fbt_load() and fbt_unload() are hooked in at SI_SUB_DTRACE_PROVIDER through
 * SYSINIT/SYSUNINIT; fbt_modevent() itself performs no set-up or tear-down.
 */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/* Module declaration, version and dependencies. */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1193