xref: /freebsd/sys/cddl/dev/fbt/fbt.c (revision f2d48b5e2c3b45850585e4d7aee324fe148afbf2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22  *
23  * $FreeBSD$
24  *
25  */
26 
27 /*
28  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29  * Use is subject to license terms.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cpuvar.h>
37 #include <sys/endian.h>
38 #include <sys/fcntl.h>
39 #include <sys/filio.h>
40 #include <sys/kdb.h>
41 #include <sys/kernel.h>
42 #include <sys/kmem.h>
43 #include <sys/kthread.h>
44 #include <sys/limits.h>
45 #include <sys/linker.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/pcpu.h>
51 #include <sys/poll.h>
52 #include <sys/proc.h>
53 #include <sys/selinfo.h>
54 #include <sys/smp.h>
55 #include <sys/syscall.h>
56 #include <sys/sysent.h>
57 #include <sys/sysproto.h>
58 #include <sys/uio.h>
59 #include <sys/unistd.h>
60 #include <machine/stdarg.h>
61 
62 #include <sys/dtrace.h>
63 #include <sys/dtrace_bsd.h>
64 
65 #include "fbt.h"
66 
/* Malloc type used for the M_FBT allocations made in this file. */
MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/* Provider id; filled in by dtrace_register() in fbt_load(). */
dtrace_provider_id_t	fbt_id;
/* Probe hash table; allocated in fbt_load() and indexed via FBT_ADDR2NDX(). */
fbt_probe_t		**fbt_probetab;
/* Set to fbt_probetab_size - 1 in fbt_load(); reset to 0 in fbt_unload(). */
int			fbt_probetab_mask;
72 
/*
 * Forward declarations for the functions referenced by the cdevsw and
 * dtrace_pops_t tables and by the SYSINIT/SYSUNINIT hooks in this file.
 */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);
83 
/*
 * Character device switch for the device node created in fbt_load().
 * Only an open handler is supplied.
 */
static struct cdevsw fbt_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= fbt_open,
	.d_name		= "fbt",
};
89 
/*
 * Stability attributes handed to dtrace_register() in fbt_load().
 * NOTE(review): the five rows presumably follow the dtrace_pattr_t member
 * order (provider, module, function, name, args) -- confirm against the
 * dtrace_pattr_t definition.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
97 
/*
 * Provider operations vector handed to dtrace_register() in fbt_load().
 * Probes are created per module (dtps_provide_module); the provide,
 * getargval and usermode entry points are left NULL.
 */
static dtrace_pops_t fbt_pops = {
	.dtps_provide =		NULL,
	.dtps_provide_module =	fbt_provide_module,
	.dtps_enable =		fbt_enable,
	.dtps_disable =		fbt_disable,
	.dtps_suspend =		fbt_suspend,
	.dtps_resume =		fbt_resume,
	.dtps_getargdesc =	fbt_getargdesc,
	.dtps_getargval =	NULL,
	.dtps_usermode =	NULL,
	.dtps_destroy =		fbt_destroy
};
110 
/* Device node created by make_dev() in fbt_load(), destroyed in fbt_unload(). */
static struct cdev		*fbt_cdev;
/* Number of hash buckets; defaulted to FBT_PROBETAB_SIZE in fbt_load() if 0. */
static int			fbt_probetab_size;
/* When non-zero, fbt_enable() prints a diagnostic for reloaded modules. */
static int			fbt_verbose = 0;
114 
/*
 * Decide whether the function called `name` must be kept out of FBT
 * instrumentation.
 *
 * Returns 1 when the function must not be instrumented, 0 otherwise.
 */
int
fbt_excluded(const char *name)
{
	/* Lock owner methods may be called from probe context. */
	static const char *const lock_owner_fns[] = {
		"owner_mtx",
		"owner_rm",
		"owner_rw",
		"owner_sx",
	};
	size_t k;

	/*
	 * Anything beginning with "dtrace_" may be called from probe
	 * context unless it explicitly opts out of that with the
	 * "dtrace_safe_" prefix.
	 */
	if (strncmp(name, "dtrace_", 7) == 0) {
		if (strncmp(name, "dtrace_safe_", 12) != 0)
			return (1);
	}

	for (k = 0; k < sizeof(lock_owner_fns) / sizeof(lock_owner_fns[0]);
	    k++) {
		if (strcmp(name, lock_owner_fns[k]) == 0)
			return (1);
	}

#if defined(__aarch64__) || defined(__riscv)
	/*
	 * Stack unwinders may be called from probe context on these
	 * platforms.
	 */
	if (strcmp(name, "unwind_frame") == 0)
		return (1);
#endif

#ifndef _KLD_MODULE
	/*
	 * When DTrace is built into the kernel the FBT functions
	 * themselves must be excluded from instrumentation.
	 */
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
159 
160 static void
161 fbt_doubletrap(void)
162 {
163 	fbt_probe_t *fbt;
164 	int i;
165 
166 	for (i = 0; i < fbt_probetab_size; i++) {
167 		fbt = fbt_probetab[i];
168 
169 		for (; fbt != NULL; fbt = fbt->fbtp_probenext)
170 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
171 	}
172 }
173 
174 static void
175 fbt_provide_module(void *arg, modctl_t *lf)
176 {
177 	char modname[MAXPATHLEN];
178 	int i;
179 	size_t len;
180 
181 	strlcpy(modname, lf->filename, sizeof(modname));
182 	len = strlen(modname);
183 	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
184 		modname[len - 3] = '\0';
185 
186 	/*
187 	 * Employees of dtrace and their families are ineligible.  Void
188 	 * where prohibited.
189 	 */
190 	if (strcmp(modname, "dtrace") == 0)
191 		return;
192 
193 	/*
194 	 * To register with DTrace, a module must list 'dtrace' as a
195 	 * dependency in order for the kernel linker to resolve
196 	 * symbols like dtrace_register(). All modules with such a
197 	 * dependency are ineligible for FBT tracing.
198 	 */
199 	for (i = 0; i < lf->ndeps; i++)
200 		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
201 			return;
202 
203 	if (lf->fbt_nentries) {
204 		/*
205 		 * This module has some FBT entries allocated; we're afraid
206 		 * to screw with it.
207 		 */
208 		return;
209 	}
210 
211 	/*
212 	 * List the functions in the module and the symbol values.
213 	 */
214 	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
215 }
216 
/*
 * Unlink a single tracepoint record from the probe hash table and free it.
 *
 * A record is either linked directly into its bucket (through
 * fbtp_hashnext) or hangs off a bucket entry with the same patch point
 * (through fbtp_tracenext).  Both cases are handled below; if the record is
 * found in neither position the function panics.
 */
static void
fbt_destroy_one(fbt_probe_t *fbt)
{
	fbt_probe_t *hash, *hashprev, *next;
	int ndx;

	ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
	for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL;
	    hashprev = hash, hash = hash->fbtp_hashnext) {
		if (hash == fbt) {
			/*
			 * fbt sits directly on the bucket chain.  If it has a
			 * tracenext successor, that successor takes its place
			 * in the chain; otherwise the chain simply skips it.
			 */
			if ((next = fbt->fbtp_tracenext) != NULL)
				next->fbtp_hashnext = hash->fbtp_hashnext;
			else
				next = hash->fbtp_hashnext;
			/* Re-point the predecessor (or the bucket head). */
			if (hashprev != NULL)
				hashprev->fbtp_hashnext = next;
			else
				fbt_probetab[ndx] = next;
			goto free;
		} else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
			/*
			 * A different record owns the bucket slot for this
			 * patch point; look for fbt on its tracenext list and
			 * splice it out.
			 */
			for (next = hash; next->fbtp_tracenext != NULL;
			    next = next->fbtp_tracenext) {
				if (fbt == next->fbtp_tracenext) {
					next->fbtp_tracenext =
					    fbt->fbtp_tracenext;
					goto free;
				}
			}
		}
	}
	panic("probe %p not found in hash table", fbt);
free:
	free(fbt, M_FBT);
}
251 
252 static void
253 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
254 {
255 	fbt_probe_t *fbt = parg, *next;
256 	modctl_t *ctl;
257 
258 	do {
259 		ctl = fbt->fbtp_ctl;
260 		ctl->fbt_nentries--;
261 
262 		next = fbt->fbtp_probenext;
263 		fbt_destroy_one(fbt);
264 		fbt = next;
265 	} while (fbt != NULL);
266 }
267 
268 static void
269 fbt_enable(void *arg, dtrace_id_t id, void *parg)
270 {
271 	fbt_probe_t *fbt = parg;
272 	modctl_t *ctl = fbt->fbtp_ctl;
273 
274 	ctl->nenabled++;
275 
276 	/*
277 	 * Now check that our modctl has the expected load count.  If it
278 	 * doesn't, this module must have been unloaded and reloaded -- and
279 	 * we're not going to touch it.
280 	 */
281 	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
282 		if (fbt_verbose) {
283 			printf("fbt is failing for probe %s "
284 			    "(module %s reloaded)",
285 			    fbt->fbtp_name, ctl->filename);
286 		}
287 
288 		return;
289 	}
290 
291 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
292 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
293 		fbt->fbtp_enabled++;
294 	}
295 }
296 
297 static void
298 fbt_disable(void *arg, dtrace_id_t id, void *parg)
299 {
300 	fbt_probe_t *fbt = parg, *hash;
301 	modctl_t *ctl = fbt->fbtp_ctl;
302 
303 	ASSERT(ctl->nenabled > 0);
304 	ctl->nenabled--;
305 
306 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
307 		return;
308 
309 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
310 		fbt->fbtp_enabled--;
311 
312 		for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)];
313 		    hash != NULL; hash = hash->fbtp_hashnext) {
314 			if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
315 				for (; hash != NULL; hash = hash->fbtp_tracenext)
316 					if (hash->fbtp_enabled > 0)
317 						break;
318 				break;
319 			}
320 		}
321 		if (hash == NULL)
322 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
323 	}
324 }
325 
326 static void
327 fbt_suspend(void *arg, dtrace_id_t id, void *parg)
328 {
329 	fbt_probe_t *fbt = parg;
330 	modctl_t *ctl = fbt->fbtp_ctl;
331 
332 	ASSERT(ctl->nenabled > 0);
333 
334 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
335 		return;
336 
337 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
338 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
339 }
340 
341 static void
342 fbt_resume(void *arg, dtrace_id_t id, void *parg)
343 {
344 	fbt_probe_t *fbt = parg;
345 	modctl_t *ctl = fbt->fbtp_ctl;
346 
347 	ASSERT(ctl->nenabled > 0);
348 
349 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
350 		return;
351 
352 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
353 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
354 }
355 
356 static int
357 fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
358 {
359 	const Elf_Sym *symp = lc->symtab;;
360 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
361 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
362 	int i;
363 	uint32_t *ctfoff;
364 	uint32_t objtoff = hp->cth_objtoff;
365 	uint32_t funcoff = hp->cth_funcoff;
366 	ushort_t info;
367 	ushort_t vlen;
368 
369 	/* Sanity check. */
370 	if (hp->cth_magic != CTF_MAGIC) {
371 		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
372 		return (EINVAL);
373 	}
374 
375 	if (lc->symtab == NULL) {
376 		printf("No symbol table in '%s'\n",lf->pathname);
377 		return (EINVAL);
378 	}
379 
380 	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
381 	*lc->ctfoffp = ctfoff;
382 
383 	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
384 		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
385 			*ctfoff = 0xffffffff;
386 			continue;
387 		}
388 
389 		switch (ELF_ST_TYPE(symp->st_info)) {
390 		case STT_OBJECT:
391 			if (objtoff >= hp->cth_funcoff ||
392                             (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
393 				*ctfoff = 0xffffffff;
394                                 break;
395                         }
396 
397                         *ctfoff = objtoff;
398                         objtoff += sizeof (ushort_t);
399 			break;
400 
401 		case STT_FUNC:
402 			if (funcoff >= hp->cth_typeoff) {
403 				*ctfoff = 0xffffffff;
404 				break;
405 			}
406 
407 			*ctfoff = funcoff;
408 
409 			info = *((const ushort_t *)(ctfdata + funcoff));
410 			vlen = CTF_INFO_VLEN(info);
411 
412 			/*
413 			 * If we encounter a zero pad at the end, just skip it.
414 			 * Otherwise skip over the function and its return type
415 			 * (+2) and the argument list (vlen).
416 			 */
417 			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
418 				funcoff += sizeof (ushort_t); /* skip pad */
419 			else
420 				funcoff += sizeof (ushort_t) * (vlen + 2);
421 			break;
422 
423 		default:
424 			*ctfoff = 0xffffffff;
425 			break;
426 		}
427 	}
428 
429 	return (0);
430 }
431 
432 static ssize_t
433 fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
434     ssize_t *incrementp)
435 {
436 	ssize_t size, increment;
437 
438 	if (version > CTF_VERSION_1 &&
439 	    tp->ctt_size == CTF_LSIZE_SENT) {
440 		size = CTF_TYPE_LSIZE(tp);
441 		increment = sizeof (ctf_type_t);
442 	} else {
443 		size = tp->ctt_size;
444 		increment = sizeof (ctf_stype_t);
445 	}
446 
447 	if (sizep)
448 		*sizep = size;
449 	if (incrementp)
450 		*incrementp = increment;
451 
452 	return (size);
453 }
454 
455 static int
456 fbt_typoff_init(linker_ctf_t *lc)
457 {
458 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
459 	const ctf_type_t *tbuf;
460 	const ctf_type_t *tend;
461 	const ctf_type_t *tp;
462 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
463 	int ctf_typemax = 0;
464 	uint32_t *xp;
465 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
466 
467 
468 	/* Sanity check. */
469 	if (hp->cth_magic != CTF_MAGIC)
470 		return (EINVAL);
471 
472 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
473 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
474 
475 	int child = hp->cth_parname != 0;
476 
477 	/*
478 	 * We make two passes through the entire type section.  In this first
479 	 * pass, we count the number of each type and the total number of types.
480 	 */
481 	for (tp = tbuf; tp < tend; ctf_typemax++) {
482 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
483 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
484 		ssize_t size, increment;
485 
486 		size_t vbytes;
487 		uint_t n;
488 
489 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
490 
491 		switch (kind) {
492 		case CTF_K_INTEGER:
493 		case CTF_K_FLOAT:
494 			vbytes = sizeof (uint_t);
495 			break;
496 		case CTF_K_ARRAY:
497 			vbytes = sizeof (ctf_array_t);
498 			break;
499 		case CTF_K_FUNCTION:
500 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
501 			break;
502 		case CTF_K_STRUCT:
503 		case CTF_K_UNION:
504 			if (size < CTF_LSTRUCT_THRESH) {
505 				ctf_member_t *mp = (ctf_member_t *)
506 				    ((uintptr_t)tp + increment);
507 
508 				vbytes = sizeof (ctf_member_t) * vlen;
509 				for (n = vlen; n != 0; n--, mp++)
510 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
511 			} else {
512 				ctf_lmember_t *lmp = (ctf_lmember_t *)
513 				    ((uintptr_t)tp + increment);
514 
515 				vbytes = sizeof (ctf_lmember_t) * vlen;
516 				for (n = vlen; n != 0; n--, lmp++)
517 					child |=
518 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
519 			}
520 			break;
521 		case CTF_K_ENUM:
522 			vbytes = sizeof (ctf_enum_t) * vlen;
523 			break;
524 		case CTF_K_FORWARD:
525 			/*
526 			 * For forward declarations, ctt_type is the CTF_K_*
527 			 * kind for the tag, so bump that population count too.
528 			 * If ctt_type is unknown, treat the tag as a struct.
529 			 */
530 			if (tp->ctt_type == CTF_K_UNKNOWN ||
531 			    tp->ctt_type >= CTF_K_MAX)
532 				pop[CTF_K_STRUCT]++;
533 			else
534 				pop[tp->ctt_type]++;
535 			/*FALLTHRU*/
536 		case CTF_K_UNKNOWN:
537 			vbytes = 0;
538 			break;
539 		case CTF_K_POINTER:
540 		case CTF_K_TYPEDEF:
541 		case CTF_K_VOLATILE:
542 		case CTF_K_CONST:
543 		case CTF_K_RESTRICT:
544 			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
545 			vbytes = 0;
546 			break;
547 		default:
548 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
549 			return (EIO);
550 		}
551 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
552 		pop[kind]++;
553 	}
554 
555 	/* account for a sentinel value below */
556 	ctf_typemax++;
557 	*lc->typlenp = ctf_typemax;
558 
559 	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
560 	    M_ZERO | M_WAITOK);
561 
562 	*lc->typoffp = xp;
563 
564 	/* type id 0 is used as a sentinel value */
565 	*xp++ = 0;
566 
567 	/*
568 	 * In the second pass, fill in the type offset.
569 	 */
570 	for (tp = tbuf; tp < tend; xp++) {
571 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
572 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
573 		ssize_t size, increment;
574 
575 		size_t vbytes;
576 		uint_t n;
577 
578 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
579 
580 		switch (kind) {
581 		case CTF_K_INTEGER:
582 		case CTF_K_FLOAT:
583 			vbytes = sizeof (uint_t);
584 			break;
585 		case CTF_K_ARRAY:
586 			vbytes = sizeof (ctf_array_t);
587 			break;
588 		case CTF_K_FUNCTION:
589 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
590 			break;
591 		case CTF_K_STRUCT:
592 		case CTF_K_UNION:
593 			if (size < CTF_LSTRUCT_THRESH) {
594 				ctf_member_t *mp = (ctf_member_t *)
595 				    ((uintptr_t)tp + increment);
596 
597 				vbytes = sizeof (ctf_member_t) * vlen;
598 				for (n = vlen; n != 0; n--, mp++)
599 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
600 			} else {
601 				ctf_lmember_t *lmp = (ctf_lmember_t *)
602 				    ((uintptr_t)tp + increment);
603 
604 				vbytes = sizeof (ctf_lmember_t) * vlen;
605 				for (n = vlen; n != 0; n--, lmp++)
606 					child |=
607 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
608 			}
609 			break;
610 		case CTF_K_ENUM:
611 			vbytes = sizeof (ctf_enum_t) * vlen;
612 			break;
613 		case CTF_K_FORWARD:
614 		case CTF_K_UNKNOWN:
615 			vbytes = 0;
616 			break;
617 		case CTF_K_POINTER:
618 		case CTF_K_TYPEDEF:
619 		case CTF_K_VOLATILE:
620 		case CTF_K_CONST:
621 		case CTF_K_RESTRICT:
622 			vbytes = 0;
623 			break;
624 		default:
625 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
626 			return (EIO);
627 		}
628 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
629 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
630 	}
631 
632 	return (0);
633 }
634 
635 /*
636  * CTF Declaration Stack
637  *
638  * In order to implement ctf_type_name(), we must convert a type graph back
639  * into a C type declaration.  Unfortunately, a type graph represents a storage
640  * class ordering of the type whereas a type declaration must obey the C rules
641  * for operator precedence, and the two orderings are frequently in conflict.
642  * For example, consider these CTF type graphs and their C declarations:
643  *
644  * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
645  * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
646  *
647  * In each case, parentheses are used to raise operator * to higher lexical
648  * precedence, so the string form of the C declaration cannot be constructed by
649  * walking the type graph links and forming the string from left to right.
650  *
651  * The functions in this file build a set of stacks from the type graph nodes
652  * corresponding to the C operator precedence levels in the appropriate order.
653  * The code in ctf_type_name() can then iterate over the levels and nodes in
654  * lexical precedence order and construct the final C declaration string.
655  */
/*
 * Link structure embedded as the first member of every list element, and
 * also used on its own as the list head (l_next = head, l_prev = tail).
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))

/*
 * Precedence levels of the declaration stacks; CTF_PREC_MAX is the number
 * of levels and sizes the per-level arrays in ctf_decl_t.
 */
typedef enum {
	CTF_PREC_BASE,
	CTF_PREC_POINTER,
	CTF_PREC_ARRAY,
	CTF_PREC_FUNCTION,
	CTF_PREC_MAX
} ctf_decl_prec_t;

/* One node of a declaration stack; allocated in ctf_decl_push(). */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;

/* Working state for turning a CTF type graph into a C declaration string. */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* precedence level qualifiers are pushed at */
	ctf_decl_prec_t cd_ordp;		/* next storage-order value to assign */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
690 
691 /*
692  * Simple doubly-linked list append routine.  This implementation assumes that
693  * each list element contains an embedded ctf_list_t as the first member.
694  * An additional ctf_list_t is used to store the head (l_next) and tail
695  * (l_prev) pointers.  The current head and tail list elements have their
696  * previous and next pointers set to NULL, respectively.
697  */
698 static void
699 ctf_list_append(ctf_list_t *lp, void *new)
700 {
701 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
702 	ctf_list_t *q = new;		/* q = new list element */
703 
704 	lp->l_prev = q;
705 	q->l_prev = p;
706 	q->l_next = NULL;
707 
708 	if (p != NULL)
709 		p->l_next = q;
710 	else
711 		lp->l_next = q;
712 }
713 
714 /*
715  * Prepend the specified existing element to the given ctf_list_t.  The
716  * existing pointer should be pointing at a struct with embedded ctf_list_t.
717  */
718 static void
719 ctf_list_prepend(ctf_list_t *lp, void *new)
720 {
721 	ctf_list_t *p = new;		/* p = new list element */
722 	ctf_list_t *q = lp->l_next;	/* q = head list element */
723 
724 	lp->l_next = p;
725 	p->l_prev = NULL;
726 	p->l_next = q;
727 
728 	if (q != NULL)
729 		q->l_prev = p;
730 	else
731 		lp->l_prev = p;
732 }
733 
734 static void
735 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
736 {
737 	int i;
738 
739 	bzero(cd, sizeof (ctf_decl_t));
740 
741 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
742 		cd->cd_order[i] = CTF_PREC_BASE - 1;
743 
744 	cd->cd_qualp = CTF_PREC_BASE;
745 	cd->cd_ordp = CTF_PREC_BASE;
746 
747 	cd->cd_buf = buf;
748 	cd->cd_ptr = buf;
749 	cd->cd_end = buf + len;
750 }
751 
752 static void
753 ctf_decl_fini(ctf_decl_t *cd)
754 {
755 	ctf_decl_node_t *cdp, *ndp;
756 	int i;
757 
758 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
759 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
760 		    cdp != NULL; cdp = ndp) {
761 			ndp = ctf_list_next(cdp);
762 			free(cdp, M_FBT);
763 		}
764 	}
765 }
766 
767 static const ctf_type_t *
768 ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
769 {
770 	const ctf_type_t *tp;
771 	uint32_t offset;
772 	uint32_t *typoff = *lc->typoffp;
773 
774 	if (type >= *lc->typlenp) {
775 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
776 		return(NULL);
777 	}
778 
779 	/* Check if the type isn't cross-referenced. */
780 	if ((offset = typoff[type]) == 0) {
781 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
782 		return(NULL);
783 	}
784 
785 	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
786 
787 	return (tp);
788 }
789 
790 static void
791 fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
792 {
793 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
794 	const ctf_type_t *tp;
795 	const ctf_array_t *ap;
796 	ssize_t increment;
797 
798 	bzero(arp, sizeof(*arp));
799 
800 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
801 		return;
802 
803 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
804 		return;
805 
806 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
807 
808 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
809 	arp->ctr_contents = ap->cta_contents;
810 	arp->ctr_index = ap->cta_index;
811 	arp->ctr_nelems = ap->cta_nelems;
812 }
813 
814 static const char *
815 ctf_strptr(linker_ctf_t *lc, int name)
816 {
817 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
818 	const char *strp = "";
819 
820 	if (name < 0 || name >= hp->cth_strlen)
821 		return(strp);
822 
823 	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
824 
825 	return (strp);
826 }
827 
828 static void
829 ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
830 {
831 	ctf_decl_node_t *cdp;
832 	ctf_decl_prec_t prec;
833 	uint_t kind, n = 1;
834 	int is_qual = 0;
835 
836 	const ctf_type_t *tp;
837 	ctf_arinfo_t ar;
838 
839 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
840 		cd->cd_err = ENOENT;
841 		return;
842 	}
843 
844 	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
845 	case CTF_K_ARRAY:
846 		fbt_array_info(lc, type, &ar);
847 		ctf_decl_push(cd, lc, ar.ctr_contents);
848 		n = ar.ctr_nelems;
849 		prec = CTF_PREC_ARRAY;
850 		break;
851 
852 	case CTF_K_TYPEDEF:
853 		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
854 			ctf_decl_push(cd, lc, tp->ctt_type);
855 			return;
856 		}
857 		prec = CTF_PREC_BASE;
858 		break;
859 
860 	case CTF_K_FUNCTION:
861 		ctf_decl_push(cd, lc, tp->ctt_type);
862 		prec = CTF_PREC_FUNCTION;
863 		break;
864 
865 	case CTF_K_POINTER:
866 		ctf_decl_push(cd, lc, tp->ctt_type);
867 		prec = CTF_PREC_POINTER;
868 		break;
869 
870 	case CTF_K_VOLATILE:
871 	case CTF_K_CONST:
872 	case CTF_K_RESTRICT:
873 		ctf_decl_push(cd, lc, tp->ctt_type);
874 		prec = cd->cd_qualp;
875 		is_qual++;
876 		break;
877 
878 	default:
879 		prec = CTF_PREC_BASE;
880 	}
881 
882 	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
883 	cdp->cd_type = type;
884 	cdp->cd_kind = kind;
885 	cdp->cd_n = n;
886 
887 	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
888 		cd->cd_order[prec] = cd->cd_ordp++;
889 
890 	/*
891 	 * Reset cd_qualp to the highest precedence level that we've seen so
892 	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
893 	 */
894 	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
895 		cd->cd_qualp = prec;
896 
897 	/*
898 	 * C array declarators are ordered inside out so prepend them.  Also by
899 	 * convention qualifiers of base types precede the type specifier (e.g.
900 	 * const int vs. int const) even though the two forms are equivalent.
901 	 */
902 	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
903 		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
904 	else
905 		ctf_list_append(&cd->cd_nodes[prec], cdp);
906 }
907 
908 static void
909 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
910 {
911 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
912 	va_list ap;
913 	size_t n;
914 
915 	va_start(ap, format);
916 	n = vsnprintf(cd->cd_ptr, len, format, ap);
917 	va_end(ap);
918 
919 	cd->cd_ptr += MIN(n, len);
920 	cd->cd_len += n;
921 }
922 
/*
 * Render the C declaration for CTF type `type` into `buf` (of `len` bytes).
 *
 * The type graph is first pushed onto per-precedence stacks with
 * ctf_decl_push(); the stacks are then emitted in precedence order, with
 * parentheses inserted where the graph order conflicts with C's lexical
 * precedence.
 *
 * Returns -1 when `lc` is NULL and `type` is CTF_ERR, or when pushing the
 * type failed; otherwise returns cd_len, the length accumulated by
 * ctf_decl_sprintf() (which keeps counting even if the output did not fit).
 */
static ssize_t
fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
{
	ctf_decl_t cd;
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec, lp, rp;
	int ptr, arr;
	uint_t k;

	if (lc == NULL && type == CTF_ERR)
		return (-1); /* simplify caller code by permitting CTF_ERR */

	ctf_decl_init(&cd, buf, len);
	ctf_decl_push(&cd, lc, type);

	if (cd.cd_err != 0) {
		ctf_decl_fini(&cd);
		return (-1);
	}

	/*
	 * If the type graph's order conflicts with lexical precedence order
	 * for pointers or arrays, then we need to surround the declarations at
	 * the corresponding lexical precedence with parentheses.  This can
	 * result in either a parenthesized pointer (*) as in int (*)() or
	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
	 */
	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;

	/* rp/lp: levels after/before which ")" / "(" is emitted; -1 = none. */
	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;

	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */

	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
		    cdp != NULL; cdp = ctf_list_next(cdp)) {

			/*
			 * Every node on the stacks was created by
			 * ctf_decl_push() after a successful lookup of the
			 * same type id, so the result is used unchecked.
			 */
			const ctf_type_t *tp =
			    ctf_lookup_by_id(lc, cdp->cd_type);
			const char *name = ctf_strptr(lc, tp->ctt_name);

			/* Separate tokens, except right after "*" or "[n]". */
			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
				ctf_decl_sprintf(&cd, " ");

			/* Open the parenthesis once, at the first node of lp. */
			if (lp == prec) {
				ctf_decl_sprintf(&cd, "(");
				lp = -1;
			}

			switch (cdp->cd_kind) {
			case CTF_K_INTEGER:
			case CTF_K_FLOAT:
			case CTF_K_TYPEDEF:
				ctf_decl_sprintf(&cd, "%s", name);
				break;
			case CTF_K_POINTER:
				ctf_decl_sprintf(&cd, "*");
				break;
			case CTF_K_ARRAY:
				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
				break;
			case CTF_K_FUNCTION:
				ctf_decl_sprintf(&cd, "()");
				break;
			case CTF_K_STRUCT:
			case CTF_K_FORWARD:
				/* Forward declarations are printed as structs. */
				ctf_decl_sprintf(&cd, "struct %s", name);
				break;
			case CTF_K_UNION:
				ctf_decl_sprintf(&cd, "union %s", name);
				break;
			case CTF_K_ENUM:
				ctf_decl_sprintf(&cd, "enum %s", name);
				break;
			case CTF_K_VOLATILE:
				ctf_decl_sprintf(&cd, "volatile");
				break;
			case CTF_K_CONST:
				ctf_decl_sprintf(&cd, "const");
				break;
			case CTF_K_RESTRICT:
				ctf_decl_sprintf(&cd, "restrict");
				break;
			}

			/* Remember the kind just emitted for the spacing rule. */
			k = cdp->cd_kind;
		}

		/* Close the parenthesis after the last node of level rp. */
		if (rp == prec)
			ctf_decl_sprintf(&cd, ")");
	}

	ctf_decl_fini(&cd);
	return (cd.cd_len);
}
1020 
1021 static void
1022 fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1023 {
1024 	const ushort_t *dp;
1025 	fbt_probe_t *fbt = parg;
1026 	linker_ctf_t lc;
1027 	modctl_t *ctl = fbt->fbtp_ctl;
1028 	int ndx = desc->dtargd_ndx;
1029 	int symindx = fbt->fbtp_symindx;
1030 	uint32_t *ctfoff;
1031 	uint32_t offset;
1032 	ushort_t info, kind, n;
1033 
1034 	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1035 		(void) strcpy(desc->dtargd_native, "int");
1036 		return;
1037 	}
1038 
1039 	desc->dtargd_ndx = DTRACE_ARGNONE;
1040 
1041 	/* Get a pointer to the CTF data and it's length. */
1042 	if (linker_ctf_get(ctl, &lc) != 0)
1043 		/* No CTF data? Something wrong? *shrug* */
1044 		return;
1045 
1046 	/* Check if this module hasn't been initialised yet. */
1047 	if (*lc.ctfoffp == NULL) {
1048 		/*
1049 		 * Initialise the CTF object and function symindx to
1050 		 * byte offset array.
1051 		 */
1052 		if (fbt_ctfoff_init(ctl, &lc) != 0)
1053 			return;
1054 
1055 		/* Initialise the CTF type to byte offset array. */
1056 		if (fbt_typoff_init(&lc) != 0)
1057 			return;
1058 	}
1059 
1060 	ctfoff = *lc.ctfoffp;
1061 
1062 	if (ctfoff == NULL || *lc.typoffp == NULL)
1063 		return;
1064 
1065 	/* Check if the symbol index is out of range. */
1066 	if (symindx >= lc.nsym)
1067 		return;
1068 
1069 	/* Check if the symbol isn't cross-referenced. */
1070 	if ((offset = ctfoff[symindx]) == 0xffffffff)
1071 		return;
1072 
1073 	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1074 
1075 	info = *dp++;
1076 	kind = CTF_INFO_KIND(info);
1077 	n = CTF_INFO_VLEN(info);
1078 
1079 	if (kind == CTF_K_UNKNOWN && n == 0) {
1080 		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1081 		return;
1082 	}
1083 
1084 	if (kind != CTF_K_FUNCTION) {
1085 		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1086 		return;
1087 	}
1088 
1089 	if (fbt->fbtp_roffset != 0) {
1090 		/* Only return type is available for args[1] in return probe. */
1091 		if (ndx > 1)
1092 			return;
1093 		ASSERT(ndx == 1);
1094 	} else {
1095 		/* Check if the requested argument doesn't exist. */
1096 		if (ndx >= n)
1097 			return;
1098 
1099 		/* Skip the return type and arguments up to the one requested. */
1100 		dp += ndx + 1;
1101 	}
1102 
1103 	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1104 		desc->dtargd_ndx = ndx;
1105 
1106 	return;
1107 }
1108 
1109 static int
1110 fbt_linker_file_cb(linker_file_t lf, void *arg)
1111 {
1112 
1113 	fbt_provide_module(arg, lf);
1114 
1115 	return (0);
1116 }
1117 
1118 static void
1119 fbt_load(void *dummy)
1120 {
1121 	/* Create the /dev/dtrace/fbt entry. */
1122 	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1123 	    "dtrace/fbt");
1124 
1125 	/* Default the probe table size if not specified. */
1126 	if (fbt_probetab_size == 0)
1127 		fbt_probetab_size = FBT_PROBETAB_SIZE;
1128 
1129 	/* Choose the hash mask for the probe table. */
1130 	fbt_probetab_mask = fbt_probetab_size - 1;
1131 
1132 	/* Allocate memory for the probe table. */
1133 	fbt_probetab =
1134 	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1135 
1136 	dtrace_doubletrap_func = fbt_doubletrap;
1137 	dtrace_invop_add(fbt_invop);
1138 
1139 	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1140 	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1141 		return;
1142 
1143 	/* Create probes for the kernel and already-loaded modules. */
1144 	linker_file_foreach(fbt_linker_file_cb, NULL);
1145 }
1146 
1147 static int
1148 fbt_unload()
1149 {
1150 	int error = 0;
1151 
1152 	/* De-register the invalid opcode handler. */
1153 	dtrace_invop_remove(fbt_invop);
1154 
1155 	dtrace_doubletrap_func = NULL;
1156 
1157 	/* De-register this DTrace provider. */
1158 	if ((error = dtrace_unregister(fbt_id)) != 0)
1159 		return (error);
1160 
1161 	/* Free the probe table. */
1162 	free(fbt_probetab, M_FBT);
1163 	fbt_probetab = NULL;
1164 	fbt_probetab_mask = 0;
1165 
1166 	destroy_dev(fbt_cdev);
1167 
1168 	return (error);
1169 }
1170 
1171 static int
1172 fbt_modevent(module_t mod __unused, int type, void *data __unused)
1173 {
1174 	int error = 0;
1175 
1176 	switch (type) {
1177 	case MOD_LOAD:
1178 		break;
1179 
1180 	case MOD_UNLOAD:
1181 		break;
1182 
1183 	case MOD_SHUTDOWN:
1184 		break;
1185 
1186 	default:
1187 		error = EOPNOTSUPP;
1188 		break;
1189 
1190 	}
1191 
1192 	return (error);
1193 }
1194 
1195 static int
1196 fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1197 {
1198 	return (0);
1199 }
1200 
/*
 * fbt_load()/fbt_unload() are hooked up through SYSINIT/SYSUNINIT at
 * SI_SUB_DTRACE_PROVIDER; fbt_modevent() itself does no setup or teardown.
 */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/* Module declaration, version, and dependencies on dtrace and opensolaris. */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1207 MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1208