xref: /freebsd/sys/cddl/dev/fbt/fbt.c (revision 90b5fc95832da64a5f56295e687379732c33718f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22  *
23  * $FreeBSD$
24  *
25  */
26 
27 /*
28  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29  * Use is subject to license terms.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cpuvar.h>
37 #include <sys/endian.h>
38 #include <sys/fcntl.h>
39 #include <sys/filio.h>
40 #include <sys/kdb.h>
41 #include <sys/kernel.h>
42 #include <sys/kmem.h>
43 #include <sys/kthread.h>
44 #include <sys/limits.h>
45 #include <sys/linker.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/pcpu.h>
51 #include <sys/poll.h>
52 #include <sys/proc.h>
53 #include <sys/selinfo.h>
54 #include <sys/smp.h>
55 #include <sys/syscall.h>
56 #include <sys/sysent.h>
57 #include <sys/sysproto.h>
58 #include <sys/uio.h>
59 #include <sys/unistd.h>
60 #include <machine/stdarg.h>
61 
62 #include <sys/dtrace.h>
63 #include <sys/dtrace_bsd.h>
64 
65 #include "fbt.h"
66 
67 MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");
68 
69 dtrace_provider_id_t	fbt_id;
70 fbt_probe_t		**fbt_probetab;
71 int			fbt_probetab_mask;
72 
73 static d_open_t	fbt_open;
74 static int	fbt_unload(void);
75 static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
76 static void	fbt_provide_module(void *, modctl_t *);
77 static void	fbt_destroy(void *, dtrace_id_t, void *);
78 static void	fbt_enable(void *, dtrace_id_t, void *);
79 static void	fbt_disable(void *, dtrace_id_t, void *);
80 static void	fbt_load(void *);
81 static void	fbt_suspend(void *, dtrace_id_t, void *);
82 static void	fbt_resume(void *, dtrace_id_t, void *);
83 
84 static struct cdevsw fbt_cdevsw = {
85 	.d_version	= D_VERSION,
86 	.d_open		= fbt_open,
87 	.d_name		= "fbt",
88 };
89 
90 static dtrace_pattr_t fbt_attr = {
91 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
92 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
93 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
94 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
95 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
96 };
97 
98 static dtrace_pops_t fbt_pops = {
99 	.dtps_provide =		NULL,
100 	.dtps_provide_module =	fbt_provide_module,
101 	.dtps_enable =		fbt_enable,
102 	.dtps_disable =		fbt_disable,
103 	.dtps_suspend =		fbt_suspend,
104 	.dtps_resume =		fbt_resume,
105 	.dtps_getargdesc =	fbt_getargdesc,
106 	.dtps_getargval =	NULL,
107 	.dtps_usermode =	NULL,
108 	.dtps_destroy =		fbt_destroy
109 };
110 
/* Device node returned by make_dev() in fbt_load(). */
static struct cdev *fbt_cdev;
/* Number of hash buckets; defaulted to FBT_PROBETAB_SIZE in fbt_load(). */
static int fbt_probetab_size;
/* When nonzero, fbt_enable() reports probes it refuses to enable. */
static int fbt_verbose = 0;
114 
/*
 * Decide whether the function called "name" must be left uninstrumented.
 *
 * Returns 1 if the function is excluded from FBT and 0 if it may be traced.
 */
int
fbt_excluded(const char *name)
{
	/* Lock owner methods; these may be called from probe context. */
	static const char *const lock_owner_fns[] = {
		"owner_mtx", "owner_rm", "owner_rw", "owner_sx",
	};
	size_t i;

	/*
	 * A "dtrace_" function may be called from probe context unless it
	 * opts out by carrying the longer "dtrace_safe_" prefix.
	 */
	if (strncmp(name, "dtrace_", 7) == 0) {
		if (strncmp(name, "dtrace_safe_", 12) != 0)
			return (1);
	}

	/*
	 * Leave functions that are probably part of DDB alone; tracing them
	 * makes a broken FBT too hard to debug.
	 *
	 * NB: kdb_enter() can be excluded, but its call to printf() can't be.
	 * This is generally OK since we're not yet in debugging context.
	 */
	if (strncmp(name, "db_", 3) == 0)
		return (1);
	if (strncmp(name, "kdb_", 4) == 0)
		return (1);

	for (i = 0; i < sizeof(lock_owner_fns) / sizeof(lock_owner_fns[0]);
	    i++) {
		if (strcmp(name, lock_owner_fns[i]) == 0)
			return (1);
	}

	/*
	 * On some platforms the stack unwinder may be called from probe
	 * context.
	 */
#if defined(__aarch64__) || defined(__riscv)
	if (strcmp(name, "unwind_frame") == 0)
		return (1);
#endif

	/*
	 * When DTrace is compiled into the kernel, FBT's own functions have
	 * to be excluded as well.
	 */
#ifndef _KLD_MODULE
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
170 
171 static void
172 fbt_doubletrap(void)
173 {
174 	fbt_probe_t *fbt;
175 	int i;
176 
177 	for (i = 0; i < fbt_probetab_size; i++) {
178 		fbt = fbt_probetab[i];
179 
180 		for (; fbt != NULL; fbt = fbt->fbtp_probenext)
181 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
182 	}
183 }
184 
185 static void
186 fbt_provide_module(void *arg, modctl_t *lf)
187 {
188 	char modname[MAXPATHLEN];
189 	int i;
190 	size_t len;
191 
192 	strlcpy(modname, lf->filename, sizeof(modname));
193 	len = strlen(modname);
194 	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
195 		modname[len - 3] = '\0';
196 
197 	/*
198 	 * Employees of dtrace and their families are ineligible.  Void
199 	 * where prohibited.
200 	 */
201 	if (strcmp(modname, "dtrace") == 0)
202 		return;
203 
204 	/*
205 	 * To register with DTrace, a module must list 'dtrace' as a
206 	 * dependency in order for the kernel linker to resolve
207 	 * symbols like dtrace_register(). All modules with such a
208 	 * dependency are ineligible for FBT tracing.
209 	 */
210 	for (i = 0; i < lf->ndeps; i++)
211 		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
212 			return;
213 
214 	if (lf->fbt_nentries) {
215 		/*
216 		 * This module has some FBT entries allocated; we're afraid
217 		 * to screw with it.
218 		 */
219 		return;
220 	}
221 
222 	/*
223 	 * List the functions in the module and the symbol values.
224 	 */
225 	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
226 }
227 
228 static void
229 fbt_destroy_one(fbt_probe_t *fbt)
230 {
231 	fbt_probe_t *hash, *hashprev, *next;
232 	int ndx;
233 
234 	ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
235 	for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL;
236 	    hashprev = hash, hash = hash->fbtp_hashnext) {
237 		if (hash == fbt) {
238 			if ((next = fbt->fbtp_tracenext) != NULL)
239 				next->fbtp_hashnext = hash->fbtp_hashnext;
240 			else
241 				next = hash->fbtp_hashnext;
242 			if (hashprev != NULL)
243 				hashprev->fbtp_hashnext = next;
244 			else
245 				fbt_probetab[ndx] = next;
246 			goto free;
247 		} else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
248 			for (next = hash; next->fbtp_tracenext != NULL;
249 			    next = next->fbtp_tracenext) {
250 				if (fbt == next->fbtp_tracenext) {
251 					next->fbtp_tracenext =
252 					    fbt->fbtp_tracenext;
253 					goto free;
254 				}
255 			}
256 		}
257 	}
258 	panic("probe %p not found in hash table", fbt);
259 free:
260 	free(fbt, M_FBT);
261 }
262 
263 static void
264 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
265 {
266 	fbt_probe_t *fbt = parg, *next;
267 	modctl_t *ctl;
268 
269 	do {
270 		ctl = fbt->fbtp_ctl;
271 		ctl->fbt_nentries--;
272 
273 		next = fbt->fbtp_probenext;
274 		fbt_destroy_one(fbt);
275 		fbt = next;
276 	} while (fbt != NULL);
277 }
278 
279 static void
280 fbt_enable(void *arg, dtrace_id_t id, void *parg)
281 {
282 	fbt_probe_t *fbt = parg;
283 	modctl_t *ctl = fbt->fbtp_ctl;
284 
285 	ctl->nenabled++;
286 
287 	/*
288 	 * Now check that our modctl has the expected load count.  If it
289 	 * doesn't, this module must have been unloaded and reloaded -- and
290 	 * we're not going to touch it.
291 	 */
292 	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
293 		if (fbt_verbose) {
294 			printf("fbt is failing for probe %s "
295 			    "(module %s reloaded)",
296 			    fbt->fbtp_name, ctl->filename);
297 		}
298 
299 		return;
300 	}
301 
302 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
303 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
304 		fbt->fbtp_enabled++;
305 	}
306 }
307 
308 static void
309 fbt_disable(void *arg, dtrace_id_t id, void *parg)
310 {
311 	fbt_probe_t *fbt = parg, *hash;
312 	modctl_t *ctl = fbt->fbtp_ctl;
313 
314 	ASSERT(ctl->nenabled > 0);
315 	ctl->nenabled--;
316 
317 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
318 		return;
319 
320 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
321 		fbt->fbtp_enabled--;
322 
323 		for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)];
324 		    hash != NULL; hash = hash->fbtp_hashnext) {
325 			if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
326 				for (; hash != NULL; hash = hash->fbtp_tracenext)
327 					if (hash->fbtp_enabled > 0)
328 						break;
329 				break;
330 			}
331 		}
332 		if (hash == NULL)
333 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
334 	}
335 }
336 
337 static void
338 fbt_suspend(void *arg, dtrace_id_t id, void *parg)
339 {
340 	fbt_probe_t *fbt = parg;
341 	modctl_t *ctl = fbt->fbtp_ctl;
342 
343 	ASSERT(ctl->nenabled > 0);
344 
345 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
346 		return;
347 
348 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
349 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
350 }
351 
352 static void
353 fbt_resume(void *arg, dtrace_id_t id, void *parg)
354 {
355 	fbt_probe_t *fbt = parg;
356 	modctl_t *ctl = fbt->fbtp_ctl;
357 
358 	ASSERT(ctl->nenabled > 0);
359 
360 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
361 		return;
362 
363 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
364 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
365 }
366 
367 static int
368 fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
369 {
370 	const Elf_Sym *symp = lc->symtab;;
371 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
372 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
373 	int i;
374 	uint32_t *ctfoff;
375 	uint32_t objtoff = hp->cth_objtoff;
376 	uint32_t funcoff = hp->cth_funcoff;
377 	ushort_t info;
378 	ushort_t vlen;
379 
380 	/* Sanity check. */
381 	if (hp->cth_magic != CTF_MAGIC) {
382 		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
383 		return (EINVAL);
384 	}
385 
386 	if (lc->symtab == NULL) {
387 		printf("No symbol table in '%s'\n",lf->pathname);
388 		return (EINVAL);
389 	}
390 
391 	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
392 	*lc->ctfoffp = ctfoff;
393 
394 	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
395 		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
396 			*ctfoff = 0xffffffff;
397 			continue;
398 		}
399 
400 		switch (ELF_ST_TYPE(symp->st_info)) {
401 		case STT_OBJECT:
402 			if (objtoff >= hp->cth_funcoff ||
403                             (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
404 				*ctfoff = 0xffffffff;
405                                 break;
406                         }
407 
408                         *ctfoff = objtoff;
409                         objtoff += sizeof (ushort_t);
410 			break;
411 
412 		case STT_FUNC:
413 			if (funcoff >= hp->cth_typeoff) {
414 				*ctfoff = 0xffffffff;
415 				break;
416 			}
417 
418 			*ctfoff = funcoff;
419 
420 			info = *((const ushort_t *)(ctfdata + funcoff));
421 			vlen = CTF_INFO_VLEN(info);
422 
423 			/*
424 			 * If we encounter a zero pad at the end, just skip it.
425 			 * Otherwise skip over the function and its return type
426 			 * (+2) and the argument list (vlen).
427 			 */
428 			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
429 				funcoff += sizeof (ushort_t); /* skip pad */
430 			else
431 				funcoff += sizeof (ushort_t) * (vlen + 2);
432 			break;
433 
434 		default:
435 			*ctfoff = 0xffffffff;
436 			break;
437 		}
438 	}
439 
440 	return (0);
441 }
442 
443 static ssize_t
444 fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
445     ssize_t *incrementp)
446 {
447 	ssize_t size, increment;
448 
449 	if (version > CTF_VERSION_1 &&
450 	    tp->ctt_size == CTF_LSIZE_SENT) {
451 		size = CTF_TYPE_LSIZE(tp);
452 		increment = sizeof (ctf_type_t);
453 	} else {
454 		size = tp->ctt_size;
455 		increment = sizeof (ctf_stype_t);
456 	}
457 
458 	if (sizep)
459 		*sizep = size;
460 	if (incrementp)
461 		*incrementp = increment;
462 
463 	return (size);
464 }
465 
466 static int
467 fbt_typoff_init(linker_ctf_t *lc)
468 {
469 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
470 	const ctf_type_t *tbuf;
471 	const ctf_type_t *tend;
472 	const ctf_type_t *tp;
473 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
474 	int ctf_typemax = 0;
475 	uint32_t *xp;
476 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
477 
478 
479 	/* Sanity check. */
480 	if (hp->cth_magic != CTF_MAGIC)
481 		return (EINVAL);
482 
483 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
484 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
485 
486 	int child = hp->cth_parname != 0;
487 
488 	/*
489 	 * We make two passes through the entire type section.  In this first
490 	 * pass, we count the number of each type and the total number of types.
491 	 */
492 	for (tp = tbuf; tp < tend; ctf_typemax++) {
493 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
494 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
495 		ssize_t size, increment;
496 
497 		size_t vbytes;
498 		uint_t n;
499 
500 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
501 
502 		switch (kind) {
503 		case CTF_K_INTEGER:
504 		case CTF_K_FLOAT:
505 			vbytes = sizeof (uint_t);
506 			break;
507 		case CTF_K_ARRAY:
508 			vbytes = sizeof (ctf_array_t);
509 			break;
510 		case CTF_K_FUNCTION:
511 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
512 			break;
513 		case CTF_K_STRUCT:
514 		case CTF_K_UNION:
515 			if (size < CTF_LSTRUCT_THRESH) {
516 				ctf_member_t *mp = (ctf_member_t *)
517 				    ((uintptr_t)tp + increment);
518 
519 				vbytes = sizeof (ctf_member_t) * vlen;
520 				for (n = vlen; n != 0; n--, mp++)
521 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
522 			} else {
523 				ctf_lmember_t *lmp = (ctf_lmember_t *)
524 				    ((uintptr_t)tp + increment);
525 
526 				vbytes = sizeof (ctf_lmember_t) * vlen;
527 				for (n = vlen; n != 0; n--, lmp++)
528 					child |=
529 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
530 			}
531 			break;
532 		case CTF_K_ENUM:
533 			vbytes = sizeof (ctf_enum_t) * vlen;
534 			break;
535 		case CTF_K_FORWARD:
536 			/*
537 			 * For forward declarations, ctt_type is the CTF_K_*
538 			 * kind for the tag, so bump that population count too.
539 			 * If ctt_type is unknown, treat the tag as a struct.
540 			 */
541 			if (tp->ctt_type == CTF_K_UNKNOWN ||
542 			    tp->ctt_type >= CTF_K_MAX)
543 				pop[CTF_K_STRUCT]++;
544 			else
545 				pop[tp->ctt_type]++;
546 			/*FALLTHRU*/
547 		case CTF_K_UNKNOWN:
548 			vbytes = 0;
549 			break;
550 		case CTF_K_POINTER:
551 		case CTF_K_TYPEDEF:
552 		case CTF_K_VOLATILE:
553 		case CTF_K_CONST:
554 		case CTF_K_RESTRICT:
555 			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
556 			vbytes = 0;
557 			break;
558 		default:
559 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
560 			return (EIO);
561 		}
562 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
563 		pop[kind]++;
564 	}
565 
566 	/* account for a sentinel value below */
567 	ctf_typemax++;
568 	*lc->typlenp = ctf_typemax;
569 
570 	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
571 	    M_ZERO | M_WAITOK);
572 
573 	*lc->typoffp = xp;
574 
575 	/* type id 0 is used as a sentinel value */
576 	*xp++ = 0;
577 
578 	/*
579 	 * In the second pass, fill in the type offset.
580 	 */
581 	for (tp = tbuf; tp < tend; xp++) {
582 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
583 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
584 		ssize_t size, increment;
585 
586 		size_t vbytes;
587 		uint_t n;
588 
589 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
590 
591 		switch (kind) {
592 		case CTF_K_INTEGER:
593 		case CTF_K_FLOAT:
594 			vbytes = sizeof (uint_t);
595 			break;
596 		case CTF_K_ARRAY:
597 			vbytes = sizeof (ctf_array_t);
598 			break;
599 		case CTF_K_FUNCTION:
600 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
601 			break;
602 		case CTF_K_STRUCT:
603 		case CTF_K_UNION:
604 			if (size < CTF_LSTRUCT_THRESH) {
605 				ctf_member_t *mp = (ctf_member_t *)
606 				    ((uintptr_t)tp + increment);
607 
608 				vbytes = sizeof (ctf_member_t) * vlen;
609 				for (n = vlen; n != 0; n--, mp++)
610 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
611 			} else {
612 				ctf_lmember_t *lmp = (ctf_lmember_t *)
613 				    ((uintptr_t)tp + increment);
614 
615 				vbytes = sizeof (ctf_lmember_t) * vlen;
616 				for (n = vlen; n != 0; n--, lmp++)
617 					child |=
618 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
619 			}
620 			break;
621 		case CTF_K_ENUM:
622 			vbytes = sizeof (ctf_enum_t) * vlen;
623 			break;
624 		case CTF_K_FORWARD:
625 		case CTF_K_UNKNOWN:
626 			vbytes = 0;
627 			break;
628 		case CTF_K_POINTER:
629 		case CTF_K_TYPEDEF:
630 		case CTF_K_VOLATILE:
631 		case CTF_K_CONST:
632 		case CTF_K_RESTRICT:
633 			vbytes = 0;
634 			break;
635 		default:
636 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
637 			return (EIO);
638 		}
639 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
640 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
641 	}
642 
643 	return (0);
644 }
645 
646 /*
647  * CTF Declaration Stack
648  *
649  * In order to implement ctf_type_name(), we must convert a type graph back
650  * into a C type declaration.  Unfortunately, a type graph represents a storage
651  * class ordering of the type whereas a type declaration must obey the C rules
652  * for operator precedence, and the two orderings are frequently in conflict.
653  * For example, consider these CTF type graphs and their C declarations:
654  *
655  * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
656  * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
657  *
658  * In each case, parentheses are used to raise operator * to higher lexical
659  * precedence, so the string form of the C declaration cannot be constructed by
660  * walking the type graph links and forming the string from left to right.
661  *
662  * The functions in this file build a set of stacks from the type graph nodes
663  * corresponding to the C operator precedence levels in the appropriate order.
664  * The code in ctf_type_name() can then iterate over the levels and nodes in
665  * lexical precedence order and construct the final C declaration string.
666  */
/*
 * Link pair used both as a list head and as the first member of each list
 * element.
 */
typedef struct ctf_list ctf_list_t;

struct ctf_list {
	ctf_list_t *l_prev;	/* previous element, or the tail in a head */
	ctf_list_t *l_next;	/* next element, or the head in a head */
};

/* Fetch the links of anything that starts with a ctf_list_t. */
#define	ctf_list_prev(elem)	\
	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	\
	((void *)(((ctf_list_t *)(elem))->l_next))
674 
/*
 * Precedence levels of a C declaration, lowest first.  CTF_PREC_MAX is the
 * number of levels and is used to size the per-level arrays.
 */
typedef enum {
	CTF_PREC_BASE = 0,
	CTF_PREC_POINTER = 1,
	CTF_PREC_ARRAY = 2,
	CTF_PREC_FUNCTION = 3,
	CTF_PREC_MAX = 4
} ctf_decl_prec_t;
682 
683 typedef struct ctf_decl_node {
684 	ctf_list_t cd_list;			/* linked list pointers */
685 	ctf_id_t cd_type;			/* type identifier */
686 	uint_t cd_kind;				/* type kind */
687 	uint_t cd_n;				/* type dimension if array */
688 } ctf_decl_node_t;
689 
690 typedef struct ctf_decl {
691 	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
692 	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
693 	ctf_decl_prec_t cd_qualp;		/* qualifier precision */
694 	ctf_decl_prec_t cd_ordp;		/* ordered precision */
695 	char *cd_buf;				/* buffer for output */
696 	char *cd_ptr;				/* buffer location */
697 	char *cd_end;				/* buffer limit */
698 	size_t cd_len;				/* buffer space required */
699 	int cd_err;				/* saved error value */
700 } ctf_decl_t;
701 
702 /*
703  * Simple doubly-linked list append routine.  This implementation assumes that
704  * each list element contains an embedded ctf_list_t as the first member.
705  * An additional ctf_list_t is used to store the head (l_next) and tail
706  * (l_prev) pointers.  The current head and tail list elements have their
707  * previous and next pointers set to NULL, respectively.
708  */
709 static void
710 ctf_list_append(ctf_list_t *lp, void *new)
711 {
712 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
713 	ctf_list_t *q = new;		/* q = new list element */
714 
715 	lp->l_prev = q;
716 	q->l_prev = p;
717 	q->l_next = NULL;
718 
719 	if (p != NULL)
720 		p->l_next = q;
721 	else
722 		lp->l_next = q;
723 }
724 
725 /*
726  * Prepend the specified existing element to the given ctf_list_t.  The
727  * existing pointer should be pointing at a struct with embedded ctf_list_t.
728  */
729 static void
730 ctf_list_prepend(ctf_list_t *lp, void *new)
731 {
732 	ctf_list_t *p = new;		/* p = new list element */
733 	ctf_list_t *q = lp->l_next;	/* q = head list element */
734 
735 	lp->l_next = p;
736 	p->l_prev = NULL;
737 	p->l_next = q;
738 
739 	if (q != NULL)
740 		q->l_prev = p;
741 	else
742 		lp->l_prev = p;
743 }
744 
745 static void
746 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
747 {
748 	int i;
749 
750 	bzero(cd, sizeof (ctf_decl_t));
751 
752 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
753 		cd->cd_order[i] = CTF_PREC_BASE - 1;
754 
755 	cd->cd_qualp = CTF_PREC_BASE;
756 	cd->cd_ordp = CTF_PREC_BASE;
757 
758 	cd->cd_buf = buf;
759 	cd->cd_ptr = buf;
760 	cd->cd_end = buf + len;
761 }
762 
763 static void
764 ctf_decl_fini(ctf_decl_t *cd)
765 {
766 	ctf_decl_node_t *cdp, *ndp;
767 	int i;
768 
769 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
770 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
771 		    cdp != NULL; cdp = ndp) {
772 			ndp = ctf_list_next(cdp);
773 			free(cdp, M_FBT);
774 		}
775 	}
776 }
777 
778 static const ctf_type_t *
779 ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
780 {
781 	const ctf_type_t *tp;
782 	uint32_t offset;
783 	uint32_t *typoff = *lc->typoffp;
784 
785 	if (type >= *lc->typlenp) {
786 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
787 		return(NULL);
788 	}
789 
790 	/* Check if the type isn't cross-referenced. */
791 	if ((offset = typoff[type]) == 0) {
792 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
793 		return(NULL);
794 	}
795 
796 	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
797 
798 	return (tp);
799 }
800 
801 static void
802 fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
803 {
804 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
805 	const ctf_type_t *tp;
806 	const ctf_array_t *ap;
807 	ssize_t increment;
808 
809 	bzero(arp, sizeof(*arp));
810 
811 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
812 		return;
813 
814 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
815 		return;
816 
817 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
818 
819 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
820 	arp->ctr_contents = ap->cta_contents;
821 	arp->ctr_index = ap->cta_index;
822 	arp->ctr_nelems = ap->cta_nelems;
823 }
824 
825 static const char *
826 ctf_strptr(linker_ctf_t *lc, int name)
827 {
828 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
829 	const char *strp = "";
830 
831 	if (name < 0 || name >= hp->cth_strlen)
832 		return(strp);
833 
834 	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
835 
836 	return (strp);
837 }
838 
839 static void
840 ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
841 {
842 	ctf_decl_node_t *cdp;
843 	ctf_decl_prec_t prec;
844 	uint_t kind, n = 1;
845 	int is_qual = 0;
846 
847 	const ctf_type_t *tp;
848 	ctf_arinfo_t ar;
849 
850 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
851 		cd->cd_err = ENOENT;
852 		return;
853 	}
854 
855 	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
856 	case CTF_K_ARRAY:
857 		fbt_array_info(lc, type, &ar);
858 		ctf_decl_push(cd, lc, ar.ctr_contents);
859 		n = ar.ctr_nelems;
860 		prec = CTF_PREC_ARRAY;
861 		break;
862 
863 	case CTF_K_TYPEDEF:
864 		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
865 			ctf_decl_push(cd, lc, tp->ctt_type);
866 			return;
867 		}
868 		prec = CTF_PREC_BASE;
869 		break;
870 
871 	case CTF_K_FUNCTION:
872 		ctf_decl_push(cd, lc, tp->ctt_type);
873 		prec = CTF_PREC_FUNCTION;
874 		break;
875 
876 	case CTF_K_POINTER:
877 		ctf_decl_push(cd, lc, tp->ctt_type);
878 		prec = CTF_PREC_POINTER;
879 		break;
880 
881 	case CTF_K_VOLATILE:
882 	case CTF_K_CONST:
883 	case CTF_K_RESTRICT:
884 		ctf_decl_push(cd, lc, tp->ctt_type);
885 		prec = cd->cd_qualp;
886 		is_qual++;
887 		break;
888 
889 	default:
890 		prec = CTF_PREC_BASE;
891 	}
892 
893 	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
894 	cdp->cd_type = type;
895 	cdp->cd_kind = kind;
896 	cdp->cd_n = n;
897 
898 	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
899 		cd->cd_order[prec] = cd->cd_ordp++;
900 
901 	/*
902 	 * Reset cd_qualp to the highest precedence level that we've seen so
903 	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
904 	 */
905 	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
906 		cd->cd_qualp = prec;
907 
908 	/*
909 	 * C array declarators are ordered inside out so prepend them.  Also by
910 	 * convention qualifiers of base types precede the type specifier (e.g.
911 	 * const int vs. int const) even though the two forms are equivalent.
912 	 */
913 	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
914 		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
915 	else
916 		ctf_list_append(&cd->cd_nodes[prec], cdp);
917 }
918 
919 static void
920 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
921 {
922 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
923 	va_list ap;
924 	size_t n;
925 
926 	va_start(ap, format);
927 	n = vsnprintf(cd->cd_ptr, len, format, ap);
928 	va_end(ap);
929 
930 	cd->cd_ptr += MIN(n, len);
931 	cd->cd_len += n;
932 }
933 
934 static ssize_t
935 fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
936 {
937 	ctf_decl_t cd;
938 	ctf_decl_node_t *cdp;
939 	ctf_decl_prec_t prec, lp, rp;
940 	int ptr, arr;
941 	uint_t k;
942 
943 	if (lc == NULL && type == CTF_ERR)
944 		return (-1); /* simplify caller code by permitting CTF_ERR */
945 
946 	ctf_decl_init(&cd, buf, len);
947 	ctf_decl_push(&cd, lc, type);
948 
949 	if (cd.cd_err != 0) {
950 		ctf_decl_fini(&cd);
951 		return (-1);
952 	}
953 
954 	/*
955 	 * If the type graph's order conflicts with lexical precedence order
956 	 * for pointers or arrays, then we need to surround the declarations at
957 	 * the corresponding lexical precedence with parentheses.  This can
958 	 * result in either a parenthesized pointer (*) as in int (*)() or
959 	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
960 	 */
961 	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
962 	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
963 
964 	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
965 	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
966 
967 	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
968 
969 	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
970 		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
971 		    cdp != NULL; cdp = ctf_list_next(cdp)) {
972 
973 			const ctf_type_t *tp =
974 			    ctf_lookup_by_id(lc, cdp->cd_type);
975 			const char *name = ctf_strptr(lc, tp->ctt_name);
976 
977 			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
978 				ctf_decl_sprintf(&cd, " ");
979 
980 			if (lp == prec) {
981 				ctf_decl_sprintf(&cd, "(");
982 				lp = -1;
983 			}
984 
985 			switch (cdp->cd_kind) {
986 			case CTF_K_INTEGER:
987 			case CTF_K_FLOAT:
988 			case CTF_K_TYPEDEF:
989 				ctf_decl_sprintf(&cd, "%s", name);
990 				break;
991 			case CTF_K_POINTER:
992 				ctf_decl_sprintf(&cd, "*");
993 				break;
994 			case CTF_K_ARRAY:
995 				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
996 				break;
997 			case CTF_K_FUNCTION:
998 				ctf_decl_sprintf(&cd, "()");
999 				break;
1000 			case CTF_K_STRUCT:
1001 			case CTF_K_FORWARD:
1002 				ctf_decl_sprintf(&cd, "struct %s", name);
1003 				break;
1004 			case CTF_K_UNION:
1005 				ctf_decl_sprintf(&cd, "union %s", name);
1006 				break;
1007 			case CTF_K_ENUM:
1008 				ctf_decl_sprintf(&cd, "enum %s", name);
1009 				break;
1010 			case CTF_K_VOLATILE:
1011 				ctf_decl_sprintf(&cd, "volatile");
1012 				break;
1013 			case CTF_K_CONST:
1014 				ctf_decl_sprintf(&cd, "const");
1015 				break;
1016 			case CTF_K_RESTRICT:
1017 				ctf_decl_sprintf(&cd, "restrict");
1018 				break;
1019 			}
1020 
1021 			k = cdp->cd_kind;
1022 		}
1023 
1024 		if (rp == prec)
1025 			ctf_decl_sprintf(&cd, ")");
1026 	}
1027 
1028 	ctf_decl_fini(&cd);
1029 	return (cd.cd_len);
1030 }
1031 
1032 static void
1033 fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1034 {
1035 	const ushort_t *dp;
1036 	fbt_probe_t *fbt = parg;
1037 	linker_ctf_t lc;
1038 	modctl_t *ctl = fbt->fbtp_ctl;
1039 	int ndx = desc->dtargd_ndx;
1040 	int symindx = fbt->fbtp_symindx;
1041 	uint32_t *ctfoff;
1042 	uint32_t offset;
1043 	ushort_t info, kind, n;
1044 
1045 	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1046 		(void) strcpy(desc->dtargd_native, "int");
1047 		return;
1048 	}
1049 
1050 	desc->dtargd_ndx = DTRACE_ARGNONE;
1051 
1052 	/* Get a pointer to the CTF data and it's length. */
1053 	if (linker_ctf_get(ctl, &lc) != 0)
1054 		/* No CTF data? Something wrong? *shrug* */
1055 		return;
1056 
1057 	/* Check if this module hasn't been initialised yet. */
1058 	if (*lc.ctfoffp == NULL) {
1059 		/*
1060 		 * Initialise the CTF object and function symindx to
1061 		 * byte offset array.
1062 		 */
1063 		if (fbt_ctfoff_init(ctl, &lc) != 0)
1064 			return;
1065 
1066 		/* Initialise the CTF type to byte offset array. */
1067 		if (fbt_typoff_init(&lc) != 0)
1068 			return;
1069 	}
1070 
1071 	ctfoff = *lc.ctfoffp;
1072 
1073 	if (ctfoff == NULL || *lc.typoffp == NULL)
1074 		return;
1075 
1076 	/* Check if the symbol index is out of range. */
1077 	if (symindx >= lc.nsym)
1078 		return;
1079 
1080 	/* Check if the symbol isn't cross-referenced. */
1081 	if ((offset = ctfoff[symindx]) == 0xffffffff)
1082 		return;
1083 
1084 	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1085 
1086 	info = *dp++;
1087 	kind = CTF_INFO_KIND(info);
1088 	n = CTF_INFO_VLEN(info);
1089 
1090 	if (kind == CTF_K_UNKNOWN && n == 0) {
1091 		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1092 		return;
1093 	}
1094 
1095 	if (kind != CTF_K_FUNCTION) {
1096 		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1097 		return;
1098 	}
1099 
1100 	if (fbt->fbtp_roffset != 0) {
1101 		/* Only return type is available for args[1] in return probe. */
1102 		if (ndx > 1)
1103 			return;
1104 		ASSERT(ndx == 1);
1105 	} else {
1106 		/* Check if the requested argument doesn't exist. */
1107 		if (ndx >= n)
1108 			return;
1109 
1110 		/* Skip the return type and arguments up to the one requested. */
1111 		dp += ndx + 1;
1112 	}
1113 
1114 	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1115 		desc->dtargd_ndx = ndx;
1116 
1117 	return;
1118 }
1119 
1120 static int
1121 fbt_linker_file_cb(linker_file_t lf, void *arg)
1122 {
1123 
1124 	fbt_provide_module(arg, lf);
1125 
1126 	return (0);
1127 }
1128 
1129 static void
1130 fbt_load(void *dummy)
1131 {
1132 	/* Create the /dev/dtrace/fbt entry. */
1133 	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1134 	    "dtrace/fbt");
1135 
1136 	/* Default the probe table size if not specified. */
1137 	if (fbt_probetab_size == 0)
1138 		fbt_probetab_size = FBT_PROBETAB_SIZE;
1139 
1140 	/* Choose the hash mask for the probe table. */
1141 	fbt_probetab_mask = fbt_probetab_size - 1;
1142 
1143 	/* Allocate memory for the probe table. */
1144 	fbt_probetab =
1145 	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1146 
1147 	dtrace_doubletrap_func = fbt_doubletrap;
1148 	dtrace_invop_add(fbt_invop);
1149 
1150 	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1151 	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1152 		return;
1153 
1154 	/* Create probes for the kernel and already-loaded modules. */
1155 	linker_file_foreach(fbt_linker_file_cb, NULL);
1156 }
1157 
1158 static int
1159 fbt_unload()
1160 {
1161 	int error = 0;
1162 
1163 	/* De-register the invalid opcode handler. */
1164 	dtrace_invop_remove(fbt_invop);
1165 
1166 	dtrace_doubletrap_func = NULL;
1167 
1168 	/* De-register this DTrace provider. */
1169 	if ((error = dtrace_unregister(fbt_id)) != 0)
1170 		return (error);
1171 
1172 	/* Free the probe table. */
1173 	free(fbt_probetab, M_FBT);
1174 	fbt_probetab = NULL;
1175 	fbt_probetab_mask = 0;
1176 
1177 	destroy_dev(fbt_cdev);
1178 
1179 	return (error);
1180 }
1181 
1182 static int
1183 fbt_modevent(module_t mod __unused, int type, void *data __unused)
1184 {
1185 	int error = 0;
1186 
1187 	switch (type) {
1188 	case MOD_LOAD:
1189 		break;
1190 
1191 	case MOD_UNLOAD:
1192 		break;
1193 
1194 	case MOD_SHUTDOWN:
1195 		break;
1196 
1197 	default:
1198 		error = EOPNOTSUPP;
1199 		break;
1200 
1201 	}
1202 
1203 	return (error);
1204 }
1205 
1206 static int
1207 fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1208 {
1209 	return (0);
1210 }
1211 
1212 SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
1213 SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);
1214 
1215 DEV_MODULE(fbt, fbt_modevent, NULL);
1216 MODULE_VERSION(fbt, 1);
1217 MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
1218 MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1219