xref: /freebsd/sys/cddl/dev/fbt/fbt.c (revision 45c23c2608ec3e34b5c7ce18eca91a46419e363c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22  *
23  * $FreeBSD$
24  *
25  */
26 
27 /*
28  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29  * Use is subject to license terms.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cpuvar.h>
37 #include <sys/endian.h>
38 #include <sys/fcntl.h>
39 #include <sys/filio.h>
40 #include <sys/kdb.h>
41 #include <sys/kernel.h>
42 #include <sys/kmem.h>
43 #include <sys/kthread.h>
44 #include <sys/limits.h>
45 #include <sys/linker.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/pcpu.h>
51 #include <sys/poll.h>
52 #include <sys/proc.h>
53 #include <sys/selinfo.h>
54 #include <sys/smp.h>
55 #include <sys/syscall.h>
56 #include <sys/sysent.h>
57 #include <sys/sysproto.h>
58 #include <sys/uio.h>
59 #include <sys/unistd.h>
60 #include <machine/stdarg.h>
61 
62 #include <sys/dtrace.h>
63 #include <sys/dtrace_bsd.h>
64 
65 #include "fbt.h"
66 
/* Malloc type used for every allocation made by this provider. */
MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/* Provider id filled in by dtrace_register() in fbt_load(). */
dtrace_provider_id_t	fbt_id;
/* Probe hash table; buckets are selected with FBT_ADDR2NDX(). */
fbt_probe_t		**fbt_probetab;
/* Set to fbt_probetab_size - 1 in fbt_load(). */
int			fbt_probetab_mask;
72 
/* Forward declarations for the device, provider and load/unload hooks. */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);
83 
/*
 * Character device switch for the node created by fbt_load(); only open
 * is implemented.
 */
static struct cdevsw fbt_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= fbt_open,
	.d_name		= "fbt",
};
89 
/*
 * Stability attributes handed to dtrace_register().
 * NOTE(review): the five rows presumably describe provider, module,
 * function, name and arguments in that order -- confirm against the
 * dtrace_pattr_t definition.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
97 
/*
 * Provider operations vector handed to dtrace_register().  Probes are
 * created per module (dtps_provide_module); the hooks left NULL are not
 * implemented by this provider.
 */
static dtrace_pops_t fbt_pops = {
	.dtps_provide =		NULL,
	.dtps_provide_module =	fbt_provide_module,
	.dtps_enable =		fbt_enable,
	.dtps_disable =		fbt_disable,
	.dtps_suspend =		fbt_suspend,
	.dtps_resume =		fbt_resume,
	.dtps_getargdesc =	fbt_getargdesc,
	.dtps_getargval =	NULL,
	.dtps_usermode =	NULL,
	.dtps_destroy =		fbt_destroy
};
110 
/* Device node returned by make_dev() in fbt_load(). */
static struct cdev		*fbt_cdev;
/* Number of hash buckets; defaulted to FBT_PROBETAB_SIZE when still 0. */
static int			fbt_probetab_size;
/* When non-zero, fbt_enable() reports probes it refuses to enable. */
static int			fbt_verbose = 0;
114 
/*
 * Report whether the function called "name" must not be instrumented.
 *
 * Returns 1 when the function is excluded from FBT and 0 when it may be
 * instrumented.
 */
int
fbt_excluded(const char *name)
{
	/* Lock owner methods may be called from probe context. */
	static const char *const owner_methods[] = {
		"owner_mtx",
		"owner_rm",
		"owner_rw",
		"owner_sx",
	};
	unsigned int m;

	/*
	 * A "dtrace_" function may run in probe context unless it opts out
	 * by carrying the longer "dtrace_safe_" prefix.
	 */
	if (strncmp(name, "dtrace_", 7) == 0) {
		if (strncmp(name, "dtrace_safe_", 12) != 0)
			return (1);
	}

	/*
	 * Leave functions that are probably part of DDB alone; tracing them
	 * makes a broken FBT too hard to debug.
	 *
	 * NB: kdb_enter() can be excluded, but its call to printf() can't be.
	 * This is generally OK since we're not yet in debugging context.
	 */
	if (strncmp(name, "db_", 3) == 0)
		return (1);
	if (strncmp(name, "kdb_", 4) == 0)
		return (1);

	for (m = 0; m < sizeof(owner_methods) / sizeof(owner_methods[0]); m++) {
		if (strcmp(name, owner_methods[m]) == 0)
			return (1);
	}

	/*
	 * Stack unwinders may be called from probe context on some
	 * platforms.
	 */
#if defined(__aarch64__) || defined(__riscv)
	if (strcmp(name, "unwind_frame") == 0)
		return (1);
#endif

	/*
	 * When DTrace is built into the kernel the FBT functions themselves
	 * have to be kept out of the instrumentation.
	 */
#ifndef _KLD_MODULE
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
170 
171 static void
172 fbt_doubletrap(void)
173 {
174 	fbt_probe_t *fbt;
175 	int i;
176 
177 	for (i = 0; i < fbt_probetab_size; i++) {
178 		fbt = fbt_probetab[i];
179 
180 		for (; fbt != NULL; fbt = fbt->fbtp_probenext)
181 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
182 	}
183 }
184 
185 static void
186 fbt_provide_module(void *arg, modctl_t *lf)
187 {
188 	char modname[MAXPATHLEN];
189 	int i;
190 	size_t len;
191 
192 	strlcpy(modname, lf->filename, sizeof(modname));
193 	len = strlen(modname);
194 	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
195 		modname[len - 3] = '\0';
196 
197 	/*
198 	 * Employees of dtrace and their families are ineligible.  Void
199 	 * where prohibited.
200 	 */
201 	if (strcmp(modname, "dtrace") == 0)
202 		return;
203 
204 	/*
205 	 * To register with DTrace, a module must list 'dtrace' as a
206 	 * dependency in order for the kernel linker to resolve
207 	 * symbols like dtrace_register(). All modules with such a
208 	 * dependency are ineligible for FBT tracing.
209 	 */
210 	for (i = 0; i < lf->ndeps; i++)
211 		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
212 			return;
213 
214 	if (lf->fbt_nentries) {
215 		/*
216 		 * This module has some FBT entries allocated; we're afraid
217 		 * to screw with it.
218 		 */
219 		return;
220 	}
221 
222 	/*
223 	 * List the functions in the module and the symbol values.
224 	 */
225 	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
226 }
227 
/*
 * Unlink a single probe from the probe hash table and free it.
 *
 * A bucket is a chain linked through fbtp_hashnext.  Probes sharing a
 * patch point with a chain member hang off that member through
 * fbtp_tracenext.  Panics if the probe cannot be found.
 */
static void
fbt_destroy_one(fbt_probe_t *fbt)
{
	fbt_probe_t *hash, *hashprev, *next;
	int ndx;

	ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
	for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL;
	    hashprev = hash, hash = hash->fbtp_hashnext) {
		if (hash == fbt) {
			/*
			 * The probe is itself a member of the hash chain.  If
			 * it has a trace successor, promote that successor
			 * into its chain slot; otherwise splice it out.
			 */
			if ((next = fbt->fbtp_tracenext) != NULL)
				next->fbtp_hashnext = hash->fbtp_hashnext;
			else
				next = hash->fbtp_hashnext;
			if (hashprev != NULL)
				hashprev->fbtp_hashnext = next;
			else
				fbt_probetab[ndx] = next;
			goto free;
		} else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
			/*
			 * Same patch point as this chain member: look for the
			 * probe on the member's trace list and unlink it.
			 */
			for (next = hash; next->fbtp_tracenext != NULL;
			    next = next->fbtp_tracenext) {
				if (fbt == next->fbtp_tracenext) {
					next->fbtp_tracenext =
					    fbt->fbtp_tracenext;
					goto free;
				}
			}
		}
	}
	panic("probe %p not found in hash table", fbt);
free:
	free(fbt, M_FBT);
}
262 
263 static void
264 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
265 {
266 	fbt_probe_t *fbt = parg, *next;
267 	modctl_t *ctl;
268 
269 	do {
270 		ctl = fbt->fbtp_ctl;
271 		ctl->fbt_nentries--;
272 
273 		next = fbt->fbtp_probenext;
274 		fbt_destroy_one(fbt);
275 		fbt = next;
276 	} while (fbt != NULL);
277 }
278 
279 static void
280 fbt_enable(void *arg, dtrace_id_t id, void *parg)
281 {
282 	fbt_probe_t *fbt = parg;
283 	modctl_t *ctl = fbt->fbtp_ctl;
284 
285 	ctl->nenabled++;
286 
287 	/*
288 	 * Now check that our modctl has the expected load count.  If it
289 	 * doesn't, this module must have been unloaded and reloaded -- and
290 	 * we're not going to touch it.
291 	 */
292 	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
293 		if (fbt_verbose) {
294 			printf("fbt is failing for probe %s "
295 			    "(module %s reloaded)",
296 			    fbt->fbtp_name, ctl->filename);
297 		}
298 
299 		return;
300 	}
301 
302 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
303 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
304 		fbt->fbtp_enabled++;
305 	}
306 }
307 
/*
 * dtps_disable hook: drop the enable count of every tracepoint belonging
 * to the probe in "parg" and restore the saved value at a patch point
 * once no probe sharing that patch point is still enabled.
 */
static void
fbt_disable(void *arg, dtrace_id_t id, void *parg)
{
	fbt_probe_t *fbt = parg, *hash;
	modctl_t *ctl = fbt->fbtp_ctl;

	ASSERT(ctl->nenabled > 0);
	ctl->nenabled--;

	/* The module's load count changed; leave its tracepoints alone. */
	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
		return;

	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
		fbt->fbtp_enabled--;

		/*
		 * Find the hash chain member for this patch point and scan
		 * its trace list for a probe that is still enabled.  After
		 * the search, hash is non-NULL only if such a probe exists.
		 */
		for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)];
		    hash != NULL; hash = hash->fbtp_hashnext) {
			if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
				for (; hash != NULL; hash = hash->fbtp_tracenext)
					if (hash->fbtp_enabled > 0)
						break;
				break;
			}
		}
		/* Nobody else needs the tracepoint: undo the patch. */
		if (hash == NULL)
			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
	}
}
336 
337 static void
338 fbt_suspend(void *arg, dtrace_id_t id, void *parg)
339 {
340 	fbt_probe_t *fbt = parg;
341 	modctl_t *ctl = fbt->fbtp_ctl;
342 
343 	ASSERT(ctl->nenabled > 0);
344 
345 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
346 		return;
347 
348 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
349 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
350 }
351 
352 static void
353 fbt_resume(void *arg, dtrace_id_t id, void *parg)
354 {
355 	fbt_probe_t *fbt = parg;
356 	modctl_t *ctl = fbt->fbtp_ctl;
357 
358 	ASSERT(ctl->nenabled > 0);
359 
360 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
361 		return;
362 
363 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
364 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
365 }
366 
367 static int
368 fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
369 {
370 	const Elf_Sym *symp = lc->symtab;
371 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
372 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
373 	int i;
374 	uint32_t *ctfoff;
375 	uint32_t objtoff = hp->cth_objtoff;
376 	uint32_t funcoff = hp->cth_funcoff;
377 	ushort_t info;
378 	ushort_t vlen;
379 
380 	/* Sanity check. */
381 	if (hp->cth_magic != CTF_MAGIC) {
382 		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
383 		return (EINVAL);
384 	}
385 
386 	if (lc->symtab == NULL) {
387 		printf("No symbol table in '%s'\n",lf->pathname);
388 		return (EINVAL);
389 	}
390 
391 	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
392 	*lc->ctfoffp = ctfoff;
393 
394 	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
395 		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
396 			*ctfoff = 0xffffffff;
397 			continue;
398 		}
399 
400 		switch (ELF_ST_TYPE(symp->st_info)) {
401 		case STT_OBJECT:
402 			if (objtoff >= hp->cth_funcoff ||
403                             (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
404 				*ctfoff = 0xffffffff;
405                                 break;
406                         }
407 
408                         *ctfoff = objtoff;
409                         objtoff += sizeof (ushort_t);
410 			break;
411 
412 		case STT_FUNC:
413 			if (funcoff >= hp->cth_typeoff) {
414 				*ctfoff = 0xffffffff;
415 				break;
416 			}
417 
418 			*ctfoff = funcoff;
419 
420 			info = *((const ushort_t *)(ctfdata + funcoff));
421 			vlen = CTF_INFO_VLEN(info);
422 
423 			/*
424 			 * If we encounter a zero pad at the end, just skip it.
425 			 * Otherwise skip over the function and its return type
426 			 * (+2) and the argument list (vlen).
427 			 */
428 			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
429 				funcoff += sizeof (ushort_t); /* skip pad */
430 			else
431 				funcoff += sizeof (ushort_t) * (vlen + 2);
432 			break;
433 
434 		default:
435 			*ctfoff = 0xffffffff;
436 			break;
437 		}
438 	}
439 
440 	return (0);
441 }
442 
443 static ssize_t
444 fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
445     ssize_t *incrementp)
446 {
447 	ssize_t size, increment;
448 
449 	if (tp->ctt_size == CTF_LSIZE_SENT) {
450 		size = CTF_TYPE_LSIZE(tp);
451 		increment = sizeof (ctf_type_t);
452 	} else {
453 		size = tp->ctt_size;
454 		increment = sizeof (ctf_stype_t);
455 	}
456 
457 	if (sizep)
458 		*sizep = size;
459 	if (incrementp)
460 		*incrementp = increment;
461 
462 	return (size);
463 }
464 
465 static int
466 fbt_typoff_init(linker_ctf_t *lc)
467 {
468 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
469 	const ctf_type_t *tbuf;
470 	const ctf_type_t *tend;
471 	const ctf_type_t *tp;
472 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
473 	int ctf_typemax = 0;
474 	uint32_t *xp;
475 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
476 	uint8_t version;
477 
478 
479 	/* Sanity check. */
480 	if (hp->cth_magic != CTF_MAGIC)
481 		return (EINVAL);
482 
483 	version = hp->cth_version;
484 
485 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
486 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
487 
488 	/*
489 	 * We make two passes through the entire type section.  In this first
490 	 * pass, we count the number of each type and the total number of types.
491 	 */
492 	for (tp = tbuf; tp < tend; ctf_typemax++) {
493 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
494 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
495 		ssize_t size, increment;
496 
497 		size_t vbytes;
498 
499 		(void) fbt_get_ctt_size(version, tp, &size, &increment);
500 
501 		switch (kind) {
502 		case CTF_K_INTEGER:
503 		case CTF_K_FLOAT:
504 			vbytes = sizeof (uint_t);
505 			break;
506 		case CTF_K_ARRAY:
507 			vbytes = sizeof (ctf_array_t);
508 			break;
509 		case CTF_K_FUNCTION:
510 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
511 			break;
512 		case CTF_K_STRUCT:
513 		case CTF_K_UNION:
514 			if (size < CTF_LSTRUCT_THRESH)
515 				vbytes = sizeof (ctf_member_t) * vlen;
516 			else
517 				vbytes = sizeof (ctf_lmember_t) * vlen;
518 			break;
519 		case CTF_K_ENUM:
520 			vbytes = sizeof (ctf_enum_t) * vlen;
521 			break;
522 		case CTF_K_FORWARD:
523 			/*
524 			 * For forward declarations, ctt_type is the CTF_K_*
525 			 * kind for the tag, so bump that population count too.
526 			 * If ctt_type is unknown, treat the tag as a struct.
527 			 */
528 			if (tp->ctt_type == CTF_K_UNKNOWN ||
529 			    tp->ctt_type >= CTF_K_MAX)
530 				pop[CTF_K_STRUCT]++;
531 			else
532 				pop[tp->ctt_type]++;
533 			/*FALLTHRU*/
534 		case CTF_K_UNKNOWN:
535 			vbytes = 0;
536 			break;
537 		case CTF_K_POINTER:
538 		case CTF_K_TYPEDEF:
539 		case CTF_K_VOLATILE:
540 		case CTF_K_CONST:
541 		case CTF_K_RESTRICT:
542 			vbytes = 0;
543 			break;
544 		default:
545 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
546 			return (EIO);
547 		}
548 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
549 		pop[kind]++;
550 	}
551 
552 	/* account for a sentinel value below */
553 	ctf_typemax++;
554 	*lc->typlenp = ctf_typemax;
555 
556 	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
557 	    M_ZERO | M_WAITOK);
558 
559 	*lc->typoffp = xp;
560 
561 	/* type id 0 is used as a sentinel value */
562 	*xp++ = 0;
563 
564 	/*
565 	 * In the second pass, fill in the type offset.
566 	 */
567 	for (tp = tbuf; tp < tend; xp++) {
568 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
569 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
570 		ssize_t size, increment;
571 
572 		size_t vbytes;
573 
574 		(void) fbt_get_ctt_size(version, tp, &size, &increment);
575 
576 		switch (kind) {
577 		case CTF_K_INTEGER:
578 		case CTF_K_FLOAT:
579 			vbytes = sizeof (uint_t);
580 			break;
581 		case CTF_K_ARRAY:
582 			vbytes = sizeof (ctf_array_t);
583 			break;
584 		case CTF_K_FUNCTION:
585 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
586 			break;
587 		case CTF_K_STRUCT:
588 		case CTF_K_UNION:
589 			if (size < CTF_LSTRUCT_THRESH)
590 				vbytes = sizeof (ctf_member_t) * vlen;
591 			else
592 				vbytes = sizeof (ctf_lmember_t) * vlen;
593 			break;
594 		case CTF_K_ENUM:
595 			vbytes = sizeof (ctf_enum_t) * vlen;
596 			break;
597 		case CTF_K_FORWARD:
598 		case CTF_K_UNKNOWN:
599 			vbytes = 0;
600 			break;
601 		case CTF_K_POINTER:
602 		case CTF_K_TYPEDEF:
603 		case CTF_K_VOLATILE:
604 		case CTF_K_CONST:
605 		case CTF_K_RESTRICT:
606 			vbytes = 0;
607 			break;
608 		default:
609 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
610 			return (EIO);
611 		}
612 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
613 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
614 	}
615 
616 	return (0);
617 }
618 
619 /*
620  * CTF Declaration Stack
621  *
622  * In order to implement ctf_type_name(), we must convert a type graph back
623  * into a C type declaration.  Unfortunately, a type graph represents a storage
624  * class ordering of the type whereas a type declaration must obey the C rules
625  * for operator precedence, and the two orderings are frequently in conflict.
626  * For example, consider these CTF type graphs and their C declarations:
627  *
628  * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
629  * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
630  *
631  * In each case, parentheses are used to raise operator * to higher lexical
632  * precedence, so the string form of the C declaration cannot be constructed by
633  * walking the type graph links and forming the string from left to right.
634  *
635  * The functions in this file build a set of stacks from the type graph nodes
636  * corresponding to the C operator precedence levels in the appropriate order.
637  * The code in ctf_type_name() can then iterate over the levels and nodes in
638  * lexical precedence order and construct the final C declaration string.
639  */
/*
 * Doubly-linked list linkage.  The same structure serves both as the
 * element linkage embedded in a list member and as the list head, where
 * l_next points at the first element and l_prev at the last.
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

/* Accessors for the linkage of an element that starts with a ctf_list_t. */
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
647 
/*
 * Precedence levels used to group declaration nodes.  CTF_PREC_MAX is the
 * number of levels and sizes the per-level arrays in ctf_decl_t.
 */
typedef enum {
	CTF_PREC_BASE,
	CTF_PREC_POINTER,
	CTF_PREC_ARRAY,
	CTF_PREC_FUNCTION,
	CTF_PREC_MAX
} ctf_decl_prec_t;
655 
/*
 * One node of a declaration stack.  The list linkage must remain the first
 * member because the list routines cast element pointers to ctf_list_t.
 */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;
662 
/*
 * State for converting one type into a C declaration string: a node list
 * per precedence level plus the output buffer bookkeeping.
 */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* qualifier precedence */
	ctf_decl_prec_t cd_ordp;		/* ordered precedence */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
674 
675 /*
676  * Simple doubly-linked list append routine.  This implementation assumes that
677  * each list element contains an embedded ctf_list_t as the first member.
678  * An additional ctf_list_t is used to store the head (l_next) and tail
679  * (l_prev) pointers.  The current head and tail list elements have their
680  * previous and next pointers set to NULL, respectively.
681  */
682 static void
683 ctf_list_append(ctf_list_t *lp, void *new)
684 {
685 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
686 	ctf_list_t *q = new;		/* q = new list element */
687 
688 	lp->l_prev = q;
689 	q->l_prev = p;
690 	q->l_next = NULL;
691 
692 	if (p != NULL)
693 		p->l_next = q;
694 	else
695 		lp->l_next = q;
696 }
697 
698 /*
699  * Prepend the specified existing element to the given ctf_list_t.  The
700  * existing pointer should be pointing at a struct with embedded ctf_list_t.
701  */
702 static void
703 ctf_list_prepend(ctf_list_t *lp, void *new)
704 {
705 	ctf_list_t *p = new;		/* p = new list element */
706 	ctf_list_t *q = lp->l_next;	/* q = head list element */
707 
708 	lp->l_next = p;
709 	p->l_prev = NULL;
710 	p->l_next = q;
711 
712 	if (q != NULL)
713 		q->l_prev = p;
714 	else
715 		lp->l_prev = p;
716 }
717 
718 static void
719 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
720 {
721 	int i;
722 
723 	bzero(cd, sizeof (ctf_decl_t));
724 
725 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
726 		cd->cd_order[i] = CTF_PREC_BASE - 1;
727 
728 	cd->cd_qualp = CTF_PREC_BASE;
729 	cd->cd_ordp = CTF_PREC_BASE;
730 
731 	cd->cd_buf = buf;
732 	cd->cd_ptr = buf;
733 	cd->cd_end = buf + len;
734 }
735 
736 static void
737 ctf_decl_fini(ctf_decl_t *cd)
738 {
739 	ctf_decl_node_t *cdp, *ndp;
740 	int i;
741 
742 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
743 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
744 		    cdp != NULL; cdp = ndp) {
745 			ndp = ctf_list_next(cdp);
746 			free(cdp, M_FBT);
747 		}
748 	}
749 }
750 
751 static const ctf_type_t *
752 ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
753 {
754 	const ctf_type_t *tp;
755 	uint32_t offset;
756 	uint32_t *typoff = *lc->typoffp;
757 
758 	if (type >= *lc->typlenp) {
759 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
760 		return(NULL);
761 	}
762 
763 	/* Check if the type isn't cross-referenced. */
764 	if ((offset = typoff[type]) == 0) {
765 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
766 		return(NULL);
767 	}
768 
769 	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
770 
771 	return (tp);
772 }
773 
774 static void
775 fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
776 {
777 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
778 	const ctf_type_t *tp;
779 	const ctf_array_t *ap;
780 	ssize_t increment;
781 
782 	bzero(arp, sizeof(*arp));
783 
784 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
785 		return;
786 
787 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
788 		return;
789 
790 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
791 
792 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
793 	arp->ctr_contents = ap->cta_contents;
794 	arp->ctr_index = ap->cta_index;
795 	arp->ctr_nelems = ap->cta_nelems;
796 }
797 
798 static const char *
799 ctf_strptr(linker_ctf_t *lc, int name)
800 {
801 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
802 	const char *strp = "";
803 
804 	if (name < 0 || name >= hp->cth_strlen)
805 		return(strp);
806 
807 	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
808 
809 	return (strp);
810 }
811 
/*
 * Push a declaration node for "type" onto the stack matching its
 * precedence, first recursing into the type it refers to (array contents,
 * function return type, pointer target, qualified type).  An unnamed
 * typedef is skipped in favour of the type it aliases.
 *
 * If the type cannot be looked up, cd->cd_err is set to ENOENT and nothing
 * is pushed for it.
 */
static void
ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
{
	ctf_decl_node_t *cdp;
	ctf_decl_prec_t prec;
	uint_t kind, n = 1;
	int is_qual = 0;

	const ctf_type_t *tp;
	ctf_arinfo_t ar;

	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
		cd->cd_err = ENOENT;
		return;
	}

	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
	case CTF_K_ARRAY:
		fbt_array_info(lc, type, &ar);
		ctf_decl_push(cd, lc, ar.ctr_contents);
		n = ar.ctr_nelems;
		prec = CTF_PREC_ARRAY;
		break;

	case CTF_K_TYPEDEF:
		/* A typedef without a name contributes no node of its own. */
		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
			ctf_decl_push(cd, lc, tp->ctt_type);
			return;
		}
		prec = CTF_PREC_BASE;
		break;

	case CTF_K_FUNCTION:
		ctf_decl_push(cd, lc, tp->ctt_type);
		prec = CTF_PREC_FUNCTION;
		break;

	case CTF_K_POINTER:
		ctf_decl_push(cd, lc, tp->ctt_type);
		prec = CTF_PREC_POINTER;
		break;

	case CTF_K_VOLATILE:
	case CTF_K_CONST:
	case CTF_K_RESTRICT:
		/*
		 * A qualifier lands on whatever level cd_qualp names once the
		 * qualified type has been pushed.
		 */
		ctf_decl_push(cd, lc, tp->ctt_type);
		prec = cd->cd_qualp;
		is_qual++;
		break;

	default:
		prec = CTF_PREC_BASE;
	}

	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
	cdp->cd_type = type;
	cdp->cd_kind = kind;
	cdp->cd_n = n;

	/* First node at this level: record the order it was reached in. */
	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
		cd->cd_order[prec] = cd->cd_ordp++;

	/*
	 * Reset cd_qualp to the highest precedence level that we've seen so
	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
	 */
	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
		cd->cd_qualp = prec;

	/*
	 * C array declarators are ordered inside out so prepend them.  Also by
	 * convention qualifiers of base types precede the type specifier (e.g.
	 * const int vs. int const) even though the two forms are equivalent.
	 */
	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
	else
		ctf_list_append(&cd->cd_nodes[prec], cdp);
}
891 
892 static void
893 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
894 {
895 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
896 	va_list ap;
897 	size_t n;
898 
899 	va_start(ap, format);
900 	n = vsnprintf(cd->cd_ptr, len, format, ap);
901 	va_end(ap);
902 
903 	cd->cd_ptr += MIN(n, len);
904 	cd->cd_len += n;
905 }
906 
907 static ssize_t
908 fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
909 {
910 	ctf_decl_t cd;
911 	ctf_decl_node_t *cdp;
912 	ctf_decl_prec_t prec, lp, rp;
913 	int ptr, arr;
914 	uint_t k;
915 
916 	if (lc == NULL && type == CTF_ERR)
917 		return (-1); /* simplify caller code by permitting CTF_ERR */
918 
919 	ctf_decl_init(&cd, buf, len);
920 	ctf_decl_push(&cd, lc, type);
921 
922 	if (cd.cd_err != 0) {
923 		ctf_decl_fini(&cd);
924 		return (-1);
925 	}
926 
927 	/*
928 	 * If the type graph's order conflicts with lexical precedence order
929 	 * for pointers or arrays, then we need to surround the declarations at
930 	 * the corresponding lexical precedence with parentheses.  This can
931 	 * result in either a parenthesized pointer (*) as in int (*)() or
932 	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
933 	 */
934 	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
935 	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
936 
937 	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
938 	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
939 
940 	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
941 
942 	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
943 		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
944 		    cdp != NULL; cdp = ctf_list_next(cdp)) {
945 
946 			const ctf_type_t *tp =
947 			    ctf_lookup_by_id(lc, cdp->cd_type);
948 			const char *name = ctf_strptr(lc, tp->ctt_name);
949 
950 			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
951 				ctf_decl_sprintf(&cd, " ");
952 
953 			if (lp == prec) {
954 				ctf_decl_sprintf(&cd, "(");
955 				lp = -1;
956 			}
957 
958 			switch (cdp->cd_kind) {
959 			case CTF_K_INTEGER:
960 			case CTF_K_FLOAT:
961 			case CTF_K_TYPEDEF:
962 				ctf_decl_sprintf(&cd, "%s", name);
963 				break;
964 			case CTF_K_POINTER:
965 				ctf_decl_sprintf(&cd, "*");
966 				break;
967 			case CTF_K_ARRAY:
968 				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
969 				break;
970 			case CTF_K_FUNCTION:
971 				ctf_decl_sprintf(&cd, "()");
972 				break;
973 			case CTF_K_STRUCT:
974 			case CTF_K_FORWARD:
975 				ctf_decl_sprintf(&cd, "struct %s", name);
976 				break;
977 			case CTF_K_UNION:
978 				ctf_decl_sprintf(&cd, "union %s", name);
979 				break;
980 			case CTF_K_ENUM:
981 				ctf_decl_sprintf(&cd, "enum %s", name);
982 				break;
983 			case CTF_K_VOLATILE:
984 				ctf_decl_sprintf(&cd, "volatile");
985 				break;
986 			case CTF_K_CONST:
987 				ctf_decl_sprintf(&cd, "const");
988 				break;
989 			case CTF_K_RESTRICT:
990 				ctf_decl_sprintf(&cd, "restrict");
991 				break;
992 			}
993 
994 			k = cdp->cd_kind;
995 		}
996 
997 		if (rp == prec)
998 			ctf_decl_sprintf(&cd, ")");
999 	}
1000 
1001 	ctf_decl_fini(&cd);
1002 	return (cd.cd_len);
1003 }
1004 
1005 static void
1006 fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1007 {
1008 	const ushort_t *dp;
1009 	fbt_probe_t *fbt = parg;
1010 	linker_ctf_t lc;
1011 	modctl_t *ctl = fbt->fbtp_ctl;
1012 	int ndx = desc->dtargd_ndx;
1013 	int symindx = fbt->fbtp_symindx;
1014 	uint32_t *ctfoff;
1015 	uint32_t offset;
1016 	ushort_t info, kind, n;
1017 
1018 	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1019 		(void) strcpy(desc->dtargd_native, "int");
1020 		return;
1021 	}
1022 
1023 	desc->dtargd_ndx = DTRACE_ARGNONE;
1024 
1025 	/* Get a pointer to the CTF data and it's length. */
1026 	if (linker_ctf_get(ctl, &lc) != 0)
1027 		/* No CTF data? Something wrong? *shrug* */
1028 		return;
1029 
1030 	/* Check if this module hasn't been initialised yet. */
1031 	if (*lc.ctfoffp == NULL) {
1032 		/*
1033 		 * Initialise the CTF object and function symindx to
1034 		 * byte offset array.
1035 		 */
1036 		if (fbt_ctfoff_init(ctl, &lc) != 0)
1037 			return;
1038 
1039 		/* Initialise the CTF type to byte offset array. */
1040 		if (fbt_typoff_init(&lc) != 0)
1041 			return;
1042 	}
1043 
1044 	ctfoff = *lc.ctfoffp;
1045 
1046 	if (ctfoff == NULL || *lc.typoffp == NULL)
1047 		return;
1048 
1049 	/* Check if the symbol index is out of range. */
1050 	if (symindx >= lc.nsym)
1051 		return;
1052 
1053 	/* Check if the symbol isn't cross-referenced. */
1054 	if ((offset = ctfoff[symindx]) == 0xffffffff)
1055 		return;
1056 
1057 	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1058 
1059 	info = *dp++;
1060 	kind = CTF_INFO_KIND(info);
1061 	n = CTF_INFO_VLEN(info);
1062 
1063 	if (kind == CTF_K_UNKNOWN && n == 0) {
1064 		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1065 		return;
1066 	}
1067 
1068 	if (kind != CTF_K_FUNCTION) {
1069 		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1070 		return;
1071 	}
1072 
1073 	if (fbt->fbtp_roffset != 0) {
1074 		/* Only return type is available for args[1] in return probe. */
1075 		if (ndx > 1)
1076 			return;
1077 		ASSERT(ndx == 1);
1078 	} else {
1079 		/* Check if the requested argument doesn't exist. */
1080 		if (ndx >= n)
1081 			return;
1082 
1083 		/* Skip the return type and arguments up to the one requested. */
1084 		dp += ndx + 1;
1085 	}
1086 
1087 	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1088 		desc->dtargd_ndx = ndx;
1089 
1090 	return;
1091 }
1092 
1093 static int
1094 fbt_linker_file_cb(linker_file_t lf, void *arg)
1095 {
1096 
1097 	fbt_provide_module(arg, lf);
1098 
1099 	return (0);
1100 }
1101 
1102 static void
1103 fbt_load(void *dummy)
1104 {
1105 	/* Create the /dev/dtrace/fbt entry. */
1106 	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1107 	    "dtrace/fbt");
1108 
1109 	/* Default the probe table size if not specified. */
1110 	if (fbt_probetab_size == 0)
1111 		fbt_probetab_size = FBT_PROBETAB_SIZE;
1112 
1113 	/* Choose the hash mask for the probe table. */
1114 	fbt_probetab_mask = fbt_probetab_size - 1;
1115 
1116 	/* Allocate memory for the probe table. */
1117 	fbt_probetab =
1118 	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1119 
1120 	dtrace_doubletrap_func = fbt_doubletrap;
1121 	dtrace_invop_add(fbt_invop);
1122 
1123 	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1124 	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1125 		return;
1126 
1127 	/* Create probes for the kernel and already-loaded modules. */
1128 	linker_file_foreach(fbt_linker_file_cb, NULL);
1129 }
1130 
1131 static int
1132 fbt_unload()
1133 {
1134 	int error = 0;
1135 
1136 	/* De-register the invalid opcode handler. */
1137 	dtrace_invop_remove(fbt_invop);
1138 
1139 	dtrace_doubletrap_func = NULL;
1140 
1141 	/* De-register this DTrace provider. */
1142 	if ((error = dtrace_unregister(fbt_id)) != 0)
1143 		return (error);
1144 
1145 	/* Free the probe table. */
1146 	free(fbt_probetab, M_FBT);
1147 	fbt_probetab = NULL;
1148 	fbt_probetab_mask = 0;
1149 
1150 	destroy_dev(fbt_cdev);
1151 
1152 	return (error);
1153 }
1154 
1155 static int
1156 fbt_modevent(module_t mod __unused, int type, void *data __unused)
1157 {
1158 	int error = 0;
1159 
1160 	switch (type) {
1161 	case MOD_LOAD:
1162 		break;
1163 
1164 	case MOD_UNLOAD:
1165 		break;
1166 
1167 	case MOD_SHUTDOWN:
1168 		break;
1169 
1170 	default:
1171 		error = EOPNOTSUPP;
1172 		break;
1173 
1174 	}
1175 
1176 	return (error);
1177 }
1178 
1179 static int
1180 fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1181 {
1182 	return (0);
1183 }
1184 
/* Run fbt_load()/fbt_unload() at the DTrace provider sysinit stage. */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/* Module declaration, version and dependencies. */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1191 MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1192