xref: /freebsd/sys/cddl/dev/fbt/fbt.c (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22  *
23  * $FreeBSD$
24  *
25  */
26 
27 /*
28  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29  * Use is subject to license terms.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cpuvar.h>
37 #include <sys/endian.h>
38 #include <sys/fcntl.h>
39 #include <sys/filio.h>
40 #include <sys/kdb.h>
41 #include <sys/kernel.h>
42 #include <sys/kmem.h>
43 #include <sys/kthread.h>
44 #include <sys/limits.h>
45 #include <sys/linker.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/pcpu.h>
51 #include <sys/poll.h>
52 #include <sys/proc.h>
53 #include <sys/selinfo.h>
54 #include <sys/smp.h>
55 #include <sys/syscall.h>
56 #include <sys/sysent.h>
57 #include <sys/sysproto.h>
58 #include <sys/uio.h>
59 #include <sys/unistd.h>
60 #include <machine/stdarg.h>
61 
62 #include <sys/dtrace.h>
63 #include <sys/dtrace_bsd.h>
64 
65 #include "fbt.h"
66 
/* Kernel malloc type used for the FBT allocations made in this file. */
MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/* Provider id, filled in by dtrace_register() in fbt_load(). */
dtrace_provider_id_t	fbt_id;
/* Probe hash table; buckets are selected with FBT_ADDR2NDX(patchpoint). */
fbt_probe_t		**fbt_probetab;
/* Set to fbt_probetab_size - 1 in fbt_load(). */
int			fbt_probetab_mask;
72 
/* Forward declarations for the device entry point and provider callbacks. */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);

/*
 * Character device switch for the "dtrace/fbt" node created in fbt_load().
 * Only open is implemented, and fbt_open() simply succeeds.
 */
static struct cdevsw fbt_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= fbt_open,
	.d_name		= "fbt",
};
89 
/*
 * Stability attributes passed to dtrace_register() in fbt_load().
 * NOTE(review): the five rows presumably describe the provider, module,
 * function, name and arguments in that order -- confirm against the
 * dtrace_pattr_t definition in <sys/dtrace.h>.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
97 
/*
 * Provider operations vector passed to dtrace_register() in fbt_load().
 * Probes are created per module (fbt_provide_module); the provide,
 * getargval and usermode operations are left unimplemented (NULL).
 */
static dtrace_pops_t fbt_pops = {
	.dtps_provide =		NULL,
	.dtps_provide_module =	fbt_provide_module,
	.dtps_enable =		fbt_enable,
	.dtps_disable =		fbt_disable,
	.dtps_suspend =		fbt_suspend,
	.dtps_resume =		fbt_resume,
	.dtps_getargdesc =	fbt_getargdesc,
	.dtps_getargval =	NULL,
	.dtps_usermode =	NULL,
	.dtps_destroy =		fbt_destroy
};
110 
/* Device node created by make_dev() in fbt_load(). */
static struct cdev		*fbt_cdev;
/* Number of hash buckets; 0 makes fbt_load() use FBT_PROBETAB_SIZE. */
static int			fbt_probetab_size;
/* When non-zero, fbt_enable() reports probes it refuses to enable. */
static int			fbt_verbose = 0;
114 
/*
 * Decide whether the function called "name" must be kept out of FBT
 * instrumentation.
 *
 * Returns 1 when the function is excluded and 0 when it may be traced.
 */
int
fbt_excluded(const char *name)
{
	/* Lock owner methods may be called from probe context. */
	static const char *const lock_owner_fns[] = {
		"owner_mtx",
		"owner_rm",
		"owner_rw",
		"owner_sx",
	};
	size_t k;

	/*
	 * A "dtrace_" function may run in probe context unless it opts out
	 * by carrying the longer "dtrace_safe_" prefix.
	 */
	if (strncmp(name, "dtrace_", 7) == 0) {
		if (strncmp(name, "dtrace_safe_", 12) != 0)
			return (1);
	}

	for (k = 0; k < sizeof(lock_owner_fns) / sizeof(lock_owner_fns[0]);
	    k++) {
		if (strcmp(name, lock_owner_fns[k]) == 0)
			return (1);
	}

#ifndef _KLD_MODULE
	/*
	 * When DTrace is built into the kernel the FBT functions themselves
	 * must not be instrumented.
	 */
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
150 
151 static void
152 fbt_doubletrap(void)
153 {
154 	fbt_probe_t *fbt;
155 	int i;
156 
157 	for (i = 0; i < fbt_probetab_size; i++) {
158 		fbt = fbt_probetab[i];
159 
160 		for (; fbt != NULL; fbt = fbt->fbtp_probenext)
161 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
162 	}
163 }
164 
165 static void
166 fbt_provide_module(void *arg, modctl_t *lf)
167 {
168 	char modname[MAXPATHLEN];
169 	int i;
170 	size_t len;
171 
172 	strlcpy(modname, lf->filename, sizeof(modname));
173 	len = strlen(modname);
174 	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
175 		modname[len - 3] = '\0';
176 
177 	/*
178 	 * Employees of dtrace and their families are ineligible.  Void
179 	 * where prohibited.
180 	 */
181 	if (strcmp(modname, "dtrace") == 0)
182 		return;
183 
184 	/*
185 	 * To register with DTrace, a module must list 'dtrace' as a
186 	 * dependency in order for the kernel linker to resolve
187 	 * symbols like dtrace_register(). All modules with such a
188 	 * dependency are ineligible for FBT tracing.
189 	 */
190 	for (i = 0; i < lf->ndeps; i++)
191 		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
192 			return;
193 
194 	if (lf->fbt_nentries) {
195 		/*
196 		 * This module has some FBT entries allocated; we're afraid
197 		 * to screw with it.
198 		 */
199 		return;
200 	}
201 
202 	/*
203 	 * List the functions in the module and the symbol values.
204 	 */
205 	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
206 }
207 
208 static void
209 fbt_destroy_one(fbt_probe_t *fbt)
210 {
211 	fbt_probe_t *hash, *hashprev, *next;
212 	int ndx;
213 
214 	ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
215 	for (hash = fbt_probetab[ndx], hashprev = NULL; hash != NULL;
216 	    hashprev = hash, hash = hash->fbtp_hashnext) {
217 		if (hash == fbt) {
218 			if ((next = fbt->fbtp_tracenext) != NULL)
219 				next->fbtp_hashnext = hash->fbtp_hashnext;
220 			else
221 				next = hash->fbtp_hashnext;
222 			if (hashprev != NULL)
223 				hashprev->fbtp_hashnext = next;
224 			else
225 				fbt_probetab[ndx] = next;
226 			goto free;
227 		} else if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
228 			for (next = hash; next->fbtp_tracenext != NULL;
229 			    next = next->fbtp_tracenext) {
230 				if (fbt == next->fbtp_tracenext) {
231 					next->fbtp_tracenext =
232 					    fbt->fbtp_tracenext;
233 					goto free;
234 				}
235 			}
236 		}
237 	}
238 	panic("probe %p not found in hash table", fbt);
239 free:
240 	free(fbt, M_FBT);
241 }
242 
243 static void
244 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
245 {
246 	fbt_probe_t *fbt = parg, *next;
247 	modctl_t *ctl;
248 
249 	do {
250 		ctl = fbt->fbtp_ctl;
251 		ctl->fbt_nentries--;
252 
253 		next = fbt->fbtp_probenext;
254 		fbt_destroy_one(fbt);
255 		fbt = next;
256 	} while (fbt != NULL);
257 }
258 
259 static void
260 fbt_enable(void *arg, dtrace_id_t id, void *parg)
261 {
262 	fbt_probe_t *fbt = parg;
263 	modctl_t *ctl = fbt->fbtp_ctl;
264 
265 	ctl->nenabled++;
266 
267 	/*
268 	 * Now check that our modctl has the expected load count.  If it
269 	 * doesn't, this module must have been unloaded and reloaded -- and
270 	 * we're not going to touch it.
271 	 */
272 	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
273 		if (fbt_verbose) {
274 			printf("fbt is failing for probe %s "
275 			    "(module %s reloaded)",
276 			    fbt->fbtp_name, ctl->filename);
277 		}
278 
279 		return;
280 	}
281 
282 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
283 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
284 		fbt->fbtp_enabled++;
285 	}
286 }
287 
288 static void
289 fbt_disable(void *arg, dtrace_id_t id, void *parg)
290 {
291 	fbt_probe_t *fbt = parg, *hash;
292 	modctl_t *ctl = fbt->fbtp_ctl;
293 
294 	ASSERT(ctl->nenabled > 0);
295 	ctl->nenabled--;
296 
297 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
298 		return;
299 
300 	for (; fbt != NULL; fbt = fbt->fbtp_probenext) {
301 		fbt->fbtp_enabled--;
302 
303 		for (hash = fbt_probetab[FBT_ADDR2NDX(fbt->fbtp_patchpoint)];
304 		    hash != NULL; hash = hash->fbtp_hashnext) {
305 			if (hash->fbtp_patchpoint == fbt->fbtp_patchpoint) {
306 				for (; hash != NULL; hash = hash->fbtp_tracenext)
307 					if (hash->fbtp_enabled > 0)
308 						break;
309 				break;
310 			}
311 		}
312 		if (hash == NULL)
313 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
314 	}
315 }
316 
317 static void
318 fbt_suspend(void *arg, dtrace_id_t id, void *parg)
319 {
320 	fbt_probe_t *fbt = parg;
321 	modctl_t *ctl = fbt->fbtp_ctl;
322 
323 	ASSERT(ctl->nenabled > 0);
324 
325 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
326 		return;
327 
328 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
329 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
330 }
331 
332 static void
333 fbt_resume(void *arg, dtrace_id_t id, void *parg)
334 {
335 	fbt_probe_t *fbt = parg;
336 	modctl_t *ctl = fbt->fbtp_ctl;
337 
338 	ASSERT(ctl->nenabled > 0);
339 
340 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
341 		return;
342 
343 	for (; fbt != NULL; fbt = fbt->fbtp_probenext)
344 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
345 }
346 
347 static int
348 fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
349 {
350 	const Elf_Sym *symp = lc->symtab;;
351 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
352 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
353 	int i;
354 	uint32_t *ctfoff;
355 	uint32_t objtoff = hp->cth_objtoff;
356 	uint32_t funcoff = hp->cth_funcoff;
357 	ushort_t info;
358 	ushort_t vlen;
359 
360 	/* Sanity check. */
361 	if (hp->cth_magic != CTF_MAGIC) {
362 		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
363 		return (EINVAL);
364 	}
365 
366 	if (lc->symtab == NULL) {
367 		printf("No symbol table in '%s'\n",lf->pathname);
368 		return (EINVAL);
369 	}
370 
371 	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
372 	*lc->ctfoffp = ctfoff;
373 
374 	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
375 		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
376 			*ctfoff = 0xffffffff;
377 			continue;
378 		}
379 
380 		switch (ELF_ST_TYPE(symp->st_info)) {
381 		case STT_OBJECT:
382 			if (objtoff >= hp->cth_funcoff ||
383                             (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
384 				*ctfoff = 0xffffffff;
385                                 break;
386                         }
387 
388                         *ctfoff = objtoff;
389                         objtoff += sizeof (ushort_t);
390 			break;
391 
392 		case STT_FUNC:
393 			if (funcoff >= hp->cth_typeoff) {
394 				*ctfoff = 0xffffffff;
395 				break;
396 			}
397 
398 			*ctfoff = funcoff;
399 
400 			info = *((const ushort_t *)(ctfdata + funcoff));
401 			vlen = CTF_INFO_VLEN(info);
402 
403 			/*
404 			 * If we encounter a zero pad at the end, just skip it.
405 			 * Otherwise skip over the function and its return type
406 			 * (+2) and the argument list (vlen).
407 			 */
408 			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
409 				funcoff += sizeof (ushort_t); /* skip pad */
410 			else
411 				funcoff += sizeof (ushort_t) * (vlen + 2);
412 			break;
413 
414 		default:
415 			*ctfoff = 0xffffffff;
416 			break;
417 		}
418 	}
419 
420 	return (0);
421 }
422 
423 static ssize_t
424 fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
425     ssize_t *incrementp)
426 {
427 	ssize_t size, increment;
428 
429 	if (version > CTF_VERSION_1 &&
430 	    tp->ctt_size == CTF_LSIZE_SENT) {
431 		size = CTF_TYPE_LSIZE(tp);
432 		increment = sizeof (ctf_type_t);
433 	} else {
434 		size = tp->ctt_size;
435 		increment = sizeof (ctf_stype_t);
436 	}
437 
438 	if (sizep)
439 		*sizep = size;
440 	if (incrementp)
441 		*incrementp = increment;
442 
443 	return (size);
444 }
445 
446 static int
447 fbt_typoff_init(linker_ctf_t *lc)
448 {
449 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
450 	const ctf_type_t *tbuf;
451 	const ctf_type_t *tend;
452 	const ctf_type_t *tp;
453 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
454 	int ctf_typemax = 0;
455 	uint32_t *xp;
456 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
457 
458 
459 	/* Sanity check. */
460 	if (hp->cth_magic != CTF_MAGIC)
461 		return (EINVAL);
462 
463 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
464 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
465 
466 	int child = hp->cth_parname != 0;
467 
468 	/*
469 	 * We make two passes through the entire type section.  In this first
470 	 * pass, we count the number of each type and the total number of types.
471 	 */
472 	for (tp = tbuf; tp < tend; ctf_typemax++) {
473 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
474 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
475 		ssize_t size, increment;
476 
477 		size_t vbytes;
478 		uint_t n;
479 
480 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
481 
482 		switch (kind) {
483 		case CTF_K_INTEGER:
484 		case CTF_K_FLOAT:
485 			vbytes = sizeof (uint_t);
486 			break;
487 		case CTF_K_ARRAY:
488 			vbytes = sizeof (ctf_array_t);
489 			break;
490 		case CTF_K_FUNCTION:
491 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
492 			break;
493 		case CTF_K_STRUCT:
494 		case CTF_K_UNION:
495 			if (size < CTF_LSTRUCT_THRESH) {
496 				ctf_member_t *mp = (ctf_member_t *)
497 				    ((uintptr_t)tp + increment);
498 
499 				vbytes = sizeof (ctf_member_t) * vlen;
500 				for (n = vlen; n != 0; n--, mp++)
501 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
502 			} else {
503 				ctf_lmember_t *lmp = (ctf_lmember_t *)
504 				    ((uintptr_t)tp + increment);
505 
506 				vbytes = sizeof (ctf_lmember_t) * vlen;
507 				for (n = vlen; n != 0; n--, lmp++)
508 					child |=
509 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
510 			}
511 			break;
512 		case CTF_K_ENUM:
513 			vbytes = sizeof (ctf_enum_t) * vlen;
514 			break;
515 		case CTF_K_FORWARD:
516 			/*
517 			 * For forward declarations, ctt_type is the CTF_K_*
518 			 * kind for the tag, so bump that population count too.
519 			 * If ctt_type is unknown, treat the tag as a struct.
520 			 */
521 			if (tp->ctt_type == CTF_K_UNKNOWN ||
522 			    tp->ctt_type >= CTF_K_MAX)
523 				pop[CTF_K_STRUCT]++;
524 			else
525 				pop[tp->ctt_type]++;
526 			/*FALLTHRU*/
527 		case CTF_K_UNKNOWN:
528 			vbytes = 0;
529 			break;
530 		case CTF_K_POINTER:
531 		case CTF_K_TYPEDEF:
532 		case CTF_K_VOLATILE:
533 		case CTF_K_CONST:
534 		case CTF_K_RESTRICT:
535 			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
536 			vbytes = 0;
537 			break;
538 		default:
539 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
540 			return (EIO);
541 		}
542 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
543 		pop[kind]++;
544 	}
545 
546 	/* account for a sentinel value below */
547 	ctf_typemax++;
548 	*lc->typlenp = ctf_typemax;
549 
550 	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
551 	    M_ZERO | M_WAITOK);
552 
553 	*lc->typoffp = xp;
554 
555 	/* type id 0 is used as a sentinel value */
556 	*xp++ = 0;
557 
558 	/*
559 	 * In the second pass, fill in the type offset.
560 	 */
561 	for (tp = tbuf; tp < tend; xp++) {
562 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
563 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
564 		ssize_t size, increment;
565 
566 		size_t vbytes;
567 		uint_t n;
568 
569 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
570 
571 		switch (kind) {
572 		case CTF_K_INTEGER:
573 		case CTF_K_FLOAT:
574 			vbytes = sizeof (uint_t);
575 			break;
576 		case CTF_K_ARRAY:
577 			vbytes = sizeof (ctf_array_t);
578 			break;
579 		case CTF_K_FUNCTION:
580 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
581 			break;
582 		case CTF_K_STRUCT:
583 		case CTF_K_UNION:
584 			if (size < CTF_LSTRUCT_THRESH) {
585 				ctf_member_t *mp = (ctf_member_t *)
586 				    ((uintptr_t)tp + increment);
587 
588 				vbytes = sizeof (ctf_member_t) * vlen;
589 				for (n = vlen; n != 0; n--, mp++)
590 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
591 			} else {
592 				ctf_lmember_t *lmp = (ctf_lmember_t *)
593 				    ((uintptr_t)tp + increment);
594 
595 				vbytes = sizeof (ctf_lmember_t) * vlen;
596 				for (n = vlen; n != 0; n--, lmp++)
597 					child |=
598 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
599 			}
600 			break;
601 		case CTF_K_ENUM:
602 			vbytes = sizeof (ctf_enum_t) * vlen;
603 			break;
604 		case CTF_K_FORWARD:
605 		case CTF_K_UNKNOWN:
606 			vbytes = 0;
607 			break;
608 		case CTF_K_POINTER:
609 		case CTF_K_TYPEDEF:
610 		case CTF_K_VOLATILE:
611 		case CTF_K_CONST:
612 		case CTF_K_RESTRICT:
613 			vbytes = 0;
614 			break;
615 		default:
616 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
617 			return (EIO);
618 		}
619 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
620 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
621 	}
622 
623 	return (0);
624 }
625 
626 /*
627  * CTF Declaration Stack
628  *
629  * In order to implement ctf_type_name(), we must convert a type graph back
630  * into a C type declaration.  Unfortunately, a type graph represents a storage
631  * class ordering of the type whereas a type declaration must obey the C rules
632  * for operator precedence, and the two orderings are frequently in conflict.
633  * For example, consider these CTF type graphs and their C declarations:
634  *
635  * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
636  * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
637  *
638  * In each case, parentheses are used to raise operator * to higher lexical
639  * precedence, so the string form of the C declaration cannot be constructed by
640  * walking the type graph links and forming the string from left to right.
641  *
642  * The functions in this file build a set of stacks from the type graph nodes
643  * corresponding to the C operator precedence levels in the appropriate order.
644  * The code in ctf_type_name() can then iterate over the levels and nodes in
645  * lexical precedence order and construct the final C declaration string.
646  */
647 typedef struct ctf_list {
648 	struct ctf_list *l_prev; /* previous pointer or tail pointer */
649 	struct ctf_list *l_next; /* next pointer or head pointer */
650 } ctf_list_t;
651 
652 #define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
653 #define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))
654 
655 typedef enum {
656 	CTF_PREC_BASE,
657 	CTF_PREC_POINTER,
658 	CTF_PREC_ARRAY,
659 	CTF_PREC_FUNCTION,
660 	CTF_PREC_MAX
661 } ctf_decl_prec_t;
662 
663 typedef struct ctf_decl_node {
664 	ctf_list_t cd_list;			/* linked list pointers */
665 	ctf_id_t cd_type;			/* type identifier */
666 	uint_t cd_kind;				/* type kind */
667 	uint_t cd_n;				/* type dimension if array */
668 } ctf_decl_node_t;
669 
670 typedef struct ctf_decl {
671 	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
672 	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
673 	ctf_decl_prec_t cd_qualp;		/* qualifier precision */
674 	ctf_decl_prec_t cd_ordp;		/* ordered precision */
675 	char *cd_buf;				/* buffer for output */
676 	char *cd_ptr;				/* buffer location */
677 	char *cd_end;				/* buffer limit */
678 	size_t cd_len;				/* buffer space required */
679 	int cd_err;				/* saved error value */
680 } ctf_decl_t;
681 
682 /*
683  * Simple doubly-linked list append routine.  This implementation assumes that
684  * each list element contains an embedded ctf_list_t as the first member.
685  * An additional ctf_list_t is used to store the head (l_next) and tail
686  * (l_prev) pointers.  The current head and tail list elements have their
687  * previous and next pointers set to NULL, respectively.
688  */
689 static void
690 ctf_list_append(ctf_list_t *lp, void *new)
691 {
692 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
693 	ctf_list_t *q = new;		/* q = new list element */
694 
695 	lp->l_prev = q;
696 	q->l_prev = p;
697 	q->l_next = NULL;
698 
699 	if (p != NULL)
700 		p->l_next = q;
701 	else
702 		lp->l_next = q;
703 }
704 
705 /*
706  * Prepend the specified existing element to the given ctf_list_t.  The
707  * existing pointer should be pointing at a struct with embedded ctf_list_t.
708  */
709 static void
710 ctf_list_prepend(ctf_list_t *lp, void *new)
711 {
712 	ctf_list_t *p = new;		/* p = new list element */
713 	ctf_list_t *q = lp->l_next;	/* q = head list element */
714 
715 	lp->l_next = p;
716 	p->l_prev = NULL;
717 	p->l_next = q;
718 
719 	if (q != NULL)
720 		q->l_prev = p;
721 	else
722 		lp->l_prev = p;
723 }
724 
725 static void
726 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
727 {
728 	int i;
729 
730 	bzero(cd, sizeof (ctf_decl_t));
731 
732 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
733 		cd->cd_order[i] = CTF_PREC_BASE - 1;
734 
735 	cd->cd_qualp = CTF_PREC_BASE;
736 	cd->cd_ordp = CTF_PREC_BASE;
737 
738 	cd->cd_buf = buf;
739 	cd->cd_ptr = buf;
740 	cd->cd_end = buf + len;
741 }
742 
743 static void
744 ctf_decl_fini(ctf_decl_t *cd)
745 {
746 	ctf_decl_node_t *cdp, *ndp;
747 	int i;
748 
749 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
750 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
751 		    cdp != NULL; cdp = ndp) {
752 			ndp = ctf_list_next(cdp);
753 			free(cdp, M_FBT);
754 		}
755 	}
756 }
757 
758 static const ctf_type_t *
759 ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
760 {
761 	const ctf_type_t *tp;
762 	uint32_t offset;
763 	uint32_t *typoff = *lc->typoffp;
764 
765 	if (type >= *lc->typlenp) {
766 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
767 		return(NULL);
768 	}
769 
770 	/* Check if the type isn't cross-referenced. */
771 	if ((offset = typoff[type]) == 0) {
772 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
773 		return(NULL);
774 	}
775 
776 	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
777 
778 	return (tp);
779 }
780 
781 static void
782 fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
783 {
784 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
785 	const ctf_type_t *tp;
786 	const ctf_array_t *ap;
787 	ssize_t increment;
788 
789 	bzero(arp, sizeof(*arp));
790 
791 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
792 		return;
793 
794 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
795 		return;
796 
797 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
798 
799 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
800 	arp->ctr_contents = ap->cta_contents;
801 	arp->ctr_index = ap->cta_index;
802 	arp->ctr_nelems = ap->cta_nelems;
803 }
804 
805 static const char *
806 ctf_strptr(linker_ctf_t *lc, int name)
807 {
808 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
809 	const char *strp = "";
810 
811 	if (name < 0 || name >= hp->cth_strlen)
812 		return(strp);
813 
814 	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
815 
816 	return (strp);
817 }
818 
819 static void
820 ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
821 {
822 	ctf_decl_node_t *cdp;
823 	ctf_decl_prec_t prec;
824 	uint_t kind, n = 1;
825 	int is_qual = 0;
826 
827 	const ctf_type_t *tp;
828 	ctf_arinfo_t ar;
829 
830 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
831 		cd->cd_err = ENOENT;
832 		return;
833 	}
834 
835 	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
836 	case CTF_K_ARRAY:
837 		fbt_array_info(lc, type, &ar);
838 		ctf_decl_push(cd, lc, ar.ctr_contents);
839 		n = ar.ctr_nelems;
840 		prec = CTF_PREC_ARRAY;
841 		break;
842 
843 	case CTF_K_TYPEDEF:
844 		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
845 			ctf_decl_push(cd, lc, tp->ctt_type);
846 			return;
847 		}
848 		prec = CTF_PREC_BASE;
849 		break;
850 
851 	case CTF_K_FUNCTION:
852 		ctf_decl_push(cd, lc, tp->ctt_type);
853 		prec = CTF_PREC_FUNCTION;
854 		break;
855 
856 	case CTF_K_POINTER:
857 		ctf_decl_push(cd, lc, tp->ctt_type);
858 		prec = CTF_PREC_POINTER;
859 		break;
860 
861 	case CTF_K_VOLATILE:
862 	case CTF_K_CONST:
863 	case CTF_K_RESTRICT:
864 		ctf_decl_push(cd, lc, tp->ctt_type);
865 		prec = cd->cd_qualp;
866 		is_qual++;
867 		break;
868 
869 	default:
870 		prec = CTF_PREC_BASE;
871 	}
872 
873 	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
874 	cdp->cd_type = type;
875 	cdp->cd_kind = kind;
876 	cdp->cd_n = n;
877 
878 	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
879 		cd->cd_order[prec] = cd->cd_ordp++;
880 
881 	/*
882 	 * Reset cd_qualp to the highest precedence level that we've seen so
883 	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
884 	 */
885 	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
886 		cd->cd_qualp = prec;
887 
888 	/*
889 	 * C array declarators are ordered inside out so prepend them.  Also by
890 	 * convention qualifiers of base types precede the type specifier (e.g.
891 	 * const int vs. int const) even though the two forms are equivalent.
892 	 */
893 	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
894 		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
895 	else
896 		ctf_list_append(&cd->cd_nodes[prec], cdp);
897 }
898 
899 static void
900 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
901 {
902 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
903 	va_list ap;
904 	size_t n;
905 
906 	va_start(ap, format);
907 	n = vsnprintf(cd->cd_ptr, len, format, ap);
908 	va_end(ap);
909 
910 	cd->cd_ptr += MIN(n, len);
911 	cd->cd_len += n;
912 }
913 
914 static ssize_t
915 fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
916 {
917 	ctf_decl_t cd;
918 	ctf_decl_node_t *cdp;
919 	ctf_decl_prec_t prec, lp, rp;
920 	int ptr, arr;
921 	uint_t k;
922 
923 	if (lc == NULL && type == CTF_ERR)
924 		return (-1); /* simplify caller code by permitting CTF_ERR */
925 
926 	ctf_decl_init(&cd, buf, len);
927 	ctf_decl_push(&cd, lc, type);
928 
929 	if (cd.cd_err != 0) {
930 		ctf_decl_fini(&cd);
931 		return (-1);
932 	}
933 
934 	/*
935 	 * If the type graph's order conflicts with lexical precedence order
936 	 * for pointers or arrays, then we need to surround the declarations at
937 	 * the corresponding lexical precedence with parentheses.  This can
938 	 * result in either a parenthesized pointer (*) as in int (*)() or
939 	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
940 	 */
941 	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
942 	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
943 
944 	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
945 	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
946 
947 	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
948 
949 	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
950 		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
951 		    cdp != NULL; cdp = ctf_list_next(cdp)) {
952 
953 			const ctf_type_t *tp =
954 			    ctf_lookup_by_id(lc, cdp->cd_type);
955 			const char *name = ctf_strptr(lc, tp->ctt_name);
956 
957 			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
958 				ctf_decl_sprintf(&cd, " ");
959 
960 			if (lp == prec) {
961 				ctf_decl_sprintf(&cd, "(");
962 				lp = -1;
963 			}
964 
965 			switch (cdp->cd_kind) {
966 			case CTF_K_INTEGER:
967 			case CTF_K_FLOAT:
968 			case CTF_K_TYPEDEF:
969 				ctf_decl_sprintf(&cd, "%s", name);
970 				break;
971 			case CTF_K_POINTER:
972 				ctf_decl_sprintf(&cd, "*");
973 				break;
974 			case CTF_K_ARRAY:
975 				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
976 				break;
977 			case CTF_K_FUNCTION:
978 				ctf_decl_sprintf(&cd, "()");
979 				break;
980 			case CTF_K_STRUCT:
981 			case CTF_K_FORWARD:
982 				ctf_decl_sprintf(&cd, "struct %s", name);
983 				break;
984 			case CTF_K_UNION:
985 				ctf_decl_sprintf(&cd, "union %s", name);
986 				break;
987 			case CTF_K_ENUM:
988 				ctf_decl_sprintf(&cd, "enum %s", name);
989 				break;
990 			case CTF_K_VOLATILE:
991 				ctf_decl_sprintf(&cd, "volatile");
992 				break;
993 			case CTF_K_CONST:
994 				ctf_decl_sprintf(&cd, "const");
995 				break;
996 			case CTF_K_RESTRICT:
997 				ctf_decl_sprintf(&cd, "restrict");
998 				break;
999 			}
1000 
1001 			k = cdp->cd_kind;
1002 		}
1003 
1004 		if (rp == prec)
1005 			ctf_decl_sprintf(&cd, ")");
1006 	}
1007 
1008 	ctf_decl_fini(&cd);
1009 	return (cd.cd_len);
1010 }
1011 
1012 static void
1013 fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
1014 {
1015 	const ushort_t *dp;
1016 	fbt_probe_t *fbt = parg;
1017 	linker_ctf_t lc;
1018 	modctl_t *ctl = fbt->fbtp_ctl;
1019 	int ndx = desc->dtargd_ndx;
1020 	int symindx = fbt->fbtp_symindx;
1021 	uint32_t *ctfoff;
1022 	uint32_t offset;
1023 	ushort_t info, kind, n;
1024 
1025 	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
1026 		(void) strcpy(desc->dtargd_native, "int");
1027 		return;
1028 	}
1029 
1030 	desc->dtargd_ndx = DTRACE_ARGNONE;
1031 
1032 	/* Get a pointer to the CTF data and it's length. */
1033 	if (linker_ctf_get(ctl, &lc) != 0)
1034 		/* No CTF data? Something wrong? *shrug* */
1035 		return;
1036 
1037 	/* Check if this module hasn't been initialised yet. */
1038 	if (*lc.ctfoffp == NULL) {
1039 		/*
1040 		 * Initialise the CTF object and function symindx to
1041 		 * byte offset array.
1042 		 */
1043 		if (fbt_ctfoff_init(ctl, &lc) != 0)
1044 			return;
1045 
1046 		/* Initialise the CTF type to byte offset array. */
1047 		if (fbt_typoff_init(&lc) != 0)
1048 			return;
1049 	}
1050 
1051 	ctfoff = *lc.ctfoffp;
1052 
1053 	if (ctfoff == NULL || *lc.typoffp == NULL)
1054 		return;
1055 
1056 	/* Check if the symbol index is out of range. */
1057 	if (symindx >= lc.nsym)
1058 		return;
1059 
1060 	/* Check if the symbol isn't cross-referenced. */
1061 	if ((offset = ctfoff[symindx]) == 0xffffffff)
1062 		return;
1063 
1064 	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1065 
1066 	info = *dp++;
1067 	kind = CTF_INFO_KIND(info);
1068 	n = CTF_INFO_VLEN(info);
1069 
1070 	if (kind == CTF_K_UNKNOWN && n == 0) {
1071 		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1072 		return;
1073 	}
1074 
1075 	if (kind != CTF_K_FUNCTION) {
1076 		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1077 		return;
1078 	}
1079 
1080 	if (fbt->fbtp_roffset != 0) {
1081 		/* Only return type is available for args[1] in return probe. */
1082 		if (ndx > 1)
1083 			return;
1084 		ASSERT(ndx == 1);
1085 	} else {
1086 		/* Check if the requested argument doesn't exist. */
1087 		if (ndx >= n)
1088 			return;
1089 
1090 		/* Skip the return type and arguments up to the one requested. */
1091 		dp += ndx + 1;
1092 	}
1093 
1094 	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1095 		desc->dtargd_ndx = ndx;
1096 
1097 	return;
1098 }
1099 
1100 static int
1101 fbt_linker_file_cb(linker_file_t lf, void *arg)
1102 {
1103 
1104 	fbt_provide_module(arg, lf);
1105 
1106 	return (0);
1107 }
1108 
1109 static void
1110 fbt_load(void *dummy)
1111 {
1112 	/* Create the /dev/dtrace/fbt entry. */
1113 	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1114 	    "dtrace/fbt");
1115 
1116 	/* Default the probe table size if not specified. */
1117 	if (fbt_probetab_size == 0)
1118 		fbt_probetab_size = FBT_PROBETAB_SIZE;
1119 
1120 	/* Choose the hash mask for the probe table. */
1121 	fbt_probetab_mask = fbt_probetab_size - 1;
1122 
1123 	/* Allocate memory for the probe table. */
1124 	fbt_probetab =
1125 	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1126 
1127 	dtrace_doubletrap_func = fbt_doubletrap;
1128 	dtrace_invop_add(fbt_invop);
1129 
1130 	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1131 	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1132 		return;
1133 
1134 	/* Create probes for the kernel and already-loaded modules. */
1135 	linker_file_foreach(fbt_linker_file_cb, NULL);
1136 }
1137 
1138 static int
1139 fbt_unload()
1140 {
1141 	int error = 0;
1142 
1143 	/* De-register the invalid opcode handler. */
1144 	dtrace_invop_remove(fbt_invop);
1145 
1146 	dtrace_doubletrap_func = NULL;
1147 
1148 	/* De-register this DTrace provider. */
1149 	if ((error = dtrace_unregister(fbt_id)) != 0)
1150 		return (error);
1151 
1152 	/* Free the probe table. */
1153 	free(fbt_probetab, M_FBT);
1154 	fbt_probetab = NULL;
1155 	fbt_probetab_mask = 0;
1156 
1157 	destroy_dev(fbt_cdev);
1158 
1159 	return (error);
1160 }
1161 
1162 static int
1163 fbt_modevent(module_t mod __unused, int type, void *data __unused)
1164 {
1165 	int error = 0;
1166 
1167 	switch (type) {
1168 	case MOD_LOAD:
1169 		break;
1170 
1171 	case MOD_UNLOAD:
1172 		break;
1173 
1174 	case MOD_SHUTDOWN:
1175 		break;
1176 
1177 	default:
1178 		error = EOPNOTSUPP;
1179 		break;
1180 
1181 	}
1182 
1183 	return (error);
1184 }
1185 
1186 static int
1187 fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
1188 {
1189 	return (0);
1190 }
1191 
/* Run fbt_load()/fbt_unload() at the SI_SUB_DTRACE_PROVIDER stage. */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/* Module declaration, version and dependencies. */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1199