xref: /freebsd/sys/cddl/dev/fbt/fbt.c (revision 0f7f3352c8bc463607912e2463d13e52d44a4cae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
22  *
23  * $FreeBSD$
24  *
25  */
26 
27 /*
28  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
29  * Use is subject to license terms.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/conf.h>
36 #include <sys/cpuvar.h>
37 #include <sys/fcntl.h>
38 #include <sys/filio.h>
39 #include <sys/kdb.h>
40 #include <sys/kernel.h>
41 #include <sys/kmem.h>
42 #include <sys/kthread.h>
43 #include <sys/limits.h>
44 #include <sys/linker.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/module.h>
48 #include <sys/mutex.h>
49 #include <sys/pcpu.h>
50 #include <sys/poll.h>
51 #include <sys/proc.h>
52 #include <sys/selinfo.h>
53 #include <sys/smp.h>
54 #include <sys/syscall.h>
55 #include <sys/sysent.h>
56 #include <sys/sysproto.h>
57 #include <sys/uio.h>
58 #include <sys/unistd.h>
59 #include <machine/stdarg.h>
60 
61 #include <sys/dtrace.h>
62 #include <sys/dtrace_bsd.h>
63 
64 #include "fbt.h"
65 
/* Kernel malloc type used for every allocation made by this provider. */
MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing");

/*
 * Provider-wide state.  These are not static, so other parts of the FBT
 * implementation can reference them.
 */
dtrace_provider_id_t	fbt_id;		/* filled in by dtrace_register() in fbt_load() */
fbt_probe_t		**fbt_probetab;	/* probe hash table, indexed with FBT_ADDR2NDX() */
int			fbt_probetab_mask;	/* fbt_probetab_size - 1, set in fbt_load() */

/*
 * Forward declarations for the character-device entry point, the DTrace
 * provider operations and the load/unload hooks defined later in this file.
 */
static d_open_t	fbt_open;
static int	fbt_unload(void);
static void	fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
static void	fbt_provide_module(void *, modctl_t *);
static void	fbt_destroy(void *, dtrace_id_t, void *);
static void	fbt_enable(void *, dtrace_id_t, void *);
static void	fbt_disable(void *, dtrace_id_t, void *);
static void	fbt_load(void *);
static void	fbt_suspend(void *, dtrace_id_t, void *);
static void	fbt_resume(void *, dtrace_id_t, void *);
82 
/*
 * Character device switch for the "dtrace/fbt" device node created in
 * fbt_load().  Only open is implemented; see fbt_open().
 */
static struct cdevsw fbt_cdevsw = {
	.d_version	= D_VERSION,
	.d_open		= fbt_open,
	.d_name		= "fbt",
};
88 
/*
 * Stability attributes handed to dtrace_register() in fbt_load().
 * NOTE(review): the five rows presumably describe, in order, the provider,
 * module, function, name and arguments attributes of dtrace_pattr_t --
 * confirm against <sys/dtrace.h>.
 */
static dtrace_pattr_t fbt_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
96 
/*
 * Provider operations vector handed to dtrace_register() in fbt_load().
 * The initializer is positional, so the order of the entries matters.
 * NOTE(review): the meaning of the NULL slots is defined by dtrace_pops_t,
 * which is not visible here -- confirm against <sys/dtrace.h>.
 */
static dtrace_pops_t fbt_pops = {
	NULL,			/* operation not implemented by FBT */
	fbt_provide_module,	/* create probes for one linker file */
	fbt_enable,		/* patch tracepoints in */
	fbt_disable,		/* restore original instructions */
	fbt_suspend,		/* temporarily restore original instructions */
	fbt_resume,		/* re-apply the patches after a suspend */
	fbt_getargdesc,		/* describe a probe argument's type */
	NULL,			/* operation not implemented by FBT */
	NULL,			/* operation not implemented by FBT */
	fbt_destroy		/* free a probe chain */
};
109 
/* File-private provider state. */
static struct cdev		*fbt_cdev;		/* device node made in fbt_load() */
static int			fbt_probetab_size;	/* 0 means "use FBT_PROBETAB_SIZE" */
static int			fbt_verbose = 0;	/* non-zero enables the fbt_enable() diagnostic */
113 
/*
 * Report whether the function called 'name' must be kept away from FBT
 * instrumentation.
 *
 * Returns 1 if the function is off limits and 0 if it may be traced.
 */
int
fbt_excluded(const char *name)
{
	static const char dt_prefix[] = "dtrace_";
	static const char dt_safe_prefix[] = "dtrace_safe_";
	int is_dtrace, is_dtrace_safe;

	/*
	 * A "dtrace_" function may run in probe context, so it cannot be
	 * instrumented -- unless it advertises, through the "dtrace_safe_"
	 * prefix, that it never runs there.
	 */
	is_dtrace = strncmp(name, dt_prefix, sizeof(dt_prefix) - 1) == 0;
	is_dtrace_safe =
	    strncmp(name, dt_safe_prefix, sizeof(dt_safe_prefix) - 1) == 0;
	if (is_dtrace && !is_dtrace_safe)
		return (1);

#ifndef _KLD_MODULE
	/*
	 * With DTrace compiled into the kernel the FBT functions live in
	 * the kernel image too, and must not instrument themselves.
	 */
	if (strncmp(name, "fbt_", 4) == 0)
		return (1);
#endif

	return (0);
}
140 
141 static void
142 fbt_doubletrap(void)
143 {
144 	fbt_probe_t *fbt;
145 	int i;
146 
147 	for (i = 0; i < fbt_probetab_size; i++) {
148 		fbt = fbt_probetab[i];
149 
150 		for (; fbt != NULL; fbt = fbt->fbtp_next)
151 			fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
152 	}
153 }
154 
155 static void
156 fbt_provide_module(void *arg, modctl_t *lf)
157 {
158 	char modname[MAXPATHLEN];
159 	int i;
160 	size_t len;
161 
162 	strlcpy(modname, lf->filename, sizeof(modname));
163 	len = strlen(modname);
164 	if (len > 3 && strcmp(modname + len - 3, ".ko") == 0)
165 		modname[len - 3] = '\0';
166 
167 	/*
168 	 * Employees of dtrace and their families are ineligible.  Void
169 	 * where prohibited.
170 	 */
171 	if (strcmp(modname, "dtrace") == 0)
172 		return;
173 
174 	/*
175 	 * To register with DTrace, a module must list 'dtrace' as a
176 	 * dependency in order for the kernel linker to resolve
177 	 * symbols like dtrace_register(). All modules with such a
178 	 * dependency are ineligible for FBT tracing.
179 	 */
180 	for (i = 0; i < lf->ndeps; i++)
181 		if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0)
182 			return;
183 
184 	if (lf->fbt_nentries) {
185 		/*
186 		 * This module has some FBT entries allocated; we're afraid
187 		 * to screw with it.
188 		 */
189 		return;
190 	}
191 
192 	/*
193 	 * List the functions in the module and the symbol values.
194 	 */
195 	(void) linker_file_function_listall(lf, fbt_provide_module_function, modname);
196 }
197 
198 static void
199 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
200 {
201 	fbt_probe_t *fbt = parg, *next, *hash, *last;
202 	modctl_t *ctl;
203 	int ndx;
204 
205 	do {
206 		ctl = fbt->fbtp_ctl;
207 
208 		ctl->fbt_nentries--;
209 
210 		/*
211 		 * Now we need to remove this probe from the fbt_probetab.
212 		 */
213 		ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
214 		last = NULL;
215 		hash = fbt_probetab[ndx];
216 
217 		while (hash != fbt) {
218 			ASSERT(hash != NULL);
219 			last = hash;
220 			hash = hash->fbtp_hashnext;
221 		}
222 
223 		if (last != NULL) {
224 			last->fbtp_hashnext = fbt->fbtp_hashnext;
225 		} else {
226 			fbt_probetab[ndx] = fbt->fbtp_hashnext;
227 		}
228 
229 		next = fbt->fbtp_next;
230 		free(fbt, M_FBT);
231 
232 		fbt = next;
233 	} while (fbt != NULL);
234 }
235 
236 static void
237 fbt_enable(void *arg, dtrace_id_t id, void *parg)
238 {
239 	fbt_probe_t *fbt = parg;
240 	modctl_t *ctl = fbt->fbtp_ctl;
241 
242 	ctl->nenabled++;
243 
244 	/*
245 	 * Now check that our modctl has the expected load count.  If it
246 	 * doesn't, this module must have been unloaded and reloaded -- and
247 	 * we're not going to touch it.
248 	 */
249 	if (ctl->loadcnt != fbt->fbtp_loadcnt) {
250 		if (fbt_verbose) {
251 			printf("fbt is failing for probe %s "
252 			    "(module %s reloaded)",
253 			    fbt->fbtp_name, ctl->filename);
254 		}
255 
256 		return;
257 	}
258 
259 	for (; fbt != NULL; fbt = fbt->fbtp_next)
260 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
261 }
262 
263 static void
264 fbt_disable(void *arg, dtrace_id_t id, void *parg)
265 {
266 	fbt_probe_t *fbt = parg;
267 	modctl_t *ctl = fbt->fbtp_ctl;
268 
269 	ASSERT(ctl->nenabled > 0);
270 	ctl->nenabled--;
271 
272 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
273 		return;
274 
275 	for (; fbt != NULL; fbt = fbt->fbtp_next)
276 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
277 }
278 
279 static void
280 fbt_suspend(void *arg, dtrace_id_t id, void *parg)
281 {
282 	fbt_probe_t *fbt = parg;
283 	modctl_t *ctl = fbt->fbtp_ctl;
284 
285 	ASSERT(ctl->nenabled > 0);
286 
287 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
288 		return;
289 
290 	for (; fbt != NULL; fbt = fbt->fbtp_next)
291 		fbt_patch_tracepoint(fbt, fbt->fbtp_savedval);
292 }
293 
294 static void
295 fbt_resume(void *arg, dtrace_id_t id, void *parg)
296 {
297 	fbt_probe_t *fbt = parg;
298 	modctl_t *ctl = fbt->fbtp_ctl;
299 
300 	ASSERT(ctl->nenabled > 0);
301 
302 	if ((ctl->loadcnt != fbt->fbtp_loadcnt))
303 		return;
304 
305 	for (; fbt != NULL; fbt = fbt->fbtp_next)
306 		fbt_patch_tracepoint(fbt, fbt->fbtp_patchval);
307 }
308 
309 static int
310 fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc)
311 {
312 	const Elf_Sym *symp = lc->symtab;;
313 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
314 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
315 	int i;
316 	uint32_t *ctfoff;
317 	uint32_t objtoff = hp->cth_objtoff;
318 	uint32_t funcoff = hp->cth_funcoff;
319 	ushort_t info;
320 	ushort_t vlen;
321 
322 	/* Sanity check. */
323 	if (hp->cth_magic != CTF_MAGIC) {
324 		printf("Bad magic value in CTF data of '%s'\n",lf->pathname);
325 		return (EINVAL);
326 	}
327 
328 	if (lc->symtab == NULL) {
329 		printf("No symbol table in '%s'\n",lf->pathname);
330 		return (EINVAL);
331 	}
332 
333 	ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK);
334 	*lc->ctfoffp = ctfoff;
335 
336 	for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) {
337 		if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) {
338 			*ctfoff = 0xffffffff;
339 			continue;
340 		}
341 
342 		switch (ELF_ST_TYPE(symp->st_info)) {
343 		case STT_OBJECT:
344 			if (objtoff >= hp->cth_funcoff ||
345                             (symp->st_shndx == SHN_ABS && symp->st_value == 0)) {
346 				*ctfoff = 0xffffffff;
347                                 break;
348                         }
349 
350                         *ctfoff = objtoff;
351                         objtoff += sizeof (ushort_t);
352 			break;
353 
354 		case STT_FUNC:
355 			if (funcoff >= hp->cth_typeoff) {
356 				*ctfoff = 0xffffffff;
357 				break;
358 			}
359 
360 			*ctfoff = funcoff;
361 
362 			info = *((const ushort_t *)(ctfdata + funcoff));
363 			vlen = CTF_INFO_VLEN(info);
364 
365 			/*
366 			 * If we encounter a zero pad at the end, just skip it.
367 			 * Otherwise skip over the function and its return type
368 			 * (+2) and the argument list (vlen).
369 			 */
370 			if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0)
371 				funcoff += sizeof (ushort_t); /* skip pad */
372 			else
373 				funcoff += sizeof (ushort_t) * (vlen + 2);
374 			break;
375 
376 		default:
377 			*ctfoff = 0xffffffff;
378 			break;
379 		}
380 	}
381 
382 	return (0);
383 }
384 
385 static ssize_t
386 fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep,
387     ssize_t *incrementp)
388 {
389 	ssize_t size, increment;
390 
391 	if (version > CTF_VERSION_1 &&
392 	    tp->ctt_size == CTF_LSIZE_SENT) {
393 		size = CTF_TYPE_LSIZE(tp);
394 		increment = sizeof (ctf_type_t);
395 	} else {
396 		size = tp->ctt_size;
397 		increment = sizeof (ctf_stype_t);
398 	}
399 
400 	if (sizep)
401 		*sizep = size;
402 	if (incrementp)
403 		*incrementp = increment;
404 
405 	return (size);
406 }
407 
408 static int
409 fbt_typoff_init(linker_ctf_t *lc)
410 {
411 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
412 	const ctf_type_t *tbuf;
413 	const ctf_type_t *tend;
414 	const ctf_type_t *tp;
415 	const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t);
416 	int ctf_typemax = 0;
417 	uint32_t *xp;
418 	ulong_t pop[CTF_K_MAX + 1] = { 0 };
419 
420 
421 	/* Sanity check. */
422 	if (hp->cth_magic != CTF_MAGIC)
423 		return (EINVAL);
424 
425 	tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff);
426 	tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff);
427 
428 	int child = hp->cth_parname != 0;
429 
430 	/*
431 	 * We make two passes through the entire type section.  In this first
432 	 * pass, we count the number of each type and the total number of types.
433 	 */
434 	for (tp = tbuf; tp < tend; ctf_typemax++) {
435 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
436 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
437 		ssize_t size, increment;
438 
439 		size_t vbytes;
440 		uint_t n;
441 
442 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
443 
444 		switch (kind) {
445 		case CTF_K_INTEGER:
446 		case CTF_K_FLOAT:
447 			vbytes = sizeof (uint_t);
448 			break;
449 		case CTF_K_ARRAY:
450 			vbytes = sizeof (ctf_array_t);
451 			break;
452 		case CTF_K_FUNCTION:
453 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
454 			break;
455 		case CTF_K_STRUCT:
456 		case CTF_K_UNION:
457 			if (size < CTF_LSTRUCT_THRESH) {
458 				ctf_member_t *mp = (ctf_member_t *)
459 				    ((uintptr_t)tp + increment);
460 
461 				vbytes = sizeof (ctf_member_t) * vlen;
462 				for (n = vlen; n != 0; n--, mp++)
463 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
464 			} else {
465 				ctf_lmember_t *lmp = (ctf_lmember_t *)
466 				    ((uintptr_t)tp + increment);
467 
468 				vbytes = sizeof (ctf_lmember_t) * vlen;
469 				for (n = vlen; n != 0; n--, lmp++)
470 					child |=
471 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
472 			}
473 			break;
474 		case CTF_K_ENUM:
475 			vbytes = sizeof (ctf_enum_t) * vlen;
476 			break;
477 		case CTF_K_FORWARD:
478 			/*
479 			 * For forward declarations, ctt_type is the CTF_K_*
480 			 * kind for the tag, so bump that population count too.
481 			 * If ctt_type is unknown, treat the tag as a struct.
482 			 */
483 			if (tp->ctt_type == CTF_K_UNKNOWN ||
484 			    tp->ctt_type >= CTF_K_MAX)
485 				pop[CTF_K_STRUCT]++;
486 			else
487 				pop[tp->ctt_type]++;
488 			/*FALLTHRU*/
489 		case CTF_K_UNKNOWN:
490 			vbytes = 0;
491 			break;
492 		case CTF_K_POINTER:
493 		case CTF_K_TYPEDEF:
494 		case CTF_K_VOLATILE:
495 		case CTF_K_CONST:
496 		case CTF_K_RESTRICT:
497 			child |= CTF_TYPE_ISCHILD(tp->ctt_type);
498 			vbytes = 0;
499 			break;
500 		default:
501 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
502 			return (EIO);
503 		}
504 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
505 		pop[kind]++;
506 	}
507 
508 	/* account for a sentinel value below */
509 	ctf_typemax++;
510 	*lc->typlenp = ctf_typemax;
511 
512 	xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER,
513 	    M_ZERO | M_WAITOK);
514 
515 	*lc->typoffp = xp;
516 
517 	/* type id 0 is used as a sentinel value */
518 	*xp++ = 0;
519 
520 	/*
521 	 * In the second pass, fill in the type offset.
522 	 */
523 	for (tp = tbuf; tp < tend; xp++) {
524 		ushort_t kind = CTF_INFO_KIND(tp->ctt_info);
525 		ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info);
526 		ssize_t size, increment;
527 
528 		size_t vbytes;
529 		uint_t n;
530 
531 		(void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment);
532 
533 		switch (kind) {
534 		case CTF_K_INTEGER:
535 		case CTF_K_FLOAT:
536 			vbytes = sizeof (uint_t);
537 			break;
538 		case CTF_K_ARRAY:
539 			vbytes = sizeof (ctf_array_t);
540 			break;
541 		case CTF_K_FUNCTION:
542 			vbytes = sizeof (ushort_t) * (vlen + (vlen & 1));
543 			break;
544 		case CTF_K_STRUCT:
545 		case CTF_K_UNION:
546 			if (size < CTF_LSTRUCT_THRESH) {
547 				ctf_member_t *mp = (ctf_member_t *)
548 				    ((uintptr_t)tp + increment);
549 
550 				vbytes = sizeof (ctf_member_t) * vlen;
551 				for (n = vlen; n != 0; n--, mp++)
552 					child |= CTF_TYPE_ISCHILD(mp->ctm_type);
553 			} else {
554 				ctf_lmember_t *lmp = (ctf_lmember_t *)
555 				    ((uintptr_t)tp + increment);
556 
557 				vbytes = sizeof (ctf_lmember_t) * vlen;
558 				for (n = vlen; n != 0; n--, lmp++)
559 					child |=
560 					    CTF_TYPE_ISCHILD(lmp->ctlm_type);
561 			}
562 			break;
563 		case CTF_K_ENUM:
564 			vbytes = sizeof (ctf_enum_t) * vlen;
565 			break;
566 		case CTF_K_FORWARD:
567 		case CTF_K_UNKNOWN:
568 			vbytes = 0;
569 			break;
570 		case CTF_K_POINTER:
571 		case CTF_K_TYPEDEF:
572 		case CTF_K_VOLATILE:
573 		case CTF_K_CONST:
574 		case CTF_K_RESTRICT:
575 			vbytes = 0;
576 			break;
577 		default:
578 			printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind);
579 			return (EIO);
580 		}
581 		*xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata);
582 		tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes);
583 	}
584 
585 	return (0);
586 }
587 
/*
 * CTF Declaration Stack
 *
 * In order to implement fbt_type_name(), we must convert a type graph back
 * into a C type declaration.  Unfortunately, a type graph represents a storage
 * class ordering of the type whereas a type declaration must obey the C rules
 * for operator precedence, and the two orderings are frequently in conflict.
 * For example, consider these CTF type graphs and their C declarations:
 *
 * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER  : int (*)()
 * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER     : int (*)[]
 *
 * In each case, parentheses are used to raise operator * to higher lexical
 * precedence, so the string form of the C declaration cannot be constructed by
 * walking the type graph links and forming the string from left to right.
 *
 * The functions below build a set of stacks from the type graph nodes
 * corresponding to the C operator precedence levels in the appropriate order.
 * The code in fbt_type_name() can then iterate over the levels and nodes in
 * lexical precedence order and construct the final C declaration string.
 */

/*
 * Link pair for a doubly-linked list.  It serves both as the member embedded
 * at the start of each element and as the list head, where l_next is the
 * first element and l_prev is the last (see ctf_list_append()).
 */
typedef struct ctf_list {
	struct ctf_list *l_prev; /* previous pointer or tail pointer */
	struct ctf_list *l_next; /* next pointer or head pointer */
} ctf_list_t;

/* Fetch the neighbour of a list element (or the tail/head of a list head). */
#define	ctf_list_prev(elem)	((void *)(((ctf_list_t *)(elem))->l_prev))
#define	ctf_list_next(elem)	((void *)(((ctf_list_t *)(elem))->l_next))

/*
 * Precedence levels used to index the per-level node stacks.  CTF_PREC_MAX
 * is the number of levels, not a level itself.
 */
typedef enum {
	CTF_PREC_BASE,
	CTF_PREC_POINTER,
	CTF_PREC_ARRAY,
	CTF_PREC_FUNCTION,
	CTF_PREC_MAX
} ctf_decl_prec_t;

/* One type graph node queued on a precedence stack by ctf_decl_push(). */
typedef struct ctf_decl_node {
	ctf_list_t cd_list;			/* linked list pointers */
	ctf_id_t cd_type;			/* type identifier */
	uint_t cd_kind;				/* type kind */
	uint_t cd_n;				/* type dimension if array */
} ctf_decl_node_t;

/* Working state for turning one type into a C declaration string. */
typedef struct ctf_decl {
	ctf_list_t cd_nodes[CTF_PREC_MAX];	/* declaration node stacks */
	int cd_order[CTF_PREC_MAX];		/* storage order of decls */
	ctf_decl_prec_t cd_qualp;		/* qualifier precedence */
	ctf_decl_prec_t cd_ordp;		/* ordered precedence */
	char *cd_buf;				/* buffer for output */
	char *cd_ptr;				/* buffer location */
	char *cd_end;				/* buffer limit */
	size_t cd_len;				/* buffer space required */
	int cd_err;				/* saved error value */
} ctf_decl_t;
643 
644 /*
645  * Simple doubly-linked list append routine.  This implementation assumes that
646  * each list element contains an embedded ctf_list_t as the first member.
647  * An additional ctf_list_t is used to store the head (l_next) and tail
648  * (l_prev) pointers.  The current head and tail list elements have their
649  * previous and next pointers set to NULL, respectively.
650  */
651 static void
652 ctf_list_append(ctf_list_t *lp, void *new)
653 {
654 	ctf_list_t *p = lp->l_prev;	/* p = tail list element */
655 	ctf_list_t *q = new;		/* q = new list element */
656 
657 	lp->l_prev = q;
658 	q->l_prev = p;
659 	q->l_next = NULL;
660 
661 	if (p != NULL)
662 		p->l_next = q;
663 	else
664 		lp->l_next = q;
665 }
666 
667 /*
668  * Prepend the specified existing element to the given ctf_list_t.  The
669  * existing pointer should be pointing at a struct with embedded ctf_list_t.
670  */
671 static void
672 ctf_list_prepend(ctf_list_t *lp, void *new)
673 {
674 	ctf_list_t *p = new;		/* p = new list element */
675 	ctf_list_t *q = lp->l_next;	/* q = head list element */
676 
677 	lp->l_next = p;
678 	p->l_prev = NULL;
679 	p->l_next = q;
680 
681 	if (q != NULL)
682 		q->l_prev = p;
683 	else
684 		lp->l_prev = p;
685 }
686 
687 static void
688 ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len)
689 {
690 	int i;
691 
692 	bzero(cd, sizeof (ctf_decl_t));
693 
694 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++)
695 		cd->cd_order[i] = CTF_PREC_BASE - 1;
696 
697 	cd->cd_qualp = CTF_PREC_BASE;
698 	cd->cd_ordp = CTF_PREC_BASE;
699 
700 	cd->cd_buf = buf;
701 	cd->cd_ptr = buf;
702 	cd->cd_end = buf + len;
703 }
704 
705 static void
706 ctf_decl_fini(ctf_decl_t *cd)
707 {
708 	ctf_decl_node_t *cdp, *ndp;
709 	int i;
710 
711 	for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) {
712 		for (cdp = ctf_list_next(&cd->cd_nodes[i]);
713 		    cdp != NULL; cdp = ndp) {
714 			ndp = ctf_list_next(cdp);
715 			free(cdp, M_FBT);
716 		}
717 	}
718 }
719 
720 static const ctf_type_t *
721 ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type)
722 {
723 	const ctf_type_t *tp;
724 	uint32_t offset;
725 	uint32_t *typoff = *lc->typoffp;
726 
727 	if (type >= *lc->typlenp) {
728 		printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp);
729 		return(NULL);
730 	}
731 
732 	/* Check if the type isn't cross-referenced. */
733 	if ((offset = typoff[type]) == 0) {
734 		printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type);
735 		return(NULL);
736 	}
737 
738 	tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t));
739 
740 	return (tp);
741 }
742 
743 static void
744 fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp)
745 {
746 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;
747 	const ctf_type_t *tp;
748 	const ctf_array_t *ap;
749 	ssize_t increment;
750 
751 	bzero(arp, sizeof(*arp));
752 
753 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL)
754 		return;
755 
756 	if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY)
757 		return;
758 
759 	(void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment);
760 
761 	ap = (const ctf_array_t *)((uintptr_t)tp + increment);
762 	arp->ctr_contents = ap->cta_contents;
763 	arp->ctr_index = ap->cta_index;
764 	arp->ctr_nelems = ap->cta_nelems;
765 }
766 
767 static const char *
768 ctf_strptr(linker_ctf_t *lc, int name)
769 {
770 	const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;;
771 	const char *strp = "";
772 
773 	if (name < 0 || name >= hp->cth_strlen)
774 		return(strp);
775 
776 	strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t));
777 
778 	return (strp);
779 }
780 
781 static void
782 ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type)
783 {
784 	ctf_decl_node_t *cdp;
785 	ctf_decl_prec_t prec;
786 	uint_t kind, n = 1;
787 	int is_qual = 0;
788 
789 	const ctf_type_t *tp;
790 	ctf_arinfo_t ar;
791 
792 	if ((tp = ctf_lookup_by_id(lc, type)) == NULL) {
793 		cd->cd_err = ENOENT;
794 		return;
795 	}
796 
797 	switch (kind = CTF_INFO_KIND(tp->ctt_info)) {
798 	case CTF_K_ARRAY:
799 		fbt_array_info(lc, type, &ar);
800 		ctf_decl_push(cd, lc, ar.ctr_contents);
801 		n = ar.ctr_nelems;
802 		prec = CTF_PREC_ARRAY;
803 		break;
804 
805 	case CTF_K_TYPEDEF:
806 		if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') {
807 			ctf_decl_push(cd, lc, tp->ctt_type);
808 			return;
809 		}
810 		prec = CTF_PREC_BASE;
811 		break;
812 
813 	case CTF_K_FUNCTION:
814 		ctf_decl_push(cd, lc, tp->ctt_type);
815 		prec = CTF_PREC_FUNCTION;
816 		break;
817 
818 	case CTF_K_POINTER:
819 		ctf_decl_push(cd, lc, tp->ctt_type);
820 		prec = CTF_PREC_POINTER;
821 		break;
822 
823 	case CTF_K_VOLATILE:
824 	case CTF_K_CONST:
825 	case CTF_K_RESTRICT:
826 		ctf_decl_push(cd, lc, tp->ctt_type);
827 		prec = cd->cd_qualp;
828 		is_qual++;
829 		break;
830 
831 	default:
832 		prec = CTF_PREC_BASE;
833 	}
834 
835 	cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK);
836 	cdp->cd_type = type;
837 	cdp->cd_kind = kind;
838 	cdp->cd_n = n;
839 
840 	if (ctf_list_next(&cd->cd_nodes[prec]) == NULL)
841 		cd->cd_order[prec] = cd->cd_ordp++;
842 
843 	/*
844 	 * Reset cd_qualp to the highest precedence level that we've seen so
845 	 * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER).
846 	 */
847 	if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY)
848 		cd->cd_qualp = prec;
849 
850 	/*
851 	 * C array declarators are ordered inside out so prepend them.  Also by
852 	 * convention qualifiers of base types precede the type specifier (e.g.
853 	 * const int vs. int const) even though the two forms are equivalent.
854 	 */
855 	if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE))
856 		ctf_list_prepend(&cd->cd_nodes[prec], cdp);
857 	else
858 		ctf_list_append(&cd->cd_nodes[prec], cdp);
859 }
860 
861 static void
862 ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...)
863 {
864 	size_t len = (size_t)(cd->cd_end - cd->cd_ptr);
865 	va_list ap;
866 	size_t n;
867 
868 	va_start(ap, format);
869 	n = vsnprintf(cd->cd_ptr, len, format, ap);
870 	va_end(ap);
871 
872 	cd->cd_ptr += MIN(n, len);
873 	cd->cd_len += n;
874 }
875 
876 static ssize_t
877 fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len)
878 {
879 	ctf_decl_t cd;
880 	ctf_decl_node_t *cdp;
881 	ctf_decl_prec_t prec, lp, rp;
882 	int ptr, arr;
883 	uint_t k;
884 
885 	if (lc == NULL && type == CTF_ERR)
886 		return (-1); /* simplify caller code by permitting CTF_ERR */
887 
888 	ctf_decl_init(&cd, buf, len);
889 	ctf_decl_push(&cd, lc, type);
890 
891 	if (cd.cd_err != 0) {
892 		ctf_decl_fini(&cd);
893 		return (-1);
894 	}
895 
896 	/*
897 	 * If the type graph's order conflicts with lexical precedence order
898 	 * for pointers or arrays, then we need to surround the declarations at
899 	 * the corresponding lexical precedence with parentheses.  This can
900 	 * result in either a parenthesized pointer (*) as in int (*)() or
901 	 * int (*)[], or in a parenthesized pointer and array as in int (*[])().
902 	 */
903 	ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER;
904 	arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY;
905 
906 	rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1;
907 	lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1;
908 
909 	k = CTF_K_POINTER; /* avoid leading whitespace (see below) */
910 
911 	for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) {
912 		for (cdp = ctf_list_next(&cd.cd_nodes[prec]);
913 		    cdp != NULL; cdp = ctf_list_next(cdp)) {
914 
915 			const ctf_type_t *tp =
916 			    ctf_lookup_by_id(lc, cdp->cd_type);
917 			const char *name = ctf_strptr(lc, tp->ctt_name);
918 
919 			if (k != CTF_K_POINTER && k != CTF_K_ARRAY)
920 				ctf_decl_sprintf(&cd, " ");
921 
922 			if (lp == prec) {
923 				ctf_decl_sprintf(&cd, "(");
924 				lp = -1;
925 			}
926 
927 			switch (cdp->cd_kind) {
928 			case CTF_K_INTEGER:
929 			case CTF_K_FLOAT:
930 			case CTF_K_TYPEDEF:
931 				ctf_decl_sprintf(&cd, "%s", name);
932 				break;
933 			case CTF_K_POINTER:
934 				ctf_decl_sprintf(&cd, "*");
935 				break;
936 			case CTF_K_ARRAY:
937 				ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n);
938 				break;
939 			case CTF_K_FUNCTION:
940 				ctf_decl_sprintf(&cd, "()");
941 				break;
942 			case CTF_K_STRUCT:
943 			case CTF_K_FORWARD:
944 				ctf_decl_sprintf(&cd, "struct %s", name);
945 				break;
946 			case CTF_K_UNION:
947 				ctf_decl_sprintf(&cd, "union %s", name);
948 				break;
949 			case CTF_K_ENUM:
950 				ctf_decl_sprintf(&cd, "enum %s", name);
951 				break;
952 			case CTF_K_VOLATILE:
953 				ctf_decl_sprintf(&cd, "volatile");
954 				break;
955 			case CTF_K_CONST:
956 				ctf_decl_sprintf(&cd, "const");
957 				break;
958 			case CTF_K_RESTRICT:
959 				ctf_decl_sprintf(&cd, "restrict");
960 				break;
961 			}
962 
963 			k = cdp->cd_kind;
964 		}
965 
966 		if (rp == prec)
967 			ctf_decl_sprintf(&cd, ")");
968 	}
969 
970 	ctf_decl_fini(&cd);
971 	return (cd.cd_len);
972 }
973 
974 static void
975 fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc)
976 {
977 	const ushort_t *dp;
978 	fbt_probe_t *fbt = parg;
979 	linker_ctf_t lc;
980 	modctl_t *ctl = fbt->fbtp_ctl;
981 	int ndx = desc->dtargd_ndx;
982 	int symindx = fbt->fbtp_symindx;
983 	uint32_t *ctfoff;
984 	uint32_t offset;
985 	ushort_t info, kind, n;
986 
987 	if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) {
988 		(void) strcpy(desc->dtargd_native, "int");
989 		return;
990 	}
991 
992 	desc->dtargd_ndx = DTRACE_ARGNONE;
993 
994 	/* Get a pointer to the CTF data and it's length. */
995 	if (linker_ctf_get(ctl, &lc) != 0)
996 		/* No CTF data? Something wrong? *shrug* */
997 		return;
998 
999 	/* Check if this module hasn't been initialised yet. */
1000 	if (*lc.ctfoffp == NULL) {
1001 		/*
1002 		 * Initialise the CTF object and function symindx to
1003 		 * byte offset array.
1004 		 */
1005 		if (fbt_ctfoff_init(ctl, &lc) != 0)
1006 			return;
1007 
1008 		/* Initialise the CTF type to byte offset array. */
1009 		if (fbt_typoff_init(&lc) != 0)
1010 			return;
1011 	}
1012 
1013 	ctfoff = *lc.ctfoffp;
1014 
1015 	if (ctfoff == NULL || *lc.typoffp == NULL)
1016 		return;
1017 
1018 	/* Check if the symbol index is out of range. */
1019 	if (symindx >= lc.nsym)
1020 		return;
1021 
1022 	/* Check if the symbol isn't cross-referenced. */
1023 	if ((offset = ctfoff[symindx]) == 0xffffffff)
1024 		return;
1025 
1026 	dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t));
1027 
1028 	info = *dp++;
1029 	kind = CTF_INFO_KIND(info);
1030 	n = CTF_INFO_VLEN(info);
1031 
1032 	if (kind == CTF_K_UNKNOWN && n == 0) {
1033 		printf("%s(%d): Unknown function!\n",__func__,__LINE__);
1034 		return;
1035 	}
1036 
1037 	if (kind != CTF_K_FUNCTION) {
1038 		printf("%s(%d): Expected a function!\n",__func__,__LINE__);
1039 		return;
1040 	}
1041 
1042 	if (fbt->fbtp_roffset != 0) {
1043 		/* Only return type is available for args[1] in return probe. */
1044 		if (ndx > 1)
1045 			return;
1046 		ASSERT(ndx == 1);
1047 	} else {
1048 		/* Check if the requested argument doesn't exist. */
1049 		if (ndx >= n)
1050 			return;
1051 
1052 		/* Skip the return type and arguments up to the one requested. */
1053 		dp += ndx + 1;
1054 	}
1055 
1056 	if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0)
1057 		desc->dtargd_ndx = ndx;
1058 
1059 	return;
1060 }
1061 
/*
 * linker_file_foreach() callback used by fbt_load(): offer one linker file
 * to fbt_provide_module().  Always returns 0.
 */
static int
fbt_linker_file_cb(linker_file_t lf, void *arg)
{

	fbt_provide_module(arg, lf);

	return (0);
}
1070 
1071 static void
1072 fbt_load(void *dummy)
1073 {
1074 	/* Create the /dev/dtrace/fbt entry. */
1075 	fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
1076 	    "dtrace/fbt");
1077 
1078 	/* Default the probe table size if not specified. */
1079 	if (fbt_probetab_size == 0)
1080 		fbt_probetab_size = FBT_PROBETAB_SIZE;
1081 
1082 	/* Choose the hash mask for the probe table. */
1083 	fbt_probetab_mask = fbt_probetab_size - 1;
1084 
1085 	/* Allocate memory for the probe table. */
1086 	fbt_probetab =
1087 	    malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO);
1088 
1089 	dtrace_doubletrap_func = fbt_doubletrap;
1090 	dtrace_invop_add(fbt_invop);
1091 
1092 	if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER,
1093 	    NULL, &fbt_pops, NULL, &fbt_id) != 0)
1094 		return;
1095 
1096 	/* Create probes for the kernel and already-loaded modules. */
1097 	linker_file_foreach(fbt_linker_file_cb, NULL);
1098 }
1099 
1100 static int
1101 fbt_unload()
1102 {
1103 	int error = 0;
1104 
1105 	/* De-register the invalid opcode handler. */
1106 	dtrace_invop_remove(fbt_invop);
1107 
1108 	dtrace_doubletrap_func = NULL;
1109 
1110 	/* De-register this DTrace provider. */
1111 	if ((error = dtrace_unregister(fbt_id)) != 0)
1112 		return (error);
1113 
1114 	/* Free the probe table. */
1115 	free(fbt_probetab, M_FBT);
1116 	fbt_probetab = NULL;
1117 	fbt_probetab_mask = 0;
1118 
1119 	destroy_dev(fbt_cdev);
1120 
1121 	return (error);
1122 }
1123 
1124 static int
1125 fbt_modevent(module_t mod __unused, int type, void *data __unused)
1126 {
1127 	int error = 0;
1128 
1129 	switch (type) {
1130 	case MOD_LOAD:
1131 		break;
1132 
1133 	case MOD_UNLOAD:
1134 		break;
1135 
1136 	case MOD_SHUTDOWN:
1137 		break;
1138 
1139 	default:
1140 		error = EOPNOTSUPP;
1141 		break;
1142 
1143 	}
1144 
1145 	return (error);
1146 }
1147 
/*
 * Open entry point for the fbt device node.  All arguments are ignored and
 * the open always succeeds.
 */
static int
fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
{
	return (0);
}
1153 
/* Run fbt_load()/fbt_unload() at the DTrace provider init stage. */
SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL);
SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL);

/*
 * Module declaration: events go to fbt_modevent(); the module is version 1
 * and depends on version 1 of the dtrace and opensolaris modules.
 */
DEV_MODULE(fbt, fbt_modevent, NULL);
MODULE_VERSION(fbt, 1);
MODULE_DEPEND(fbt, dtrace, 1, 1, 1);
MODULE_DEPEND(fbt, opensolaris, 1, 1, 1);
1161