xref: /titanic_41/usr/src/uts/intel/dtrace/sdt.c (revision 653d2d91a355b2a5df7db09e17c965ddc5d3ea47)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28  */
29 
30 #include <sys/modctl.h>
31 #include <sys/sunddi.h>
32 #include <sys/dtrace.h>
33 #include <sys/kobj.h>
34 #include <sys/stat.h>
35 #include <sys/conf.h>
36 #include <vm/seg_kmem.h>
37 #include <sys/stack.h>
38 #include <sys/frame.h>
39 #include <sys/dtrace_impl.h>
40 #include <sys/cmn_err.h>
41 #include <sys/sysmacros.h>
42 #include <sys/privregs.h>
43 #include <sys/sdt_impl.h>
44 
/*
 * SDT_PATCHVAL is the byte stored in each probe's sdp_patchval and written
 * over the probe's patch point when the probe is enabled (see sdt_enable()).
 */
#define	SDT_PATCHVAL	0xf0
/*
 * Map a patch-point address to its bucket in sdt_probetab: discard the low
 * four address bits, then mask with sdt_probetab_mask (table size - 1).
 */
#define	SDT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask)
#define	SDT_PROBETAB_SIZE	0x1000		/* default: 4k buckets (16K of 4-byte pointers, 32K of 8-byte) */

static dev_info_t		*sdt_devi;	/* devinfo saved by sdt_attach() */
static int			sdt_verbose = 0; /* non-zero: log enable failures */
static sdt_probe_t		**sdt_probetab;	/* hash of probes by patch point */
static int			sdt_probetab_size; /* bucket count; 0 selects the default */
static int			sdt_probetab_mask; /* sdt_probetab_size - 1 */
54 
55 /*ARGSUSED*/
56 static int
57 sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
58 {
59 	uintptr_t stack0, stack1, stack2, stack3, stack4;
60 	int i = 0;
61 	sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)];
62 
63 #ifdef __amd64
64 	/*
65 	 * On amd64, stack[0] contains the dereferenced stack pointer,
66 	 * stack[1] contains savfp, stack[2] contains savpc.  We want
67 	 * to step over these entries.
68 	 */
69 	i += 3;
70 #endif
71 
72 	for (; sdt != NULL; sdt = sdt->sdp_hashnext) {
73 		if ((uintptr_t)sdt->sdp_patchpoint == addr) {
74 			/*
75 			 * When accessing the arguments on the stack, we must
76 			 * protect against accessing beyond the stack.  We can
77 			 * safely set NOFAULT here -- we know that interrupts
78 			 * are already disabled.
79 			 */
80 			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
81 			stack0 = stack[i++];
82 			stack1 = stack[i++];
83 			stack2 = stack[i++];
84 			stack3 = stack[i++];
85 			stack4 = stack[i++];
86 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
87 			    CPU_DTRACE_BADADDR);
88 
89 			dtrace_probe(sdt->sdp_id, stack0, stack1,
90 			    stack2, stack3, stack4);
91 
92 			return (DTRACE_INVOP_NOP);
93 		}
94 	}
95 
96 	return (0);
97 }
98 
99 /*ARGSUSED*/
100 static void
101 sdt_provide_module(void *arg, struct modctl *ctl)
102 {
103 	struct module *mp = ctl->mod_mp;
104 	char *modname = ctl->mod_modname;
105 	sdt_probedesc_t *sdpd;
106 	sdt_probe_t *sdp, *old;
107 	sdt_provider_t *prov;
108 	int len;
109 
110 	/*
111 	 * One for all, and all for one:  if we haven't yet registered all of
112 	 * our providers, we'll refuse to provide anything.
113 	 */
114 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
115 		if (prov->sdtp_id == DTRACE_PROVNONE)
116 			return;
117 	}
118 
119 	if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL)
120 		return;
121 
122 	for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) {
123 		char *name = sdpd->sdpd_name, *func, *nname;
124 		int i, j;
125 		sdt_provider_t *prov;
126 		ulong_t offs;
127 		dtrace_id_t id;
128 
129 		for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) {
130 			char *prefix = prov->sdtp_prefix;
131 
132 			if (strncmp(name, prefix, strlen(prefix)) == 0) {
133 				name += strlen(prefix);
134 				break;
135 			}
136 		}
137 
138 		nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP);
139 
140 		for (i = 0, j = 0; name[j] != '\0'; i++) {
141 			if (name[j] == '_' && name[j + 1] == '_') {
142 				nname[i] = '-';
143 				j += 2;
144 			} else {
145 				nname[i] = name[j++];
146 			}
147 		}
148 
149 		nname[i] = '\0';
150 
151 		sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP);
152 		sdp->sdp_loadcnt = ctl->mod_loadcnt;
153 		sdp->sdp_ctl = ctl;
154 		sdp->sdp_name = nname;
155 		sdp->sdp_namelen = len;
156 		sdp->sdp_provider = prov;
157 
158 		func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs);
159 
160 		if (func == NULL)
161 			func = "<unknown>";
162 
163 		/*
164 		 * We have our provider.  Now create the probe.
165 		 */
166 		if ((id = dtrace_probe_lookup(prov->sdtp_id, modname,
167 		    func, nname)) != DTRACE_IDNONE) {
168 			old = dtrace_probe_arg(prov->sdtp_id, id);
169 			ASSERT(old != NULL);
170 
171 			sdp->sdp_next = old->sdp_next;
172 			sdp->sdp_id = id;
173 			old->sdp_next = sdp;
174 		} else {
175 			sdp->sdp_id = dtrace_probe_create(prov->sdtp_id,
176 			    modname, func, nname, 3, sdp);
177 
178 			mp->sdt_nprobes++;
179 		}
180 
181 		sdp->sdp_hashnext =
182 		    sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)];
183 		sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp;
184 
185 		sdp->sdp_patchval = SDT_PATCHVAL;
186 		sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset;
187 		sdp->sdp_savedval = *sdp->sdp_patchpoint;
188 	}
189 }
190 
191 /*ARGSUSED*/
192 static void
193 sdt_destroy(void *arg, dtrace_id_t id, void *parg)
194 {
195 	sdt_probe_t *sdp = parg, *old, *last, *hash;
196 	struct modctl *ctl = sdp->sdp_ctl;
197 	int ndx;
198 
199 	if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) {
200 		if ((ctl->mod_loadcnt == sdp->sdp_loadcnt &&
201 		    ctl->mod_loaded)) {
202 			((struct module *)(ctl->mod_mp))->sdt_nprobes--;
203 		}
204 	}
205 
206 	while (sdp != NULL) {
207 		old = sdp;
208 
209 		/*
210 		 * Now we need to remove this probe from the sdt_probetab.
211 		 */
212 		ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint);
213 		last = NULL;
214 		hash = sdt_probetab[ndx];
215 
216 		while (hash != sdp) {
217 			ASSERT(hash != NULL);
218 			last = hash;
219 			hash = hash->sdp_hashnext;
220 		}
221 
222 		if (last != NULL) {
223 			last->sdp_hashnext = sdp->sdp_hashnext;
224 		} else {
225 			sdt_probetab[ndx] = sdp->sdp_hashnext;
226 		}
227 
228 		kmem_free(sdp->sdp_name, sdp->sdp_namelen);
229 		sdp = sdp->sdp_next;
230 		kmem_free(old, sizeof (sdt_probe_t));
231 	}
232 }
233 
234 /*ARGSUSED*/
235 static int
236 sdt_enable(void *arg, dtrace_id_t id, void *parg)
237 {
238 	sdt_probe_t *sdp = parg;
239 	struct modctl *ctl = sdp->sdp_ctl;
240 
241 	ctl->mod_nenabled++;
242 
243 	/*
244 	 * If this module has disappeared since we discovered its probes,
245 	 * refuse to enable it.
246 	 */
247 	if (!ctl->mod_loaded) {
248 		if (sdt_verbose) {
249 			cmn_err(CE_NOTE, "sdt is failing for probe %s "
250 			    "(module %s unloaded)",
251 			    sdp->sdp_name, ctl->mod_modname);
252 		}
253 		goto err;
254 	}
255 
256 	/*
257 	 * Now check that our modctl has the expected load count.  If it
258 	 * doesn't, this module must have been unloaded and reloaded -- and
259 	 * we're not going to touch it.
260 	 */
261 	if (ctl->mod_loadcnt != sdp->sdp_loadcnt) {
262 		if (sdt_verbose) {
263 			cmn_err(CE_NOTE, "sdt is failing for probe %s "
264 			    "(module %s reloaded)",
265 			    sdp->sdp_name, ctl->mod_modname);
266 		}
267 		goto err;
268 	}
269 
270 	while (sdp != NULL) {
271 		*sdp->sdp_patchpoint = sdp->sdp_patchval;
272 		sdp = sdp->sdp_next;
273 	}
274 err:
275 	return (0);
276 }
277 
278 /*ARGSUSED*/
279 static void
280 sdt_disable(void *arg, dtrace_id_t id, void *parg)
281 {
282 	sdt_probe_t *sdp = parg;
283 	struct modctl *ctl = sdp->sdp_ctl;
284 
285 	ctl->mod_nenabled--;
286 
287 	if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt)
288 		goto err;
289 
290 	while (sdp != NULL) {
291 		*sdp->sdp_patchpoint = sdp->sdp_savedval;
292 		sdp = sdp->sdp_next;
293 	}
294 
295 err:
296 	;
297 }
298 
/*
 * Fetch argument number 'argno' for the probe that is currently firing.
 *
 * Starting from the current frame pointer, up to 'aframes' saved frame
 * pointers are followed.  If one of those frames returns into
 * dtrace_invop_callsite, the probe was entered through the invalid-opcode
 * trap and the argument is located relative to the state saved by that trap.
 * Otherwise dtrace_probe() is assumed to have been called directly and the
 * argument is located relative to the last frame reached.
 *
 * The final load is performed with CPU_DTRACE_NOFAULT set.  On amd64, the
 * direct-call path sets CPU_DTRACE_ILLOP and returns 0 if the (shifted)
 * argument index would have been passed in a register.
 */
/*ARGSUSED*/
uint64_t
sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
{
	uintptr_t val;
	struct frame *fp = (struct frame *)dtrace_getfp();
	uintptr_t *stack;
	int i;
#if defined(__amd64)
	/*
	 * A total of 6 arguments are passed via registers; any argument with
	 * index of 5 or lower is therefore in a register.
	 */
	int inreg = 5;
#endif

	for (i = 1; i <= aframes; i++) {
		fp = (struct frame *)(fp->fr_savfp);

		if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) {
#if !defined(__amd64)
			/*
			 * If we pass through the invalid op handler, we will
			 * use the pointer that it passed to the stack as the
			 * second argument to dtrace_invop() as the pointer to
			 * the stack.
			 */
			stack = ((uintptr_t **)&fp[1])[1];
#else
			/*
			 * In the case of amd64, we will use the pointer to the
			 * regs structure that was pushed when we took the
			 * trap.  To get this structure, we must increment
			 * beyond the frame structure.  If the argument that
			 * we're seeking is passed on the stack, we'll pull
			 * the true stack pointer out of the saved registers
			 * and decrement our argument by the number of
			 * arguments passed in registers; if the argument
			 * we're seeking is passed in registers, we can just
			 * load it directly.
			 */
			struct regs *rp = (struct regs *)((uintptr_t)&fp[1] +
			    sizeof (uintptr_t));

			if (argno <= inreg) {
				/*
				 * Indexing from r_rdi by argno below relies on
				 * the argument registers being stored
				 * consecutively, in argument order, in struct
				 * regs -- confirm against <sys/privregs.h>.
				 */
				stack = (uintptr_t *)&rp->r_rdi;
			} else {
				stack = (uintptr_t *)(rp->r_rsp);
				argno -= (inreg + 1);
			}
#endif
			goto load;
		}
	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() -- the provider simply called dtrace_probe()
	 * directly.  As this is the case, we need to shift the argument
	 * that we're looking for:  the probe ID is the first argument to
	 * dtrace_probe(), so the argument n will actually be found where
	 * one would expect to find argument (n + 1).
	 */
	argno++;

#if defined(__amd64)
	if (argno <= inreg) {
		/*
		 * This shouldn't happen.  If the argument is passed in a
		 * register then it should have been, well, passed in a
		 * register...
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	/* Convert to an index among the stack-passed arguments. */
	argno -= (inreg + 1);
#endif
	/* Arguments are read from the words that follow the frame structure. */
	stack = (uintptr_t *)&fp[1];

load:
	/* The address may be bogus; take the load with faults suppressed. */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	val = stack[argno];
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	return (val);
}
386 
/*
 * Provider operations vector passed to dtrace_register() for every SDT
 * provider.  The initializer is positional; the slot names in the comments
 * below are the presumed dtrace_pops_t member names -- confirm against
 * <sys/dtrace.h>.  The dtps_mode member is overwritten per provider in
 * sdt_attach() immediately before each registration.
 */
static dtrace_pops_t sdt_pops = {
	NULL,			/* dtps_provide (presumed) */
	sdt_provide_module,	/* dtps_provide_module (presumed) */
	sdt_enable,		/* dtps_enable (presumed) */
	sdt_disable,		/* dtps_disable (presumed) */
	NULL,			/* dtps_suspend (presumed) */
	NULL,			/* dtps_resume (presumed) */
	sdt_getargdesc,		/* dtps_getargdesc (presumed) */
	sdt_getarg,		/* dtps_getargval (presumed) */
	NULL,			/* dtps_mode (presumed); set in sdt_attach() */
	sdt_destroy		/* dtps_destroy (presumed) */
};
399 
400 /*ARGSUSED*/
401 static int
402 sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
403 {
404 	sdt_provider_t *prov;
405 
406 	if (ddi_create_minor_node(devi, "sdt", S_IFCHR,
407 	    0, DDI_PSEUDO, NULL) == DDI_FAILURE) {
408 		cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node");
409 		ddi_remove_minor_node(devi, NULL);
410 		return (DDI_FAILURE);
411 	}
412 
413 	ddi_report_dev(devi);
414 	sdt_devi = devi;
415 
416 	if (sdt_probetab_size == 0)
417 		sdt_probetab_size = SDT_PROBETAB_SIZE;
418 
419 	sdt_probetab_mask = sdt_probetab_size - 1;
420 	sdt_probetab =
421 	    kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP);
422 	dtrace_invop_add(sdt_invop);
423 
424 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
425 		uint32_t priv;
426 
427 		if (prov->sdtp_priv == DTRACE_PRIV_NONE) {
428 			priv = DTRACE_PRIV_KERNEL;
429 			sdt_pops.dtps_mode = NULL;
430 		} else {
431 			priv = prov->sdtp_priv;
432 			ASSERT(priv == DTRACE_PRIV_USER);
433 			sdt_pops.dtps_mode = sdt_mode;
434 		}
435 
436 		if (dtrace_register(prov->sdtp_name, prov->sdtp_attr,
437 		    priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) {
438 			cmn_err(CE_WARN, "failed to register sdt provider %s",
439 			    prov->sdtp_name);
440 		}
441 	}
442 
443 	return (DDI_SUCCESS);
444 }
445 
446 /*ARGSUSED*/
447 static int
448 sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
449 {
450 	sdt_provider_t *prov;
451 
452 	switch (cmd) {
453 	case DDI_DETACH:
454 		break;
455 
456 	case DDI_SUSPEND:
457 		return (DDI_SUCCESS);
458 
459 	default:
460 		return (DDI_FAILURE);
461 	}
462 
463 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
464 		if (prov->sdtp_id != DTRACE_PROVNONE) {
465 			if (dtrace_unregister(prov->sdtp_id) != 0)
466 				return (DDI_FAILURE);
467 
468 			prov->sdtp_id = DTRACE_PROVNONE;
469 		}
470 	}
471 
472 	dtrace_invop_remove(sdt_invop);
473 	kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *));
474 
475 	return (DDI_SUCCESS);
476 }
477 
478 /*ARGSUSED*/
479 static int
480 sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
481 {
482 	int error;
483 
484 	switch (infocmd) {
485 	case DDI_INFO_DEVT2DEVINFO:
486 		*result = (void *)sdt_devi;
487 		error = DDI_SUCCESS;
488 		break;
489 	case DDI_INFO_DEVT2INSTANCE:
490 		*result = (void *)0;
491 		error = DDI_SUCCESS;
492 		break;
493 	default:
494 		error = DDI_FAILURE;
495 	}
496 	return (error);
497 }
498 
/*
 * open(9E) entry point.  Opening the device always succeeds and has no side
 * effects; none of the arguments are examined.
 */
/*ARGSUSED*/
static int
sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}
505 
/*
 * Character/block entry points for the sdt pseudo device.  Only open is
 * implemented by this file (sdt_open()); the remaining slots are filled with
 * the generic nodev/nulldev/nochpoll/ddi_prop_op routines.
 */
static struct cb_ops sdt_cb_ops = {
	sdt_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
523 
/*
 * Device operations for the sdt pseudo driver:  wires up sdt_info(),
 * sdt_attach() and sdt_detach(), and points at sdt_cb_ops for the
 * character/block entry points.
 */
static struct dev_ops sdt_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	sdt_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	sdt_attach,		/* attach */
	sdt_detach,		/* detach */
	nodev,			/* reset */
	&sdt_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,		/* quiesce */
};
538 
/*
 * Module linkage information for the kernel.
 *
 * modldrv describes this module as a driver and ties it to sdt_ops; it is
 * referenced from modlinkage below.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Statically Defined Tracing",	/* name of module */
	&sdt_ops,		/* driver ops */
};
547 
/*
 * Linkage structure handed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini().  It lists the single modldrv element and is
 * NULL-terminated.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
553 
/*
 * Module load entry point:  installs the module described by modlinkage and
 * returns mod_install()'s result unchanged.
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
559 
/*
 * Module information entry point:  fills in 'modinfop' via mod_info() and
 * returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
565 
/*
 * Module unload entry point:  removes the module described by modlinkage and
 * returns mod_remove()'s result unchanged.
 */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
571