xref: /illumos-gate/usr/src/uts/intel/dtrace/sdt.c (revision bc1f688b4872ace323eaddbb1a6365d054e7bf56)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28  * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
29  */
30 
31 #include <sys/modctl.h>
32 #include <sys/sunddi.h>
33 #include <sys/dtrace.h>
34 #include <sys/kobj.h>
35 #include <sys/stat.h>
36 #include <sys/conf.h>
37 #include <vm/seg_kmem.h>
38 #include <sys/stack.h>
39 #include <sys/frame.h>
40 #include <sys/dtrace_impl.h>
41 #include <sys/cmn_err.h>
42 #include <sys/sysmacros.h>
43 #include <sys/privregs.h>
44 #include <sys/sdt_impl.h>
45 
46 #define	SDT_PATCHVAL	0xf0
47 #define	SDT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask)
48 #define	SDT_PROBETAB_SIZE	0x1000		/* 4k entries -- 16K total */
49 
50 static dev_info_t		*sdt_devi;
51 static int			sdt_verbose = 0;
52 static sdt_probe_t		**sdt_probetab;
53 static int			sdt_probetab_size;
54 static int			sdt_probetab_mask;
55 
56 /*ARGSUSED*/
57 static int
58 sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
59 {
60 	uintptr_t stack0, stack1, stack2, stack3, stack4;
61 	int i = 0;
62 	sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)];
63 
64 #ifdef __amd64
65 	/*
66 	 * On amd64, stack[0] contains the dereferenced stack pointer,
67 	 * stack[1] contains savfp, stack[2] contains savpc.  We want
68 	 * to step over these entries.
69 	 */
70 	i += 3;
71 #endif
72 
73 	for (; sdt != NULL; sdt = sdt->sdp_hashnext) {
74 		if ((uintptr_t)sdt->sdp_patchpoint == addr) {
75 			/*
76 			 * When accessing the arguments on the stack, we must
77 			 * protect against accessing beyond the stack.  We can
78 			 * safely set NOFAULT here -- we know that interrupts
79 			 * are already disabled.
80 			 */
81 			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
82 			stack0 = stack[i++];
83 			stack1 = stack[i++];
84 			stack2 = stack[i++];
85 			stack3 = stack[i++];
86 			stack4 = stack[i++];
87 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
88 			    CPU_DTRACE_BADADDR);
89 
90 			dtrace_probe(sdt->sdp_id, stack0, stack1,
91 			    stack2, stack3, stack4);
92 
93 			return (DTRACE_INVOP_NOP);
94 		}
95 	}
96 
97 	return (0);
98 }
99 
100 /*ARGSUSED*/
101 static void
102 sdt_provide_module(void *arg, struct modctl *ctl)
103 {
104 	struct module *mp = ctl->mod_mp;
105 	char *modname = ctl->mod_modname;
106 	sdt_probedesc_t *sdpd;
107 	sdt_probe_t *sdp, *old;
108 	sdt_provider_t *prov;
109 	int len;
110 
111 	/*
112 	 * One for all, and all for one:  if we haven't yet registered all of
113 	 * our providers, we'll refuse to provide anything.
114 	 */
115 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
116 		if (prov->sdtp_id == DTRACE_PROVNONE)
117 			return;
118 	}
119 
120 	if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL)
121 		return;
122 
123 	for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) {
124 		char *name = sdpd->sdpd_name, *func, *nname;
125 		int i, j;
126 		sdt_provider_t *prov;
127 		ulong_t offs;
128 		dtrace_id_t id;
129 
130 		for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) {
131 			char *prefix = prov->sdtp_prefix;
132 
133 			if (strncmp(name, prefix, strlen(prefix)) == 0) {
134 				name += strlen(prefix);
135 				break;
136 			}
137 		}
138 
139 		nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP);
140 
141 		for (i = 0, j = 0; name[j] != '\0'; i++) {
142 			if (name[j] == '_' && name[j + 1] == '_') {
143 				nname[i] = '-';
144 				j += 2;
145 			} else {
146 				nname[i] = name[j++];
147 			}
148 		}
149 
150 		nname[i] = '\0';
151 
152 		sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP);
153 		sdp->sdp_loadcnt = ctl->mod_loadcnt;
154 		sdp->sdp_ctl = ctl;
155 		sdp->sdp_name = nname;
156 		sdp->sdp_namelen = len;
157 		sdp->sdp_provider = prov;
158 
159 		func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs);
160 
161 		if (func == NULL)
162 			func = "<unknown>";
163 
164 		/*
165 		 * We have our provider.  Now create the probe.
166 		 */
167 		if ((id = dtrace_probe_lookup(prov->sdtp_id, modname,
168 		    func, nname)) != DTRACE_IDNONE) {
169 			old = dtrace_probe_arg(prov->sdtp_id, id);
170 			ASSERT(old != NULL);
171 
172 			sdp->sdp_next = old->sdp_next;
173 			sdp->sdp_id = id;
174 			old->sdp_next = sdp;
175 		} else {
176 			sdp->sdp_id = dtrace_probe_create(prov->sdtp_id,
177 			    modname, func, nname, 3, sdp);
178 
179 			mp->sdt_nprobes++;
180 		}
181 
182 		sdp->sdp_hashnext =
183 		    sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)];
184 		sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp;
185 
186 		sdp->sdp_patchval = SDT_PATCHVAL;
187 		sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset;
188 		sdp->sdp_savedval = *sdp->sdp_patchpoint;
189 	}
190 }
191 
192 /*ARGSUSED*/
193 static void
194 sdt_destroy(void *arg, dtrace_id_t id, void *parg)
195 {
196 	sdt_probe_t *sdp = parg, *old, *last, *hash;
197 	struct modctl *ctl = sdp->sdp_ctl;
198 	int ndx;
199 
200 	if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) {
201 		if ((ctl->mod_loadcnt == sdp->sdp_loadcnt &&
202 		    ctl->mod_loaded)) {
203 			((struct module *)(ctl->mod_mp))->sdt_nprobes--;
204 		}
205 	}
206 
207 	while (sdp != NULL) {
208 		old = sdp;
209 
210 		/*
211 		 * Now we need to remove this probe from the sdt_probetab.
212 		 */
213 		ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint);
214 		last = NULL;
215 		hash = sdt_probetab[ndx];
216 
217 		while (hash != sdp) {
218 			ASSERT(hash != NULL);
219 			last = hash;
220 			hash = hash->sdp_hashnext;
221 		}
222 
223 		if (last != NULL) {
224 			last->sdp_hashnext = sdp->sdp_hashnext;
225 		} else {
226 			sdt_probetab[ndx] = sdp->sdp_hashnext;
227 		}
228 
229 		kmem_free(sdp->sdp_name, sdp->sdp_namelen);
230 		sdp = sdp->sdp_next;
231 		kmem_free(old, sizeof (sdt_probe_t));
232 	}
233 }
234 
235 /*ARGSUSED*/
236 static int
237 sdt_enable(void *arg, dtrace_id_t id, void *parg)
238 {
239 	sdt_probe_t *sdp = parg;
240 	struct modctl *ctl = sdp->sdp_ctl;
241 
242 	ctl->mod_nenabled++;
243 
244 	/*
245 	 * If this module has disappeared since we discovered its probes,
246 	 * refuse to enable it.
247 	 */
248 	if (!ctl->mod_loaded) {
249 		if (sdt_verbose) {
250 			cmn_err(CE_NOTE, "sdt is failing for probe %s "
251 			    "(module %s unloaded)",
252 			    sdp->sdp_name, ctl->mod_modname);
253 		}
254 		goto err;
255 	}
256 
257 	/*
258 	 * Now check that our modctl has the expected load count.  If it
259 	 * doesn't, this module must have been unloaded and reloaded -- and
260 	 * we're not going to touch it.
261 	 */
262 	if (ctl->mod_loadcnt != sdp->sdp_loadcnt) {
263 		if (sdt_verbose) {
264 			cmn_err(CE_NOTE, "sdt is failing for probe %s "
265 			    "(module %s reloaded)",
266 			    sdp->sdp_name, ctl->mod_modname);
267 		}
268 		goto err;
269 	}
270 
271 	while (sdp != NULL) {
272 		*sdp->sdp_patchpoint = sdp->sdp_patchval;
273 		sdp = sdp->sdp_next;
274 	}
275 err:
276 	return (0);
277 }
278 
279 /*ARGSUSED*/
280 static void
281 sdt_disable(void *arg, dtrace_id_t id, void *parg)
282 {
283 	sdt_probe_t *sdp = parg;
284 	struct modctl *ctl = sdp->sdp_ctl;
285 
286 	ctl->mod_nenabled--;
287 
288 	if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt)
289 		goto err;
290 
291 	while (sdp != NULL) {
292 		*sdp->sdp_patchpoint = sdp->sdp_savedval;
293 		sdp = sdp->sdp_next;
294 	}
295 
296 err:
297 	;
298 }
299 
300 /*ARGSUSED*/
301 uint64_t
302 sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
303 {
304 	uintptr_t val;
305 	struct frame *fp = (struct frame *)dtrace_getfp();
306 	uintptr_t *stack;
307 	int i;
308 #if defined(__amd64)
309 	/*
310 	 * A total of 6 arguments are passed via registers; any argument with
311 	 * index of 5 or lower is therefore in a register.
312 	 */
313 	int inreg = 5;
314 #endif
315 
316 	for (i = 1; i <= aframes; i++) {
317 		fp = (struct frame *)(fp->fr_savfp);
318 
319 		if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) {
320 #if !defined(__amd64)
321 			/*
322 			 * If we pass through the invalid op handler, we will
323 			 * use the pointer that it passed to the stack as the
324 			 * second argument to dtrace_invop() as the pointer to
325 			 * the stack.
326 			 */
327 			stack = ((uintptr_t **)&fp[1])[1];
328 #else
329 			/*
330 			 * In the case of amd64, we will use the pointer to the
331 			 * regs structure that was pushed when we took the
332 			 * trap.  To get this structure, we must increment
333 			 * beyond the frame structure, the calling RIP, and
334 			 * padding stored in dtrace_invop().  If the argument
335 			 * that we're seeking is passed on the stack, we'll
336 			 * pull the true stack pointer out of the saved
337 			 * registers and decrement our argument by the number
338 			 * of arguments passed in registers; if the argument
339 			 * we're seeking is passed in regsiters, we can just
340 			 * load it directly.
341 			 */
342 			struct regs *rp = (struct regs *)((uintptr_t)&fp[1] +
343 			    sizeof (uintptr_t) * 2);
344 
345 			if (argno <= inreg) {
346 				stack = (uintptr_t *)&rp->r_rdi;
347 			} else {
348 				stack = (uintptr_t *)(rp->r_rsp);
349 				argno -= (inreg + 1);
350 			}
351 #endif
352 			goto load;
353 		}
354 	}
355 
356 	/*
357 	 * We know that we did not come through a trap to get into
358 	 * dtrace_probe() -- the provider simply called dtrace_probe()
359 	 * directly.  As this is the case, we need to shift the argument
360 	 * that we're looking for:  the probe ID is the first argument to
361 	 * dtrace_probe(), so the argument n will actually be found where
362 	 * one would expect to find argument (n + 1).
363 	 */
364 	argno++;
365 
366 #if defined(__amd64)
367 	if (argno <= inreg) {
368 		/*
369 		 * This shouldn't happen.  If the argument is passed in a
370 		 * register then it should have been, well, passed in a
371 		 * register...
372 		 */
373 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
374 		return (0);
375 	}
376 
377 	argno -= (inreg + 1);
378 #endif
379 	stack = (uintptr_t *)&fp[1];
380 
381 load:
382 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
383 	val = stack[argno];
384 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
385 
386 	return (val);
387 }
388 
/*
 * Provider operations vector handed to dtrace_register() by sdt_attach().
 * The initializer is positional, so the order of the entries must match the
 * member order of dtrace_pops_t.  sdt_attach() rewrites the dtps_mode member
 * before each registration.
 *
 * NOTE(review): the per-slot labels below are taken from the conventional
 * dtrace_pops_t layout in <sys/dtrace.h> -- confirm against the header.
 */
static dtrace_pops_t sdt_pops = {
	NULL,			/* presumably dtps_provide (unused) */
	sdt_provide_module,	/* presumably dtps_provide_module */
	sdt_enable,		/* presumably dtps_enable */
	sdt_disable,		/* presumably dtps_disable */
	NULL,			/* presumably dtps_suspend (unused) */
	NULL,			/* presumably dtps_resume (unused) */
	sdt_getargdesc,		/* presumably dtps_getargdesc */
	sdt_getarg,		/* presumably dtps_getargval */
	NULL,			/* presumably dtps_mode; set in sdt_attach() */
	sdt_destroy		/* presumably dtps_destroy */
};
401 
402 /*ARGSUSED*/
403 static int
404 sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
405 {
406 	sdt_provider_t *prov;
407 
408 	if (ddi_create_minor_node(devi, "sdt", S_IFCHR,
409 	    0, DDI_PSEUDO, NULL) == DDI_FAILURE) {
410 		cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node");
411 		ddi_remove_minor_node(devi, NULL);
412 		return (DDI_FAILURE);
413 	}
414 
415 	ddi_report_dev(devi);
416 	sdt_devi = devi;
417 
418 	if (sdt_probetab_size == 0)
419 		sdt_probetab_size = SDT_PROBETAB_SIZE;
420 
421 	sdt_probetab_mask = sdt_probetab_size - 1;
422 	sdt_probetab =
423 	    kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP);
424 	dtrace_invop_add(sdt_invop);
425 
426 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
427 		uint32_t priv;
428 
429 		if (prov->sdtp_priv == DTRACE_PRIV_NONE) {
430 			priv = DTRACE_PRIV_KERNEL;
431 			sdt_pops.dtps_mode = NULL;
432 		} else {
433 			priv = prov->sdtp_priv;
434 			ASSERT(priv == DTRACE_PRIV_USER);
435 			sdt_pops.dtps_mode = sdt_mode;
436 		}
437 
438 		if (dtrace_register(prov->sdtp_name, prov->sdtp_attr,
439 		    priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) {
440 			cmn_err(CE_WARN, "failed to register sdt provider %s",
441 			    prov->sdtp_name);
442 		}
443 	}
444 
445 	return (DDI_SUCCESS);
446 }
447 
448 /*ARGSUSED*/
449 static int
450 sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
451 {
452 	sdt_provider_t *prov;
453 
454 	switch (cmd) {
455 	case DDI_DETACH:
456 		break;
457 
458 	case DDI_SUSPEND:
459 		return (DDI_SUCCESS);
460 
461 	default:
462 		return (DDI_FAILURE);
463 	}
464 
465 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
466 		if (prov->sdtp_id != DTRACE_PROVNONE) {
467 			if (dtrace_unregister(prov->sdtp_id) != 0)
468 				return (DDI_FAILURE);
469 
470 			prov->sdtp_id = DTRACE_PROVNONE;
471 		}
472 	}
473 
474 	dtrace_invop_remove(sdt_invop);
475 	kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *));
476 
477 	return (DDI_SUCCESS);
478 }
479 
480 /*ARGSUSED*/
481 static int
482 sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
483 {
484 	int error;
485 
486 	switch (infocmd) {
487 	case DDI_INFO_DEVT2DEVINFO:
488 		*result = (void *)sdt_devi;
489 		error = DDI_SUCCESS;
490 		break;
491 	case DDI_INFO_DEVT2INSTANCE:
492 		*result = (void *)0;
493 		error = DDI_SUCCESS;
494 		break;
495 	default:
496 		error = DDI_FAILURE;
497 	}
498 	return (error);
499 }
500 
501 /*ARGSUSED*/
502 static int
503 sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
504 {
505 	return (0);
506 }
507 
/*
 * Character/block entry points for the sdt pseudo device.  sdt_open() is
 * the only entry point implemented in this file; every other slot is filled
 * with one of the generic nodev/nulldev/nochpoll/ddi_prop_op routines.  The
 * initializer is positional, so the order of the entries must not change.
 */
static struct cb_ops sdt_cb_ops = {
	sdt_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
525 
/*
 * Device operations for the sdt pseudo device:  wires up sdt_info(),
 * sdt_attach() and sdt_detach() and points at sdt_cb_ops for the driver
 * entry points.  The initializer is positional, so the order of the entries
 * must not change.
 */
static struct dev_ops sdt_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	sdt_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	sdt_attach,		/* attach */
	sdt_detach,		/* detach */
	nodev,			/* reset */
	&sdt_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed,		/* quiesce */
};
540 
/*
 * Module linkage information for the kernel.
 *
 * modldrv describes this module as a driver and ties it to sdt_ops; it is
 * referenced from modlinkage, which is what _init(), _info() and _fini()
 * hand to the module framework.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Statically Defined Tracing",	/* name of module */
	&sdt_ops,		/* driver ops */
};
549 
/*
 * Linkage list passed to mod_install(), mod_info() and mod_remove():  the
 * revision, the single modldrv entry, and a terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
555 
556 int
557 _init(void)
558 {
559 	return (mod_install(&modlinkage));
560 }
561 
562 int
563 _info(struct modinfo *modinfop)
564 {
565 	return (mod_info(&modlinkage, modinfop));
566 }
567 
568 int
569 _fini(void)
570 {
571 	return (mod_remove(&modlinkage));
572 }
573