1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
29 */
30
31 #include <sys/modctl.h>
32 #include <sys/sunddi.h>
33 #include <sys/dtrace.h>
34 #include <sys/kobj.h>
35 #include <sys/stat.h>
36 #include <sys/conf.h>
37 #include <vm/seg_kmem.h>
38 #include <sys/stack.h>
39 #include <sys/frame.h>
40 #include <sys/dtrace_impl.h>
41 #include <sys/cmn_err.h>
42 #include <sys/sysmacros.h>
43 #include <sys/privregs.h>
44 #include <sys/sdt_impl.h>
45
/*
 * Byte that sdt_enable() stores at a probe's patch point (via sdp_patchval).
 * NOTE(review): 0xf0 is the x86 LOCK prefix; presumably chosen so that the
 * patched instruction traps into sdt_invop() -- confirm.
 */
#define	SDT_PATCHVAL		0xf0

/*
 * Map a patch-point address to its bucket index in sdt_probetab: drop the
 * low four address bits, then mask with sdt_probetab_mask.
 */
#define	SDT_ADDR2NDX(addr)	\
	((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask)

/*
 * Default number of hash buckets; sdt_attach() uses it when
 * sdt_probetab_size is still zero.  Each bucket is one pointer.
 */
#define	SDT_PROBETAB_SIZE	0x1000
49
50 static dev_info_t *sdt_devi;
51 static int sdt_verbose = 0;
52 static sdt_probe_t **sdt_probetab;
53 static int sdt_probetab_size;
54 static int sdt_probetab_mask;
55
56 /*ARGSUSED*/
57 static int
sdt_invop(uintptr_t addr,uintptr_t * stack,uintptr_t eax)58 sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
59 {
60 uintptr_t stack0, stack1, stack2, stack3, stack4;
61 int i = 0;
62 sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)];
63
64 #ifdef __amd64
65 /*
66 * On amd64, stack[0] contains the dereferenced stack pointer,
67 * stack[1] contains savfp, stack[2] contains savpc. We want
68 * to step over these entries.
69 */
70 i += 3;
71 #endif
72
73 for (; sdt != NULL; sdt = sdt->sdp_hashnext) {
74 if ((uintptr_t)sdt->sdp_patchpoint == addr) {
75 /*
76 * When accessing the arguments on the stack, we must
77 * protect against accessing beyond the stack. We can
78 * safely set NOFAULT here -- we know that interrupts
79 * are already disabled.
80 */
81 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
82 stack0 = stack[i++];
83 stack1 = stack[i++];
84 stack2 = stack[i++];
85 stack3 = stack[i++];
86 stack4 = stack[i++];
87 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
88 CPU_DTRACE_BADADDR);
89
90 dtrace_probe(sdt->sdp_id, stack0, stack1,
91 stack2, stack3, stack4);
92
93 return (DTRACE_INVOP_NOP);
94 }
95 }
96
97 return (0);
98 }
99
100 /*ARGSUSED*/
101 static void
sdt_provide_module(void * arg,struct modctl * ctl)102 sdt_provide_module(void *arg, struct modctl *ctl)
103 {
104 struct module *mp = ctl->mod_mp;
105 char *modname = ctl->mod_modname;
106 sdt_probedesc_t *sdpd;
107 sdt_probe_t *sdp, *old;
108 sdt_provider_t *prov;
109 int len;
110
111 /*
112 * One for all, and all for one: if we haven't yet registered all of
113 * our providers, we'll refuse to provide anything.
114 */
115 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
116 if (prov->sdtp_id == DTRACE_PROVNONE)
117 return;
118 }
119
120 if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL)
121 return;
122
123 for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) {
124 char *name = sdpd->sdpd_name, *func, *nname;
125 int i, j;
126 sdt_provider_t *prov;
127 ulong_t offs;
128 dtrace_id_t id;
129
130 for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) {
131 char *prefix = prov->sdtp_prefix;
132
133 if (strncmp(name, prefix, strlen(prefix)) == 0) {
134 name += strlen(prefix);
135 break;
136 }
137 }
138
139 nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP);
140
141 for (i = 0, j = 0; name[j] != '\0'; i++) {
142 if (name[j] == '_' && name[j + 1] == '_') {
143 nname[i] = '-';
144 j += 2;
145 } else {
146 nname[i] = name[j++];
147 }
148 }
149
150 nname[i] = '\0';
151
152 sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP);
153 sdp->sdp_loadcnt = ctl->mod_loadcnt;
154 sdp->sdp_ctl = ctl;
155 sdp->sdp_name = nname;
156 sdp->sdp_namelen = len;
157 sdp->sdp_provider = prov;
158
159 func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs);
160
161 if (func == NULL)
162 func = "<unknown>";
163
164 /*
165 * We have our provider. Now create the probe.
166 */
167 if ((id = dtrace_probe_lookup(prov->sdtp_id, modname,
168 func, nname)) != DTRACE_IDNONE) {
169 old = dtrace_probe_arg(prov->sdtp_id, id);
170 ASSERT(old != NULL);
171
172 sdp->sdp_next = old->sdp_next;
173 sdp->sdp_id = id;
174 old->sdp_next = sdp;
175 } else {
176 sdp->sdp_id = dtrace_probe_create(prov->sdtp_id,
177 modname, func, nname, 3, sdp);
178
179 mp->sdt_nprobes++;
180 }
181
182 sdp->sdp_hashnext =
183 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)];
184 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp;
185
186 sdp->sdp_patchval = SDT_PATCHVAL;
187 sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset;
188 sdp->sdp_savedval = *sdp->sdp_patchpoint;
189 }
190 }
191
192 /*ARGSUSED*/
193 static void
sdt_destroy(void * arg,dtrace_id_t id,void * parg)194 sdt_destroy(void *arg, dtrace_id_t id, void *parg)
195 {
196 sdt_probe_t *sdp = parg, *old, *last, *hash;
197 struct modctl *ctl = sdp->sdp_ctl;
198 int ndx;
199
200 if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) {
201 if ((ctl->mod_loadcnt == sdp->sdp_loadcnt &&
202 ctl->mod_loaded)) {
203 ((struct module *)(ctl->mod_mp))->sdt_nprobes--;
204 }
205 }
206
207 while (sdp != NULL) {
208 old = sdp;
209
210 /*
211 * Now we need to remove this probe from the sdt_probetab.
212 */
213 ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint);
214 last = NULL;
215 hash = sdt_probetab[ndx];
216
217 while (hash != sdp) {
218 ASSERT(hash != NULL);
219 last = hash;
220 hash = hash->sdp_hashnext;
221 }
222
223 if (last != NULL) {
224 last->sdp_hashnext = sdp->sdp_hashnext;
225 } else {
226 sdt_probetab[ndx] = sdp->sdp_hashnext;
227 }
228
229 kmem_free(sdp->sdp_name, sdp->sdp_namelen);
230 sdp = sdp->sdp_next;
231 kmem_free(old, sizeof (sdt_probe_t));
232 }
233 }
234
235 /*ARGSUSED*/
236 static int
sdt_enable(void * arg,dtrace_id_t id,void * parg)237 sdt_enable(void *arg, dtrace_id_t id, void *parg)
238 {
239 sdt_probe_t *sdp = parg;
240 struct modctl *ctl = sdp->sdp_ctl;
241
242 ctl->mod_nenabled++;
243
244 /*
245 * If this module has disappeared since we discovered its probes,
246 * refuse to enable it.
247 */
248 if (!ctl->mod_loaded) {
249 if (sdt_verbose) {
250 cmn_err(CE_NOTE, "sdt is failing for probe %s "
251 "(module %s unloaded)",
252 sdp->sdp_name, ctl->mod_modname);
253 }
254 goto err;
255 }
256
257 /*
258 * Now check that our modctl has the expected load count. If it
259 * doesn't, this module must have been unloaded and reloaded -- and
260 * we're not going to touch it.
261 */
262 if (ctl->mod_loadcnt != sdp->sdp_loadcnt) {
263 if (sdt_verbose) {
264 cmn_err(CE_NOTE, "sdt is failing for probe %s "
265 "(module %s reloaded)",
266 sdp->sdp_name, ctl->mod_modname);
267 }
268 goto err;
269 }
270
271 while (sdp != NULL) {
272 *sdp->sdp_patchpoint = sdp->sdp_patchval;
273 sdp = sdp->sdp_next;
274 }
275 err:
276 return (0);
277 }
278
279 /*ARGSUSED*/
280 static void
sdt_disable(void * arg,dtrace_id_t id,void * parg)281 sdt_disable(void *arg, dtrace_id_t id, void *parg)
282 {
283 sdt_probe_t *sdp = parg;
284 struct modctl *ctl = sdp->sdp_ctl;
285
286 ctl->mod_nenabled--;
287
288 if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt)
289 goto err;
290
291 while (sdp != NULL) {
292 *sdp->sdp_patchpoint = sdp->sdp_savedval;
293 sdp = sdp->sdp_next;
294 }
295
296 err:
297 ;
298 }
299
300 /*ARGSUSED*/
301 uint64_t
sdt_getarg(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)302 sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
303 {
304 uintptr_t val;
305 struct frame *fp = (struct frame *)dtrace_getfp();
306 uintptr_t *stack;
307 int i;
308 #if defined(__amd64)
309 /*
310 * A total of 6 arguments are passed via registers; any argument with
311 * index of 5 or lower is therefore in a register.
312 */
313 int inreg = 5;
314 #endif
315
316 for (i = 1; i <= aframes; i++) {
317 fp = (struct frame *)(fp->fr_savfp);
318
319 if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) {
320 #if !defined(__amd64)
321 /*
322 * If we pass through the invalid op handler, we will
323 * use the pointer that it passed to the stack as the
324 * second argument to dtrace_invop() as the pointer to
325 * the stack.
326 */
327 stack = ((uintptr_t **)&fp[1])[1];
328 #else
329 /*
330 * In the case of amd64, we will use the pointer to the
331 * regs structure that was pushed when we took the
332 * trap. To get this structure, we must increment
333 * beyond the frame structure, the calling RIP, and
334 * padding stored in dtrace_invop(). If the argument
335 * that we're seeking is passed on the stack, we'll
336 * pull the true stack pointer out of the saved
337 * registers and decrement our argument by the number
338 * of arguments passed in registers; if the argument
339 * we're seeking is passed in regsiters, we can just
340 * load it directly.
341 */
342 struct regs *rp = (struct regs *)((uintptr_t)&fp[1] +
343 sizeof (uintptr_t) * 2);
344
345 if (argno <= inreg) {
346 stack = (uintptr_t *)&rp->r_rdi;
347 } else {
348 stack = (uintptr_t *)(rp->r_rsp);
349 argno -= (inreg + 1);
350 }
351 #endif
352 goto load;
353 }
354 }
355
356 /*
357 * We know that we did not come through a trap to get into
358 * dtrace_probe() -- the provider simply called dtrace_probe()
359 * directly. As this is the case, we need to shift the argument
360 * that we're looking for: the probe ID is the first argument to
361 * dtrace_probe(), so the argument n will actually be found where
362 * one would expect to find argument (n + 1).
363 */
364 argno++;
365
366 #if defined(__amd64)
367 if (argno <= inreg) {
368 /*
369 * This shouldn't happen. If the argument is passed in a
370 * register then it should have been, well, passed in a
371 * register...
372 */
373 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
374 return (0);
375 }
376
377 argno -= (inreg + 1);
378 #endif
379 stack = (uintptr_t *)&fp[1];
380
381 load:
382 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
383 val = stack[argno];
384 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
385
386 return (val);
387 }
388
389 static dtrace_pops_t sdt_pops = {
390 NULL,
391 sdt_provide_module,
392 sdt_enable,
393 sdt_disable,
394 NULL,
395 NULL,
396 sdt_getargdesc,
397 sdt_getarg,
398 NULL,
399 sdt_destroy
400 };
401
402 /*ARGSUSED*/
403 static int
sdt_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)404 sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
405 {
406 sdt_provider_t *prov;
407
408 if (ddi_create_minor_node(devi, "sdt", S_IFCHR,
409 0, DDI_PSEUDO, NULL) == DDI_FAILURE) {
410 cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node");
411 ddi_remove_minor_node(devi, NULL);
412 return (DDI_FAILURE);
413 }
414
415 ddi_report_dev(devi);
416 sdt_devi = devi;
417
418 if (sdt_probetab_size == 0)
419 sdt_probetab_size = SDT_PROBETAB_SIZE;
420
421 sdt_probetab_mask = sdt_probetab_size - 1;
422 sdt_probetab =
423 kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP);
424 dtrace_invop_add(sdt_invop);
425
426 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
427 uint32_t priv;
428
429 if (prov->sdtp_priv == DTRACE_PRIV_NONE) {
430 priv = DTRACE_PRIV_KERNEL;
431 sdt_pops.dtps_mode = NULL;
432 } else {
433 priv = prov->sdtp_priv;
434 ASSERT(priv == DTRACE_PRIV_USER);
435 sdt_pops.dtps_mode = sdt_mode;
436 }
437
438 if (dtrace_register(prov->sdtp_name, prov->sdtp_attr,
439 priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) {
440 cmn_err(CE_WARN, "failed to register sdt provider %s",
441 prov->sdtp_name);
442 }
443 }
444
445 return (DDI_SUCCESS);
446 }
447
448 /*ARGSUSED*/
449 static int
sdt_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)450 sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
451 {
452 sdt_provider_t *prov;
453
454 switch (cmd) {
455 case DDI_DETACH:
456 break;
457
458 case DDI_SUSPEND:
459 return (DDI_SUCCESS);
460
461 default:
462 return (DDI_FAILURE);
463 }
464
465 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
466 if (prov->sdtp_id != DTRACE_PROVNONE) {
467 if (dtrace_unregister(prov->sdtp_id) != 0)
468 return (DDI_FAILURE);
469
470 prov->sdtp_id = DTRACE_PROVNONE;
471 }
472 }
473
474 dtrace_invop_remove(sdt_invop);
475 kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *));
476
477 return (DDI_SUCCESS);
478 }
479
480 /*ARGSUSED*/
481 static int
sdt_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)482 sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
483 {
484 int error;
485
486 switch (infocmd) {
487 case DDI_INFO_DEVT2DEVINFO:
488 *result = (void *)sdt_devi;
489 error = DDI_SUCCESS;
490 break;
491 case DDI_INFO_DEVT2INSTANCE:
492 *result = (void *)0;
493 error = DDI_SUCCESS;
494 break;
495 default:
496 error = DDI_FAILURE;
497 }
498 return (error);
499 }
500
501 /*ARGSUSED*/
502 static int
sdt_open(dev_t * devp,int flag,int otyp,cred_t * cred_p)503 sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
504 {
505 return (0);
506 }
507
508 static struct cb_ops sdt_cb_ops = {
509 sdt_open, /* open */
510 nodev, /* close */
511 nulldev, /* strategy */
512 nulldev, /* print */
513 nodev, /* dump */
514 nodev, /* read */
515 nodev, /* write */
516 nodev, /* ioctl */
517 nodev, /* devmap */
518 nodev, /* mmap */
519 nodev, /* segmap */
520 nochpoll, /* poll */
521 ddi_prop_op, /* cb_prop_op */
522 0, /* streamtab */
523 D_NEW | D_MP /* Driver compatibility flag */
524 };
525
526 static struct dev_ops sdt_ops = {
527 DEVO_REV, /* devo_rev, */
528 0, /* refcnt */
529 sdt_info, /* get_dev_info */
530 nulldev, /* identify */
531 nulldev, /* probe */
532 sdt_attach, /* attach */
533 sdt_detach, /* detach */
534 nodev, /* reset */
535 &sdt_cb_ops, /* driver operations */
536 NULL, /* bus operations */
537 nodev, /* dev power */
538 ddi_quiesce_not_needed, /* quiesce */
539 };
540
541 /*
542 * Module linkage information for the kernel.
543 */
544 static struct modldrv modldrv = {
545 &mod_driverops, /* module type (this is a pseudo driver) */
546 "Statically Defined Tracing", /* name of module */
547 &sdt_ops, /* driver ops */
548 };
549
550 static struct modlinkage modlinkage = {
551 MODREV_1,
552 (void *)&modldrv,
553 NULL
554 };
555
556 int
_init(void)557 _init(void)
558 {
559 return (mod_install(&modlinkage));
560 }
561
562 int
_info(struct modinfo * modinfop)563 _info(struct modinfo *modinfop)
564 {
565 return (mod_info(&modlinkage, modinfop));
566 }
567
568 int
_fini(void)569 _fini(void)
570 {
571 return (mod_remove(&modlinkage));
572 }
573