1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
29 * Copyright 2024 Oxide Computer Company
30 */
31
32 #include <sys/modctl.h>
33 #include <sys/sunddi.h>
34 #include <sys/dtrace.h>
35 #include <sys/kobj.h>
36 #include <sys/stat.h>
37 #include <sys/conf.h>
38 #include <vm/seg_kmem.h>
39 #include <sys/stack.h>
40 #include <sys/frame.h>
41 #include <sys/dtrace_impl.h>
42 #include <sys/cmn_err.h>
43 #include <sys/sysmacros.h>
44 #include <sys/privregs.h>
45 #include <sys/sdt_impl.h>
46
47 #define SDT_PATCHVAL 0xf0
48 #define SDT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask)
49 #define SDT_PROBETAB_SIZE 0x1000 /* 4k entries -- 16K total */
50
51 static dev_info_t *sdt_devi;
52 static int sdt_verbose = 0;
53 static sdt_probe_t **sdt_probetab;
54 static int sdt_probetab_size;
55 static int sdt_probetab_mask;
56
57 /*ARGSUSED*/
58 static int
sdt_invop(uintptr_t addr,uintptr_t * stack,uintptr_t eax)59 sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
60 {
61 uintptr_t stack0, stack1, stack2, stack3, stack4;
62 int i = 0;
63 sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)];
64
65 /*
66 * On amd64, stack[0] contains the dereferenced stack pointer,
67 * stack[1] contains savfp, stack[2] contains savpc. We want
68 * to step over these entries.
69 */
70 i += 3;
71
72 for (; sdt != NULL; sdt = sdt->sdp_hashnext) {
73 if ((uintptr_t)sdt->sdp_patchpoint == addr) {
74 /*
75 * When accessing the arguments on the stack, we must
76 * protect against accessing beyond the stack. We can
77 * safely set NOFAULT here -- we know that interrupts
78 * are already disabled.
79 */
80 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
81 stack0 = stack[i++];
82 stack1 = stack[i++];
83 stack2 = stack[i++];
84 stack3 = stack[i++];
85 stack4 = stack[i++];
86 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
87 CPU_DTRACE_BADADDR);
88
89 dtrace_probe(sdt->sdp_id, stack0, stack1,
90 stack2, stack3, stack4);
91
92 return (DTRACE_INVOP_NOP);
93 }
94 }
95
96 return (0);
97 }
98
99 /*ARGSUSED*/
100 static void
sdt_provide_module(void * arg,struct modctl * ctl)101 sdt_provide_module(void *arg, struct modctl *ctl)
102 {
103 struct module *mp = ctl->mod_mp;
104 char *modname = ctl->mod_modname;
105 sdt_probedesc_t *sdpd;
106 sdt_probe_t *sdp, *old;
107 sdt_provider_t *prov;
108 int len;
109
110 /*
111 * One for all, and all for one: if we haven't yet registered all of
112 * our providers, we'll refuse to provide anything.
113 */
114 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
115 if (prov->sdtp_id == DTRACE_PROVNONE)
116 return;
117 }
118
119 if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL)
120 return;
121
122 for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) {
123 char *name = sdpd->sdpd_name, *func, *nname;
124 int i, j;
125 sdt_provider_t *prov;
126 ulong_t offs;
127 dtrace_id_t id;
128
129 for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) {
130 char *prefix = prov->sdtp_prefix;
131
132 if (strncmp(name, prefix, strlen(prefix)) == 0) {
133 name += strlen(prefix);
134 break;
135 }
136 }
137
138 nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP);
139
140 for (i = 0, j = 0; name[j] != '\0'; i++) {
141 if (name[j] == '_' && name[j + 1] == '_') {
142 nname[i] = '-';
143 j += 2;
144 } else {
145 nname[i] = name[j++];
146 }
147 }
148
149 nname[i] = '\0';
150
151 sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP);
152 sdp->sdp_loadcnt = ctl->mod_loadcnt;
153 sdp->sdp_ctl = ctl;
154 sdp->sdp_name = nname;
155 sdp->sdp_namelen = len;
156 sdp->sdp_provider = prov;
157
158 func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs);
159
160 if (func == NULL)
161 func = "<unknown>";
162
163 /*
164 * We have our provider. Now create the probe.
165 */
166 if ((id = dtrace_probe_lookup(prov->sdtp_id, modname,
167 func, nname)) != DTRACE_IDNONE) {
168 old = dtrace_probe_arg(prov->sdtp_id, id);
169 ASSERT(old != NULL);
170
171 sdp->sdp_next = old->sdp_next;
172 sdp->sdp_id = id;
173 old->sdp_next = sdp;
174 } else {
175 sdp->sdp_id = dtrace_probe_create(prov->sdtp_id,
176 modname, func, nname, 3, sdp);
177
178 mp->sdt_nprobes++;
179 }
180
181 sdp->sdp_hashnext =
182 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)];
183 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp;
184
185 sdp->sdp_patchval = SDT_PATCHVAL;
186 sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset;
187 sdp->sdp_savedval = *sdp->sdp_patchpoint;
188 sdp->sdp_is_tailcall =
189 sdp->sdp_patchpoint[SDT_OFF_RET_IDX] == SDT_RET;
190 }
191 }
192
193 /*ARGSUSED*/
194 static void
sdt_destroy(void * arg,dtrace_id_t id,void * parg)195 sdt_destroy(void *arg, dtrace_id_t id, void *parg)
196 {
197 sdt_probe_t *sdp = parg, *old, *last, *hash;
198 struct modctl *ctl = sdp->sdp_ctl;
199 int ndx;
200
201 if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) {
202 if ((ctl->mod_loadcnt == sdp->sdp_loadcnt &&
203 ctl->mod_loaded)) {
204 ((struct module *)(ctl->mod_mp))->sdt_nprobes--;
205 }
206 }
207
208 while (sdp != NULL) {
209 old = sdp;
210
211 /*
212 * Now we need to remove this probe from the sdt_probetab.
213 */
214 ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint);
215 last = NULL;
216 hash = sdt_probetab[ndx];
217
218 while (hash != sdp) {
219 ASSERT(hash != NULL);
220 last = hash;
221 hash = hash->sdp_hashnext;
222 }
223
224 if (last != NULL) {
225 last->sdp_hashnext = sdp->sdp_hashnext;
226 } else {
227 sdt_probetab[ndx] = sdp->sdp_hashnext;
228 }
229
230 kmem_free(sdp->sdp_name, sdp->sdp_namelen);
231 sdp = sdp->sdp_next;
232 kmem_free(old, sizeof (sdt_probe_t));
233 }
234 }
235
236 /*ARGSUSED*/
237 static int
sdt_enable(void * arg,dtrace_id_t id,void * parg)238 sdt_enable(void *arg, dtrace_id_t id, void *parg)
239 {
240 sdt_probe_t *sdp = parg;
241 struct modctl *ctl = sdp->sdp_ctl;
242
243 ctl->mod_nenabled++;
244
245 /*
246 * If this module has disappeared since we discovered its probes,
247 * refuse to enable it.
248 */
249 if (!ctl->mod_loaded) {
250 if (sdt_verbose) {
251 cmn_err(CE_NOTE, "sdt is failing for probe %s "
252 "(module %s unloaded)",
253 sdp->sdp_name, ctl->mod_modname);
254 }
255 goto err;
256 }
257
258 /*
259 * Now check that our modctl has the expected load count. If it
260 * doesn't, this module must have been unloaded and reloaded -- and
261 * we're not going to touch it.
262 */
263 if (ctl->mod_loadcnt != sdp->sdp_loadcnt) {
264 if (sdt_verbose) {
265 cmn_err(CE_NOTE, "sdt is failing for probe %s "
266 "(module %s reloaded)",
267 sdp->sdp_name, ctl->mod_modname);
268 }
269 goto err;
270 }
271
272 while (sdp != NULL) {
273 *sdp->sdp_patchpoint = sdp->sdp_patchval;
274 sdp = sdp->sdp_next;
275 }
276 err:
277 return (0);
278 }
279
280 /*ARGSUSED*/
281 static void
sdt_disable(void * arg,dtrace_id_t id,void * parg)282 sdt_disable(void *arg, dtrace_id_t id, void *parg)
283 {
284 sdt_probe_t *sdp = parg;
285 struct modctl *ctl = sdp->sdp_ctl;
286
287 ctl->mod_nenabled--;
288
289 if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt)
290 goto err;
291
292 while (sdp != NULL) {
293 *sdp->sdp_patchpoint = sdp->sdp_savedval;
294 sdp = sdp->sdp_next;
295 }
296
297 err:
298 ;
299 }
300
301 /*ARGSUSED*/
302 uint64_t
sdt_getarg(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)303 sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
304 {
305 sdt_probe_t *sdp = parg;
306 uintptr_t val;
307 struct frame *fp = (struct frame *)dtrace_getfp();
308 uintptr_t *stack;
309 int i;
310 /*
311 * A total of 6 arguments are passed via registers; any argument with
312 * index of 5 or lower is therefore in a register.
313 */
314 int inreg = 5;
315
316 for (i = 1; i <= aframes; i++) {
317 fp = (struct frame *)(fp->fr_savfp);
318
319 if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) {
320 /*
321 * In the case of amd64, we will use the pointer to the
322 * regs structure that was pushed when we took the
323 * trap. To get this structure, we must increment
324 * beyond the frame structure, the calling RIP, and
325 * padding stored in dtrace_invop(). If the argument
326 * that we're seeking is passed on the stack, we'll
327 * pull the true stack pointer out of the saved
328 * registers and decrement our argument by the number
329 * of arguments passed in registers; if the argument
330 * we're seeking is passed in regsiters, we can just
331 * load it directly.
332 */
333 struct regs *rp = (struct regs *)((uintptr_t)&fp[1] +
334 sizeof (uintptr_t) * 2);
335
336 if (argno <= inreg) {
337 stack = (uintptr_t *)&rp->r_rdi;
338 } else {
339 stack = (uintptr_t *)(rp->r_rsp);
340 argno -= (inreg + 1);
341
342 /*
343 * If the probe was invoked as a tail call, the
344 * compiler leaves the stack as if we had just
345 * entered the fictitious __dtrace_probe_[name]
346 * function, meaning we need to skip over the
347 * saved return address to get to the stack
348 * arguments.
349 */
350 if (sdp->sdp_is_tailcall)
351 argno++;
352 }
353 goto load;
354 }
355 }
356
357 /*
358 * We know that we did not come through a trap to get into
359 * dtrace_probe() -- the provider simply called dtrace_probe()
360 * directly. As this is the case, we need to shift the argument
361 * that we're looking for: the probe ID is the first argument to
362 * dtrace_probe(), so the argument n will actually be found where
363 * one would expect to find argument (n + 1).
364 */
365 argno++;
366
367 if (argno <= inreg) {
368 /*
369 * This shouldn't happen. If the argument is passed in a
370 * register then it should have been, well, passed in a
371 * register...
372 */
373 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
374 return (0);
375 }
376
377 argno -= (inreg + 1);
378 stack = (uintptr_t *)&fp[1];
379
380 load:
381 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
382 val = stack[argno];
383 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
384
385 return (val);
386 }
387
388 static dtrace_pops_t sdt_pops = {
389 NULL,
390 sdt_provide_module,
391 sdt_enable,
392 sdt_disable,
393 NULL,
394 NULL,
395 sdt_getargdesc,
396 sdt_getarg,
397 NULL,
398 sdt_destroy
399 };
400
401 /*ARGSUSED*/
402 static int
sdt_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)403 sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
404 {
405 sdt_provider_t *prov;
406
407 if (ddi_create_minor_node(devi, "sdt", S_IFCHR,
408 0, DDI_PSEUDO, 0) == DDI_FAILURE) {
409 cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node");
410 ddi_remove_minor_node(devi, NULL);
411 return (DDI_FAILURE);
412 }
413
414 ddi_report_dev(devi);
415 sdt_devi = devi;
416
417 if (sdt_probetab_size == 0)
418 sdt_probetab_size = SDT_PROBETAB_SIZE;
419
420 sdt_probetab_mask = sdt_probetab_size - 1;
421 sdt_probetab =
422 kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP);
423 dtrace_invop_add(sdt_invop);
424
425 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
426 uint32_t priv;
427
428 if (prov->sdtp_priv == DTRACE_PRIV_NONE) {
429 priv = DTRACE_PRIV_KERNEL;
430 sdt_pops.dtps_mode = NULL;
431 } else {
432 priv = prov->sdtp_priv;
433 ASSERT(priv == DTRACE_PRIV_USER);
434 sdt_pops.dtps_mode = sdt_mode;
435 }
436
437 if (dtrace_register(prov->sdtp_name, prov->sdtp_attr,
438 priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) {
439 cmn_err(CE_WARN, "failed to register sdt provider %s",
440 prov->sdtp_name);
441 }
442 }
443
444 return (DDI_SUCCESS);
445 }
446
447 /*ARGSUSED*/
448 static int
sdt_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)449 sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
450 {
451 sdt_provider_t *prov;
452
453 switch (cmd) {
454 case DDI_DETACH:
455 break;
456
457 case DDI_SUSPEND:
458 return (DDI_SUCCESS);
459
460 default:
461 return (DDI_FAILURE);
462 }
463
464 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
465 if (prov->sdtp_id != DTRACE_PROVNONE) {
466 if (dtrace_unregister(prov->sdtp_id) != 0)
467 return (DDI_FAILURE);
468
469 prov->sdtp_id = DTRACE_PROVNONE;
470 }
471 }
472
473 dtrace_invop_remove(sdt_invop);
474 kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *));
475
476 return (DDI_SUCCESS);
477 }
478
479 /*ARGSUSED*/
480 static int
sdt_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)481 sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
482 {
483 int error;
484
485 switch (infocmd) {
486 case DDI_INFO_DEVT2DEVINFO:
487 *result = (void *)sdt_devi;
488 error = DDI_SUCCESS;
489 break;
490 case DDI_INFO_DEVT2INSTANCE:
491 *result = (void *)0;
492 error = DDI_SUCCESS;
493 break;
494 default:
495 error = DDI_FAILURE;
496 }
497 return (error);
498 }
499
500 /*ARGSUSED*/
501 static int
sdt_open(dev_t * devp,int flag,int otyp,cred_t * cred_p)502 sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
503 {
504 return (0);
505 }
506
507 static struct cb_ops sdt_cb_ops = {
508 sdt_open, /* open */
509 nodev, /* close */
510 nulldev, /* strategy */
511 nulldev, /* print */
512 nodev, /* dump */
513 nodev, /* read */
514 nodev, /* write */
515 nodev, /* ioctl */
516 nodev, /* devmap */
517 nodev, /* mmap */
518 nodev, /* segmap */
519 nochpoll, /* poll */
520 ddi_prop_op, /* cb_prop_op */
521 0, /* streamtab */
522 D_NEW | D_MP /* Driver compatibility flag */
523 };
524
525 static struct dev_ops sdt_ops = {
526 DEVO_REV, /* devo_rev, */
527 0, /* refcnt */
528 sdt_info, /* get_dev_info */
529 nulldev, /* identify */
530 nulldev, /* probe */
531 sdt_attach, /* attach */
532 sdt_detach, /* detach */
533 nodev, /* reset */
534 &sdt_cb_ops, /* driver operations */
535 NULL, /* bus operations */
536 nodev, /* dev power */
537 ddi_quiesce_not_needed, /* quiesce */
538 };
539
540 /*
541 * Module linkage information for the kernel.
542 */
543 static struct modldrv modldrv = {
544 &mod_driverops, /* module type (this is a pseudo driver) */
545 "Statically Defined Tracing", /* name of module */
546 &sdt_ops, /* driver ops */
547 };
548
549 static struct modlinkage modlinkage = {
550 MODREV_1,
551 (void *)&modldrv,
552 NULL
553 };
554
555 int
_init(void)556 _init(void)
557 {
558 return (mod_install(&modlinkage));
559 }
560
561 int
_info(struct modinfo * modinfop)562 _info(struct modinfo *modinfop)
563 {
564 return (mod_info(&modlinkage, modinfop));
565 }
566
567 int
_fini(void)568 _fini(void)
569 {
570 return (mod_remove(&modlinkage));
571 }
572