1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Copyright (c) 2012, Joyent, Inc. All rights reserved. 28 * Copyright (c) 2013, 2014 by Delphix. All rights reserved. 29 * Copyright 2024 Oxide Computer Company 30 */ 31 32 #include <sys/modctl.h> 33 #include <sys/sunddi.h> 34 #include <sys/dtrace.h> 35 #include <sys/kobj.h> 36 #include <sys/stat.h> 37 #include <sys/conf.h> 38 #include <vm/seg_kmem.h> 39 #include <sys/stack.h> 40 #include <sys/frame.h> 41 #include <sys/dtrace_impl.h> 42 #include <sys/cmn_err.h> 43 #include <sys/sysmacros.h> 44 #include <sys/privregs.h> 45 #include <sys/sdt_impl.h> 46 47 #define SDT_PATCHVAL 0xf0 48 #define SDT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask) 49 #define SDT_PROBETAB_SIZE 0x1000 /* 4k entries -- 16K total */ 50 51 static dev_info_t *sdt_devi; 52 static int sdt_verbose = 0; 53 static sdt_probe_t **sdt_probetab; 54 static int sdt_probetab_size; 55 static int sdt_probetab_mask; 56 57 /*ARGSUSED*/ 58 static int 59 sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) 60 { 61 uintptr_t stack0, stack1, stack2, stack3, stack4; 62 int i = 0; 63 sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)]; 64 65 /* 66 * On amd64, stack[0] contains the dereferenced stack pointer, 67 * stack[1] contains savfp, stack[2] contains savpc. We want 68 * to step over these entries. 69 */ 70 i += 3; 71 72 for (; sdt != NULL; sdt = sdt->sdp_hashnext) { 73 if ((uintptr_t)sdt->sdp_patchpoint == addr) { 74 /* 75 * When accessing the arguments on the stack, we must 76 * protect against accessing beyond the stack. We can 77 * safely set NOFAULT here -- we know that interrupts 78 * are already disabled. 79 */ 80 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 81 stack0 = stack[i++]; 82 stack1 = stack[i++]; 83 stack2 = stack[i++]; 84 stack3 = stack[i++]; 85 stack4 = stack[i++]; 86 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | 87 CPU_DTRACE_BADADDR); 88 89 dtrace_probe(sdt->sdp_id, stack0, stack1, 90 stack2, stack3, stack4); 91 92 return (DTRACE_INVOP_NOP); 93 } 94 } 95 96 return (0); 97 } 98 99 /*ARGSUSED*/ 100 static void 101 sdt_provide_module(void *arg, struct modctl *ctl) 102 { 103 struct module *mp = ctl->mod_mp; 104 char *modname = ctl->mod_modname; 105 sdt_probedesc_t *sdpd; 106 sdt_probe_t *sdp, *old; 107 sdt_provider_t *prov; 108 int len; 109 110 /* 111 * One for all, and all for one: if we haven't yet registered all of 112 * our providers, we'll refuse to provide anything. 113 */ 114 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { 115 if (prov->sdtp_id == DTRACE_PROVNONE) 116 return; 117 } 118 119 if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL) 120 return; 121 122 for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) { 123 char *name = sdpd->sdpd_name, *func, *nname; 124 int i, j; 125 sdt_provider_t *prov; 126 ulong_t offs; 127 dtrace_id_t id; 128 129 for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) { 130 char *prefix = prov->sdtp_prefix; 131 132 if (strncmp(name, prefix, strlen(prefix)) == 0) { 133 name += strlen(prefix); 134 break; 135 } 136 } 137 138 nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP); 139 140 for (i = 0, j = 0; name[j] != '\0'; i++) { 141 if (name[j] == '_' && name[j + 1] == '_') { 142 nname[i] = '-'; 143 j += 2; 144 } else { 145 nname[i] = name[j++]; 146 } 147 } 148 149 nname[i] = '\0'; 150 151 sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP); 152 sdp->sdp_loadcnt = ctl->mod_loadcnt; 153 sdp->sdp_ctl = ctl; 154 sdp->sdp_name = nname; 155 sdp->sdp_namelen = len; 156 sdp->sdp_provider = prov; 157 158 func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs); 159 160 if (func == NULL) 161 func = "<unknown>"; 162 163 /* 164 * We have our provider. Now create the probe. 165 */ 166 if ((id = dtrace_probe_lookup(prov->sdtp_id, modname, 167 func, nname)) != DTRACE_IDNONE) { 168 old = dtrace_probe_arg(prov->sdtp_id, id); 169 ASSERT(old != NULL); 170 171 sdp->sdp_next = old->sdp_next; 172 sdp->sdp_id = id; 173 old->sdp_next = sdp; 174 } else { 175 sdp->sdp_id = dtrace_probe_create(prov->sdtp_id, 176 modname, func, nname, 3, sdp); 177 178 mp->sdt_nprobes++; 179 } 180 181 sdp->sdp_hashnext = 182 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)]; 183 sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp; 184 185 sdp->sdp_patchval = SDT_PATCHVAL; 186 sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset; 187 sdp->sdp_savedval = *sdp->sdp_patchpoint; 188 sdp->sdp_is_tailcall = 189 sdp->sdp_patchpoint[SDT_OFF_RET_IDX] == SDT_RET; 190 } 191 } 192 193 /*ARGSUSED*/ 194 static void 195 sdt_destroy(void *arg, dtrace_id_t id, void *parg) 196 { 197 sdt_probe_t *sdp = parg, *old, *last, *hash; 198 struct modctl *ctl = sdp->sdp_ctl; 199 int ndx; 200 201 if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) { 202 if ((ctl->mod_loadcnt == sdp->sdp_loadcnt && 203 ctl->mod_loaded)) { 204 ((struct module *)(ctl->mod_mp))->sdt_nprobes--; 205 } 206 } 207 208 while (sdp != NULL) { 209 old = sdp; 210 211 /* 212 * Now we need to remove this probe from the sdt_probetab. 213 */ 214 ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint); 215 last = NULL; 216 hash = sdt_probetab[ndx]; 217 218 while (hash != sdp) { 219 ASSERT(hash != NULL); 220 last = hash; 221 hash = hash->sdp_hashnext; 222 } 223 224 if (last != NULL) { 225 last->sdp_hashnext = sdp->sdp_hashnext; 226 } else { 227 sdt_probetab[ndx] = sdp->sdp_hashnext; 228 } 229 230 kmem_free(sdp->sdp_name, sdp->sdp_namelen); 231 sdp = sdp->sdp_next; 232 kmem_free(old, sizeof (sdt_probe_t)); 233 } 234 } 235 236 /*ARGSUSED*/ 237 static int 238 sdt_enable(void *arg, dtrace_id_t id, void *parg) 239 { 240 sdt_probe_t *sdp = parg; 241 struct modctl *ctl = sdp->sdp_ctl; 242 243 ctl->mod_nenabled++; 244 245 /* 246 * If this module has disappeared since we discovered its probes, 247 * refuse to enable it. 248 */ 249 if (!ctl->mod_loaded) { 250 if (sdt_verbose) { 251 cmn_err(CE_NOTE, "sdt is failing for probe %s " 252 "(module %s unloaded)", 253 sdp->sdp_name, ctl->mod_modname); 254 } 255 goto err; 256 } 257 258 /* 259 * Now check that our modctl has the expected load count. If it 260 * doesn't, this module must have been unloaded and reloaded -- and 261 * we're not going to touch it. 262 */ 263 if (ctl->mod_loadcnt != sdp->sdp_loadcnt) { 264 if (sdt_verbose) { 265 cmn_err(CE_NOTE, "sdt is failing for probe %s " 266 "(module %s reloaded)", 267 sdp->sdp_name, ctl->mod_modname); 268 } 269 goto err; 270 } 271 272 while (sdp != NULL) { 273 *sdp->sdp_patchpoint = sdp->sdp_patchval; 274 sdp = sdp->sdp_next; 275 } 276 err: 277 return (0); 278 } 279 280 /*ARGSUSED*/ 281 static void 282 sdt_disable(void *arg, dtrace_id_t id, void *parg) 283 { 284 sdt_probe_t *sdp = parg; 285 struct modctl *ctl = sdp->sdp_ctl; 286 287 ctl->mod_nenabled--; 288 289 if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt) 290 goto err; 291 292 while (sdp != NULL) { 293 *sdp->sdp_patchpoint = sdp->sdp_savedval; 294 sdp = sdp->sdp_next; 295 } 296 297 err: 298 ; 299 } 300 301 /*ARGSUSED*/ 302 uint64_t 303 sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) 304 { 305 sdt_probe_t *sdp = parg; 306 uintptr_t val; 307 struct frame *fp = (struct frame *)dtrace_getfp(); 308 uintptr_t *stack; 309 int i; 310 /* 311 * A total of 6 arguments are passed via registers; any argument with 312 * index of 5 or lower is therefore in a register. 313 */ 314 int inreg = 5; 315 316 for (i = 1; i <= aframes; i++) { 317 fp = (struct frame *)(fp->fr_savfp); 318 319 if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) { 320 /* 321 * In the case of amd64, we will use the pointer to the 322 * regs structure that was pushed when we took the 323 * trap. To get this structure, we must increment 324 * beyond the frame structure, the calling RIP, and 325 * padding stored in dtrace_invop(). If the argument 326 * that we're seeking is passed on the stack, we'll 327 * pull the true stack pointer out of the saved 328 * registers and decrement our argument by the number 329 * of arguments passed in registers; if the argument 330 * we're seeking is passed in regsiters, we can just 331 * load it directly. 332 */ 333 struct regs *rp = (struct regs *)((uintptr_t)&fp[1] + 334 sizeof (uintptr_t) * 2); 335 336 if (argno <= inreg) { 337 stack = (uintptr_t *)&rp->r_rdi; 338 } else { 339 stack = (uintptr_t *)(rp->r_rsp); 340 argno -= (inreg + 1); 341 342 /* 343 * If the probe was invoked as a tail call, the 344 * compiler leaves the stack as if we had just 345 * entered the fictitious __dtrace_probe_[name] 346 * function, meaning we need to skip over the 347 * saved return address to get to the stack 348 * arguments. 349 */ 350 if (sdp->sdp_is_tailcall) 351 argno++; 352 } 353 goto load; 354 } 355 } 356 357 /* 358 * We know that we did not come through a trap to get into 359 * dtrace_probe() -- the provider simply called dtrace_probe() 360 * directly. As this is the case, we need to shift the argument 361 * that we're looking for: the probe ID is the first argument to 362 * dtrace_probe(), so the argument n will actually be found where 363 * one would expect to find argument (n + 1). 364 */ 365 argno++; 366 367 if (argno <= inreg) { 368 /* 369 * This shouldn't happen. If the argument is passed in a 370 * register then it should have been, well, passed in a 371 * register... 372 */ 373 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); 374 return (0); 375 } 376 377 argno -= (inreg + 1); 378 stack = (uintptr_t *)&fp[1]; 379 380 load: 381 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); 382 val = stack[argno]; 383 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); 384 385 return (val); 386 } 387 388 static dtrace_pops_t sdt_pops = { 389 NULL, 390 sdt_provide_module, 391 sdt_enable, 392 sdt_disable, 393 NULL, 394 NULL, 395 sdt_getargdesc, 396 sdt_getarg, 397 NULL, 398 sdt_destroy 399 }; 400 401 /*ARGSUSED*/ 402 static int 403 sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 404 { 405 sdt_provider_t *prov; 406 407 if (ddi_create_minor_node(devi, "sdt", S_IFCHR, 408 0, DDI_PSEUDO, 0) == DDI_FAILURE) { 409 cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node"); 410 ddi_remove_minor_node(devi, NULL); 411 return (DDI_FAILURE); 412 } 413 414 ddi_report_dev(devi); 415 sdt_devi = devi; 416 417 if (sdt_probetab_size == 0) 418 sdt_probetab_size = SDT_PROBETAB_SIZE; 419 420 sdt_probetab_mask = sdt_probetab_size - 1; 421 sdt_probetab = 422 kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP); 423 dtrace_invop_add(sdt_invop); 424 425 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { 426 uint32_t priv; 427 428 if (prov->sdtp_priv == DTRACE_PRIV_NONE) { 429 priv = DTRACE_PRIV_KERNEL; 430 sdt_pops.dtps_mode = NULL; 431 } else { 432 priv = prov->sdtp_priv; 433 ASSERT(priv == DTRACE_PRIV_USER); 434 sdt_pops.dtps_mode = sdt_mode; 435 } 436 437 if (dtrace_register(prov->sdtp_name, prov->sdtp_attr, 438 priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) { 439 cmn_err(CE_WARN, "failed to register sdt provider %s", 440 prov->sdtp_name); 441 } 442 } 443 444 return (DDI_SUCCESS); 445 } 446 447 /*ARGSUSED*/ 448 static int 449 sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 450 { 451 sdt_provider_t *prov; 452 453 switch (cmd) { 454 case DDI_DETACH: 455 break; 456 457 case DDI_SUSPEND: 458 return (DDI_SUCCESS); 459 460 default: 461 return (DDI_FAILURE); 462 } 463 464 for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { 465 if (prov->sdtp_id != DTRACE_PROVNONE) { 466 if (dtrace_unregister(prov->sdtp_id) != 0) 467 return (DDI_FAILURE); 468 469 prov->sdtp_id = DTRACE_PROVNONE; 470 } 471 } 472 473 dtrace_invop_remove(sdt_invop); 474 kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *)); 475 476 return (DDI_SUCCESS); 477 } 478 479 /*ARGSUSED*/ 480 static int 481 sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 482 { 483 int error; 484 485 switch (infocmd) { 486 case DDI_INFO_DEVT2DEVINFO: 487 *result = (void *)sdt_devi; 488 error = DDI_SUCCESS; 489 break; 490 case DDI_INFO_DEVT2INSTANCE: 491 *result = (void *)0; 492 error = DDI_SUCCESS; 493 break; 494 default: 495 error = DDI_FAILURE; 496 } 497 return (error); 498 } 499 500 /*ARGSUSED*/ 501 static int 502 sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 503 { 504 return (0); 505 } 506 507 static struct cb_ops sdt_cb_ops = { 508 sdt_open, /* open */ 509 nodev, /* close */ 510 nulldev, /* strategy */ 511 nulldev, /* print */ 512 nodev, /* dump */ 513 nodev, /* read */ 514 nodev, /* write */ 515 nodev, /* ioctl */ 516 nodev, /* devmap */ 517 nodev, /* mmap */ 518 nodev, /* segmap */ 519 nochpoll, /* poll */ 520 ddi_prop_op, /* cb_prop_op */ 521 0, /* streamtab */ 522 D_NEW | D_MP /* Driver compatibility flag */ 523 }; 524 525 static struct dev_ops sdt_ops = { 526 DEVO_REV, /* devo_rev, */ 527 0, /* refcnt */ 528 sdt_info, /* get_dev_info */ 529 nulldev, /* identify */ 530 nulldev, /* probe */ 531 sdt_attach, /* attach */ 532 sdt_detach, /* detach */ 533 nodev, /* reset */ 534 &sdt_cb_ops, /* driver operations */ 535 NULL, /* bus operations */ 536 nodev, /* dev power */ 537 ddi_quiesce_not_needed, /* quiesce */ 538 }; 539 540 /* 541 * Module linkage information for the kernel. 542 */ 543 static struct modldrv modldrv = { 544 &mod_driverops, /* module type (this is a pseudo driver) */ 545 "Statically Defined Tracing", /* name of module */ 546 &sdt_ops, /* driver ops */ 547 }; 548 549 static struct modlinkage modlinkage = { 550 MODREV_1, 551 (void *)&modldrv, 552 NULL 553 }; 554 555 int 556 _init(void) 557 { 558 return (mod_install(&modlinkage)); 559 } 560 561 int 562 _info(struct modinfo *modinfop) 563 { 564 return (mod_info(&modlinkage, modinfop)); 565 } 566 567 int 568 _fini(void) 569 { 570 return (mod_remove(&modlinkage)); 571 } 572