1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/errno.h> 29 #include <sys/stat.h> 30 #include <sys/modctl.h> 31 #include <sys/conf.h> 32 #include <sys/systm.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/cpuvar.h> 36 #include <sys/kmem.h> 37 #include <sys/strsubr.h> 38 #include <sys/dtrace.h> 39 #include <sys/cyclic.h> 40 #include <sys/atomic.h> 41 42 static dev_info_t *profile_devi; 43 static dtrace_provider_id_t profile_id; 44 45 /* 46 * Regardless of platform, there are five artificial frames in the case of the 47 * profile provider: 48 * 49 * profile_fire 50 * cyclic_expire 51 * cyclic_fire 52 * [ cbe ] 53 * [ locore ] 54 * 55 * On amd64, there are two frames associated with locore: one in locore, and 56 * another in common interrupt dispatch code. (i386 has not been modified to 57 * use this common layer.) Further, on i386, the interrupted instruction 58 * appears as its own stack frame. All of this means that we need to add one 59 * frame for amd64, and then take one away for both amd64 and i386. 60 * 61 * On SPARC, the picture is further complicated because the compiler 62 * optimizes away tail-calls -- so the following frames are optimized away: 63 * 64 * profile_fire 65 * cyclic_expire 66 * 67 * This gives three frames. However, on DEBUG kernels, the cyclic_expire 68 * frame cannot be tail-call eliminated, yielding four frames in this case. 69 * 70 * All of the above constraints lead to the mess below. Yes, the profile 71 * provider should ideally figure this out on-the-fly by hiting one of its own 72 * probes and then walking its own stack trace. This is complicated, however, 73 * and the static definition doesn't seem to be overly brittle. Still, we 74 * allow for a manual override in case we get it completely wrong. 75 */ 76 #ifdef __amd64 77 #define PROF_ARTIFICIAL_FRAMES 7 78 #else 79 #ifdef __i386 80 #define PROF_ARTIFICIAL_FRAMES 6 81 #else 82 #ifdef __sparc 83 #ifdef DEBUG 84 #define PROF_ARTIFICIAL_FRAMES 4 85 #else 86 #define PROF_ARTIFICIAL_FRAMES 3 87 #endif 88 #endif 89 #endif 90 #endif 91 92 #define PROF_NAMELEN 15 93 94 #define PROF_PROFILE 0 95 #define PROF_TICK 1 96 #define PROF_PREFIX_PROFILE "profile-" 97 #define PROF_PREFIX_TICK "tick-" 98 99 typedef struct profile_probe { 100 char prof_name[PROF_NAMELEN]; 101 dtrace_id_t prof_id; 102 int prof_kind; 103 hrtime_t prof_interval; 104 cyclic_id_t prof_cyclic; 105 } profile_probe_t; 106 107 typedef struct profile_probe_percpu { 108 hrtime_t profc_expected; 109 hrtime_t profc_interval; 110 profile_probe_t *profc_probe; 111 } profile_probe_percpu_t; 112 113 hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */ 114 int profile_aframes = 0; /* override */ 115 116 static int profile_rates[] = { 117 97, 199, 499, 997, 1999, 118 4001, 4999, 0, 0, 0, 119 0, 0, 0, 0, 0, 120 0, 0, 0, 0, 0 121 }; 122 123 static int profile_ticks[] = { 124 1, 10, 100, 500, 1000, 125 5000, 0, 0, 0, 0, 126 0, 0, 0, 0, 0 127 }; 128 129 /* 130 * profile_max defines the upper bound on the number of profile probes that 131 * can exist (this is to prevent malicious or clumsy users from exhausing 132 * system resources by creating a slew of profile probes). At mod load time, 133 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's 134 * present in the profile.conf file. 135 */ 136 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */ 137 static uint32_t profile_max; /* maximum number of profile probes */ 138 static uint32_t profile_total; /* current number of profile probes */ 139 140 static void 141 profile_fire(void *arg) 142 { 143 profile_probe_percpu_t *pcpu = arg; 144 profile_probe_t *prof = pcpu->profc_probe; 145 hrtime_t late; 146 147 late = dtrace_gethrtime() - pcpu->profc_expected; 148 pcpu->profc_expected += pcpu->profc_interval; 149 150 dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, 151 CPU->cpu_profile_upc, late, 0, 0); 152 } 153 154 static void 155 profile_tick(void *arg) 156 { 157 profile_probe_t *prof = arg; 158 159 dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, 160 CPU->cpu_profile_upc, 0, 0, 0); 161 } 162 163 static void 164 profile_create(hrtime_t interval, const char *name, int kind) 165 { 166 profile_probe_t *prof; 167 168 if (interval < profile_interval_min) 169 return; 170 171 if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0) 172 return; 173 174 atomic_add_32(&profile_total, 1); 175 if (profile_total > profile_max) { 176 atomic_add_32(&profile_total, -1); 177 return; 178 } 179 180 prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); 181 (void) strcpy(prof->prof_name, name); 182 prof->prof_interval = interval; 183 prof->prof_cyclic = CYCLIC_NONE; 184 prof->prof_kind = kind; 185 prof->prof_id = dtrace_probe_create(profile_id, 186 NULL, NULL, name, 187 profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof); 188 } 189 190 /*ARGSUSED*/ 191 static void 192 profile_provide(void *arg, const dtrace_probedesc_t *desc) 193 { 194 int i, j, rate, kind; 195 hrtime_t val = 0, mult = 1, len; 196 const char *name, *suffix = NULL; 197 198 const struct { 199 char *prefix; 200 int kind; 201 } types[] = { 202 { PROF_PREFIX_PROFILE, PROF_PROFILE }, 203 { PROF_PREFIX_TICK, PROF_TICK }, 204 { NULL, NULL } 205 }; 206 207 const struct { 208 char *name; 209 hrtime_t mult; 210 } suffixes[] = { 211 { "ns", NANOSEC / NANOSEC }, 212 { "nsec", NANOSEC / NANOSEC }, 213 { "us", NANOSEC / MICROSEC }, 214 { "usec", NANOSEC / MICROSEC }, 215 { "ms", NANOSEC / MILLISEC }, 216 { "msec", NANOSEC / MILLISEC }, 217 { "s", NANOSEC / SEC }, 218 { "sec", NANOSEC / SEC }, 219 { "m", NANOSEC * (hrtime_t)60 }, 220 { "min", NANOSEC * (hrtime_t)60 }, 221 { "h", NANOSEC * (hrtime_t)(60 * 60) }, 222 { "hour", NANOSEC * (hrtime_t)(60 * 60) }, 223 { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 224 { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 225 { "hz", 0 }, 226 { NULL } 227 }; 228 229 if (desc == NULL) { 230 char n[PROF_NAMELEN]; 231 232 /* 233 * If no description was provided, provide all of our probes. 234 */ 235 for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { 236 if ((rate = profile_rates[i]) == 0) 237 continue; 238 239 (void) snprintf(n, PROF_NAMELEN, "%s%d", 240 PROF_PREFIX_PROFILE, rate); 241 profile_create(NANOSEC / rate, n, PROF_PROFILE); 242 } 243 244 for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { 245 if ((rate = profile_ticks[i]) == 0) 246 continue; 247 248 (void) snprintf(n, PROF_NAMELEN, "%s%d", 249 PROF_PREFIX_TICK, rate); 250 profile_create(NANOSEC / rate, n, PROF_TICK); 251 } 252 253 return; 254 } 255 256 name = desc->dtpd_name; 257 258 for (i = 0; types[i].prefix != NULL; i++) { 259 len = strlen(types[i].prefix); 260 261 if (strncmp(name, types[i].prefix, len) != 0) 262 continue; 263 break; 264 } 265 266 if (types[i].prefix == NULL) 267 return; 268 269 kind = types[i].kind; 270 j = strlen(name) - len; 271 272 /* 273 * We need to start before any time suffix. 274 */ 275 for (j = strlen(name); j >= len; j--) { 276 if (name[j] >= '0' && name[j] <= '9') 277 break; 278 suffix = &name[j]; 279 } 280 281 ASSERT(suffix != NULL); 282 283 /* 284 * Now determine the numerical value present in the probe name. 285 */ 286 for (; j >= len; j--) { 287 if (name[j] < '0' || name[j] > '9') 288 return; 289 290 val += (name[j] - '0') * mult; 291 mult *= (hrtime_t)10; 292 } 293 294 if (val == 0) 295 return; 296 297 /* 298 * Look-up the suffix to determine the multiplier. 299 */ 300 for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { 301 if (strcasecmp(suffixes[i].name, suffix) == 0) { 302 mult = suffixes[i].mult; 303 break; 304 } 305 } 306 307 if (suffixes[i].name == NULL && *suffix != '\0') 308 return; 309 310 if (mult == 0) { 311 /* 312 * The default is frequency-per-second. 313 */ 314 val = NANOSEC / val; 315 } else { 316 val *= mult; 317 } 318 319 profile_create(val, name, kind); 320 } 321 322 /*ARGSUSED*/ 323 static void 324 profile_destroy(void *arg, dtrace_id_t id, void *parg) 325 { 326 profile_probe_t *prof = parg; 327 328 ASSERT(prof->prof_cyclic == CYCLIC_NONE); 329 kmem_free(prof, sizeof (profile_probe_t)); 330 331 ASSERT(profile_total >= 1); 332 atomic_add_32(&profile_total, -1); 333 } 334 335 /*ARGSUSED*/ 336 static void 337 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 338 { 339 profile_probe_t *prof = arg; 340 profile_probe_percpu_t *pcpu; 341 342 pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP); 343 pcpu->profc_probe = prof; 344 345 hdlr->cyh_func = profile_fire; 346 hdlr->cyh_arg = pcpu; 347 hdlr->cyh_level = CY_HIGH_LEVEL; 348 349 when->cyt_interval = prof->prof_interval; 350 when->cyt_when = dtrace_gethrtime() + when->cyt_interval; 351 352 pcpu->profc_expected = when->cyt_when; 353 pcpu->profc_interval = when->cyt_interval; 354 } 355 356 /*ARGSUSED*/ 357 static void 358 profile_offline(void *arg, cpu_t *cpu, void *oarg) 359 { 360 profile_probe_percpu_t *pcpu = oarg; 361 362 ASSERT(pcpu->profc_probe == arg); 363 kmem_free(pcpu, sizeof (profile_probe_percpu_t)); 364 } 365 366 /*ARGSUSED*/ 367 static void 368 profile_enable(void *arg, dtrace_id_t id, void *parg) 369 { 370 profile_probe_t *prof = parg; 371 cyc_omni_handler_t omni; 372 cyc_handler_t hdlr; 373 cyc_time_t when; 374 375 ASSERT(prof->prof_interval != 0); 376 ASSERT(MUTEX_HELD(&cpu_lock)); 377 378 if (prof->prof_kind == PROF_TICK) { 379 hdlr.cyh_func = profile_tick; 380 hdlr.cyh_arg = prof; 381 hdlr.cyh_level = CY_HIGH_LEVEL; 382 383 when.cyt_interval = prof->prof_interval; 384 when.cyt_when = dtrace_gethrtime() + when.cyt_interval; 385 } else { 386 ASSERT(prof->prof_kind == PROF_PROFILE); 387 omni.cyo_online = profile_online; 388 omni.cyo_offline = profile_offline; 389 omni.cyo_arg = prof; 390 } 391 392 if (prof->prof_kind == PROF_TICK) { 393 prof->prof_cyclic = cyclic_add(&hdlr, &when); 394 } else { 395 prof->prof_cyclic = cyclic_add_omni(&omni); 396 } 397 } 398 399 /*ARGSUSED*/ 400 static void 401 profile_disable(void *arg, dtrace_id_t id, void *parg) 402 { 403 profile_probe_t *prof = parg; 404 405 ASSERT(prof->prof_cyclic != CYCLIC_NONE); 406 ASSERT(MUTEX_HELD(&cpu_lock)); 407 408 cyclic_remove(prof->prof_cyclic); 409 prof->prof_cyclic = CYCLIC_NONE; 410 } 411 412 /*ARGSUSED*/ 413 static int 414 profile_usermode(void *arg, dtrace_id_t id, void *parg) 415 { 416 return (CPU->cpu_profile_pc == 0); 417 } 418 419 static dtrace_pattr_t profile_attr = { 420 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 421 { DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN }, 422 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 423 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 424 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 425 }; 426 427 static dtrace_pops_t profile_pops = { 428 profile_provide, 429 NULL, 430 profile_enable, 431 profile_disable, 432 NULL, 433 NULL, 434 NULL, 435 NULL, 436 profile_usermode, 437 profile_destroy 438 }; 439 440 static int 441 profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 442 { 443 switch (cmd) { 444 case DDI_ATTACH: 445 break; 446 case DDI_RESUME: 447 return (DDI_SUCCESS); 448 default: 449 return (DDI_FAILURE); 450 } 451 452 if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0, 453 DDI_PSEUDO, NULL) == DDI_FAILURE || 454 dtrace_register("profile", &profile_attr, 455 DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL, 456 &profile_pops, NULL, &profile_id) != 0) { 457 ddi_remove_minor_node(devi, NULL); 458 return (DDI_FAILURE); 459 } 460 461 profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, 462 "profile-max-probes", PROFILE_MAX_DEFAULT); 463 464 ddi_report_dev(devi); 465 profile_devi = devi; 466 return (DDI_SUCCESS); 467 } 468 469 static int 470 profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 471 { 472 switch (cmd) { 473 case DDI_DETACH: 474 break; 475 case DDI_SUSPEND: 476 return (DDI_SUCCESS); 477 default: 478 return (DDI_FAILURE); 479 } 480 481 if (dtrace_unregister(profile_id) != 0) 482 return (DDI_FAILURE); 483 484 ddi_remove_minor_node(devi, NULL); 485 return (DDI_SUCCESS); 486 } 487 488 /*ARGSUSED*/ 489 static int 490 profile_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 491 { 492 int error; 493 494 switch (infocmd) { 495 case DDI_INFO_DEVT2DEVINFO: 496 *result = (void *)profile_devi; 497 error = DDI_SUCCESS; 498 break; 499 case DDI_INFO_DEVT2INSTANCE: 500 *result = (void *)0; 501 error = DDI_SUCCESS; 502 break; 503 default: 504 error = DDI_FAILURE; 505 } 506 return (error); 507 } 508 509 /*ARGSUSED*/ 510 static int 511 profile_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 512 { 513 return (0); 514 } 515 516 static struct cb_ops profile_cb_ops = { 517 profile_open, /* open */ 518 nodev, /* close */ 519 nulldev, /* strategy */ 520 nulldev, /* print */ 521 nodev, /* dump */ 522 nodev, /* read */ 523 nodev, /* write */ 524 nodev, /* ioctl */ 525 nodev, /* devmap */ 526 nodev, /* mmap */ 527 nodev, /* segmap */ 528 nochpoll, /* poll */ 529 ddi_prop_op, /* cb_prop_op */ 530 0, /* streamtab */ 531 D_NEW | D_MP /* Driver compatibility flag */ 532 }; 533 534 static struct dev_ops profile_ops = { 535 DEVO_REV, /* devo_rev, */ 536 0, /* refcnt */ 537 profile_info, /* get_dev_info */ 538 nulldev, /* identify */ 539 nulldev, /* probe */ 540 profile_attach, /* attach */ 541 profile_detach, /* detach */ 542 nodev, /* reset */ 543 &profile_cb_ops, /* driver operations */ 544 NULL, /* bus operations */ 545 nodev /* dev power */ 546 }; 547 548 /* 549 * Module linkage information for the kernel. 550 */ 551 static struct modldrv modldrv = { 552 &mod_driverops, /* module type (this is a pseudo driver) */ 553 "Profile Interrupt Tracing", /* name of module */ 554 &profile_ops, /* driver ops */ 555 }; 556 557 static struct modlinkage modlinkage = { 558 MODREV_1, 559 (void *)&modldrv, 560 NULL 561 }; 562 563 int 564 _init(void) 565 { 566 return (mod_install(&modlinkage)); 567 } 568 569 int 570 _info(struct modinfo *modinfop) 571 { 572 return (mod_info(&modlinkage, modinfop)); 573 } 574 575 int 576 _fini(void) 577 { 578 return (mod_remove(&modlinkage)); 579 } 580