1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 #include <sys/errno.h> 28 #include <sys/stat.h> 29 #include <sys/modctl.h> 30 #include <sys/conf.h> 31 #include <sys/systm.h> 32 #include <sys/ddi.h> 33 #include <sys/sunddi.h> 34 #include <sys/cpuvar.h> 35 #include <sys/kmem.h> 36 #include <sys/strsubr.h> 37 #include <sys/dtrace.h> 38 #include <sys/cyclic.h> 39 #include <sys/atomic.h> 40 41 static dev_info_t *profile_devi; 42 static dtrace_provider_id_t profile_id; 43 44 /* 45 * Regardless of platform, the stack frames look like this in the case of the 46 * profile provider: 47 * 48 * profile_fire 49 * cyclic_expire 50 * cyclic_fire 51 * [ cbe ] 52 * [ interrupt code ] 53 * 54 * On x86, there are five frames from the generic interrupt code; further, the 55 * interrupted instruction appears as its own stack frame, giving us a total of 56 * 10. 57 * 58 * On SPARC, the picture is further complicated because the compiler 59 * optimizes away tail-calls -- so the following frames are optimized away: 60 * 61 * profile_fire 62 * cyclic_expire 63 * 64 * This gives three frames. However, on DEBUG kernels, the cyclic_expire 65 * frame cannot be tail-call eliminated, yielding four frames in this case. 66 * 67 * All of the above constraints lead to the mess below. Yes, the profile 68 * provider should ideally figure this out on-the-fly by hitting one of its own 69 * probes and then walking its own stack trace. This is complicated, however, 70 * and the static definition doesn't seem to be overly brittle. Still, we 71 * allow for a manual override in case we get it completely wrong. 72 */ 73 #ifdef __x86 74 #define PROF_ARTIFICIAL_FRAMES 10 75 #else 76 #ifdef __sparc 77 #ifdef DEBUG 78 #define PROF_ARTIFICIAL_FRAMES 4 79 #else 80 #define PROF_ARTIFICIAL_FRAMES 3 81 #endif 82 #endif 83 #endif 84 85 #define PROF_NAMELEN 15 86 87 #define PROF_PROFILE 0 88 #define PROF_TICK 1 89 #define PROF_PREFIX_PROFILE "profile-" 90 #define PROF_PREFIX_TICK "tick-" 91 92 typedef struct profile_probe { 93 char prof_name[PROF_NAMELEN]; 94 dtrace_id_t prof_id; 95 int prof_kind; 96 hrtime_t prof_interval; 97 cyclic_id_t prof_cyclic; 98 } profile_probe_t; 99 100 typedef struct profile_probe_percpu { 101 hrtime_t profc_expected; 102 hrtime_t profc_interval; 103 profile_probe_t *profc_probe; 104 } profile_probe_percpu_t; 105 106 hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */ 107 int profile_aframes = 0; /* override */ 108 109 static int profile_rates[] = { 110 97, 199, 499, 997, 1999, 111 4001, 4999, 0, 0, 0, 112 0, 0, 0, 0, 0, 113 0, 0, 0, 0, 0 114 }; 115 116 static int profile_ticks[] = { 117 1, 10, 100, 500, 1000, 118 5000, 0, 0, 0, 0, 119 0, 0, 0, 0, 0 120 }; 121 122 /* 123 * profile_max defines the upper bound on the number of profile probes that 124 * can exist (this is to prevent malicious or clumsy users from exhausing 125 * system resources by creating a slew of profile probes). At mod load time, 126 * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's 127 * present in the profile.conf file. 128 */ 129 #define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */ 130 static uint32_t profile_max; /* maximum number of profile probes */ 131 static uint32_t profile_total; /* current number of profile probes */ 132 133 static void 134 profile_fire(void *arg) 135 { 136 profile_probe_percpu_t *pcpu = arg; 137 profile_probe_t *prof = pcpu->profc_probe; 138 hrtime_t late; 139 140 late = dtrace_gethrtime() - pcpu->profc_expected; 141 pcpu->profc_expected += pcpu->profc_interval; 142 143 dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, 144 CPU->cpu_profile_upc, late, 0, 0); 145 } 146 147 static void 148 profile_tick(void *arg) 149 { 150 profile_probe_t *prof = arg; 151 152 dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, 153 CPU->cpu_profile_upc, 0, 0, 0); 154 } 155 156 static void 157 profile_create(hrtime_t interval, const char *name, int kind) 158 { 159 profile_probe_t *prof; 160 int nr_frames = PROF_ARTIFICIAL_FRAMES + dtrace_mach_aframes(); 161 162 if (profile_aframes) 163 nr_frames = profile_aframes; 164 165 if (interval < profile_interval_min) 166 return; 167 168 if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0) 169 return; 170 171 atomic_add_32(&profile_total, 1); 172 if (profile_total > profile_max) { 173 atomic_add_32(&profile_total, -1); 174 return; 175 } 176 177 prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); 178 (void) strcpy(prof->prof_name, name); 179 prof->prof_interval = interval; 180 prof->prof_cyclic = CYCLIC_NONE; 181 prof->prof_kind = kind; 182 prof->prof_id = dtrace_probe_create(profile_id, 183 NULL, NULL, name, nr_frames, prof); 184 } 185 186 /*ARGSUSED*/ 187 static void 188 profile_provide(void *arg, const dtrace_probedesc_t *desc) 189 { 190 int i, j, rate, kind; 191 hrtime_t val = 0, mult = 1, len; 192 const char *name, *suffix = NULL; 193 194 const struct { 195 char *prefix; 196 int kind; 197 } types[] = { 198 { PROF_PREFIX_PROFILE, PROF_PROFILE }, 199 { PROF_PREFIX_TICK, PROF_TICK }, 200 { NULL, NULL } 201 }; 202 203 const struct { 204 char *name; 205 hrtime_t mult; 206 } suffixes[] = { 207 { "ns", NANOSEC / NANOSEC }, 208 { "nsec", NANOSEC / NANOSEC }, 209 { "us", NANOSEC / MICROSEC }, 210 { "usec", NANOSEC / MICROSEC }, 211 { "ms", NANOSEC / MILLISEC }, 212 { "msec", NANOSEC / MILLISEC }, 213 { "s", NANOSEC / SEC }, 214 { "sec", NANOSEC / SEC }, 215 { "m", NANOSEC * (hrtime_t)60 }, 216 { "min", NANOSEC * (hrtime_t)60 }, 217 { "h", NANOSEC * (hrtime_t)(60 * 60) }, 218 { "hour", NANOSEC * (hrtime_t)(60 * 60) }, 219 { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 220 { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, 221 { "hz", 0 }, 222 { NULL } 223 }; 224 225 if (desc == NULL) { 226 char n[PROF_NAMELEN]; 227 228 /* 229 * If no description was provided, provide all of our probes. 230 */ 231 for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { 232 if ((rate = profile_rates[i]) == 0) 233 continue; 234 235 (void) snprintf(n, PROF_NAMELEN, "%s%d", 236 PROF_PREFIX_PROFILE, rate); 237 profile_create(NANOSEC / rate, n, PROF_PROFILE); 238 } 239 240 for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { 241 if ((rate = profile_ticks[i]) == 0) 242 continue; 243 244 (void) snprintf(n, PROF_NAMELEN, "%s%d", 245 PROF_PREFIX_TICK, rate); 246 profile_create(NANOSEC / rate, n, PROF_TICK); 247 } 248 249 return; 250 } 251 252 name = desc->dtpd_name; 253 254 for (i = 0; types[i].prefix != NULL; i++) { 255 len = strlen(types[i].prefix); 256 257 if (strncmp(name, types[i].prefix, len) != 0) 258 continue; 259 break; 260 } 261 262 if (types[i].prefix == NULL) 263 return; 264 265 kind = types[i].kind; 266 j = strlen(name) - len; 267 268 /* 269 * We need to start before any time suffix. 270 */ 271 for (j = strlen(name); j >= len; j--) { 272 if (name[j] >= '0' && name[j] <= '9') 273 break; 274 suffix = &name[j]; 275 } 276 277 ASSERT(suffix != NULL); 278 279 /* 280 * Now determine the numerical value present in the probe name. 281 */ 282 for (; j >= len; j--) { 283 if (name[j] < '0' || name[j] > '9') 284 return; 285 286 val += (name[j] - '0') * mult; 287 mult *= (hrtime_t)10; 288 } 289 290 if (val == 0) 291 return; 292 293 /* 294 * Look-up the suffix to determine the multiplier. 295 */ 296 for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { 297 if (strcasecmp(suffixes[i].name, suffix) == 0) { 298 mult = suffixes[i].mult; 299 break; 300 } 301 } 302 303 if (suffixes[i].name == NULL && *suffix != '\0') 304 return; 305 306 if (mult == 0) { 307 /* 308 * The default is frequency-per-second. 309 */ 310 val = NANOSEC / val; 311 } else { 312 val *= mult; 313 } 314 315 profile_create(val, name, kind); 316 } 317 318 /*ARGSUSED*/ 319 static void 320 profile_destroy(void *arg, dtrace_id_t id, void *parg) 321 { 322 profile_probe_t *prof = parg; 323 324 ASSERT(prof->prof_cyclic == CYCLIC_NONE); 325 kmem_free(prof, sizeof (profile_probe_t)); 326 327 ASSERT(profile_total >= 1); 328 atomic_add_32(&profile_total, -1); 329 } 330 331 /*ARGSUSED*/ 332 static void 333 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) 334 { 335 profile_probe_t *prof = arg; 336 profile_probe_percpu_t *pcpu; 337 338 pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP); 339 pcpu->profc_probe = prof; 340 341 hdlr->cyh_func = profile_fire; 342 hdlr->cyh_arg = pcpu; 343 hdlr->cyh_level = CY_HIGH_LEVEL; 344 345 when->cyt_interval = prof->prof_interval; 346 when->cyt_when = dtrace_gethrtime() + when->cyt_interval; 347 348 pcpu->profc_expected = when->cyt_when; 349 pcpu->profc_interval = when->cyt_interval; 350 } 351 352 /*ARGSUSED*/ 353 static void 354 profile_offline(void *arg, cpu_t *cpu, void *oarg) 355 { 356 profile_probe_percpu_t *pcpu = oarg; 357 358 ASSERT(pcpu->profc_probe == arg); 359 kmem_free(pcpu, sizeof (profile_probe_percpu_t)); 360 } 361 362 /*ARGSUSED*/ 363 static void 364 profile_enable(void *arg, dtrace_id_t id, void *parg) 365 { 366 profile_probe_t *prof = parg; 367 cyc_omni_handler_t omni; 368 cyc_handler_t hdlr; 369 cyc_time_t when; 370 371 ASSERT(prof->prof_interval != 0); 372 ASSERT(MUTEX_HELD(&cpu_lock)); 373 374 if (prof->prof_kind == PROF_TICK) { 375 hdlr.cyh_func = profile_tick; 376 hdlr.cyh_arg = prof; 377 hdlr.cyh_level = CY_HIGH_LEVEL; 378 379 when.cyt_interval = prof->prof_interval; 380 when.cyt_when = dtrace_gethrtime() + when.cyt_interval; 381 } else { 382 ASSERT(prof->prof_kind == PROF_PROFILE); 383 omni.cyo_online = profile_online; 384 omni.cyo_offline = profile_offline; 385 omni.cyo_arg = prof; 386 } 387 388 if (prof->prof_kind == PROF_TICK) { 389 prof->prof_cyclic = cyclic_add(&hdlr, &when); 390 } else { 391 prof->prof_cyclic = cyclic_add_omni(&omni); 392 } 393 } 394 395 /*ARGSUSED*/ 396 static void 397 profile_disable(void *arg, dtrace_id_t id, void *parg) 398 { 399 profile_probe_t *prof = parg; 400 401 ASSERT(prof->prof_cyclic != CYCLIC_NONE); 402 ASSERT(MUTEX_HELD(&cpu_lock)); 403 404 cyclic_remove(prof->prof_cyclic); 405 prof->prof_cyclic = CYCLIC_NONE; 406 } 407 408 /*ARGSUSED*/ 409 static int 410 profile_usermode(void *arg, dtrace_id_t id, void *parg) 411 { 412 return (CPU->cpu_profile_pc == 0); 413 } 414 415 static dtrace_pattr_t profile_attr = { 416 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 417 { DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_UNKNOWN }, 418 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, 419 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 420 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, 421 }; 422 423 static dtrace_pops_t profile_pops = { 424 profile_provide, 425 NULL, 426 profile_enable, 427 profile_disable, 428 NULL, 429 NULL, 430 NULL, 431 NULL, 432 profile_usermode, 433 profile_destroy 434 }; 435 436 static int 437 profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 438 { 439 switch (cmd) { 440 case DDI_ATTACH: 441 break; 442 case DDI_RESUME: 443 return (DDI_SUCCESS); 444 default: 445 return (DDI_FAILURE); 446 } 447 448 if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0, 449 DDI_PSEUDO, NULL) == DDI_FAILURE || 450 dtrace_register("profile", &profile_attr, 451 DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL, 452 &profile_pops, NULL, &profile_id) != 0) { 453 ddi_remove_minor_node(devi, NULL); 454 return (DDI_FAILURE); 455 } 456 457 profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, 458 "profile-max-probes", PROFILE_MAX_DEFAULT); 459 460 ddi_report_dev(devi); 461 profile_devi = devi; 462 return (DDI_SUCCESS); 463 } 464 465 static int 466 profile_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 467 { 468 switch (cmd) { 469 case DDI_DETACH: 470 break; 471 case DDI_SUSPEND: 472 return (DDI_SUCCESS); 473 default: 474 return (DDI_FAILURE); 475 } 476 477 if (dtrace_unregister(profile_id) != 0) 478 return (DDI_FAILURE); 479 480 ddi_remove_minor_node(devi, NULL); 481 return (DDI_SUCCESS); 482 } 483 484 /*ARGSUSED*/ 485 static int 486 profile_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 487 { 488 int error; 489 490 switch (infocmd) { 491 case DDI_INFO_DEVT2DEVINFO: 492 *result = (void *)profile_devi; 493 error = DDI_SUCCESS; 494 break; 495 case DDI_INFO_DEVT2INSTANCE: 496 *result = (void *)0; 497 error = DDI_SUCCESS; 498 break; 499 default: 500 error = DDI_FAILURE; 501 } 502 return (error); 503 } 504 505 /*ARGSUSED*/ 506 static int 507 profile_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) 508 { 509 return (0); 510 } 511 512 static struct cb_ops profile_cb_ops = { 513 profile_open, /* open */ 514 nodev, /* close */ 515 nulldev, /* strategy */ 516 nulldev, /* print */ 517 nodev, /* dump */ 518 nodev, /* read */ 519 nodev, /* write */ 520 nodev, /* ioctl */ 521 nodev, /* devmap */ 522 nodev, /* mmap */ 523 nodev, /* segmap */ 524 nochpoll, /* poll */ 525 ddi_prop_op, /* cb_prop_op */ 526 0, /* streamtab */ 527 D_NEW | D_MP /* Driver compatibility flag */ 528 }; 529 530 static struct dev_ops profile_ops = { 531 DEVO_REV, /* devo_rev, */ 532 0, /* refcnt */ 533 profile_info, /* get_dev_info */ 534 nulldev, /* identify */ 535 nulldev, /* probe */ 536 profile_attach, /* attach */ 537 profile_detach, /* detach */ 538 nodev, /* reset */ 539 &profile_cb_ops, /* driver operations */ 540 NULL, /* bus operations */ 541 nodev, /* dev power */ 542 ddi_quiesce_not_needed, /* quiesce */ 543 }; 544 545 /* 546 * Module linkage information for the kernel. 547 */ 548 static struct modldrv modldrv = { 549 &mod_driverops, /* module type (this is a pseudo driver) */ 550 "Profile Interrupt Tracing", /* name of module */ 551 &profile_ops, /* driver ops */ 552 }; 553 554 static struct modlinkage modlinkage = { 555 MODREV_1, 556 (void *)&modldrv, 557 NULL 558 }; 559 560 int 561 _init(void) 562 { 563 return (mod_install(&modlinkage)); 564 } 565 566 int 567 _info(struct modinfo *modinfop) 568 { 569 return (mod_info(&modlinkage, modinfop)); 570 } 571 572 int 573 _fini(void) 574 { 575 return (mod_remove(&modlinkage)); 576 } 577