/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/kcpc.h>
#include <sys/cap_util.h>
#include <sys/cpc_pcbe.h>
#include <sys/cpc_impl.h>
#include <sys/dtrace_impl.h>

/*
 * DTrace CPU Performance Counter Provider
 * ---------------------------------------
 *
 * The DTrace cpc provider allows DTrace consumers to access the CPU
 * performance counter overflow mechanism of a CPU. The configuration
 * presented in a probe specification is programmed into the performance
 * counter hardware of all available CPUs on a system. Programming the
 * hardware causes a counter on each CPU to begin counting events of the
 * given type. When the specified number of events have occurred, an overflow
 * interrupt will be generated and the probe is fired.
 *
 * The required configuration for the performance counter is encoded into
 * the probe specification and this includes the performance counter event
 * name, processor mode, overflow rate and an optional unit mask.
 *
 * Most processors provide several counters (PICs) which can count all or a
 * subset of the events available for a given CPU. However, when overflow
 * profiling is being used, not all CPUs can detect which counter generated the
 * overflow interrupt. In this case we cannot reliably determine which counter
 * overflowed and we therefore only allow such CPUs to configure one event at
 * a time. Processors that can determine the counter which overflowed are
 * allowed to program as many events at one time as possible (in theory up to
 * the number of instrumentation counters supported by that platform).
 * Therefore, multiple consumers can enable multiple probes at the same time
 * on such platforms. Platforms which cannot determine the source of an
 * overflow interrupt are only allowed to program a single event at one time.
 *
 * The performance counter hardware is made available to consumers on a
 * first-come, first-served basis. Only a finite amount of hardware resource
 * is available and, while we make every attempt to accommodate requests from
 * consumers, we must deny requests when hardware resources have been exhausted.
 * A consumer will fail to enable probes when resources are currently in use.
 *
 * The cpc provider contends for shared hardware resources along with other
 * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(1M)).
 * Only one such consumer can use the performance counters at any one time and
 * counters are made available on a first-come, first-served basis. As with
 * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g.
 * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP
 * counter contexts to be invalidated.
 */
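
/*
 * For example, the following invocation (illustrative only; event names,
 * availability of a unit mask and sensible overflow rates all vary with the
 * underlying processor and its PCBE) asks for the generic "PAPI_tot_ins"
 * event to be counted in both user and kernel mode, with the probe firing
 * once for every 10000 such events observed on a CPU:
 *
 *	# dtrace -n 'cpc:::PAPI_tot_ins-all-10000 { @[cpu] = count(); }'
 */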

typedef struct dcpc_probe {
	char		dcpc_event_name[CPC_MAX_EVENT_LEN];
	int		dcpc_flag;	/* flags (USER/SYS) */
	uint32_t	dcpc_ovfval;	/* overflow value */
	int64_t		dcpc_umask;	/* umask/emask for this event */
	int		dcpc_picno;	/* pic this event is programmed in */
	int		dcpc_enabled;	/* probe is actually enabled? */
	int		dcpc_disabling;	/* probe is currently being disabled */
	dtrace_id_t	dcpc_id;	/* probeid this request is enabling */
	int		dcpc_actv_req_idx;	/* idx into dcpc_actv_reqs[] */
} dcpc_probe_t;

static dev_info_t			*dcpc_devi;
static dtrace_provider_id_t		dcpc_pid;
static dcpc_probe_t			**dcpc_actv_reqs;
static uint32_t				dcpc_enablings = 0;
static int				dcpc_ovf_mask = 0;
static int				dcpc_mult_ovf_cap = 0;
static int				dcpc_mask_type = 0;

/*
 * When the dcpc provider is loaded, dcpc_min_overflow is set to either
 * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in
 * the dcpc.conf file. Decrease this value to set probes with smaller
 * overflow values. Remember that very small values could render a system
 * unusable with frequently occurring events.
 */
#define	DCPC_MIN_OVF_DEFAULT		5000
static uint32_t				dcpc_min_overflow;
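
/*
 * For example, adding the following property to dcpc.conf (illustrative;
 * standard driver.conf(4) property syntax) would permit overflow rates as
 * low as 1000 events:
 *
 *	dcpc-min-overflow=1000;
 */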

static int dcpc_aframes = 0;	/* override for artificial frame setting */
#if defined(__x86)
#define	DCPC_ARTIFICIAL_FRAMES	8
#elif defined(__sparc)
#define	DCPC_ARTIFICIAL_FRAMES	2
#endif

/*
 * Called from the platform overflow interrupt handler. 'bitmap' is a mask
 * which contains the pic(s) that have overflowed.
 */
static void
dcpc_fire(uint64_t bitmap)
{
	int i;

	/*
	 * No counter was marked as overflowing. Shout about it and get out.
	 */
	if ((bitmap & dcpc_ovf_mask) == 0) {
		cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n");
		return;
	}

	/*
	 * This is the common case of a processor that doesn't support
	 * multiple overflow events. Such systems are only allowed a single
	 * enabling and therefore we just look for the first entry in
	 * the active request array.
	 */
	if (!dcpc_mult_ovf_cap) {
		for (i = 0; i < cpc_ncounters; i++) {
			if (dcpc_actv_reqs[i] != NULL) {
				dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
				    CPU->cpu_cpcprofile_pc,
				    CPU->cpu_cpcprofile_upc, 0, 0, 0);
				return;
			}
		}
		return;
	}

	/*
	 * This is a processor capable of handling multiple overflow events.
	 * Iterate over the array of active requests and locate the counters
	 * that overflowed (note: it is possible for more than one counter to
	 * have overflowed at the same time).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL &&
		    (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) {
			dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
			    CPU->cpu_cpcprofile_pc,
			    CPU->cpu_cpcprofile_upc, 0, 0, 0);
		}
	}
}

static void
dcpc_create_probe(dtrace_provider_id_t id, const char *probename,
    char *eventname, int64_t umask, uint32_t ovfval, char flag)
{
	dcpc_probe_t *pp;
	int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes();

	if (dcpc_aframes)
		nr_frames = dcpc_aframes;

	if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0)
		return;

	pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP);
	(void) strncpy(pp->dcpc_event_name, eventname,
	    sizeof (pp->dcpc_event_name) - 1);
	pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0';
	pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT;
	pp->dcpc_ovfval = ovfval;
	pp->dcpc_umask = umask;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename,
	    nr_frames, pp);
}

/*ARGSUSED*/
static void
dcpc_provide(void *arg, const dtrace_probedesc_t *desc)
{
	/*
	 * The format of a probe is:
	 *
	 *	event_name-mode-{optional_umask}-overflow_rate
	 * e.g.
	 *	DC_refill_from_system-user-0x1e-50000, or,
	 *	DC_refill_from_system-all-10000
	 *
	 */
	char *str, *end, *p;
	int i, flag = 0;
	char event[CPC_MAX_EVENT_LEN];
	long umask = -1, val = 0;
	size_t evlen, len;

	/*
	 * The 'cpc' provider offers no probes by default.
	 */
	if (desc == NULL)
		return;

	len = strlen(desc->dtpd_name);
	p = str = kmem_alloc(len + 1, KM_SLEEP);
	(void) strcpy(str, desc->dtpd_name);

	/*
	 * We have a poor man's strtok() going on here. Replace any hyphens
	 * in the probe name with NULL characters in order to make it
	 * easy to parse the string with regular string functions.
	 */
	for (i = 0; i < len; i++) {
		if (str[i] == '-')
			str[i] = '\0';
	}

	/*
	 * The first part of the string must be either a platform event
	 * name or a generic event name.
	 */
	evlen = strlen(p);
	(void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1);
	event[CPC_MAX_EVENT_LEN - 1] = '\0';

	/*
	 * The next part of the name is the mode specification. Valid
	 * settings are "user", "kernel" or "all".
	 */
	p += evlen + 1;

	if (strcmp(p, "user") == 0)
		flag |= CPC_COUNT_USER;
	else if (strcmp(p, "kernel") == 0)
		flag |= CPC_COUNT_SYSTEM;
	else if (strcmp(p, "all") == 0)
		flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM;
	else
		goto err;

	/*
	 * Next we either have a mask specification followed by an overflow
	 * rate or just an overflow rate on its own.
	 */
	p += strlen(p) + 1;
	if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
		/*
		 * A unit mask can only be specified if:
		 * 1) this performance counter back end supports masks.
		 * 2) the specified event is platform specific.
		 * 3) a valid hex number is converted.
		 * 4) no extraneous characters follow the mask specification.
		 */
		if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 &&
		    ddi_strtol(p, &end, 16, &umask) == 0 &&
		    end == p + strlen(p)) {
			p += strlen(p) + 1;
		} else {
			goto err;
		}
	}

	/*
	 * This final part must be an overflow value which has to be greater
	 * than the minimum permissible overflow rate.
	 */
	if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) ||
	    val < dcpc_min_overflow)
		goto err;

	/*
	 * Validate the event and create the probe.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		char *events, *cp, *p, *end;
		int found = 0, j;
		size_t llen;

		if ((events = kcpc_list_events(i)) == NULL)
			goto err;

		llen = strlen(events);
		p = cp = ddi_strdup(events, KM_NOSLEEP);
		end = cp + llen;

		for (j = 0; j < llen; j++) {
			if (cp[j] == ',')
				cp[j] = '\0';
		}

		while (p < end && found == 0) {
			if (strcmp(p, event) == 0) {
				dcpc_create_probe(dcpc_pid, desc->dtpd_name,
				    event, umask, (uint32_t)val, flag);
				found = 1;
			}
			p += strlen(p) + 1;
		}
		kmem_free(cp, llen + 1);

		if (found)
			break;
	}

err:
	kmem_free(str, len + 1);
}

/*ARGSUSED*/
static void
dcpc_destroy(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;

	ASSERT(pp->dcpc_enabled == 0);
	kmem_free(pp, sizeof (dcpc_probe_t));
}

/*ARGSUSED*/
static int
dcpc_usermode(void *arg, dtrace_id_t id, void *parg)
{
	return (CPU->cpu_cpcprofile_pc == 0);
}

static void
dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno)
{
	kcpc_set_t *oset;
	int i;

	(void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name,
	    CPC_MAX_EVENT_LEN);
	set->ks_req[reqno].kr_config = NULL;
	set->ks_req[reqno].kr_index = reqno;
	set->ks_req[reqno].kr_picnum = -1;
	set->ks_req[reqno].kr_flags = pp->dcpc_flag;

	/*
	 * If a unit mask has been specified then detect which attribute
	 * the platform needs. For now, it's either "umask" or "emask".
	 */
	if (pp->dcpc_umask >= 0) {
		set->ks_req[reqno].kr_attr =
		    kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP);
		set->ks_req[reqno].kr_nattrs = 1;
		if (dcpc_mask_type & DCPC_UMASK)
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "umask", 5);
		else
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "emask", 5);
		set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask;
	} else {
		set->ks_req[reqno].kr_attr = NULL;
		set->ks_req[reqno].kr_nattrs = 0;
	}

	/*
	 * If this probe is enabled, obtain its current countdown value
	 * and use that. The CPU's cpc context might not exist yet if we
	 * are dealing with a CPU that is just coming online.
	 */
	if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) {
		oset = c->cpu_cpc_ctx->kc_set;

		for (i = 0; i < oset->ks_nreqs; i++) {
			if (strcmp(oset->ks_req[i].kr_event,
			    set->ks_req[reqno].kr_event) == 0) {
				set->ks_req[reqno].kr_preset =
				    *(oset->ks_req[i].kr_data);
			}
		}
	} else {
		set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval;
	}

	set->ks_nreqs++;
}


/*
 * Create a fresh request set for the enablings represented in the
 * 'dcpc_actv_reqs' array which contains the probes we want to be
 * in the set. This can be called for several reasons:
 *
 * 1) We are on a single or multi overflow platform and we have no
 *    current events so we can just create the set and initialize it.
 * 2) We are on a multi-overflow platform and we already have one or
 *    more existing events and we are adding a new enabling. Create a
 *    new set and copy old requests in and then add the new request.
 * 3) We are on a multi-overflow platform and we have just removed an
 *    enabling but we still have enablings which are valid. Create a new
 *    set and copy in still valid requests.
 */
static kcpc_set_t *
dcpc_create_set(cpu_t *c)
{
	int i, reqno = 0;
	int active_requests = 0;
	kcpc_set_t *set;

	/*
	 * First get a count of the number of currently active requests.
	 * Note that dcpc_actv_reqs[] should always reflect which requests
	 * we want to be in the set that is to be created. It is the
	 * responsibility of the caller of dcpc_create_set() to adjust that
	 * array accordingly beforehand.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL)
			active_requests++;
	}

	set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);

	set->ks_req =
	    kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP);

	set->ks_data =
	    kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP);

	/*
	 * Look for valid entries in the active requests array and populate
	 * the request set for any entries found.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL) {
			dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno);
			reqno++;
		}
	}

	return (set);
}

static int
dcpc_program_cpu_event(cpu_t *c)
{
	int i, j, subcode;
	kcpc_ctx_t *ctx, *octx;
	kcpc_set_t *set;

	set = dcpc_create_set(c);

	set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP);
	ctx->kc_set = set;
	ctx->kc_cpuid = c->cpu_id;

	if (kcpc_assign_reqs(set, ctx) != 0)
		goto err;

	if (kcpc_configure_reqs(ctx, set, &subcode) != 0)
		goto err;

	for (i = 0; i < set->ks_nreqs; i++) {
		for (j = 0; j < cpc_ncounters; j++) {
			if (dcpc_actv_reqs[j] != NULL &&
			    strcmp(set->ks_req[i].kr_event,
			    dcpc_actv_reqs[j]->dcpc_event_name) == 0) {
				dcpc_actv_reqs[j]->dcpc_picno =
				    set->ks_req[i].kr_picnum;
			}
		}
	}

	/*
	 * If we already have an active enabling then save the current cpc
	 * context away.
	 */
	octx = c->cpu_cpc_ctx;

	kcpc_cpu_program(c, ctx);

	if (octx != NULL) {
		kcpc_set_t *oset = octx->kc_set;
		kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t));
		kcpc_free_configs(oset);
		kcpc_free_set(oset);
		kcpc_ctx_free(octx);
	}

	return (0);

err:
	/*
	 * We failed to configure this request so free things up and
	 * get out.
	 */
	kcpc_free_configs(set);
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_free_set(set);
	kcpc_ctx_free(ctx);

	return (-1);
}

static void
dcpc_disable_cpu(cpu_t *c)
{
	kcpc_ctx_t *ctx;
	kcpc_set_t *set;

	/*
	 * Leave this CPU alone if it's already offline.
	 */
	if (c->cpu_flags & CPU_OFFLINE)
		return;

	/*
	 * Grab the CPU's CPC context before kcpc_cpu_stop() stops counters
	 * and changes it.
	 */
	ctx = c->cpu_cpc_ctx;

	kcpc_cpu_stop(c, B_FALSE);

	set = ctx->kc_set;

	kcpc_free_configs(set);
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_free_set(set);
	kcpc_ctx_free(ctx);
}

/*
 * The dcpc_*_interrupts() routines are responsible for manipulating the
 * per-CPU dcpc interrupt state byte. The purpose of the state byte is to
 * synchronize processing of hardware overflow interrupts with configuration
 * changes made to the CPU performance counter subsystem by the dcpc provider.
 *
 * The dcpc provider claims ownership of the overflow interrupt mechanism
 * by transitioning the state byte from DCPC_INTR_INACTIVE (indicating the
 * dcpc provider is not in use) to DCPC_INTR_FREE (the dcpc provider owns the
 * overflow mechanism and interrupts may be processed). Before modifying
 * a CPU's configuration state the state byte is transitioned from
 * DCPC_INTR_FREE to DCPC_INTR_CONFIG ("configuration in process" state).
 * The hardware overflow handler, kcpc_hw_overflow_intr(), will only process
 * an interrupt when a configuration is not in process (i.e. the state is
 * marked as free). During interrupt processing the state is set to
 * DCPC_INTR_PROCESSING by the overflow handler. When the last dcpc-based
 * enabling is removed, the state byte is set to DCPC_INTR_INACTIVE to indicate
 * the dcpc provider is no longer interested in overflow interrupts.
 */
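
/*
 * Summary of the per-CPU state byte transitions described above
 * (informational):
 *
 *	DCPC_INTR_INACTIVE -> DCPC_INTR_FREE		dcpc_claim_interrupts()
 *	DCPC_INTR_FREE -> DCPC_INTR_CONFIG		dcpc_block_interrupts()
 *	DCPC_INTR_CONFIG -> DCPC_INTR_FREE		dcpc_release_interrupts()
 *	DCPC_INTR_FREE -> DCPC_INTR_PROCESSING		kcpc_hw_overflow_intr()
 *	DCPC_INTR_FREE/CONFIG -> DCPC_INTR_INACTIVE	dcpc_surrender_interrupts()
 */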

static void
dcpc_block_interrupts(void)
{
	cpu_t *c = cpu_list;
	uint8_t *state;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

		while (atomic_cas_8(state, DCPC_INTR_FREE,
		    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
			continue;

	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Set all CPUs' dcpc interrupt state to DCPC_INTR_FREE to indicate that
 * overflow interrupts can be processed safely.
 */
static void
dcpc_release_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Transition all CPUs' dcpc interrupt state from DCPC_INTR_INACTIVE to
 * DCPC_INTR_FREE. This indicates that the dcpc provider is now
 * responsible for handling all overflow interrupt activity. Should only be
 * called before enabling the first dcpc-based probe.
 */
static void
dcpc_claim_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state == DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Set all CPUs' dcpc interrupt state to DCPC_INTR_INACTIVE to indicate that
 * the dcpc provider is no longer processing overflow interrupts. Only called
 * during removal of the last dcpc-based enabling.
 */
static void
dcpc_surrender_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_INACTIVE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * dcpc_program_event() can be called owing to a new enabling or if a multi
 * overflow platform has disabled a request but needs to program the requests
 * that are still valid.
 *
 * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t
 * and a new request set which contains the new enabling and any old enablings
 * which are still valid (possible with multi-overflow platforms).
 */
static int
dcpc_program_event(dcpc_probe_t *pp)
{
	cpu_t *c;
	int ret = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();

	dcpc_block_interrupts();

	c = cpu_list;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		/*
		 * Stop counters but preserve existing DTrace CPC context
		 * if there is one.
		 *
		 * If we come here when the first event is programmed for a CPU,
		 * there should be no DTrace CPC context installed. In this
		 * case, kcpc_cpu_stop() will ensure that there is no other
		 * context on the CPU.
		 *
		 * If we add a new enabling to the original one, the CPU should
		 * have the old DTrace CPC context which we need to keep around
		 * since dcpc_program_event() will add to it.
		 */
		if (c->cpu_cpc_ctx != NULL)
			kcpc_cpu_stop(c, B_TRUE);
	} while ((c = c->cpu_next) != cpu_list);

	dcpc_release_interrupts();

	/*
	 * If this enabling is being removed (in the case of a multi event
	 * capable system with more than one active enabling), we can now
	 * update the active request array to reflect the enablings that need
	 * to be reprogrammed.
	 */
	if (pp->dcpc_disabling == 1)
		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		ret = dcpc_program_cpu_event(c);
	} while ((c = c->cpu_next) != cpu_list && ret == 0);

	/*
	 * If dcpc_program_cpu_event() fails then it is because we couldn't
	 * configure the requests in the set for the CPU and not because of
	 * an error programming the hardware. If we have a failure here then
	 * we assume no CPUs have been programmed in the above step as they
	 * are all configured identically.
	 */
	if (ret != 0) {
		pp->dcpc_enabled = 0;
		kpreempt_enable();
		return (-1);
	}

	if (pp->dcpc_disabling != 1)
		pp->dcpc_enabled = 1;

	kpreempt_enable();

	return (0);
}

/*ARGSUSED*/
static int
dcpc_enable(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;
	int i, found = 0;
	cpu_t *c;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Bail out if the counters are being used by a libcpc consumer.
	 */
	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (kcpc_cpuctx > 0) {
		rw_exit(&kcpc_cpuctx_lock);
		return (-1);
	}

	dtrace_cpc_in_use++;
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Locate this enabling in the first free entry of the active
	 * request array.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] == NULL) {
			dcpc_actv_reqs[i] = pp;
			pp->dcpc_actv_req_idx = i;
			found = 1;
			break;
		}
	}

	/*
	 * If we couldn't find a slot for this probe then there is no
	 * room at the inn.
	 */
	if (!found) {
		dtrace_cpc_in_use--;
		return (-1);
	}

	ASSERT(pp->dcpc_actv_req_idx >= 0);

	/*
	 * DTrace is taking over CPC contexts, so stop collecting
	 * capacity/utilization data for all CPUs.
	 */
	if (dtrace_cpc_in_use == 1)
		cu_disable();

	/*
	 * The following must hold true if we are to (attempt to) enable
	 * this request:
	 *
	 * 1) No enablings currently exist. We allow all platforms to
	 *    proceed if this is true.
	 *
	 * OR
	 *
	 * 2) If the platform is multi overflow capable and there are
	 *    fewer valid enablings than there are counters. There is no
	 *    guarantee that a platform can accommodate as many events as
	 *    it has counters for but we will at least try to program
	 *    up to that many requests.
	 *
	 * The 'dcpc_enablings' variable is implicitly protected by locking
	 * provided by the DTrace framework and the cpu management framework.
	 */
	if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap &&
	    dcpc_enablings < cpc_ncounters)) {
		/*
		 * Before attempting to program the first enabling we need to
		 * invalidate any lwp-based contexts and lay claim to the
		 * overflow interrupt mechanism.
		 */
		if (dcpc_enablings == 0) {
			kcpc_invalidate_all();
			dcpc_claim_interrupts();
		}

		if (dcpc_program_event(pp) == 0) {
			dcpc_enablings++;
			return (0);
		}
	}

	/*
	 * If active enablings existed before we failed to enable this probe
	 * on a multi event capable platform then we need to restart counters
	 * as they will have been stopped in the attempted configuration. The
	 * context should now just contain the request prior to this failed
	 * enabling.
	 */
	if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) {
		c = cpu_list;

		ASSERT(dcpc_mult_ovf_cap == 1);
		do {
			/*
			 * Skip CPUs that are currently offline.
			 */
			if (c->cpu_flags & CPU_OFFLINE)
				continue;

			kcpc_cpu_program(c, c->cpu_cpc_ctx);
		} while ((c = c->cpu_next) != cpu_list);
	}

	/*
	 * Give up any claim to the overflow interrupt mechanism if no
	 * dcpc-based enablings exist.
	 */
	if (dcpc_enablings == 0)
		dcpc_surrender_interrupts();

	dtrace_cpc_in_use--;
	dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();

	return (-1);
}

/*
 * If only one enabling is active then remove the context and free
 * everything up. If there are multiple enablings active then remove this
 * one, its associated meta-data and re-program the hardware.
 */
/*ARGSUSED*/
static void
dcpc_disable(void *arg, dtrace_id_t id, void *parg)
{
	cpu_t *c;
	dcpc_probe_t *pp = parg;

	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();

	/*
	 * This probe didn't actually make it as far as being fully enabled
	 * so we needn't do anything with it.
	 */
	if (pp->dcpc_enabled == 0) {
		/*
		 * If we actually allocated this request a slot in the
		 * request array but failed to enable it then remove the
		 * entry in the array.
		 */
		if (pp->dcpc_actv_req_idx >= 0) {
			dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
			pp->dcpc_actv_req_idx = pp->dcpc_picno =
			    pp->dcpc_disabling = -1;
		}

		kpreempt_enable();
		return;
	}

	/*
	 * If this is the only enabling then stop all the counters and
	 * free up the meta-data.
	 */
	if (dcpc_enablings == 1) {
		ASSERT(dtrace_cpc_in_use == 1);

		dcpc_block_interrupts();

		c = cpu_list;

		do {
			dcpc_disable_cpu(c);
		} while ((c = c->cpu_next) != cpu_list);

		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
		dcpc_surrender_interrupts();
	} else {
		/*
		 * This platform can support multiple overflow events and
		 * the enabling being disabled is not the last one. Remove this
		 * enabling and re-program the hardware with the new config.
		 */
		ASSERT(dcpc_mult_ovf_cap);
		ASSERT(dcpc_enablings > 1);

		pp->dcpc_disabling = 1;
		(void) dcpc_program_event(pp);
	}

	kpreempt_enable();

	dcpc_enablings--;
	dtrace_cpc_in_use--;
	pp->dcpc_enabled = 0;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();
}

/*ARGSUSED*/
static int
dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
{
	cpu_t *c;
	uint8_t *state;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (what) {
	case CPU_OFF:
		/*
		 * Offline CPUs are not allowed to take part so remove this
		 * CPU if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

			/*
			 * Indicate that a configuration is in process in
			 * order to stop overflow interrupts being processed
			 * on this CPU while we disable it.
			 */
			while (atomic_cas_8(state, DCPC_INTR_FREE,
			    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
				continue;

			dcpc_disable_cpu(c);

			/*
			 * Reset this CPU's interrupt state as the
			 * configuration has ended.
			 */
			cpu_core[c->cpu_id].cpuc_dcpc_intr_state =
			    DCPC_INTR_FREE;
			membar_producer();
		}
		break;

	case CPU_ON:
	case CPU_SETUP:
		/*
		 * This CPU is being initialized or brought online so program
		 * it with the current request set if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			(void) dcpc_program_cpu_event(c);
		}
		break;

	default:
		break;
	}

	return (0);
}

static dtrace_pattr_t dcpc_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};

static dtrace_pops_t dcpc_pops = {
	dcpc_provide,		/* dtps_provide */
	NULL,			/* dtps_provide_module */
	dcpc_enable,		/* dtps_enable */
	dcpc_disable,		/* dtps_disable */
	NULL,			/* dtps_suspend */
	NULL,			/* dtps_resume */
	NULL,			/* dtps_getargdesc */
	NULL,			/* dtps_getargval */
	dcpc_usermode,		/* dtps_usermode */
	dcpc_destroy		/* dtps_destroy */
};

/*ARGSUSED*/
static int
dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}

/*ARGSUSED*/
static int
dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dcpc_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static int
dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (dtrace_unregister(dcpc_pid) != 0)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);

	mutex_enter(&cpu_lock);
	unregister_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *));

	kcpc_unregister_dcpc();

	return (DDI_SUCCESS);
}

static int
dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	uint_t caps;
	char *attrs;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (kcpc_pcbe_loaded() == -1)
		return (DDI_FAILURE);

	caps = kcpc_pcbe_capabilities();

	if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) {
		cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"
		    " on this processor");
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0,
	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL,
	    NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	mutex_enter(&cpu_lock);
	register_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	dcpc_ovf_mask = (1 << cpc_ncounters) - 1;
	ASSERT(dcpc_ovf_mask != 0);

	if (caps & CPC_CAP_OVERFLOW_PRECISE)
		dcpc_mult_ovf_cap = 1;

	/*
	 * Determine which, if any, mask attribute the back-end can use.
	 */
	attrs = kcpc_list_attrs();
	if (strstr(attrs, "umask") != NULL)
		dcpc_mask_type |= DCPC_UMASK;
	else if (strstr(attrs, "emask") != NULL)
		dcpc_mask_type |= DCPC_EMASK;

	/*
	 * The dcpc_actv_reqs array is used to store the requests that
	 * we currently have programmed. The order of requests in this
	 * array is not necessarily the order that the event appears in
	 * the kcpc_request_t array. Once entered into a slot in the array
	 * the entry is not moved until it's removed.
	 */
	dcpc_actv_reqs =
	    kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP);

	dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT);

	kcpc_register_dcpc(dcpc_fire);

	ddi_report_dev(devi);
	dcpc_devi = devi;

	return (DDI_SUCCESS);
}

static struct cb_ops dcpc_cb_ops = {
	dcpc_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dcpc_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dcpc_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dcpc_attach,		/* attach */
	dcpc_detach,		/* detach */
	nodev,			/* reset */
	&dcpc_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type */
	"DTrace CPC Module",	/* name of module */
	&dcpc_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}