/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/kcpc.h>
#include <sys/cap_util.h>
#include <sys/cpc_pcbe.h>
#include <sys/cpc_impl.h>
#include <sys/dtrace_impl.h>

/*
 * DTrace CPU Performance Counter Provider
 * ---------------------------------------
 *
 * The DTrace cpc provider allows DTrace consumers to access the CPU
 * performance counter overflow mechanism of a CPU. The configuration
 * presented in a probe specification is programmed into the performance
 * counter hardware of all available CPUs on a system. Programming the
 * hardware causes a counter on each CPU to begin counting events of the
 * given type. When the specified number of events have occurred, an overflow
 * interrupt will be generated and the probe is fired.
 *
 * The required configuration for the performance counter is encoded into
 * the probe specification and this includes the performance counter event
 * name, processor mode, overflow rate and an optional unit mask.
 *
 * Most processors provide several counters (PICs) which can count all or a
 * subset of the events available for a given CPU. However, when overflow
 * profiling is being used, not all CPUs can detect which counter generated
 * the overflow interrupt. In this case we cannot reliably determine which
 * counter overflowed and we therefore only allow such CPUs to configure one
 * event at a time. Processors that can determine the counter which
 * overflowed are allowed to program as many events at one time as possible
 * (in theory up to the number of instrumentation counters supported by that
 * platform). Therefore, multiple consumers can enable multiple probes at the
 * same time on such platforms. Platforms which cannot determine the source
 * of an overflow interrupt are only allowed to program a single event at one
 * time.
 *
 * The performance counter hardware is made available to consumers on a
 * first-come, first-served basis. Only a finite amount of hardware resource
 * is available and, while we make every attempt to accommodate requests from
 * consumers, we must deny requests when hardware resources have been
 * exhausted. A consumer will fail to enable probes when resources are
 * currently in use.
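 *
 * For example, an illustrative consumer of this provider might ask for the
 * platform event "DC_refill_from_system", counted in user mode with a unit
 * mask of 0x1e and an overflow rate of 50000 events, by enabling the probe:
 *
 *	cpc:::DC_refill_from_system-user-0x1e-50000
 *
 * (The event names and unit masks actually available are platform
 * dependent; see dcpc_provide() below for the full probe name format.)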
 *
 * The cpc provider contends for shared hardware resources along with other
 * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(1M)).
 * Only one such consumer can use the performance counters at any one time and
 * counters are made available on a first-come, first-served basis. As with
 * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g.
 * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP
 * counter contexts to be invalidated.
 */

typedef struct dcpc_probe {
	char		dcpc_event_name[CPC_MAX_EVENT_LEN];
	int		dcpc_flag;	/* flags (USER/SYS) */
	uint32_t	dcpc_ovfval;	/* overflow value */
	int64_t		dcpc_umask;	/* umask/emask for this event */
	int		dcpc_picno;	/* pic this event is programmed in */
	int		dcpc_enabled;	/* probe is actually enabled? */
	int		dcpc_disabling;	/* probe is currently being disabled */
	dtrace_id_t	dcpc_id;	/* probeid this request is enabling */
	int		dcpc_actv_req_idx;	/* idx into dcpc_actv_reqs[] */
} dcpc_probe_t;

static dev_info_t			*dcpc_devi;
static dtrace_provider_id_t		dcpc_pid;
static dcpc_probe_t			**dcpc_actv_reqs;
static uint32_t				dcpc_enablings = 0;
static int				dcpc_ovf_mask = 0;
static int				dcpc_mult_ovf_cap = 0;
static int				dcpc_mask_type = 0;

/*
 * When the dcpc provider is loaded, dcpc_min_overflow is set to either
 * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in
 * the dcpc.conf file. Decrease this value to set probes with smaller
 * overflow values. Remember that very small values could render a system
 * unusable with frequently occurring events.
 */
#define	DCPC_MIN_OVF_DEFAULT		5000
static uint32_t				dcpc_min_overflow;

static int dcpc_aframes = 0;	/* override for artificial frame setting */
#if defined(__x86)
#define	DCPC_ARTIFICIAL_FRAMES	8
#elif defined(__sparc)
#define	DCPC_ARTIFICIAL_FRAMES	2
#endif

/*
 * Called from the platform overflow interrupt handler. 'bitmap' is a mask
 * which contains the pic(s) that have overflowed.
 */
static void
dcpc_fire(uint64_t bitmap)
{
	int i;

	/*
	 * No counter was marked as overflowing. Shout about it and get out.
	 */
	if ((bitmap & dcpc_ovf_mask) == 0) {
		cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n");
		return;
	}

	/*
	 * This is the common case of a processor that doesn't support
	 * multiple overflow events. Such systems are only allowed a single
	 * enabling and therefore we just look for the first entry in
	 * the active request array.
	 */
	if (!dcpc_mult_ovf_cap) {
		for (i = 0; i < cpc_ncounters; i++) {
			if (dcpc_actv_reqs[i] != NULL) {
				dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
				    CPU->cpu_cpcprofile_pc,
				    CPU->cpu_cpcprofile_upc, 0, 0, 0);
				return;
			}
		}
		return;
	}

	/*
	 * This is a processor capable of handling multiple overflow events.
	 * Iterate over the array of active requests and locate the counters
	 * that overflowed (note: it is possible for more than one counter to
	 * have overflowed at the same time).
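	 *
	 * For example, if two enablings are active and both of their pics
	 * overflow on the same interrupt, both corresponding bits will be
	 * set in 'bitmap' and both probes will fire below.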
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL &&
		    (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) {
			dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
			    CPU->cpu_cpcprofile_pc,
			    CPU->cpu_cpcprofile_upc, 0, 0, 0);
		}
	}
}

static void
dcpc_create_probe(dtrace_provider_id_t id, const char *probename,
    char *eventname, int64_t umask, uint32_t ovfval, char flag)
{
	dcpc_probe_t *pp;
	int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes();

	if (dcpc_aframes)
		nr_frames = dcpc_aframes;

	if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0)
		return;

	pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP);
	(void) strncpy(pp->dcpc_event_name, eventname,
	    sizeof (pp->dcpc_event_name) - 1);
	pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0';
	pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT;
	pp->dcpc_ovfval = ovfval;
	pp->dcpc_umask = umask;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename,
	    nr_frames, pp);
}

/*ARGSUSED*/
static void
dcpc_provide(void *arg, const dtrace_probedesc_t *desc)
{
	/*
	 * The format of a probe is:
	 *
	 *	event_name-mode-{optional_umask}-overflow_rate
	 * e.g.
	 *	DC_refill_from_system-user-0x1e-50000, or,
	 *	DC_refill_from_system-all-10000
	 *
	 */
	char *str, *end, *p;
	int i, flag = 0;
	char event[CPC_MAX_EVENT_LEN];
	long umask = -1, val = 0;
	size_t evlen, len;

	/*
	 * The 'cpc' provider offers no probes by default.
	 */
	if (desc == NULL)
		return;

	len = strlen(desc->dtpd_name);
	p = str = kmem_alloc(len + 1, KM_SLEEP);
	(void) strcpy(str, desc->dtpd_name);

	/*
	 * We have a poor man's strtok() going on here. Replace any hyphens
	 * in the probe name with NULL characters in order to make it
	 * easy to parse the string with regular string functions.
	 */
	for (i = 0; i < len; i++) {
		if (str[i] == '-')
			str[i] = '\0';
	}

	/*
	 * The first part of the string must be either a platform event
	 * name or a generic event name.
	 */
	evlen = strlen(p);
	(void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1);
	event[CPC_MAX_EVENT_LEN - 1] = '\0';

	/*
	 * The next part of the name is the mode specification. Valid
	 * settings are "user", "kernel" or "all".
	 */
	p += evlen + 1;

	if (strcmp(p, "user") == 0)
		flag |= CPC_COUNT_USER;
	else if (strcmp(p, "kernel") == 0)
		flag |= CPC_COUNT_SYSTEM;
	else if (strcmp(p, "all") == 0)
		flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM;
	else
		goto err;

	/*
	 * Next we either have a mask specification followed by an overflow
	 * rate or just an overflow rate on its own.
	 */
	p += strlen(p) + 1;
	if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
		/*
		 * A unit mask can only be specified if:
		 * 1) this performance counter back end supports masks.
		 * 2) the specified event is platform specific.
		 * 3) a valid hex number is converted.
		 * 4) no extraneous characters follow the mask specification.
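		 *
		 * For example, with the illustrative probe name
		 * "DC_refill_from_system-user-0x1e-50000", the "0x1e" token
		 * is converted here as the unit mask and "50000" is left to
		 * be parsed as the overflow value below.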
		 */
		if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 &&
		    ddi_strtol(p, &end, 16, &umask) == 0 &&
		    end == p + strlen(p)) {
			p += strlen(p) + 1;
		} else {
			goto err;
		}
	}

	/*
	 * This final part must be an overflow value which has to be greater
	 * than the minimum permissible overflow rate.
	 */
	if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) ||
	    val < dcpc_min_overflow)
		goto err;

	/*
	 * Validate the event and create the probe.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		char *events, *cp, *p, *end;
		int found = 0, j;
		size_t llen;

		if ((events = kcpc_list_events(i)) == NULL)
			goto err;

		llen = strlen(events);
		p = cp = ddi_strdup(events, KM_NOSLEEP);
		end = cp + llen;

		for (j = 0; j < llen; j++) {
			if (cp[j] == ',')
				cp[j] = '\0';
		}

		while (p < end && found == 0) {
			if (strcmp(p, event) == 0) {
				dcpc_create_probe(dcpc_pid, desc->dtpd_name,
				    event, umask, (uint32_t)val, flag);
				found = 1;
			}
			p += strlen(p) + 1;
		}
		kmem_free(cp, llen + 1);

		if (found)
			break;
	}

err:
	kmem_free(str, len + 1);
}

/*ARGSUSED*/
static void
dcpc_destroy(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;

	ASSERT(pp->dcpc_enabled == 0);
	kmem_free(pp, sizeof (dcpc_probe_t));
}

/*ARGSUSED*/
static int
dcpc_mode(void *arg, dtrace_id_t id, void *parg)
{
	if (CPU->cpu_cpcprofile_pc == 0) {
		return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_USER);
	} else {
		return (DTRACE_MODE_NOPRIV_DROP | DTRACE_MODE_KERNEL);
	}
}

static void
dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno)
{
	kcpc_set_t *oset;
	int i;

	(void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name,
	    CPC_MAX_EVENT_LEN);
	set->ks_req[reqno].kr_config = NULL;
	set->ks_req[reqno].kr_index = reqno;
	set->ks_req[reqno].kr_picnum = -1;
	set->ks_req[reqno].kr_flags = pp->dcpc_flag;

	/*
	 * If a unit mask has been specified then detect which attribute
	 * the platform needs. For now, it's either "umask" or "emask".
	 */
	if (pp->dcpc_umask >= 0) {
		set->ks_req[reqno].kr_attr =
		    kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP);
		set->ks_req[reqno].kr_nattrs = 1;
		if (dcpc_mask_type & DCPC_UMASK)
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "umask", 5);
		else
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "emask", 5);
		set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask;
	} else {
		set->ks_req[reqno].kr_attr = NULL;
		set->ks_req[reqno].kr_nattrs = 0;
	}

	/*
	 * If this probe is enabled, obtain its current countdown value
	 * and use that. The CPU's cpc context might not exist yet if we
	 * are dealing with a CPU that is just coming online.
	 */
	if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) {
		oset = c->cpu_cpc_ctx->kc_set;

		for (i = 0; i < oset->ks_nreqs; i++) {
			if (strcmp(oset->ks_req[i].kr_event,
			    set->ks_req[reqno].kr_event) == 0) {
				set->ks_req[reqno].kr_preset =
				    *(oset->ks_req[i].kr_data);
			}
		}
	} else {
		set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval;
	}

	set->ks_nreqs++;
}


/*
 * Create a fresh request set for the enablings represented in the
 * 'dcpc_actv_reqs' array which contains the probes we want to be
 * in the set.
 * This can be called for several reasons:
 *
 * 1) We are on a single or multi overflow platform and we have no
 *    current events so we can just create the set and initialize it.
 * 2) We are on a multi-overflow platform and we already have one or
 *    more existing events and we are adding a new enabling. Create a
 *    new set and copy old requests in and then add the new request.
 * 3) We are on a multi-overflow platform and we have just removed an
 *    enabling but we still have enablings which are valid. Create a new
 *    set and copy in still valid requests.
 */
static kcpc_set_t *
dcpc_create_set(cpu_t *c)
{
	int i, reqno = 0;
	int active_requests = 0;
	kcpc_set_t *set;

	/*
	 * First get a count of the number of currently active requests.
	 * Note that dcpc_actv_reqs[] should always reflect which requests
	 * we want to be in the set that is to be created. It is the
	 * responsibility of the caller of dcpc_create_set() to adjust that
	 * array accordingly beforehand.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL)
			active_requests++;
	}

	set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);

	set->ks_req =
	    kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP);

	set->ks_data =
	    kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP);

	/*
	 * Look for valid entries in the active requests array and populate
	 * the request set for any entries found.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL) {
			dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno);
			reqno++;
		}
	}

	return (set);
}

static int
dcpc_program_cpu_event(cpu_t *c)
{
	int i, j, subcode;
	kcpc_ctx_t *ctx, *octx;
	kcpc_set_t *set;

	set = dcpc_create_set(c);

	set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP);
	ctx->kc_set = set;
	ctx->kc_cpuid = c->cpu_id;

	if (kcpc_assign_reqs(set, ctx) != 0)
		goto err;

	if (kcpc_configure_reqs(ctx, set, &subcode) != 0)
		goto err;

	for (i = 0; i < set->ks_nreqs; i++) {
		for (j = 0; j < cpc_ncounters; j++) {
			if (dcpc_actv_reqs[j] != NULL &&
			    strcmp(set->ks_req[i].kr_event,
			    dcpc_actv_reqs[j]->dcpc_event_name) == 0) {
				dcpc_actv_reqs[j]->dcpc_picno =
				    set->ks_req[i].kr_picnum;
			}
		}
	}

	/*
	 * If we already have an active enabling then save the current cpc
	 * context away.
	 */
	octx = c->cpu_cpc_ctx;

	kcpc_cpu_program(c, ctx);

	if (octx != NULL) {
		kcpc_set_t *oset = octx->kc_set;
		kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t));
		kcpc_free_configs(oset);
		kcpc_free_set(oset);
		kcpc_ctx_free(octx);
	}

	return (0);

err:
	/*
	 * We failed to configure this request so free things up and
	 * get out.
	 */
	kcpc_free_configs(set);
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_free_set(set);
	kcpc_ctx_free(ctx);

	return (-1);
}

static void
dcpc_disable_cpu(cpu_t *c)
{
	kcpc_ctx_t *ctx;
	kcpc_set_t *set;

	/*
	 * Leave this CPU alone if it's already offline.
	 */
	if (c->cpu_flags & CPU_OFFLINE)
		return;

	/*
	 * Grab the CPU's CPC context before kcpc_cpu_stop() stops counters
	 * and changes it.
	 */
	ctx = c->cpu_cpc_ctx;

	kcpc_cpu_stop(c, B_FALSE);

	set = ctx->kc_set;

	kcpc_free_configs(set);
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_free_set(set);
	kcpc_ctx_free(ctx);
}

/*
 * The dcpc_*_interrupts() routines are responsible for manipulating the
 * per-CPU dcpc interrupt state byte. The purpose of the state byte is to
 * synchronize processing of hardware overflow interrupts with configuration
 * changes made to the CPU performance counter subsystem by the dcpc provider.
 *
 * The dcpc provider claims ownership of the overflow interrupt mechanism
 * by transitioning the state byte from DCPC_INTR_INACTIVE (indicating the
 * dcpc provider is not in use) to DCPC_INTR_FREE (the dcpc provider owns the
 * overflow mechanism and interrupts may be processed). Before modifying
 * a CPU's configuration state the state byte is transitioned from
 * DCPC_INTR_FREE to DCPC_INTR_CONFIG ("configuration in progress" state).
 * The hardware overflow handler, kcpc_hw_overflow_intr(), will only process
 * an interrupt when a configuration is not in progress (i.e. the state is
 * marked as free). During interrupt processing the state is set to
 * DCPC_INTR_PROCESSING by the overflow handler. When the last dcpc based
 * enabling is removed, the state byte is set to DCPC_INTR_INACTIVE to indicate
 * the dcpc provider is no longer interested in overflow interrupts.
 */
static void
dcpc_block_interrupts(void)
{
	cpu_t *c = cpu_list;
	uint8_t *state;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

		while (atomic_cas_8(state, DCPC_INTR_FREE,
		    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
			continue;

	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Set all CPUs' dcpc interrupt state to DCPC_INTR_FREE to indicate that
 * overflow interrupts can be processed safely.
 */
static void
dcpc_release_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Transition all CPUs' dcpc interrupt state from DCPC_INTR_INACTIVE to
 * DCPC_INTR_FREE. This indicates that the dcpc provider is now
 * responsible for handling all overflow interrupt activity. Should only be
 * called before enabling the first dcpc based probe.
 */
static void
dcpc_claim_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state == DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Set all CPUs' dcpc interrupt state to DCPC_INTR_INACTIVE to indicate that
 * the dcpc provider is no longer processing overflow interrupts. Only called
 * during removal of the last dcpc based enabling.
 */
static void
dcpc_surrender_interrupts(void)
{
	cpu_t *c = cpu_list;

	ASSERT(cpu_core[c->cpu_id].cpuc_dcpc_intr_state != DCPC_INTR_INACTIVE);

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_INACTIVE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * dcpc_program_event() can be called owing to a new enabling or if a multi
 * overflow platform has disabled a request but needs to program the requests
 * that are still valid.
 *
 * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t
 * and a new request set which contains the new enabling and any old enablings
 * which are still valid (possible with multi-overflow platforms).
 */
static int
dcpc_program_event(dcpc_probe_t *pp)
{
	cpu_t *c;
	int ret = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();

	dcpc_block_interrupts();

	c = cpu_list;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		/*
		 * Stop counters but preserve existing DTrace CPC context
		 * if there is one.
		 *
		 * If we come here when the first event is programmed for a
		 * CPU, there should be no DTrace CPC context installed. In
		 * this case, kcpc_cpu_stop() will ensure that there is no
		 * other context on the CPU.
		 *
		 * If we add a new enabling to the original one, the CPU
		 * should have the old DTrace CPC context which we need to
		 * keep around since dcpc_program_event() will add to it.
		 */
		if (c->cpu_cpc_ctx != NULL)
			kcpc_cpu_stop(c, B_TRUE);
	} while ((c = c->cpu_next) != cpu_list);

	dcpc_release_interrupts();

	/*
	 * If this enabling is being removed (in the case of a multi event
	 * capable system with more than one active enabling), we can now
	 * update the active request array to reflect the enablings that need
	 * to be reprogrammed.
	 */
	if (pp->dcpc_disabling == 1)
		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		ret = dcpc_program_cpu_event(c);
	} while ((c = c->cpu_next) != cpu_list && ret == 0);

	/*
	 * If dcpc_program_cpu_event() fails then it is because we couldn't
	 * configure the requests in the set for the CPU and not because of
	 * an error programming the hardware. If we have a failure here then
	 * we assume no CPUs have been programmed in the above step as they
	 * are all configured identically.
	 */
	if (ret != 0) {
		pp->dcpc_enabled = 0;
		kpreempt_enable();
		return (-1);
	}

	if (pp->dcpc_disabling != 1)
		pp->dcpc_enabled = 1;

	kpreempt_enable();

	return (0);
}

/*ARGSUSED*/
static int
dcpc_enable(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;
	int i, found = 0;
	cpu_t *c;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Bail out if the counters are being used by a libcpc consumer.
	 */
	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (kcpc_cpuctx > 0) {
		rw_exit(&kcpc_cpuctx_lock);
		return (-1);
	}

	dtrace_cpc_in_use++;
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Locate this enabling in the first free entry of the active
	 * request array.
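	 * The chosen index is remembered in dcpc_actv_req_idx so that the
	 * slot can be released again if this enabling later fails or is
	 * disabled.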
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] == NULL) {
			dcpc_actv_reqs[i] = pp;
			pp->dcpc_actv_req_idx = i;
			found = 1;
			break;
		}
	}

	/*
	 * If we couldn't find a slot for this probe then there is no
	 * room at the inn.
	 */
	if (!found) {
		dtrace_cpc_in_use--;
		return (-1);
	}

	ASSERT(pp->dcpc_actv_req_idx >= 0);

	/*
	 * DTrace is taking over CPC contexts, so stop collecting
	 * capacity/utilization data for all CPUs.
	 */
	if (dtrace_cpc_in_use == 1)
		cu_disable();

	/*
	 * The following must hold true if we are to (attempt to) enable
	 * this request:
	 *
	 * 1) No enablings currently exist. We allow all platforms to
	 *    proceed if this is true.
	 *
	 * OR
	 *
	 * 2) If the platform is multi overflow capable and there are
	 *    fewer valid enablings than there are counters. There is no
	 *    guarantee that a platform can accommodate as many events as
	 *    it has counters for but we will at least try to program
	 *    up to that many requests.
	 *
	 * The 'dcpc_enablings' variable is implicitly protected by locking
	 * provided by the DTrace framework and the cpu management framework.
	 */
	if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap &&
	    dcpc_enablings < cpc_ncounters)) {
		/*
		 * Before attempting to program the first enabling we need to
		 * invalidate any lwp-based contexts and lay claim to the
		 * overflow interrupt mechanism.
		 */
		if (dcpc_enablings == 0) {
			kcpc_invalidate_all();
			dcpc_claim_interrupts();
		}

		if (dcpc_program_event(pp) == 0) {
			dcpc_enablings++;
			return (0);
		}
	}

	/*
	 * If active enablings existed before we failed to enable this probe
	 * on a multi event capable platform then we need to restart counters
	 * as they will have been stopped in the attempted configuration. The
	 * context should now just contain the request prior to this failed
	 * enabling.
	 */
	if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) {
		c = cpu_list;

		ASSERT(dcpc_mult_ovf_cap == 1);
		do {
			/*
			 * Skip CPUs that are currently offline.
			 */
			if (c->cpu_flags & CPU_OFFLINE)
				continue;

			kcpc_cpu_program(c, c->cpu_cpc_ctx);
		} while ((c = c->cpu_next) != cpu_list);
	}

	/*
	 * Give up any claim to the overflow interrupt mechanism if no
	 * dcpc based enablings exist.
	 */
	if (dcpc_enablings == 0)
		dcpc_surrender_interrupts();

	dtrace_cpc_in_use--;
	dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();

	return (-1);
}

/*
 * If only one enabling is active then remove the context and free
 * everything up. If there are multiple enablings active then remove this
 * one, its associated meta-data and re-program the hardware.
 */
/*ARGSUSED*/
static void
dcpc_disable(void *arg, dtrace_id_t id, void *parg)
{
	cpu_t *c;
	dcpc_probe_t *pp = parg;

	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();

	/*
	 * This probe didn't actually make it as far as being fully enabled
	 * so we needn't do anything with it.
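	 * This can happen, for example, if dcpc_enable() could not find a
	 * free slot in dcpc_actv_reqs[] or if dcpc_program_event() failed.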
	 */
	if (pp->dcpc_enabled == 0) {
		/*
		 * If we actually allocated this request a slot in the
		 * request array but failed to enable it then remove the
		 * entry in the array.
		 */
		if (pp->dcpc_actv_req_idx >= 0) {
			dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
			pp->dcpc_actv_req_idx = pp->dcpc_picno =
			    pp->dcpc_disabling = -1;
		}

		kpreempt_enable();
		return;
	}

	/*
	 * If this is the only enabling then stop all the counters and
	 * free up the meta-data.
	 */
	if (dcpc_enablings == 1) {
		ASSERT(dtrace_cpc_in_use == 1);

		dcpc_block_interrupts();

		c = cpu_list;

		do {
			dcpc_disable_cpu(c);
		} while ((c = c->cpu_next) != cpu_list);

		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
		dcpc_surrender_interrupts();
	} else {
		/*
		 * This platform can support multiple overflow events and
		 * the enabling being disabled is not the last one. Remove this
		 * enabling and re-program the hardware with the new config.
		 */
		ASSERT(dcpc_mult_ovf_cap);
		ASSERT(dcpc_enablings > 1);

		pp->dcpc_disabling = 1;
		(void) dcpc_program_event(pp);
	}

	kpreempt_enable();

	dcpc_enablings--;
	dtrace_cpc_in_use--;
	pp->dcpc_enabled = 0;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();
}

/*ARGSUSED*/
static int
dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
{
	cpu_t *c;
	uint8_t *state;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (what) {
	case CPU_OFF:
		/*
		 * Offline CPUs are not allowed to take part so remove this
		 * CPU if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

			/*
			 * Indicate that a configuration is in progress in
			 * order to stop overflow interrupts being processed
			 * on this CPU while we disable it.
			 */
			while (atomic_cas_8(state, DCPC_INTR_FREE,
			    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
				continue;

			dcpc_disable_cpu(c);

			/*
			 * Reset this CPU's interrupt state as the
			 * configuration has ended.
			 */
			cpu_core[c->cpu_id].cpuc_dcpc_intr_state =
			    DCPC_INTR_FREE;
			membar_producer();
		}
		break;

	case CPU_ON:
	case CPU_SETUP:
		/*
		 * This CPU is being initialized or brought online so program
		 * it with the current request set if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			(void) dcpc_program_cpu_event(c);
		}
		break;

	default:
		break;
	}

	return (0);
}

static dtrace_pattr_t dcpc_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};

static dtrace_pops_t dcpc_pops = {
	dcpc_provide,
	NULL,
	dcpc_enable,
	dcpc_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	dcpc_mode,
	dcpc_destroy
};

/*ARGSUSED*/
static int
dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}

/*ARGSUSED*/
static int
dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dcpc_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static int
dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (dtrace_unregister(dcpc_pid) != 0)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);

	mutex_enter(&cpu_lock);
	unregister_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *));

	kcpc_unregister_dcpc();

	return (DDI_SUCCESS);
}

static int
dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	uint_t caps;
	char *attrs;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (kcpc_pcbe_loaded() == -1)
		return (DDI_FAILURE);

	caps = kcpc_pcbe_capabilities();

	if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) {
		cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"
		    " on this processor");
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0,
	    DDI_PSEUDO, 0) == DDI_FAILURE ||
	    dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL,
	    NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	mutex_enter(&cpu_lock);
	register_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	dcpc_ovf_mask = (1 << cpc_ncounters) - 1;
	ASSERT(dcpc_ovf_mask != 0);

	if (caps & CPC_CAP_OVERFLOW_PRECISE)
		dcpc_mult_ovf_cap = 1;

	/*
	 * Determine which, if any, mask attribute the back-end can use.
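	 * The result is recorded in dcpc_mask_type and later used by
	 * dcpc_populate_set() to decide whether a "umask" or an "emask"
	 * attribute is attached to each request.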
	 */
	attrs = kcpc_list_attrs();
	if (strstr(attrs, "umask") != NULL)
		dcpc_mask_type |= DCPC_UMASK;
	else if (strstr(attrs, "emask") != NULL)
		dcpc_mask_type |= DCPC_EMASK;

	/*
	 * The dcpc_actv_reqs array is used to store the requests that
	 * we currently have programmed. The order of requests in this
	 * array is not necessarily the order that the event appears in
	 * the kcpc_request_t array. Once entered into a slot in the array
	 * the entry is not moved until it's removed.
	 */
	dcpc_actv_reqs =
	    kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP);

	dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT);

	kcpc_register_dcpc(dcpc_fire);

	ddi_report_dev(devi);
	dcpc_devi = devi;

	return (DDI_SUCCESS);
}

static struct cb_ops dcpc_cb_ops = {
	dcpc_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dcpc_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dcpc_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dcpc_attach,		/* attach */
	dcpc_detach,		/* detach */
	nodev,			/* reset */
	&dcpc_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type */
	"DTrace CPC Module",	/* name of module */
	&dcpc_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}