/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/kcpc.h>
#include <sys/cap_util.h>
#include <sys/cpc_pcbe.h>
#include <sys/cpc_impl.h>
#include <sys/dtrace_impl.h>

/*
 * DTrace CPU Performance Counter Provider
 * ---------------------------------------
 *
 * The DTrace cpc provider allows DTrace consumers to access the CPU
 * performance counter overflow mechanism of a CPU. The configuration
 * presented in a probe specification is programmed into the performance
 * counter hardware of all available CPUs on a system. Programming the
 * hardware causes a counter on each CPU to begin counting events of the
 * given type. When the specified number of events have occurred, an overflow
 * interrupt will be generated and the probe is fired.
 *
 * The required configuration for the performance counter is encoded into
 * the probe specification and this includes the performance counter event
 * name, processor mode, overflow rate and an optional unit mask.
 *
 * Most processors provide several counters (PICs) which can count all or a
 * subset of the events available for a given CPU. However, when overflow
 * profiling is being used, not all CPUs can detect which counter generated the
 * overflow interrupt. In this case we cannot reliably determine which counter
 * overflowed and we therefore only allow such CPUs to configure one event at
 * a time. Processors that can determine the counter which overflowed are
 * allowed to program as many events at one time as possible (in theory up to
 * the number of instrumentation counters supported by that platform).
 * Therefore, multiple consumers can enable multiple probes at the same time
 * on such platforms. Platforms which cannot determine the source of an
 * overflow interrupt are only allowed to program a single event at one time.
 *
 * The performance counter hardware is made available to consumers on a
 * first-come, first-served basis. Only a finite amount of hardware resource
 * is available and, while we make every attempt to accommodate requests from
 * consumers, we must deny requests when hardware resources have been exhausted.
 * A consumer will fail to enable probes when resources are currently in use.
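 *
 * As an illustrative example (the event name here is only an assumption;
 * the set of available events differs between platforms), a consumer wishing
 * to count user-mode retired instructions might enable a probe such as:
 *
 *	cpc:::PAPI_tot_ins-user-10000
 *
 * which asks that the probe fire on a CPU each time that CPU has counted
 * 10000 occurrences of the event in user mode.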
 *
 * The cpc provider contends for shared hardware resources along with other
 * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(1M)).
 * Only one such consumer can use the performance counters at any one time and
 * counters are made available on a first-come, first-served basis. As with
 * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g.
 * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP
 * counter contexts to be invalidated.
 */

typedef struct dcpc_probe {
	char		dcpc_event_name[CPC_MAX_EVENT_LEN];
	int		dcpc_flag;	/* flags (USER/SYS) */
	uint32_t	dcpc_ovfval;	/* overflow value */
	int64_t		dcpc_umask;	/* umask/emask for this event */
	int		dcpc_picno;	/* pic this event is programmed in */
	int		dcpc_enabled;	/* probe is actually enabled? */
	int		dcpc_disabling;	/* probe is currently being disabled */
	dtrace_id_t	dcpc_id;	/* probeid this request is enabling */
	int		dcpc_actv_req_idx;	/* idx into dcpc_actv_reqs[] */
} dcpc_probe_t;

static dev_info_t			*dcpc_devi;
static dtrace_provider_id_t		dcpc_pid;
static dcpc_probe_t			**dcpc_actv_reqs;
static uint32_t				dcpc_enablings = 0;
static int				dcpc_ovf_mask = 0;
static int				dcpc_mult_ovf_cap = 0;
static int				dcpc_mask_type = 0;

/*
 * When the dcpc provider is loaded, dcpc_min_overflow is set to either
 * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in
 * the dcpc.conf file. Decrease this value to set probes with smaller
 * overflow values. Remember that very small values could render a system
 * unusable with frequently occurring events.
 */
#define	DCPC_MIN_OVF_DEFAULT		5000
static uint32_t				dcpc_min_overflow;

static int dcpc_aframes = 0;	/* override for artificial frame setting */
#if defined(__x86)
#define	DCPC_ARTIFICIAL_FRAMES	8
#elif defined(__sparc)
#define	DCPC_ARTIFICIAL_FRAMES	2
#endif

/*
 * Called from the platform overflow interrupt handler. 'bitmap' is a mask
 * which contains the pic(s) that have overflowed.
 */
static void
dcpc_fire(uint64_t bitmap)
{
	int i;

	/*
	 * No counter was marked as overflowing. Shout about it and get out.
	 */
	if ((bitmap & dcpc_ovf_mask) == 0) {
		cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n");
		return;
	}

	/*
	 * This is the common case of a processor that doesn't support
	 * multiple overflow events. Such systems are only allowed a single
	 * enabling and therefore we just look for the first entry in
	 * the active request array.
	 */
	if (!dcpc_mult_ovf_cap) {
		for (i = 0; i < cpc_ncounters; i++) {
			if (dcpc_actv_reqs[i] != NULL) {
				dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
				    CPU->cpu_cpcprofile_pc,
				    CPU->cpu_cpcprofile_upc, 0, 0, 0);
				return;
			}
		}
		return;
	}

	/*
	 * This is a processor capable of handling multiple overflow events.
	 * Iterate over the array of active requests and locate the counters
	 * that overflowed (note: it is possible for more than one counter to
	 * have overflowed at the same time).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL &&
		    (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) {
			dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
			    CPU->cpu_cpcprofile_pc,
			    CPU->cpu_cpcprofile_upc, 0, 0, 0);
		}
	}
}

static void
dcpc_create_probe(dtrace_provider_id_t id, const char *probename,
    char *eventname, int64_t umask, uint32_t ovfval, char flag)
{
	dcpc_probe_t *pp;
	int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes();

	if (dcpc_aframes)
		nr_frames = dcpc_aframes;

	if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0)
		return;

	pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP);
	(void) strncpy(pp->dcpc_event_name, eventname,
	    sizeof (pp->dcpc_event_name) - 1);
	pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0';
	pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT;
	pp->dcpc_ovfval = ovfval;
	pp->dcpc_umask = umask;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename,
	    nr_frames, pp);
}

/*ARGSUSED*/
static void
dcpc_provide(void *arg, const dtrace_probedesc_t *desc)
{
	/*
	 * The format of a probe is:
	 *
	 *	event_name-mode-{optional_umask}-overflow_rate
	 * e.g.
	 *	DC_refill_from_system-user-0x1e-50000, or,
	 *	DC_refill_from_system-all-10000
	 *
	 */
	char *str, *end, *p;
	int i, flag = 0;
	char event[CPC_MAX_EVENT_LEN];
	long umask = -1, val = 0;
	size_t evlen, len;

	/*
	 * The 'cpc' provider offers no probes by default.
	 */
	if (desc == NULL)
		return;

	len = strlen(desc->dtpd_name);
	p = str = kmem_alloc(len + 1, KM_SLEEP);
	(void) strcpy(str, desc->dtpd_name);

	/*
	 * We have a poor man's strtok() going on here. Replace any hyphens
	 * in the probe name with NULL characters in order to make it
	 * easy to parse the string with regular string functions.
	 */
	for (i = 0; i < len; i++) {
		if (str[i] == '-')
			str[i] = '\0';
	}

	/*
	 * The first part of the string must be either a platform event
	 * name or a generic event name.
	 */
	evlen = strlen(p);
	(void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1);
	event[CPC_MAX_EVENT_LEN - 1] = '\0';

	/*
	 * The next part of the name is the mode specification. Valid
	 * settings are "user", "kernel" or "all".
	 */
	p += evlen + 1;

	if (strcmp(p, "user") == 0)
		flag |= CPC_COUNT_USER;
	else if (strcmp(p, "kernel") == 0)
		flag |= CPC_COUNT_SYSTEM;
	else if (strcmp(p, "all") == 0)
		flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM;
	else
		goto err;

	/*
	 * Next we either have a mask specification followed by an overflow
	 * rate or just an overflow rate on its own.
	 */
	p += strlen(p) + 1;
	if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
		/*
		 * A unit mask can only be specified if:
		 * 1) this performance counter back end supports masks.
		 * 2) the specified event is platform specific.
		 * 3) a valid hex number is converted.
		 * 4) no extraneous characters follow the mask specification.
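		 *
		 * As a sketch of how the check below behaves: for a mask
		 * component such as "0x1e" on a back end that supports
		 * masks, ddi_strtol() converts the whole string and 'end'
		 * is left pointing at its terminating NUL, so the mask is
		 * accepted. A component such as "0x1eq" leaves 'end' short
		 * of the end of the string and the specification is
		 * rejected.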
		 */
		if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 &&
		    ddi_strtol(p, &end, 16, &umask) == 0 &&
		    end == p + strlen(p)) {
			p += strlen(p) + 1;
		} else {
			goto err;
		}
	}

	/*
	 * This final part must be an overflow value which has to be greater
	 * than the minimum permissible overflow rate.
	 */
	if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) ||
	    val < dcpc_min_overflow)
		goto err;

	/*
	 * Validate the event and create the probe.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		char *events, *cp, *p, *end;
		int found = 0, j;
		size_t llen;

		if ((events = kcpc_list_events(i)) == NULL)
			goto err;

		llen = strlen(events);
		p = cp = ddi_strdup(events, KM_NOSLEEP);
		end = cp + llen;

		for (j = 0; j < llen; j++) {
			if (cp[j] == ',')
				cp[j] = '\0';
		}

		while (p < end && found == 0) {
			if (strcmp(p, event) == 0) {
				dcpc_create_probe(dcpc_pid, desc->dtpd_name,
				    event, umask, (uint32_t)val, flag);
				found = 1;
			}
			p += strlen(p) + 1;
		}
		kmem_free(cp, llen + 1);

		if (found)
			break;
	}

err:
	kmem_free(str, len + 1);
}

/*ARGSUSED*/
static void
dcpc_destroy(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;

	ASSERT(pp->dcpc_enabled == 0);
	kmem_free(pp, sizeof (dcpc_probe_t));
}

/*ARGSUSED*/
static int
dcpc_usermode(void *arg, dtrace_id_t id, void *parg)
{
	return (CPU->cpu_cpcprofile_pc == 0);
}

static void
dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno)
{
	kcpc_set_t *oset;
	int i;

	(void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name,
	    CPC_MAX_EVENT_LEN);
	set->ks_req[reqno].kr_config = NULL;
	set->ks_req[reqno].kr_index = reqno;
	set->ks_req[reqno].kr_picnum = -1;
	set->ks_req[reqno].kr_flags = pp->dcpc_flag;

	/*
	 * If a unit mask has been specified then detect which attribute
	 * the platform needs. For now, it's either "umask" or "emask".
	 */
	if (pp->dcpc_umask >= 0) {
		set->ks_req[reqno].kr_attr =
		    kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP);
		set->ks_req[reqno].kr_nattrs = 1;
		if (dcpc_mask_type & DCPC_UMASK)
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "umask", 5);
		else
			(void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
			    "emask", 5);
		set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask;
	} else {
		set->ks_req[reqno].kr_attr = NULL;
		set->ks_req[reqno].kr_nattrs = 0;
	}

	/*
	 * If this probe is enabled, obtain its current countdown value
	 * and use that. The CPU's cpc context might not exist yet if we
	 * are dealing with a CPU that is just coming online.
	 */
	if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) {
		oset = c->cpu_cpc_ctx->kc_set;

		for (i = 0; i < oset->ks_nreqs; i++) {
			if (strcmp(oset->ks_req[i].kr_event,
			    set->ks_req[reqno].kr_event) == 0) {
				set->ks_req[reqno].kr_preset =
				    *(oset->ks_req[i].kr_data);
			}
		}
	} else {
		set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval;
	}

	set->ks_nreqs++;
}


/*
 * Create a fresh request set for the enablings represented in the
 * 'dcpc_actv_reqs' array which contains the probes we want to be
 * in the set. This can be called for several reasons:
 *
 * 1) We are on a single or multi overflow platform and we have no
 *    current events so we can just create the set and initialize it.
 * 2) We are on a multi-overflow platform and we already have one or
 *    more existing events and we are adding a new enabling. Create a
 *    new set and copy old requests in and then add the new request.
 * 3) We are on a multi-overflow platform and we have just removed an
 *    enabling but we still have enablings which are valid. Create a new
 *    set and copy in still valid requests.
 */
static kcpc_set_t *
dcpc_create_set(cpu_t *c)
{
	int i, reqno = 0;
	int active_requests = 0;
	kcpc_set_t *set;

	/*
	 * First get a count of the number of currently active requests.
	 * Note that dcpc_actv_reqs[] should always reflect which requests
	 * we want to be in the set that is to be created. It is the
	 * responsibility of the caller of dcpc_create_set() to adjust that
	 * array accordingly beforehand.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL)
			active_requests++;
	}

	set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);

	set->ks_req =
	    kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP);

	set->ks_data =
	    kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP);

	/*
	 * Look for valid entries in the active requests array and populate
	 * the request set for any entries found.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] != NULL) {
			dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno);
			reqno++;
		}
	}

	return (set);
}

static int
dcpc_program_cpu_event(cpu_t *c)
{
	int i, j, subcode;
	kcpc_ctx_t *ctx, *octx;
	kcpc_set_t *set;

	set = dcpc_create_set(c);

	set->ks_ctx = ctx = kcpc_ctx_alloc(KM_SLEEP);
	ctx->kc_set = set;
	ctx->kc_cpuid = c->cpu_id;

	if (kcpc_assign_reqs(set, ctx) != 0)
		goto err;

	if (kcpc_configure_reqs(ctx, set, &subcode) != 0)
		goto err;

	for (i = 0; i < set->ks_nreqs; i++) {
		for (j = 0; j < cpc_ncounters; j++) {
			if (dcpc_actv_reqs[j] != NULL &&
			    strcmp(set->ks_req[i].kr_event,
			    dcpc_actv_reqs[j]->dcpc_event_name) == 0) {
				dcpc_actv_reqs[j]->dcpc_picno =
				    set->ks_req[i].kr_picnum;
			}
		}
	}

	/*
	 * If we already have an active enabling then save the current cpc
	 * context away.
	 */
	octx = c->cpu_cpc_ctx;

	kcpc_cpu_program(c, ctx);

	if (octx != NULL) {
		kcpc_set_t *oset = octx->kc_set;
		kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t));
		kcpc_free_set(oset);
		kcpc_ctx_free(octx);
	}

	return (0);

err:
	/*
	 * We failed to configure this request so free things up and
	 * get out.
	 */
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_free_set(set);
	kcpc_ctx_free(ctx);

	return (-1);
}

static void
dcpc_disable_cpu(cpu_t *c)
{
	kcpc_ctx_t *ctx;
	kcpc_set_t *set;

	/*
	 * Leave this CPU alone if it's already offline.
	 */
	if (c->cpu_flags & CPU_OFFLINE)
		return;

	/*
	 * Grab the CPU's CPC context before kcpc_cpu_stop() stops counters
	 * and changes it.
	 */
	ctx = c->cpu_cpc_ctx;

	kcpc_cpu_stop(c, B_FALSE);

	set = ctx->kc_set;

	kcpc_free_configs(set);

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_free_set(set);
	kcpc_ctx_free(ctx);
}

/*
 * Stop overflow interrupts being actively processed so that per-CPU
 * configuration state can be changed safely and correctly. Each CPU has a
 * dcpc interrupt state byte which is transitioned from DCPC_INTR_FREE (the
 * "free" state) to DCPC_INTR_CONFIG (the "configuration in process" state)
 * before any configuration state is changed on any CPUs. The hardware overflow
 * handler, kcpc_hw_overflow_intr(), will only process an interrupt when a
 * configuration is not in process (i.e. the state is marked as free). During
 * interrupt processing the state is set to DCPC_INTR_PROCESSING by the
 * overflow handler.
 */
static void
dcpc_block_interrupts(void)
{
	cpu_t *c;
	uint8_t *state;

	c = cpu_list;

	do {
		state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

		while (atomic_cas_8(state, DCPC_INTR_FREE,
		    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
			continue;

	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * Set all CPUs' dcpc interrupt state to DCPC_INTR_FREE to indicate that
 * overflow interrupts can be processed safely.
 */
static void
dcpc_release_interrupts(void)
{
	cpu_t *c = cpu_list;

	do {
		cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
		membar_producer();
	} while ((c = c->cpu_next) != cpu_list);
}

/*
 * dcpc_program_event() can be called owing to a new enabling or if a multi
 * overflow platform has disabled a request but needs to program the requests
 * that are still valid.
 *
 * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t
 * and a new request set which contains the new enabling and any old enablings
 * which are still valid (possible with multi-overflow platforms).
 */
static int
dcpc_program_event(dcpc_probe_t *pp)
{
	cpu_t *c;
	int ret = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();

	dcpc_block_interrupts();

	c = cpu_list;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		/*
		 * Stop counters but preserve the existing DTrace CPC context
		 * if there is one.
		 *
		 * If we come here when the first event is programmed for a
		 * CPU, there should be no DTrace CPC context installed. In
		 * this case, kcpc_cpu_stop() will ensure that there is no
		 * other context on the CPU.
		 *
		 * If we are adding a new enabling to the original one, the
		 * CPU should have the old DTrace CPC context which we need
		 * to keep around since dcpc_program_event() will add to it.
		 */
		if (c->cpu_cpc_ctx != NULL)
			kcpc_cpu_stop(c, B_TRUE);
	} while ((c = c->cpu_next) != cpu_list);

	dcpc_release_interrupts();

	/*
	 * If this enabling is being removed (in the case of a multi event
	 * capable system with more than one active enabling), we can now
	 * update the active request array to reflect the enablings that need
	 * to be reprogrammed.
	 */
	if (pp->dcpc_disabling == 1)
		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;

	do {
		/*
		 * Skip CPUs that are currently offline.
		 */
		if (c->cpu_flags & CPU_OFFLINE)
			continue;

		ret = dcpc_program_cpu_event(c);
	} while ((c = c->cpu_next) != cpu_list && ret == 0);

	/*
	 * If dcpc_program_cpu_event() fails then it is because we couldn't
	 * configure the requests in the set for the CPU and not because of
	 * an error programming the hardware. If we have a failure here then
	 * we assume no CPUs have been programmed in the above step as they
	 * are all configured identically.
	 */
	if (ret != 0) {
		pp->dcpc_enabled = 0;
		kpreempt_enable();
		return (-1);
	}

	if (pp->dcpc_disabling != 1)
		pp->dcpc_enabled = 1;

	kpreempt_enable();

	return (0);
}

/*ARGSUSED*/
static int
dcpc_enable(void *arg, dtrace_id_t id, void *parg)
{
	dcpc_probe_t *pp = parg;
	int i, found = 0;
	cpu_t *c;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Bail out if the counters are being used by a libcpc consumer.
	 */
	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (kcpc_cpuctx > 0) {
		rw_exit(&kcpc_cpuctx_lock);
		return (-1);
	}

	dtrace_cpc_in_use++;
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Locate this enabling in the first free entry of the active
	 * request array.
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (dcpc_actv_reqs[i] == NULL) {
			dcpc_actv_reqs[i] = pp;
			pp->dcpc_actv_req_idx = i;
			found = 1;
			break;
		}
	}

	/*
	 * If we couldn't find a slot for this probe then there is no
	 * room at the inn.
	 */
	if (!found) {
		dtrace_cpc_in_use--;
		return (-1);
	}

	ASSERT(pp->dcpc_actv_req_idx >= 0);

	/*
	 * DTrace is taking over CPC contexts, so stop collecting
	 * capacity/utilization data for all CPUs.
	 */
	if (dtrace_cpc_in_use == 1)
		cu_disable();

	/*
	 * The following must hold true if we are to (attempt to) enable
	 * this request:
	 *
	 * 1) No enablings currently exist. We allow all platforms to
	 *    proceed if this is true.
	 *
	 * OR
	 *
	 * 2) The platform is multi overflow capable and there are
	 *    fewer valid enablings than there are counters. There is no
	 *    guarantee that a platform can accommodate as many events as
	 *    it has counters for but we will at least try to program
	 *    up to that many requests.
	 *
	 * The 'dcpc_enablings' variable is implicitly protected by locking
	 * provided by the DTrace framework and the cpu management framework.
	 */
	if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap &&
	    dcpc_enablings < cpc_ncounters)) {
		/*
		 * Before attempting to program the first enabling we need to
		 * invalidate any lwp-based contexts.
		 */
		if (dcpc_enablings == 0)
			kcpc_invalidate_all();

		if (dcpc_program_event(pp) == 0) {
			dcpc_enablings++;
			return (0);
		}
	}

	/*
	 * If active enablings existed before we failed to enable this probe
	 * on a multi event capable platform then we need to restart counters
	 * as they will have been stopped in the attempted configuration. The
	 * context should now just contain the requests prior to this failed
	 * enabling.
	 */
	if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) {
		c = cpu_list;

		ASSERT(dcpc_mult_ovf_cap == 1);
		do {
			/*
			 * Skip CPUs that are currently offline.
			 */
			if (c->cpu_flags & CPU_OFFLINE)
				continue;

			kcpc_cpu_program(c, c->cpu_cpc_ctx);
		} while ((c = c->cpu_next) != cpu_list);
	}

	dtrace_cpc_in_use--;
	dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();

	return (-1);
}

/*
 * If only one enabling is active then remove the context and free
 * everything up. If there are multiple enablings active then remove this
 * one, its associated meta-data and re-program the hardware.
 */
/*ARGSUSED*/
static void
dcpc_disable(void *arg, dtrace_id_t id, void *parg)
{
	cpu_t *c;
	dcpc_probe_t *pp = parg;

	ASSERT(MUTEX_HELD(&cpu_lock));

	kpreempt_disable();

	/*
	 * This probe didn't actually make it as far as being fully enabled
	 * so we needn't do anything with it.
	 */
	if (pp->dcpc_enabled == 0) {
		/*
		 * If we actually allocated this request a slot in the
		 * request array but failed to enable it then remove the
		 * entry in the array.
		 */
		if (pp->dcpc_actv_req_idx >= 0) {
			dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
			pp->dcpc_actv_req_idx = pp->dcpc_picno =
			    pp->dcpc_disabling = -1;
		}

		kpreempt_enable();
		return;
	}

	/*
	 * If this is the only enabling then stop all the counters and
	 * free up the meta-data.
	 */
	if (dcpc_enablings == 1) {
		ASSERT(dtrace_cpc_in_use == 1);

		dcpc_block_interrupts();

		c = cpu_list;

		do {
			dcpc_disable_cpu(c);
		} while ((c = c->cpu_next) != cpu_list);

		dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
		dcpc_release_interrupts();
	} else {
		/*
		 * This platform can support multiple overflow events and
		 * the enabling being disabled is not the last one. Remove this
		 * enabling and re-program the hardware with the new config.
		 */
		ASSERT(dcpc_mult_ovf_cap);
		ASSERT(dcpc_enablings > 1);

		pp->dcpc_disabling = 1;
		(void) dcpc_program_event(pp);
	}

	kpreempt_enable();

	dcpc_enablings--;
	dtrace_cpc_in_use--;
	pp->dcpc_enabled = 0;
	pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

	/*
	 * If all probes are removed, enable capacity/utilization data
	 * collection for every CPU.
	 */
	if (dtrace_cpc_in_use == 0)
		cu_enable();
}

/*ARGSUSED*/
static int
dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
{
	cpu_t *c;
	uint8_t *state;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (what) {
	case CPU_OFF:
		/*
		 * Offline CPUs are not allowed to take part so remove this
		 * CPU if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

			/*
			 * Indicate that a configuration is in process in
			 * order to stop overflow interrupts being processed
			 * on this CPU while we disable it.
			 */
			while (atomic_cas_8(state, DCPC_INTR_FREE,
			    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
				continue;

			dcpc_disable_cpu(c);

			/*
			 * Reset this CPU's interrupt state as the
			 * configuration has ended.
			 */
			cpu_core[c->cpu_id].cpuc_dcpc_intr_state =
			    DCPC_INTR_FREE;
			membar_producer();
		}
		break;

	case CPU_ON:
	case CPU_SETUP:
		/*
		 * This CPU is being initialized or brought online so program
		 * it with the current request set if we are actively tracing.
		 */
		if (dtrace_cpc_in_use) {
			c = cpu_get(cpu);
			(void) dcpc_program_cpu_event(c);
		}
		break;

	default:
		break;
	}

	return (0);
}

static dtrace_pattr_t dcpc_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};

static dtrace_pops_t dcpc_pops = {
	dcpc_provide,
	NULL,
	dcpc_enable,
	dcpc_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	dcpc_usermode,
	dcpc_destroy
};

/*ARGSUSED*/
static int
dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
	return (0);
}

/*ARGSUSED*/
static int
dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dcpc_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static int
dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (dtrace_unregister(dcpc_pid) != 0)
		return (DDI_FAILURE);

	ddi_remove_minor_node(devi, NULL);

	mutex_enter(&cpu_lock);
	unregister_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *));

	kcpc_unregister_dcpc();

	return (DDI_SUCCESS);
}

static int
dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	uint_t caps;
	char *attrs;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (kcpc_pcbe_loaded() == -1)
		return (DDI_FAILURE);

	caps = kcpc_pcbe_capabilities();

	if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) {
		cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"
		    " on this processor");
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0,
	    DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL,
	    NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	mutex_enter(&cpu_lock);
	register_cpu_setup_func(dcpc_cpu_setup, NULL);
	mutex_exit(&cpu_lock);

	dcpc_ovf_mask = (1 << cpc_ncounters) - 1;
	ASSERT(dcpc_ovf_mask != 0);

	if (caps & CPC_CAP_OVERFLOW_PRECISE)
		dcpc_mult_ovf_cap = 1;

	/*
	 * Determine which, if any, mask attribute the back-end can use.
	 */
	attrs = kcpc_list_attrs();
	if (strstr(attrs, "umask") != NULL)
		dcpc_mask_type |= DCPC_UMASK;
	else if (strstr(attrs, "emask") != NULL)
		dcpc_mask_type |= DCPC_EMASK;

	/*
	 * The dcpc_actv_reqs array is used to store the requests that
	 * we currently have programmed. The order of requests in this
	 * array is not necessarily the order that the event appears in
	 * the kcpc_request_t array. Once entered into a slot in the array
	 * the entry is not moved until it's removed.
	 */
	dcpc_actv_reqs =
	    kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP);

	dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT);

	kcpc_register_dcpc(dcpc_fire);

	ddi_report_dev(devi);
	dcpc_devi = devi;

	return (DDI_SUCCESS);
}

static struct cb_ops dcpc_cb_ops = {
	dcpc_open,		/* open */
	nodev,			/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	nodev,			/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dcpc_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dcpc_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dcpc_attach,		/* attach */
	dcpc_detach,		/* detach */
	nodev,			/* reset */
	&dcpc_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev,			/* dev power */
	ddi_quiesce_not_needed	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type */
	"DTrace CPC Module",	/* name of module */
	&dcpc_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}