/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/kcpc.h>
#include <sys/cpc_pcbe.h>
#include <sys/cpc_impl.h>
#include <sys/dtrace_impl.h>

/*
 * DTrace CPU Performance Counter Provider
 * ---------------------------------------
 *
 * The DTrace cpc provider allows DTrace consumers to access the CPU
 * performance counter overflow mechanism of a CPU. The configuration
 * presented in a probe specification is programmed into the performance
 * counter hardware of all available CPUs on a system. Programming the
 * hardware causes a counter on each CPU to begin counting events of the
 * given type. When the specified number of events has occurred, an overflow
 * interrupt is generated and the probe fires.
 *
 * The required configuration for the performance counter is encoded into
 * the probe specification and this includes the performance counter event
 * name, processor mode, overflow rate and an optional unit mask.
 *
 * Most processors provide several counters (PICs) which can count all or a
 * subset of the events available for a given CPU. However, when overflow
 * profiling is being used, not all CPUs can detect which counter generated
 * the overflow interrupt. In this case we cannot reliably determine which
 * counter overflowed and we therefore only allow such CPUs to configure one
 * event at a time. Processors that can determine the counter which
 * overflowed are allowed to program as many events at one time as possible
 * (in theory up to the number of instrumentation counters supported by that
 * platform). Therefore, multiple consumers can enable multiple probes at the
 * same time on such platforms. Platforms which cannot determine the source
 * of an overflow interrupt are only allowed to program a single event at one
 * time.
 *
 * The performance counter hardware is made available to consumers on a
 * first-come, first-served basis. Only a finite amount of hardware resource
 * is available and, while we make every attempt to accommodate requests from
 * consumers, we must deny requests when hardware resources have been
 * exhausted. A consumer will fail to enable probes when resources are
 * currently in use.
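 *
 * As an illustrative sketch only (the event name below is hypothetical and
 * its availability depends on the platform's PCBE back-end), a consumer
 * might request an overflow probe from the command line with:
 *
 *	# dtrace -n 'cpc:::PAPI_tot_ins-user-10000 { @[cpu] = count(); }'
 *
 * which asks for the probe to fire after every 10000 user-mode occurrences
 * of the named event on each CPU.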
 *
 * The cpc provider contends for shared hardware resources along with other
 * consumers of the kernel CPU performance counter subsystem (e.g. cpustat(1M)).
 * Only one such consumer can use the performance counters at any one time and
 * counters are made available on a first-come, first-served basis. As with
 * cpustat, the cpc provider has priority over per-LWP libcpc usage (e.g.
 * cputrack(1)). Invoking the cpc provider will cause all existing per-LWP
 * counter contexts to be invalidated.
 */

typedef struct dcpc_probe {
        char dcpc_event_name[CPC_MAX_EVENT_LEN];
        int dcpc_flag;                  /* flags (USER/SYS) */
        uint32_t dcpc_ovfval;           /* overflow value */
        int64_t dcpc_umask;             /* umask/emask for this event */
        int dcpc_picno;                 /* pic this event is programmed in */
        int dcpc_enabled;               /* probe is actually enabled? */
        int dcpc_disabling;             /* probe is currently being disabled */
        dtrace_id_t dcpc_id;            /* probeid this request is enabling */
        int dcpc_actv_req_idx;          /* idx into dcpc_actv_reqs[] */
} dcpc_probe_t;

static dev_info_t *dcpc_devi;
static dtrace_provider_id_t dcpc_pid;
static dcpc_probe_t **dcpc_actv_reqs;
static uint32_t dcpc_enablings = 0;
static int dcpc_ovf_mask = 0;
static int dcpc_mult_ovf_cap = 0;
static int dcpc_mask_type = 0;

/*
 * When the dcpc provider is loaded, dcpc_min_overflow is set to either
 * DCPC_MIN_OVF_DEFAULT or the value that dcpc-min-overflow is set to in
 * the dcpc.conf file. Decrease this value to set probes with smaller
 * overflow values. Remember that very small values could render a system
 * unusable with frequently occurring events.
 */
#define DCPC_MIN_OVF_DEFAULT            5000
static uint32_t dcpc_min_overflow;

static int dcpc_aframes = 0;    /* override for artificial frame setting */
#if defined(__x86)
#define DCPC_ARTIFICIAL_FRAMES  8
#elif defined(__sparc)
#define DCPC_ARTIFICIAL_FRAMES  2
#endif

/*
 * Called from the platform overflow interrupt handler. 'bitmap' is a mask
 * which contains the pic(s) that have overflowed.
 */
static void
dcpc_fire(uint64_t bitmap)
{
        int i;

        /*
         * No counter was marked as overflowing. Shout about it and get out.
         */
        if ((bitmap & dcpc_ovf_mask) == 0) {
                cmn_err(CE_NOTE, "dcpc_fire: no counter overflow found\n");
                return;
        }

        /*
         * This is the common case of a processor that doesn't support
         * multiple overflow events. Such systems are only allowed a single
         * enabling and therefore we just look for the first entry in
         * the active request array.
         */
        if (!dcpc_mult_ovf_cap) {
                for (i = 0; i < cpc_ncounters; i++) {
                        if (dcpc_actv_reqs[i] != NULL) {
                                dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
                                    CPU->cpu_cpcprofile_pc,
                                    CPU->cpu_cpcprofile_upc, 0, 0, 0);
                                return;
                        }
                }
                return;
        }

        /*
         * This is a processor capable of handling multiple overflow events.
         * Iterate over the array of active requests and locate the counters
         * that overflowed (note: it is possible for more than one counter to
         * have overflowed at the same time).
         */
        for (i = 0; i < cpc_ncounters; i++) {
                if (dcpc_actv_reqs[i] != NULL &&
                    (bitmap & (1ULL << dcpc_actv_reqs[i]->dcpc_picno))) {
                        dtrace_probe(dcpc_actv_reqs[i]->dcpc_id,
                            CPU->cpu_cpcprofile_pc,
                            CPU->cpu_cpcprofile_upc, 0, 0, 0);
                }
        }
}

static void
dcpc_create_probe(dtrace_provider_id_t id, const char *probename,
    char *eventname, int64_t umask, uint32_t ovfval, char flag)
{
        dcpc_probe_t *pp;
        int nr_frames = DCPC_ARTIFICIAL_FRAMES + dtrace_mach_aframes();

        if (dcpc_aframes)
                nr_frames = dcpc_aframes;

        if (dtrace_probe_lookup(id, NULL, NULL, probename) != 0)
                return;

        pp = kmem_zalloc(sizeof (dcpc_probe_t), KM_SLEEP);
        (void) strncpy(pp->dcpc_event_name, eventname,
            sizeof (pp->dcpc_event_name) - 1);
        pp->dcpc_event_name[sizeof (pp->dcpc_event_name) - 1] = '\0';
        pp->dcpc_flag = flag | CPC_OVF_NOTIFY_EMT;
        pp->dcpc_ovfval = ovfval;
        pp->dcpc_umask = umask;
        pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;

        pp->dcpc_id = dtrace_probe_create(id, NULL, NULL, probename,
            nr_frames, pp);
}

/*ARGSUSED*/
static void
dcpc_provide(void *arg, const dtrace_probedesc_t *desc)
{
        /*
         * The format of a probe is:
         *
         *      event_name-mode-{optional_umask}-overflow_rate
         * e.g.
         *      DC_refill_from_system-user-0x1e-50000, or,
         *      DC_refill_from_system-all-10000
         */
        char *str, *end, *p;
        int i, flag = 0;
        char event[CPC_MAX_EVENT_LEN];
        long umask = -1, val = 0;
        size_t evlen, len;

        /*
         * The 'cpc' provider offers no probes by default.
         */
        if (desc == NULL)
                return;

        len = strlen(desc->dtpd_name);
        p = str = kmem_alloc(len + 1, KM_SLEEP);
        (void) strcpy(str, desc->dtpd_name);

        /*
         * We have a poor man's strtok() going on here. Replace any hyphens
         * in the probe name with NULL characters in order to make it
         * easy to parse the string with regular string functions.
         */
        for (i = 0; i < len; i++) {
                if (str[i] == '-')
                        str[i] = '\0';
        }

        /*
         * The first part of the string must be either a platform event
         * name or a generic event name.
         */
        evlen = strlen(p);
        (void) strncpy(event, p, CPC_MAX_EVENT_LEN - 1);
        event[CPC_MAX_EVENT_LEN - 1] = '\0';

        /*
         * The next part of the name is the mode specification. Valid
         * settings are "user", "kernel" or "all".
         */
        p += evlen + 1;

        if (strcmp(p, "user") == 0)
                flag |= CPC_COUNT_USER;
        else if (strcmp(p, "kernel") == 0)
                flag |= CPC_COUNT_SYSTEM;
        else if (strcmp(p, "all") == 0)
                flag |= CPC_COUNT_USER | CPC_COUNT_SYSTEM;
        else
                goto err;

        /*
         * Next we either have a mask specification followed by an overflow
         * rate or just an overflow rate on its own.
         */
        p += strlen(p) + 1;
        if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
                /*
                 * A unit mask can only be specified if:
                 * 1) this performance counter back end supports masks.
                 * 2) the specified event is platform specific.
                 * 3) a valid hex number is converted.
                 * 4) no extraneous characters follow the mask specification.
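                 *
                 * For example, in the hypothetical specification
                 * "DC_refill_from_system-user-0x1e-50000" shown in the
                 * comment above, "0x1e" is the portion parsed here as the
                 * unit mask and "50000" is left as the overflow rate.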
                 */
                if (dcpc_mask_type != 0 && strncmp(event, "PAPI", 4) != 0 &&
                    ddi_strtol(p, &end, 16, &umask) == 0 &&
                    end == p + strlen(p)) {
                        p += strlen(p) + 1;
                } else {
                        goto err;
                }
        }

        /*
         * This final part must be an overflow value which has to be greater
         * than the minimum permissible overflow rate.
         */
        if ((ddi_strtol(p, &end, 10, &val) != 0) || end != p + strlen(p) ||
            val < dcpc_min_overflow)
                goto err;

        /*
         * Validate the event and create the probe.
         */
        for (i = 0; i < cpc_ncounters; i++) {
                char *events, *cp, *p, *end;
                int found = 0, j;
                size_t llen;

                if ((events = kcpc_list_events(i)) == NULL)
                        goto err;

                llen = strlen(events);
                p = cp = ddi_strdup(events, KM_NOSLEEP);
                end = cp + llen;

                for (j = 0; j < llen; j++) {
                        if (cp[j] == ',')
                                cp[j] = '\0';
                }

                while (p < end && found == 0) {
                        if (strcmp(p, event) == 0) {
                                dcpc_create_probe(dcpc_pid, desc->dtpd_name,
                                    event, umask, (uint32_t)val, flag);
                                found = 1;
                        }
                        p += strlen(p) + 1;
                }
                kmem_free(cp, llen + 1);

                if (found)
                        break;
        }

err:
        kmem_free(str, len + 1);
}

/*ARGSUSED*/
static void
dcpc_destroy(void *arg, dtrace_id_t id, void *parg)
{
        dcpc_probe_t *pp = parg;

        ASSERT(pp->dcpc_enabled == 0);
        kmem_free(pp, sizeof (dcpc_probe_t));
}

/*ARGSUSED*/
static int
dcpc_usermode(void *arg, dtrace_id_t id, void *parg)
{
        return (CPU->cpu_cpcprofile_pc == 0);
}

static void
dcpc_populate_set(cpu_t *c, dcpc_probe_t *pp, kcpc_set_t *set, int reqno)
{
        kcpc_set_t *oset;
        int i;

        (void) strncpy(set->ks_req[reqno].kr_event, pp->dcpc_event_name,
            CPC_MAX_EVENT_LEN);
        set->ks_req[reqno].kr_config = NULL;
        set->ks_req[reqno].kr_index = reqno;
        set->ks_req[reqno].kr_picnum = -1;
        set->ks_req[reqno].kr_flags = pp->dcpc_flag;

        /*
         * If a unit mask has been specified then detect which attribute
         * the platform needs. For now, it's either "umask" or "emask".
         */
        if (pp->dcpc_umask >= 0) {
                set->ks_req[reqno].kr_attr =
                    kmem_zalloc(sizeof (kcpc_attr_t), KM_SLEEP);
                set->ks_req[reqno].kr_nattrs = 1;
                if (dcpc_mask_type & DCPC_UMASK)
                        (void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
                            "umask", 5);
                else
                        (void) strncpy(set->ks_req[reqno].kr_attr->ka_name,
                            "emask", 5);
                set->ks_req[reqno].kr_attr->ka_val = pp->dcpc_umask;
        } else {
                set->ks_req[reqno].kr_attr = NULL;
                set->ks_req[reqno].kr_nattrs = 0;
        }

        /*
         * If this probe is enabled, obtain its current countdown value
         * and use that. The CPU's cpc context might not exist yet if we
         * are dealing with a CPU that is just coming online.
         */
        if (pp->dcpc_enabled && (c->cpu_cpc_ctx != NULL)) {
                oset = c->cpu_cpc_ctx->kc_set;

                for (i = 0; i < oset->ks_nreqs; i++) {
                        if (strcmp(oset->ks_req[i].kr_event,
                            set->ks_req[reqno].kr_event) == 0) {
                                set->ks_req[reqno].kr_preset =
                                    *(oset->ks_req[i].kr_data);
                        }
                }
        } else {
                set->ks_req[reqno].kr_preset = UINT64_MAX - pp->dcpc_ovfval;
        }

        set->ks_nreqs++;
}

/*
 * Create a fresh request set for the enablings represented in the
 * 'dcpc_actv_reqs' array which contains the probes we want to be
 * in the set. This can be called for several reasons:
 *
 * 1) We are on a single or multi-overflow platform and we have no
 *    current events so we can just create the set and initialize it.
 * 2) We are on a multi-overflow platform and we already have one or
 *    more existing events and we are adding a new enabling. Create a
 *    new set and copy old requests in and then add the new request.
 * 3) We are on a multi-overflow platform and we have just removed an
 *    enabling but we still have enablings which are valid. Create a new
 *    set and copy in still valid requests.
 */
static kcpc_set_t *
dcpc_create_set(cpu_t *c)
{
        int i, reqno = 0;
        int active_requests = 0;
        kcpc_set_t *set;

        /*
         * First get a count of the number of currently active requests.
         * Note that dcpc_actv_reqs[] should always reflect which requests
         * we want to be in the set that is to be created. It is the
         * responsibility of the caller of dcpc_create_set() to adjust that
         * array accordingly beforehand.
         */
        for (i = 0; i < cpc_ncounters; i++) {
                if (dcpc_actv_reqs[i] != NULL)
                        active_requests++;
        }

        set = kmem_zalloc(sizeof (kcpc_set_t), KM_SLEEP);

        set->ks_req =
            kmem_zalloc(sizeof (kcpc_request_t) * active_requests, KM_SLEEP);

        set->ks_data =
            kmem_zalloc(active_requests * sizeof (uint64_t), KM_SLEEP);

        /*
         * Look for valid entries in the active requests array and populate
         * the request set for any entries found.
         */
        for (i = 0; i < cpc_ncounters; i++) {
                if (dcpc_actv_reqs[i] != NULL) {
                        dcpc_populate_set(c, dcpc_actv_reqs[i], set, reqno);
                        reqno++;
                }
        }

        return (set);
}

static int
dcpc_program_cpu_event(cpu_t *c)
{
        int i, j, subcode;
        kcpc_ctx_t *ctx, *octx;
        kcpc_set_t *set;

        set = dcpc_create_set(c);

        octx = NULL;
        set->ks_ctx = ctx = kcpc_ctx_alloc();
        ctx->kc_set = set;
        ctx->kc_cpuid = c->cpu_id;

        if (kcpc_assign_reqs(set, ctx) != 0)
                goto err;

        if (kcpc_configure_reqs(ctx, set, &subcode) != 0)
                goto err;

        for (i = 0; i < set->ks_nreqs; i++) {
                for (j = 0; j < cpc_ncounters; j++) {
                        if (dcpc_actv_reqs[j] != NULL &&
                            strcmp(set->ks_req[i].kr_event,
                            dcpc_actv_reqs[j]->dcpc_event_name) == 0) {
                                dcpc_actv_reqs[j]->dcpc_picno =
                                    set->ks_req[i].kr_picnum;
                        }
                }
        }

        /*
         * If we already have an active enabling then save the current cpc
         * context away.
         */
        if (c->cpu_cpc_ctx != NULL)
                octx = c->cpu_cpc_ctx;

        c->cpu_cpc_ctx = ctx;
        kcpc_remote_program(c);

        if (octx != NULL) {
                kcpc_set_t *oset = octx->kc_set;
                kmem_free(oset->ks_data, oset->ks_nreqs * sizeof (uint64_t));
                kcpc_free_set(oset);
                kcpc_ctx_free(octx);
        }

        return (0);

err:
        /*
         * We failed to configure this request, so free things up and
         * get out.
         */
        kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
        kcpc_free_set(set);
        kcpc_ctx_free(ctx);

        return (-1);
}

static void
dcpc_disable_cpu(cpu_t *c)
{
        kcpc_ctx_t *ctx;
        kcpc_set_t *set;

        /*
         * Leave this CPU alone if it's already offline.
         */
        if (c->cpu_flags & CPU_OFFLINE)
                return;

        kcpc_remote_stop(c);

        ctx = c->cpu_cpc_ctx;
        set = ctx->kc_set;

        kcpc_free_configs(set);

        kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
        kcpc_free_set(set);
        kcpc_ctx_free(ctx);
        c->cpu_cpc_ctx = NULL;
}

/*
 * Stop overflow interrupts being actively processed so that per-CPU
 * configuration state can be changed safely and correctly. Each CPU has a
 * dcpc interrupt state byte which is transitioned from DCPC_INTR_FREE (the
 * "free" state) to DCPC_INTR_CONFIG (the "configuration in progress" state)
 * before any configuration state is changed on any CPUs. The hardware
 * overflow handler, kcpc_hw_overflow_intr(), will only process an interrupt
 * when a configuration is not in progress (i.e. the state is marked as
 * free). During interrupt processing the state is set to
 * DCPC_INTR_PROCESSING by the overflow handler.
 */
static void
dcpc_block_interrupts(void)
{
        cpu_t *c;
        uint8_t *state;

        c = cpu_list;

        do {
                state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

                while (atomic_cas_8(state, DCPC_INTR_FREE,
                    DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
                        continue;

        } while ((c = c->cpu_next) != cpu_list);
}

/*
 * Set all CPUs' dcpc interrupt state to DCPC_INTR_FREE to indicate that
 * overflow interrupts can be processed safely.
 */
static void
dcpc_release_interrupts(void)
{
        cpu_t *c = cpu_list;

        do {
                cpu_core[c->cpu_id].cpuc_dcpc_intr_state = DCPC_INTR_FREE;
                membar_producer();
        } while ((c = c->cpu_next) != cpu_list);
}

/*
 * dcpc_program_event() can be called owing to a new enabling or if a
 * multi-overflow platform has disabled a request but needs to program the
 * requests that are still valid.
 *
 * Every invocation of dcpc_program_event() will create a new kcpc_ctx_t
 * and a new request set which contains the new enabling and any old enablings
 * which are still valid (possible with multi-overflow platforms).
 */
static int
dcpc_program_event(dcpc_probe_t *pp)
{
        cpu_t *c;
        int ret = 0;

        ASSERT(MUTEX_HELD(&cpu_lock));

        kpreempt_disable();

        dcpc_block_interrupts();

        c = cpu_list;

        do {
                /*
                 * Skip CPUs that are currently offline.
                 */
                if (c->cpu_flags & CPU_OFFLINE)
                        continue;

                if (c->cpu_cpc_ctx != NULL)
                        kcpc_remote_stop(c);
        } while ((c = c->cpu_next) != cpu_list);

        dcpc_release_interrupts();

        /*
         * If this enabling is being removed (in the case of a multi event
         * capable system with more than one active enabling), we can now
         * update the active request array to reflect the enablings that need
         * to be reprogrammed.
         */
        if (pp->dcpc_disabling == 1)
                dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;

        do {
                /*
                 * Skip CPUs that are currently offline.
                 */
                if (c->cpu_flags & CPU_OFFLINE)
                        continue;

                ret = dcpc_program_cpu_event(c);
        } while ((c = c->cpu_next) != cpu_list && ret == 0);

        /*
         * If dcpc_program_cpu_event() fails then it is because we couldn't
         * configure the requests in the set for the CPU and not because of
         * an error programming the hardware. If we have a failure here then
         * we assume no CPUs have been programmed in the above step as they
         * are all configured identically.
         */
        if (ret != 0) {
                pp->dcpc_enabled = 0;
                kpreempt_enable();
                return (-1);
        }

        if (pp->dcpc_disabling != 1)
                pp->dcpc_enabled = 1;

        kpreempt_enable();

        return (0);
}

/*ARGSUSED*/
static int
dcpc_enable(void *arg, dtrace_id_t id, void *parg)
{
        dcpc_probe_t *pp = parg;
        int i, found = 0;
        cpu_t *c;

        ASSERT(MUTEX_HELD(&cpu_lock));

        /*
         * Bail out if the counters are being used by a libcpc consumer.
         */
        rw_enter(&kcpc_cpuctx_lock, RW_READER);
        if (kcpc_cpuctx > 0) {
                rw_exit(&kcpc_cpuctx_lock);
                return (-1);
        }

        dtrace_cpc_in_use++;
        rw_exit(&kcpc_cpuctx_lock);

        /*
         * Locate this enabling in the first free entry of the active
         * request array.
         */
        for (i = 0; i < cpc_ncounters; i++) {
                if (dcpc_actv_reqs[i] == NULL) {
                        dcpc_actv_reqs[i] = pp;
                        pp->dcpc_actv_req_idx = i;
                        found = 1;
                        break;
                }
        }

        /*
         * If we couldn't find a slot for this probe then there is no
         * room at the inn.
         */
        if (!found) {
                dtrace_cpc_in_use--;
                return (-1);
        }

        ASSERT(pp->dcpc_actv_req_idx >= 0);

        /*
         * The following must hold true if we are to (attempt to) enable
         * this request:
         *
         * 1) No enablings currently exist. We allow all platforms to
         *    proceed if this is true.
         *
         * OR
         *
         * 2) The platform is multi-overflow capable and there are fewer
         *    valid enablings than there are counters. There is no
         *    guarantee that a platform can accommodate as many events as
         *    it has counters for but we will at least try to program
         *    up to that many requests.
         *
         * The 'dcpc_enablings' variable is implicitly protected by locking
         * provided by the DTrace framework and the cpu management framework.
         */
        if (dcpc_enablings == 0 || (dcpc_mult_ovf_cap &&
            dcpc_enablings < cpc_ncounters)) {
                /*
                 * Before attempting to program the first enabling we need to
                 * invalidate any lwp-based contexts.
                 */
                if (dcpc_enablings == 0)
                        kcpc_invalidate_all();

                if (dcpc_program_event(pp) == 0) {
                        dcpc_enablings++;
                        return (0);
                }
        }

        /*
         * If active enablings existed before we failed to enable this probe
         * on a multi-event capable platform then we need to restart counters
         * as they will have been stopped in the attempted configuration. The
         * context should now just contain the request prior to this failed
         * enabling.
         */
        if (dcpc_enablings > 0 && dcpc_mult_ovf_cap) {
                c = cpu_list;

                ASSERT(dcpc_mult_ovf_cap == 1);
                do {
                        /*
                         * Skip CPUs that are currently offline.
                         */
                        if (c->cpu_flags & CPU_OFFLINE)
                                continue;

                        kcpc_remote_program(c);
                } while ((c = c->cpu_next) != cpu_list);
        }

        dtrace_cpc_in_use--;
        dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
        pp->dcpc_actv_req_idx = pp->dcpc_picno = -1;

        return (-1);
}

/*
 * If only one enabling is active then remove the context and free
 * everything up. If there are multiple enablings active then remove this
 * one, its associated meta-data and re-program the hardware.
 */
/*ARGSUSED*/
static void
dcpc_disable(void *arg, dtrace_id_t id, void *parg)
{
        cpu_t *c;
        dcpc_probe_t *pp = parg;

        ASSERT(MUTEX_HELD(&cpu_lock));

        kpreempt_disable();

        /*
         * This probe didn't actually make it as far as being fully enabled
         * so we needn't do anything with it.
         */
        if (pp->dcpc_enabled == 0) {
                /*
                 * If we actually allocated this request a slot in the
                 * request array but failed to enable it then remove the
                 * entry in the array.
                 */
                if (pp->dcpc_actv_req_idx >= 0) {
                        dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
                        pp->dcpc_actv_req_idx = pp->dcpc_picno =
                            pp->dcpc_disabling = -1;
                }

                kpreempt_enable();
                return;
        }

        /*
         * If this is the only enabling then stop all the counters and
         * free up the meta-data.
         */
        if (dcpc_enablings == 1) {
                ASSERT(dtrace_cpc_in_use == 1);

                dcpc_block_interrupts();

                c = cpu_list;

                do {
                        dcpc_disable_cpu(c);
                } while ((c = c->cpu_next) != cpu_list);

                dcpc_actv_reqs[pp->dcpc_actv_req_idx] = NULL;
                dcpc_release_interrupts();
        } else {
                /*
                 * This platform can support multiple overflow events and
                 * the enabling being disabled is not the last one. Remove this
                 * enabling and re-program the hardware with the new config.
                 */
                ASSERT(dcpc_mult_ovf_cap);
                ASSERT(dcpc_enablings > 1);

                pp->dcpc_disabling = 1;
                (void) dcpc_program_event(pp);
        }

        kpreempt_enable();

        dcpc_enablings--;
        dtrace_cpc_in_use--;
        pp->dcpc_enabled = 0;
        pp->dcpc_actv_req_idx = pp->dcpc_picno = pp->dcpc_disabling = -1;
}

/*ARGSUSED*/
static int
dcpc_cpu_setup(cpu_setup_t what, processorid_t cpu, void *arg)
{
        cpu_t *c;
        uint8_t *state;

        ASSERT(MUTEX_HELD(&cpu_lock));

        switch (what) {
        case CPU_OFF:
                /*
                 * Offline CPUs are not allowed to take part so remove this
                 * CPU if we are actively tracing.
                 */
                if (dtrace_cpc_in_use) {
                        c = cpu_get(cpu);
                        state = &cpu_core[c->cpu_id].cpuc_dcpc_intr_state;

                        /*
                         * Indicate that a configuration is in progress in
                         * order to stop overflow interrupts being processed
                         * on this CPU while we disable it.
                         */
                        while (atomic_cas_8(state, DCPC_INTR_FREE,
                            DCPC_INTR_CONFIG) != DCPC_INTR_FREE)
                                continue;

                        dcpc_disable_cpu(c);

                        /*
                         * Reset this CPU's interrupt state as the
                         * configuration has ended.
                         */
                        cpu_core[c->cpu_id].cpuc_dcpc_intr_state =
                            DCPC_INTR_FREE;
                        membar_producer();
                }
                break;

        case CPU_ON:
        case CPU_SETUP:
                /*
                 * This CPU is being initialized or brought online so program
                 * it with the current request set if we are actively tracing.
                 */
                if (dtrace_cpc_in_use) {
                        c = cpu_get(cpu);

                        (void) dcpc_program_cpu_event(c);
                }
                break;

        default:
                break;
        }

        return (0);
}

static dtrace_pattr_t dcpc_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_CPU },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};

static dtrace_pops_t dcpc_pops = {
        dcpc_provide,           /* dtps_provide */
        NULL,                   /* dtps_provide_module */
        dcpc_enable,            /* dtps_enable */
        dcpc_disable,           /* dtps_disable */
        NULL,                   /* dtps_suspend */
        NULL,                   /* dtps_resume */
        NULL,                   /* dtps_getargdesc */
        NULL,                   /* dtps_getargval */
        dcpc_usermode,          /* dtps_usermode */
        dcpc_destroy            /* dtps_destroy */
};

/*ARGSUSED*/
static int
dcpc_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
        return (0);
}

/*ARGSUSED*/
static int
dcpc_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        int error;

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *result = (void *)dcpc_devi;
                error = DDI_SUCCESS;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)0;
                error = DDI_SUCCESS;
                break;
        default:
                error = DDI_FAILURE;
        }
        return (error);
}

static int
dcpc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
        switch (cmd) {
        case DDI_DETACH:
                break;
        case DDI_SUSPEND:
                return (DDI_SUCCESS);
        default:
                return (DDI_FAILURE);
        }

        if (dtrace_unregister(dcpc_pid) != 0)
                return (DDI_FAILURE);

        ddi_remove_minor_node(devi, NULL);

        mutex_enter(&cpu_lock);
        unregister_cpu_setup_func(dcpc_cpu_setup, NULL);
        mutex_exit(&cpu_lock);

        kmem_free(dcpc_actv_reqs, cpc_ncounters * sizeof (dcpc_probe_t *));

        kcpc_unregister_dcpc();

        return (DDI_SUCCESS);
}

static int
dcpc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        uint_t caps;
        char *attrs;

        switch (cmd) {
        case DDI_ATTACH:
                break;
        case DDI_RESUME:
                return (DDI_SUCCESS);
        default:
                return (DDI_FAILURE);
        }

        if (kcpc_pcbe_loaded() == -1)
                return (DDI_FAILURE);

        caps = kcpc_pcbe_capabilities();

        if (!(caps & CPC_CAP_OVERFLOW_INTERRUPT)) {
                cmn_err(CE_NOTE, "!dcpc: Counter Overflow not supported"
                    " on this processor");
                return (DDI_FAILURE);
        }

        if (ddi_create_minor_node(devi, "dcpc", S_IFCHR, 0,
            DDI_PSEUDO, NULL) == DDI_FAILURE ||
            dtrace_register("cpc", &dcpc_attr, DTRACE_PRIV_KERNEL,
            NULL, &dcpc_pops, NULL, &dcpc_pid) != 0) {
                ddi_remove_minor_node(devi, NULL);
                return (DDI_FAILURE);
        }

        mutex_enter(&cpu_lock);
        register_cpu_setup_func(dcpc_cpu_setup, NULL);
        mutex_exit(&cpu_lock);

        dcpc_ovf_mask = (1 << cpc_ncounters) - 1;
        ASSERT(dcpc_ovf_mask != 0);

        if (caps & CPC_CAP_OVERFLOW_PRECISE)
                dcpc_mult_ovf_cap = 1;

        /*
         * Determine which, if any, mask attribute the back-end can use.
         */
        attrs = kcpc_list_attrs();
        if (strstr(attrs, "umask") != NULL)
                dcpc_mask_type |= DCPC_UMASK;
        else if (strstr(attrs, "emask") != NULL)
                dcpc_mask_type |= DCPC_EMASK;

        /*
         * The dcpc_actv_reqs array is used to store the requests that
         * we currently have programmed. The order of requests in this
         * array is not necessarily the order in which the events appear in
         * the kcpc_request_t array. Once entered into a slot in the array
         * the entry is not moved until it's removed.
         */
        dcpc_actv_reqs =
            kmem_zalloc(cpc_ncounters * sizeof (dcpc_probe_t *), KM_SLEEP);

        dcpc_min_overflow = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
            DDI_PROP_DONTPASS, "dcpc-min-overflow", DCPC_MIN_OVF_DEFAULT);

        kcpc_register_dcpc(dcpc_fire);

        ddi_report_dev(devi);
        dcpc_devi = devi;

        return (DDI_SUCCESS);
}

static struct cb_ops dcpc_cb_ops = {
        dcpc_open,              /* open */
        nodev,                  /* close */
        nulldev,                /* strategy */
        nulldev,                /* print */
        nodev,                  /* dump */
        nodev,                  /* read */
        nodev,                  /* write */
        nodev,                  /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        ddi_prop_op,            /* cb_prop_op */
        0,                      /* streamtab */
        D_NEW | D_MP            /* Driver compatibility flag */
};

static struct dev_ops dcpc_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt */
        dcpc_info,              /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        dcpc_attach,            /* attach */
        dcpc_detach,            /* detach */
        nodev,                  /* reset */
        &dcpc_cb_ops,           /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed  /* quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
        &mod_driverops,         /* module type */
        "DTrace CPC Module",    /* name of module */
        &dcpc_ops,              /* driver ops */
};

static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modldrv,
        NULL
};

int
_init(void)
{
        return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
        return (mod_remove(&modlinkage));
}