/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2021 Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 */

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/ksynch.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
#include <sys/promif.h>
#include <sys/x_call.h>
#include <sys/cap_util.h>
#if defined(__x86)
#include <asm/clock.h>
#include <sys/xc_levels.h>
#endif

static kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
static kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int		kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;    /* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;    /* # overflows in a thread with no ctx */

/*
 * By setting 'kcpc_nullctx_panic' to 1, any overflow interrupts in a thread
 * with no valid context will result in a panic.
 */
static int kcpc_nullctx_panic = 0;

static void kcpc_save(void *);
static void kcpc_restore(void *);
static void kcpc_lwp_create(void *, void *);
static void kcpc_free(void *, int);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);
static kcpc_set_t *kcpc_set_create(kcpc_request_t *reqs, int nreqs,
    int set_flags, int kmem_flags);

/*
 * Macros to manipulate context flags. All flag updates should use one of these
 * two macros.
 *
 * Flags should always be updated atomically since some of the updates are
 * not protected by locks.
 */
#define	KCPC_CTX_FLAG_SET(ctx, flag)	atomic_or_uint(&(ctx)->kc_flags, (flag))
#define	KCPC_CTX_FLAG_CLR(ctx, flag)	atomic_and_uint(&(ctx)->kc_flags, ~(flag))
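/*
 * For example, a context that is being torn down is typically marked with
 * both the invalid and stopped bits in a single atomic update:
 *
 *	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
 *
 * so that concurrent readers never observe the flags in a half-updated state.
 */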

/*
 * The IS_HIPIL() macro verifies that the code is executed either from a
 * cross-call or from a high-PIL interrupt
 */
#ifdef DEBUG
#define	IS_HIPIL()	(getpil() >= XCALL_PIL)
#else
#define	IS_HIPIL()
#endif	/* DEBUG */


extern int kcpc_hw_load_pcbe(void);

/*
 * Return value from kcpc_hw_load_pcbe()
 */
static int kcpc_pcbe_error = 0;

static const struct ctxop_template kcpc_ctxop_tpl = {
	.ct_rev		= CTXOP_TPL_REV,
	.ct_save	= kcpc_save,
	.ct_restore	= kcpc_restore,
	.ct_lwp_create	= kcpc_lwp_create,
	.ct_free	= kcpc_free,
};

/*
 * Perform one-time initialization of kcpc framework.
 * This function performs the initialization only the first time it is called.
 * It is safe to call it multiple times.
 */
int
kcpc_init(void)
{
	long hash;
	static uint32_t kcpc_initialized = 0;

	/*
	 * We already tried loading platform pcbe module and failed
	 */
	if (kcpc_pcbe_error != 0)
		return (-1);

	/*
	 * The kcpc framework should be initialized at most once
	 */
	if (atomic_cas_32(&kcpc_initialized, 0, 1) != 0)
		return (0);

	rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL);
	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++)
		mutex_init(&kcpc_ctx_llock[hash],
		    NULL, MUTEX_DRIVER, (void *)(uintptr_t)15);

	/*
	 * Load platform-specific pcbe module
	 */
	kcpc_pcbe_error = kcpc_hw_load_pcbe();

	return (kcpc_pcbe_error == 0 ? 0 : -1);
}

void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

void
kcpc_register_dcpc(void (*func)(uint64_t))
{
	dtrace_cpc_fire = func;
}

void
kcpc_unregister_dcpc(void)
{
	dtrace_cpc_fire = NULL;
}

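/*
 * Bind the given set to the specified CPU: allocate a context, assign and
 * configure the requests, and program the counters on that CPU. The calling
 * thread must be bound to the target CPU. Returns 0 on success, EINVAL if the
 * requests cannot be assigned or configured, and EAGAIN if the CPU is
 * unavailable or its counters are already in use.
 */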
int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;
	int		save_spl;

	ctx = kcpc_ctx_alloc(KM_SLEEP);

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else the
	 * cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DR'd out while we were getting
		 * set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);
	kpreempt_disable();
	save_spl = spl_xcall();

	/*
	 * Check to see whether the counters for this CPU are already being
	 * used by someone other than the kernel for capacity and utilization
	 * (since the kernel will let go of its counters for the user in
	 * kcpc_program() below)
	 */
	if (cp->cpu_cpc_ctx != NULL && !CU_CPC_ON(cp)) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		splx(save_spl);
		kpreempt_enable();
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		splx(save_spl);
		kpreempt_enable();
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	kcpc_program(ctx, B_FALSE, B_TRUE);

	splx(save_spl);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}

int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc(KM_SLEEP);

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_LWPINHERIT);
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_NONPRIV);

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	ctxop_install(t, &kcpc_ctxop_tpl, ctx);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		int save_spl;

		kpreempt_disable();
		save_spl = spl_xcall();
		kcpc_program(ctx, B_TRUE, B_TRUE);
		splx(save_spl);
		kpreempt_enable();
	} else {
		/*
		 * Since we are the agent LWP, we know the victim LWP is stopped
		 * until we're done here; no need to worry about preemption or
		 * migration here. We still use an atomic op to clear the flag
		 * to ensure the flags are always self-consistent; they can
		 * still be accessed from, for instance, another CPU doing a
		 * kcpc_invalidate_all().
		 */
		KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
	}

	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);
}

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_SIGOVF);
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			switch (ret) {
			case CPC_ATTR_REQUIRES_PRIVILEGE:
			case CPC_HV_NO_ACCESS:
				return (EACCES);
			default:
				return (EINVAL);
			}
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		save_spl;

	mutex_enter(&set->ks_lock);
	if ((set->ks_state & KCPC_SET_BOUND) == 0) {
		mutex_exit(&set->ks_lock);
		return (EINVAL);
	}
	mutex_exit(&set->ks_lock);

	/*
	 * Kernel preemption must be disabled while reading the hardware regs,
	 * and if this is a CPU-bound context, while checking the CPU binding
	 * of the current thread.
	 */
	kpreempt_disable();
	save_spl = spl_xcall();

	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		splx(save_spl);
		kpreempt_enable();
		return (EAGAIN);
	}

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				splx(save_spl);
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			uint64_t curtick = KCPC_GET_TICK();

			ctx->kc_hrtime = gethrtime_waitfree();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		/*
		 * The config may have been invalidated by
		 * the pcbe_sample op.
		 */
		if (ctx->kc_flags & KCPC_CTX_INVALID) {
			splx(save_spl);
			kpreempt_enable();
			return (EAGAIN);
		}

	}

	splx(save_spl);
	kpreempt_enable();

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	kpreempt_disable();

	if (ctx->kc_cpuid == CPU->cpu_id) {
		cp = CPU;
	} else {
		cp = cpu_get(ctx->kc_cpuid);
	}

	ASSERT(cp != NULL && cp->cpu_cpc_ctx == ctx);
	kcpc_cpu_stop(cp, B_FALSE);

	kpreempt_enable();
}

int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t;

	/*
	 * We could be racing with the process's agent thread as it
	 * binds the set; we must wait for the set to finish binding
	 * before attempting to tear it down.
	 */
	mutex_enter(&set->ks_lock);
	while ((set->ks_state & KCPC_SET_BOUND) == 0)
		cv_wait(&set->ks_condv, &set->ks_lock);
	mutex_exit(&set->ks_lock);

	ctx = set->ks_ctx;

	/*
	 * Use kc_lock to synchronize with kcpc_restore().
	 */
	mutex_enter(&ctx->kc_lock);
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
	mutex_exit(&ctx->kc_lock);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context. It will be freed via ctxop_remove() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread) {
			int save_spl;

			kpreempt_disable();
			save_spl = spl_xcall();
			if (!(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED))
				kcpc_unprogram(ctx, B_TRUE);
			splx(save_spl);
			kpreempt_enable();
		}
		VERIFY3U(ctxop_remove(t, &kcpc_ctxop_tpl, ctx), !=, 0);
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}

int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
	int i;

	ASSERT(set != NULL);
	ASSERT(set->ks_state & KCPC_SET_BOUND);
	ASSERT(set->ks_ctx->kc_thread == curthread);
	ASSERT(set->ks_ctx->kc_cpuid == -1);

	if (index < 0 || index >= set->ks_nreqs)
		return (EINVAL);

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_index == index)
			break;
	ASSERT(i != set->ks_nreqs);

	set->ks_req[i].kr_preset = preset;
	return (0);
}

int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;
	int		save_spl;

	ASSERT(set->ks_state & KCPC_SET_BOUND);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	kpreempt_disable();
	save_spl = spl_xcall();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	splx(save_spl);
	kpreempt_enable();

	return (0);
}

/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		KCPC_CTX_FLAG_SET(ctx,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();

		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which will
 * be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}

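/*
 * Allocate a new CPC context, link it onto the global context hash list, and
 * allocate its per-counter PIC array. The context starts out unbound
 * (kc_cpuid == -1).
 */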
kcpc_ctx_t *
kcpc_ctx_alloc(int kmem_flags)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), kmem_flags);
	if (ctx == NULL)
		return (NULL);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_zalloc(sizeof (*cks), KM_SLEEP);
	cks->ks_state &= ~KCPC_SET_BOUND;
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		kcpc_invalidate_config(cctx);

	mutex_enter(&cks->ks_lock);
	cks->ks_state |= KCPC_SET_BOUND;
	cv_signal(&cks->ks_condv);
	mutex_exit(&cks->ks_lock);
}

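/*
 * Unlink the context from the global context hash list and free it along
 * with its per-counter PIC array.
 */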
void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	cv_destroy(&ctx->kc_condv);
	mutex_destroy(&ctx->kc_lock);
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread. (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_inc_32(&kcpc_intrctx_count);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat. In that case, the
		 * bound thread that has the context that belongs to this
		 * CPU is almost certainly sleeping (if it was running on
		 * the CPU we'd have found it above), and the actual
		 * interrupted thread has no knowledge of performance counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong i.e. we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 *
		 * We also could end up here owing to an incredibly unlikely
		 * race condition that exists on x86 based architectures when
		 * the cpc provider is in use; overflow interrupts are directed
		 * to the cpc provider if the 'dtrace_cpc_in_use' variable is
		 * set when we enter the handler. This variable is unset after
		 * overflow interrupts have been disabled on all CPUs and all
		 * contexts have been torn down. To stop interrupts, the cpc
		 * provider issues a xcall to the remote CPU before it tears
		 * down that CPUs context. As high priority xcalls, on an x86
		 * architecture, execute at a higher PIL than this handler, it
		 * is possible (though extremely unlikely) that the xcall could
		 * interrupt the overflow handler before the handler has
		 * checked the 'dtrace_cpc_in_use' variable, stop the counters,
		 * return to the cpc provider which could then rip down
		 * contexts and unset 'dtrace_cpc_in_use' *before* the CPUs
		 * overflow handler has had a chance to check the variable. In
		 * that case, the handler would direct the overflow into this
		 * code and no valid context will be found. The default behavior
		 * when no valid context is found is now to shout a warning to
		 * the console and bump the 'kcpc_nullctx_count' variable.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);
#ifdef DEBUG
		cmn_err(CE_NOTE,
		    "null cpc context found in overflow handler!\n");
#endif
		atomic_inc_32(&kcpc_nullctx_count);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC, so
				 * freeze the context. The interrupt handler
				 * has already stopped the counter hardware.
				 */
				KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
		/*
		 * Thread context is no longer valid, but there may be a valid
		 * CPU context.
		 */
		return (curthread->t_cpu->cpu_cpc_ctx);
	}

	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t *ctx;
	uint64_t bitmap;
	uint8_t *state;
	int	save_spl;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	if (dtrace_cpc_in_use) {
		state = &cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state;

		/*
		 * Set the per-CPU state bit to indicate that we are currently
		 * processing an interrupt if it is currently free. Drop the
		 * interrupt if the state isn't free (i.e. a configuration
		 * event is taking place).
		 */
		if (atomic_cas_8(state, DCPC_INTR_FREE,
		    DCPC_INTR_PROCESSING) == DCPC_INTR_FREE) {
			int i;
			kcpc_request_t req;

			ASSERT(dtrace_cpc_fire != NULL);

			(*dtrace_cpc_fire)(bitmap);

			ctx = curthread->t_cpu->cpu_cpc_ctx;
			if (ctx == NULL) {
#ifdef DEBUG
				cmn_err(CE_NOTE, "null cpc context in "
				    "hardware overflow handler!\n");
#endif
				return (DDI_INTR_CLAIMED);
			}

			/* Reset any counters that have overflowed */
			for (i = 0; i < ctx->kc_set->ks_nreqs; i++) {
				req = ctx->kc_set->ks_req[i];

				if (bitmap & (1 << req.kr_picnum)) {
					pcbe_ops->pcbe_configure(req.kr_picnum,
					    req.kr_event, req.kr_preset,
					    req.kr_flags, req.kr_nattrs,
					    req.kr_attr, &(req.kr_config),
					    (void *)ctx);
				}
			}
			pcbe_ops->pcbe_program(ctx);

			/*
			 * We've finished processing the interrupt so set
			 * the state back to free.
			 */
			cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state =
			    DCPC_INTR_FREE;
			membar_producer();
		}
		return (DDI_INTR_CLAIMED);
	}

	/*
	 * DTrace isn't involved so pass on accordingly.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_hw_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 *
	 * The CPU's CPC context may disappear as a result of cross-call which
	 * has higher PIL on x86, so protect the context by raising PIL to the
	 * cross-call level.
	 */
	save_spl = spl_xcall();
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}
	splx(save_spl);

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(void *arg)
{
	kcpc_ctx_t *ctx = arg;
	int err;
	int save_spl;

	kpreempt_disable();
	save_spl = spl_xcall();

	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
			splx(save_spl);
			kpreempt_enable();
			return;
		}
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		kcpc_unprogram(ctx, B_TRUE);
		splx(save_spl);
		kpreempt_enable();
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE) {
		splx(save_spl);
		kpreempt_enable();
		return;
	}

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime_waitfree();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);

	/*
	 * Program counter for measuring capacity and utilization since user
	 * thread isn't using counter anymore
	 */
	ASSERT(ctx->kc_cpuid == -1);
	cu_cpc_program(CPU, &err);
	splx(save_spl);
	kpreempt_enable();
}

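/*
 * Called when switching back to the thread which owns this context; reprogram
 * the counters unless the context has been invalidated or frozen.
 */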
static void
kcpc_restore(void *arg)
{
	kcpc_ctx_t *ctx = arg;
	int save_spl;

	mutex_enter(&ctx->kc_lock);

	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID) {
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);
	}

	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
		mutex_exit(&ctx->kc_lock);
		return;
	}

	/*
	 * Set kc_flags to show that a kcpc_restore() is in progress to avoid
	 * ctx & set related memory objects being freed without us knowing.
	 * This can happen if an agent thread is executing a kcpc_unbind(),
	 * with this thread as the target, whilst we're concurrently doing a
	 * restorectx() during, for example, a proc_exit().  Effectively, by
	 * doing this, we're asking kcpc_free() to cv_wait() until
	 * kcpc_restore() has completed.
	 */
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_RESTORE);
	mutex_exit(&ctx->kc_lock);

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	kpreempt_disable();
	save_spl = spl_xcall();
	kcpc_program(ctx, B_TRUE, B_TRUE);
	splx(save_spl);
	kpreempt_enable();

	/*
	 * Wake the agent thread if it's waiting in kcpc_free().
	 */
	mutex_enter(&ctx->kc_lock);
	KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_RESTORE);
	cv_signal(&ctx->kc_condv);
	mutex_exit(&ctx->kc_lock);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */
/*ARGSUSED*/
static void
kcpc_idle_save(void *arg)
{
	struct cpu *cp = arg;

	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

static void
kcpc_idle_restore(void *arg)
{
	struct cpu *cp = arg;

	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

static const struct ctxop_template kcpc_idle_ctxop_tpl = {
	.ct_rev		= CTXOP_TPL_REV,
	.ct_save	= kcpc_idle_save,
	.ct_restore	= kcpc_idle_restore,
};

void
kcpc_idle_ctxop_install(kthread_t *t, struct cpu *cp)
{
	ctxop_install(t, &kcpc_idle_ctxop_tpl, cp);
}

/*ARGSUSED*/
static void
kcpc_lwp_create(void *parent, void *child)
{
	kthread_t	*t = parent, *ct = child;
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc(KM_SLEEP);
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Copy the parent context's kc_flags field, but don't overwrite
	 * the child's in case it was modified during kcpc_ctx_clone.
	 */
	KCPC_CTX_FLAG_SET(cctx, ctx->kc_flags);
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				atomic_or_uint(&kr->kr_picp->kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	ctxop_install(ct, &kcpc_ctxop_tpl, cctx);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
 * to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
 * when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated data
 * structures are freed, and that the hardware is passivated if this is an exec.
 */

/*ARGSUSED*/
void
kcpc_free(void *arg, int isexec)
{
	kcpc_ctx_t *ctx = arg;
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	/*
	 * Wait for kcpc_restore() to finish before we tear things down.
	 */
	mutex_enter(&ctx->kc_lock);
	while (ctx->kc_flags & KCPC_CTX_RESTORE)
		cv_wait(&ctx->kc_condv, &ctx->kc_lock);
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
	mutex_exit(&ctx->kc_lock);

	if (isexec) {
		/*
		 * This thread is execing, and after the exec it should not have
		 * any performance counter context. Stop the counters properly
		 * here so the system isn't surprised by an overflow interrupt
		 * later.
		 */
		if (ctx->kc_cpuid != -1) {
			cpu_t *cp;
			/*
			 * CPU-bound context; stop the appropriate CPU's ctrs.
			 * Hold cpu_lock while examining the CPU to ensure it
			 * doesn't go away.
			 */
			mutex_enter(&cpu_lock);
			cp = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU could have been DR'd out, so only stop the
			 * CPU and clear its context pointer if the CPU still
			 * exists.
			 */
			if (cp != NULL) {
				mutex_enter(&cp->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				mutex_exit(&cp->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		} else {
			int save_spl;

			/*
			 * Thread-bound context; stop _this_ CPU's counters.
			 */
			kpreempt_disable();
			save_spl = spl_xcall();
			kcpc_unprogram(ctx, B_TRUE);
			curthread->t_cpc_ctx = NULL;
			splx(save_spl);
			kpreempt_enable();
		}

		/*
		 * Since we are being called from an exec and we know that
		 * exec is not permitted via the agent thread, we should clean
		 * up this thread's CPC state completely, and not leave dangling
		 * CPC pointers behind.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Walk through each request in this context's set and free the PCBE's
	 * configuration if it exists.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
	}

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}

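/*
 * Free a CPU-bound context and its associated set.
 */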
void
kcpc_free_cpu(kcpc_ctx_t *ctx)
{
	kcpc_free(ctx, 0);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
	int		i;
	kcpc_request_t	*req;

	ASSERT(set->ks_req != NULL);

	for (i = 0; i < set->ks_nreqs; i++) {
		req = &set->ks_req[i];

		if (req->kr_nattrs != 0) {
			kmem_free(req->kr_attr,
			    req->kr_nattrs * sizeof (kcpc_attr_t));
		}
	}

	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
	cv_destroy(&set->ks_condv);
	mutex_destroy(&set->ks_lock);
	kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
 */
void
kcpc_invalidate_all(void)
{
	kcpc_ctx_t *ctx;
	long hash;

	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
		mutex_enter(&kcpc_ctx_llock[hash]);
		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
			KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
		mutex_exit(&kcpc_ctx_llock[hash]);
	}
}

/*
 * Interface for PCBEs to signal that an existing configuration has suddenly
 * become invalid.
 */
void
kcpc_invalidate_config(void *token)
{
	kcpc_ctx_t *ctx = token;

	ASSERT(ctx != NULL);

	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;
	int	save_spl;

	if (set == NULL)
		return;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);
		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	kpreempt_disable();
	save_spl = spl_xcall();
	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds.  Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		kcpc_unprogram(ctx, B_TRUE);
		KCPC_CTX_FLAG_SET(ctx,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx().
	 */
	curthread->t_cpc_ctx = NULL;

	splx(save_spl);
	kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
	int i;
	int *picnum_save;

	ASSERT(set->ks_nreqs <= cpc_ncounters);

	/*
	 * Provide kcpc_tryassign() with scratch space to avoid doing an
	 * alloc/free with every invocation.
	 */
	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
	/*
	 * kcpc_tryassign() blindly walks through each request in the set,
	 * seeing if a counter can count its event. If yes, it assigns that
	 * counter. However, that counter may have been the only capable counter
	 * for _another_ request's event. The solution is to try every possible
	 * request first. Note that this does not cover all solutions, as
	 * that would require all unique orderings of requests, an n^n operation
	 * which would be unacceptable for architectures with many counters.
	 */
	for (i = 0; i < set->ks_nreqs; i++)
		if (kcpc_tryassign(set, i, picnum_save) == 0)
			break;

	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
	if (i == set->ks_nreqs)
		return (-1);
	return (0);
}

static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
	int		i;
	int		j;
	uint64_t	bitmap = 0, resmap = 0;
	uint64_t	ctrmap;

	/*
	 * We are attempting to assign the reqs to pics, but we may fail. If we
	 * fail, we need to restore the state of the requests to what it was
	 * when we found it, as some reqs may have been explicitly assigned to
	 * a specific PIC beforehand. We do this by snapshotting the assignments
	 * now and restoring from it later if we fail.
	 *
	 * Also we note here which counters have already been claimed by
	 * requests with explicit counter assignments.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		scratch[i] = set->ks_req[i].kr_picnum;
		if (set->ks_req[i].kr_picnum != -1)
			resmap |= (1 << set->ks_req[i].kr_picnum);
	}

	/*
	 * Walk through requests assigning them to the first PIC that is
	 * capable.
	 */
	i = starting_req;
	do {
		if (set->ks_req[i].kr_picnum != -1) {
			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
			bitmap |= (1 << set->ks_req[i].kr_picnum);
			if (++i == set->ks_nreqs)
				i = 0;
			continue;
		}

		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
		for (j = 0; j < cpc_ncounters; j++) {
			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
			    (resmap & (1 << j)) == 0) {
				/*
				 * We can assign this counter because:
				 *
				 * 1. It can count the event (ctrmap)
				 * 2. It hasn't been assigned yet (bitmap)
				 * 3. It wasn't reserved by a request (resmap)
				 */
				bitmap |= (1 << j);
				break;
			}
		}
		if (j == cpc_ncounters) {
			for (i = 0; i < set->ks_nreqs; i++)
				set->ks_req[i].kr_picnum = scratch[i];
			return (-1);
		}
		set->ks_req[i].kr_picnum = j;

		if (++i == set->ks_nreqs)
			i = 0;
	} while (i != starting_req);

	return (0);
}

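/*
 * Create an unbound copy of the given set, duplicating each request and its
 * attributes. The copy has no data store, context, or PCBE configuration.
 */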
kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
	kcpc_set_t	*new;
	int		i;
	int		j;

	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
	new->ks_state &= ~KCPC_SET_BOUND;
	new->ks_flags = set->ks_flags;
	new->ks_nreqs = set->ks_nreqs;
	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
	    KM_SLEEP);
	new->ks_data = NULL;
	new->ks_ctx = NULL;

	for (i = 0; i < new->ks_nreqs; i++) {
		new->ks_req[i].kr_config = NULL;
		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
		new->ks_req[i].kr_picp = NULL;
		new->ks_req[i].kr_data = NULL;
		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
		    CPC_MAX_EVENT_LEN);
		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
		    sizeof (kcpc_attr_t), KM_SLEEP);
		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
			new->ks_req[i].kr_attr[j].ka_val =
			    set->ks_req[i].kr_attr[j].ka_val;
			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
			    set->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
		}
	}

	return (new);
}

int
kcpc_allow_nonpriv(void *token)
{
	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

void
kcpc_invalidate(kthread_t *t)
{
	kcpc_ctx_t *ctx = t->t_cpc_ctx;

	if (ctx != NULL)
		KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	uint_t s[3];

	s[0] = first;
	s[1] = second;
	s[2] = third;

	return (modload_qualified("pcbe",
	    "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
}

/*
 * Create one or more CPC context for given CPU with specified counter event
 * requests
 *
 * If number of requested counter events is less than or equal to the number of
 * hardware counters on a CPU and can all be assigned to the counters on a CPU
 * at the same time, then make one CPC context.
 *
 * Otherwise, multiple CPC contexts are created to allow multiplexing more
 * counter events than existing counters onto the counters by iterating through
 * all of the CPC contexts, programming the counters with each CPC context one
 * at a time and measuring the resulting counter values. Each of the resulting
 * CPC contexts contains some number of requested counter events less than or
 * equal to the number of counters on a CPU depending on whether all the
 * counter events can be programmed on all the counters at the same time or
 * not.
 *
 * Flags to kmem_{,z}alloc() are passed in as an argument to allow specifying
 * whether memory allocation should be non-blocking or not. The code will try
 * to allocate *whole* CPC contexts if possible. If there is any memory
 * allocation failure during the allocations needed for a given CPC context, it
 * will skip allocating that CPC context because it cannot allocate the whole
 * thing. Thus, the only time that it will end up allocating none (ie. no CPC
 * contexts whatsoever) is when it cannot even allocate *one* whole CPC context
 * without a memory allocation failure occurring.
 */
int
kcpc_cpu_ctx_create(cpu_t *cp, kcpc_request_list_t *req_list, int kmem_flags,
    kcpc_ctx_t ***ctx_ptr_array, size_t *ctx_ptr_array_sz)
{
	kcpc_ctx_t	**ctx_ptrs;
	int		nctx;
	int		nctx_ptrs;
	int		nreqs;
	kcpc_request_t	*reqs;

	if (cp == NULL || ctx_ptr_array == NULL || ctx_ptr_array_sz == NULL ||
	    req_list == NULL || req_list->krl_cnt < 1)
		return (-1);

	/*
	 * Allocate number of sets assuming that each set contains one and only
	 * one counter event request for each counter on a CPU
	 */
	nreqs = req_list->krl_cnt;
	nctx_ptrs = (nreqs + cpc_ncounters - 1) / cpc_ncounters;
	ctx_ptrs = kmem_zalloc(nctx_ptrs * sizeof (kcpc_ctx_t *), kmem_flags);
	if (ctx_ptrs == NULL)
		return (-2);

	/*
	 * Fill in sets of requests
	 */
	nctx = 0;
	reqs = req_list->krl_list;
	while (nreqs > 0) {
		kcpc_ctx_t	*ctx;
		kcpc_set_t	*set;
		int		subcode;

		/*
		 * Allocate CPC context and set for requested counter events
		 */
		ctx = kcpc_ctx_alloc(kmem_flags);
		set = kcpc_set_create(reqs, nreqs, 0, kmem_flags);
		if (set == NULL) {
			kcpc_ctx_free(ctx);
			break;
		}

		/*
		 * Determine assignment of requested counter events to specific
		 * counters
		 */
		if (kcpc_assign_reqs(set, ctx) != 0) {
			/*
			 * May not be able to assign requested counter events
			 * to all counters since all counters may not be able
			 * to do all events, so only do one counter event in
			 * set of counter requests when this happens since at
			 * least one of the counters must be able to do the
			 * event.
			 */
			kcpc_free_set(set);
			set = kcpc_set_create(reqs, 1, 0, kmem_flags);
			if (set == NULL) {
				kcpc_ctx_free(ctx);
				break;
			}
			if (kcpc_assign_reqs(set, ctx) != 0) {
#ifdef DEBUG
				cmn_err(CE_NOTE, "!kcpc_cpu_ctx_create: can't "
				    "assign counter event %s!\n",
				    set->ks_req->kr_event);
#endif
				kcpc_free_set(set);
				kcpc_ctx_free(ctx);
				reqs++;
				nreqs--;
				continue;
			}
		}

		/*
		 * Allocate memory needed to hold requested counter event data
		 */
		set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t),
		    kmem_flags);
		if (set->ks_data == NULL) {
			kcpc_free_set(set);
			kcpc_ctx_free(ctx);
			break;
		}

		/*
		 * Configure requested counter events
		 */
		if (kcpc_configure_reqs(ctx, set, &subcode) != 0) {
#ifdef DEBUG
			cmn_err(CE_NOTE,
			    "!kcpc_cpu_ctx_create: can't configure "
			    "set of counter event requests!\n");
#endif
			reqs += set->ks_nreqs;
			nreqs -= set->ks_nreqs;
			kmem_free(set->ks_data,
			    set->ks_nreqs * sizeof (uint64_t));
			kcpc_free_set(set);
			kcpc_ctx_free(ctx);
			continue;
		}

		/*
		 * Point set of counter event requests at this context and fill
		 * in CPC context
		 */
		set->ks_ctx = ctx;
		ctx->kc_set = set;
		ctx->kc_cpuid = cp->cpu_id;
		ctx->kc_thread = curthread;

		ctx_ptrs[nctx] = ctx;

		/*
		 * Update requests and how many are left to be assigned to sets
		 */
		reqs += set->ks_nreqs;
		nreqs -= set->ks_nreqs;

		/*
		 * Increment number of CPC contexts and allocate bigger array
		 * for context pointers as needed
		 */
		nctx++;
		if (nctx >= nctx_ptrs) {
			kcpc_ctx_t	**new;
			int		new_cnt;

			/*
			 * Allocate more CPC contexts based on how many
			 * contexts allocated so far and how many counter
			 * requests left to assign
			 */
			new_cnt = nctx_ptrs +
			    ((nreqs + cpc_ncounters - 1) / cpc_ncounters);
			new = kmem_zalloc(new_cnt * sizeof (kcpc_ctx_t *),
			    kmem_flags);
			if (new == NULL)
				break;

			/*
			 * Copy contents of old sets into new ones
			 */
			bcopy(ctx_ptrs, new,
			    nctx_ptrs * sizeof (kcpc_ctx_t *));

			/*
			 * Free old array of context pointers and use newly
			 * allocated one instead now
			 */
			kmem_free(ctx_ptrs, nctx_ptrs * sizeof (kcpc_ctx_t *));
			ctx_ptrs = new;
			nctx_ptrs = new_cnt;
		}
	}

	/*
	 * Return NULL if no CPC contexts filled in
	 */
	if (nctx == 0) {
		kmem_free(ctx_ptrs, nctx_ptrs * sizeof (kcpc_ctx_t *));
		*ctx_ptr_array = NULL;
		*ctx_ptr_array_sz = 0;
		return (-2);
	}

	*ctx_ptr_array = ctx_ptrs;
	*ctx_ptr_array_sz = nctx_ptrs * sizeof (kcpc_ctx_t *);
	return (nctx);
}

/*
 * Return whether PCBE supports given counter event
 */
boolean_t
kcpc_event_supported(char *event)
{
	if (pcbe_ops == NULL || pcbe_ops->pcbe_event_coverage(event) == 0)
		return (B_FALSE);

	return (B_TRUE);
}

/*
 * Program counters on current CPU with given CPC context
 *
 * If kernel is interposing on counters to measure hardware capacity and
 * utilization, then unprogram counters for kernel *before* programming them
 * with specified CPC context.
 *
 * kcpc_{program,unprogram}() may be called either directly by a thread running
 * on the target CPU or from a cross-call from another CPU. To protect
 * programming and unprogramming from being interrupted by cross-calls, callers
 * of kcpc_{program,unprogram}() should raise PIL to the level used by
 * cross-calls.
 */
void
kcpc_program(kcpc_ctx_t *ctx, boolean_t for_thread, boolean_t cu_interpose)
{
	int	error;

	ASSERT(IS_HIPIL());

	/*
	 * CPC context shouldn't be NULL, its CPU field should specify current
	 * CPU or be -1 to specify any CPU when the context is bound to a
	 * thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);
	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1)
		return;

	/*
	 * Unprogram any counters that the kernel is using to measure hardware
	 * capacity and utilization
	 */
	if (cu_interpose == B_TRUE) {
		cu_cpc_unprogram(CPU, &error);
	} else {
		kcpc_set_t *set = ctx->kc_set;
		int i;

		ASSERT(set != NULL);

		/*
		 * Since cu_interpose is false, we are programming the CU
		 * context.  In general, the PCBE can continue from the state
		 * saved in the set, but it is not very reliable, so we start
		 * again from the preset value.
		 */
		for (i = 0; i < set->ks_nreqs; i++) {
			/*
			 * Reset the virtual counter value to the preset value.
			 */
			*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;

			/*
			 * Reset PCBE to the preset value.
			 */
			pcbe_ops->pcbe_configure(0, NULL,
			    set->ks_req[i].kr_preset,
			    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
		}
	}

	/*
	 * Program counters with specified CPC context
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Record that counters are programmed: unfreeze a thread CPC context
	 * or attach a CPU CPC context to the current CPU
	 */
	if (for_thread == B_TRUE)
		KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
	else
		CPU->cpu_cpc_ctx = ctx;
}

/*
 * Unprogram counters with given CPC context on current CPU
 *
 * If kernel is interposing on counters to measure hardware capacity and
 * utilization, then program counters for the kernel capacity and utilization
 * *after* unprogramming them for given CPC context.
 *
 * See the comment for kcpc_program() regarding the synchronization with
 * cross-calls.
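 *
 * For instance (an illustrative caller), kcpc_cpustop_func() below unprograms
 * the current CPU's context from a cross-call and passes cu_interpose ==
 * B_TRUE so that the kernel's capacity/utilization counters are reprogrammed
 * afterwards:
 *
 *	kcpc_unprogram(CPU->cpu_cpc_ctx, B_TRUE);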
 */
void
kcpc_unprogram(kcpc_ctx_t *ctx, boolean_t cu_interpose)
{
	int	error;

	ASSERT(IS_HIPIL());

	/*
	 * CPC context shouldn't be NULL, its CPU field should specify current
	 * CPU or be -1 to specify any CPU when the context is bound to a
	 * thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);

	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1 ||
	    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) != 0) {
		return;
	}

	/*
	 * Specified CPC context to be unprogrammed should be bound to current
	 * CPU or thread
	 */
	ASSERT(CPU->cpu_cpc_ctx == ctx || curthread->t_cpc_ctx == ctx);

	/*
	 * Stop counters
	 */
	pcbe_ops->pcbe_allstop();
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);

	/*
	 * Allow kernel to interpose on counters and program them for its own
	 * use to measure hardware capacity and utilization if cu_interpose
	 * argument is true
	 */
	if (cu_interpose == B_TRUE)
		cu_cpc_program(CPU, &error);
}

/*
 * Read CPU performance counters (CPC) on current CPU and call specified update
 * routine with data for each counter event currently programmed on CPU
 */
int
kcpc_read(kcpc_update_func_t update_func)
{
	kcpc_ctx_t	*ctx;
	int		i;
	kcpc_request_t	*req;
	int		retval;
	kcpc_set_t	*set;

	ASSERT(IS_HIPIL());

	/*
	 * Can't grab locks or block because this may be called from inside
	 * the dispatcher
	 */
	kpreempt_disable();

	ctx = CPU->cpu_cpc_ctx;
	if (ctx == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Read counter data from current CPU
	 */
	pcbe_ops->pcbe_sample(ctx);

	set = ctx->kc_set;
	if (set == NULL || set->ks_req == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Call update function with the caller-supplied pointer and data for
	 * each CPC event request currently programmed on current CPU
	 */
	req = set->ks_req;
	retval = 0;
	for (i = 0; i < set->ks_nreqs; i++) {
		int ret;

		if (req[i].kr_data == NULL)
			break;

		ret = update_func(req[i].kr_ptr, *req[i].kr_data);
		if (ret < 0)
			retval = ret;
	}

	kpreempt_enable();

	return (retval);
}

/*
 * Initialize list of counter event requests
 */
kcpc_request_list_t *
kcpc_reqs_init(int nreqs, int kmem_flags)
{
	kcpc_request_list_t	*req_list;
	kcpc_request_t		*reqs;

	if (nreqs < 1)
		return (NULL);

	req_list = kmem_zalloc(sizeof (kcpc_request_list_t), kmem_flags);
	if (req_list == NULL)
		return (NULL);

	reqs = kmem_zalloc(nreqs * sizeof (kcpc_request_t), kmem_flags);
	if (reqs == NULL) {
		kmem_free(req_list, sizeof (kcpc_request_list_t));
		return (NULL);
	}

	req_list->krl_list = reqs;
	req_list->krl_cnt = 0;
	req_list->krl_max = nreqs;
	return (req_list);
}

/*
 * Add counter event request to given list of counter event requests
 */
int
kcpc_reqs_add(kcpc_request_list_t *req_list, char *event, uint64_t preset,
    uint_t flags, uint_t nattrs, kcpc_attr_t *attr, void *ptr, int kmem_flags)
{
	kcpc_request_t	*req;

	if (req_list == NULL || req_list->krl_list == NULL)
		return (-1);

	ASSERT(req_list->krl_max != 0);

	/*
	 * Allocate more space if the list is full
	 */
	if (req_list->krl_cnt >= req_list->krl_max) {
		kcpc_request_t	*new;
		kcpc_request_t	*old;

		old = req_list->krl_list;
		new = kmem_zalloc((req_list->krl_max +
		    cpc_ncounters) * sizeof (kcpc_request_t), kmem_flags);
		if (new == NULL)
			return (-2);

		req_list->krl_list = new;
		bcopy(old, req_list->krl_list,
		    req_list->krl_cnt * sizeof (kcpc_request_t));
		kmem_free(old, req_list->krl_max * sizeof (kcpc_request_t));
		req_list->krl_max += cpc_ncounters;
	}

	/*
	 * Fill in request as much as possible now, but some fields will need
	 * to be set when request is assigned to a set.
	 */
	req = &req_list->krl_list[req_list->krl_cnt];
	req->kr_config = NULL;
	req->kr_picnum = -1;	/* have CPC pick this */
	req->kr_index = -1;	/* set when assigning request to set */
	req->kr_data = NULL;	/* set when configuring request */
	(void) strcpy(req->kr_event, event);
	req->kr_preset = preset;
	req->kr_flags = flags;
	req->kr_nattrs = nattrs;
	req->kr_attr = attr;
	/*
	 * Keep pointer given by caller to give to update function when this
	 * counter event is sampled/read
	 */
	req->kr_ptr = ptr;

	req_list->krl_cnt++;

	return (0);
}

/*
 * Reset list of CPC event requests so its space can be used for another set
 * of requests
 */
int
kcpc_reqs_reset(kcpc_request_list_t *req_list)
{
	/*
	 * Return when pointer to request list structure or request is NULL or
	 * when max requests is less than or equal to 0
	 */
	if (req_list == NULL || req_list->krl_list == NULL ||
	    req_list->krl_max <= 0)
		return (-1);

	/*
	 * Zero out requests and number of requests used
	 */
	bzero(req_list->krl_list, req_list->krl_max * sizeof (kcpc_request_t));
	req_list->krl_cnt = 0;
	return (0);
}

/*
 * Free given list of counter event requests
 */
int
kcpc_reqs_fini(kcpc_request_list_t *req_list)
{
	kmem_free(req_list->krl_list,
	    req_list->krl_max * sizeof (kcpc_request_t));
	kmem_free(req_list, sizeof (kcpc_request_list_t));
	return (0);
}

/*
 * Create set of given counter event requests
 */
static kcpc_set_t *
kcpc_set_create(kcpc_request_t *reqs, int nreqs, int set_flags, int kmem_flags)
{
	int		i;
	kcpc_set_t	*set;

	/*
	 * Allocate set and assign number of requests in set and flags
	 */
	set = kmem_zalloc(sizeof (kcpc_set_t), kmem_flags);
	if (set == NULL)
		return (NULL);

	if (nreqs < cpc_ncounters)
		set->ks_nreqs = nreqs;
	else
		set->ks_nreqs = cpc_ncounters;

	set->ks_flags = set_flags;

	/*
	 * Allocate requests needed, copy requests into set, and set index into
	 * data for each request (which may change when we assign requested
	 * counter events to counters)
	 */
	set->ks_req = (kcpc_request_t *)kmem_zalloc(sizeof (kcpc_request_t) *
	    set->ks_nreqs, kmem_flags);
	if (set->ks_req == NULL) {
		kmem_free(set, sizeof (kcpc_set_t));
		return (NULL);
	}

	bcopy(reqs, set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);

	for (i = 0; i < set->ks_nreqs; i++)
		set->ks_req[i].kr_index = i;

	return (set);
}

/*
 * Stop counters on current CPU.
 *
 * If preserve_context is true, the caller is interested in the CPU's CPC
 * context and wants it to be preserved.
 *
 * If preserve_context is false, the caller does not need the CPU's CPC context
 * to be preserved, so it is set to NULL.
 */
static void
kcpc_cpustop_func(uintptr_t arg1, uintptr_t arg2 __unused)
{
	boolean_t preserve_context;

	kpreempt_disable();

	preserve_context = (boolean_t)arg1;

	/*
	 * Someone already stopped this context before us, so there is nothing
	 * to do.
	 */
	if (CPU->cpu_cpc_ctx == NULL) {
		kpreempt_enable();
		return;
	}

	kcpc_unprogram(CPU->cpu_cpc_ctx, B_TRUE);

	/*
	 * If CU does not use counters, then clear the CPU's CPC context.
	 * If the caller requested that the context be preserved, it should
	 * have disabled CU first, so there should be no CU context now.
	 */
	ASSERT(!preserve_context || !CU_CPC_ON(CPU));
	if (!preserve_context && CPU->cpu_cpc_ctx != NULL && !CU_CPC_ON(CPU))
		CPU->cpu_cpc_ctx = NULL;

	kpreempt_enable();
}

/*
 * Stop counters on given CPU and set its CPC context to NULL unless
 * preserve_context is true.
 */
void
kcpc_cpu_stop(cpu_t *cp, boolean_t preserve_context)
{
	cpu_call(cp, kcpc_cpustop_func, preserve_context, 0);
}

/*
 * Program the context on the current CPU
 */
static void
kcpc_remoteprogram_func(uintptr_t arg1, uintptr_t arg2)
{
	kcpc_ctx_t *ctx = (kcpc_ctx_t *)arg1;
	boolean_t for_thread = (boolean_t)arg2;

	ASSERT(ctx != NULL);

	kpreempt_disable();
	kcpc_program(ctx, for_thread, B_TRUE);
	kpreempt_enable();
}

/*
 * Program counters on given CPU
 */
void
kcpc_cpu_program(cpu_t *cp, kcpc_ctx_t *ctx)
{
	cpu_call(cp, kcpc_remoteprogram_func, (uintptr_t)ctx,
	    (uintptr_t)B_FALSE);
}

char *
kcpc_list_attrs(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_attrs());
}

char *
kcpc_list_events(uint_t pic)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_events(pic));
}

uint_t
kcpc_pcbe_capabilities(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_caps);
}

int
kcpc_pcbe_loaded(void)
{
	return (pcbe_ops == NULL ? -1 : 0);
}
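
/*
 * Illustrative usage sketch (not compiled into the kernel; the event name,
 * flags, and error handling are assumptions for the example only) showing how
 * the request list and CPC context interfaces above fit together when
 * building and programming CPU-bound contexts:
 *
 *	kcpc_request_list_t *rl;
 *	kcpc_ctx_t **ctxs;
 *	size_t ctxs_sz;
 *	int nctx;
 *
 *	rl = kcpc_reqs_init(cpc_ncounters, KM_SLEEP);
 *	(void) kcpc_reqs_add(rl, "PAPI_tot_ins", 0,
 *	    CPC_COUNT_USER | CPC_COUNT_SYSTEM, 0, NULL, NULL, KM_SLEEP);
 *	nctx = kcpc_cpu_ctx_create(cp, rl, KM_SLEEP, &ctxs, &ctxs_sz);
 *	if (nctx > 0)
 *		kcpc_cpu_program(cp, ctxs[0]);
 *	(void) kcpc_reqs_fini(rl);
 *
 * Only one CPC context can be active on a CPU at a time, so a caller that
 * ends up with more than one context must alternate between them itself;
 * ctxs_sz gives the size of the returned pointer array for a later
 * kmem_free().
 */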