/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#if defined(__x86)
#include <asm/clock.h>
#endif

kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;    /* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;    /* # overflows in a thread with no ctx */

/*
 * Is misbehaviour (overflow in a thread with no context) fatal?
 */
#ifdef DEBUG
static int kcpc_nullctx_panic = 1;
#else
static int kcpc_nullctx_panic = 0;
#endif

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode);
static void kcpc_free_configs(kcpc_set_t *set);
static kcpc_ctx_t *kcpc_ctx_alloc(void);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static void kcpc_ctx_free(kcpc_ctx_t *ctx);
static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);

void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else
	 * the cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}
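
/*
 * Illustrative usage sketch for the CPU-bound path (hypothetical caller;
 * binding is typically driven from the cpc driver's ioctl path).  The
 * calling thread must already be bound to the target CPU, i.e.
 * curthread->t_bind_cpu == cpuid, or kcpc_bind_cpu() returns EAGAIN:
 *
 *	int subcode, err;
 *
 *	err = kcpc_bind_cpu(set, cpuid, &subcode);
 *	if (err == EINVAL && subcode == CPC_RESOURCE_UNAVAIL)
 *		... the requests could not be mapped onto counters ...
 *	else if (err == EAGAIN)
 *		... CPU gone, CPU already has a bound set, or the caller
 *		    lost its binding ...
 *
 *	... later, kcpc_unbind(set) stops the counters and tears down the
 *	    context ...
 */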

int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is stopped
		 * until we're done here; no need to worry about preemption or
		 * migration here. We still use an atomic op to clear the flag
		 * to ensure the flags are always self-consistent; they can
		 * still be accessed from, for instance, another CPU doing a
		 * kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);


	return (0);
}

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
static int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			if (ret == CPC_ATTR_REQUIRES_PRIVILEGE)
				return (EACCES);
			return (EINVAL);
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

static void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}
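
/*
 * Illustrative call into the sampling entry point below (hypothetical
 * consumer, typically the cpc driver's sample ioctl): buf, hrtime and tick
 * are user-space addresses supplied by the caller.
 *
 *	uint64_t	*udata;		user buffer of ks_nreqs values
 *	hrtime_t	*uhrtime;	user buffer for the sample timestamp
 *	uint64_t	*utick;		user buffer for the virtualized ticks
 *
 *	if (kcpc_sample(set, udata, uhrtime, utick) == EAGAIN)
 *		... the context was invalidated, or a CPU-bound caller has
 *		    lost its binding; retry or unbind ...
 */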
/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	if (ctx == NULL)
		return (EINVAL);
	else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}

int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	kthread_t	*t;

	if (ctx == NULL)
		return (EINVAL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context.  It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
		    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not present on thread %p",
			    ctx, t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}

int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
	int i;

	ASSERT(set != NULL);
	ASSERT(set->ks_ctx != NULL);
	ASSERT(set->ks_ctx->kc_thread == curthread);
	ASSERT(set->ks_ctx->kc_cpuid == -1);

	if (index < 0 || index >= set->ks_nreqs)
		return (EINVAL);

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_index == index)
			break;
	ASSERT(i != set->ks_nreqs);

	set->ks_req[i].kr_preset = preset;
	return (0);
}

int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(ctx != NULL);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}
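
/*
 * Illustrative re-arm sketch (hypothetical thread-bound consumer): after
 * handling an overflow, a caller that wants another 'period' events to
 * elapse before the next overflow can reload the request whose kr_index
 * is 0 and restart the set:
 *
 *	(void) kcpc_preset(set, 0, UINT64_MAX - period + 1);
 *	(void) kcpc_restart(set);
 *
 * Both calls must be made by the thread the set is bound to (see the
 * ASSERTs above); kcpc_restart() stops the counters if they are running,
 * rewrites the presets into the data store, and asks the PCBE to reprogram
 * the hardware.
 */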

/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER : CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which will
 * be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}
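
/*
 * Illustrative PCBE-side iteration sketch (hypothetical backend code): the
 * token handed to pcbe_configure() and pcbe_program() can be walked with
 * kcpc_next_config() to visit every config that will be programmed together,
 * along with the 64-bit virtualized counter each one updates:
 *
 *	uint64_t *data;
 *	void *cfg;
 *
 *	for (cfg = kcpc_next_config(token, NULL, &data); cfg != NULL;
 *	    cfg = kcpc_next_config(token, cfg, &data)) {
 *		... program the counter described by cfg, seeding the
 *		    hardware register from the low-order bits of *data ...
 *	}
 */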

static kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_alloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_flags = 0;
	ctx->kc_vtick = 0;
	ctx->kc_rawtick = 0;
	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_alloc(sizeof (*cks), KM_SLEEP);
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		panic("kcpc_ctx_clone: configure of context %p with "
		    "set %p failed with subcode %d", cctx, cks, code);
}


static void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread.  (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_add_32(&kcpc_intrctx_count, 1);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat.  In that case, the
		 * bound thread that has the context that belongs to this
		 * CPU is almost certainly sleeping (if it was running on
		 * the CPU we'd have found it above), and the actual
		 * interrupted thread has no knowledge of performance counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong i.e. we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);
		atomic_add_32(&kcpc_nullctx_count, 1);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC,
				 * so freeze the context.  The interrupt
				 * handler has already stopped the counter
				 * hardware.
				 */
				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	}
	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t	*ctx;
	uint64_t	bitmap;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	/*
	 * Invoke the "generic" handler.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVF_NOTIFY_EMT specified.  We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags.  If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}

static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);


	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE))
		return;

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */

/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	cctx->kc_flags = ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
 * to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
 * when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated data
 * structures are freed, and that the hardware is passivated if this is an exec.
 */

/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (isexec) {
		/*
		 * This thread is execing, and after the exec it should not have
		 * any performance counter context. Stop the counters properly
		 * here so the system isn't surprised by an overflow interrupt
		 * later.
		 */
		if (ctx->kc_cpuid != -1) {
			cpu_t *cp;
			/*
			 * CPU-bound context; stop the appropriate CPU's ctrs.
			 * Hold cpu_lock while examining the CPU to ensure it
			 * doesn't go away.
			 */
			mutex_enter(&cpu_lock);
			cp = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU could have been DR'd out, so only stop the
			 * CPU and clear its context pointer if the CPU still
			 * exists.
			 */
			if (cp != NULL) {
				mutex_enter(&cp->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				cp->cpu_cpc_ctx = NULL;
				mutex_exit(&cp->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		} else {
			/*
			 * Thread-bound context; stop _this_ CPU's counters.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
			curthread->t_cpc_ctx = NULL;
		}

		/*
		 * Since we are being called from an exec and we know that
		 * exec is not permitted via the agent thread, we should clean
		 * up this thread's CPC state completely, and not leave dangling
		 * CPC pointers behind.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Walk through each request in this context's set and free the PCBE's
	 * configuration if it exists.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
	}

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
	int		i;
	kcpc_request_t	*req;

	ASSERT(set->ks_req != NULL);

	for (i = 0; i < set->ks_nreqs; i++) {
		req = &set->ks_req[i];

		if (req->kr_nattrs != 0) {
			kmem_free(req->kr_attr,
			    req->kr_nattrs * sizeof (kcpc_attr_t));
		}
	}

	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
	kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
 */
void
kcpc_invalidate_all(void)
{
	kcpc_ctx_t *ctx;
	long hash;

	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
		mutex_enter(&kcpc_ctx_llock[hash]);
		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
		mutex_exit(&kcpc_ctx_llock[hash]);
	}
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;

	if (set == NULL)
		return;

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
	 * the case of a CPU-bound set.
	 */
	curthread->t_cpc_ctx = NULL;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);
		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds. Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */

	kpreempt_disable();
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}
	kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
static int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
	int i;
	int *picnum_save;

	ASSERT(set->ks_nreqs <= cpc_ncounters);

	/*
	 * Provide kcpc_tryassign() with scratch space to avoid doing an
	 * alloc/free with every invocation.
	 */
	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
	/*
	 * kcpc_tryassign() blindly walks through each request in the set,
	 * seeing if a counter can count its event. If yes, it assigns that
	 * counter. However, that counter may have been the only capable counter
	 * for _another_ request's event. The solution is to retry the walk
	 * starting from each request in turn. Note that this does not cover
	 * all solutions, as that would require trying all unique orderings of
	 * the requests, an O(n!) proposition which would be unacceptable for
	 * architectures with many counters.
	 */
	for (i = 0; i < set->ks_nreqs; i++)
		if (kcpc_tryassign(set, i, picnum_save) == 0)
			break;

	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
	if (i == set->ks_nreqs)
		return (-1);
	return (0);
}
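
/*
 * Worked example of the starting-request heuristic above (hypothetical
 * two-counter PCBE): suppose PIC0 can count events A and B but PIC1 can
 * count only A, and the set contains request 0 for A and request 1 for B.
 * Starting the walk at request 0 assigns A to PIC0 and then finds no
 * capable counter for B, so that pass fails; starting at request 1 assigns
 * B to PIC0 and then A to PIC1, which succeeds.
 */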

static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
	int		i;
	int		j;
	uint64_t	bitmap = 0, resmap = 0;
	uint64_t	ctrmap;

	/*
	 * We are attempting to assign the reqs to pics, but we may fail. If we
	 * fail, we need to restore the state of the requests to what it was
	 * when we found it, as some reqs may have been explicitly assigned to
	 * a specific PIC beforehand. We do this by snapshotting the assignments
	 * now and restoring from it later if we fail.
	 *
	 * Also we note here which counters have already been claimed by
	 * requests with explicit counter assignments.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		scratch[i] = set->ks_req[i].kr_picnum;
		if (set->ks_req[i].kr_picnum != -1)
			resmap |= (1 << set->ks_req[i].kr_picnum);
	}

	/*
	 * Walk through requests assigning them to the first PIC that is
	 * capable.
	 */
	i = starting_req;
	do {
		if (set->ks_req[i].kr_picnum != -1) {
			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
			bitmap |= (1 << set->ks_req[i].kr_picnum);
			if (++i == set->ks_nreqs)
				i = 0;
			continue;
		}

		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
		for (j = 0; j < cpc_ncounters; j++) {
			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
			    (resmap & (1 << j)) == 0) {
				/*
				 * We can assign this counter because:
				 *
				 * 1. It can count the event (ctrmap)
				 * 2. It hasn't been assigned yet (bitmap)
				 * 3. It wasn't reserved by a request (resmap)
				 */
				bitmap |= (1 << j);
				break;
			}
		}
		if (j == cpc_ncounters) {
			for (i = 0; i < set->ks_nreqs; i++)
				set->ks_req[i].kr_picnum = scratch[i];
			return (-1);
		}
		set->ks_req[i].kr_picnum = j;

		if (++i == set->ks_nreqs)
			i = 0;
	} while (i != starting_req);

	return (0);
}

kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
	kcpc_set_t	*new;
	int		i;
	int		j;

	new = kmem_alloc(sizeof (*new), KM_SLEEP);
	new->ks_flags = set->ks_flags;
	new->ks_nreqs = set->ks_nreqs;
	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
	    KM_SLEEP);
	new->ks_data = NULL;
	new->ks_ctx = NULL;

	for (i = 0; i < new->ks_nreqs; i++) {
		new->ks_req[i].kr_config = NULL;
		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
		new->ks_req[i].kr_picp = NULL;
		new->ks_req[i].kr_data = NULL;
		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
		    CPC_MAX_EVENT_LEN);
		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
		    sizeof (kcpc_attr_t), KM_SLEEP);
		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
			new->ks_req[i].kr_attr[j].ka_val =
			    set->ks_req[i].kr_attr[j].ka_val;
			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
			    set->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
		}
	}

	return (new);
}

int
kcpc_allow_nonpriv(void *token)
{
	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

void
kcpc_invalidate(kthread_t *t)
{
	kcpc_ctx_t *ctx = t->t_cpc_ctx;

	if (ctx != NULL)
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module.  The prefix string
 * and the three numeric qualifiers are used to construct module names,
 * starting with the most specific, "pcbe.<prefix>.<first>.<second>.<third>",
 * and ending with the least specific, "pcbe.<prefix>".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	uint_t s[3];

	s[0] = first;
	s[1] = second;
	s[2] = third;

	return (modload_qualified("pcbe",
	    "pcbe", prefix, ".", s, 3) < 0 ? -1 : 0);
}
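
/*
 * Illustrative call (hypothetical values): on an x86 system identifying
 * itself as vendor "GenuineIntel", family 6, model 15, stepping 2,
 *
 *	(void) kcpc_pcbe_tryload("GenuineIntel", 6, 15, 2);
 *
 * would try "pcbe.GenuineIntel.6.15.2" first, falling back toward the
 * least specific name, "pcbe.GenuineIntel", before giving up.
 */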