/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2021 Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 */

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/ksynch.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
#include <sys/promif.h>
#include <sys/x_call.h>
#include <sys/cap_util.h>
#if defined(__x86)
#include <asm/clock.h>
#include <sys/xc_levels.h>
#endif

static kmutex_t kcpc_ctx_llock[CPC_HASH_BUCKETS]; /* protects ctx_list */
static kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS]; /* head of list */


krwlock_t kcpc_cpuctx_lock; /* lock for 'kcpc_cpuctx' below */
int kcpc_cpuctx; /* number of cpu-specific contexts */

int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t cpc_ncounters = 0;
pcbe_ops_t *pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count; /* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count; /* # overflows in a thread with no ctx */

/*
 * By setting 'kcpc_nullctx_panic' to 1, any overflow interrupts in a thread
 * with no valid context will result in a panic.
 */
static int kcpc_nullctx_panic = 0;

static void kcpc_save(void *);
static void kcpc_restore(void *);
static void kcpc_lwp_create(void *, void *);
static void kcpc_free(void *, int);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);
static kcpc_set_t *kcpc_set_create(kcpc_request_t *reqs, int nreqs,
    int set_flags, int kmem_flags);
/*
 * Macros to manipulate context flags. All flag updates should use one of
 * these two macros.
 *
 * Flags should always be updated atomically since some of the updates are
 * not protected by locks.
 */
#define KCPC_CTX_FLAG_SET(ctx, flag) atomic_or_uint(&(ctx)->kc_flags, (flag))
#define KCPC_CTX_FLAG_CLR(ctx, flag) atomic_and_uint(&(ctx)->kc_flags, ~(flag))
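
/*
 * Illustrative sketch (not part of the build): how the flag macros above
 * are intended to be used. Because kc_flags can be examined from overflow
 * interrupt context and from other CPUs without a lock, every update must
 * go through the atomic macros rather than a plain |= or &= store; plain
 * reads of the whole word remain safe. The function name is hypothetical.
 */
#ifdef notdef
static void
kcpc_flag_usage_example(kcpc_ctx_t *ctx)
{
        /* Atomically freeze the context... */
        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);

        /* ...and later, atomically thaw it again. */
        if (ctx->kc_flags & KCPC_CTX_FREEZE)
                KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
}
#endif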

/*
 * The IS_HIPIL() macro verifies that the code is executed either from a
 * cross-call or from a high-PIL interrupt.
 */
#ifdef DEBUG
#define IS_HIPIL() (getpil() >= XCALL_PIL)
#else
#define IS_HIPIL()
#endif /* DEBUG */


extern int kcpc_hw_load_pcbe(void);

/*
 * Return value from kcpc_hw_load_pcbe()
 */
static int kcpc_pcbe_error = 0;

static const struct ctxop_template kcpc_ctxop_tpl = {
        .ct_rev = CTXOP_TPL_REV,
        .ct_save = kcpc_save,
        .ct_restore = kcpc_restore,
        .ct_lwp_create = kcpc_lwp_create,
        .ct_free = kcpc_free,
};

/*
 * Perform one-time initialization of the kcpc framework.
 * This function performs the initialization only the first time it is called.
 * It is safe to call it multiple times.
 */
int
kcpc_init(void)
{
        long hash;
        static uint32_t kcpc_initialized = 0;

        /*
         * We already tried to load the platform PCBE module and failed.
         */
        if (kcpc_pcbe_error != 0)
                return (-1);

        /*
         * The kcpc framework should be initialized at most once.
         */
        if (atomic_cas_32(&kcpc_initialized, 0, 1) != 0)
                return (0);

        rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL);
        for (hash = 0; hash < CPC_HASH_BUCKETS; hash++)
                mutex_init(&kcpc_ctx_llock[hash],
                    NULL, MUTEX_DRIVER, (void *)(uintptr_t)15);

        /*
         * Load the platform-specific PCBE module.
         */
        kcpc_pcbe_error = kcpc_hw_load_pcbe();

        return (kcpc_pcbe_error == 0 ? 0 : -1);
}

void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
        pcbe_ops = ops;
        cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

void
kcpc_register_dcpc(void (*func)(uint64_t))
{
        dtrace_cpc_fire = func;
}

void
kcpc_unregister_dcpc(void)
{
        dtrace_cpc_fire = NULL;
}

int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
        cpu_t *cp;
        kcpc_ctx_t *ctx;
        int error;
        int save_spl;

        ctx = kcpc_ctx_alloc(KM_SLEEP);

        if (kcpc_assign_reqs(set, ctx) != 0) {
                kcpc_ctx_free(ctx);
                *subcode = CPC_RESOURCE_UNAVAIL;
                return (EINVAL);
        }

        ctx->kc_cpuid = cpuid;
        ctx->kc_thread = curthread;

        set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

        if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
                kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
                kcpc_ctx_free(ctx);
                return (error);
        }

        set->ks_ctx = ctx;
        ctx->kc_set = set;

        /*
         * We must hold cpu_lock to prevent DR, offlining, or unbinding while
         * we are manipulating the cpu_t and programming the hardware, else
         * the cpu_t could go away while we're looking at it.
         */
        mutex_enter(&cpu_lock);
        cp = cpu_get(cpuid);

        if (cp == NULL)
                /*
                 * The CPU could have been DR'd out while we were getting
                 * set up.
                 */
                goto unbound;

        mutex_enter(&cp->cpu_cpc_ctxlock);
        kpreempt_disable();
        save_spl = spl_xcall();

        /*
         * Check whether the CPU's counters are already in use by someone
         * other than the kernel's capacity and utilization framework (the
         * kernel will let go of its counters for the user in kcpc_program()
         * below).
         */
        if (cp->cpu_cpc_ctx != NULL && !CU_CPC_ON(cp)) {
                /*
                 * If this CPU already has a bound set, return an error.
                 */
                splx(save_spl);
                kpreempt_enable();
                mutex_exit(&cp->cpu_cpc_ctxlock);
                goto unbound;
        }

        if (curthread->t_bind_cpu != cpuid) {
                splx(save_spl);
                kpreempt_enable();
                mutex_exit(&cp->cpu_cpc_ctxlock);
                goto unbound;
        }

        kcpc_program(ctx, B_FALSE, B_TRUE);

        splx(save_spl);
        kpreempt_enable();

        mutex_exit(&cp->cpu_cpc_ctxlock);
        mutex_exit(&cpu_lock);

        mutex_enter(&set->ks_lock);
        set->ks_state |= KCPC_SET_BOUND;
        cv_signal(&set->ks_condv);
        mutex_exit(&set->ks_lock);

        return (0);

unbound:
        mutex_exit(&cpu_lock);
        set->ks_ctx = NULL;
        kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
        kcpc_ctx_free(ctx);
        return (EAGAIN);
}
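
/*
 * Illustrative sketch (not part of the build): how a consumer might bind a
 * set to a CPU. The caller must already be bound to the target CPU (see the
 * t_bind_cpu check above); EAGAIN indicates the CPU was busy or went away,
 * while EINVAL with CPC_RESOURCE_UNAVAIL means the requests could not all
 * be assigned to counters. The function name is hypothetical.
 */
#ifdef notdef
static int
kcpc_bind_cpu_example(kcpc_set_t *set, processorid_t cpuid)
{
        int subcode = -1;
        int error = kcpc_bind_cpu(set, cpuid, &subcode);

        if (error == EINVAL && subcode == CPC_RESOURCE_UNAVAIL)
                cmn_err(CE_NOTE, "requests don't fit this CPU's counters");
        else if (error == EAGAIN)
                cmn_err(CE_NOTE, "CPU busy or offline; try again");
        return (error);
}
#endif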

int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
        kcpc_ctx_t *ctx;
        int error;

        /*
         * Only one set is allowed per context, so ensure there is no
         * existing context.
         */

        if (t->t_cpc_ctx != NULL)
                return (EEXIST);

        ctx = kcpc_ctx_alloc(KM_SLEEP);

        /*
         * The context must begin life frozen until it has been properly
         * programmed onto the hardware. This prevents the context ops from
         * worrying about it until we're ready.
         */
        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
        ctx->kc_hrtime = gethrtime();

        if (kcpc_assign_reqs(set, ctx) != 0) {
                kcpc_ctx_free(ctx);
                *subcode = CPC_RESOURCE_UNAVAIL;
                return (EINVAL);
        }

        ctx->kc_cpuid = -1;
        if (set->ks_flags & CPC_BIND_LWP_INHERIT)
                KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_LWPINHERIT);
        ctx->kc_thread = t;
        t->t_cpc_ctx = ctx;
        /*
         * Permit threads to look at their own hardware counters from userland.
         */
        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_NONPRIV);

        /*
         * Create the data store for this set.
         */
        set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

        if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
                kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
                kcpc_ctx_free(ctx);
                t->t_cpc_ctx = NULL;
                return (error);
        }

        set->ks_ctx = ctx;
        ctx->kc_set = set;

        /*
         * Add a device context to the subject thread.
         */
        ctxop_install(t, &kcpc_ctxop_tpl, ctx);

        /*
         * Ask the backend to program the hardware.
         */
        if (t == curthread) {
                int save_spl;

                kpreempt_disable();
                save_spl = spl_xcall();
                kcpc_program(ctx, B_TRUE, B_TRUE);
                splx(save_spl);
                kpreempt_enable();
        } else {
                /*
                 * Since we are the agent LWP, we know the victim LWP is stopped
                 * until we're done here; no need to worry about preemption or
                 * migration here. We still use an atomic op to clear the flag
                 * to ensure the flags are always self-consistent; they can
                 * still be accessed from, for instance, another CPU doing a
                 * kcpc_invalidate_all().
                 */
                KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
        }

        mutex_enter(&set->ks_lock);
        set->ks_state |= KCPC_SET_BOUND;
        cv_signal(&set->ks_condv);
        mutex_exit(&set->ks_lock);

        return (0);
}
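
/*
 * Illustrative sketch (not part of the build): binding a set to the calling
 * thread itself. When t == curthread the hardware is programmed immediately
 * (above); an agent LWP binding a stopped victim instead relies on the
 * installed context ops to program the counters when the victim next runs.
 * The function name is hypothetical; real callers also set t_cpc_set.
 */
#ifdef notdef
static int
kcpc_bind_self_example(kcpc_set_t *set)
{
        int subcode = -1;

        curthread->t_cpc_set = set;
        return (kcpc_bind_thread(set, curthread, &subcode));
}
#endif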

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
        int i;
        int ret;
        kcpc_request_t *rp;

        for (i = 0; i < set->ks_nreqs; i++) {
                int n;
                rp = &set->ks_req[i];

                n = rp->kr_picnum;

                ASSERT(n >= 0 && n < cpc_ncounters);

                ASSERT(ctx->kc_pics[n].kp_req == NULL);

                if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
                        if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
                            == 0) {
                                *subcode = -1;
                                return (ENOTSUP);
                        }
                        /*
                         * If any of the counters have requested overflow
                         * notification, we flag the context as being one that
                         * cares about overflow.
                         */
                        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_SIGOVF);
                }

                rp->kr_config = NULL;
                if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
                    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
                    &(rp->kr_config), (void *)ctx)) != 0) {
                        kcpc_free_configs(set);
                        *subcode = ret;
                        switch (ret) {
                        case CPC_ATTR_REQUIRES_PRIVILEGE:
                        case CPC_HV_NO_ACCESS:
                                return (EACCES);
                        default:
                                return (EINVAL);
                        }
                }

                ctx->kc_pics[n].kp_req = rp;
                rp->kr_picp = &ctx->kc_pics[n];
                rp->kr_data = set->ks_data + rp->kr_index;
                *rp->kr_data = rp->kr_preset;
        }

        return (0);
}

void
kcpc_free_configs(kcpc_set_t *set)
{
        int i;

        for (i = 0; i < set->ks_nreqs; i++)
                if (set->ks_req[i].kr_config != NULL)
                        pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
        kcpc_ctx_t *ctx = set->ks_ctx;
        int save_spl;

        mutex_enter(&set->ks_lock);
        if ((set->ks_state & KCPC_SET_BOUND) == 0) {
                mutex_exit(&set->ks_lock);
                return (EINVAL);
        }
        mutex_exit(&set->ks_lock);

        /*
         * Kernel preemption must be disabled while reading the hardware regs,
         * and if this is a CPU-bound context, while checking the CPU binding
         * of the current thread.
         */
        kpreempt_disable();
        save_spl = spl_xcall();

        if (ctx->kc_flags & KCPC_CTX_INVALID) {
                splx(save_spl);
                kpreempt_enable();
                return (EAGAIN);
        }

        if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
                if (ctx->kc_cpuid != -1) {
                        if (curthread->t_bind_cpu != ctx->kc_cpuid) {
                                splx(save_spl);
                                kpreempt_enable();
                                return (EAGAIN);
                        }
                }

                if (ctx->kc_thread == curthread) {
                        uint64_t curtick = KCPC_GET_TICK();

                        ctx->kc_hrtime = gethrtime_waitfree();
                        pcbe_ops->pcbe_sample(ctx);
                        ctx->kc_vtick += curtick - ctx->kc_rawtick;
                        ctx->kc_rawtick = curtick;
                }

                /*
                 * The config may have been invalidated by
                 * the pcbe_sample op.
                 */
                if (ctx->kc_flags & KCPC_CTX_INVALID) {
                        splx(save_spl);
                        kpreempt_enable();
                        return (EAGAIN);
                }
        }

        splx(save_spl);
        kpreempt_enable();

        if (copyout(set->ks_data, buf,
            set->ks_nreqs * sizeof (uint64_t)) == -1)
                return (EFAULT);
        if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
                return (EFAULT);
        if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
                return (EFAULT);

        return (0);
}
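
/*
 * Illustrative sketch (not part of the build): sampling a bound set on
 * behalf of a user process. All three destination pointers are user
 * addresses, which is why kcpc_sample() uses copyout() and can fail with
 * EFAULT; EAGAIN means the context was invalidated or the caller is no
 * longer on the bound CPU. Names here are hypothetical.
 */
#ifdef notdef
static int
kcpc_sample_example(kcpc_set_t *set, uint64_t *ubuf, hrtime_t *uhrtime,
    uint64_t *utick)
{
        int error;

        error = kcpc_sample(set, ubuf, uhrtime, utick);
        /* 0: data copied out; EFAULT: bad user address; EAGAIN: retry */
        return (error);
}
#endif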

/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
        cpu_t *cp;

        kpreempt_disable();

        if (ctx->kc_cpuid == CPU->cpu_id) {
                cp = CPU;
        } else {
                cp = cpu_get(ctx->kc_cpuid);
        }

        ASSERT(cp != NULL && cp->cpu_cpc_ctx == ctx);
        kcpc_cpu_stop(cp, B_FALSE);

        kpreempt_enable();
}

int
kcpc_unbind(kcpc_set_t *set)
{
        kcpc_ctx_t *ctx;
        kthread_t *t;

        /*
         * We could be racing with the process's agent thread as it
         * binds the set; we must wait for the set to finish binding
         * before attempting to tear it down.
         */
        mutex_enter(&set->ks_lock);
        while ((set->ks_state & KCPC_SET_BOUND) == 0)
                cv_wait(&set->ks_condv, &set->ks_lock);
        mutex_exit(&set->ks_lock);

        ctx = set->ks_ctx;

        /*
         * Use kc_lock to synchronize with kcpc_restore().
         */
        mutex_enter(&ctx->kc_lock);
        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
        mutex_exit(&ctx->kc_lock);

        if (ctx->kc_cpuid == -1) {
                t = ctx->kc_thread;
                /*
                 * The context is thread-bound and therefore has a device
                 * context. It will be freed via ctxop_remove() calling
                 * freectx() calling kcpc_free().
                 */
                if (t == curthread) {
                        int save_spl;

                        kpreempt_disable();
                        save_spl = spl_xcall();
                        if (!(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED))
                                kcpc_unprogram(ctx, B_TRUE);
                        splx(save_spl);
                        kpreempt_enable();
                }
                VERIFY3U(ctxop_remove(t, &kcpc_ctxop_tpl, ctx), !=, 0);
                t->t_cpc_set = NULL;
                t->t_cpc_ctx = NULL;
        } else {
                /*
                 * If we are unbinding a CPU-bound set from a remote CPU, the
                 * native CPU's idle thread could be in the midst of programming
                 * this context onto the CPU. We grab the context's lock here to
                 * ensure that the idle thread is done with it. When we release
                 * the lock, the CPU no longer has a context and the idle thread
                 * will move on.
                 *
                 * cpu_lock must be held to prevent the CPU from being DR'd out
                 * while we disassociate the context from the cpu_t.
                 */
                cpu_t *cp;
                mutex_enter(&cpu_lock);
                cp = cpu_get(ctx->kc_cpuid);
                if (cp != NULL) {
                        /*
                         * The CPU may have been DR'd out of the system.
                         */
                        mutex_enter(&cp->cpu_cpc_ctxlock);
                        if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
                                kcpc_stop_hw(ctx);
                        ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
                        mutex_exit(&cp->cpu_cpc_ctxlock);
                }
                mutex_exit(&cpu_lock);
                if (ctx->kc_thread == curthread) {
                        kcpc_free(ctx, 0);
                        curthread->t_cpc_set = NULL;
                }
        }

        return (0);
}

int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
        int i;

        ASSERT(set != NULL);
        ASSERT(set->ks_state & KCPC_SET_BOUND);
        ASSERT(set->ks_ctx->kc_thread == curthread);
        ASSERT(set->ks_ctx->kc_cpuid == -1);

        if (index < 0 || index >= set->ks_nreqs)
                return (EINVAL);

        for (i = 0; i < set->ks_nreqs; i++)
                if (set->ks_req[i].kr_index == index)
                        break;
        ASSERT(i != set->ks_nreqs);

        set->ks_req[i].kr_preset = preset;
        return (0);
}

int
kcpc_restart(kcpc_set_t *set)
{
        kcpc_ctx_t *ctx = set->ks_ctx;
        int i;
        int save_spl;

        ASSERT(set->ks_state & KCPC_SET_BOUND);
        ASSERT(ctx->kc_thread == curthread);
        ASSERT(ctx->kc_cpuid == -1);

        for (i = 0; i < set->ks_nreqs; i++) {
                *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
                pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
                    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
        }

        kpreempt_disable();
        save_spl = spl_xcall();

        /*
         * If the user is doing this on a running set, make sure the counters
         * are stopped first.
         */
        if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
                pcbe_ops->pcbe_allstop();

        /*
         * Ask the backend to program the hardware.
         */
        ctx->kc_rawtick = KCPC_GET_TICK();
        KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
        pcbe_ops->pcbe_program(ctx);
        splx(save_spl);
        kpreempt_enable();

        return (0);
}
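
/*
 * Illustrative sketch (not part of the build): resetting a thread-bound
 * set's counters from the owning thread. kcpc_preset() only records the
 * new origin for each request; the values take effect when kcpc_restart()
 * reprograms the hardware. The function name is hypothetical.
 */
#ifdef notdef
static int
kcpc_zero_and_restart_example(kcpc_set_t *set)
{
        int i;

        for (i = 0; i < set->ks_nreqs; i++)
                (void) kcpc_preset(set, i, 0);
        return (kcpc_restart(set));
}
#endif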

/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
        kcpc_ctx_t *ctx = t->t_cpc_ctx;
        kcpc_set_t *set = t->t_cpc_set;
        kcpc_set_t *newset;
        int i;
        int flag;
        int err;

        ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

        if (ctx == NULL) {
                /*
                 * This thread has a set but no context; it must be a
                 * CPU-bound set.
                 */
                ASSERT(t->t_cpc_set != NULL);
                ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
                return (EINVAL);
        } else if (ctx->kc_flags & KCPC_CTX_INVALID)
                return (EAGAIN);

        if (cmd == CPC_ENABLE) {
                if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
                        return (EINVAL);
                kpreempt_disable();
                KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
                kcpc_restore(ctx);
                kpreempt_enable();
        } else if (cmd == CPC_DISABLE) {
                if (ctx->kc_flags & KCPC_CTX_FREEZE)
                        return (EINVAL);
                kpreempt_disable();
                kcpc_save(ctx);
                KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
                kpreempt_enable();
        } else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
                /*
                 * Strategy for usr/sys: stop counters and update set's presets
                 * with current counter values, unbind, update requests with
                 * new config, then re-bind.
                 */
                flag = (cmd == CPC_USR_EVENTS) ?
                    CPC_COUNT_USER : CPC_COUNT_SYSTEM;

                kpreempt_disable();
                KCPC_CTX_FLAG_SET(ctx,
                    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
                pcbe_ops->pcbe_allstop();
                kpreempt_enable();

                for (i = 0; i < set->ks_nreqs; i++) {
                        set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
                        if (enable)
                                set->ks_req[i].kr_flags |= flag;
                        else
                                set->ks_req[i].kr_flags &= ~flag;
                }
                newset = kcpc_dup_set(set);
                if (kcpc_unbind(set) != 0)
                        return (EINVAL);
                t->t_cpc_set = newset;
                if (kcpc_bind_thread(newset, t, &err) != 0) {
                        t->t_cpc_set = NULL;
                        kcpc_free_set(newset);
                        return (EINVAL);
                }
        } else
                return (EINVAL);

        return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which
 * will be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
        int i;
        kcpc_pic_t *pic;
        kcpc_ctx_t *ctx = (kcpc_ctx_t *)token;

        if (current == NULL) {
                /*
                 * Client would like the first config, which may not be in
                 * counter 0; we need to search through the counters for the
                 * first config.
                 */
                for (i = 0; i < cpc_ncounters; i++)
                        if (ctx->kc_pics[i].kp_req != NULL)
                                break;
                /*
                 * There are no counters configured for the given context.
                 */
                if (i == cpc_ncounters)
                        return (NULL);
        } else {
                /*
                 * There surely is a faster way to do this.
                 */
                for (i = 0; i < cpc_ncounters; i++) {
                        pic = &ctx->kc_pics[i];

                        if (pic->kp_req != NULL &&
                            current == pic->kp_req->kr_config)
                                break;
                }

                /*
                 * We found the current config at picnum i. Now search for the
                 * next configured PIC.
                 */
                for (i++; i < cpc_ncounters; i++) {
                        pic = &ctx->kc_pics[i];
                        if (pic->kp_req != NULL)
                                break;
                }

                if (i == cpc_ncounters)
                        return (NULL);
        }

        if (data != NULL) {
                *data = ctx->kc_pics[i].kp_req->kr_data;
        }

        return (ctx->kc_pics[i].kp_req->kr_config);
}
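
/*
 * Illustrative sketch (not part of the build): how a PCBE might walk every
 * config that will be programmed together, e.g. from its pcbe_program()
 * entry point. 'token' is the kcpc_ctx_t that was passed to the PCBE's
 * pcbe_configure() op. The function name is hypothetical.
 */
#ifdef notdef
static void
pcbe_program_example(void *token)
{
        void *cfg;
        uint64_t *data;

        for (cfg = kcpc_next_config(token, NULL, &data); cfg != NULL;
            cfg = kcpc_next_config(token, cfg, &data)) {
                /* Program one counter from cfg; *data is its 64-bit store. */
        }
}
#endif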


kcpc_ctx_t *
kcpc_ctx_alloc(int kmem_flags)
{
        kcpc_ctx_t *ctx;
        long hash;

        ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), kmem_flags);
        if (ctx == NULL)
                return (NULL);

        hash = CPC_HASH_CTX(ctx);
        mutex_enter(&kcpc_ctx_llock[hash]);
        ctx->kc_next = kcpc_ctx_list[hash];
        kcpc_ctx_list[hash] = ctx;
        mutex_exit(&kcpc_ctx_llock[hash]);

        ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
            cpc_ncounters, KM_SLEEP);

        ctx->kc_cpuid = -1;

        return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
        kcpc_set_t *ks = ctx->kc_set, *cks;
        int i, j;
        int code;

        ASSERT(ks != NULL);

        if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
                return;

        cks = kmem_zalloc(sizeof (*cks), KM_SLEEP);
        cks->ks_state &= ~KCPC_SET_BOUND;
        cctx->kc_set = cks;
        cks->ks_flags = ks->ks_flags;
        cks->ks_nreqs = ks->ks_nreqs;
        cks->ks_req = kmem_alloc(cks->ks_nreqs *
            sizeof (kcpc_request_t), KM_SLEEP);
        cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
            KM_SLEEP);
        cks->ks_ctx = cctx;

        for (i = 0; i < cks->ks_nreqs; i++) {
                cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
                cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
                (void) strncpy(cks->ks_req[i].kr_event,
                    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
                cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
                cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
                cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
                if (ks->ks_req[i].kr_nattrs > 0) {
                        cks->ks_req[i].kr_attr =
                            kmem_alloc(ks->ks_req[i].kr_nattrs *
                            sizeof (kcpc_attr_t), KM_SLEEP);
                }
                for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
                        (void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
                            ks->ks_req[i].kr_attr[j].ka_name,
                            CPC_MAX_ATTR_LEN);
                        cks->ks_req[i].kr_attr[j].ka_val =
                            ks->ks_req[i].kr_attr[j].ka_val;
                }
        }
        if (kcpc_configure_reqs(cctx, cks, &code) != 0)
                kcpc_invalidate_config(cctx);

        mutex_enter(&cks->ks_lock);
        cks->ks_state |= KCPC_SET_BOUND;
        cv_signal(&cks->ks_condv);
        mutex_exit(&cks->ks_lock);
}


void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
        kcpc_ctx_t **loc;
        long hash = CPC_HASH_CTX(ctx);

        mutex_enter(&kcpc_ctx_llock[hash]);
        loc = &kcpc_ctx_list[hash];
        ASSERT(*loc != NULL);
        while (*loc != ctx)
                loc = &(*loc)->kc_next;
        *loc = ctx->kc_next;
        mutex_exit(&kcpc_ctx_llock[hash]);

        kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
        cv_destroy(&ctx->kc_condv);
        mutex_destroy(&ctx->kc_lock);
        kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
        kcpc_ctx_t *ctx;
        kthread_t *t = curthread;
        int i;

        /*
         * On both x86 and UltraSPARC, we may deliver the high-level
         * interrupt in kernel mode, just after we've started to run an
         * interrupt thread. (That's because the hardware helpfully
         * delivers the overflow interrupt some random number of cycles
         * after the instruction that caused the overflow, by which time
         * we're in some part of the kernel, not necessarily running on
         * the right thread).
         *
         * Check for this case here -- find the pinned thread
         * that was running when the interrupt went off.
         */
        if (t->t_flag & T_INTR_THREAD) {
                klwp_t *lwp;

                atomic_inc_32(&kcpc_intrctx_count);

                /*
                 * Note that t_lwp is always set to point at the underlying
                 * thread, thus this will work in the presence of nested
                 * interrupts.
                 */
                ctx = NULL;
                if ((lwp = t->t_lwp) != NULL) {
                        t = lwptot(lwp);
                        ctx = t->t_cpc_ctx;
                }
        } else
                ctx = t->t_cpc_ctx;

        if (ctx == NULL) {
                /*
                 * This can easily happen if we're using the counters in
                 * "shared" mode, for example, and an overflow interrupt
                 * occurs while we are running cpustat. In that case, the
                 * bound thread that has the context that belongs to this
                 * CPU is almost certainly sleeping (if it was running on
                 * the CPU we'd have found it above), and the actual
                 * interrupted thread has no knowledge of performance counters!
                 */
                ctx = curthread->t_cpu->cpu_cpc_ctx;
                if (ctx != NULL) {
                        /*
                         * Return the bound context for this CPU to
                         * the interrupt handler so that it can synchronously
                         * sample the hardware counters and restart them.
                         */
                        return (ctx);
                }

                /*
                 * As long as the overflow interrupt really is delivered early
                 * enough after trapping into the kernel to avoid switching
                 * threads, we must always be able to find the cpc context,
                 * or something went terribly wrong, i.e. we ended up running
                 * a passivated interrupt thread or a kernel thread, or we
                 * interrupted idle, all of which are Very Bad.
                 *
                 * We also could end up here owing to an incredibly unlikely
                 * race condition that exists on x86-based architectures when
                 * the cpc provider is in use; overflow interrupts are directed
                 * to the cpc provider if the 'dtrace_cpc_in_use' variable is
                 * set when we enter the handler. This variable is unset after
                 * overflow interrupts have been disabled on all CPUs and all
                 * contexts have been torn down. To stop interrupts, the cpc
                 * provider issues a xcall to the remote CPU before it tears
                 * down that CPU's context. Since high-priority xcalls on x86
                 * execute at a higher PIL than this handler, it is possible
                 * (though extremely unlikely) that the xcall could interrupt
                 * the overflow handler before the handler has checked the
                 * 'dtrace_cpc_in_use' variable, stop the counters, and return
                 * to the cpc provider, which could then rip down contexts and
                 * unset 'dtrace_cpc_in_use' *before* the CPU's overflow
                 * handler has had a chance to check the variable. In that
                 * case, the handler would direct the overflow into this code
                 * and no valid context will be found. The default behavior
                 * when no valid context is found is now to shout a warning to
                 * the console and bump the 'kcpc_nullctx_count' variable.
                 */
                if (kcpc_nullctx_panic)
                        panic("null cpc context, thread %p", (void *)t);
#ifdef DEBUG
                cmn_err(CE_NOTE,
                    "null cpc context found in overflow handler!\n");
#endif
                atomic_inc_32(&kcpc_nullctx_count);
        } else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
                /*
                 * Schedule an ast to sample the counters, which will
                 * propagate any overflow into the virtualized performance
                 * counter(s), and may deliver a signal.
                 */
                ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
                /*
                 * If a counter has overflowed which was counting on behalf of
                 * a request which specified CPC_OVF_NOTIFY_EMT, send the
                 * process a signal.
                 */
                for (i = 0; i < cpc_ncounters; i++) {
                        if (ctx->kc_pics[i].kp_req != NULL &&
                            bitmap & (1 << i) &&
                            ctx->kc_pics[i].kp_req->kr_flags &
                            CPC_OVF_NOTIFY_EMT) {
                                /*
                                 * A signal has been requested for this PIC,
                                 * so freeze the context. The interrupt handler
                                 * has already stopped the counter hardware.
                                 */
                                KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
                                atomic_or_uint(&ctx->kc_pics[i].kp_flags,
                                    KCPC_PIC_OVERFLOWED);
                        }
                }
                aston(t);
        } else if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
                /*
                 * The thread context is no longer valid, but there may be a
                 * valid CPU context.
                 */
                return (curthread->t_cpu->cpu_cpc_ctx);
        }

        return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
        kcpc_ctx_t *ctx;
        uint64_t bitmap;
        uint8_t *state;
        int save_spl;

        if (pcbe_ops == NULL ||
            (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
                return (DDI_INTR_UNCLAIMED);

        /*
         * Prevent any further interrupts.
         */
        pcbe_ops->pcbe_allstop();

        if (dtrace_cpc_in_use) {
                state = &cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state;

                /*
                 * Set the per-CPU state bit to indicate that we are currently
                 * processing an interrupt if it is currently free. Drop the
                 * interrupt if the state isn't free (i.e. a configuration
                 * event is taking place).
                 */
                if (atomic_cas_8(state, DCPC_INTR_FREE,
                    DCPC_INTR_PROCESSING) == DCPC_INTR_FREE) {
                        int i;
                        kcpc_request_t req;

                        ASSERT(dtrace_cpc_fire != NULL);

                        (*dtrace_cpc_fire)(bitmap);

                        ctx = curthread->t_cpu->cpu_cpc_ctx;
                        if (ctx == NULL) {
#ifdef DEBUG
                                cmn_err(CE_NOTE, "null cpc context in "
                                    "hardware overflow handler!\n");
#endif
                                return (DDI_INTR_CLAIMED);
                        }

                        /* Reset any counters that have overflowed */
                        for (i = 0; i < ctx->kc_set->ks_nreqs; i++) {
                                req = ctx->kc_set->ks_req[i];

                                if (bitmap & (1 << req.kr_picnum)) {
                                        pcbe_ops->pcbe_configure(req.kr_picnum,
                                            req.kr_event, req.kr_preset,
                                            req.kr_flags, req.kr_nattrs,
                                            req.kr_attr, &(req.kr_config),
                                            (void *)ctx);
                                }
                        }
                        pcbe_ops->pcbe_program(ctx);

                        /*
                         * We've finished processing the interrupt so set
                         * the state back to free.
                         */
                        cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state =
                            DCPC_INTR_FREE;
                        membar_producer();
                }
                return (DDI_INTR_CLAIMED);
        }

        /*
         * DTrace isn't involved so pass on accordingly.
         *
         * If the interrupt has occurred in the context of an lwp owning
         * the counters, then the handler posts an AST to the lwp to
         * trigger the actual sampling, and optionally deliver a signal or
         * restart the counters, on the way out of the kernel using
         * kcpc_hw_overflow_ast() (see below).
         *
         * On the other hand, if the handler returns the context to us
         * directly, then it means that there are no other threads in
         * the middle of updating it, no AST has been posted, and so we
         * should sample the counters here, and restart them with no
         * further fuss.
         *
         * The CPU's CPC context may disappear as a result of a cross-call,
         * which has higher PIL on x86, so protect the context by raising PIL
         * to the cross-call level.
         */
        save_spl = spl_xcall();
        if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
                uint64_t curtick = KCPC_GET_TICK();

                ctx->kc_hrtime = gethrtime_waitfree();
                ctx->kc_vtick += curtick - ctx->kc_rawtick;
                ctx->kc_rawtick = curtick;
                pcbe_ops->pcbe_sample(ctx);
                pcbe_ops->pcbe_program(ctx);
        }
        splx(save_spl);

        return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
        kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
        int i;
        int found = 0;
        uint64_t curtick = KCPC_GET_TICK();

        ASSERT(ctx != NULL); /* Beware of interrupt skid. */

        /*
         * An overflow happened: sample the context to ensure that
         * the overflow is propagated into the upper bits of the
         * virtualized 64-bit counter(s).
         */
        kpreempt_disable();
        ctx->kc_hrtime = gethrtime_waitfree();
        pcbe_ops->pcbe_sample(ctx);
        kpreempt_enable();

        ctx->kc_vtick += curtick - ctx->kc_rawtick;

        /*
         * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
         * if that pic generated an overflow and if the request it was counting
         * on behalf of had CPC_OVF_NOTIFY_EMT specified. We go through all
         * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
         * found any overflowed pics, keep the context frozen and return true
         * (thus causing a signal to be sent).
         */
        for (i = 0; i < cpc_ncounters; i++) {
                if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
                        atomic_and_uint(&ctx->kc_pics[i].kp_flags,
                            ~KCPC_PIC_OVERFLOWED);
                        found = 1;
                }
        }
        if (found)
                return (1);

        /*
         * Otherwise, re-enable the counters and continue life as before.
         */
        kpreempt_disable();
        KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
        pcbe_ops->pcbe_program(ctx);
        kpreempt_enable();
        return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(void *arg)
{
        kcpc_ctx_t *ctx = arg;
        int err;
        int save_spl;

        kpreempt_disable();
        save_spl = spl_xcall();

        if (ctx->kc_flags & KCPC_CTX_INVALID) {
                if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
                        splx(save_spl);
                        kpreempt_enable();
                        return;
                }
                /*
                 * This context has been invalidated but the counters have not
                 * been stopped. Stop them here and mark the context stopped.
                 */
                kcpc_unprogram(ctx, B_TRUE);
                splx(save_spl);
                kpreempt_enable();
                return;
        }

        pcbe_ops->pcbe_allstop();
        if (ctx->kc_flags & KCPC_CTX_FREEZE) {
                splx(save_spl);
                kpreempt_enable();
                return;
        }

        /*
         * Need to sample for all reqs into each req's current mpic.
         */
        ctx->kc_hrtime = gethrtime_waitfree();
        ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
        pcbe_ops->pcbe_sample(ctx);

        /*
         * Program the counters for measuring capacity and utilization, since
         * the user thread isn't using them anymore.
         */
        ASSERT(ctx->kc_cpuid == -1);
        cu_cpc_program(CPU, &err);
        splx(save_spl);
        kpreempt_enable();
}

static void
kcpc_restore(void *arg)
{
        kcpc_ctx_t *ctx = arg;
        int save_spl;

        mutex_enter(&ctx->kc_lock);

        if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
            KCPC_CTX_INVALID) {
                /*
                 * The context is invalidated but has not been marked stopped.
                 * We mark it as such here because we will not start the
                 * counters during this context switch.
                 */
                KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);
        }

        if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
                mutex_exit(&ctx->kc_lock);
                return;
        }

        /*
         * Set kc_flags to show that a kcpc_restore() is in progress to avoid
         * ctx & set related memory objects being freed without us knowing.
         * This can happen if an agent thread is executing a kcpc_unbind(),
         * with this thread as the target, whilst we're concurrently doing a
         * restorectx() during, for example, a proc_exit(). Effectively, by
         * doing this, we're asking kcpc_free() to cv_wait() until
         * kcpc_restore() has completed.
         */
        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_RESTORE);
        mutex_exit(&ctx->kc_lock);

        /*
         * While programming the hardware, the counters should be stopped. We
         * don't do an explicit pcbe_allstop() here because they should have
         * been stopped already by the last consumer.
         */
        kpreempt_disable();
        save_spl = spl_xcall();
        kcpc_program(ctx, B_TRUE, B_TRUE);
        splx(save_spl);
        kpreempt_enable();

        /*
         * Wake the agent thread if it's waiting in kcpc_free().
         */
        mutex_enter(&ctx->kc_lock);
        KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_RESTORE);
        cv_signal(&ctx->kc_condv);
        mutex_exit(&ctx->kc_lock);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */
/*ARGSUSED*/
static void
kcpc_idle_save(void *arg)
{
        struct cpu *cp = arg;

        /*
         * The idle thread shouldn't be run anywhere else.
         */
        ASSERT(CPU == cp);

        /*
         * We must hold the CPU's context lock to ensure the context isn't
         * freed while we're looking at it.
         */
        mutex_enter(&cp->cpu_cpc_ctxlock);

        if ((cp->cpu_cpc_ctx == NULL) ||
            (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
                mutex_exit(&cp->cpu_cpc_ctxlock);
                return;
        }

        pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
        mutex_exit(&cp->cpu_cpc_ctxlock);
}

static void
kcpc_idle_restore(void *arg)
{
        struct cpu *cp = arg;

        /*
         * The idle thread shouldn't be run anywhere else.
         */
        ASSERT(CPU == cp);

        /*
         * We must hold the CPU's context lock to ensure the context isn't
         * freed while we're looking at it.
         */
        mutex_enter(&cp->cpu_cpc_ctxlock);

        if ((cp->cpu_cpc_ctx == NULL) ||
            (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
                mutex_exit(&cp->cpu_cpc_ctxlock);
                return;
        }

        pcbe_ops->pcbe_allstop();
        mutex_exit(&cp->cpu_cpc_ctxlock);
}

static const struct ctxop_template kcpc_idle_ctxop_tpl = {
        .ct_rev = CTXOP_TPL_REV,
        .ct_save = kcpc_idle_save,
        .ct_restore = kcpc_idle_restore,
};

void
kcpc_idle_ctxop_install(kthread_t *t, struct cpu *cp)
{
        ctxop_install(t, &kcpc_idle_ctxop_tpl, cp);
}

/*ARGSUSED*/
static void
kcpc_lwp_create(void *parent, void *child)
{
        kthread_t *t = parent, *ct = child;
        kcpc_ctx_t *ctx = t->t_cpc_ctx, *cctx;
        int i;

        if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
                return;

        rw_enter(&kcpc_cpuctx_lock, RW_READER);
        if (ctx->kc_flags & KCPC_CTX_INVALID) {
                rw_exit(&kcpc_cpuctx_lock);
                return;
        }
        cctx = kcpc_ctx_alloc(KM_SLEEP);
        kcpc_ctx_clone(ctx, cctx);
        rw_exit(&kcpc_cpuctx_lock);

        /*
         * Copy the parent context's kc_flags field, but don't overwrite
         * the child's in case it was modified during kcpc_ctx_clone.
         */
        KCPC_CTX_FLAG_SET(cctx, ctx->kc_flags);
        cctx->kc_thread = ct;
        cctx->kc_cpuid = -1;
        ct->t_cpc_set = cctx->kc_set;
        ct->t_cpc_ctx = cctx;

        if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
                kcpc_set_t *ks = cctx->kc_set;
                /*
                 * Our contract with the user requires us to immediately send an
                 * overflow signal to all children if we have the LWPINHERIT
                 * and SIGOVF flags set. In addition, all counters should be
                 * set to UINT64_MAX, and their pic's overflow flag turned on
                 * so that our trap() processing knows to send a signal.
                 */
                KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
                for (i = 0; i < ks->ks_nreqs; i++) {
                        kcpc_request_t *kr = &ks->ks_req[i];

                        if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
                                *(kr->kr_data) = UINT64_MAX;
                                atomic_or_uint(&kr->kr_picp->kp_flags,
                                    KCPC_PIC_OVERFLOWED);
                        }
                }
                ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
                aston(ct);
        }

        ctxop_install(ct, &kcpc_ctxop_tpl, cctx);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
 * to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
 * when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated data
 * structures are freed, and that the hardware is passivated if this is an
 * exec.
 */

/*ARGSUSED*/
void
kcpc_free(void *arg, int isexec)
{
        kcpc_ctx_t *ctx = arg;
        int i;
        kcpc_set_t *set = ctx->kc_set;

        ASSERT(set != NULL);

        /*
         * Wait for kcpc_restore() to finish before we tear things down.
         */
        mutex_enter(&ctx->kc_lock);
        while (ctx->kc_flags & KCPC_CTX_RESTORE)
                cv_wait(&ctx->kc_condv, &ctx->kc_lock);
        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
        mutex_exit(&ctx->kc_lock);

        if (isexec) {
                /*
                 * This thread is execing, and after the exec it should not have
                 * any performance counter context. Stop the counters properly
                 * here so the system isn't surprised by an overflow interrupt
                 * later.
                 */
                if (ctx->kc_cpuid != -1) {
                        cpu_t *cp;
                        /*
                         * CPU-bound context; stop the appropriate CPU's ctrs.
                         * Hold cpu_lock while examining the CPU to ensure it
                         * doesn't go away.
                         */
                        mutex_enter(&cpu_lock);
                        cp = cpu_get(ctx->kc_cpuid);
                        /*
                         * The CPU could have been DR'd out, so only stop the
                         * CPU and clear its context pointer if the CPU still
                         * exists.
                         */
                        if (cp != NULL) {
                                mutex_enter(&cp->cpu_cpc_ctxlock);
                                kcpc_stop_hw(ctx);
                                mutex_exit(&cp->cpu_cpc_ctxlock);
                        }
                        mutex_exit(&cpu_lock);
                        ASSERT(curthread->t_cpc_ctx == NULL);
                } else {
                        int save_spl;

                        /*
                         * Thread-bound context; stop _this_ CPU's counters.
                         */
                        kpreempt_disable();
                        save_spl = spl_xcall();
                        kcpc_unprogram(ctx, B_TRUE);
                        curthread->t_cpc_ctx = NULL;
                        splx(save_spl);
                        kpreempt_enable();
                }

                /*
                 * Since we are being called from an exec and we know that
                 * exec is not permitted via the agent thread, we should clean
                 * up this thread's CPC state completely, and not leave dangling
                 * CPC pointers behind.
                 */
                ASSERT(ctx->kc_thread == curthread);
                curthread->t_cpc_set = NULL;
        }

        /*
         * Walk through each request in this context's set and free the PCBE's
         * configuration if it exists.
         */
        for (i = 0; i < set->ks_nreqs; i++) {
                if (set->ks_req[i].kr_config != NULL)
                        pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
        }

        kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
        kcpc_ctx_free(ctx);
        kcpc_free_set(set);
}

void
kcpc_free_cpu(kcpc_ctx_t *ctx)
{
        kcpc_free(ctx, 0);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
        int i;
        kcpc_request_t *req;

        ASSERT(set->ks_req != NULL);

        for (i = 0; i < set->ks_nreqs; i++) {
                req = &set->ks_req[i];

                if (req->kr_nattrs != 0) {
                        kmem_free(req->kr_attr,
                            req->kr_nattrs * sizeof (kcpc_attr_t));
                }
        }

        kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
        cv_destroy(&set->ks_condv);
        mutex_destroy(&set->ks_lock);
        kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
 */
void
kcpc_invalidate_all(void)
{
        kcpc_ctx_t *ctx;
        long hash;

        for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
                mutex_enter(&kcpc_ctx_llock[hash]);
                for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
                        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
                mutex_exit(&kcpc_ctx_llock[hash]);
        }
}

/*
 * Interface for PCBEs to signal that an existing configuration has suddenly
 * become invalid.
 */
void
kcpc_invalidate_config(void *token)
{
        kcpc_ctx_t *ctx = token;

        ASSERT(ctx != NULL);

        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
        kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
        kcpc_set_t *set = curthread->t_cpc_set;
        int save_spl;

        if (set == NULL)
                return;

        if (ctx == NULL) {
                /*
                 * This thread has a set but no context; it must be a CPU-bound
                 * set. The hardware will be stopped via kcpc_unbind() when the
                 * process exits and closes its file descriptors with
                 * kcpc_close(). Our only job here is to clean up this thread's
                 * state; the set will be freed with the unbind().
                 */
                (void) kcpc_unbind(set);
                /*
                 * Unbinding a set belonging to the current thread should clear
                 * its set pointer.
                 */
                ASSERT(curthread->t_cpc_set == NULL);
                return;
        }

        kpreempt_disable();
        save_spl = spl_xcall();
        curthread->t_cpc_set = NULL;

        /*
         * This thread/LWP is exiting but context switches will continue to
         * happen for a bit as the exit proceeds. Kernel preemption must be
         * disabled here to prevent a race between checking or setting the
         * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
         * a context switch.
         */
        if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
                kcpc_unprogram(ctx, B_TRUE);
                KCPC_CTX_FLAG_SET(ctx,
                    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
        }

        /*
         * We're cleaning up after this thread; ensure there are no dangling
         * CPC pointers left behind. The context and set will be freed by
         * freectx().
         */
        curthread->t_cpc_ctx = NULL;

        splx(save_spl);
        kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
        int i;
        int *picnum_save;

        ASSERT(set->ks_nreqs <= cpc_ncounters);

        /*
         * Provide kcpc_tryassign() with scratch space to avoid doing an
         * alloc/free with every invocation.
         */
        picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
        /*
         * kcpc_tryassign() blindly walks through each request in the set,
         * seeing if a counter can count its event. If yes, it assigns that
         * counter. However, that counter may have been the only capable counter
         * for _another_ request's event. The solution is to try every possible
         * starting request. Note that this does not cover all solutions, as
         * that would require trying all unique orderings of requests, an n!
         * operation which would be unacceptable for architectures with many
         * counters.
         */
        for (i = 0; i < set->ks_nreqs; i++)
                if (kcpc_tryassign(set, i, picnum_save) == 0)
                        break;

        kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
        if (i == set->ks_nreqs)
                return (-1);
        return (0);
}
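
/*
 * Worked example of the rotation above, on hypothetical hardware: suppose
 * counter 0 can count events A and B, but counter 1 can count only A. With
 * the requests ordered {A, B}, starting at request 0 greedily assigns A to
 * counter 0 and then fails to place B. Restarting at request 1 places B on
 * counter 0 first, leaving counter 1 free for A, and the assignment
 * succeeds.
 */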

static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
        int i;
        int j;
        uint64_t bitmap = 0, resmap = 0;
        uint64_t ctrmap;

        /*
         * We are attempting to assign the reqs to pics, but we may fail. If we
         * fail, we need to restore the state of the requests to what it was
         * when we found it, as some reqs may have been explicitly assigned to
         * a specific PIC beforehand. We do this by snapshotting the assignments
         * now and restoring from it later if we fail.
         *
         * Also we note here which counters have already been claimed by
         * requests with explicit counter assignments.
         */
        for (i = 0; i < set->ks_nreqs; i++) {
                scratch[i] = set->ks_req[i].kr_picnum;
                if (set->ks_req[i].kr_picnum != -1)
                        resmap |= (1 << set->ks_req[i].kr_picnum);
        }

        /*
         * Walk through requests assigning them to the first PIC that is
         * capable.
         */
        i = starting_req;
        do {
                if (set->ks_req[i].kr_picnum != -1) {
                        ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
                        bitmap |= (1 << set->ks_req[i].kr_picnum);
                        if (++i == set->ks_nreqs)
                                i = 0;
                        continue;
                }

                ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
                for (j = 0; j < cpc_ncounters; j++) {
                        if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
                            (resmap & (1 << j)) == 0) {
                                /*
                                 * We can assign this counter because:
                                 *
                                 * 1. It can count the event (ctrmap)
                                 * 2. It hasn't been assigned yet (bitmap)
                                 * 3. It wasn't reserved by a request (resmap)
                                 */
                                bitmap |= (1 << j);
                                break;
                        }
                }
                if (j == cpc_ncounters) {
                        for (i = 0; i < set->ks_nreqs; i++)
                                set->ks_req[i].kr_picnum = scratch[i];
                        return (-1);
                }
                set->ks_req[i].kr_picnum = j;

                if (++i == set->ks_nreqs)
                        i = 0;
        } while (i != starting_req);

        return (0);
}

kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
        kcpc_set_t *new;
        int i;
        int j;

        new = kmem_zalloc(sizeof (*new), KM_SLEEP);
        new->ks_state &= ~KCPC_SET_BOUND;
        new->ks_flags = set->ks_flags;
        new->ks_nreqs = set->ks_nreqs;
        new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
            KM_SLEEP);
        new->ks_data = NULL;
        new->ks_ctx = NULL;

        for (i = 0; i < new->ks_nreqs; i++) {
                new->ks_req[i].kr_config = NULL;
                new->ks_req[i].kr_index = set->ks_req[i].kr_index;
                new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
                new->ks_req[i].kr_picp = NULL;
                new->ks_req[i].kr_data = NULL;
                (void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
                    CPC_MAX_EVENT_LEN);
                new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
                new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
                new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
                new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
                    sizeof (kcpc_attr_t), KM_SLEEP);
                for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
                        new->ks_req[i].kr_attr[j].ka_val =
                            set->ks_req[i].kr_attr[j].ka_val;
                        (void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
                            set->ks_req[i].kr_attr[j].ka_name,
                            CPC_MAX_ATTR_LEN);
                }
        }

        return (new);
}

int
kcpc_allow_nonpriv(void *token)
{
        return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

void
kcpc_invalidate(kthread_t *t)
{
        kcpc_ctx_t *ctx = t->t_cpc_ctx;

        if (ctx != NULL)
                KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
        uint_t s[3];

        s[0] = first;
        s[1] = second;
        s[2] = third;

        return (modload_qualified("pcbe",
            "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
}
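
/*
 * Illustrative sketch (not part of the build): a hypothetical x86 caller
 * passing a vendor string and family/model/stepping would cause the names
 * "pcbe.GenuineIntel.6.158.9", "pcbe.GenuineIntel.6.158",
 * "pcbe.GenuineIntel.6", and finally "pcbe.GenuineIntel" to be tried in
 * that order. The values here are made up for illustration.
 */
#ifdef notdef
static int
kcpc_tryload_example(void)
{
        return (kcpc_pcbe_tryload("GenuineIntel", 6, 158, 9));
}
#endif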
1868
1869 /*
1870 * Create one or more CPC context for given CPU with specified counter event
1871 * requests
1872 *
1873 * If number of requested counter events is less than or equal number of
1874 * hardware counters on a CPU and can all be assigned to the counters on a CPU
1875 * at the same time, then make one CPC context.
1876 *
1877 * Otherwise, multiple CPC contexts are created to allow multiplexing more
1878 * counter events than existing counters onto the counters by iterating through
1879 * all of the CPC contexts, programming the counters with each CPC context one
1880 * at a time and measuring the resulting counter values. Each of the resulting
1881 * CPC contexts contains some number of requested counter events less than or
1882 * equal the number of counters on a CPU depending on whether all the counter
1883 * events can be programmed on all the counters at the same time or not.
1884 *
1885 * Flags to kmem_{,z}alloc() are passed in as an argument to allow specifying
1886 * whether memory allocation should be non-blocking or not. The code will try
1887 * to allocate *whole* CPC contexts if possible. If there is any memory
1888 * allocation failure during the allocations needed for a given CPC context, it
1889 * will skip allocating that CPC context because it cannot allocate the whole
1890 * thing. Thus, the only time that it will end up allocating none (ie. no CPC
1891 * contexts whatsoever) is when it cannot even allocate *one* whole CPC context
1892 * without a memory allocation failure occurring.
1893 */
int
kcpc_cpu_ctx_create(cpu_t *cp, kcpc_request_list_t *req_list, int kmem_flags,
    kcpc_ctx_t ***ctx_ptr_array, size_t *ctx_ptr_array_sz)
{
	kcpc_ctx_t **ctx_ptrs;
	int nctx;
	int nctx_ptrs;
	int nreqs;
	kcpc_request_t *reqs;

	if (cp == NULL || ctx_ptr_array == NULL || ctx_ptr_array_sz == NULL ||
	    req_list == NULL || req_list->krl_cnt < 1)
		return (-1);

	/*
	 * Allocate the array of context pointers, assuming that each set will
	 * contain exactly one counter event request per counter on the CPU
	 */
	nreqs = req_list->krl_cnt;
	nctx_ptrs = (nreqs + cpc_ncounters - 1) / cpc_ncounters;
	ctx_ptrs = kmem_zalloc(nctx_ptrs * sizeof (kcpc_ctx_t *), kmem_flags);
	if (ctx_ptrs == NULL)
		return (-2);

	/*
	 * Fill in sets of requests
	 */
	nctx = 0;
	reqs = req_list->krl_list;
	while (nreqs > 0) {
		kcpc_ctx_t *ctx;
		kcpc_set_t *set;
		int subcode;

		/*
		 * Allocate CPC context and set for requested counter events.
		 * With non-blocking allocation, kcpc_ctx_alloc() may fail and
		 * return NULL, so check it before use.
		 */
		ctx = kcpc_ctx_alloc(kmem_flags);
		if (ctx == NULL)
			break;
		set = kcpc_set_create(reqs, nreqs, 0, kmem_flags);
		if (set == NULL) {
			kcpc_ctx_free(ctx);
			break;
		}

		/*
		 * Determine assignment of requested counter events to specific
		 * counters
		 */
		if (kcpc_assign_reqs(set, ctx) != 0) {
			/*
			 * The requested counter events may not all be
			 * assignable at once, since not every counter can
			 * count every event. When that happens, retry with a
			 * set containing only the first of the remaining
			 * counter event requests; at least one of the
			 * counters must be able to count it.
			 */
			kcpc_free_set(set);
			set = kcpc_set_create(reqs, 1, 0, kmem_flags);
			if (set == NULL) {
				kcpc_ctx_free(ctx);
				break;
			}
			if (kcpc_assign_reqs(set, ctx) != 0) {
#ifdef DEBUG
				cmn_err(CE_NOTE, "!kcpc_cpu_ctx_create: can't "
				    "assign counter event %s!\n",
				    set->ks_req->kr_event);
#endif
				kcpc_free_set(set);
				kcpc_ctx_free(ctx);
				reqs++;
				nreqs--;
				continue;
			}
		}

		/*
		 * Allocate memory needed to hold requested counter event data
		 */
		set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t),
		    kmem_flags);
		if (set->ks_data == NULL) {
			kcpc_free_set(set);
			kcpc_ctx_free(ctx);
			break;
		}

		/*
		 * Configure requested counter events
		 */
		if (kcpc_configure_reqs(ctx, set, &subcode) != 0) {
#ifdef DEBUG
			cmn_err(CE_NOTE,
			    "!kcpc_cpu_ctx_create: can't configure "
			    "set of counter event requests!\n");
#endif
			reqs += set->ks_nreqs;
			nreqs -= set->ks_nreqs;
			kmem_free(set->ks_data,
			    set->ks_nreqs * sizeof (uint64_t));
			kcpc_free_set(set);
			kcpc_ctx_free(ctx);
			continue;
		}

		/*
		 * Point the set of counter event requests at this context and
		 * fill in the CPC context
		 */
		set->ks_ctx = ctx;
		ctx->kc_set = set;
		ctx->kc_cpuid = cp->cpu_id;
		ctx->kc_thread = curthread;

		ctx_ptrs[nctx] = ctx;

		/*
		 * Update requests and how many are left to be assigned to sets
		 */
		reqs += set->ks_nreqs;
		nreqs -= set->ks_nreqs;

		/*
		 * Increment number of CPC contexts and allocate a bigger array
		 * for the context pointers as needed
		 */
		nctx++;
		if (nctx >= nctx_ptrs) {
			kcpc_ctx_t **new;
			int new_cnt;

			/*
			 * Grow the pointer array based on how many contexts
			 * have been allocated so far and how many counter
			 * requests are left to assign
			 */
			new_cnt = nctx_ptrs +
			    ((nreqs + cpc_ncounters - 1) / cpc_ncounters);
			new = kmem_zalloc(new_cnt * sizeof (kcpc_ctx_t *),
			    kmem_flags);
			if (new == NULL)
				break;

			/*
			 * Copy the old array of context pointers into the
			 * new one
			 */
			bcopy(ctx_ptrs, new,
			    nctx_ptrs * sizeof (kcpc_ctx_t *));

			/*
			 * Free the old array of context pointers and use the
			 * newly allocated one from now on
			 */
			kmem_free(ctx_ptrs, nctx_ptrs * sizeof (kcpc_ctx_t *));
			ctx_ptrs = new;
			nctx_ptrs = new_cnt;
		}
	}

	/*
	 * Fail, returning a NULL array, if no CPC contexts were filled in
	 */
	if (nctx == 0) {
		kmem_free(ctx_ptrs, nctx_ptrs * sizeof (kcpc_ctx_t *));
		*ctx_ptr_array = NULL;
		*ctx_ptr_array_sz = 0;
		return (-2);
	}

	*ctx_ptr_array = ctx_ptrs;
	*ctx_ptr_array_sz = nctx_ptrs * sizeof (kcpc_ctx_t *);
	return (nctx);
}
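
/*
 * Illustrative arithmetic for the sizing above (hypothetical numbers, not
 * from this file): with cpc_ncounters == 4 and req_list->krl_cnt == 7, the
 * initial allocation is (7 + 4 - 1) / 4 == 2 context pointers. If all seven
 * events can be co-scheduled four at a time, two contexts result (holding 4
 * and 3 requests); if some events cannot share counters, additional contexts
 * are created and the pointer array is grown accordingly.
 */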

/*
 * Return whether the PCBE supports the given counter event
 */
boolean_t
kcpc_event_supported(char *event)
{
	if (pcbe_ops == NULL || pcbe_ops->pcbe_event_coverage(event) == 0)
		return (B_FALSE);

	return (B_TRUE);
}

/*
 * Program counters on the current CPU with the given CPC context
 *
 * If the kernel is interposing on the counters to measure hardware capacity
 * and utilization, then unprogram the counters for the kernel *before*
 * programming them with the specified CPC context.
 *
 * kcpc_{program,unprogram}() may be called either directly by a thread running
 * on the target CPU or from a cross-call from another CPU. To protect
 * programming and unprogramming from being interrupted by cross-calls, callers
 * who execute kcpc_{program,unprogram} should raise PIL to the level used by
 * cross-calls.
 */
void
kcpc_program(kcpc_ctx_t *ctx, boolean_t for_thread, boolean_t cu_interpose)
{
	int error;

	ASSERT(IS_HIPIL());

	/*
	 * The CPC context shouldn't be NULL, its CPU field should specify the
	 * current CPU or be -1 to specify any CPU when the context is bound
	 * to a thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);
	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1)
		return;

	/*
	 * Unprogram the counters the kernel is using to measure hardware
	 * capacity and utilization before programming the given context
	 */
	if (cu_interpose == B_TRUE) {
		cu_cpc_unprogram(CPU, &error);
	} else {
		kcpc_set_t *set = ctx->kc_set;
		int i;

		ASSERT(set != NULL);

		/*
		 * Since cu_interpose is false, we are programming the CU
		 * context. In general, the PCBE can continue from the state
		 * saved in the set, but that is not very reliable, so we
		 * start again from the preset value.
		 */
		for (i = 0; i < set->ks_nreqs; i++) {
			/*
			 * Reset the virtual counter value to the preset value.
			 */
			*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;

			/*
			 * Reset the PCBE to the preset value.
			 */
			pcbe_ops->pcbe_configure(0, NULL,
			    set->ks_req[i].kr_preset,
			    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
		}
	}

	/*
	 * Program counters with the specified CPC context
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Record that the counters are programmed: a thread context is
	 * unfrozen, while a CPU context is noted on the CPU itself
	 */
	if (for_thread == B_TRUE)
		KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
	else
		CPU->cpu_cpc_ctx = ctx;
}
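
/*
 * Illustrative usage sketch (an assumption about callers, not verbatim from
 * this file): because of the IS_HIPIL() requirement above, a context is
 * normally programmed on another CPU indirectly, via a cross-call that runs
 * at cross-call PIL on the target:
 *
 *	kcpc_cpu_program(cp, ctx);	-- see kcpc_remoteprogram_func() below
 *
 * which on the target CPU boils down to:
 *
 *	kpreempt_disable();
 *	kcpc_program(ctx, B_FALSE, B_TRUE);	-- CPU context, CU interposed
 *	kpreempt_enable();
 */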

/*
 * Unprogram counters with the given CPC context on the current CPU
 *
 * If the kernel is interposing on the counters to measure hardware capacity
 * and utilization, then program the counters for the kernel capacity and
 * utilization *after* unprogramming them for the given CPC context.
 *
 * See the comment for kcpc_program() regarding the synchronization with
 * cross-calls.
 */
void
kcpc_unprogram(kcpc_ctx_t *ctx, boolean_t cu_interpose)
{
	int error;

	ASSERT(IS_HIPIL());

	/*
	 * The CPC context shouldn't be NULL, its CPU field should specify the
	 * current CPU or be -1 to specify any CPU when the context is bound
	 * to a thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);

	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1 ||
	    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) != 0) {
		return;
	}

	/*
	 * The CPC context to be unprogrammed should be bound to the current
	 * CPU or thread
	 */
	ASSERT(CPU->cpu_cpc_ctx == ctx || curthread->t_cpc_ctx == ctx);

	/*
	 * Stop counters
	 */
	pcbe_ops->pcbe_allstop();
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);

	/*
	 * Allow the kernel to interpose on the counters and program them for
	 * its own use to measure hardware capacity and utilization if the
	 * cu_interpose argument is true
	 */
	if (cu_interpose == B_TRUE)
		cu_cpc_program(CPU, &error);
}

/*
 * Read the CPU performance counters (CPC) on the current CPU and call the
 * specified update routine with the data for each counter event currently
 * programmed on the CPU
 */
int
kcpc_read(kcpc_update_func_t update_func)
{
	kcpc_ctx_t *ctx;
	int i;
	kcpc_request_t *req;
	int retval;
	kcpc_set_t *set;

	ASSERT(IS_HIPIL());

	/*
	 * Can't grab locks or block because this may be called from within
	 * the dispatcher
	 */
	kpreempt_disable();

	ctx = CPU->cpu_cpc_ctx;
	if (ctx == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Read counter data from the current CPU
	 */
	pcbe_ops->pcbe_sample(ctx);

	set = ctx->kc_set;
	if (set == NULL || set->ks_req == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Call the update function with the caller's pointer and the data for
	 * each CPC event request currently programmed on the current CPU
	 */
	req = set->ks_req;
	retval = 0;
	for (i = 0; i < set->ks_nreqs; i++) {
		int ret;

		if (req[i].kr_data == NULL)
			break;

		ret = update_func(req[i].kr_ptr, *req[i].kr_data);
		if (ret < 0)
			retval = ret;
	}

	kpreempt_enable();

	return (retval);
}
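
/*
 * Illustrative sketch of an update routine (hypothetical types and names,
 * not part of this file): kr_ptr is the opaque pointer supplied to
 * kcpc_reqs_add() and the second argument is the current 64-bit virtualized
 * count for that event:
 *
 *	static int
 *	my_update(void *ptr, uint64_t count)
 *	{
 *		((my_stat_t *)ptr)->ms_count = count;
 *		return (0);	-- a negative return is latched as the error
 *	}
 *
 *	(void) kcpc_read(my_update);
 */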

/*
 * Initialize list of counter event requests
 */
kcpc_request_list_t *
kcpc_reqs_init(int nreqs, int kmem_flags)
{
	kcpc_request_list_t *req_list;
	kcpc_request_t *reqs;

	if (nreqs < 1)
		return (NULL);

	req_list = kmem_zalloc(sizeof (kcpc_request_list_t), kmem_flags);
	if (req_list == NULL)
		return (NULL);

	reqs = kmem_zalloc(nreqs * sizeof (kcpc_request_t), kmem_flags);
	if (reqs == NULL) {
		kmem_free(req_list, sizeof (kcpc_request_list_t));
		return (NULL);
	}

	req_list->krl_list = reqs;
	req_list->krl_cnt = 0;
	req_list->krl_max = nreqs;
	return (req_list);
}

/*
 * Add counter event request to given list of counter event requests
 */
int
kcpc_reqs_add(kcpc_request_list_t *req_list, char *event, uint64_t preset,
    uint_t flags, uint_t nattrs, kcpc_attr_t *attr, void *ptr, int kmem_flags)
{
	kcpc_request_t *req;

	if (req_list == NULL || req_list->krl_list == NULL)
		return (-1);

	ASSERT(req_list->krl_max != 0);

	/*
	 * Allocate more space when the list is full; the count of requests
	 * already in the list is preserved across the copy so that the
	 * existing entries are retained.
	 */
	if (req_list->krl_cnt >= req_list->krl_max) {
		kcpc_request_t *new;
		kcpc_request_t *old;

		old = req_list->krl_list;
		new = kmem_zalloc((req_list->krl_max +
		    cpc_ncounters) * sizeof (kcpc_request_t), kmem_flags);
		if (new == NULL)
			return (-2);

		req_list->krl_list = new;
		bcopy(old, req_list->krl_list,
		    req_list->krl_cnt * sizeof (kcpc_request_t));
		kmem_free(old, req_list->krl_max * sizeof (kcpc_request_t));
		req_list->krl_max += cpc_ncounters;
	}

	/*
	 * Fill in the request as much as possible now, but some fields will
	 * need to be set when the request is assigned to a set.
	 */
	req = &req_list->krl_list[req_list->krl_cnt];
	req->kr_config = NULL;
	req->kr_picnum = -1;	/* have CPC pick this */
	req->kr_index = -1;	/* set when assigning request to set */
	req->kr_data = NULL;	/* set when configuring request */
	(void) strcpy(req->kr_event, event);
	req->kr_preset = preset;
	req->kr_flags = flags;
	req->kr_nattrs = nattrs;
	req->kr_attr = attr;
	/*
	 * Keep the pointer given by the caller to pass to the update function
	 * when this counter event is sampled/read
	 */
	req->kr_ptr = ptr;

	req_list->krl_cnt++;

	return (0);
}

/*
 * Reset list of CPC event requests so its space can be used for another set
 * of requests
 */
int
kcpc_reqs_reset(kcpc_request_list_t *req_list)
{
	/*
	 * Return an error when the pointer to the request list structure or
	 * its request array is NULL, or when the maximum number of requests
	 * is less than or equal to 0
	 */
	if (req_list == NULL || req_list->krl_list == NULL ||
	    req_list->krl_max <= 0)
		return (-1);

	/*
	 * Zero out requests and number of requests used
	 */
	bzero(req_list->krl_list, req_list->krl_max * sizeof (kcpc_request_t));
	req_list->krl_cnt = 0;
	return (0);
}

/*
 * Free given list of counter event requests
 */
int
kcpc_reqs_fini(kcpc_request_list_t *req_list)
{
	kmem_free(req_list->krl_list,
	    req_list->krl_max * sizeof (kcpc_request_t));
	kmem_free(req_list, sizeof (kcpc_request_list_t));
	return (0);
}
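
/*
 * Illustrative lifecycle sketch for the request-list API above (the event
 * name and the caller's cookie below are hypothetical, not from this file):
 *
 *	kcpc_request_list_t *rl = kcpc_reqs_init(cpc_ncounters, KM_SLEEP);
 *	(void) kcpc_reqs_add(rl, "PAPI_tot_ins", 0, 0, 0, NULL,
 *	    my_cookie, KM_SLEEP);
 *	...hand rl to kcpc_cpu_ctx_create() to build per-CPU contexts...
 *	(void) kcpc_reqs_reset(rl);	-- reuse the list for another set
 *	(void) kcpc_reqs_fini(rl);	-- or free it entirely
 */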

/*
 * Create set of given counter event requests
 */
static kcpc_set_t *
kcpc_set_create(kcpc_request_t *reqs, int nreqs, int set_flags, int kmem_flags)
{
	int i;
	kcpc_set_t *set;

	/*
	 * Allocate set and assign number of requests in set and flags
	 */
	set = kmem_zalloc(sizeof (kcpc_set_t), kmem_flags);
	if (set == NULL)
		return (NULL);

	if (nreqs < cpc_ncounters)
		set->ks_nreqs = nreqs;
	else
		set->ks_nreqs = cpc_ncounters;

	set->ks_flags = set_flags;

	/*
	 * Allocate requests needed, copy requests into set, and set index into
	 * data for each request (which may change when we assign requested
	 * counter events to counters)
	 */
	set->ks_req = kmem_zalloc(sizeof (kcpc_request_t) * set->ks_nreqs,
	    kmem_flags);
	if (set->ks_req == NULL) {
		kmem_free(set, sizeof (kcpc_set_t));
		return (NULL);
	}

	bcopy(reqs, set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);

	for (i = 0; i < set->ks_nreqs; i++)
		set->ks_req[i].kr_index = i;

	return (set);
}

/*
 * Stop counters on current CPU.
 *
 * If preserve_context is true, the caller is interested in the CPU's CPC
 * context and wants it to be preserved.
 *
 * If preserve_context is false, the caller does not need the CPU's CPC context
 * to be preserved, so it is set to NULL.
 */
static void
kcpc_cpustop_func(uintptr_t arg1, uintptr_t arg2 __unused)
{
	boolean_t preserve_context;

	kpreempt_disable();

	preserve_context = (boolean_t)arg1;

	/*
	 * Someone already stopped this context before us, so there is nothing
	 * to do.
	 */
	if (CPU->cpu_cpc_ctx == NULL) {
		kpreempt_enable();
		return;
	}

	kcpc_unprogram(CPU->cpu_cpc_ctx, B_TRUE);

	/*
	 * If CU does not use the counters, then clear the CPU's CPC context.
	 * If the caller requested that the context be preserved, it should
	 * have disabled CU first, so there should be no CU context now.
	 */
	ASSERT(!preserve_context || !CU_CPC_ON(CPU));
	if (!preserve_context && CPU->cpu_cpc_ctx != NULL && !CU_CPC_ON(CPU))
		CPU->cpu_cpc_ctx = NULL;

	kpreempt_enable();
}

/*
 * Stop counters on given CPU and set its CPC context to NULL unless
 * preserve_context is true.
 */
void
kcpc_cpu_stop(cpu_t *cp, boolean_t preserve_context)
{
	cpu_call(cp, kcpc_cpustop_func, preserve_context, 0);
}

/*
 * Program the context on the current CPU
 */
static void
kcpc_remoteprogram_func(uintptr_t arg1, uintptr_t arg2)
{
	kcpc_ctx_t *ctx = (kcpc_ctx_t *)arg1;
	boolean_t for_thread = (boolean_t)arg2;

	ASSERT(ctx != NULL);

	kpreempt_disable();
	kcpc_program(ctx, for_thread, B_TRUE);
	kpreempt_enable();
}

/*
 * Program counters on given CPU
 */
void
kcpc_cpu_program(cpu_t *cp, kcpc_ctx_t *ctx)
{
	cpu_call(cp, kcpc_remoteprogram_func, (uintptr_t)ctx,
	    (uintptr_t)B_FALSE);
}
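
/*
 * Illustrative sketch of a typical stop/reprogram sequence on a remote CPU
 * (an assumption about usage, e.g. by the capacity/utilization code; not a
 * verbatim caller from this file):
 *
 *	kcpc_cpu_stop(cp, B_FALSE);	-- stop counters, clear cp's context
 *	kcpc_cpu_program(cp, new_ctx);	-- program new_ctx on cp via x-call
 */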

char *
kcpc_list_attrs(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_attrs());
}

char *
kcpc_list_events(uint_t pic)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_events(pic));
}

uint_t
kcpc_pcbe_capabilities(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_caps);
}

int
kcpc_pcbe_loaded(void)
{
	return (pcbe_ops == NULL ? -1 : 0);
}