xref: /illumos-gate/usr/src/uts/common/os/kcpc.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate #include <sys/param.h>
30*7c478bd9Sstevel@tonic-gate #include <sys/thread.h>
31*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
32*7c478bd9Sstevel@tonic-gate #include <sys/inttypes.h>
33*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
34*7c478bd9Sstevel@tonic-gate #include <sys/time.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/mutex.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/kcpc.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/cpc_impl.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/cpc_pcbe.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
43*7c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
44*7c478bd9Sstevel@tonic-gate #if defined(__x86)
45*7c478bd9Sstevel@tonic-gate #include <asm/clock.h>
46*7c478bd9Sstevel@tonic-gate #endif
47*7c478bd9Sstevel@tonic-gate 
48*7c478bd9Sstevel@tonic-gate kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
49*7c478bd9Sstevel@tonic-gate kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */
50*7c478bd9Sstevel@tonic-gate 
51*7c478bd9Sstevel@tonic-gate 
52*7c478bd9Sstevel@tonic-gate krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
53*7c478bd9Sstevel@tonic-gate int		kcpc_cpuctx;		/* number of cpu-specific contexts */
54*7c478bd9Sstevel@tonic-gate 
55*7c478bd9Sstevel@tonic-gate int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */
56*7c478bd9Sstevel@tonic-gate 
57*7c478bd9Sstevel@tonic-gate /*
58*7c478bd9Sstevel@tonic-gate  * These are set when a PCBE module is loaded.
59*7c478bd9Sstevel@tonic-gate  */
60*7c478bd9Sstevel@tonic-gate uint_t		cpc_ncounters = 0;
61*7c478bd9Sstevel@tonic-gate pcbe_ops_t	*pcbe_ops = NULL;
62*7c478bd9Sstevel@tonic-gate 
63*7c478bd9Sstevel@tonic-gate /*
64*7c478bd9Sstevel@tonic-gate  * Statistics on (mis)behavior
65*7c478bd9Sstevel@tonic-gate  */
66*7c478bd9Sstevel@tonic-gate static uint32_t kcpc_intrctx_count;    /* # overflows in an interrupt handler */
67*7c478bd9Sstevel@tonic-gate static uint32_t kcpc_nullctx_count;    /* # overflows in a thread with no ctx */
68*7c478bd9Sstevel@tonic-gate 
69*7c478bd9Sstevel@tonic-gate /*
70*7c478bd9Sstevel@tonic-gate  * Is misbehaviour (overflow in a thread with no context) fatal?
71*7c478bd9Sstevel@tonic-gate  */
72*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
73*7c478bd9Sstevel@tonic-gate static int kcpc_nullctx_panic = 1;
74*7c478bd9Sstevel@tonic-gate #else
75*7c478bd9Sstevel@tonic-gate static int kcpc_nullctx_panic = 0;
76*7c478bd9Sstevel@tonic-gate #endif
77*7c478bd9Sstevel@tonic-gate 
78*7c478bd9Sstevel@tonic-gate static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
79*7c478bd9Sstevel@tonic-gate static void kcpc_restore(kcpc_ctx_t *ctx);
80*7c478bd9Sstevel@tonic-gate static void kcpc_save(kcpc_ctx_t *ctx);
81*7c478bd9Sstevel@tonic-gate static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
82*7c478bd9Sstevel@tonic-gate static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode);
83*7c478bd9Sstevel@tonic-gate static void kcpc_free_configs(kcpc_set_t *set);
84*7c478bd9Sstevel@tonic-gate static kcpc_ctx_t *kcpc_ctx_alloc(void);
85*7c478bd9Sstevel@tonic-gate static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
86*7c478bd9Sstevel@tonic-gate static void kcpc_ctx_free(kcpc_ctx_t *ctx);
87*7c478bd9Sstevel@tonic-gate static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx);
88*7c478bd9Sstevel@tonic-gate static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
89*7c478bd9Sstevel@tonic-gate static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);
90*7c478bd9Sstevel@tonic-gate 
/*
 * Called by a PCBE (Performance Counter BackEnd) module when it loads:
 * publishes the backend's ops vector and caches the number of counters
 * it exposes.  No locking is done here; presumably only a single PCBE
 * registers per system — confirm against the PCBE load path.
 */
void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}
97*7c478bd9Sstevel@tonic-gate 
98*7c478bd9Sstevel@tonic-gate int
99*7c478bd9Sstevel@tonic-gate kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
100*7c478bd9Sstevel@tonic-gate {
101*7c478bd9Sstevel@tonic-gate 	cpu_t		*cp;
102*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t	*ctx;
103*7c478bd9Sstevel@tonic-gate 	int		error;
104*7c478bd9Sstevel@tonic-gate 
105*7c478bd9Sstevel@tonic-gate 	ctx = kcpc_ctx_alloc();
106*7c478bd9Sstevel@tonic-gate 
107*7c478bd9Sstevel@tonic-gate 	if (kcpc_assign_reqs(set, ctx) != 0) {
108*7c478bd9Sstevel@tonic-gate 		kcpc_ctx_free(ctx);
109*7c478bd9Sstevel@tonic-gate 		*subcode = CPC_RESOURCE_UNAVAIL;
110*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
111*7c478bd9Sstevel@tonic-gate 	}
112*7c478bd9Sstevel@tonic-gate 
113*7c478bd9Sstevel@tonic-gate 	ctx->kc_cpuid = cpuid;
114*7c478bd9Sstevel@tonic-gate 	ctx->kc_thread = curthread;
115*7c478bd9Sstevel@tonic-gate 
116*7c478bd9Sstevel@tonic-gate 	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);
117*7c478bd9Sstevel@tonic-gate 
118*7c478bd9Sstevel@tonic-gate 	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
119*7c478bd9Sstevel@tonic-gate 		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
120*7c478bd9Sstevel@tonic-gate 		kcpc_ctx_free(ctx);
121*7c478bd9Sstevel@tonic-gate 		return (error);
122*7c478bd9Sstevel@tonic-gate 	}
123*7c478bd9Sstevel@tonic-gate 
124*7c478bd9Sstevel@tonic-gate 	set->ks_ctx = ctx;
125*7c478bd9Sstevel@tonic-gate 	ctx->kc_set = set;
126*7c478bd9Sstevel@tonic-gate 
127*7c478bd9Sstevel@tonic-gate 	/*
128*7c478bd9Sstevel@tonic-gate 	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
129*7c478bd9Sstevel@tonic-gate 	 * we are manipulating the cpu_t and programming the hardware, else the
130*7c478bd9Sstevel@tonic-gate 	 * the cpu_t could go away while we're looking at it.
131*7c478bd9Sstevel@tonic-gate 	 */
132*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
133*7c478bd9Sstevel@tonic-gate 	cp = cpu_get(cpuid);
134*7c478bd9Sstevel@tonic-gate 
135*7c478bd9Sstevel@tonic-gate 	if (cp == NULL)
136*7c478bd9Sstevel@tonic-gate 		/*
137*7c478bd9Sstevel@tonic-gate 		 * The CPU could have been DRd out while we were getting set up.
138*7c478bd9Sstevel@tonic-gate 		 */
139*7c478bd9Sstevel@tonic-gate 		goto unbound;
140*7c478bd9Sstevel@tonic-gate 
141*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cp->cpu_cpc_ctxlock);
142*7c478bd9Sstevel@tonic-gate 
143*7c478bd9Sstevel@tonic-gate 	if (cp->cpu_cpc_ctx != NULL) {
144*7c478bd9Sstevel@tonic-gate 		/*
145*7c478bd9Sstevel@tonic-gate 		 * If this CPU already has a bound set, return an error.
146*7c478bd9Sstevel@tonic-gate 		 */
147*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cp->cpu_cpc_ctxlock);
148*7c478bd9Sstevel@tonic-gate 		goto unbound;
149*7c478bd9Sstevel@tonic-gate 	}
150*7c478bd9Sstevel@tonic-gate 
151*7c478bd9Sstevel@tonic-gate 	if (curthread->t_bind_cpu != cpuid) {
152*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cp->cpu_cpc_ctxlock);
153*7c478bd9Sstevel@tonic-gate 		goto unbound;
154*7c478bd9Sstevel@tonic-gate 	}
155*7c478bd9Sstevel@tonic-gate 	cp->cpu_cpc_ctx = ctx;
156*7c478bd9Sstevel@tonic-gate 
157*7c478bd9Sstevel@tonic-gate 	/*
158*7c478bd9Sstevel@tonic-gate 	 * Kernel preemption must be disabled while fiddling with the hardware
159*7c478bd9Sstevel@tonic-gate 	 * registers to prevent partial updates.
160*7c478bd9Sstevel@tonic-gate 	 */
161*7c478bd9Sstevel@tonic-gate 	kpreempt_disable();
162*7c478bd9Sstevel@tonic-gate 	ctx->kc_rawtick = KCPC_GET_TICK();
163*7c478bd9Sstevel@tonic-gate 	pcbe_ops->pcbe_program(ctx);
164*7c478bd9Sstevel@tonic-gate 	kpreempt_enable();
165*7c478bd9Sstevel@tonic-gate 
166*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cp->cpu_cpc_ctxlock);
167*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
168*7c478bd9Sstevel@tonic-gate 
169*7c478bd9Sstevel@tonic-gate 	return (0);
170*7c478bd9Sstevel@tonic-gate 
171*7c478bd9Sstevel@tonic-gate unbound:
172*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
173*7c478bd9Sstevel@tonic-gate 	set->ks_ctx = NULL;
174*7c478bd9Sstevel@tonic-gate 	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
175*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_free(ctx);
176*7c478bd9Sstevel@tonic-gate 	return (EAGAIN);
177*7c478bd9Sstevel@tonic-gate }
178*7c478bd9Sstevel@tonic-gate 
179*7c478bd9Sstevel@tonic-gate int
180*7c478bd9Sstevel@tonic-gate kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
181*7c478bd9Sstevel@tonic-gate {
182*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t	*ctx;
183*7c478bd9Sstevel@tonic-gate 	int		error;
184*7c478bd9Sstevel@tonic-gate 
185*7c478bd9Sstevel@tonic-gate 	/*
186*7c478bd9Sstevel@tonic-gate 	 * Only one set is allowed per context, so ensure there is no
187*7c478bd9Sstevel@tonic-gate 	 * existing context.
188*7c478bd9Sstevel@tonic-gate 	 */
189*7c478bd9Sstevel@tonic-gate 
190*7c478bd9Sstevel@tonic-gate 	if (t->t_cpc_ctx != NULL)
191*7c478bd9Sstevel@tonic-gate 		return (EEXIST);
192*7c478bd9Sstevel@tonic-gate 
193*7c478bd9Sstevel@tonic-gate 	ctx = kcpc_ctx_alloc();
194*7c478bd9Sstevel@tonic-gate 
195*7c478bd9Sstevel@tonic-gate 	/*
196*7c478bd9Sstevel@tonic-gate 	 * The context must begin life frozen until it has been properly
197*7c478bd9Sstevel@tonic-gate 	 * programmed onto the hardware. This prevents the context ops from
198*7c478bd9Sstevel@tonic-gate 	 * worrying about it until we're ready.
199*7c478bd9Sstevel@tonic-gate 	 */
200*7c478bd9Sstevel@tonic-gate 	ctx->kc_flags |= KCPC_CTX_FREEZE;
201*7c478bd9Sstevel@tonic-gate 	ctx->kc_hrtime = gethrtime();
202*7c478bd9Sstevel@tonic-gate 
203*7c478bd9Sstevel@tonic-gate 	if (kcpc_assign_reqs(set, ctx) != 0) {
204*7c478bd9Sstevel@tonic-gate 		kcpc_ctx_free(ctx);
205*7c478bd9Sstevel@tonic-gate 		*subcode = CPC_RESOURCE_UNAVAIL;
206*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
207*7c478bd9Sstevel@tonic-gate 	}
208*7c478bd9Sstevel@tonic-gate 
209*7c478bd9Sstevel@tonic-gate 	ctx->kc_cpuid = -1;
210*7c478bd9Sstevel@tonic-gate 	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
211*7c478bd9Sstevel@tonic-gate 		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
212*7c478bd9Sstevel@tonic-gate 	ctx->kc_thread = t;
213*7c478bd9Sstevel@tonic-gate 	t->t_cpc_ctx = ctx;
214*7c478bd9Sstevel@tonic-gate 	/*
215*7c478bd9Sstevel@tonic-gate 	 * Permit threads to look at their own hardware counters from userland.
216*7c478bd9Sstevel@tonic-gate 	 */
217*7c478bd9Sstevel@tonic-gate 	ctx->kc_flags |= KCPC_CTX_NONPRIV;
218*7c478bd9Sstevel@tonic-gate 
219*7c478bd9Sstevel@tonic-gate 	/*
220*7c478bd9Sstevel@tonic-gate 	 * Create the data store for this set.
221*7c478bd9Sstevel@tonic-gate 	 */
222*7c478bd9Sstevel@tonic-gate 	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);
223*7c478bd9Sstevel@tonic-gate 
224*7c478bd9Sstevel@tonic-gate 	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
225*7c478bd9Sstevel@tonic-gate 		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
226*7c478bd9Sstevel@tonic-gate 		kcpc_ctx_free(ctx);
227*7c478bd9Sstevel@tonic-gate 		t->t_cpc_ctx = NULL;
228*7c478bd9Sstevel@tonic-gate 		return (error);
229*7c478bd9Sstevel@tonic-gate 	}
230*7c478bd9Sstevel@tonic-gate 
231*7c478bd9Sstevel@tonic-gate 	set->ks_ctx = ctx;
232*7c478bd9Sstevel@tonic-gate 	ctx->kc_set = set;
233*7c478bd9Sstevel@tonic-gate 
234*7c478bd9Sstevel@tonic-gate 	/*
235*7c478bd9Sstevel@tonic-gate 	 * Add a device context to the subject thread.
236*7c478bd9Sstevel@tonic-gate 	 */
237*7c478bd9Sstevel@tonic-gate 	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
238*7c478bd9Sstevel@tonic-gate 	    kcpc_lwp_create, NULL, kcpc_free);
239*7c478bd9Sstevel@tonic-gate 
240*7c478bd9Sstevel@tonic-gate 	/*
241*7c478bd9Sstevel@tonic-gate 	 * Ask the backend to program the hardware.
242*7c478bd9Sstevel@tonic-gate 	 */
243*7c478bd9Sstevel@tonic-gate 	if (t == curthread) {
244*7c478bd9Sstevel@tonic-gate 		kpreempt_disable();
245*7c478bd9Sstevel@tonic-gate 		ctx->kc_rawtick = KCPC_GET_TICK();
246*7c478bd9Sstevel@tonic-gate 		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
247*7c478bd9Sstevel@tonic-gate 		pcbe_ops->pcbe_program(ctx);
248*7c478bd9Sstevel@tonic-gate 		kpreempt_enable();
249*7c478bd9Sstevel@tonic-gate 	} else
250*7c478bd9Sstevel@tonic-gate 		/*
251*7c478bd9Sstevel@tonic-gate 		 * Since we are the agent LWP, we know the victim LWP is stopped
252*7c478bd9Sstevel@tonic-gate 		 * until we're done here; no need to worry about preemption or
253*7c478bd9Sstevel@tonic-gate 		 * migration here. We still use an atomic op to clear the flag
254*7c478bd9Sstevel@tonic-gate 		 * to ensure the flags are always self-consistent; they can
255*7c478bd9Sstevel@tonic-gate 		 * still be accessed from, for instance, another CPU doing a
256*7c478bd9Sstevel@tonic-gate 		 * kcpc_invalidate_all().
257*7c478bd9Sstevel@tonic-gate 		 */
258*7c478bd9Sstevel@tonic-gate 		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
259*7c478bd9Sstevel@tonic-gate 
260*7c478bd9Sstevel@tonic-gate 
261*7c478bd9Sstevel@tonic-gate 	return (0);
262*7c478bd9Sstevel@tonic-gate }
263*7c478bd9Sstevel@tonic-gate 
/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 *
 * On success each request's kr_config points at the PCBE-private config,
 * kr_picp/kp_req cross-link the request and its pic slot, and the request's
 * data slot in ks_data is seeded with its preset value.
 *
 * Returns 0 on success.  On failure, frees any configs created so far
 * (via kcpc_free_configs()), stores the PCBE error in *subcode, and
 * returns ENOTSUP (overflow notify requested but unsupported), EACCES
 * (privileged attribute), or EINVAL (any other configure failure).
 */
static int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		/* Physical counter number chosen earlier by kcpc_assign_reqs(). */
		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		/* Each pic slot may be claimed by at most one request. */
		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		/*
		 * kr_config must be NULL before the configure call so that a
		 * failure here leaves only genuinely-configured requests for
		 * kcpc_free_configs() to release.
		 */
		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			if (ret == CPC_ATTR_REQUIRES_PRIVILEGE)
				return (EACCES);
			return (EINVAL);
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}
318*7c478bd9Sstevel@tonic-gate 
319*7c478bd9Sstevel@tonic-gate static void
320*7c478bd9Sstevel@tonic-gate kcpc_free_configs(kcpc_set_t *set)
321*7c478bd9Sstevel@tonic-gate {
322*7c478bd9Sstevel@tonic-gate 	int i;
323*7c478bd9Sstevel@tonic-gate 
324*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++)
325*7c478bd9Sstevel@tonic-gate 		if (set->ks_req[i].kr_config != NULL)
326*7c478bd9Sstevel@tonic-gate 			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
327*7c478bd9Sstevel@tonic-gate }
328*7c478bd9Sstevel@tonic-gate 
/*
 * Sample the current counter values for a bound set and copy them out to
 * the current process.
 *
 * buf points to a user address and the data should be copied out to that
 * address in the current process.  hrtime and tick are likewise user
 * addresses receiving the sample timestamp and the accumulated virtualized
 * tick count.
 *
 * Returns EINVAL if the set has no context, EAGAIN if the context is
 * invalid or a CPU-bound caller has lost its CPU binding, EFAULT on any
 * copyout failure, 0 otherwise.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	if (ctx == NULL)
		return (EINVAL);
	else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	/* Frozen contexts just report the values saved at freeze time. */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			/*
			 * Only the owning thread reads the hardware; it also
			 * advances the virtualized tick count by the ticks
			 * elapsed since the last sample/program.
			 */
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}
379*7c478bd9Sstevel@tonic-gate 
/*
 * Stop the counters on the CPU this context is bound to.
 *
 * The context must already be marked invalid but not yet stopped; on
 * return the hardware is stopped and KCPC_CTX_INVALID_STOPPED is set
 * (either here for the local CPU, or by kcpc_remote_stop() for a remote
 * one).  Preemption is disabled so the CPU identity check stays valid
 * while we act on it.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		/* Bound CPU is the one we're running on: stop it directly. */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		/* Cross-call to the bound CPU to stop its counters. */
		kcpc_remote_stop(cp);
	kpreempt_enable();
}
404*7c478bd9Sstevel@tonic-gate 
/*
 * Unbind a set from its thread or CPU: invalidate the context, stop the
 * hardware if still running, and tear down the association.
 *
 * For a thread-bound set the context is freed indirectly via
 * removectx() -> freectx() -> kcpc_free(); for a CPU-bound set it is
 * freed here only when the caller owns it.  Returns EINVAL if the set
 * has no context, 0 otherwise.
 */
int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	kthread_t	*t;

	if (ctx == NULL)
		return (EINVAL);

	/* Mark the context dead before touching anything else. */
	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context.  It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
			(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			/*
			 * Our own counters are still running; stop them
			 * before the context goes away.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not preset on thread %p",
			    ctx, t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			/* CPU-bound contexts are freed by their owner. */
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}
477*7c478bd9Sstevel@tonic-gate 
478*7c478bd9Sstevel@tonic-gate int
479*7c478bd9Sstevel@tonic-gate kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
480*7c478bd9Sstevel@tonic-gate {
481*7c478bd9Sstevel@tonic-gate 	int i;
482*7c478bd9Sstevel@tonic-gate 
483*7c478bd9Sstevel@tonic-gate 	ASSERT(set != NULL);
484*7c478bd9Sstevel@tonic-gate 	ASSERT(set->ks_ctx != NULL);
485*7c478bd9Sstevel@tonic-gate 	ASSERT(set->ks_ctx->kc_thread == curthread);
486*7c478bd9Sstevel@tonic-gate 	ASSERT(set->ks_ctx->kc_cpuid == -1);
487*7c478bd9Sstevel@tonic-gate 
488*7c478bd9Sstevel@tonic-gate 	if (index < 0 || index >= set->ks_nreqs)
489*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
490*7c478bd9Sstevel@tonic-gate 
491*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++)
492*7c478bd9Sstevel@tonic-gate 		if (set->ks_req[i].kr_index == index)
493*7c478bd9Sstevel@tonic-gate 			break;
494*7c478bd9Sstevel@tonic-gate 	ASSERT(i != set->ks_nreqs);
495*7c478bd9Sstevel@tonic-gate 
496*7c478bd9Sstevel@tonic-gate 	set->ks_req[i].kr_preset = preset;
497*7c478bd9Sstevel@tonic-gate 	return (0);
498*7c478bd9Sstevel@tonic-gate }
499*7c478bd9Sstevel@tonic-gate 
/*
 * Restart a thread-bound set: reset every request's data slot to its
 * preset, reconfigure the counters, and reprogram the hardware.  Only the
 * owning thread may call this; the set need not be frozen — a running set
 * is stopped first.
 *
 * NOTE(review): the return value of pcbe_ops->pcbe_configure() is ignored
 * here; presumably reconfiguring an already-validated request cannot
 * fail — confirm against the PCBE contract.
 */
int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(ctx != NULL);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}
535*7c478bd9Sstevel@tonic-gate 
/*
 * Enable/disable counting, or toggle user/system event counting, on the
 * given thread's bound set.
 *
 * Caller must hold kcpc_cpuctx_lock (as reader).
 *
 * cmd CPC_ENABLE/CPC_DISABLE simply thaw/freeze the context.  For
 * CPC_USR_EVENTS/CPC_SYS_EVENTS the set must be rebuilt: counters are
 * stopped, presets captured, request flags updated, and the set is
 * unbound and re-bound as a fresh copy.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		/* Thaw a frozen context and reprogram the hardware. */
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		/* Save the current counts, then freeze the context. */
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			/* Carry the current count over as the new preset. */
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		/*
		 * NOTE(review): if kcpc_unbind() fails here, newset appears
		 * to be leaked and the thread keeps its old set — confirm
		 * whether unbind can actually fail on this path (ks_ctx is
		 * known non-NULL for a thread-bound set).
		 */
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}
611*7c478bd9Sstevel@tonic-gate 
612*7c478bd9Sstevel@tonic-gate /*
613*7c478bd9Sstevel@tonic-gate  * Provide PCBEs with a way of obtaining the configs of every counter which will
614*7c478bd9Sstevel@tonic-gate  * be programmed together.
615*7c478bd9Sstevel@tonic-gate  *
616*7c478bd9Sstevel@tonic-gate  * If current is NULL, provide the first config.
617*7c478bd9Sstevel@tonic-gate  *
618*7c478bd9Sstevel@tonic-gate  * If data != NULL, caller wants to know where the data store associated with
619*7c478bd9Sstevel@tonic-gate  * the config we return is located.
620*7c478bd9Sstevel@tonic-gate  */
621*7c478bd9Sstevel@tonic-gate void *
622*7c478bd9Sstevel@tonic-gate kcpc_next_config(void *token, void *current, uint64_t **data)
623*7c478bd9Sstevel@tonic-gate {
624*7c478bd9Sstevel@tonic-gate 	int		i;
625*7c478bd9Sstevel@tonic-gate 	kcpc_pic_t	*pic;
626*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t *ctx = (kcpc_ctx_t *)token;
627*7c478bd9Sstevel@tonic-gate 
628*7c478bd9Sstevel@tonic-gate 	if (current == NULL) {
629*7c478bd9Sstevel@tonic-gate 		/*
630*7c478bd9Sstevel@tonic-gate 		 * Client would like the first config, which may not be in
631*7c478bd9Sstevel@tonic-gate 		 * counter 0; we need to search through the counters for the
632*7c478bd9Sstevel@tonic-gate 		 * first config.
633*7c478bd9Sstevel@tonic-gate 		 */
634*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < cpc_ncounters; i++)
635*7c478bd9Sstevel@tonic-gate 			if (ctx->kc_pics[i].kp_req != NULL)
636*7c478bd9Sstevel@tonic-gate 				break;
637*7c478bd9Sstevel@tonic-gate 		/*
638*7c478bd9Sstevel@tonic-gate 		 * There are no counters configured for the given context.
639*7c478bd9Sstevel@tonic-gate 		 */
640*7c478bd9Sstevel@tonic-gate 		if (i == cpc_ncounters)
641*7c478bd9Sstevel@tonic-gate 			return (NULL);
642*7c478bd9Sstevel@tonic-gate 	} else {
643*7c478bd9Sstevel@tonic-gate 		/*
644*7c478bd9Sstevel@tonic-gate 		 * There surely is a faster way to do this.
645*7c478bd9Sstevel@tonic-gate 		 */
646*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < cpc_ncounters; i++) {
647*7c478bd9Sstevel@tonic-gate 			pic = &ctx->kc_pics[i];
648*7c478bd9Sstevel@tonic-gate 
649*7c478bd9Sstevel@tonic-gate 			if (pic->kp_req != NULL &&
650*7c478bd9Sstevel@tonic-gate 			    current == pic->kp_req->kr_config)
651*7c478bd9Sstevel@tonic-gate 				break;
652*7c478bd9Sstevel@tonic-gate 		}
653*7c478bd9Sstevel@tonic-gate 
654*7c478bd9Sstevel@tonic-gate 		/*
655*7c478bd9Sstevel@tonic-gate 		 * We found the current config at picnum i. Now search for the
656*7c478bd9Sstevel@tonic-gate 		 * next configured PIC.
657*7c478bd9Sstevel@tonic-gate 		 */
658*7c478bd9Sstevel@tonic-gate 		for (i++; i < cpc_ncounters; i++) {
659*7c478bd9Sstevel@tonic-gate 			pic = &ctx->kc_pics[i];
660*7c478bd9Sstevel@tonic-gate 			if (pic->kp_req != NULL)
661*7c478bd9Sstevel@tonic-gate 				break;
662*7c478bd9Sstevel@tonic-gate 		}
663*7c478bd9Sstevel@tonic-gate 
664*7c478bd9Sstevel@tonic-gate 		if (i == cpc_ncounters)
665*7c478bd9Sstevel@tonic-gate 			return (NULL);
666*7c478bd9Sstevel@tonic-gate 	}
667*7c478bd9Sstevel@tonic-gate 
668*7c478bd9Sstevel@tonic-gate 	if (data != NULL) {
669*7c478bd9Sstevel@tonic-gate 		*data = ctx->kc_pics[i].kp_req->kr_data;
670*7c478bd9Sstevel@tonic-gate 	}
671*7c478bd9Sstevel@tonic-gate 
672*7c478bd9Sstevel@tonic-gate 	return (ctx->kc_pics[i].kp_req->kr_config);
673*7c478bd9Sstevel@tonic-gate }
674*7c478bd9Sstevel@tonic-gate 
675*7c478bd9Sstevel@tonic-gate 
676*7c478bd9Sstevel@tonic-gate static kcpc_ctx_t *
677*7c478bd9Sstevel@tonic-gate kcpc_ctx_alloc(void)
678*7c478bd9Sstevel@tonic-gate {
679*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t	*ctx;
680*7c478bd9Sstevel@tonic-gate 	long		hash;
681*7c478bd9Sstevel@tonic-gate 
682*7c478bd9Sstevel@tonic-gate 	ctx = (kcpc_ctx_t *)kmem_alloc(sizeof (kcpc_ctx_t), KM_SLEEP);
683*7c478bd9Sstevel@tonic-gate 
684*7c478bd9Sstevel@tonic-gate 	hash = CPC_HASH_CTX(ctx);
685*7c478bd9Sstevel@tonic-gate 	mutex_enter(&kcpc_ctx_llock[hash]);
686*7c478bd9Sstevel@tonic-gate 	ctx->kc_next = kcpc_ctx_list[hash];
687*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_list[hash] = ctx;
688*7c478bd9Sstevel@tonic-gate 	mutex_exit(&kcpc_ctx_llock[hash]);
689*7c478bd9Sstevel@tonic-gate 
690*7c478bd9Sstevel@tonic-gate 	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
691*7c478bd9Sstevel@tonic-gate 	    cpc_ncounters, KM_SLEEP);
692*7c478bd9Sstevel@tonic-gate 
693*7c478bd9Sstevel@tonic-gate 	ctx->kc_flags = 0;
694*7c478bd9Sstevel@tonic-gate 	ctx->kc_vtick = 0;
695*7c478bd9Sstevel@tonic-gate 	ctx->kc_rawtick = 0;
696*7c478bd9Sstevel@tonic-gate 	ctx->kc_cpuid = -1;
697*7c478bd9Sstevel@tonic-gate 
698*7c478bd9Sstevel@tonic-gate 	return (ctx);
699*7c478bd9Sstevel@tonic-gate }
700*7c478bd9Sstevel@tonic-gate 
701*7c478bd9Sstevel@tonic-gate /*
702*7c478bd9Sstevel@tonic-gate  * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
703*7c478bd9Sstevel@tonic-gate  * in the flags.
704*7c478bd9Sstevel@tonic-gate  */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	/* Only clone if the parent's set asked to be inherited by new LWPs. */
	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	/*
	 * Allocate a new set for the child context and copy the set-wide
	 * fields; the child gets its own request array and data store.
	 */
	cks = kmem_alloc(sizeof (*cks), KM_SLEEP);
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	/*
	 * Deep-copy each request, including its attribute list. kr_data,
	 * kr_picp and kr_config are not copied here; presumably
	 * kcpc_configure_reqs() below establishes them -- TODO confirm.
	 */
	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		/*
		 * NOTE(review): when kr_nattrs == 0 the child's kr_attr is
		 * left uninitialized; this is safe only if kr_attr is never
		 * dereferenced (or freed) for zero-attr requests -- confirm.
		 */
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
				sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	/*
	 * The parent's identical set configured successfully, so a failure
	 * to configure the child's copy is unexpected; panic rather than
	 * let the child run with half-configured counters.
	 */
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		panic("kcpc_ctx_clone: configure of context %p with "
		    "set %p failed with subcode %d", cctx, cks, code);
}
752*7c478bd9Sstevel@tonic-gate 
753*7c478bd9Sstevel@tonic-gate 
754*7c478bd9Sstevel@tonic-gate static void
755*7c478bd9Sstevel@tonic-gate kcpc_ctx_free(kcpc_ctx_t *ctx)
756*7c478bd9Sstevel@tonic-gate {
757*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t	**loc;
758*7c478bd9Sstevel@tonic-gate 	long		hash = CPC_HASH_CTX(ctx);
759*7c478bd9Sstevel@tonic-gate 
760*7c478bd9Sstevel@tonic-gate 	mutex_enter(&kcpc_ctx_llock[hash]);
761*7c478bd9Sstevel@tonic-gate 	loc = &kcpc_ctx_list[hash];
762*7c478bd9Sstevel@tonic-gate 	ASSERT(*loc != NULL);
763*7c478bd9Sstevel@tonic-gate 	while (*loc != ctx)
764*7c478bd9Sstevel@tonic-gate 		loc = &(*loc)->kc_next;
765*7c478bd9Sstevel@tonic-gate 	*loc = ctx->kc_next;
766*7c478bd9Sstevel@tonic-gate 	mutex_exit(&kcpc_ctx_llock[hash]);
767*7c478bd9Sstevel@tonic-gate 
768*7c478bd9Sstevel@tonic-gate 	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
769*7c478bd9Sstevel@tonic-gate 	kmem_free(ctx, sizeof (*ctx));
770*7c478bd9Sstevel@tonic-gate }
771*7c478bd9Sstevel@tonic-gate 
772*7c478bd9Sstevel@tonic-gate /*
773*7c478bd9Sstevel@tonic-gate  * Generic interrupt handler used on hardware that generates
774*7c478bd9Sstevel@tonic-gate  * overflow interrupts.
775*7c478bd9Sstevel@tonic-gate  *
776*7c478bd9Sstevel@tonic-gate  * Note: executed at high-level interrupt context!
777*7c478bd9Sstevel@tonic-gate  */
778*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
779*7c478bd9Sstevel@tonic-gate kcpc_ctx_t *
780*7c478bd9Sstevel@tonic-gate kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
781*7c478bd9Sstevel@tonic-gate {
782*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t	*ctx;
783*7c478bd9Sstevel@tonic-gate 	kthread_t	*t = curthread;
784*7c478bd9Sstevel@tonic-gate 	int		i;
785*7c478bd9Sstevel@tonic-gate 
786*7c478bd9Sstevel@tonic-gate 	/*
787*7c478bd9Sstevel@tonic-gate 	 * On both x86 and UltraSPARC, we may deliver the high-level
788*7c478bd9Sstevel@tonic-gate 	 * interrupt in kernel mode, just after we've started to run an
789*7c478bd9Sstevel@tonic-gate 	 * interrupt thread.  (That's because the hardware helpfully
790*7c478bd9Sstevel@tonic-gate 	 * delivers the overflow interrupt some random number of cycles
791*7c478bd9Sstevel@tonic-gate 	 * after the instruction that caused the overflow by which time
792*7c478bd9Sstevel@tonic-gate 	 * we're in some part of the kernel, not necessarily running on
793*7c478bd9Sstevel@tonic-gate 	 * the right thread).
794*7c478bd9Sstevel@tonic-gate 	 *
795*7c478bd9Sstevel@tonic-gate 	 * Check for this case here -- find the pinned thread
796*7c478bd9Sstevel@tonic-gate 	 * that was running when the interrupt went off.
797*7c478bd9Sstevel@tonic-gate 	 */
798*7c478bd9Sstevel@tonic-gate 	if (t->t_flag & T_INTR_THREAD) {
799*7c478bd9Sstevel@tonic-gate 		klwp_t *lwp;
800*7c478bd9Sstevel@tonic-gate 
801*7c478bd9Sstevel@tonic-gate 		atomic_add_32(&kcpc_intrctx_count, 1);
802*7c478bd9Sstevel@tonic-gate 
803*7c478bd9Sstevel@tonic-gate 		/*
804*7c478bd9Sstevel@tonic-gate 		 * Note that t_lwp is always set to point at the underlying
805*7c478bd9Sstevel@tonic-gate 		 * thread, thus this will work in the presence of nested
806*7c478bd9Sstevel@tonic-gate 		 * interrupts.
807*7c478bd9Sstevel@tonic-gate 		 */
808*7c478bd9Sstevel@tonic-gate 		ctx = NULL;
809*7c478bd9Sstevel@tonic-gate 		if ((lwp = t->t_lwp) != NULL) {
810*7c478bd9Sstevel@tonic-gate 			t = lwptot(lwp);
811*7c478bd9Sstevel@tonic-gate 			ctx = t->t_cpc_ctx;
812*7c478bd9Sstevel@tonic-gate 		}
813*7c478bd9Sstevel@tonic-gate 	} else
814*7c478bd9Sstevel@tonic-gate 		ctx = t->t_cpc_ctx;
815*7c478bd9Sstevel@tonic-gate 
816*7c478bd9Sstevel@tonic-gate 	if (ctx == NULL) {
817*7c478bd9Sstevel@tonic-gate 		/*
818*7c478bd9Sstevel@tonic-gate 		 * This can easily happen if we're using the counters in
819*7c478bd9Sstevel@tonic-gate 		 * "shared" mode, for example, and an overflow interrupt
820*7c478bd9Sstevel@tonic-gate 		 * occurs while we are running cpustat.  In that case, the
821*7c478bd9Sstevel@tonic-gate 		 * bound thread that has the context that belongs to this
822*7c478bd9Sstevel@tonic-gate 		 * CPU is almost certainly sleeping (if it was running on
823*7c478bd9Sstevel@tonic-gate 		 * the CPU we'd have found it above), and the actual
824*7c478bd9Sstevel@tonic-gate 		 * interrupted thread has no knowledge of performance counters!
825*7c478bd9Sstevel@tonic-gate 		 */
826*7c478bd9Sstevel@tonic-gate 		ctx = curthread->t_cpu->cpu_cpc_ctx;
827*7c478bd9Sstevel@tonic-gate 		if (ctx != NULL) {
828*7c478bd9Sstevel@tonic-gate 			/*
829*7c478bd9Sstevel@tonic-gate 			 * Return the bound context for this CPU to
830*7c478bd9Sstevel@tonic-gate 			 * the interrupt handler so that it can synchronously
831*7c478bd9Sstevel@tonic-gate 			 * sample the hardware counters and restart them.
832*7c478bd9Sstevel@tonic-gate 			 */
833*7c478bd9Sstevel@tonic-gate 			return (ctx);
834*7c478bd9Sstevel@tonic-gate 		}
835*7c478bd9Sstevel@tonic-gate 
836*7c478bd9Sstevel@tonic-gate 		/*
837*7c478bd9Sstevel@tonic-gate 		 * As long as the overflow interrupt really is delivered early
838*7c478bd9Sstevel@tonic-gate 		 * enough after trapping into the kernel to avoid switching
839*7c478bd9Sstevel@tonic-gate 		 * threads, we must always be able to find the cpc context,
840*7c478bd9Sstevel@tonic-gate 		 * or something went terribly wrong i.e. we ended up
841*7c478bd9Sstevel@tonic-gate 		 * running a passivated interrupt thread, a kernel
842*7c478bd9Sstevel@tonic-gate 		 * thread or we interrupted idle, all of which are Very Bad.
843*7c478bd9Sstevel@tonic-gate 		 */
844*7c478bd9Sstevel@tonic-gate 		if (kcpc_nullctx_panic)
845*7c478bd9Sstevel@tonic-gate 			panic("null cpc context, thread %p", (void *)t);
846*7c478bd9Sstevel@tonic-gate 		atomic_add_32(&kcpc_nullctx_count, 1);
847*7c478bd9Sstevel@tonic-gate 	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
848*7c478bd9Sstevel@tonic-gate 		/*
849*7c478bd9Sstevel@tonic-gate 		 * Schedule an ast to sample the counters, which will
850*7c478bd9Sstevel@tonic-gate 		 * propagate any overflow into the virtualized performance
851*7c478bd9Sstevel@tonic-gate 		 * counter(s), and may deliver a signal.
852*7c478bd9Sstevel@tonic-gate 		 */
853*7c478bd9Sstevel@tonic-gate 		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
854*7c478bd9Sstevel@tonic-gate 		/*
855*7c478bd9Sstevel@tonic-gate 		 * If a counter has overflowed which was counting on behalf of
856*7c478bd9Sstevel@tonic-gate 		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
857*7c478bd9Sstevel@tonic-gate 		 * process a signal.
858*7c478bd9Sstevel@tonic-gate 		 */
859*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < cpc_ncounters; i++) {
860*7c478bd9Sstevel@tonic-gate 			if (ctx->kc_pics[i].kp_req != NULL &&
861*7c478bd9Sstevel@tonic-gate 			    bitmap & (1 << i) &&
862*7c478bd9Sstevel@tonic-gate 			    ctx->kc_pics[i].kp_req->kr_flags &
863*7c478bd9Sstevel@tonic-gate 			    CPC_OVF_NOTIFY_EMT) {
864*7c478bd9Sstevel@tonic-gate 				/*
865*7c478bd9Sstevel@tonic-gate 				 * A signal has been requested for this PIC, so
866*7c478bd9Sstevel@tonic-gate 				 * so freeze the context. The interrupt handler
867*7c478bd9Sstevel@tonic-gate 				 * has already stopped the counter hardware.
868*7c478bd9Sstevel@tonic-gate 				 */
869*7c478bd9Sstevel@tonic-gate 				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
870*7c478bd9Sstevel@tonic-gate 				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
871*7c478bd9Sstevel@tonic-gate 				    KCPC_PIC_OVERFLOWED);
872*7c478bd9Sstevel@tonic-gate 			}
873*7c478bd9Sstevel@tonic-gate 		}
874*7c478bd9Sstevel@tonic-gate 		aston(t);
875*7c478bd9Sstevel@tonic-gate 	}
876*7c478bd9Sstevel@tonic-gate 	return (NULL);
877*7c478bd9Sstevel@tonic-gate }
878*7c478bd9Sstevel@tonic-gate 
879*7c478bd9Sstevel@tonic-gate /*
880*7c478bd9Sstevel@tonic-gate  * The current thread context had an overflow interrupt; we're
881*7c478bd9Sstevel@tonic-gate  * executing here in high-level interrupt context.
882*7c478bd9Sstevel@tonic-gate  */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t	*ctx;
	uint64_t	bitmap;

	/*
	 * If no PCBE backend is loaded, or the backend reports that no
	 * counters actually overflowed, this interrupt isn't ours.
	 */
	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	/*
	 * Invoke the "generic" handler.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_hw_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		/*
		 * Fold the elapsed raw ticks into the virtualized tick
		 * count, then sample and reprogram the hardware.
		 */
		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}
926*7c478bd9Sstevel@tonic-gate 
927*7c478bd9Sstevel@tonic-gate /*
928*7c478bd9Sstevel@tonic-gate  * Called from trap() when processing the ast posted by the high-level
929*7c478bd9Sstevel@tonic-gate  * interrupt handler.
930*7c478bd9Sstevel@tonic-gate  */
931*7c478bd9Sstevel@tonic-gate int
932*7c478bd9Sstevel@tonic-gate kcpc_overflow_ast()
933*7c478bd9Sstevel@tonic-gate {
934*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
935*7c478bd9Sstevel@tonic-gate 	int		i;
936*7c478bd9Sstevel@tonic-gate 	int		found = 0;
937*7c478bd9Sstevel@tonic-gate 	uint64_t	curtick = KCPC_GET_TICK();
938*7c478bd9Sstevel@tonic-gate 
939*7c478bd9Sstevel@tonic-gate 	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */
940*7c478bd9Sstevel@tonic-gate 
941*7c478bd9Sstevel@tonic-gate 	/*
942*7c478bd9Sstevel@tonic-gate 	 * An overflow happened: sample the context to ensure that
943*7c478bd9Sstevel@tonic-gate 	 * the overflow is propagated into the upper bits of the
944*7c478bd9Sstevel@tonic-gate 	 * virtualized 64-bit counter(s).
945*7c478bd9Sstevel@tonic-gate 	 */
946*7c478bd9Sstevel@tonic-gate 	kpreempt_disable();
947*7c478bd9Sstevel@tonic-gate 	ctx->kc_hrtime = gethrtime_waitfree();
948*7c478bd9Sstevel@tonic-gate 	pcbe_ops->pcbe_sample(ctx);
949*7c478bd9Sstevel@tonic-gate 	kpreempt_enable();
950*7c478bd9Sstevel@tonic-gate 
951*7c478bd9Sstevel@tonic-gate 	ctx->kc_vtick += curtick - ctx->kc_rawtick;
952*7c478bd9Sstevel@tonic-gate 
953*7c478bd9Sstevel@tonic-gate 	/*
954*7c478bd9Sstevel@tonic-gate 	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
955*7c478bd9Sstevel@tonic-gate 	 * if that pic generated an overflow and if the request it was counting
956*7c478bd9Sstevel@tonic-gate 	 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
957*7c478bd9Sstevel@tonic-gate 	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
958*7c478bd9Sstevel@tonic-gate 	 * found any overflowed pics, keep the context frozen and return true
959*7c478bd9Sstevel@tonic-gate 	 * (thus causing a signal to be sent).
960*7c478bd9Sstevel@tonic-gate 	 */
961*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < cpc_ncounters; i++) {
962*7c478bd9Sstevel@tonic-gate 		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
963*7c478bd9Sstevel@tonic-gate 			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
964*7c478bd9Sstevel@tonic-gate 			    ~KCPC_PIC_OVERFLOWED);
965*7c478bd9Sstevel@tonic-gate 			found = 1;
966*7c478bd9Sstevel@tonic-gate 		}
967*7c478bd9Sstevel@tonic-gate 	}
968*7c478bd9Sstevel@tonic-gate 	if (found)
969*7c478bd9Sstevel@tonic-gate 		return (1);
970*7c478bd9Sstevel@tonic-gate 
971*7c478bd9Sstevel@tonic-gate 	/*
972*7c478bd9Sstevel@tonic-gate 	 * Otherwise, re-enable the counters and continue life as before.
973*7c478bd9Sstevel@tonic-gate 	 */
974*7c478bd9Sstevel@tonic-gate 	kpreempt_disable();
975*7c478bd9Sstevel@tonic-gate 	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
976*7c478bd9Sstevel@tonic-gate 	pcbe_ops->pcbe_program(ctx);
977*7c478bd9Sstevel@tonic-gate 	kpreempt_enable();
978*7c478bd9Sstevel@tonic-gate 	return (0);
979*7c478bd9Sstevel@tonic-gate }
980*7c478bd9Sstevel@tonic-gate 
981*7c478bd9Sstevel@tonic-gate /*
982*7c478bd9Sstevel@tonic-gate  * Called when switching away from current thread.
983*7c478bd9Sstevel@tonic-gate  */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		/* Counters were already stopped; nothing more to do. */
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	/*
	 * A frozen context must not accumulate further counts, so don't
	 * sample it; leave the virtualized values as they are.
	 */
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}
1010*7c478bd9Sstevel@tonic-gate 
static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);


	/* Invalid or frozen contexts leave the counters stopped. */
	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE))
		return;

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
}
1035*7c478bd9Sstevel@tonic-gate 
1036*7c478bd9Sstevel@tonic-gate /*
1037*7c478bd9Sstevel@tonic-gate  * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the the
1038*7c478bd9Sstevel@tonic-gate  * following context operators to the idle thread on each CPU. They stop the
1039*7c478bd9Sstevel@tonic-gate  * counters when the idle thread is switched on, and they start them again when
1040*7c478bd9Sstevel@tonic-gate  * it is switched off.
1041*7c478bd9Sstevel@tonic-gate  */
1042*7c478bd9Sstevel@tonic-gate 
1043*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1044*7c478bd9Sstevel@tonic-gate void
1045*7c478bd9Sstevel@tonic-gate kcpc_idle_save(struct cpu *cp)
1046*7c478bd9Sstevel@tonic-gate {
1047*7c478bd9Sstevel@tonic-gate 	/*
1048*7c478bd9Sstevel@tonic-gate 	 * The idle thread shouldn't be run anywhere else.
1049*7c478bd9Sstevel@tonic-gate 	 */
1050*7c478bd9Sstevel@tonic-gate 	ASSERT(CPU == cp);
1051*7c478bd9Sstevel@tonic-gate 
1052*7c478bd9Sstevel@tonic-gate 	/*
1053*7c478bd9Sstevel@tonic-gate 	 * We must hold the CPU's context lock to ensure the context isn't freed
1054*7c478bd9Sstevel@tonic-gate 	 * while we're looking at it.
1055*7c478bd9Sstevel@tonic-gate 	 */
1056*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cp->cpu_cpc_ctxlock);
1057*7c478bd9Sstevel@tonic-gate 
1058*7c478bd9Sstevel@tonic-gate 	if ((cp->cpu_cpc_ctx == NULL) ||
1059*7c478bd9Sstevel@tonic-gate 	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
1060*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cp->cpu_cpc_ctxlock);
1061*7c478bd9Sstevel@tonic-gate 		return;
1062*7c478bd9Sstevel@tonic-gate 	}
1063*7c478bd9Sstevel@tonic-gate 
1064*7c478bd9Sstevel@tonic-gate 	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
1065*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cp->cpu_cpc_ctxlock);
1066*7c478bd9Sstevel@tonic-gate }
1067*7c478bd9Sstevel@tonic-gate 
1068*7c478bd9Sstevel@tonic-gate void
1069*7c478bd9Sstevel@tonic-gate kcpc_idle_restore(struct cpu *cp)
1070*7c478bd9Sstevel@tonic-gate {
1071*7c478bd9Sstevel@tonic-gate 	/*
1072*7c478bd9Sstevel@tonic-gate 	 * The idle thread shouldn't be run anywhere else.
1073*7c478bd9Sstevel@tonic-gate 	 */
1074*7c478bd9Sstevel@tonic-gate 	ASSERT(CPU == cp);
1075*7c478bd9Sstevel@tonic-gate 
1076*7c478bd9Sstevel@tonic-gate 	/*
1077*7c478bd9Sstevel@tonic-gate 	 * We must hold the CPU's context lock to ensure the context isn't freed
1078*7c478bd9Sstevel@tonic-gate 	 * while we're looking at it.
1079*7c478bd9Sstevel@tonic-gate 	 */
1080*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cp->cpu_cpc_ctxlock);
1081*7c478bd9Sstevel@tonic-gate 
1082*7c478bd9Sstevel@tonic-gate 	if ((cp->cpu_cpc_ctx == NULL) ||
1083*7c478bd9Sstevel@tonic-gate 	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
1084*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cp->cpu_cpc_ctxlock);
1085*7c478bd9Sstevel@tonic-gate 		return;
1086*7c478bd9Sstevel@tonic-gate 	}
1087*7c478bd9Sstevel@tonic-gate 
1088*7c478bd9Sstevel@tonic-gate 	pcbe_ops->pcbe_allstop();
1089*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cp->cpu_cpc_ctxlock);
1090*7c478bd9Sstevel@tonic-gate }
1091*7c478bd9Sstevel@tonic-gate 
/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	/* Nothing to inherit unless the parent is bound with LWPINHERIT. */
	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	/*
	 * Clone the parent's context under kcpc_cpuctx_lock as reader;
	 * presumably invalidators take this lock as writer, so the parent
	 * context can't disappear mid-copy -- TODO confirm.
	 */
	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	/* Bind the cloned context to the child thread (never CPU-bound). */
	cctx->kc_flags = ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		/* Post an AST so the child takes the overflow path in trap(). */
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	/*
	 * Hook the child's context ops so its counters are saved/restored
	 * across context switches, re-inherited by its own children, and
	 * freed with the thread.
	 */
	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}
1142*7c478bd9Sstevel@tonic-gate 
1143*7c478bd9Sstevel@tonic-gate /*
1144*7c478bd9Sstevel@tonic-gate  * Counter Stoppage Theory
1145*7c478bd9Sstevel@tonic-gate  *
1146*7c478bd9Sstevel@tonic-gate  * The counters may need to be stopped properly at the following occasions:
1147*7c478bd9Sstevel@tonic-gate  *
1148*7c478bd9Sstevel@tonic-gate  * 1) An LWP exits.
1149*7c478bd9Sstevel@tonic-gate  * 2) A thread exits.
1150*7c478bd9Sstevel@tonic-gate  * 3) An LWP performs an exec().
1151*7c478bd9Sstevel@tonic-gate  * 4) A bound set is unbound.
1152*7c478bd9Sstevel@tonic-gate  *
1153*7c478bd9Sstevel@tonic-gate  * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
1154*7c478bd9Sstevel@tonic-gate  * to be freed as well.
1155*7c478bd9Sstevel@tonic-gate  *
1156*7c478bd9Sstevel@tonic-gate  * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
1157*7c478bd9Sstevel@tonic-gate  * when the thread is freed, kcpc_free(), called by freectx(), frees the
1158*7c478bd9Sstevel@tonic-gate  * context.
1159*7c478bd9Sstevel@tonic-gate  *
1160*7c478bd9Sstevel@tonic-gate  * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
1161*7c478bd9Sstevel@tonic-gate  *
1162*7c478bd9Sstevel@tonic-gate  * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
1163*7c478bd9Sstevel@tonic-gate  * been called from exec. It stops the counters _and_ frees the context.
1164*7c478bd9Sstevel@tonic-gate  *
1165*7c478bd9Sstevel@tonic-gate  * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
1166*7c478bd9Sstevel@tonic-gate  *
1167*7c478bd9Sstevel@tonic-gate  * CPU-bound counters are always stopped via kcpc_unbind().
1168*7c478bd9Sstevel@tonic-gate  */
1169*7c478bd9Sstevel@tonic-gate 
1170*7c478bd9Sstevel@tonic-gate /*
1171*7c478bd9Sstevel@tonic-gate  * We're being called to delete the context; we ensure that all associated data
1172*7c478bd9Sstevel@tonic-gate  * structures are freed, and that the hardware is passivated if this is an exec.
1173*7c478bd9Sstevel@tonic-gate  */
1174*7c478bd9Sstevel@tonic-gate 
1175*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1176*7c478bd9Sstevel@tonic-gate static void
1177*7c478bd9Sstevel@tonic-gate kcpc_free(kcpc_ctx_t *ctx, int isexec)
1178*7c478bd9Sstevel@tonic-gate {
1179*7c478bd9Sstevel@tonic-gate 	int		i;
1180*7c478bd9Sstevel@tonic-gate 	kcpc_set_t	*set = ctx->kc_set;
1181*7c478bd9Sstevel@tonic-gate 
1182*7c478bd9Sstevel@tonic-gate 	ASSERT(set != NULL);
1183*7c478bd9Sstevel@tonic-gate 
1184*7c478bd9Sstevel@tonic-gate 	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
1185*7c478bd9Sstevel@tonic-gate 
1186*7c478bd9Sstevel@tonic-gate 	if (isexec) {
1187*7c478bd9Sstevel@tonic-gate 		/*
1188*7c478bd9Sstevel@tonic-gate 		 * This thread is execing, and after the exec it should not have
1189*7c478bd9Sstevel@tonic-gate 		 * any performance counter context. Stop the counters properly
1190*7c478bd9Sstevel@tonic-gate 		 * here so the system isn't surprised by an overflow interrupt
1191*7c478bd9Sstevel@tonic-gate 		 * later.
1192*7c478bd9Sstevel@tonic-gate 		 */
1193*7c478bd9Sstevel@tonic-gate 		if (ctx->kc_cpuid != -1) {
1194*7c478bd9Sstevel@tonic-gate 			cpu_t *cp;
1195*7c478bd9Sstevel@tonic-gate 			/*
1196*7c478bd9Sstevel@tonic-gate 			 * CPU-bound context; stop the appropriate CPU's ctrs.
1197*7c478bd9Sstevel@tonic-gate 			 * Hold cpu_lock while examining the CPU to ensure it
1198*7c478bd9Sstevel@tonic-gate 			 * doesn't go away.
1199*7c478bd9Sstevel@tonic-gate 			 */
1200*7c478bd9Sstevel@tonic-gate 			mutex_enter(&cpu_lock);
1201*7c478bd9Sstevel@tonic-gate 			cp = cpu_get(ctx->kc_cpuid);
1202*7c478bd9Sstevel@tonic-gate 			/*
1203*7c478bd9Sstevel@tonic-gate 			 * The CPU could have been DR'd out, so only stop the
1204*7c478bd9Sstevel@tonic-gate 			 * CPU and clear its context pointer if the CPU still
1205*7c478bd9Sstevel@tonic-gate 			 * exists.
1206*7c478bd9Sstevel@tonic-gate 			 */
1207*7c478bd9Sstevel@tonic-gate 			if (cp != NULL) {
1208*7c478bd9Sstevel@tonic-gate 				mutex_enter(&cp->cpu_cpc_ctxlock);
1209*7c478bd9Sstevel@tonic-gate 				kcpc_stop_hw(ctx);
1210*7c478bd9Sstevel@tonic-gate 				cp->cpu_cpc_ctx = NULL;
1211*7c478bd9Sstevel@tonic-gate 				mutex_exit(&cp->cpu_cpc_ctxlock);
1212*7c478bd9Sstevel@tonic-gate 			}
1213*7c478bd9Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
1214*7c478bd9Sstevel@tonic-gate 			ASSERT(curthread->t_cpc_ctx == NULL);
1215*7c478bd9Sstevel@tonic-gate 		} else {
1216*7c478bd9Sstevel@tonic-gate 			/*
1217*7c478bd9Sstevel@tonic-gate 			 * Thread-bound context; stop _this_ CPU's counters.
1218*7c478bd9Sstevel@tonic-gate 			 */
1219*7c478bd9Sstevel@tonic-gate 			kpreempt_disable();
1220*7c478bd9Sstevel@tonic-gate 			pcbe_ops->pcbe_allstop();
1221*7c478bd9Sstevel@tonic-gate 			atomic_or_uint(&ctx->kc_flags,
1222*7c478bd9Sstevel@tonic-gate 			    KCPC_CTX_INVALID_STOPPED);
1223*7c478bd9Sstevel@tonic-gate 			kpreempt_enable();
1224*7c478bd9Sstevel@tonic-gate 			curthread->t_cpc_ctx = NULL;
1225*7c478bd9Sstevel@tonic-gate 		}
1226*7c478bd9Sstevel@tonic-gate 
1227*7c478bd9Sstevel@tonic-gate 		/*
1228*7c478bd9Sstevel@tonic-gate 		 * Since we are being called from an exec and we know that
1229*7c478bd9Sstevel@tonic-gate 		 * exec is not permitted via the agent thread, we should clean
1230*7c478bd9Sstevel@tonic-gate 		 * up this thread's CPC state completely, and not leave dangling
1231*7c478bd9Sstevel@tonic-gate 		 * CPC pointers behind.
1232*7c478bd9Sstevel@tonic-gate 		 */
1233*7c478bd9Sstevel@tonic-gate 		ASSERT(ctx->kc_thread == curthread);
1234*7c478bd9Sstevel@tonic-gate 		curthread->t_cpc_set = NULL;
1235*7c478bd9Sstevel@tonic-gate 	}
1236*7c478bd9Sstevel@tonic-gate 
1237*7c478bd9Sstevel@tonic-gate 	/*
1238*7c478bd9Sstevel@tonic-gate 	 * Walk through each request in this context's set and free the PCBE's
1239*7c478bd9Sstevel@tonic-gate 	 * configuration if it exists.
1240*7c478bd9Sstevel@tonic-gate 	 */
1241*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++) {
1242*7c478bd9Sstevel@tonic-gate 		if (set->ks_req[i].kr_config != NULL)
1243*7c478bd9Sstevel@tonic-gate 			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
1244*7c478bd9Sstevel@tonic-gate 	}
1245*7c478bd9Sstevel@tonic-gate 
1246*7c478bd9Sstevel@tonic-gate 	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
1247*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_free(ctx);
1248*7c478bd9Sstevel@tonic-gate 	kcpc_free_set(set);
1249*7c478bd9Sstevel@tonic-gate }
1250*7c478bd9Sstevel@tonic-gate 
1251*7c478bd9Sstevel@tonic-gate /*
1252*7c478bd9Sstevel@tonic-gate  * Free the memory associated with a request set.
1253*7c478bd9Sstevel@tonic-gate  */
1254*7c478bd9Sstevel@tonic-gate void
1255*7c478bd9Sstevel@tonic-gate kcpc_free_set(kcpc_set_t *set)
1256*7c478bd9Sstevel@tonic-gate {
1257*7c478bd9Sstevel@tonic-gate 	int		i;
1258*7c478bd9Sstevel@tonic-gate 	kcpc_request_t	*req;
1259*7c478bd9Sstevel@tonic-gate 
1260*7c478bd9Sstevel@tonic-gate 	ASSERT(set->ks_req != NULL);
1261*7c478bd9Sstevel@tonic-gate 
1262*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++) {
1263*7c478bd9Sstevel@tonic-gate 		req = &set->ks_req[i];
1264*7c478bd9Sstevel@tonic-gate 
1265*7c478bd9Sstevel@tonic-gate 		if (req->kr_nattrs != 0) {
1266*7c478bd9Sstevel@tonic-gate 			kmem_free(req->kr_attr,
1267*7c478bd9Sstevel@tonic-gate 			    req->kr_nattrs * sizeof (kcpc_attr_t));
1268*7c478bd9Sstevel@tonic-gate 		}
1269*7c478bd9Sstevel@tonic-gate 	}
1270*7c478bd9Sstevel@tonic-gate 
1271*7c478bd9Sstevel@tonic-gate 	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
1272*7c478bd9Sstevel@tonic-gate 	kmem_free(set, sizeof (kcpc_set_t));
1273*7c478bd9Sstevel@tonic-gate }
1274*7c478bd9Sstevel@tonic-gate 
1275*7c478bd9Sstevel@tonic-gate /*
1276*7c478bd9Sstevel@tonic-gate  * Grab every existing context and mark it as invalid.
1277*7c478bd9Sstevel@tonic-gate  */
1278*7c478bd9Sstevel@tonic-gate void
1279*7c478bd9Sstevel@tonic-gate kcpc_invalidate_all(void)
1280*7c478bd9Sstevel@tonic-gate {
1281*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t *ctx;
1282*7c478bd9Sstevel@tonic-gate 	long hash;
1283*7c478bd9Sstevel@tonic-gate 
1284*7c478bd9Sstevel@tonic-gate 	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
1285*7c478bd9Sstevel@tonic-gate 		mutex_enter(&kcpc_ctx_llock[hash]);
1286*7c478bd9Sstevel@tonic-gate 		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
1287*7c478bd9Sstevel@tonic-gate 			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
1288*7c478bd9Sstevel@tonic-gate 		mutex_exit(&kcpc_ctx_llock[hash]);
1289*7c478bd9Sstevel@tonic-gate 	}
1290*7c478bd9Sstevel@tonic-gate }
1291*7c478bd9Sstevel@tonic-gate 
1292*7c478bd9Sstevel@tonic-gate /*
1293*7c478bd9Sstevel@tonic-gate  * Called from lwp_exit() and thread_exit()
1294*7c478bd9Sstevel@tonic-gate  */
1295*7c478bd9Sstevel@tonic-gate void
1296*7c478bd9Sstevel@tonic-gate kcpc_passivate(void)
1297*7c478bd9Sstevel@tonic-gate {
1298*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
1299*7c478bd9Sstevel@tonic-gate 	kcpc_set_t *set = curthread->t_cpc_set;
1300*7c478bd9Sstevel@tonic-gate 
1301*7c478bd9Sstevel@tonic-gate 	if (set == NULL)
1302*7c478bd9Sstevel@tonic-gate 		return;
1303*7c478bd9Sstevel@tonic-gate 
1304*7c478bd9Sstevel@tonic-gate 	/*
1305*7c478bd9Sstevel@tonic-gate 	 * We're cleaning up after this thread; ensure there are no dangling
1306*7c478bd9Sstevel@tonic-gate 	 * CPC pointers left behind. The context and set will be freed by
1307*7c478bd9Sstevel@tonic-gate 	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
1308*7c478bd9Sstevel@tonic-gate 	 * the case of a CPU-bound set.
1309*7c478bd9Sstevel@tonic-gate 	 */
1310*7c478bd9Sstevel@tonic-gate 	curthread->t_cpc_ctx = NULL;
1311*7c478bd9Sstevel@tonic-gate 
1312*7c478bd9Sstevel@tonic-gate 	if (ctx == NULL) {
1313*7c478bd9Sstevel@tonic-gate 		/*
1314*7c478bd9Sstevel@tonic-gate 		 * This thread has a set but no context; it must be a CPU-bound
1315*7c478bd9Sstevel@tonic-gate 		 * set. The hardware will be stopped via kcpc_unbind() when the
1316*7c478bd9Sstevel@tonic-gate 		 * process exits and closes its file descriptors with
1317*7c478bd9Sstevel@tonic-gate 		 * kcpc_close(). Our only job here is to clean up this thread's
1318*7c478bd9Sstevel@tonic-gate 		 * state; the set will be freed with the unbind().
1319*7c478bd9Sstevel@tonic-gate 		 */
1320*7c478bd9Sstevel@tonic-gate 		(void) kcpc_unbind(set);
1321*7c478bd9Sstevel@tonic-gate 		/*
1322*7c478bd9Sstevel@tonic-gate 		 * Unbinding a set belonging to the current thread should clear
1323*7c478bd9Sstevel@tonic-gate 		 * its set pointer.
1324*7c478bd9Sstevel@tonic-gate 		 */
1325*7c478bd9Sstevel@tonic-gate 		ASSERT(curthread->t_cpc_set == NULL);
1326*7c478bd9Sstevel@tonic-gate 		return;
1327*7c478bd9Sstevel@tonic-gate 	}
1328*7c478bd9Sstevel@tonic-gate 
1329*7c478bd9Sstevel@tonic-gate 	curthread->t_cpc_set = NULL;
1330*7c478bd9Sstevel@tonic-gate 
1331*7c478bd9Sstevel@tonic-gate 	/*
1332*7c478bd9Sstevel@tonic-gate 	 * This thread/LWP is exiting but context switches will continue to
1333*7c478bd9Sstevel@tonic-gate 	 * happen for a bit as the exit proceeds.  Kernel preemption must be
1334*7c478bd9Sstevel@tonic-gate 	 * disabled here to prevent a race between checking or setting the
1335*7c478bd9Sstevel@tonic-gate 	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
1336*7c478bd9Sstevel@tonic-gate 	 * a context switch.
1337*7c478bd9Sstevel@tonic-gate 	 */
1338*7c478bd9Sstevel@tonic-gate 
1339*7c478bd9Sstevel@tonic-gate 	kpreempt_disable();
1340*7c478bd9Sstevel@tonic-gate 	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
1341*7c478bd9Sstevel@tonic-gate 		pcbe_ops->pcbe_allstop();
1342*7c478bd9Sstevel@tonic-gate 		atomic_or_uint(&ctx->kc_flags,
1343*7c478bd9Sstevel@tonic-gate 		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
1344*7c478bd9Sstevel@tonic-gate 	}
1345*7c478bd9Sstevel@tonic-gate 	kpreempt_enable();
1346*7c478bd9Sstevel@tonic-gate }
1347*7c478bd9Sstevel@tonic-gate 
1348*7c478bd9Sstevel@tonic-gate /*
1349*7c478bd9Sstevel@tonic-gate  * Assign the requests in the given set to the PICs in the context.
1350*7c478bd9Sstevel@tonic-gate  * Returns 0 if successful, -1 on failure.
1351*7c478bd9Sstevel@tonic-gate  */
1352*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1353*7c478bd9Sstevel@tonic-gate static int
1354*7c478bd9Sstevel@tonic-gate kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
1355*7c478bd9Sstevel@tonic-gate {
1356*7c478bd9Sstevel@tonic-gate 	int i;
1357*7c478bd9Sstevel@tonic-gate 	int *picnum_save;
1358*7c478bd9Sstevel@tonic-gate 
1359*7c478bd9Sstevel@tonic-gate 	ASSERT(set->ks_nreqs <= cpc_ncounters);
1360*7c478bd9Sstevel@tonic-gate 
1361*7c478bd9Sstevel@tonic-gate 	/*
1362*7c478bd9Sstevel@tonic-gate 	 * Provide kcpc_tryassign() with scratch space to avoid doing an
1363*7c478bd9Sstevel@tonic-gate 	 * alloc/free with every invocation.
1364*7c478bd9Sstevel@tonic-gate 	 */
1365*7c478bd9Sstevel@tonic-gate 	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
1366*7c478bd9Sstevel@tonic-gate 	/*
1367*7c478bd9Sstevel@tonic-gate 	 * kcpc_tryassign() blindly walks through each request in the set,
1368*7c478bd9Sstevel@tonic-gate 	 * seeing if a counter can count its event. If yes, it assigns that
1369*7c478bd9Sstevel@tonic-gate 	 * counter. However, that counter may have been the only capable counter
1370*7c478bd9Sstevel@tonic-gate 	 * for _another_ request's event. The solution is to try every possible
1371*7c478bd9Sstevel@tonic-gate 	 * request first. Note that this does not cover all solutions, as
1372*7c478bd9Sstevel@tonic-gate 	 * that would require all unique orderings of requests, an n^n operation
1373*7c478bd9Sstevel@tonic-gate 	 * which would be unacceptable for architectures with many counters.
1374*7c478bd9Sstevel@tonic-gate 	 */
1375*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++)
1376*7c478bd9Sstevel@tonic-gate 		if (kcpc_tryassign(set, i, picnum_save) == 0)
1377*7c478bd9Sstevel@tonic-gate 			break;
1378*7c478bd9Sstevel@tonic-gate 
1379*7c478bd9Sstevel@tonic-gate 	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
1380*7c478bd9Sstevel@tonic-gate 	if (i == set->ks_nreqs)
1381*7c478bd9Sstevel@tonic-gate 		return (-1);
1382*7c478bd9Sstevel@tonic-gate 	return (0);
1383*7c478bd9Sstevel@tonic-gate }
1384*7c478bd9Sstevel@tonic-gate 
1385*7c478bd9Sstevel@tonic-gate static int
1386*7c478bd9Sstevel@tonic-gate kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
1387*7c478bd9Sstevel@tonic-gate {
1388*7c478bd9Sstevel@tonic-gate 	int		i;
1389*7c478bd9Sstevel@tonic-gate 	int		j;
1390*7c478bd9Sstevel@tonic-gate 	uint64_t	bitmap = 0, resmap = 0;
1391*7c478bd9Sstevel@tonic-gate 	uint64_t	ctrmap;
1392*7c478bd9Sstevel@tonic-gate 
1393*7c478bd9Sstevel@tonic-gate 	/*
1394*7c478bd9Sstevel@tonic-gate 	 * We are attempting to assign the reqs to pics, but we may fail. If we
1395*7c478bd9Sstevel@tonic-gate 	 * fail, we need to restore the state of the requests to what it was
1396*7c478bd9Sstevel@tonic-gate 	 * when we found it, as some reqs may have been explicitly assigned to
1397*7c478bd9Sstevel@tonic-gate 	 * a specific PIC beforehand. We do this by snapshotting the assignments
1398*7c478bd9Sstevel@tonic-gate 	 * now and restoring from it later if we fail.
1399*7c478bd9Sstevel@tonic-gate 	 *
1400*7c478bd9Sstevel@tonic-gate 	 * Also we note here which counters have already been claimed by
1401*7c478bd9Sstevel@tonic-gate 	 * requests with explicit counter assignments.
1402*7c478bd9Sstevel@tonic-gate 	 */
1403*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < set->ks_nreqs; i++) {
1404*7c478bd9Sstevel@tonic-gate 		scratch[i] = set->ks_req[i].kr_picnum;
1405*7c478bd9Sstevel@tonic-gate 		if (set->ks_req[i].kr_picnum != -1)
1406*7c478bd9Sstevel@tonic-gate 			resmap |= (1 << set->ks_req[i].kr_picnum);
1407*7c478bd9Sstevel@tonic-gate 	}
1408*7c478bd9Sstevel@tonic-gate 
1409*7c478bd9Sstevel@tonic-gate 	/*
1410*7c478bd9Sstevel@tonic-gate 	 * Walk through requests assigning them to the first PIC that is
1411*7c478bd9Sstevel@tonic-gate 	 * capable.
1412*7c478bd9Sstevel@tonic-gate 	 */
1413*7c478bd9Sstevel@tonic-gate 	i = starting_req;
1414*7c478bd9Sstevel@tonic-gate 	do {
1415*7c478bd9Sstevel@tonic-gate 		if (set->ks_req[i].kr_picnum != -1) {
1416*7c478bd9Sstevel@tonic-gate 			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
1417*7c478bd9Sstevel@tonic-gate 			bitmap |= (1 << set->ks_req[i].kr_picnum);
1418*7c478bd9Sstevel@tonic-gate 			if (++i == set->ks_nreqs)
1419*7c478bd9Sstevel@tonic-gate 				i = 0;
1420*7c478bd9Sstevel@tonic-gate 			continue;
1421*7c478bd9Sstevel@tonic-gate 		}
1422*7c478bd9Sstevel@tonic-gate 
1423*7c478bd9Sstevel@tonic-gate 		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
1424*7c478bd9Sstevel@tonic-gate 		for (j = 0; j < cpc_ncounters; j++) {
1425*7c478bd9Sstevel@tonic-gate 			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
1426*7c478bd9Sstevel@tonic-gate 			    (resmap & (1 << j)) == 0) {
1427*7c478bd9Sstevel@tonic-gate 				/*
1428*7c478bd9Sstevel@tonic-gate 				 * We can assign this counter because:
1429*7c478bd9Sstevel@tonic-gate 				 *
1430*7c478bd9Sstevel@tonic-gate 				 * 1. It can count the event (ctrmap)
1431*7c478bd9Sstevel@tonic-gate 				 * 2. It hasn't been assigned yet (bitmap)
1432*7c478bd9Sstevel@tonic-gate 				 * 3. It wasn't reserved by a request (resmap)
1433*7c478bd9Sstevel@tonic-gate 				 */
1434*7c478bd9Sstevel@tonic-gate 				bitmap |= (1 << j);
1435*7c478bd9Sstevel@tonic-gate 				break;
1436*7c478bd9Sstevel@tonic-gate 			}
1437*7c478bd9Sstevel@tonic-gate 		}
1438*7c478bd9Sstevel@tonic-gate 		if (j == cpc_ncounters) {
1439*7c478bd9Sstevel@tonic-gate 			for (i = 0; i < set->ks_nreqs; i++)
1440*7c478bd9Sstevel@tonic-gate 				set->ks_req[i].kr_picnum = scratch[i];
1441*7c478bd9Sstevel@tonic-gate 			return (-1);
1442*7c478bd9Sstevel@tonic-gate 		}
1443*7c478bd9Sstevel@tonic-gate 		set->ks_req[i].kr_picnum = j;
1444*7c478bd9Sstevel@tonic-gate 
1445*7c478bd9Sstevel@tonic-gate 		if (++i == set->ks_nreqs)
1446*7c478bd9Sstevel@tonic-gate 			i = 0;
1447*7c478bd9Sstevel@tonic-gate 	} while (i != starting_req);
1448*7c478bd9Sstevel@tonic-gate 
1449*7c478bd9Sstevel@tonic-gate 	return (0);
1450*7c478bd9Sstevel@tonic-gate }
1451*7c478bd9Sstevel@tonic-gate 
1452*7c478bd9Sstevel@tonic-gate kcpc_set_t *
1453*7c478bd9Sstevel@tonic-gate kcpc_dup_set(kcpc_set_t *set)
1454*7c478bd9Sstevel@tonic-gate {
1455*7c478bd9Sstevel@tonic-gate 	kcpc_set_t	*new;
1456*7c478bd9Sstevel@tonic-gate 	int		i;
1457*7c478bd9Sstevel@tonic-gate 	int		j;
1458*7c478bd9Sstevel@tonic-gate 
1459*7c478bd9Sstevel@tonic-gate 	new = kmem_alloc(sizeof (*new), KM_SLEEP);
1460*7c478bd9Sstevel@tonic-gate 	new->ks_flags = set->ks_flags;
1461*7c478bd9Sstevel@tonic-gate 	new->ks_nreqs = set->ks_nreqs;
1462*7c478bd9Sstevel@tonic-gate 	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
1463*7c478bd9Sstevel@tonic-gate 	    KM_SLEEP);
1464*7c478bd9Sstevel@tonic-gate 	new->ks_data = NULL;
1465*7c478bd9Sstevel@tonic-gate 	new->ks_ctx = NULL;
1466*7c478bd9Sstevel@tonic-gate 
1467*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < new->ks_nreqs; i++) {
1468*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_config = NULL;
1469*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
1470*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
1471*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_picp = NULL;
1472*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_data = NULL;
1473*7c478bd9Sstevel@tonic-gate 		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
1474*7c478bd9Sstevel@tonic-gate 		    CPC_MAX_EVENT_LEN);
1475*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
1476*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
1477*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
1478*7c478bd9Sstevel@tonic-gate 		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
1479*7c478bd9Sstevel@tonic-gate 		    sizeof (kcpc_attr_t), KM_SLEEP);
1480*7c478bd9Sstevel@tonic-gate 		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
1481*7c478bd9Sstevel@tonic-gate 			new->ks_req[i].kr_attr[j].ka_val =
1482*7c478bd9Sstevel@tonic-gate 			    set->ks_req[i].kr_attr[j].ka_val;
1483*7c478bd9Sstevel@tonic-gate 			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
1484*7c478bd9Sstevel@tonic-gate 			    set->ks_req[i].kr_attr[j].ka_name,
1485*7c478bd9Sstevel@tonic-gate 			    CPC_MAX_ATTR_LEN);
1486*7c478bd9Sstevel@tonic-gate 		}
1487*7c478bd9Sstevel@tonic-gate 	}
1488*7c478bd9Sstevel@tonic-gate 
1489*7c478bd9Sstevel@tonic-gate 	return (new);
1490*7c478bd9Sstevel@tonic-gate }
1491*7c478bd9Sstevel@tonic-gate 
1492*7c478bd9Sstevel@tonic-gate int
1493*7c478bd9Sstevel@tonic-gate kcpc_allow_nonpriv(void *token)
1494*7c478bd9Sstevel@tonic-gate {
1495*7c478bd9Sstevel@tonic-gate 	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
1496*7c478bd9Sstevel@tonic-gate }
1497*7c478bd9Sstevel@tonic-gate 
1498*7c478bd9Sstevel@tonic-gate void
1499*7c478bd9Sstevel@tonic-gate kcpc_invalidate(kthread_t *t)
1500*7c478bd9Sstevel@tonic-gate {
1501*7c478bd9Sstevel@tonic-gate 	kcpc_ctx_t *ctx = t->t_cpc_ctx;
1502*7c478bd9Sstevel@tonic-gate 
1503*7c478bd9Sstevel@tonic-gate 	if (ctx != NULL)
1504*7c478bd9Sstevel@tonic-gate 		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
1505*7c478bd9Sstevel@tonic-gate }
1506*7c478bd9Sstevel@tonic-gate 
1507*7c478bd9Sstevel@tonic-gate /*
1508*7c478bd9Sstevel@tonic-gate  * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
1509*7c478bd9Sstevel@tonic-gate  * are used to construct PCBE names, starting with the most specific,
1510*7c478bd9Sstevel@tonic-gate  * "pcbe.first.second.third.fourth" and ending with the least specific,
1511*7c478bd9Sstevel@tonic-gate  * "pcbe.first".
1512*7c478bd9Sstevel@tonic-gate  *
1513*7c478bd9Sstevel@tonic-gate  * Returns 0 if a PCBE was successfully loaded and -1 upon error.
1514*7c478bd9Sstevel@tonic-gate  */
1515*7c478bd9Sstevel@tonic-gate int
1516*7c478bd9Sstevel@tonic-gate kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
1517*7c478bd9Sstevel@tonic-gate {
1518*7c478bd9Sstevel@tonic-gate 	char	modname[PCBE_NAMELEN];
1519*7c478bd9Sstevel@tonic-gate 	char	stub[PCBE_NAMELEN];
1520*7c478bd9Sstevel@tonic-gate 
1521*7c478bd9Sstevel@tonic-gate 	if (prefix != NULL)
1522*7c478bd9Sstevel@tonic-gate 		(void) snprintf(stub, PCBE_NAMELEN, "pcbe.%s", prefix);
1523*7c478bd9Sstevel@tonic-gate 	else
1524*7c478bd9Sstevel@tonic-gate 		(void) snprintf(stub, PCBE_NAMELEN, "pcbe");
1525*7c478bd9Sstevel@tonic-gate 
1526*7c478bd9Sstevel@tonic-gate 	(void) snprintf(modname, PCBE_NAMELEN, "%s.%u.%u.%u",
1527*7c478bd9Sstevel@tonic-gate 	    stub, first, second, third);
1528*7c478bd9Sstevel@tonic-gate 
1529*7c478bd9Sstevel@tonic-gate 	DTRACE_PROBE1(kcpc__pcbe__spec, char *, modname);
1530*7c478bd9Sstevel@tonic-gate 
1531*7c478bd9Sstevel@tonic-gate 	if (modload("pcbe", modname) >= 0)
1532*7c478bd9Sstevel@tonic-gate 		return (0);
1533*7c478bd9Sstevel@tonic-gate 
1534*7c478bd9Sstevel@tonic-gate 	(void) snprintf(modname, PCBE_NAMELEN, "%s.%u.%u",
1535*7c478bd9Sstevel@tonic-gate 	    stub, first, second);
1536*7c478bd9Sstevel@tonic-gate 	if (modload("pcbe", modname) >= 0)
1537*7c478bd9Sstevel@tonic-gate 		return (0);
1538*7c478bd9Sstevel@tonic-gate 
1539*7c478bd9Sstevel@tonic-gate 	(void) snprintf(modname, PCBE_NAMELEN, "%s.%u", stub, first);
1540*7c478bd9Sstevel@tonic-gate 	if (modload("pcbe", modname) >= 0)
1541*7c478bd9Sstevel@tonic-gate 		return (0);
1542*7c478bd9Sstevel@tonic-gate 
1543*7c478bd9Sstevel@tonic-gate 	if (prefix == NULL)
1544*7c478bd9Sstevel@tonic-gate 		/*
1545*7c478bd9Sstevel@tonic-gate 		 * If no prefix was given, we have tried all possible
1546*7c478bd9Sstevel@tonic-gate 		 * PCBE names.
1547*7c478bd9Sstevel@tonic-gate 		 */
1548*7c478bd9Sstevel@tonic-gate 		return (-1);
1549*7c478bd9Sstevel@tonic-gate 
1550*7c478bd9Sstevel@tonic-gate 	(void) snprintf(modname, PCBE_NAMELEN, "%s", stub);
1551*7c478bd9Sstevel@tonic-gate 	if (modload("pcbe", modname) >= 0)
1552*7c478bd9Sstevel@tonic-gate 		return (0);
1553*7c478bd9Sstevel@tonic-gate 
1554*7c478bd9Sstevel@tonic-gate 	return (-1);
1555*7c478bd9Sstevel@tonic-gate }
1556