xref: /illumos-gate/usr/src/uts/common/os/dtrace_subr.c (revision 1b500975aaacf8b5d0e18c9a117bf5560069ffc3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2016 Joyent, Inc.
26  */
27 
28 #include <sys/dtrace.h>
29 #include <sys/cmn_err.h>
30 #include <sys/tnf.h>
31 #include <sys/atomic.h>
32 #include <sys/prsystm.h>
33 #include <sys/modctl.h>
34 #include <sys/aio_impl.h>
35 
36 #ifdef __sparc
37 #include <sys/privregs.h>
38 #endif
39 
/*
 * Hook pointers.  Each is declared without an initializer, and none of them
 * is assigned or called in the portion of this file visible here.
 * NOTE(review): presumably the dtrace module installs these and other kernel
 * subsystems call through them -- confirm against the callers.
 */
40 void (*dtrace_cpu_init)(processorid_t);
41 void (*dtrace_modload)(struct modctl *);
42 void (*dtrace_modunload)(struct modctl *);
43 void (*dtrace_helpers_cleanup)(proc_t *);
44 void (*dtrace_helpers_fork)(proc_t *, proc_t *);
45 void (*dtrace_cpustart_init)(void);
46 void (*dtrace_cpustart_fini)(void);
47 void (*dtrace_cpc_fire)(uint64_t);
48 void (*dtrace_closef)(void);
49 
/* Debugger hooks; likewise not referenced in the visible part of this file. */
50 void (*dtrace_debugger_init)(void);
51 void (*dtrace_debugger_fini)(void);
52 
/*
 * dtrace_vtime_active is the state word that the dtrace_vtime_enable(),
 * dtrace_vtime_disable(), dtrace_vtime_enable_tnf() and
 * dtrace_vtime_disable_tnf() routines in this file update with
 * atomic_cas_32(), and that dtrace_vtime_switch() tests.  It starts out as 0.
 * NOTE(review): 0 is presumably DTRACE_VTIME_INACTIVE -- confirm against the
 * definition of dtrace_vtime_state_t.
 *
 * dtrace_predcache_id starts at one past DTRACE_CACHEIDNONE; it is not
 * otherwise used in the visible part of this file.
 */
53 dtrace_vtime_state_t dtrace_vtime_active = 0;
54 dtrace_cacheid_t dtrace_predcache_id = DTRACE_CACHEIDNONE + 1;
55 
56 /*
57  * dtrace_cpc_in_use usage statement: this global variable is used by the cpc
58  * hardware overflow interrupt handler and the kernel cpc framework to check
59  * whether or not the DTrace cpc provider is currently in use. The variable is
60  * set before counters are enabled with the first enabling and cleared when
61  * the last enabling is disabled. Its value at any given time indicates the
62  * number of active dcpc-based enablings. The global 'kcpc_cpuctx_lock' rwlock
63  * is held during the initial setting to guard against races between
64  * kcpc_open() and the first enabling. The locking provided by the DTrace
65  * subsystem, the kernel cpc framework and the cpu management framework
66  * protects consumers from race conditions on enabling and disabling probes.
67  */
68 uint32_t dtrace_cpc_in_use = 0;
69 
/*
 * One snapshot of the adjustable high-resolution time state.  The fields are
 * filled in by dtrace_hres_tick() and read by dtrace_gethrestime().
 */
70 typedef struct dtrace_hrestime {
71 	lock_t		dthr_lock;		/* lock for this element */
72 	timestruc_t	dthr_hrestime;		/* hrestime value */
73 	int64_t		dthr_adj;		/* hrestime_adj value */
74 	hrtime_t	dthr_hrtime;		/* hrtime value */
75 } dtrace_hrestime_t;
76 
/*
 * The two snapshots.  dtrace_hres_tick() writes element 0 and then element 1
 * on every call; dtrace_gethrestime() alternates between them until it reads
 * one consistently.
 */
77 static dtrace_hrestime_t dtrace_hrestime[2];
78 
79 /*
80  * Making available adjustable high-resolution time in DTrace is regrettably
81  * more complicated than one might think it should be.  The problem is that
82  * the variables related to adjusted high-resolution time (hrestime,
83  * hrestime_adj and friends) are adjusted under hres_lock -- and this lock may
84  * be held when we enter probe context.  One might think that we could address
85  * this by having a single snapshot copy that is stored under a different lock
86  * from hres_tick(), using the snapshot iff hres_lock is locked in probe
87  * context.  Unfortunately, this too won't work:  because hres_lock is grabbed
88  * in more than just hres_tick() context, we could enter probe context
89  * concurrently on two different CPUs with both locks (hres_lock and the
90  * snapshot lock) held.  As this implies, the fundamental problem is that we
91  * need to have access to a snapshot of these variables that we _know_ will
92  * not be locked in probe context.  To effect this, we have two snapshots
93  * protected by two different locks, and we mandate that these snapshots are
94  * recorded in succession by a single thread calling dtrace_hres_tick().  (We
95  * assure this by calling it out of the same CY_HIGH_LEVEL cyclic that calls
96  * hres_tick().)  A single thread can't be in two places at once:  one of the
97  * snapshot locks is guaranteed to be unheld at all times.  The
98  * dtrace_gethrestime() algorithm is thus to check first one snapshot and then
99  * the other to find the unlocked snapshot.
100  */
101 void
102 dtrace_hres_tick(void)
103 {
104 	int i;
105 	ushort_t spl;
106 
107 	for (i = 0; i < 2; i++) {
108 		dtrace_hrestime_t tmp;
109 
110 		spl = hr_clock_lock();
111 		tmp.dthr_hrestime = hrestime;
112 		tmp.dthr_adj = hrestime_adj;
113 		tmp.dthr_hrtime = dtrace_gethrtime();
114 		hr_clock_unlock(spl);
115 
116 		lock_set(&dtrace_hrestime[i].dthr_lock);
117 		dtrace_hrestime[i].dthr_hrestime = tmp.dthr_hrestime;
118 		dtrace_hrestime[i].dthr_adj = tmp.dthr_adj;
119 		dtrace_hrestime[i].dthr_hrtime = tmp.dthr_hrtime;
120 		dtrace_membar_producer();
121 
122 		/*
123 		 * To allow for lock-free examination of this lock, we use
124 		 * the same trick that is used hres_lock; for more details,
125 		 * see the description of this technique in sun4u/sys/clock.h.
126 		 */
127 		dtrace_hrestime[i].dthr_lock++;
128 	}
129 }
130 
131 hrtime_t
132 dtrace_gethrestime(void)
133 {
134 	dtrace_hrestime_t snap;
135 	hrtime_t now;
136 	int i = 0, adj, nslt;
137 
138 	for (;;) {
139 		snap.dthr_lock = dtrace_hrestime[i].dthr_lock;
140 		dtrace_membar_consumer();
141 		snap.dthr_hrestime = dtrace_hrestime[i].dthr_hrestime;
142 		snap.dthr_hrtime = dtrace_hrestime[i].dthr_hrtime;
143 		snap.dthr_adj = dtrace_hrestime[i].dthr_adj;
144 		dtrace_membar_consumer();
145 
146 		if ((snap.dthr_lock & ~1) == dtrace_hrestime[i].dthr_lock)
147 			break;
148 
149 		/*
150 		 * If we're here, the lock was either locked, or it
151 		 * transitioned while we were taking the snapshot.  Either
152 		 * way, we're going to try the other dtrace_hrestime element;
153 		 * we know that it isn't possible for both to be locked
154 		 * simultaneously, so we will ultimately get a good snapshot.
155 		 */
156 		i ^= 1;
157 	}
158 
159 	/*
160 	 * We have a good snapshot.  Now perform any necessary adjustments.
161 	 */
162 	nslt = dtrace_gethrtime() - snap.dthr_hrtime;
163 	ASSERT(nslt >= 0);
164 
165 	now = ((hrtime_t)snap.dthr_hrestime.tv_sec * (hrtime_t)NANOSEC) +
166 	    snap.dthr_hrestime.tv_nsec;
167 
168 	if (snap.dthr_adj != 0) {
169 		if (snap.dthr_adj > 0) {
170 			adj = (nslt >> adj_shift);
171 			if (adj > snap.dthr_adj)
172 				adj = (int)snap.dthr_adj;
173 		} else {
174 			adj = -(nslt >> adj_shift);
175 			if (adj < snap.dthr_adj)
176 				adj = (int)snap.dthr_adj;
177 		}
178 		now += adj;
179 	}
180 
181 	return (now);
182 }
183 
184 void
185 dtrace_vtime_enable(void)
186 {
187 	dtrace_vtime_state_t state, nstate;
188 
189 	nstate = DTRACE_VTIME_INACTIVE;
190 	do {
191 		state = dtrace_vtime_active;
192 
193 		switch (state) {
194 		case DTRACE_VTIME_INACTIVE:
195 			nstate = DTRACE_VTIME_ACTIVE;
196 			break;
197 
198 		case DTRACE_VTIME_INACTIVE_TNF:
199 			nstate = DTRACE_VTIME_ACTIVE_TNF;
200 			break;
201 
202 		case DTRACE_VTIME_ACTIVE:
203 		case DTRACE_VTIME_ACTIVE_TNF:
204 			panic("DTrace virtual time already enabled");
205 			/*NOTREACHED*/
206 		}
207 
208 	} while	(atomic_cas_32((uint32_t *)&dtrace_vtime_active,
209 	    state, nstate) != state);
210 }
211 
212 void
213 dtrace_vtime_disable(void)
214 {
215 	dtrace_vtime_state_t state, nstate;
216 
217 	nstate = DTRACE_VTIME_INACTIVE;
218 	do {
219 		state = dtrace_vtime_active;
220 
221 		switch (state) {
222 		case DTRACE_VTIME_ACTIVE:
223 			nstate = DTRACE_VTIME_INACTIVE;
224 			break;
225 
226 		case DTRACE_VTIME_ACTIVE_TNF:
227 			nstate = DTRACE_VTIME_INACTIVE_TNF;
228 			break;
229 
230 		case DTRACE_VTIME_INACTIVE:
231 		case DTRACE_VTIME_INACTIVE_TNF:
232 			panic("DTrace virtual time already disabled");
233 			/*NOTREACHED*/
234 		}
235 
236 	} while	(atomic_cas_32((uint32_t *)&dtrace_vtime_active,
237 	    state, nstate) != state);
238 }
239 
240 void
241 dtrace_vtime_enable_tnf(void)
242 {
243 	dtrace_vtime_state_t state, nstate;
244 
245 	nstate = DTRACE_VTIME_INACTIVE;
246 	do {
247 		state = dtrace_vtime_active;
248 
249 		switch (state) {
250 		case DTRACE_VTIME_ACTIVE:
251 			nstate = DTRACE_VTIME_ACTIVE_TNF;
252 			break;
253 
254 		case DTRACE_VTIME_INACTIVE:
255 			nstate = DTRACE_VTIME_INACTIVE_TNF;
256 			break;
257 
258 		case DTRACE_VTIME_ACTIVE_TNF:
259 		case DTRACE_VTIME_INACTIVE_TNF:
260 			panic("TNF already active");
261 			/*NOTREACHED*/
262 		}
263 
264 	} while	(atomic_cas_32((uint32_t *)&dtrace_vtime_active,
265 	    state, nstate) != state);
266 }
267 
268 void
269 dtrace_vtime_disable_tnf(void)
270 {
271 	dtrace_vtime_state_t state, nstate;
272 
273 	nstate = DTRACE_VTIME_INACTIVE;
274 	do {
275 		state = dtrace_vtime_active;
276 
277 		switch (state) {
278 		case DTRACE_VTIME_ACTIVE_TNF:
279 			nstate = DTRACE_VTIME_ACTIVE;
280 			break;
281 
282 		case DTRACE_VTIME_INACTIVE_TNF:
283 			nstate = DTRACE_VTIME_INACTIVE;
284 			break;
285 
286 		case DTRACE_VTIME_ACTIVE:
287 		case DTRACE_VTIME_INACTIVE:
288 			panic("TNF already inactive");
289 			/*NOTREACHED*/
290 		}
291 
292 	} while	(atomic_cas_32((uint32_t *)&dtrace_vtime_active,
293 	    state, nstate) != state);
294 }
295 
296 void
297 dtrace_vtime_switch(kthread_t *next)
298 {
299 	dtrace_icookie_t cookie;
300 	hrtime_t ts;
301 
302 	if (tnf_tracing_active) {
303 		tnf_thread_switch(next);
304 
305 		if (dtrace_vtime_active == DTRACE_VTIME_INACTIVE_TNF)
306 			return;
307 	}
308 
309 	cookie = dtrace_interrupt_disable();
310 	ts = dtrace_gethrtime();
311 
312 	if (curthread->t_dtrace_start != 0) {
313 		curthread->t_dtrace_vtime += ts - curthread->t_dtrace_start;
314 		curthread->t_dtrace_start = 0;
315 	}
316 
317 	next->t_dtrace_start = ts;
318 
319 	dtrace_interrupt_enable(cookie);
320 }
321 
/*
 * fasttrap hook pointers, declared without initializers.
 * dtrace_fasttrap_fork_ptr is called by dtrace_fasttrap_fork() in this file;
 * the exec and exit pointers are not referenced in the visible part of it.
 * NOTE(review): presumably the fasttrap module sets and clears these --
 * confirm against that module.
 */
322 void (*dtrace_fasttrap_fork_ptr)(proc_t *, proc_t *);
323 void (*dtrace_fasttrap_exec_ptr)(proc_t *);
324 void (*dtrace_fasttrap_exit_ptr)(proc_t *);
325 
326 /*
327  * This function is called by cfork() in the event that it appears that
328  * there may be dtrace tracepoints active in the parent process's address
329  * space. This first confirms the existence of dtrace tracepoints in the
330  * parent process and calls into the fasttrap module to remove the
331  * corresponding tracepoints from the child. By knowing that there are
332  * existing tracepoints, and ensuring they can't be removed, we can rely
333  * on the fasttrap module remaining loaded.
334  */
335 void
336 dtrace_fasttrap_fork(proc_t *p, proc_t *cp)
337 {
338 	ASSERT(p->p_proc_flag & P_PR_LOCK);
339 	ASSERT(p->p_dtrace_count > 0);
340 	ASSERT(dtrace_fasttrap_fork_ptr != NULL);
341 
342 	dtrace_fasttrap_fork_ptr(p, cp);
343 }
344