xref: /illumos-gate/usr/src/lib/libdtrace/common/dt_proc.c (revision 6c9bfa0b39999e3f2c9448ede1e4cbd8bfaca728)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * DTrace Process Control
31  *
32  * This file provides a set of routines that permit libdtrace and its clients
33  * to create and grab process handles using libproc, and to share these handles
34  * between library mechanisms that need libproc access, such as ustack(), and
35  * client mechanisms that need libproc access, such as dtrace(1M) -c and -p.
36  * The library provides several mechanisms in the libproc control layer:
37  *
38  * Reference Counting: The library code and client code can independently grab
39  * the same process handles without interfering with one another.  Only when
40  * the reference count drops to zero and the handle is not being cached (see
41  * below for more information on caching) will Prelease() be called on it.
42  *
43  * Handle Caching: If a handle is grabbed PGRAB_RDONLY (e.g. by ustack()) and
44  * the reference count drops to zero, the handle is not immediately released.
45  * Instead, libproc handles are maintained on dph_lrulist in order from most-
46  * recently accessed to least-recently accessed.  Idle handles are maintained
47  * until a pre-defined LRU cache limit is exceeded, permitting repeated calls
48  * to ustack() to avoid the overhead of releasing and re-grabbing processes.
49  *
50  * Process Control: For processes that are grabbed for control (~PGRAB_RDONLY)
51  * or created by dt_proc_create(), a control thread is created to provide
52  * callbacks on process exit and symbol table caching on dlopen()s.
53  *
54  * MT-Safety: Libproc is not MT-Safe, so dt_proc_lock() and dt_proc_unlock()
55  * are provided to synchronize access to the libproc handle between libdtrace
56  * code and client code and the control thread's use of the ps_prochandle.
57  *
58  * NOTE: MT-Safety is NOT provided for libdtrace itself, or for use of the
59  * dtrace_proc_grab/dtrace_proc_create mechanisms.  Like all exported libdtrace
60  * calls, these are assumed to be MT-Unsafe.  MT-Safety is ONLY provided for
61  * synchronization between libdtrace control threads and the client thread.
62  *
63  * The ps_prochandles themselves are maintained along with a dt_proc_t struct
64  * in a hash table indexed by PID.  This provides basic locking and reference
65  * counting.  The dt_proc_t is also maintained in LRU order on dph_lrulist.
66  * The dph_lrucnt and dph_lrulim count the number of cacheable processes and
67  * the current limit on the number of actively cached entries.
68  *
69  * The control thread for a process establishes breakpoints at the rtld_db
70  * locations of interest, updates mappings and symbol tables at these points,
71  * and handles exec and fork (by always following the parent).  The control
72  * thread automatically exits when the process dies or control is lost.
73  *
74  * A simple notification mechanism is provided for libdtrace clients using
75  * dtrace_handle_proc() for notification of PS_UNDEAD or PS_LOST events.  If
76  * such an event occurs, the dt_proc_t itself is enqueued on a notification
77  * list and the control thread broadcasts to dph_cv.  dtrace_sleep() will wake
78  * up using this condition and will then call the client handler as necessary.
79  */
80 
81 #include <sys/wait.h>
82 #include <sys/lwp.h>
83 #include <strings.h>
84 #include <signal.h>
85 #include <assert.h>
86 #include <errno.h>
87 
88 #include <dt_proc.h>
89 #include <dt_pid.h>
90 #include <dt_impl.h>
91 
/*
 * Classify the system call number reported for a stopped LWP (pr_what).
 * IS_SYS_EXEC() is true for either exec variant; IS_SYS_FORK() is true for
 * any of the fork variants.  Every use of the argument is parenthesized so
 * that an expression argument (e.g. "what & mask") groups as the caller
 * intended.  Note that the argument is still evaluated more than once, so it
 * must not have side effects.
 */
#define	IS_SYS_EXEC(w)	((w) == SYS_exec || (w) == SYS_execve)
#define	IS_SYS_FORK(w)	\
	((w) == SYS_vfork || (w) == SYS_fork1 || (w) == SYS_forkall)
94 
95 static dt_bkpt_t *
96 dt_proc_bpcreate(dt_proc_t *dpr, uintptr_t addr, dt_bkpt_f *func, void *data)
97 {
98 	struct ps_prochandle *P = dpr->dpr_proc;
99 	dt_bkpt_t *dbp;
100 
101 	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
102 
103 	if ((dbp = dt_zalloc(dpr->dpr_hdl, sizeof (dt_bkpt_t))) != NULL) {
104 		dbp->dbp_func = func;
105 		dbp->dbp_data = data;
106 		dbp->dbp_addr = addr;
107 
108 		if (Psetbkpt(P, dbp->dbp_addr, &dbp->dbp_instr) == 0)
109 			dbp->dbp_active = B_TRUE;
110 
111 		dt_list_append(&dpr->dpr_bps, dbp);
112 	}
113 
114 	return (dbp);
115 }
116 
117 static void
118 dt_proc_bpdestroy(dt_proc_t *dpr, int delbkpts)
119 {
120 	int state = Pstate(dpr->dpr_proc);
121 	dt_bkpt_t *dbp, *nbp;
122 
123 	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
124 
125 	for (dbp = dt_list_next(&dpr->dpr_bps); dbp != NULL; dbp = nbp) {
126 		if (delbkpts && dbp->dbp_active &&
127 		    state != PS_LOST && state != PS_UNDEAD) {
128 			(void) Pdelbkpt(dpr->dpr_proc,
129 			    dbp->dbp_addr, dbp->dbp_instr);
130 		}
131 		nbp = dt_list_next(dbp);
132 		dt_list_delete(&dpr->dpr_bps, dbp);
133 		dt_free(dpr->dpr_hdl, dbp);
134 	}
135 }
136 
137 static void
138 dt_proc_bpmatch(dtrace_hdl_t *dtp, dt_proc_t *dpr)
139 {
140 	const lwpstatus_t *psp = &Pstatus(dpr->dpr_proc)->pr_lwp;
141 	dt_bkpt_t *dbp;
142 
143 	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
144 
145 	for (dbp = dt_list_next(&dpr->dpr_bps);
146 	    dbp != NULL; dbp = dt_list_next(dbp)) {
147 		if (psp->pr_reg[R_PC] == dbp->dbp_addr)
148 			break;
149 	}
150 
151 	if (dbp == NULL) {
152 		dt_dprintf("pid %d: spurious breakpoint wakeup for %lx\n",
153 		    (int)dpr->dpr_pid, (ulong_t)psp->pr_reg[R_PC]);
154 		return;
155 	}
156 
157 	dt_dprintf("pid %d: hit breakpoint at %lx (%lu)\n",
158 	    (int)dpr->dpr_pid, (ulong_t)dbp->dbp_addr, ++dbp->dbp_hits);
159 
160 	dbp->dbp_func(dtp, dpr, dbp->dbp_data);
161 	(void) Pxecbkpt(dpr->dpr_proc, dbp->dbp_instr);
162 }
163 
164 static void
165 dt_proc_bpenable(dt_proc_t *dpr)
166 {
167 	dt_bkpt_t *dbp;
168 
169 	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
170 
171 	for (dbp = dt_list_next(&dpr->dpr_bps);
172 	    dbp != NULL; dbp = dt_list_next(dbp)) {
173 		if (!dbp->dbp_active && Psetbkpt(dpr->dpr_proc,
174 		    dbp->dbp_addr, &dbp->dbp_instr) == 0)
175 			dbp->dbp_active = B_TRUE;
176 	}
177 }
178 
179 static void
180 dt_proc_bpdisable(dt_proc_t *dpr)
181 {
182 	dt_bkpt_t *dbp;
183 
184 	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
185 
186 	for (dbp = dt_list_next(&dpr->dpr_bps);
187 	    dbp != NULL; dbp = dt_list_next(dbp)) {
188 		if (dbp->dbp_active && Pdelbkpt(dpr->dpr_proc,
189 		    dbp->dbp_addr, dbp->dbp_instr) == 0)
190 			dbp->dbp_active = B_FALSE;
191 	}
192 }
193 
194 /*
195  * Check to see if the control thread was requested to stop when the victim
196  * process reached a particular event (why) rather than continuing the victim.
197  * If 'why' is set in the stop mask, we wait on dpr_cv for dt_proc_continue().
198  * If 'why' is not set, this function returns immediately and does nothing.
199  */
200 static void
201 dt_proc_stop(dt_proc_t *dpr, uint8_t why)
202 {
203 	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
204 	assert(why != DT_PROC_STOP_IDLE);
205 
206 	if (dpr->dpr_stop & why) {
207 		dpr->dpr_stop |= DT_PROC_STOP_IDLE;
208 		dpr->dpr_stop &= ~why;
209 
210 		(void) pthread_cond_broadcast(&dpr->dpr_cv);
211 
212 		while (dpr->dpr_stop & DT_PROC_STOP_IDLE)
213 			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
214 	}
215 }
216 
217 /*ARGSUSED*/
218 static void
219 dt_proc_bpmain(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *fname)
220 {
221 	dt_dprintf("pid %d: breakpoint at %s()\n", (int)dpr->dpr_pid, fname);
222 	dt_proc_stop(dpr, DT_PROC_STOP_MAIN);
223 }
224 
225 static void
226 dt_proc_rdevent(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *evname)
227 {
228 	rd_event_msg_t rdm;
229 	rd_err_e err;
230 
231 	if ((err = rd_event_getmsg(dpr->dpr_rtld, &rdm)) != RD_OK) {
232 		dt_dprintf("pid %d: failed to get %s event message: %s\n",
233 		    (int)dpr->dpr_pid, evname, rd_errstr(err));
234 		return;
235 	}
236 
237 	dt_dprintf("pid %d: rtld event %s type=%d state %d\n",
238 	    (int)dpr->dpr_pid, evname, rdm.type, rdm.u.state);
239 
240 	switch (rdm.type) {
241 	case RD_DLACTIVITY:
242 		if (rdm.u.state == RD_CONSISTENT) {
243 			Pupdate_syms(dpr->dpr_proc);
244 			dt_proc_bpdisable(dpr);
245 			dt_pid_create_probes_module(dtp, dpr);
246 			dt_proc_bpenable(dpr);
247 		}
248 		break;
249 	case RD_PREINIT:
250 		Pupdate_syms(dpr->dpr_proc);
251 		dt_proc_stop(dpr, DT_PROC_STOP_PREINIT);
252 		break;
253 	case RD_POSTINIT:
254 		Pupdate_syms(dpr->dpr_proc);
255 		dt_proc_stop(dpr, DT_PROC_STOP_POSTINIT);
256 		break;
257 	}
258 }
259 
260 static void
261 dt_proc_rdwatch(dt_proc_t *dpr, rd_event_e event, const char *evname)
262 {
263 	rd_notify_t rdn;
264 	rd_err_e err;
265 
266 	if ((err = rd_event_addr(dpr->dpr_rtld, event, &rdn)) != RD_OK) {
267 		dt_dprintf("pid %d: failed to get event address for %s: %s\n",
268 		    (int)dpr->dpr_pid, evname, rd_errstr(err));
269 		return;
270 	}
271 
272 	if (rdn.type != RD_NOTIFY_BPT) {
273 		dt_dprintf("pid %d: event %s has unexpected type %d\n",
274 		    (int)dpr->dpr_pid, evname, rdn.type);
275 		return;
276 	}
277 
278 	(void) dt_proc_bpcreate(dpr, rdn.u.bptaddr,
279 	    (dt_bkpt_f *)dt_proc_rdevent, (void *)evname);
280 }
281 
282 /*
283  * Common code for enabling events associated with the run-time linker after
284  * attaching to a process or after a victim process completes an exec(2).
285  */
286 static void
287 dt_proc_attach(dt_proc_t *dpr, int exec)
288 {
289 	const pstatus_t *psp = Pstatus(dpr->dpr_proc);
290 	rd_err_e err;
291 	GElf_Sym sym;
292 
293 	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
294 
295 	if (exec) {
296 		if (psp->pr_lwp.pr_errno != 0)
297 			return; /* exec failed: nothing needs to be done */
298 
299 		dt_proc_bpdestroy(dpr, B_FALSE);
300 		Preset_maps(dpr->dpr_proc);
301 	}
302 
303 	if ((dpr->dpr_rtld = Prd_agent(dpr->dpr_proc)) != NULL &&
304 	    (err = rd_event_enable(dpr->dpr_rtld, B_TRUE)) == RD_OK) {
305 		dt_proc_rdwatch(dpr, RD_PREINIT, "RD_PREINIT");
306 		dt_proc_rdwatch(dpr, RD_POSTINIT, "RD_POSTINIT");
307 		dt_proc_rdwatch(dpr, RD_DLACTIVITY, "RD_DLACTIVITY");
308 	} else {
309 		dt_dprintf("pid %d: failed to enable rtld events: %s\n",
310 		    (int)dpr->dpr_pid, dpr->dpr_rtld ? rd_errstr(err) :
311 		    "rtld_db agent initialization failed");
312 	}
313 
314 	Pupdate_maps(dpr->dpr_proc);
315 
316 	if (Pxlookup_by_name(dpr->dpr_proc, LM_ID_BASE,
317 	    "a.out", "main", &sym, NULL) == 0) {
318 		(void) dt_proc_bpcreate(dpr, (uintptr_t)sym.st_value,
319 		    (dt_bkpt_f *)dt_proc_bpmain, "a.out`main");
320 	} else {
321 		dt_dprintf("pid %d: failed to find a.out`main: %s\n",
322 		    (int)dpr->dpr_pid, strerror(errno));
323 	}
324 }
325 
326 /*
327  * Wait for a stopped process to be set running again by some other debugger.
328  * This is typically not required by /proc-based debuggers, since the usual
329  * model is that one debugger controls one victim.  But DTrace, as usual, has
330  * its own needs: the stop() action assumes that prun(1) or some other tool
331  * will be applied to resume the victim process.  This could be solved by
332  * adding a PCWRUN directive to /proc, but that seems like overkill unless
333  * other debuggers end up needing this functionality, so we implement a cheap
334  * equivalent to PCWRUN using the set of existing kernel mechanisms.
335  *
336  * Our intent is really not just to wait for the victim to run, but rather to
337  * wait for it to run and then stop again for a reason other than the current
338  * PR_REQUESTED stop.  Since PCWSTOP/Pstopstatus() can be applied repeatedly
339  * to a stopped process and will return the same result without affecting the
340  * victim, we can just perform these operations repeatedly until Pstate()
341  * changes, the representative LWP ID changes, or the stop timestamp advances.
342  * dt_proc_control() will then rediscover the new state and continue as usual.
343  * When the process is still stopped in the same exact state, we sleep for a
344  * brief interval before waiting again so as not to spin consuming CPU cycles.
345  */
346 static void
347 dt_proc_waitrun(dt_proc_t *dpr)
348 {
349 	struct ps_prochandle *P = dpr->dpr_proc;
350 	const lwpstatus_t *psp = &Pstatus(P)->pr_lwp;
351 
352 	int krflag = psp->pr_flags & (PR_KLC | PR_RLC);
353 	timestruc_t tstamp = psp->pr_tstamp;
354 	lwpid_t lwpid = psp->pr_lwpid;
355 
356 	const long wstop = PCWSTOP;
357 	int pfd = Pctlfd(P);
358 
359 	assert(DT_MUTEX_HELD(&dpr->dpr_lock));
360 	assert(psp->pr_flags & PR_STOPPED);
361 	assert(Pstate(P) == PS_STOP);
362 
363 	/*
364 	 * While we are waiting for the victim to run, clear PR_KLC and PR_RLC
365 	 * so that if the libdtrace client is killed, the victim stays stopped.
366 	 * dt_proc_destroy() will also observe this and perform PRELEASE_HANG.
367 	 */
368 	(void) Punsetflags(P, krflag);
369 	Psync(P);
370 
371 	(void) pthread_mutex_unlock(&dpr->dpr_lock);
372 
373 	while (!dpr->dpr_quit) {
374 		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
375 			continue; /* check dpr_quit and continue waiting */
376 
377 		(void) pthread_mutex_lock(&dpr->dpr_lock);
378 		(void) Pstopstatus(P, PCNULL, 0);
379 		psp = &Pstatus(P)->pr_lwp;
380 
381 		/*
382 		 * If we've reached a new state, found a new representative, or
383 		 * the stop timestamp has changed, restore PR_KLC/PR_RLC to its
384 		 * original setting and then return with dpr_lock held.
385 		 */
386 		if (Pstate(P) != PS_STOP || psp->pr_lwpid != lwpid ||
387 		    bcmp(&psp->pr_tstamp, &tstamp, sizeof (tstamp)) != 0) {
388 			(void) Psetflags(P, krflag);
389 			Psync(P);
390 			return;
391 		}
392 
393 		(void) pthread_mutex_unlock(&dpr->dpr_lock);
394 		(void) poll(NULL, 0, MILLISEC / 2);
395 	}
396 
397 	(void) pthread_mutex_lock(&dpr->dpr_lock);
398 }
399 
400 typedef struct dt_proc_control_data {
401 	dtrace_hdl_t *dpcd_hdl;			/* DTrace handle */
402 	dt_proc_t *dpcd_proc;			/* proccess to control */
403 } dt_proc_control_data_t;
404 
405 /*
406  * Main loop for all victim process control threads.  We initialize all the
407  * appropriate /proc control mechanisms, and then enter a loop waiting for
408  * the process to stop on an event or die.  We process any events by calling
409  * appropriate subroutines, and exit when the victim dies or we lose control.
410  *
411  * The control thread synchronizes the use of dpr_proc with other libdtrace
412  * threads using dpr_lock.  We hold the lock for all of our operations except
413  * waiting while the process is running: this is accomplished by writing a
414  * PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.  If the
415  * libdtrace client wishes to exit or abort our wait, SIGCANCEL can be used.
416  */
417 static void *
418 dt_proc_control(void *arg)
419 {
420 	dt_proc_control_data_t *datap = arg;
421 	dtrace_hdl_t *dtp = datap->dpcd_hdl;
422 	dt_proc_t *dpr = datap->dpcd_proc;
423 	dt_proc_hash_t *dph = dpr->dpr_hdl->dt_procs;
424 	struct ps_prochandle *P = dpr->dpr_proc;
425 
426 	int pfd = Pctlfd(P);
427 	int pid = dpr->dpr_pid;
428 
429 	const long wstop = PCWSTOP;
430 	int notify = B_FALSE;
431 
432 	/*
433 	 * We disable the POSIX thread cancellation mechanism so that the
434 	 * client program using libdtrace can't accidentally cancel our thread.
435 	 * dt_proc_destroy() uses SIGCANCEL explicitly to simply poke us out
436 	 * of PCWSTOP with EINTR, at which point we will see dpr_quit and exit.
437 	 */
438 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
439 
440 	/*
441 	 * Set up the corresponding process for tracing by libdtrace.  We want
442 	 * to be able to catch breakpoints and efficiently single-step over
443 	 * them, and we need to enable librtld_db to watch libdl activity.
444 	 */
445 	(void) pthread_mutex_lock(&dpr->dpr_lock);
446 
447 	(void) Punsetflags(P, PR_ASYNC);	/* require synchronous mode */
448 	(void) Psetflags(P, PR_BPTADJ);		/* always adjust eip on x86 */
449 	(void) Punsetflags(P, PR_FORK);		/* do not inherit on fork */
450 
451 	(void) Pfault(P, FLTBPT, B_TRUE);	/* always trace breakpoints */
452 	(void) Pfault(P, FLTTRACE, B_TRUE);	/* always trace single-step */
453 
454 	/*
455 	 * We must trace exit from exec() system calls so that if the exec is
456 	 * successful, we can reset our breakpoints and re-initialize libproc.
457 	 */
458 	(void) Psysexit(P, SYS_exec, B_TRUE);
459 	(void) Psysexit(P, SYS_execve, B_TRUE);
460 
461 	/*
462 	 * We must trace entry and exit for fork() system calls in order to
463 	 * disable our breakpoints temporarily during the fork.  We do not set
464 	 * the PR_FORK flag, so if fork succeeds the child begins executing and
465 	 * does not inherit any other tracing behaviors or a control thread.
466 	 */
467 	(void) Psysentry(P, SYS_vfork, B_TRUE);
468 	(void) Psysexit(P, SYS_vfork, B_TRUE);
469 	(void) Psysentry(P, SYS_fork1, B_TRUE);
470 	(void) Psysexit(P, SYS_fork1, B_TRUE);
471 	(void) Psysentry(P, SYS_forkall, B_TRUE);
472 	(void) Psysexit(P, SYS_forkall, B_TRUE);
473 
474 	Psync(P);				/* enable all /proc changes */
475 	dt_proc_attach(dpr, B_FALSE);		/* enable rtld breakpoints */
476 
477 	/*
478 	 * If PR_KLC is set, we created the process; otherwise we grabbed it.
479 	 * Check for an appropriate stop request and wait for dt_proc_continue.
480 	 */
481 	if (Pstatus(P)->pr_flags & PR_KLC)
482 		dt_proc_stop(dpr, DT_PROC_STOP_CREATE);
483 	else
484 		dt_proc_stop(dpr, DT_PROC_STOP_GRAB);
485 
486 	if (Psetrun(P, 0, 0) == -1) {
487 		dt_dprintf("pid %d: failed to set running: %s\n",
488 		    (int)dpr->dpr_pid, strerror(errno));
489 	}
490 
491 	(void) pthread_mutex_unlock(&dpr->dpr_lock);
492 
493 	/*
494 	 * Wait for the process corresponding to this control thread to stop,
495 	 * process the event, and then set it running again.  We want to sleep
496 	 * with dpr_lock *unheld* so that other parts of libdtrace can use the
497 	 * ps_prochandle in the meantime (e.g. ustack()).  To do this, we write
498 	 * a PCWSTOP directive directly to the underlying /proc/<pid>/ctl file.
499 	 * Once the process stops, we wake up, grab dpr_lock, and then call
500 	 * Pwait() (which will return immediately) and do our processing.
501 	 */
502 	while (!dpr->dpr_quit) {
503 		const lwpstatus_t *psp;
504 
505 		if (write(pfd, &wstop, sizeof (wstop)) == -1 && errno == EINTR)
506 			continue; /* check dpr_quit and continue waiting */
507 
508 		(void) pthread_mutex_lock(&dpr->dpr_lock);
509 pwait_locked:
510 		if (Pstopstatus(P, PCNULL, 0) == -1 && errno == EINTR) {
511 			(void) pthread_mutex_unlock(&dpr->dpr_lock);
512 			continue; /* check dpr_quit and continue waiting */
513 		}
514 
515 		switch (Pstate(P)) {
516 		case PS_STOP:
517 			psp = &Pstatus(P)->pr_lwp;
518 
519 			dt_dprintf("pid %d: proc stopped showing %d/%d\n",
520 			    pid, psp->pr_why, psp->pr_what);
521 
522 			/*
523 			 * If the process stops showing PR_REQUESTED, then the
524 			 * DTrace stop() action was applied to it or another
525 			 * debugging utility (e.g. pstop(1)) asked it to stop.
526 			 * In either case, the user's intention is for the
527 			 * process to remain stopped until another external
528 			 * mechanism (e.g. prun(1)) is applied.  So instead of
529 			 * setting the process running ourself, we wait for
530 			 * someone else to do so.  Once that happens, we return
531 			 * to our normal loop waiting for an event of interest.
532 			 */
533 			if (psp->pr_why == PR_REQUESTED) {
534 				dt_proc_waitrun(dpr);
535 				(void) pthread_mutex_unlock(&dpr->dpr_lock);
536 				continue;
537 			}
538 
539 			/*
540 			 * If the process stops showing one of the events that
541 			 * we are tracing, perform the appropriate response.
542 			 * Note that we ignore PR_SUSPENDED, PR_CHECKPOINT, and
543 			 * PR_JOBCONTROL by design: if one of these conditions
544 			 * occurs, we will fall through to Psetrun() but the
545 			 * process will remain stopped in the kernel by the
546 			 * corresponding mechanism (e.g. job control stop).
547 			 */
548 			if (psp->pr_why == PR_FAULTED && psp->pr_what == FLTBPT)
549 				dt_proc_bpmatch(dtp, dpr);
550 			else if (psp->pr_why == PR_SYSENTRY &&
551 			    IS_SYS_FORK(psp->pr_what))
552 				dt_proc_bpdisable(dpr);
553 			else if (psp->pr_why == PR_SYSEXIT &&
554 			    IS_SYS_FORK(psp->pr_what))
555 				dt_proc_bpenable(dpr);
556 			else if (psp->pr_why == PR_SYSEXIT &&
557 			    IS_SYS_EXEC(psp->pr_what))
558 				dt_proc_attach(dpr, B_TRUE);
559 			break;
560 
561 		case PS_LOST:
562 			if (Preopen(P) == 0)
563 				goto pwait_locked;
564 
565 			dt_dprintf("pid %d: proc lost: %s\n",
566 			    pid, strerror(errno));
567 
568 			dpr->dpr_quit = B_TRUE;
569 			notify = B_TRUE;
570 			break;
571 
572 		case PS_UNDEAD:
573 			dt_dprintf("pid %d: proc died\n", pid);
574 			dpr->dpr_quit = B_TRUE;
575 			notify = B_TRUE;
576 			break;
577 		}
578 
579 		if (Pstate(P) != PS_UNDEAD && Psetrun(P, 0, 0) == -1) {
580 			dt_dprintf("pid %d: failed to set running: %s\n",
581 			    (int)dpr->dpr_pid, strerror(errno));
582 		}
583 
584 		(void) pthread_mutex_unlock(&dpr->dpr_lock);
585 	}
586 
587 	/*
588 	 * If the control thread detected PS_UNDEAD or PS_LOST, then enqueue
589 	 * the dt_proc_t structure on the dt_proc_hash_t notification list.
590 	 */
591 	if (notify) {
592 		(void) pthread_mutex_lock(&dph->dph_lock);
593 
594 		dpr->dpr_notify = dph->dph_notify;
595 		dph->dph_notify = dpr;
596 
597 		(void) pthread_mutex_unlock(&dph->dph_lock);
598 		(void) pthread_cond_broadcast(&dph->dph_cv);
599 	}
600 
601 	/*
602 	 * Destroy and remove any remaining breakpoints, set dpr_done and clear
603 	 * dpr_tid to indicate the control thread has exited, and notify any
604 	 * waiting thread in dt_proc_destroy() that we have succesfully exited.
605 	 */
606 	(void) pthread_mutex_lock(&dpr->dpr_lock);
607 
608 	dt_proc_bpdestroy(dpr, B_TRUE);
609 	dpr->dpr_done = B_TRUE;
610 	dpr->dpr_tid = 0;
611 
612 	(void) pthread_mutex_unlock(&dpr->dpr_lock);
613 	(void) pthread_cond_broadcast(&dpr->dpr_cv);
614 
615 	return (NULL);
616 }
617 
618 /*PRINTFLIKE3*/
619 static struct ps_prochandle *
620 dt_proc_error(dtrace_hdl_t *dtp, dt_proc_t *dpr, const char *format, ...)
621 {
622 	va_list ap;
623 
624 	va_start(ap, format);
625 	dt_set_errmsg(dtp, NULL, NULL, NULL, 0, format, ap);
626 	va_end(ap);
627 
628 	if (dpr->dpr_proc != NULL)
629 		Prelease(dpr->dpr_proc, 0);
630 
631 	dt_free(dtp, dpr);
632 	(void) dt_set_errno(dtp, EDT_COMPILER);
633 	return (NULL);
634 }
635 
636 dt_proc_t *
637 dt_proc_lookup(dtrace_hdl_t *dtp, struct ps_prochandle *P, int remove)
638 {
639 	dt_proc_hash_t *dph = dtp->dt_procs;
640 	pid_t pid = Pstatus(P)->pr_pid;
641 	dt_proc_t *dpr, **dpp = &dph->dph_hash[pid & (dph->dph_hashlen - 1)];
642 
643 	for (dpr = *dpp; dpr != NULL; dpr = dpr->dpr_hash) {
644 		if (dpr->dpr_pid == pid)
645 			break;
646 		else
647 			dpp = &dpr->dpr_hash;
648 	}
649 
650 	assert(dpr != NULL);
651 	assert(dpr->dpr_proc == P);
652 
653 	if (remove)
654 		*dpp = dpr->dpr_hash; /* remove from pid hash chain */
655 
656 	return (dpr);
657 }
658 
659 static void
660 dt_proc_destroy(dtrace_hdl_t *dtp, struct ps_prochandle *P)
661 {
662 	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_TRUE);
663 	dt_proc_hash_t *dph = dtp->dt_procs;
664 	dt_proc_t *npr, **npp;
665 	int rflag;
666 
667 	assert(dpr != NULL);
668 
669 	/*
670 	 * If neither PR_KLC nor PR_RLC is set, then the process is stopped by
671 	 * an external debugger and we were waiting in dt_proc_waitrun().
672 	 * Leave the process in this condition using PRELEASE_HANG.
673 	 */
674 	if (!(Pstatus(dpr->dpr_proc)->pr_flags & (PR_KLC | PR_RLC))) {
675 		dt_dprintf("abandoning pid %d\n", (int)dpr->dpr_pid);
676 		rflag = PRELEASE_HANG;
677 	} else {
678 		dt_dprintf("releasing pid %d\n", (int)dpr->dpr_pid);
679 		rflag = 0; /* apply kill or run-on-last-close */
680 	}
681 
682 	if (dpr->dpr_tid) {
683 		/*
684 		 * Set the dpr_quit flag to tell the daemon thread to exit.  We
685 		 * send it a SIGCANCEL to poke it out of PCWSTOP or any other
686 		 * long-term /proc system call.  Our daemon threads have POSIX
687 		 * cancellation disabled, so EINTR will be the only effect.  We
688 		 * then wait for dpr_done to indicate the thread has exited.
689 		 *
690 		 * We can't use pthread_kill() to send SIGCANCEL because the
691 		 * interface forbids it and we can't use pthread_cancel()
692 		 * because with cancellation disabled it won't actually
693 		 * send SIGCANCEL to the target thread, so we use _lwp_kill()
694 		 * to do the job.  This is all built on evil knowledge of
695 		 * the details of the cancellation mechanism in libc.
696 		 */
697 		(void) pthread_mutex_lock(&dpr->dpr_lock);
698 		dpr->dpr_quit = B_TRUE;
699 		(void) _lwp_kill(dpr->dpr_tid, SIGCANCEL);
700 
701 		/*
702 		 * If the process is currently idling in dt_proc_stop(), re-
703 		 * enable breakpoints and poke it into running again.
704 		 */
705 		if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
706 			dt_proc_bpenable(dpr);
707 			dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
708 			(void) pthread_cond_broadcast(&dpr->dpr_cv);
709 		}
710 
711 		while (!dpr->dpr_done)
712 			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
713 
714 		(void) pthread_mutex_unlock(&dpr->dpr_lock);
715 	}
716 
717 	/*
718 	 * Before we free the process structure, walk the dt_proc_hash_t's
719 	 * notification list and remove this dt_proc_t if it is enqueued.
720 	 */
721 	(void) pthread_mutex_lock(&dph->dph_lock);
722 	npp = &dph->dph_notify;
723 
724 	for (npr = *npp; npr != NULL; npr = npr->dpr_notify) {
725 		if (npr != dpr)
726 			npp = &npr->dpr_notify;
727 		else
728 			break;
729 	}
730 
731 	if (npr != NULL) {
732 		*npp = npr->dpr_notify;
733 		npr->dpr_notify = NULL;
734 	}
735 
736 	(void) pthread_mutex_unlock(&dph->dph_lock);
737 
738 	/*
739 	 * Remove the dt_proc_list from the LRU list, release the underlying
740 	 * libproc handle, and free our dt_proc_t data structure.
741 	 */
742 	if (dpr->dpr_cacheable) {
743 		assert(dph->dph_lrucnt != 0);
744 		dph->dph_lrucnt--;
745 	}
746 
747 	dt_list_delete(&dph->dph_lrulist, dpr);
748 	Prelease(dpr->dpr_proc, rflag);
749 	dt_free(dtp, dpr);
750 }
751 
752 static int
753 dt_proc_create_thread(dtrace_hdl_t *dtp, dt_proc_t *dpr, uint_t stop)
754 {
755 	dt_proc_control_data_t data;
756 	sigset_t nset, oset;
757 	pthread_attr_t a;
758 	int err;
759 
760 	(void) pthread_mutex_lock(&dpr->dpr_lock);
761 	dpr->dpr_stop |= stop; /* set bit for initial rendezvous */
762 
763 	(void) pthread_attr_init(&a);
764 	(void) pthread_attr_setdetachstate(&a, PTHREAD_CREATE_DETACHED);
765 
766 	(void) sigfillset(&nset);
767 	(void) sigdelset(&nset, SIGABRT);	/* unblocked for assert() */
768 	(void) sigdelset(&nset, SIGCANCEL);	/* see dt_proc_destroy() */
769 
770 	data.dpcd_hdl = dtp;
771 	data.dpcd_proc = dpr;
772 
773 	(void) pthread_sigmask(SIG_SETMASK, &nset, &oset);
774 	err = pthread_create(&dpr->dpr_tid, &a, dt_proc_control, &data);
775 	(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);
776 
777 	/*
778 	 * If the control thread was created, then wait on dpr_cv for either
779 	 * dpr_done to be set (the victim died or the control thread failed)
780 	 * or DT_PROC_STOP_IDLE to be set, indicating that the victim is now
781 	 * stopped by /proc and the control thread is at the rendezvous event.
782 	 * On success, we return with the process and control thread stopped:
783 	 * the caller can then apply dt_proc_continue() to resume both.
784 	 */
785 	if (err == 0) {
786 		while (!dpr->dpr_done && !(dpr->dpr_stop & DT_PROC_STOP_IDLE))
787 			(void) pthread_cond_wait(&dpr->dpr_cv, &dpr->dpr_lock);
788 
789 		/*
790 		 * If dpr_done is set, the control thread aborted before it
791 		 * reached the rendezvous event.  This is either due to PS_LOST
792 		 * or PS_UNDEAD (i.e. the process died).  We try to provide a
793 		 * small amount of useful information to help figure it out.
794 		 */
795 		if (dpr->dpr_done) {
796 			const psinfo_t *prp = Ppsinfo(dpr->dpr_proc);
797 			int stat = prp ? prp->pr_wstat : 0;
798 			int pid = dpr->dpr_pid;
799 
800 			if (Pstate(dpr->dpr_proc) == PS_LOST) {
801 				(void) dt_proc_error(dpr->dpr_hdl, dpr,
802 				    "failed to control pid %d: process exec'd "
803 				    "set-id or unobservable program\n", pid);
804 			} else if (WIFSIGNALED(stat)) {
805 				(void) dt_proc_error(dpr->dpr_hdl, dpr,
806 				    "failed to control pid %d: process died "
807 				    "from signal %d\n", pid, WTERMSIG(stat));
808 			} else {
809 				(void) dt_proc_error(dpr->dpr_hdl, dpr,
810 				    "failed to control pid %d: process exited "
811 				    "with status %d\n", pid, WEXITSTATUS(stat));
812 			}
813 
814 			err = ESRCH; /* cause grab() or create() to fail */
815 		} else {
816 			/*
817 			 * Disable breakpoints while the process is stopped so
818 			 * the pid provider can correctly disassemble all
819 			 * functions.
820 			 */
821 			dt_proc_bpdisable(dpr);
822 		}
823 
824 	} else {
825 		(void) dt_proc_error(dpr->dpr_hdl, dpr,
826 		    "failed to create control thread for process-id %d: %s\n",
827 		    (int)dpr->dpr_pid, strerror(err));
828 	}
829 
830 	(void) pthread_mutex_unlock(&dpr->dpr_lock);
831 	(void) pthread_attr_destroy(&a);
832 
833 	return (err);
834 }
835 
836 struct ps_prochandle *
837 dt_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv)
838 {
839 	dt_proc_hash_t *dph = dtp->dt_procs;
840 	dt_proc_t *dpr;
841 	int err;
842 
843 	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
844 		return (NULL); /* errno is set for us */
845 
846 	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
847 	(void) pthread_cond_init(&dpr->dpr_cv, NULL);
848 
849 	if ((dpr->dpr_proc = Pcreate(file, argv, &err, NULL, 0)) == NULL) {
850 		return (dt_proc_error(dtp, dpr,
851 		    "failed to execute %s: %s\n", file, Pcreate_error(err)));
852 	}
853 
854 	dpr->dpr_hdl = dtp;
855 	dpr->dpr_pid = Pstatus(dpr->dpr_proc)->pr_pid;
856 
857 	(void) Punsetflags(dpr->dpr_proc, PR_RLC);
858 	(void) Psetflags(dpr->dpr_proc, PR_KLC);
859 
860 	if (dt_proc_create_thread(dtp, dpr, dtp->dt_prcmode) != 0)
861 		return (NULL); /* dt_proc_error() has been called for us */
862 
863 	dpr->dpr_hash = dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)];
864 	dph->dph_hash[dpr->dpr_pid & (dph->dph_hashlen - 1)] = dpr;
865 	dt_list_prepend(&dph->dph_lrulist, dpr);
866 
867 	dt_dprintf("created pid %d\n", (int)dpr->dpr_pid);
868 	dpr->dpr_refs++;
869 
870 	return (dpr->dpr_proc);
871 }
872 
873 struct ps_prochandle *
874 dt_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags, int nomonitor)
875 {
876 	dt_proc_hash_t *dph = dtp->dt_procs;
877 	uint_t h = pid & (dph->dph_hashlen - 1);
878 	dt_proc_t *dpr, *opr;
879 	int err;
880 
881 	/*
882 	 * Search the hash table for the pid.  If it is already grabbed or
883 	 * created, move the handle to the front of the lrulist, increment
884 	 * the reference count, and return the existing ps_prochandle.
885 	 */
886 	for (dpr = dph->dph_hash[h]; dpr != NULL; dpr = dpr->dpr_hash) {
887 		if (dpr->dpr_pid == pid && !dpr->dpr_stale) {
888 			/*
889 			 * If the cached handle was opened read-only and
890 			 * this request is for a writeable handle, mark
891 			 * the cached handle as stale and open a new handle.
892 			 * Since it's stale, unmark it as cacheable.
893 			 */
894 			if (dpr->dpr_rdonly && !(flags & PGRAB_RDONLY)) {
895 				dt_dprintf("upgrading pid %d\n", (int)pid);
896 				dpr->dpr_stale = B_TRUE;
897 				dpr->dpr_cacheable = B_FALSE;
898 				dph->dph_lrucnt--;
899 				break;
900 			}
901 
902 			dt_dprintf("grabbed pid %d (cached)\n", (int)pid);
903 			dt_list_delete(&dph->dph_lrulist, dpr);
904 			dt_list_prepend(&dph->dph_lrulist, dpr);
905 			dpr->dpr_refs++;
906 			return (dpr->dpr_proc);
907 		}
908 	}
909 
910 	if ((dpr = dt_zalloc(dtp, sizeof (dt_proc_t))) == NULL)
911 		return (NULL); /* errno is set for us */
912 
913 	(void) pthread_mutex_init(&dpr->dpr_lock, NULL);
914 	(void) pthread_cond_init(&dpr->dpr_cv, NULL);
915 
916 	if ((dpr->dpr_proc = Pgrab(pid, flags, &err)) == NULL) {
917 		return (dt_proc_error(dtp, dpr,
918 		    "failed to grab pid %d: %s\n", (int)pid, Pgrab_error(err)));
919 	}
920 
921 	dpr->dpr_hdl = dtp;
922 	dpr->dpr_pid = pid;
923 
924 	(void) Punsetflags(dpr->dpr_proc, PR_KLC);
925 	(void) Psetflags(dpr->dpr_proc, PR_RLC);
926 
927 	/*
928 	 * If we are attempting to grab the process without a monitor
929 	 * thread, then mark the process cacheable only if it's being
930 	 * grabbed read-only.  If we're currently caching more process
931 	 * handles than dph_lrulim permits, attempt to find the
932 	 * least-recently-used handle that is currently unreferenced and
933 	 * release it from the cache.  Otherwise we are grabbing the process
934 	 * for control: create a control thread for this process and store
935 	 * its ID in dpr->dpr_tid.
936 	 */
937 	if (nomonitor || (flags & PGRAB_RDONLY)) {
938 		if (dph->dph_lrucnt >= dph->dph_lrulim) {
939 			for (opr = dt_list_prev(&dph->dph_lrulist);
940 			    opr != NULL; opr = dt_list_prev(opr)) {
941 				if (opr->dpr_cacheable && opr->dpr_refs == 0) {
942 					dt_proc_destroy(dtp, opr->dpr_proc);
943 					break;
944 				}
945 			}
946 		}
947 
948 		if (flags & PGRAB_RDONLY) {
949 			dpr->dpr_cacheable = B_TRUE;
950 			dpr->dpr_rdonly = B_TRUE;
951 			dph->dph_lrucnt++;
952 		}
953 
954 	} else if (dt_proc_create_thread(dtp, dpr, DT_PROC_STOP_GRAB) != 0)
955 		return (NULL); /* dt_proc_error() has been called for us */
956 
957 	dpr->dpr_hash = dph->dph_hash[h];
958 	dph->dph_hash[h] = dpr;
959 	dt_list_prepend(&dph->dph_lrulist, dpr);
960 
961 	dt_dprintf("grabbed pid %d\n", (int)pid);
962 	dpr->dpr_refs++;
963 
964 	return (dpr->dpr_proc);
965 }
966 
967 void
968 dt_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
969 {
970 	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
971 	dt_proc_hash_t *dph = dtp->dt_procs;
972 
973 	assert(dpr != NULL);
974 	assert(dpr->dpr_refs != 0);
975 
976 	if (--dpr->dpr_refs == 0 &&
977 	    (!dpr->dpr_cacheable || dph->dph_lrucnt > dph->dph_lrulim))
978 		dt_proc_destroy(dtp, P);
979 }
980 
981 void
982 dt_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
983 {
984 	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
985 
986 	(void) pthread_mutex_lock(&dpr->dpr_lock);
987 
988 	if (dpr->dpr_stop & DT_PROC_STOP_IDLE) {
989 		/*
990 		 * Breakpoints are disabled while the process is stopped so
991 		 * the pid provider can correctly disassemble all functions.
992 		 */
993 		dt_proc_bpenable(dpr);
994 		dpr->dpr_stop &= ~DT_PROC_STOP_IDLE;
995 		(void) pthread_cond_broadcast(&dpr->dpr_cv);
996 	}
997 
998 	(void) pthread_mutex_unlock(&dpr->dpr_lock);
999 }
1000 
1001 void
1002 dt_proc_lock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1003 {
1004 	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1005 	int err = pthread_mutex_lock(&dpr->dpr_lock);
1006 	assert(err == 0); /* check for recursion */
1007 }
1008 
1009 void
1010 dt_proc_unlock(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1011 {
1012 	dt_proc_t *dpr = dt_proc_lookup(dtp, P, B_FALSE);
1013 	int err = pthread_mutex_unlock(&dpr->dpr_lock);
1014 	assert(err == 0); /* check for unheld lock */
1015 }
1016 
1017 void
1018 dt_proc_hash_create(dtrace_hdl_t *dtp)
1019 {
1020 	if ((dtp->dt_procs = dt_zalloc(dtp, sizeof (dt_proc_hash_t) +
1021 	    sizeof (dt_proc_t *) * _dtrace_pidbuckets - 1)) != NULL) {
1022 
1023 		(void) pthread_mutex_init(&dtp->dt_procs->dph_lock, NULL);
1024 		(void) pthread_cond_init(&dtp->dt_procs->dph_cv, NULL);
1025 
1026 		dtp->dt_procs->dph_hashlen = _dtrace_pidbuckets;
1027 		dtp->dt_procs->dph_lrulim = _dtrace_pidlrulim;
1028 	}
1029 }
1030 
1031 void
1032 dt_proc_hash_destroy(dtrace_hdl_t *dtp)
1033 {
1034 	dt_proc_hash_t *dph = dtp->dt_procs;
1035 	dt_proc_t *dpr;
1036 
1037 	while ((dpr = dt_list_next(&dph->dph_lrulist)) != NULL)
1038 		dt_proc_destroy(dtp, dpr->dpr_proc);
1039 
1040 	dtp->dt_procs = NULL;
1041 	dt_free(dtp, dph);
1042 }
1043 
1044 struct ps_prochandle *
1045 dtrace_proc_create(dtrace_hdl_t *dtp, const char *file, char *const *argv)
1046 {
1047 	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
1048 	struct ps_prochandle *P = dt_proc_create(dtp, file, argv);
1049 
1050 	if (P != NULL && idp != NULL && idp->di_id == 0)
1051 		idp->di_id = Pstatus(P)->pr_pid; /* $target = created pid */
1052 
1053 	return (P);
1054 }
1055 
1056 struct ps_prochandle *
1057 dtrace_proc_grab(dtrace_hdl_t *dtp, pid_t pid, int flags)
1058 {
1059 	dt_ident_t *idp = dt_idhash_lookup(dtp->dt_macros, "target");
1060 	struct ps_prochandle *P = dt_proc_grab(dtp, pid, flags, 0);
1061 
1062 	if (P != NULL && idp != NULL && idp->di_id == 0)
1063 		idp->di_id = pid; /* $target = grabbed pid */
1064 
1065 	return (P);
1066 }
1067 
1068 void
1069 dtrace_proc_release(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1070 {
1071 	dt_proc_release(dtp, P);
1072 }
1073 
1074 void
1075 dtrace_proc_continue(dtrace_hdl_t *dtp, struct ps_prochandle *P)
1076 {
1077 	dt_proc_continue(dtp, P);
1078 }
1079