xref: /illumos-gate/usr/src/lib/libpctx/common/libpctx.c (revision e9af4bc0b1cc30cea75d6ad4aa2fde97d985e9be)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This file contains a set of generic routines for periodically
29  * sampling the state of another process, or tree of processes.
30  *
31  * It is built upon the infrastructure provided by libproc.
32  */
33 
34 #include <sys/wait.h>
35 #include <sys/syscall.h>
36 #include <sys/time.h>
37 #include <libproc.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <errno.h>
41 #include <unistd.h>
42 #include <signal.h>
43 #include <string.h>
44 #include <strings.h>
45 #include <limits.h>
46 #include <ctype.h>
47 #include <libintl.h>
48 #include <libcpc.h>
49 #include <sys/cpc_impl.h>
50 
51 #include "libpctx.h"
52 
/*
 * Per-process tracking context.  One of these is allocated by
 * pctx_create() or pctx_capture() and destroyed by pctx_release().
 */
struct __pctx {
	pctx_errfn_t *errfn;		/* error reporter used by pctx_error() */
	struct ps_prochandle *Pr;	/* libproc handle; NULL once freed */
	void *uarg;			/* caller's argument, passed to handlers */
	pctx_sysc_execfn_t *exec;	/* handlers installed by */
	pctx_sysc_forkfn_t *fork;	/* pctx_set_events(); unset ones */
	pctx_sysc_exitfn_t *exit;	/* point at default_int/default_void */
	pctx_sysc_lwp_createfn_t *lwp_create;
	pctx_init_lwpfn_t *init_lwp;
	pctx_fini_lwpfn_t *fini_lwp;
	pctx_sysc_lwp_exitfn_t *lwp_exit;
	int verbose;			/* non-zero: extra pctx_run() messages */
	int created;			/* 1 from pctx_create, 0 from capture */
	int sigblocked;			/* pctx_begin_syscalls() nesting depth */
	int terminate;			/* set by pctx_terminate() */
	sigset_t savedset;		/* mask saved while termsig is blocked */
	cpc_t *cpc;			/* last cpc_t passed to __pctx_cpc() */
};
71 
/*
 * Hook installed through __pctx_cpc_register_callback().  It is invoked
 * with a context's recorded cpc_t when that context is freed, or when
 * __pctx_cpc() is called with a different cpc_t.  NULL until registered.
 */
static void (*pctx_cpc_callback)(cpc_t *cpc, struct __pctx *pctx);
73 
/*
 * Default error reporter: writes a "libpctx: pctx_<fn>: " prefix followed
 * by the formatted message to stderr.  Used whenever the caller did not
 * supply an error function of its own.
 */
static void
pctx_default_errfn(const char *fn, const char *fmt, va_list ap)
{
	FILE *out = stderr;

	(void) fprintf(out, "libpctx: pctx_%s: ", fn);
	(void) vfprintf(out, fmt, ap);
}
80 
81 /*PRINTFLIKE3*/
82 static void
83 pctx_error(pctx_t *pctx, const char *fn, const char *fmt, ...)
84 {
85 	va_list ap;
86 
87 	va_start(ap, fmt);
88 	pctx->errfn(fn, fmt, ap);
89 	va_end(ap);
90 }
91 
92 /*
93  * Create a new process and bind the user args for it
94  */
95 pctx_t *
96 pctx_create(
97     const char *filename,
98     char *const *argv,
99     void *arg,
100     int verbose,
101     pctx_errfn_t *errfn)
102 {
103 	static const char fn[] = "create";
104 	int err;
105 	pctx_t *pctx;
106 
107 	pctx = calloc(1, sizeof (*pctx));
108 	pctx->uarg = arg;
109 	pctx->verbose = verbose;
110 	pctx->terminate = 0;
111 	pctx->errfn = errfn ? errfn : pctx_default_errfn;
112 
113 	if ((pctx->Pr = Pcreate(filename, argv, &err, 0, 0)) == NULL) {
114 		switch (err) {
115 		case C_PERM:
116 			pctx_error(pctx, fn, gettext("cannot trace set-id or "
117 			    "unreadable program '%s'\n"), filename);
118 			break;
119 		case C_LP64:
120 			pctx_error(pctx, fn, gettext("cannot control LP64 "
121 			    "program '%s'\n"), filename);
122 			break;
123 		case C_NOEXEC:
124 			pctx_error(pctx, fn, gettext("cannot execute "
125 			    "program '%s'\n"), filename);
126 			break;
127 		case C_NOENT:
128 			pctx_error(pctx, fn, gettext("cannot find"
129 			    "program '%s'\n"), filename);
130 			break;
131 		case C_FORK:
132 			pctx_error(pctx, fn, gettext("cannot fork, "
133 			    "program '%s'\n"), filename);
134 			break;
135 		default:
136 			pctx_error(pctx, fn, gettext("%s, program '%s'\n"),
137 			    Pcreate_error(err), filename);
138 			break;
139 		}
140 		free(pctx);
141 		return (NULL);
142 	}
143 
144 	if (Psysentry(pctx->Pr, SYS_exit, 1) == -1) {
145 		pctx_error(pctx, fn,
146 		    gettext("can't stop-on-exit() program '%s'\n"), filename);
147 		Prelease(pctx->Pr, PRELEASE_KILL);
148 		free(pctx);
149 		return (NULL);
150 	}
151 	/*
152 	 * Set kill-on-last-close so the controlled process
153 	 * dies if we die.
154 	 */
155 	pctx->created = 1;
156 	(void) Psetflags(pctx->Pr, PR_KLC);
157 	(void) pctx_set_events(pctx, PCTX_NULL_EVENT);
158 
159 	return (pctx);
160 }
161 
162 /*
163  * Capture an existing process and bind the user args for it
164  */
165 pctx_t *
166 pctx_capture(pid_t pid, void *arg, int verbose, pctx_errfn_t *errfn)
167 {
168 	static const char fn[] = "capture";
169 	int err;
170 	pctx_t *pctx;
171 
172 	pctx = calloc(1, sizeof (*pctx));
173 	pctx->uarg = arg;
174 	pctx->verbose = verbose;
175 	pctx->errfn = errfn ? errfn : pctx_default_errfn;
176 
177 	if ((pctx->Pr = Pgrab(pid, 0, &err)) == NULL) {
178 		switch (err) {
179 		case G_NOPROC:
180 			pctx_error(pctx, fn,
181 			    gettext("pid %d doesn't exist\n"), (int)pid);
182 			break;
183 		case G_ZOMB:
184 			pctx_error(pctx, fn,
185 			    gettext("pid %d is a zombie\n"), (int)pid);
186 			break;
187 		case G_PERM:
188 			pctx_error(pctx, fn,
189 			    gettext("pid %d: permission denied\n"), (int)pid);
190 			break;
191 		case G_BUSY:
192 			pctx_error(pctx, fn,
193 			    gettext("pid %d is already being traced\n"),
194 			    (int)pid);
195 			break;
196 		case G_SYS:
197 			pctx_error(pctx, fn,
198 			    gettext("pid %d is a system process\n"), (int)pid);
199 			break;
200 		case G_SELF:
201 			pctx_error(pctx, fn,
202 			    gettext("cannot capture self!\n"));
203 			break;
204 		case G_LP64:
205 			pctx_error(pctx, fn, gettext("cannot control LP64 "
206 			    "process, pid %d\n"), (int)pid);
207 			break;
208 		default:
209 			pctx_error(pctx, fn, gettext("%s: pid %d\n"),
210 			    Pgrab_error(err), (int)pid);
211 			break;
212 		}
213 		free(pctx);
214 		return (NULL);
215 	}
216 
217 	if (Psysentry(pctx->Pr, SYS_exit, 1) == -1) {
218 		pctx_error(pctx, fn,
219 		    gettext("can't stop-on-exit() pid %d\n"), (int)pid);
220 		Prelease(pctx->Pr, PRELEASE_CLEAR);
221 		free(pctx);
222 		return (NULL);
223 	}
224 
225 	/*
226 	 * Set run-on-last-close so the controlled process
227 	 * runs even if we die on a signal.  This is because
228 	 * we grabbed an existing process - it would be impolite
229 	 * to cause it to die if we exit prematurely.
230 	 */
231 	pctx->created = 0;
232 	(void) Psetflags(pctx->Pr, PR_RLC);
233 	(void) pctx_set_events(pctx, PCTX_NULL_EVENT);
234 
235 	return (pctx);
236 }
237 
238 /*ARGSUSED*/
239 static void
240 default_void(pctx_t *pctx)
241 {}
242 
243 /*ARGSUSED*/
244 static int
245 default_int(pctx_t *pctx)
246 {
247 	return (0);
248 }
249 
250 int
251 pctx_set_events(pctx_t *pctx, ...)
252 {
253 	static const char fn[] = "set_events";
254 	va_list pvar;
255 	int error = 0;
256 	pctx_event_t event;
257 
258 	va_start(pvar, pctx);
259 	do {
260 		switch (event = (pctx_event_t)va_arg(pvar, pctx_event_t)) {
261 		case PCTX_NULL_EVENT:
262 			break;
263 		case PCTX_SYSC_EXEC_EVENT:
264 			pctx->exec = (pctx_sysc_execfn_t *)
265 			    va_arg(pvar, pctx_sysc_execfn_t *);
266 			break;
267 		case PCTX_SYSC_FORK_EVENT:
268 			pctx->fork = (pctx_sysc_forkfn_t *)
269 			    va_arg(pvar, pctx_sysc_forkfn_t *);
270 			break;
271 		case PCTX_SYSC_EXIT_EVENT:	/* always intercepted */
272 			pctx->exit = (pctx_sysc_exitfn_t *)
273 			    va_arg(pvar, pctx_sysc_exitfn_t *);
274 			break;
275 		case PCTX_SYSC_LWP_CREATE_EVENT:
276 			pctx->lwp_create = (pctx_sysc_lwp_createfn_t *)
277 			    va_arg(pvar, pctx_sysc_lwp_createfn_t *);
278 			break;
279 		case PCTX_INIT_LWP_EVENT:
280 			pctx->init_lwp = (pctx_init_lwpfn_t *)
281 			    va_arg(pvar, pctx_init_lwpfn_t *);
282 			break;
283 		case PCTX_FINI_LWP_EVENT:
284 			pctx->fini_lwp = (pctx_fini_lwpfn_t *)
285 			    va_arg(pvar, pctx_fini_lwpfn_t *);
286 			break;
287 		case PCTX_SYSC_LWP_EXIT_EVENT:
288 			pctx->lwp_exit = (pctx_sysc_lwp_exitfn_t *)
289 			    va_arg(pvar, pctx_sysc_lwp_exitfn_t *);
290 			break;
291 		default:
292 			pctx_error(pctx, fn,
293 			    gettext("unknown event type %x\n"), event);
294 			error = -1;
295 			break;
296 		}
297 	} while (event != PCTX_NULL_EVENT && error == 0);
298 	va_end(pvar);
299 
300 	if (error != 0)
301 		return (error);
302 
303 	if (pctx->exec == NULL)
304 		pctx->exec = (pctx_sysc_execfn_t *)default_int;
305 	if (pctx->fork == NULL)
306 		pctx->fork = (pctx_sysc_forkfn_t *)default_void;
307 	if (pctx->exit == NULL)
308 		pctx->exit = (pctx_sysc_exitfn_t *)default_void;
309 	if (pctx->lwp_create == NULL)
310 		pctx->lwp_create = (pctx_sysc_lwp_createfn_t *)default_int;
311 	if (pctx->init_lwp == NULL)
312 		pctx->init_lwp = (pctx_init_lwpfn_t *)default_int;
313 	if (pctx->fini_lwp == NULL)
314 		pctx->fini_lwp = (pctx_fini_lwpfn_t *)default_int;
315 	if (pctx->lwp_exit == NULL)
316 		pctx->lwp_exit = (pctx_sysc_lwp_exitfn_t *)default_int;
317 
318 	if (pctx->fork != (pctx_sysc_forkfn_t *)default_void) {
319 		(void) Psysexit(pctx->Pr, SYS_forkall, 1);
320 		(void) Psysexit(pctx->Pr, SYS_vfork, 1);
321 		(void) Psysexit(pctx->Pr, SYS_fork1, 1);
322 		(void) Psysexit(pctx->Pr, SYS_forksys, 1);
323 		if (Psetflags(pctx->Pr, PR_FORK) == -1)
324 			error = -1;
325 	} else {
326 		(void) Psysexit(pctx->Pr, SYS_forkall, 0);
327 		(void) Psysexit(pctx->Pr, SYS_vfork, 0);
328 		(void) Psysexit(pctx->Pr, SYS_fork1, 0);
329 		(void) Psysexit(pctx->Pr, SYS_forksys, 0);
330 		if (Punsetflags(pctx->Pr, PR_FORK) == -1)
331 			error = -1;
332 	}
333 
334 	/*
335 	 * exec causes termination of all but the exec-ing lwp,
336 	 * and resets the lwpid to one in the new address space.
337 	 */
338 	if (pctx->exec != (pctx_sysc_execfn_t *)default_int ||
339 	    pctx->fini_lwp != (pctx_fini_lwpfn_t *)default_int ||
340 	    pctx->init_lwp != (pctx_init_lwpfn_t *)default_int) {
341 		(void) Psysexit(pctx->Pr, SYS_exec, 1);
342 		(void) Psysexit(pctx->Pr, SYS_execve, 1);
343 		(void) Psysentry(pctx->Pr, SYS_exec, 1);
344 		(void) Psysentry(pctx->Pr, SYS_execve, 1);
345 	} else {
346 		(void) Psysexit(pctx->Pr, SYS_exec, 0);
347 		(void) Psysexit(pctx->Pr, SYS_execve, 0);
348 		(void) Psysentry(pctx->Pr, SYS_exec, 0);
349 		(void) Psysentry(pctx->Pr, SYS_execve, 0);
350 	}
351 
352 	(void) Psysexit(pctx->Pr, SYS_lwp_create,
353 	    pctx->lwp_create != (pctx_sysc_lwp_createfn_t *)default_int ||
354 	    pctx->init_lwp != (pctx_init_lwpfn_t *)default_int);
355 
356 	(void) Psysentry(pctx->Pr, SYS_lwp_exit,
357 	    pctx->lwp_exit != (pctx_sysc_lwp_exitfn_t *)default_int ||
358 	    pctx->fini_lwp != (pctx_fini_lwpfn_t *)default_int);
359 
360 	return (0);
361 }
362 
/*
 * Signals blocked while the agent lwp exists in the controlled process;
 * filled in once by __libpctx_init().
 */
static sigset_t termsig;

/*
 * Library initializer: build the set of terminal signals we shield
 * ourselves from while the agent lwp is running.
 */
static void
__libpctx_init(void)
{
	static const int shielded[] = { SIGHUP, SIGTERM, SIGINT, SIGQUIT };
	size_t i;

	(void) sigemptyset(&termsig);
	for (i = 0; i < sizeof (shielded) / sizeof (shielded[0]); i++)
		(void) sigaddset(&termsig, shielded[i]);
}

#pragma init(__libpctx_init)
380 
381 static void
382 pctx_begin_syscalls(pctx_t *pctx)
383 {
384 	if (pctx->Pr == NULL)
385 		return;
386 	if (pctx->sigblocked++ == 0) {
387 		(void) sigprocmask(SIG_BLOCK, &termsig, &pctx->savedset);
388 		(void) Pcreate_agent(pctx->Pr);
389 	}
390 }
391 
392 static void
393 pctx_end_syscalls(pctx_t *pctx)
394 {
395 	if (pctx->Pr == NULL)
396 		return;
397 	if (--pctx->sigblocked == 0) {
398 		(void) Pdestroy_agent(pctx->Pr);
399 		(void) sigprocmask(SIG_SETMASK, &pctx->savedset, NULL);
400 	}
401 }
402 
403 /*
404  * Iterate over the valid lwpids in the process, invoking the
405  * action function on each one.
406  */
407 static int
408 pctx_lwpiterate(pctx_t *pctx, int (*action)(pctx_t *, pid_t, id_t, void *))
409 {
410 	const pstatus_t *pstatus;
411 	char lstatus[64];
412 	struct stat statb;
413 	lwpstatus_t *lwps;
414 	prheader_t *prh;
415 	int fd, nlwp;
416 	int ret = 0;
417 
418 	if (action == (int (*)(pctx_t *, pid_t, id_t, void *))default_int)
419 		return (0);
420 
421 	pstatus = Pstatus(pctx->Pr);
422 	if (pstatus->pr_nlwp <= 1) {
423 		pctx_begin_syscalls(pctx);
424 		ret = action(pctx, pstatus->pr_pid, 1, pctx->uarg);
425 		pctx_end_syscalls(pctx);
426 		return (ret);
427 	}
428 
429 	(void) snprintf(lstatus, sizeof (lstatus),
430 	    "/proc/%d/lstatus", (int)pstatus->pr_pid);
431 
432 	if ((fd = open(lstatus, O_RDONLY)) < 0 ||
433 	    fstat(fd, &statb) != 0) {
434 		if (fd >= 0)
435 			(void) close(fd);
436 		return (-1);
437 	}
438 
439 	prh = malloc(statb.st_size);
440 	if (read(fd, prh, statb.st_size) <
441 	    sizeof (prheader_t) + sizeof (lwpstatus_t)) {
442 		(void) close(fd);
443 		free(prh);
444 		return (-1);
445 	}
446 	(void) close(fd);
447 
448 	/* LINTED pointer cast may result in improper alignment */
449 	lwps = (lwpstatus_t *)(prh + 1);
450 	pctx_begin_syscalls(pctx);
451 	for (nlwp = prh->pr_nent; nlwp > 0; nlwp--) {
452 		if (action(pctx,
453 		    pstatus->pr_pid, lwps->pr_lwpid, pctx->uarg) != 0)
454 			ret = -1;
455 		/* LINTED pointer cast may result in improper alignment */
456 		lwps = (lwpstatus_t *)((char *)lwps + prh->pr_entsize);
457 	}
458 	pctx_end_syscalls(pctx);
459 	free(prh);
460 	return (ret);
461 }
462 
463 /*
464  * Free any associated state, but leave the process stopped if it
465  * is still under our control.  (If it isn't under our control,
466  * it should just run to completion when we do our last close)
467  */
468 static void
469 pctx_free(pctx_t *pctx)
470 {
471 	if (pctx->cpc != NULL && pctx_cpc_callback != NULL)
472 		(*pctx_cpc_callback)(pctx->cpc, pctx);
473 	if (pctx->Pr) {
474 		Pfree(pctx->Pr);
475 		pctx->Pr = NULL;
476 	}
477 	pctx->errfn = pctx_default_errfn;
478 }
479 
480 /*
481  * Completely release the process from our control and discard all our state
482  */
483 void
484 pctx_release(pctx_t *pctx)
485 {
486 	if (pctx->Pr) {
487 		Prelease(pctx->Pr, PRELEASE_CLEAR);
488 		pctx->Pr = NULL;
489 	}
490 
491 	pctx_free(pctx);
492 	bzero(pctx, sizeof (*pctx));
493 	free(pctx);
494 }
495 
/*
 * Advance the time in *tv by msec milliseconds, keeping tv_usec in the
 * normalized range [0, MICROSEC).  *tv is assumed to be normalized on
 * entry, so at most one carry into tv_sec is needed.
 */
static void
msincr(struct timeval *tv, uint_t msec)
{
	tv->tv_sec += msec / MILLISEC;
	tv->tv_usec += (msec % MILLISEC) * MILLISEC;
	/* ">=": exactly MICROSEC microseconds is a whole second too */
	if (tv->tv_usec >= MICROSEC) {
		tv->tv_sec++;
		tv->tv_usec -= MICROSEC;
	}
}
506 
/*
 * Return the number of whole milliseconds by which *tva is later than
 * *tvb.  The result is 0 if *tva is not later than *tvb, and is capped
 * at INT_MAX for very large differences.
 */
static uint_t
msdiff(struct timeval *tva, struct timeval *tvb)
{
	time_t secs = tva->tv_sec - tvb->tv_sec;
	suseconds_t usecs = tva->tv_usec - tvb->tv_usec;

	/* tva is earlier than tvb: nothing left to wait for */
	if (secs < 0 || (secs == 0 && usecs < 0))
		return (0);

	/* borrow a second so the microsecond part is non-negative */
	if (usecs < 0) {
		secs -= 1;
		usecs += MICROSEC;
	}

	if (secs >= (INT_MAX / MILLISEC))
		return ((uint_t)INT_MAX);

	return ((uint_t)(secs * MILLISEC + usecs / MILLISEC));
}
525 
526 int
527 pctx_run(
528 	pctx_t *pctx,
529 	uint_t msec,
530 	uint_t nsamples,
531 	int (*tick)(pctx_t *, pid_t, id_t, void *))
532 {
533 	static const char fn[] = "run";
534 	struct timeval tvgoal, tvnow;
535 	uint_t mswait = 0;
536 	int running = 1;
537 	const pstatus_t *pstatus;
538 	psinfo_t psinfo;
539 	void (*sigsaved)();
540 	id_t lwpid;
541 	pid_t pid = Pstatus(pctx->Pr)->pr_pid;
542 	int pstate;
543 
544 	if (msec == 0)
545 		nsamples = 0;
546 	if (nsamples == 0)
547 		nsamples = UINT_MAX;
548 
549 	/*
550 	 * Casually discard any knowledge of the children we create
551 	 */
552 	sigsaved = signal(SIGCHLD, SIG_IGN);
553 
554 	/*
555 	 * Since we've just "discovered" this process which might have
556 	 * been running for weeks, deliver some init_lwp events so
557 	 * that our caller gets a handle on the process.
558 	 */
559 	if (pctx_lwpiterate(pctx, pctx->init_lwp) != 0) {
560 		if (pctx->verbose)
561 			pctx_error(pctx, fn,
562 			    gettext("%d: lwp discovery failed\n"), (int)pid);
563 		goto bailout;
564 	}
565 
566 	if (msec != 0) {
567 		/*
568 		 * tvgoal represents the time at which the sample
569 		 * should next be taken.
570 		 */
571 		(void) gettimeofday(&tvgoal, 0);
572 		msincr(&tvgoal, msec);
573 	}
574 
575 	/*
576 	 * The event handling loop continues while running is 1.
577 	 * running becomes 0 when either the controlled process has
578 	 * exited successfully or the number of time samples has expired.
579 	 * Otherwise, if an error has occurred, running becomes -1.
580 	 */
581 	while (running == 1 && !pctx->terminate) {
582 
583 		if (Psetrun(pctx->Pr, 0, 0) != 0) {
584 			if (pctx->verbose)
585 				pctx_error(pctx, fn,
586 				    gettext("%d: Psetrun\n"), (int)pid);
587 			break;
588 		}
589 
590 		if (msec != 0) {
591 			/*
592 			 * This timing loop attempts to estimate the number
593 			 * of milliseconds between our "goal" time (when
594 			 * we should stop the process and run the tick
595 			 * routine) and the current time.
596 			 *
597 			 * If we ever find ourselves running behind i.e. we
598 			 * missed our goal, then we skip ahead to the next
599 			 * goal instead.
600 			 */
601 			do {
602 				(void) gettimeofday(&tvnow, 0);
603 				if ((mswait = msdiff(&tvgoal, &tvnow)) == 0) {
604 					msincr(&tvgoal, msec);
605 					/*
606 					 * Skip ahead to the next goal, unless
607 					 * there is only one more sample left
608 					 * to take.
609 					 */
610 					if (nsamples != 1)
611 						nsamples--;
612 				}
613 			} while (mswait == 0 && !pctx->terminate);
614 		}
615 
616 		if (pctx->terminate)
617 			goto bailout;
618 		else
619 			(void) Pwait(pctx->Pr, mswait);
620 
621 checkstate:
622 		switch (pstate = Pstate(pctx->Pr)) {
623 		case PS_RUN:
624 			/*
625 			 * Try again, but wait for up to 5 seconds.
626 			 */
627 			if (Pstop(pctx->Pr, 5 * MILLISEC) == -1 ||
628 			    (pstate = Pstate(pctx->Pr)) != PS_STOP) {
629 				pctx_error(pctx, fn,
630 				    gettext("%d: won't stop\n"), (int)pid);
631 			}
632 			break;
633 		case PS_STOP:
634 			break;
635 		case PS_LOST:
636 			/*
637 			 * Lost control - probably execed a setuid/setgid
638 			 * executable.  Try and get control back again,
639 			 * else bail ..
640 			 */
641 			(void) Preopen(pctx->Pr);
642 			if ((pstate = Pstate(pctx->Pr)) != PS_LOST)
643 				goto checkstate;
644 			pctx_error(pctx, fn,
645 			    gettext("%d: execed a program that cannot "
646 			    "be tracked\n"), (int)pid);
647 			running = -1;
648 			break;
649 		case PS_UNDEAD:
650 		case PS_DEAD:
651 			if (pctx->verbose)
652 				pctx_error(pctx, fn,
653 				    gettext("%d: process terminated\n"),
654 				    (int)pid);
655 			running = -1;
656 			break;
657 		default:
658 			if (pctx->verbose)
659 				pctx_error(pctx, fn,
660 				    gettext("%d: process state 0x%x?\n"),
661 				    (int)pid, pstate);
662 			break;
663 		}
664 
665 		if (pstate != PS_STOP)
666 			break;
667 
668 		pstatus = Pstatus(pctx->Pr);
669 		lwpid = pstatus->pr_lwp.pr_lwpid;
670 		switch (pstatus->pr_lwp.pr_why) {
671 		case PR_REQUESTED:
672 			msincr(&tvgoal, msec);
673 			if (pstatus->pr_flags & PR_VFORKP) {
674 				/*
675 				 * The process is in a vfork stupor until
676 				 * its child releases it via an exec.
677 				 * Don't sample it while it's in this state
678 				 * - we won't be able to create the agent.
679 				 */
680 				break;
681 			}
682 			if (pctx_lwpiterate(pctx, tick) != 0)
683 				running = -1;
684 			if (running == 1 && --nsamples == 0)
685 				running = 0;
686 			break;
687 		case PR_SYSENTRY:
688 			switch (pstatus->pr_lwp.pr_what) {
689 			case SYS_lwp_exit:
690 				pctx_begin_syscalls(pctx);
691 				(void) pctx->fini_lwp(pctx,
692 				    pid, lwpid, pctx->uarg);
693 				(void) pctx->lwp_exit(pctx,
694 				    pid, lwpid, pctx->uarg);
695 				pctx_end_syscalls(pctx);
696 				break;
697 			case SYS_exit:
698 				if (pctx_lwpiterate(pctx, pctx->fini_lwp)
699 				    != 0)
700 					running = -1;
701 				pctx->exit(pctx, pid, lwpid,
702 				    (int)pstatus->pr_lwp.pr_sysarg[0],
703 				    pctx->uarg);
704 				if (running == 1)
705 					running = 0;
706 				break;
707 			case SYS_exec:
708 			case SYS_execve:
709 				(void) pctx_lwpiterate(pctx, pctx->fini_lwp);
710 				break;
711 			default:
712 				pctx_error(pctx, fn,
713 				    "warning - pid %d sysentry(%d)\n",
714 				    (int)pid, pstatus->pr_lwp.pr_what);
715 				break;
716 			}
717 			break;
718 		case PR_SYSEXIT:
719 			switch (pstatus->pr_lwp.pr_what) {
720 			case SYS_exec:
721 			case SYS_execve:
722 				if (pstatus->pr_lwp.pr_errno) {
723 					/*
724 					 * The exec failed completely.
725 					 * Reinstate the lwps we fini'd
726 					 * at exec entrance
727 					 */
728 					if (pctx_lwpiterate(pctx,
729 					    pctx->init_lwp) == 0)
730 						running = 1;
731 					else
732 						running = -1;
733 					break;
734 				}
735 				if (pctx->exec == (pctx_sysc_execfn_t *)
736 				    default_int) {
737 					running = 0;
738 					break;
739 				}
740 				(void) memcpy(&psinfo,
741 				    Ppsinfo(pctx->Pr), sizeof (psinfo));
742 				proc_unctrl_psinfo(&psinfo);
743 				pctx_begin_syscalls(pctx);
744 				if (pctx->exec(pctx, pid, lwpid,
745 				    psinfo.pr_psargs, pctx->uarg) != 0)
746 					running = -1;
747 				if (running == 1 && pctx->init_lwp(pctx,
748 				    pid, 1, pctx->uarg) != 0)
749 					running = -1;
750 				pctx_end_syscalls(pctx);
751 				break;
752 			case SYS_lwp_create:
753 				if (pstatus->pr_lwp.pr_errno ||
754 				    pstatus->pr_lwp.pr_rval1)
755 					break;
756 				pctx_begin_syscalls(pctx);
757 				if (pctx->init_lwp(pctx, pid, lwpid,
758 				    pctx->uarg) != 0)
759 					running = -1;
760 				if (running == 1 && pctx->lwp_create(pctx,
761 				    pid, lwpid, pctx->uarg) != 0)
762 					running = -1;
763 				pctx_end_syscalls(pctx);
764 				break;
765 			case SYS_forkall:
766 			case SYS_vfork:
767 			case SYS_fork1:
768 			case SYS_forksys:
769 				if (pstatus->pr_lwp.pr_errno)
770 					break;
771 				(void) fflush(NULL);
772 				switch (fork1()) {
773 					pid_t ppid;
774 					int wascreated;
775 					pctx_sysc_forkfn_t *forkfn;
776 				case 0:
777 					ppid = pid;
778 					pid = pstatus->pr_lwp.pr_rval1;
779 					wascreated = pctx->created;
780 					forkfn = pctx->fork;
781 					pctx_free(pctx);
782 					pctx = pctx_capture(pid, pctx->uarg,
783 					    pctx->verbose, pctx->errfn);
784 					if (pctx != NULL) {
785 						if (wascreated) {
786 							/*
787 							 * Set kill on last
788 							 * close so -all-
789 							 * children die.
790 							 */
791 							pctx->created = 1;
792 							(void) Psetflags(
793 							    pctx->Pr, PR_KLC);
794 						}
795 						(*forkfn)(pctx, ppid, pid,
796 						    lwpid, pctx->uarg);
797 						pctx_release(pctx);
798 						_exit(0);
799 					} else {
800 						_exit(1);
801 					}
802 					/*NOTREACHED*/
803 				case -1:
804 					pctx_error(pctx, fn,
805 					    "cannot follow pid %d: %s\n",
806 					    (int)pstatus->pr_lwp.pr_rval1,
807 					    strerror(errno));
808 					break;
809 				default:
810 					break;
811 				}
812 				break;
813 			default:
814 				pctx_error(pctx, fn, gettext(
815 				    "warning - pid %d sysexit(%d)\n"),
816 				    (int)pid, pstatus->pr_lwp.pr_what);
817 				break;
818 			}
819 			break;
820 		case PR_SIGNALLED:
821 			if (pctx->verbose)
822 				pctx_error(pctx, fn,
823 				    gettext("pid %d - signalled\n"), (int)pid);
824 			break;
825 		case PR_JOBCONTROL:
826 			if (pctx->verbose)
827 				pctx_error(pctx, fn,
828 				    gettext("pid %d - job control stop\n"),
829 				    (int)pid);
830 			running = -1;
831 			break;
832 		case PR_FAULTED:
833 			if (pctx->verbose)
834 				pctx_error(pctx, fn,
835 				    gettext("pid %d - faulted\n"), (int)pid);
836 			break;
837 		case PR_SUSPENDED:
838 			if (pctx->verbose)
839 				pctx_error(pctx, fn,
840 				    gettext("pid %d - suspended\n"), (int)pid);
841 			break;
842 		case PR_CHECKPOINT:
843 			if (pctx->verbose)
844 				pctx_error(pctx, fn,
845 				    gettext("pid %d - checkpoint\n"),
846 				    (int)pid);
847 			break;
848 		default:
849 			if (pctx->verbose)
850 				pctx_error(pctx, fn,
851 				    gettext("pid %d - reason %d\n"),
852 				    (int)pid, pstatus->pr_lwp.pr_why);
853 			running = -1;
854 			break;
855 		}
856 	}
857 
858 bailout:
859 	(void) signal(SIGCHLD, sigsaved);
860 
861 	if (pctx->terminate)
862 		return (0);
863 
864 	switch (running) {
865 	case 0:
866 		return (0);
867 	case -1:
868 		return (-1);
869 	default:
870 		pctx_error(pctx, fn, gettext("lost control of pid %d\n"),
871 		    (int)pid);
872 		pctx_free(pctx);
873 		return (-1);
874 	}
875 }
876 
877 /*
878  * Execute the private 'cpc' system call in the context of the
879  * controlled process.
880  */
881 int
882 __pctx_cpc(pctx_t *pctx, cpc_t *cpc,
883     int cmd, id_t lwpid, void *data1, void *data2, void *data3, int bufsize)
884 {
885 	sysret_t rval;
886 	argdes_t argd[5];
887 	argdes_t *adp = &argd[0];
888 	int error;
889 
890 	/*
891 	 * Keep track of the relationship between cpc_t and pctx_t here.
892 	 * We store the last cpc_t used by libpctx, so that when this pctx is
893 	 * destroyed, libpctx can notify libcpc.
894 	 */
895 
896 	if (pctx->cpc != NULL && pctx->cpc != cpc && pctx_cpc_callback != NULL)
897 		(*pctx_cpc_callback)(pctx->cpc, pctx);
898 	pctx->cpc = cpc;
899 
900 	/*
901 	 * cmd and lwpid are passed in by value no matter what the command is.
902 	 */
903 	adp->arg_value = cmd;
904 	adp->arg_object = NULL;
905 	adp->arg_type = AT_BYVAL;
906 	adp->arg_inout = AI_INPUT;
907 	adp->arg_size = 0;
908 	adp++;
909 
910 	adp->arg_value = lwpid;
911 	adp->arg_object = NULL;
912 	adp->arg_type = AT_BYVAL;
913 	adp->arg_inout = AI_INPUT;
914 	adp->arg_size = 0;
915 	adp++;
916 
917 	switch (cmd) {
918 	case CPC_BIND:
919 		adp->arg_value = 0;
920 		adp->arg_object = data1;
921 		adp->arg_type = AT_BYREF;
922 		adp->arg_inout = AI_INPUT;
923 		adp->arg_size = (size_t)data2;
924 		adp++;
925 
926 		adp->arg_value = (size_t)data2;
927 		adp->arg_object = NULL;
928 		adp->arg_type = AT_BYVAL;
929 		adp->arg_inout = AI_INPUT;
930 		adp->arg_size = 0;
931 		adp++;
932 
933 		adp->arg_value = 0;
934 		adp->arg_object = data3;
935 		adp->arg_type = AT_BYREF;
936 		adp->arg_inout = AI_INOUT;
937 		adp->arg_size = sizeof (int);
938 
939 		break;
940 	case CPC_SAMPLE:
941 		adp->arg_value = 0;
942 		adp->arg_object = data1;
943 		adp->arg_type = AT_BYREF;
944 		adp->arg_inout = AI_OUTPUT;
945 		adp->arg_size = bufsize;
946 		adp++;
947 
948 		adp->arg_value = 0;
949 		adp->arg_object = data2;
950 		adp->arg_type = AT_BYREF;
951 		adp->arg_inout = AI_OUTPUT;
952 		adp->arg_size = sizeof (hrtime_t);
953 		adp++;
954 
955 		adp->arg_value = 0;
956 		adp->arg_object = data3;
957 		adp->arg_type = AT_BYREF;
958 		adp->arg_inout = AI_OUTPUT;
959 		adp->arg_size = sizeof (uint64_t);
960 
961 		break;
962 	default:
963 		adp->arg_value = 0;
964 		adp->arg_object = 0;
965 		adp->arg_type = AT_BYVAL;
966 		adp->arg_inout = AI_INPUT;
967 		adp->arg_size = 0;
968 		adp++;
969 
970 		adp->arg_value = 0;
971 		adp->arg_object = 0;
972 		adp->arg_type = AT_BYVAL;
973 		adp->arg_inout = AI_INPUT;
974 		adp->arg_size = 0;
975 		adp++;
976 
977 		adp->arg_value = 0;
978 		adp->arg_object = 0;
979 		adp->arg_type = AT_BYVAL;
980 		adp->arg_inout = AI_INPUT;
981 		adp->arg_size = 0;
982 
983 		break;
984 	}
985 
986 	error = Psyscall(pctx->Pr, &rval, SYS_cpc, 5, &argd[0]);
987 
988 	if (error) {
989 		errno = error > 0 ? error : ENOSYS;
990 		return (-1);
991 	}
992 	return (rval.sys_rval1);
993 }
994 
995 /*
996  * libcpc-private hook used to register a callback. The callback is used to
997  * notify libcpc when a pctx handle is invalidated.
998  */
999 void
1000 __pctx_cpc_register_callback(void (*arg)(struct __cpc *, struct __pctx *))
1001 {
1002 	pctx_cpc_callback = arg;
1003 }
1004 
1005 /*
1006  * Tell pctx_run to bail out immediately
1007  */
1008 void
1009 pctx_terminate(struct __pctx *pctx)
1010 {
1011 	pctx->terminate = 1;
1012 }
1013