xref: /freebsd/sys/kern/kern_procctl.c (revision 52f72944b8f5abb2386eae924357dee8aea17d5b)
/*-
 * Copyright (c) 2014 John Baldwin
 * Copyright (c) 2014, 2016 The FreeBSD Foundation
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/wait.h>

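/*
 * Set or clear the P_PROTECTED flag on a single process.  Returns 1 if
 * the flag was changed and 0 if the process is a system process or the
 * caller lacks scheduling privilege over it.
 */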
static int
protect_setchild(struct thread *td, struct proc *p, int flags)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
		return (0);
	if (flags & PPROT_SET) {
		p->p_flag |= P_PROTECTED;
		if (flags & PPROT_INHERIT)
			p->p_flag2 |= P2_INHERIT_PROTECTED;
	} else {
		p->p_flag &= ~P_PROTECTED;
		p->p_flag2 &= ~P2_INHERIT_PROTECTED;
	}
	return (1);
}

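/*
 * Apply protect_setchild() to the given process and every descendant,
 * walking the process tree iteratively.  Returns non-zero if at least
 * one process was updated.
 */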
static int
protect_setchildren(struct thread *td, struct proc *top, int flags)
{
	struct proc *p;
	int ret;

	p = top;
	ret = 0;
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= protect_setchild(td, p, flags);
		PROC_UNLOCK(p);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top) {
				PROC_LOCK(p);
				return (ret);
			}
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
}

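/*
 * PROC_SPROTECT: validate the request, check for the
 * PRIV_VM_MADV_PROTECT privilege, and apply the protection change to
 * the process or, with PPROT_DESCEND, to its whole subtree.
 */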
static int
protect_set(struct thread *td, struct proc *p, int flags)
{
	int error, ret;

	switch (PPROT_OP(flags)) {
	case PPROT_SET:
	case PPROT_CLEAR:
		break;
	default:
		return (EINVAL);
	}

	if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
		return (EINVAL);

	error = priv_check(td, PRIV_VM_MADV_PROTECT);
	if (error)
		return (error);

	if (flags & PPROT_DESCEND)
		ret = protect_setchildren(td, p, flags);
	else
		ret = protect_setchild(td, p, flags);
	if (ret == 0)
		return (EPERM);
	return (0);
}

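/*
 * PROC_REAP_ACQUIRE: mark the calling process as a reaper of its
 * future descendants.
 */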
static int
reap_acquire(struct thread *td, struct proc *p)
{

	sx_assert(&proctree_lock, SX_XLOCKED);
	if (p != curproc)
		return (EPERM);
	if ((p->p_treeflag & P_TREE_REAPER) != 0)
		return (EBUSY);
	p->p_treeflag |= P_TREE_REAPER;
	/*
	 * We do not reattach existing children and the whole tree
	 * under them to us, since their p_reaper is already set.
	 */
	return (0);
}

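/*
 * PROC_REAP_RELEASE: stop acting as a reaper; the processes currently
 * reaped by the caller are handed over to the caller's own reaper.
 */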
static int
reap_release(struct thread *td, struct proc *p)
{

	sx_assert(&proctree_lock, SX_XLOCKED);
	if (p != curproc)
		return (EPERM);
	if (p == initproc)
		return (EINVAL);
	if ((p->p_treeflag & P_TREE_REAPER) == 0)
		return (EINVAL);
	reaper_abandon_children(p, false);
	return (0);
}

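/*
 * PROC_REAP_STATUS: report the reaper owning the process (or the
 * process itself if it is a reaper), along with counts of its direct
 * children and of all descendants on the reaper list.
 */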
static int
reap_status(struct thread *td, struct proc *p,
    struct procctl_reaper_status *rs)
{
	struct proc *reap, *p2, *first_p;

	sx_assert(&proctree_lock, SX_LOCKED);
	bzero(rs, sizeof(*rs));
	if ((p->p_treeflag & P_TREE_REAPER) == 0) {
		reap = p->p_reaper;
	} else {
		reap = p;
		rs->rs_flags |= REAPER_STATUS_OWNED;
	}
	if (reap == initproc)
		rs->rs_flags |= REAPER_STATUS_REALINIT;
	rs->rs_reaper = reap->p_pid;
	rs->rs_descendants = 0;
	rs->rs_children = 0;
	if (!LIST_EMPTY(&reap->p_reaplist)) {
		first_p = LIST_FIRST(&reap->p_children);
		if (first_p == NULL)
			first_p = LIST_FIRST(&reap->p_reaplist);
		rs->rs_pid = first_p->p_pid;
		LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
			if (proc_realparent(p2) == reap)
				rs->rs_children++;
			rs->rs_descendants++;
		}
	} else {
		rs->rs_pid = -1;
	}
	return (0);
}

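/*
 * PROC_REAP_GETPIDS: copy out one pidinfo record for each process on
 * the reaper list, limited to the rp_count entries provided by the
 * caller.
 */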
static int
reap_getpids(struct thread *td, struct proc *p, struct procctl_reaper_pids *rp)
{
	struct proc *reap, *p2;
	struct procctl_reaper_pidinfo *pi, *pip;
	u_int i, n;
	int error;

	sx_assert(&proctree_lock, SX_LOCKED);
	PROC_UNLOCK(p);
	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
	n = i = 0;
	error = 0;
	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
		n++;
	sx_unlock(&proctree_lock);
	if (rp->rp_count < n)
		n = rp->rp_count;
	pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
	sx_slock(&proctree_lock);
	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
		if (i == n)
			break;
		pip = &pi[i];
		bzero(pip, sizeof(*pip));
		pip->pi_pid = p2->p_pid;
		pip->pi_subtree = p2->p_reapsubtree;
		pip->pi_flags = REAPER_PIDINFO_VALID;
		if (proc_realparent(p2) == reap)
			pip->pi_flags |= REAPER_PIDINFO_CHILD;
		if ((p2->p_treeflag & P_TREE_REAPER) != 0)
			pip->pi_flags |= REAPER_PIDINFO_REAPER;
		i++;
	}
	sx_sunlock(&proctree_lock);
	error = copyout(pi, rp->rp_pids, i * sizeof(*pi));
	free(pi, M_TEMP);
	sx_slock(&proctree_lock);
	PROC_LOCK(p);
	return (error);
}

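/*
 * Deliver the requested signal to a single process if the caller is
 * allowed to signal it, updating rk's killed counter and first-failure
 * pid.
 */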
static void
reap_kill_proc(struct thread *td, struct proc *p2, ksiginfo_t *ksi,
    struct procctl_reaper_kill *rk, int *error)
{
	int error1;

	PROC_LOCK(p2);
	error1 = p_cansignal(td, p2, rk->rk_sig);
	if (error1 == 0) {
		pksignal(p2, rk->rk_sig, ksi);
		rk->rk_killed++;
		*error = error1;
	} else if (*error == ESRCH) {
		rk->rk_fpid = p2->p_pid;
		*error = error1;
	}
	PROC_UNLOCK(p2);
}

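/*
 * Work queue used by reap_kill() to visit nested reapers without
 * recursion.
 */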
struct reap_kill_tracker {
	struct proc *parent;
	TAILQ_ENTRY(reap_kill_tracker) link;
};

TAILQ_HEAD(reap_kill_tracker_head, reap_kill_tracker);

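/*
 * Queue a reaper for a later pass over its reap list in reap_kill().
 */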
static void
reap_kill_sched(struct reap_kill_tracker_head *tracker, struct proc *p2)
{
	struct reap_kill_tracker *t;

	t = malloc(sizeof(struct reap_kill_tracker), M_TEMP, M_WAITOK);
	t->parent = p2;
	TAILQ_INSERT_TAIL(tracker, t, link);
}

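/*
 * PROC_REAP_KILL: send the requested signal either to the direct
 * children of the reaper (REAPER_KILL_CHILDREN) or to all descendants
 * on the reaper's lists, optionally limited to one subtree id
 * (REAPER_KILL_SUBTREE).
 */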
static int
reap_kill(struct thread *td, struct proc *p, struct procctl_reaper_kill *rk)
{
	struct proc *reap, *p2;
	ksiginfo_t ksi;
	struct reap_kill_tracker_head tracker;
	struct reap_kill_tracker *t;
	int error;

	sx_assert(&proctree_lock, SX_LOCKED);
	if (IN_CAPABILITY_MODE(td))
		return (ECAPMODE);
	if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG ||
	    (rk->rk_flags & ~(REAPER_KILL_CHILDREN |
	    REAPER_KILL_SUBTREE)) != 0 || (rk->rk_flags &
	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) ==
	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE))
		return (EINVAL);
	PROC_UNLOCK(p);
	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
	ksiginfo_init(&ksi);
	ksi.ksi_signo = rk->rk_sig;
	ksi.ksi_code = SI_USER;
	ksi.ksi_pid = td->td_proc->p_pid;
	ksi.ksi_uid = td->td_ucred->cr_ruid;
	error = ESRCH;
	rk->rk_killed = 0;
	rk->rk_fpid = -1;
	if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) {
		for (p2 = LIST_FIRST(&reap->p_children); p2 != NULL;
		    p2 = LIST_NEXT(p2, p_sibling)) {
			reap_kill_proc(td, p2, &ksi, rk, &error);
			/*
			 * Do not end the loop on error; signal
			 * everything we can.
			 */
		}
	} else {
		TAILQ_INIT(&tracker);
		reap_kill_sched(&tracker, reap);
		while ((t = TAILQ_FIRST(&tracker)) != NULL) {
			MPASS((t->parent->p_treeflag & P_TREE_REAPER) != 0);
			TAILQ_REMOVE(&tracker, t, link);
			for (p2 = LIST_FIRST(&t->parent->p_reaplist); p2 != NULL;
			    p2 = LIST_NEXT(p2, p_reapsibling)) {
				if (t->parent == reap &&
				    (rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
				    p2->p_reapsubtree != rk->rk_subtree)
					continue;
				if ((p2->p_treeflag & P_TREE_REAPER) != 0)
					reap_kill_sched(&tracker, p2);
				reap_kill_proc(td, p2, &ksi, rk, &error);
			}
			free(t, M_TEMP);
		}
	}
	PROC_LOCK(p);
	return (error);
}

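/*
 * PROC_TRACE_CTL: enable or disable tracing of the process by
 * manipulating the P2_NOTRACE and P2_NOTRACE_EXEC flags.  Fails with
 * EBUSY while the process is already being traced or ktraced.
 */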
static int
trace_ctl(struct thread *td, struct proc *p, int state)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);

	/*
	 * Ktrace changes p_traceflag from or to zero under the
	 * process lock, so the test does not need to acquire the
	 * ktrace mutex.
	 */
	if ((p->p_flag & P_TRACED) != 0 || p->p_traceflag != 0)
		return (EBUSY);

	switch (state) {
	case PROC_TRACE_CTL_ENABLE:
		if (td->td_proc != p)
			return (EPERM);
		p->p_flag2 &= ~(P2_NOTRACE | P2_NOTRACE_EXEC);
		break;
	case PROC_TRACE_CTL_DISABLE_EXEC:
		p->p_flag2 |= P2_NOTRACE_EXEC | P2_NOTRACE;
		break;
	case PROC_TRACE_CTL_DISABLE:
		if ((p->p_flag2 & P2_NOTRACE_EXEC) != 0) {
			KASSERT((p->p_flag2 & P2_NOTRACE) != 0,
			    ("dangling P2_NOTRACE_EXEC"));
			if (td->td_proc != p)
				return (EPERM);
			p->p_flag2 &= ~P2_NOTRACE_EXEC;
		} else {
			p->p_flag2 |= P2_NOTRACE;
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

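/*
 * PROC_TRACE_STATUS: report -1 if tracing is disabled, the pid of the
 * tracing process if the process is traced, and 0 otherwise.
 */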
static int
trace_status(struct thread *td, struct proc *p, int *data)
{

	if ((p->p_flag2 & P2_NOTRACE) != 0) {
		KASSERT((p->p_flag & P_TRACED) == 0,
		    ("%d traced but tracing disabled", p->p_pid));
		*data = -1;
	} else if ((p->p_flag & P_TRACED) != 0) {
		*data = p->p_pptr->p_pid;
	} else {
		*data = 0;
	}
	return (0);
}

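/*
 * PROC_TRAPCAP_CTL: toggle P2_TRAPCAP, which requests SIGTRAP delivery
 * when a capability-mode violation returns ENOTCAPABLE or ECAPMODE.
 */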
static int
trapcap_ctl(struct thread *td, struct proc *p, int state)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);

	switch (state) {
	case PROC_TRAPCAP_CTL_ENABLE:
		p->p_flag2 |= P2_TRAPCAP;
		break;
	case PROC_TRAPCAP_CTL_DISABLE:
		p->p_flag2 &= ~P2_TRAPCAP;
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

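/*
 * PROC_TRAPCAP_STATUS: report whether P2_TRAPCAP is set.
 */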
static int
trapcap_status(struct thread *td, struct proc *p, int *data)
{

	*data = (p->p_flag2 & P2_TRAPCAP) != 0 ? PROC_TRAPCAP_CTL_ENABLE :
	    PROC_TRAPCAP_CTL_DISABLE;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct procctl_args {
	idtype_t idtype;
	id_t	id;
	int	com;
	void	*data;
};
#endif
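/*
 * procctl(2) entry point: copy in the command argument, dispatch to
 * kern_procctl(), and copy results back out for the commands that
 * return data.
 *
 * Illustrative userland usage (a sketch, not part of this file):
 * making the current process a reaper and querying its status.
 *
 *	struct procctl_reaper_status rs;
 *
 *	(void)procctl(P_PID, getpid(), PROC_REAP_ACQUIRE, NULL);
 *	(void)procctl(P_PID, getpid(), PROC_REAP_STATUS, &rs);
 */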
/* ARGSUSED */
int
sys_procctl(struct thread *td, struct procctl_args *uap)
{
	void *data;
	union {
		struct procctl_reaper_status rs;
		struct procctl_reaper_pids rp;
		struct procctl_reaper_kill rk;
	} x;
	int error, error1, flags;

	switch (uap->com) {
	case PROC_SPROTECT:
	case PROC_TRACE_CTL:
	case PROC_TRAPCAP_CTL:
		error = copyin(uap->data, &flags, sizeof(flags));
		if (error != 0)
			return (error);
		data = &flags;
		break;
	case PROC_REAP_ACQUIRE:
	case PROC_REAP_RELEASE:
		if (uap->data != NULL)
			return (EINVAL);
		data = NULL;
		break;
	case PROC_REAP_STATUS:
		data = &x.rs;
		break;
	case PROC_REAP_GETPIDS:
		error = copyin(uap->data, &x.rp, sizeof(x.rp));
		if (error != 0)
			return (error);
		data = &x.rp;
		break;
	case PROC_REAP_KILL:
		error = copyin(uap->data, &x.rk, sizeof(x.rk));
		if (error != 0)
			return (error);
		data = &x.rk;
		break;
	case PROC_TRACE_STATUS:
	case PROC_TRAPCAP_STATUS:
		data = &flags;
		break;
	default:
		return (EINVAL);
	}
	error = kern_procctl(td, uap->idtype, uap->id, uap->com, data);
	switch (uap->com) {
	case PROC_REAP_STATUS:
		if (error == 0)
			error = copyout(&x.rs, uap->data, sizeof(x.rs));
		break;
	case PROC_REAP_KILL:
		error1 = copyout(&x.rk, uap->data, sizeof(x.rk));
		if (error == 0)
			error = error1;
		break;
	case PROC_TRACE_STATUS:
	case PROC_TRAPCAP_STATUS:
		if (error == 0)
			error = copyout(&flags, uap->data, sizeof(flags));
		break;
	}
	return (error);
}

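/*
 * Dispatch a procctl request for a single process; the process lock is
 * held on entry and on return.
 */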
static int
kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	switch (com) {
	case PROC_SPROTECT:
		return (protect_set(td, p, *(int *)data));
	case PROC_REAP_ACQUIRE:
		return (reap_acquire(td, p));
	case PROC_REAP_RELEASE:
		return (reap_release(td, p));
	case PROC_REAP_STATUS:
		return (reap_status(td, p, data));
	case PROC_REAP_GETPIDS:
		return (reap_getpids(td, p, data));
	case PROC_REAP_KILL:
		return (reap_kill(td, p, data));
	case PROC_TRACE_CTL:
		return (trace_ctl(td, p, *(int *)data));
	case PROC_TRACE_STATUS:
		return (trace_status(td, p, data));
	case PROC_TRAPCAP_CTL:
		return (trapcap_ctl(td, p, *(int *)data));
	case PROC_TRAPCAP_STATUS:
		return (trapcap_status(td, p, data));
	default:
		return (EINVAL);
	}
}

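/*
 * Common procctl(2) backend: validate the id type for the command,
 * take the proctree lock in the mode the command requires, and apply
 * the request to a single process (P_PID) or to every visible member
 * of a process group (P_PGID).
 */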
int
kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
{
	struct pgrp *pg;
	struct proc *p;
	int error, first_error, ok;
	bool tree_locked;

	switch (com) {
	case PROC_REAP_ACQUIRE:
	case PROC_REAP_RELEASE:
	case PROC_REAP_STATUS:
	case PROC_REAP_GETPIDS:
	case PROC_REAP_KILL:
	case PROC_TRACE_STATUS:
	case PROC_TRAPCAP_STATUS:
		if (idtype != P_PID)
			return (EINVAL);
	}

	switch (com) {
	case PROC_SPROTECT:
	case PROC_REAP_STATUS:
	case PROC_REAP_GETPIDS:
	case PROC_REAP_KILL:
	case PROC_TRACE_CTL:
	case PROC_TRAPCAP_CTL:
		sx_slock(&proctree_lock);
		tree_locked = true;
		break;
	case PROC_REAP_ACQUIRE:
	case PROC_REAP_RELEASE:
		sx_xlock(&proctree_lock);
		tree_locked = true;
		break;
	case PROC_TRACE_STATUS:
	case PROC_TRAPCAP_STATUS:
		tree_locked = false;
		break;
	default:
		return (EINVAL);
	}

	switch (idtype) {
	case P_PID:
		p = pfind(id);
		if (p == NULL) {
			error = ESRCH;
			break;
		}
		error = p_cansee(td, p);
		if (error == 0)
			error = kern_procctl_single(td, p, com, data);
		PROC_UNLOCK(p);
		break;
	case P_PGID:
		/*
		 * Attempt to apply the operation to all members of the
		 * group.  Ignore processes in the group that can't be
		 * seen.  Ignore errors so long as at least one process is
		 * able to complete the request successfully.
		 */
		pg = pgfind(id);
		if (pg == NULL) {
			error = ESRCH;
			break;
		}
		PGRP_UNLOCK(pg);
		ok = 0;
		first_error = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			error = kern_procctl_single(td, p, com, data);
			PROC_UNLOCK(p);
			if (error == 0)
				ok = 1;
			else if (first_error == 0)
				first_error = error;
		}
		if (ok)
			error = 0;
		else if (first_error != 0)
			error = first_error;
		else
			/*
			 * Was not able to see any processes in the
			 * process group.
			 */
			error = ESRCH;
		break;
	default:
		error = EINVAL;
		break;
	}
	if (tree_locked)
		sx_unlock(&proctree_lock);
	return (error);
}