/*-
 * Copyright (c) 2009 Robert N. M. Watson
 * All rights reserved.
 *
 * This software was developed at the University of Cambridge Computer
 * Laboratory with support from a grant from Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * FreeBSD process descriptor facility.
 *
 * Some processes are represented by a file descriptor, which will be used in
 * preference to signaling and pids for the purposes of process management,
 * and is, in effect, a form of capability. When a process descriptor is
 * used with a process, it ceases to be visible to certain traditional UNIX
 * process facilities, such as waitpid(2).
 *
 * Some semantics:
 *
 * - At most one process descriptor will exist for any process, although
 *   references to that descriptor may be held from many processes (or even
 *   be in flight between processes over a local domain socket).
 * - Last close on the process descriptor will terminate the process using
 *   SIGKILL and reparent it to init so that there's a process to reap it
 *   when it's done exiting.
 * - If the process exits before the descriptor is closed, it will not
 *   generate SIGCHLD on termination, or be picked up by waitpid().
 * - The pdkill(2) system call may be used to deliver a signal to the process
 *   using its process descriptor.
 * - The pdwait4(2) system call may be used to block (or not) on a process
 *   descriptor to collect termination information.
 *
 * Open questions:
 *
 * - How to handle ptrace(2)?
 * - Will we want to add a pidtoprocdesc(2) system call to allow process
 *   descriptors to be created for processes without pdfork(2)?
59 */ 60 61 #include <sys/cdefs.h> 62 __FBSDID("$FreeBSD$"); 63 64 #include <sys/param.h> 65 #include <sys/capsicum.h> 66 #include <sys/fcntl.h> 67 #include <sys/file.h> 68 #include <sys/filedesc.h> 69 #include <sys/kernel.h> 70 #include <sys/lock.h> 71 #include <sys/mutex.h> 72 #include <sys/poll.h> 73 #include <sys/proc.h> 74 #include <sys/procdesc.h> 75 #include <sys/resourcevar.h> 76 #include <sys/stat.h> 77 #include <sys/sysproto.h> 78 #include <sys/sysctl.h> 79 #include <sys/systm.h> 80 #include <sys/ucred.h> 81 82 #include <security/audit/audit.h> 83 84 #include <vm/uma.h> 85 86 FEATURE(process_descriptors, "Process Descriptors"); 87 88 static uma_zone_t procdesc_zone; 89 90 static fo_poll_t procdesc_poll; 91 static fo_kqfilter_t procdesc_kqfilter; 92 static fo_stat_t procdesc_stat; 93 static fo_close_t procdesc_close; 94 95 static struct fileops procdesc_ops = { 96 .fo_read = invfo_rdwr, 97 .fo_write = invfo_rdwr, 98 .fo_truncate = invfo_truncate, 99 .fo_ioctl = invfo_ioctl, 100 .fo_poll = procdesc_poll, 101 .fo_kqfilter = procdesc_kqfilter, 102 .fo_stat = procdesc_stat, 103 .fo_close = procdesc_close, 104 .fo_chmod = invfo_chmod, 105 .fo_chown = invfo_chown, 106 .fo_sendfile = invfo_sendfile, 107 .fo_flags = DFLAG_PASSABLE, 108 }; 109 110 /* 111 * Initialize with VFS so that process descriptors are available along with 112 * other file descriptor types. As long as it runs before init(8) starts, 113 * there shouldn't be a problem. 114 */ 115 static void 116 procdesc_init(void *dummy __unused) 117 { 118 119 procdesc_zone = uma_zcreate("procdesc", sizeof(struct procdesc), 120 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 121 if (procdesc_zone == NULL) 122 panic("procdesc_init: procdesc_zone not initialized"); 123 } 124 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, procdesc_init, NULL); 125 126 /* 127 * Return a locked process given a process descriptor, or ESRCH if it has 128 * died. 
129 */ 130 int 131 procdesc_find(struct thread *td, int fd, cap_rights_t *rightsp, 132 struct proc **p) 133 { 134 struct procdesc *pd; 135 struct file *fp; 136 int error; 137 138 error = fget(td, fd, rightsp, &fp); 139 if (error) 140 return (error); 141 if (fp->f_type != DTYPE_PROCDESC) { 142 error = EBADF; 143 goto out; 144 } 145 pd = fp->f_data; 146 sx_slock(&proctree_lock); 147 if (pd->pd_proc != NULL) { 148 *p = pd->pd_proc; 149 PROC_LOCK(*p); 150 } else 151 error = ESRCH; 152 sx_sunlock(&proctree_lock); 153 out: 154 fdrop(fp, td); 155 return (error); 156 } 157 158 /* 159 * Function to be used by procstat(1) sysctls when returning procdesc 160 * information. 161 */ 162 pid_t 163 procdesc_pid(struct file *fp_procdesc) 164 { 165 struct procdesc *pd; 166 167 KASSERT(fp_procdesc->f_type == DTYPE_PROCDESC, 168 ("procdesc_pid: !procdesc")); 169 170 pd = fp_procdesc->f_data; 171 return (pd->pd_pid); 172 } 173 174 /* 175 * Retrieve the PID associated with a process descriptor. 176 */ 177 int 178 kern_pdgetpid(struct thread *td, int fd, cap_rights_t *rightsp, pid_t *pidp) 179 { 180 struct file *fp; 181 int error; 182 183 error = fget(td, fd, rightsp, &fp); 184 if (error) 185 return (error); 186 if (fp->f_type != DTYPE_PROCDESC) { 187 error = EBADF; 188 goto out; 189 } 190 *pidp = procdesc_pid(fp); 191 out: 192 fdrop(fp, td); 193 return (error); 194 } 195 196 /* 197 * System call to return the pid of a process given its process descriptor. 
198 */ 199 int 200 sys_pdgetpid(struct thread *td, struct pdgetpid_args *uap) 201 { 202 cap_rights_t rights; 203 pid_t pid; 204 int error; 205 206 AUDIT_ARG_FD(uap->fd); 207 error = kern_pdgetpid(td, uap->fd, 208 cap_rights_init(&rights, CAP_PDGETPID), &pid); 209 if (error == 0) 210 error = copyout(&pid, uap->pidp, sizeof(pid)); 211 return (error); 212 } 213 214 /* 215 * When a new process is forked by pdfork(), a file descriptor is allocated 216 * by the fork code first, then the process is forked, and then we get a 217 * chance to set up the process descriptor. Failure is not permitted at this 218 * point, so procdesc_new() must succeed. 219 */ 220 void 221 procdesc_new(struct proc *p, int flags) 222 { 223 struct procdesc *pd; 224 225 pd = uma_zalloc(procdesc_zone, M_WAITOK | M_ZERO); 226 pd->pd_proc = p; 227 pd->pd_pid = p->p_pid; 228 p->p_procdesc = pd; 229 pd->pd_flags = 0; 230 if (flags & PD_DAEMON) 231 pd->pd_flags |= PDF_DAEMON; 232 PROCDESC_LOCK_INIT(pd); 233 knlist_init_mtx(&pd->pd_selinfo.si_note, &pd->pd_lock); 234 235 /* 236 * Process descriptors start out with two references: one from their 237 * struct file, and the other from their struct proc. 238 */ 239 refcount_init(&pd->pd_refcount, 2); 240 } 241 242 /* 243 * Initialize a file with a process descriptor. 244 */ 245 void 246 procdesc_finit(struct procdesc *pdp, struct file *fp) 247 { 248 249 finit(fp, FREAD | FWRITE, DTYPE_PROCDESC, pdp, &procdesc_ops); 250 } 251 252 static void 253 procdesc_free(struct procdesc *pd) 254 { 255 256 /* 257 * When the last reference is released, we assert that the descriptor 258 * has been closed, but not that the process has exited, as we will 259 * detach the descriptor before the process dies if the descript is 260 * closed, as we can't wait synchronously. 
261 */ 262 if (refcount_release(&pd->pd_refcount)) { 263 KASSERT(pd->pd_proc == NULL, 264 ("procdesc_free: pd_proc != NULL")); 265 KASSERT((pd->pd_flags & PDF_CLOSED), 266 ("procdesc_free: !PDF_CLOSED")); 267 268 knlist_destroy(&pd->pd_selinfo.si_note); 269 PROCDESC_LOCK_DESTROY(pd); 270 uma_zfree(procdesc_zone, pd); 271 } 272 } 273 274 /* 275 * procdesc_exit() - notify a process descriptor that its process is exiting. 276 * We use the proctree_lock to ensure that process exit either happens 277 * strictly before or strictly after a concurrent call to procdesc_close(). 278 */ 279 int 280 procdesc_exit(struct proc *p) 281 { 282 struct procdesc *pd; 283 284 sx_assert(&proctree_lock, SA_XLOCKED); 285 PROC_LOCK_ASSERT(p, MA_OWNED); 286 KASSERT(p->p_procdesc != NULL, ("procdesc_exit: p_procdesc NULL")); 287 288 pd = p->p_procdesc; 289 290 PROCDESC_LOCK(pd); 291 KASSERT((pd->pd_flags & PDF_CLOSED) == 0 || p->p_pptr == initproc, 292 ("procdesc_exit: closed && parent not init")); 293 294 pd->pd_flags |= PDF_EXITED; 295 pd->pd_xstat = p->p_xstat; 296 297 /* 298 * If the process descriptor has been closed, then we have nothing 299 * to do; return 1 so that init will get SIGCHLD and do the reaping. 300 * Clean up the procdesc now rather than letting it happen during 301 * that reap. 302 */ 303 if (pd->pd_flags & PDF_CLOSED) { 304 PROCDESC_UNLOCK(pd); 305 pd->pd_proc = NULL; 306 p->p_procdesc = NULL; 307 procdesc_free(pd); 308 return (1); 309 } 310 if (pd->pd_flags & PDF_SELECTED) { 311 pd->pd_flags &= ~PDF_SELECTED; 312 selwakeup(&pd->pd_selinfo); 313 } 314 KNOTE_LOCKED(&pd->pd_selinfo.si_note, NOTE_EXIT); 315 PROCDESC_UNLOCK(pd); 316 return (0); 317 } 318 319 /* 320 * When a process descriptor is reaped, perhaps as a result of close() or 321 * pdwait4(), release the process's reference on the process descriptor. 
322 */ 323 void 324 procdesc_reap(struct proc *p) 325 { 326 struct procdesc *pd; 327 328 sx_assert(&proctree_lock, SA_XLOCKED); 329 KASSERT(p->p_procdesc != NULL, ("procdesc_reap: p_procdesc == NULL")); 330 331 pd = p->p_procdesc; 332 pd->pd_proc = NULL; 333 p->p_procdesc = NULL; 334 procdesc_free(pd); 335 } 336 337 /* 338 * procdesc_close() - last close on a process descriptor. If the process is 339 * still running, terminate with SIGKILL (unless PDF_DAEMON is set) and let 340 * init(8) clean up the mess; if not, we have to clean up the zombie ourselves. 341 */ 342 static int 343 procdesc_close(struct file *fp, struct thread *td) 344 { 345 struct procdesc *pd; 346 struct proc *p; 347 348 KASSERT(fp->f_type == DTYPE_PROCDESC, ("procdesc_close: !procdesc")); 349 350 pd = fp->f_data; 351 fp->f_ops = &badfileops; 352 fp->f_data = NULL; 353 354 sx_xlock(&proctree_lock); 355 PROCDESC_LOCK(pd); 356 pd->pd_flags |= PDF_CLOSED; 357 PROCDESC_UNLOCK(pd); 358 p = pd->pd_proc; 359 if (p == NULL) { 360 /* 361 * This is the case where process' exit status was already 362 * collected and procdesc_reap() was already called. 363 */ 364 sx_xunlock(&proctree_lock); 365 } else { 366 PROC_LOCK(p); 367 if (p->p_state == PRS_ZOMBIE) { 368 /* 369 * If the process is already dead and just awaiting 370 * reaping, do that now. This will release the 371 * process's reference to the process descriptor when it 372 * calls back into procdesc_reap(). 373 */ 374 PROC_SLOCK(p); 375 proc_reap(curthread, p, NULL, 0); 376 } else { 377 /* 378 * If the process is not yet dead, we need to kill it, 379 * but we can't wait around synchronously for it to go 380 * away, as that path leads to madness (and deadlocks). 381 * First, detach the process from its descriptor so that 382 * its exit status will be reported normally. 
383 */ 384 pd->pd_proc = NULL; 385 p->p_procdesc = NULL; 386 procdesc_free(pd); 387 388 /* 389 * Next, reparent it to init(8) so that there's someone 390 * to pick up the pieces; finally, terminate with 391 * prejudice. 392 */ 393 p->p_sigparent = SIGCHLD; 394 proc_reparent(p, initproc); 395 if ((pd->pd_flags & PDF_DAEMON) == 0) 396 kern_psignal(p, SIGKILL); 397 PROC_UNLOCK(p); 398 sx_xunlock(&proctree_lock); 399 } 400 } 401 402 /* 403 * Release the file descriptor's reference on the process descriptor. 404 */ 405 procdesc_free(pd); 406 return (0); 407 } 408 409 static int 410 procdesc_poll(struct file *fp, int events, struct ucred *active_cred, 411 struct thread *td) 412 { 413 struct procdesc *pd; 414 int revents; 415 416 revents = 0; 417 pd = fp->f_data; 418 PROCDESC_LOCK(pd); 419 if (pd->pd_flags & PDF_EXITED) 420 revents |= POLLHUP; 421 if (revents == 0) { 422 selrecord(td, &pd->pd_selinfo); 423 pd->pd_flags |= PDF_SELECTED; 424 } 425 PROCDESC_UNLOCK(pd); 426 return (revents); 427 } 428 429 static void 430 procdesc_kqops_detach(struct knote *kn) 431 { 432 struct procdesc *pd; 433 434 pd = kn->kn_fp->f_data; 435 knlist_remove(&pd->pd_selinfo.si_note, kn, 0); 436 } 437 438 static int 439 procdesc_kqops_event(struct knote *kn, long hint) 440 { 441 struct procdesc *pd; 442 u_int event; 443 444 pd = kn->kn_fp->f_data; 445 if (hint == 0) { 446 /* 447 * Initial test after registration. Generate a NOTE_EXIT in 448 * case the process already terminated before registration. 449 */ 450 event = pd->pd_flags & PDF_EXITED ? NOTE_EXIT : 0; 451 } else { 452 /* Mask off extra data. */ 453 event = (u_int)hint & NOTE_PCTRLMASK; 454 } 455 456 /* If the user is interested in this event, record it. */ 457 if (kn->kn_sfflags & event) 458 kn->kn_fflags |= event; 459 460 /* Process is gone, so flag the event as finished. 
*/ 461 if (event == NOTE_EXIT) { 462 kn->kn_flags |= EV_EOF | EV_ONESHOT; 463 if (kn->kn_fflags & NOTE_EXIT) 464 kn->kn_data = pd->pd_xstat; 465 if (kn->kn_fflags == 0) 466 kn->kn_flags |= EV_DROP; 467 return (1); 468 } 469 470 return (kn->kn_fflags != 0); 471 } 472 473 static struct filterops procdesc_kqops = { 474 .f_isfd = 1, 475 .f_detach = procdesc_kqops_detach, 476 .f_event = procdesc_kqops_event, 477 }; 478 479 static int 480 procdesc_kqfilter(struct file *fp, struct knote *kn) 481 { 482 struct procdesc *pd; 483 484 pd = fp->f_data; 485 switch (kn->kn_filter) { 486 case EVFILT_PROCDESC: 487 kn->kn_fop = &procdesc_kqops; 488 kn->kn_flags |= EV_CLEAR; 489 knlist_add(&pd->pd_selinfo.si_note, kn, 0); 490 return (0); 491 default: 492 return (EINVAL); 493 } 494 } 495 496 static int 497 procdesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 498 struct thread *td) 499 { 500 struct procdesc *pd; 501 struct timeval pstart; 502 503 /* 504 * XXXRW: Perhaps we should cache some more information from the 505 * process so that we can return it reliably here even after it has 506 * died. For example, caching its credential data. 507 */ 508 bzero(sb, sizeof(*sb)); 509 pd = fp->f_data; 510 sx_slock(&proctree_lock); 511 if (pd->pd_proc != NULL) { 512 PROC_LOCK(pd->pd_proc); 513 514 /* Set birth and [acm] times to process start time. */ 515 pstart = pd->pd_proc->p_stats->p_start; 516 timevaladd(&pstart, &boottime); 517 TIMEVAL_TO_TIMESPEC(&pstart, &sb->st_birthtim); 518 sb->st_atim = sb->st_birthtim; 519 sb->st_ctim = sb->st_birthtim; 520 sb->st_mtim = sb->st_birthtim; 521 if (pd->pd_proc->p_state != PRS_ZOMBIE) 522 sb->st_mode = S_IFREG | S_IRWXU; 523 else 524 sb->st_mode = S_IFREG; 525 sb->st_uid = pd->pd_proc->p_ucred->cr_ruid; 526 sb->st_gid = pd->pd_proc->p_ucred->cr_rgid; 527 PROC_UNLOCK(pd->pd_proc); 528 } else 529 sb->st_mode = S_IFREG; 530 sx_sunlock(&proctree_lock); 531 return (0); 532 } 533 534