1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_ktrace.h" 38 #include "opt_mac.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/fcntl.h> 43 #include <sys/kernel.h> 44 #include <sys/kthread.h> 45 #include <sys/lock.h> 46 #include <sys/mutex.h> 47 #include <sys/mac.h> 48 #include <sys/malloc.h> 49 #include <sys/namei.h> 50 #include <sys/proc.h> 51 #include <sys/unistd.h> 52 #include <sys/vnode.h> 53 #include <sys/ktrace.h> 54 #include <sys/sx.h> 55 #include <sys/sysctl.h> 56 #include <sys/syslog.h> 57 #include <sys/sysproto.h> 58 59 /* 60 * The ktrace facility allows the tracing of certain key events in user space 61 * processes, such as system calls, signal delivery, context switches, and 62 * user generated events using utrace(2). It works by streaming event 63 * records and data to a vnode associated with the process using the 64 * ktrace(2) system call. In general, records can be written directly from 65 * the context that generates the event. One important exception to this is 66 * during a context switch, where sleeping is not permitted. To handle this 67 * case, trace events are generated using in-kernel ktr_request records, and 68 * then delivered to disk at a convenient moment -- either immediately, the 69 * next traceable event, at system call return, or at process exit. 70 * 71 * When dealing with multiple threads or processes writing to the same event 72 * log, ordering guarantees are weak: specifically, if an event has multiple 73 * records (i.e., system call enter and return), they may be interlaced with 74 * records from another event. Process and thread ID information is provided 75 * in the record, and user applications can de-interlace events if required. 76 */ 77 78 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 79 80 #ifdef KTRACE 81 82 #ifndef KTRACE_REQUEST_POOL 83 #define KTRACE_REQUEST_POOL 100 84 #endif 85 86 struct ktr_request { 87 struct ktr_header ktr_header; 88 void *ktr_buffer; 89 union { 90 struct ktr_syscall ktr_syscall; 91 struct ktr_sysret ktr_sysret; 92 struct ktr_genio ktr_genio; 93 struct ktr_psig ktr_psig; 94 struct ktr_csw ktr_csw; 95 } ktr_data; 96 STAILQ_ENTRY(ktr_request) ktr_list; 97 }; 98 99 static int data_lengths[] = { 100 0, /* none */ 101 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 102 sizeof(struct ktr_sysret), /* KTR_SYSRET */ 103 0, /* KTR_NAMEI */ 104 sizeof(struct ktr_genio), /* KTR_GENIO */ 105 sizeof(struct ktr_psig), /* KTR_PSIG */ 106 sizeof(struct ktr_csw), /* KTR_CSW */ 107 0 /* KTR_USER */ 108 }; 109 110 static STAILQ_HEAD(, ktr_request) ktr_free; 111 112 static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 113 114 static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 115 TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 116 117 static u_int ktr_geniosize = PAGE_SIZE; 118 TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 119 SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 120 0, "Maximum size of genio event payload"); 121 122 static int print_message = 1; 123 struct mtx ktrace_mtx; 124 static struct cv ktrace_cv; 125 static struct sx ktrace_sx; 126 127 static void ktrace_init(void *dummy); 128 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 129 static u_int ktrace_resize_pool(u_int newsize); 130 static struct ktr_request *ktr_getrequest(int type); 131 static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 132 static void ktr_freerequest(struct ktr_request *req); 133 static void ktr_writerequest(struct thread *td, struct ktr_request *req); 134 static int ktrcanset(struct thread *,struct proc *); 135 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 136 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 137 138 /* 139 * ktrace itself generates events, such as context switches, which we do not 140 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 141 * whether or not it is in a region where tracing of events should be 142 * suppressed. 143 */ 144 static void 145 ktrace_enter(struct thread *td) 146 { 147 148 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 149 td->td_pflags |= TDP_INKTRACE; 150 } 151 152 static void 153 ktrace_exit(struct thread *td) 154 { 155 156 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 157 td->td_pflags &= ~TDP_INKTRACE; 158 } 159 160 static void 161 ktrace_assert(struct thread *td) 162 { 163 164 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 165 } 166 167 static void 168 ktrace_init(void *dummy) 169 { 170 struct ktr_request *req; 171 int i; 172 173 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 174 sx_init(&ktrace_sx, "ktrace_sx"); 175 cv_init(&ktrace_cv, "ktrace"); 176 STAILQ_INIT(&ktr_free); 177 for (i = 0; i < ktr_requestpool; i++) { 178 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 179 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 180 } 181 } 182 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 183 184 static int 185 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 186 { 187 struct thread *td; 188 u_int newsize, oldsize, wantsize; 189 int error; 190 191 /* Handle easy read-only case first to avoid warnings from GCC. */ 192 if (!req->newptr) { 193 mtx_lock(&ktrace_mtx); 194 oldsize = ktr_requestpool; 195 mtx_unlock(&ktrace_mtx); 196 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 197 } 198 199 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 200 if (error) 201 return (error); 202 td = curthread; 203 ktrace_enter(td); 204 mtx_lock(&ktrace_mtx); 205 oldsize = ktr_requestpool; 206 newsize = ktrace_resize_pool(wantsize); 207 mtx_unlock(&ktrace_mtx); 208 ktrace_exit(td); 209 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 210 if (error) 211 return (error); 212 if (wantsize > oldsize && newsize < wantsize) 213 return (ENOSPC); 214 return (0); 215 } 216 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 217 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", ""); 218 219 static u_int 220 ktrace_resize_pool(u_int newsize) 221 { 222 struct ktr_request *req; 223 int bound; 224 225 mtx_assert(&ktrace_mtx, MA_OWNED); 226 print_message = 1; 227 bound = newsize - ktr_requestpool; 228 if (bound == 0) 229 return (ktr_requestpool); 230 if (bound < 0) 231 /* Shrink pool down to newsize if possible. */ 232 while (bound++ < 0) { 233 req = STAILQ_FIRST(&ktr_free); 234 if (req == NULL) 235 return (ktr_requestpool); 236 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 237 ktr_requestpool--; 238 mtx_unlock(&ktrace_mtx); 239 free(req, M_KTRACE); 240 mtx_lock(&ktrace_mtx); 241 } 242 else 243 /* Grow pool up to newsize. */ 244 while (bound-- > 0) { 245 mtx_unlock(&ktrace_mtx); 246 req = malloc(sizeof(struct ktr_request), M_KTRACE, 247 M_WAITOK); 248 mtx_lock(&ktrace_mtx); 249 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 250 ktr_requestpool++; 251 } 252 return (ktr_requestpool); 253 } 254 255 static struct ktr_request * 256 ktr_getrequest(int type) 257 { 258 struct ktr_request *req; 259 struct thread *td = curthread; 260 struct proc *p = td->td_proc; 261 int pm; 262 263 ktrace_enter(td); /* XXX: In caller instead? */ 264 mtx_lock(&ktrace_mtx); 265 if (!KTRCHECK(td, type)) { 266 mtx_unlock(&ktrace_mtx); 267 ktrace_exit(td); 268 return (NULL); 269 } 270 req = STAILQ_FIRST(&ktr_free); 271 if (req != NULL) { 272 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 273 req->ktr_header.ktr_type = type; 274 if (p->p_traceflag & KTRFAC_DROP) { 275 req->ktr_header.ktr_type |= KTR_DROP; 276 p->p_traceflag &= ~KTRFAC_DROP; 277 } 278 mtx_unlock(&ktrace_mtx); 279 microtime(&req->ktr_header.ktr_time); 280 req->ktr_header.ktr_pid = p->p_pid; 281 req->ktr_header.ktr_tid = td->td_tid; 282 bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1); 283 req->ktr_buffer = NULL; 284 req->ktr_header.ktr_len = 0; 285 } else { 286 p->p_traceflag |= KTRFAC_DROP; 287 pm = print_message; 288 print_message = 0; 289 mtx_unlock(&ktrace_mtx); 290 if (pm) 291 printf("Out of ktrace request objects.\n"); 292 ktrace_exit(td); 293 } 294 return (req); 295 } 296 297 /* 298 * Some trace generation environments don't permit direct access to VFS, 299 * such as during a context switch where sleeping is not allowed. Under these 300 * circumstances, queue a request to the thread to be written asynchronously 301 * later. 302 */ 303 static void 304 ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 305 { 306 307 mtx_lock(&ktrace_mtx); 308 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 309 mtx_unlock(&ktrace_mtx); 310 ktrace_exit(td); 311 } 312 313 /* 314 * Drain any pending ktrace records from the per-thread queue to disk. This 315 * is used both internally before committing other records, and also on 316 * system call return. We drain all the ones we can find at the time when 317 * drain is requested, but don't keep draining after that as those events 318 * may me approximately "after" the current event. 319 */ 320 static void 321 ktr_drain(struct thread *td) 322 { 323 struct ktr_request *queued_req; 324 STAILQ_HEAD(, ktr_request) local_queue; 325 326 ktrace_assert(td); 327 sx_assert(&ktrace_sx, SX_XLOCKED); 328 329 STAILQ_INIT(&local_queue); /* XXXRW: needed? */ 330 331 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 332 mtx_lock(&ktrace_mtx); 333 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 334 mtx_unlock(&ktrace_mtx); 335 336 while ((queued_req = STAILQ_FIRST(&local_queue))) { 337 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 338 ktr_writerequest(td, queued_req); 339 ktr_freerequest(queued_req); 340 } 341 } 342 } 343 344 /* 345 * Submit a trace record for immediate commit to disk -- to be used only 346 * where entering VFS is OK. First drain any pending records that may have 347 * been cached in the thread. 348 */ 349 static void 350 ktr_submitrequest(struct thread *td, struct ktr_request *req) 351 { 352 353 ktrace_assert(td); 354 355 sx_xlock(&ktrace_sx); 356 ktr_drain(td); 357 ktr_writerequest(td, req); 358 ktr_freerequest(req); 359 sx_xunlock(&ktrace_sx); 360 361 ktrace_exit(td); 362 } 363 364 static void 365 ktr_freerequest(struct ktr_request *req) 366 { 367 368 if (req->ktr_buffer != NULL) 369 free(req->ktr_buffer, M_KTRACE); 370 mtx_lock(&ktrace_mtx); 371 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 372 mtx_unlock(&ktrace_mtx); 373 } 374 375 /* 376 * MPSAFE 377 */ 378 void 379 ktrsyscall(code, narg, args) 380 int code, narg; 381 register_t args[]; 382 { 383 struct ktr_request *req; 384 struct ktr_syscall *ktp; 385 size_t buflen; 386 char *buf = NULL; 387 388 buflen = sizeof(register_t) * narg; 389 if (buflen > 0) { 390 buf = malloc(buflen, M_KTRACE, M_WAITOK); 391 bcopy(args, buf, buflen); 392 } 393 req = ktr_getrequest(KTR_SYSCALL); 394 if (req == NULL) { 395 if (buf != NULL) 396 free(buf, M_KTRACE); 397 return; 398 } 399 ktp = &req->ktr_data.ktr_syscall; 400 ktp->ktr_code = code; 401 ktp->ktr_narg = narg; 402 if (buflen > 0) { 403 req->ktr_header.ktr_len = buflen; 404 req->ktr_buffer = buf; 405 } 406 ktr_submitrequest(curthread, req); 407 } 408 409 /* 410 * MPSAFE 411 */ 412 void 413 ktrsysret(code, error, retval) 414 int code, error; 415 register_t retval; 416 { 417 struct ktr_request *req; 418 struct ktr_sysret *ktp; 419 420 req = ktr_getrequest(KTR_SYSRET); 421 if (req == NULL) 422 return; 423 ktp = &req->ktr_data.ktr_sysret; 424 ktp->ktr_code = code; 425 ktp->ktr_error = error; 426 ktp->ktr_retval = retval; /* what about val2 ? */ 427 ktr_submitrequest(curthread, req); 428 } 429 430 /* 431 * When a process exits, drain per-process asynchronous trace records. 432 */ 433 void 434 ktrprocexit(struct thread *td) 435 { 436 437 ktrace_enter(td); 438 sx_xlock(&ktrace_sx); 439 ktr_drain(td); 440 sx_xunlock(&ktrace_sx); 441 ktrace_exit(td); 442 } 443 444 /* 445 * When a thread returns, drain any asynchronous records generated by the 446 * system call. 447 */ 448 void 449 ktruserret(struct thread *td) 450 { 451 452 ktrace_enter(td); 453 sx_xlock(&ktrace_sx); 454 ktr_drain(td); 455 sx_xunlock(&ktrace_sx); 456 ktrace_exit(td); 457 } 458 459 void 460 ktrnamei(path) 461 char *path; 462 { 463 struct ktr_request *req; 464 int namelen; 465 char *buf = NULL; 466 467 namelen = strlen(path); 468 if (namelen > 0) { 469 buf = malloc(namelen, M_KTRACE, M_WAITOK); 470 bcopy(path, buf, namelen); 471 } 472 req = ktr_getrequest(KTR_NAMEI); 473 if (req == NULL) { 474 if (buf != NULL) 475 free(buf, M_KTRACE); 476 return; 477 } 478 if (namelen > 0) { 479 req->ktr_header.ktr_len = namelen; 480 req->ktr_buffer = buf; 481 } 482 ktr_submitrequest(curthread, req); 483 } 484 485 /* 486 * Since the uio may not stay valid, we can not hand off this request to 487 * the thread and need to process it synchronously. However, we wish to 488 * keep the relative order of records in a trace file correct, so we 489 * do put this request on the queue (if it isn't empty) and then block. 490 * The ktrace thread waks us back up when it is time for this event to 491 * be posted and blocks until we have completed writing out the event 492 * and woken it back up. 493 */ 494 void 495 ktrgenio(fd, rw, uio, error) 496 int fd; 497 enum uio_rw rw; 498 struct uio *uio; 499 int error; 500 { 501 struct ktr_request *req; 502 struct ktr_genio *ktg; 503 int datalen; 504 char *buf; 505 506 if (error) { 507 free(uio, M_IOV); 508 return; 509 } 510 uio->uio_offset = 0; 511 uio->uio_rw = UIO_WRITE; 512 datalen = imin(uio->uio_resid, ktr_geniosize); 513 buf = malloc(datalen, M_KTRACE, M_WAITOK); 514 error = uiomove(buf, datalen, uio); 515 free(uio, M_IOV); 516 if (error) { 517 free(buf, M_KTRACE); 518 return; 519 } 520 req = ktr_getrequest(KTR_GENIO); 521 if (req == NULL) { 522 free(buf, M_KTRACE); 523 return; 524 } 525 ktg = &req->ktr_data.ktr_genio; 526 ktg->ktr_fd = fd; 527 ktg->ktr_rw = rw; 528 req->ktr_header.ktr_len = datalen; 529 req->ktr_buffer = buf; 530 ktr_submitrequest(curthread, req); 531 } 532 533 void 534 ktrpsig(sig, action, mask, code) 535 int sig; 536 sig_t action; 537 sigset_t *mask; 538 int code; 539 { 540 struct ktr_request *req; 541 struct ktr_psig *kp; 542 543 req = ktr_getrequest(KTR_PSIG); 544 if (req == NULL) 545 return; 546 kp = &req->ktr_data.ktr_psig; 547 kp->signo = (char)sig; 548 kp->action = action; 549 kp->mask = *mask; 550 kp->code = code; 551 ktr_enqueuerequest(curthread, req); 552 } 553 554 void 555 ktrcsw(out, user) 556 int out, user; 557 { 558 struct ktr_request *req; 559 struct ktr_csw *kc; 560 561 req = ktr_getrequest(KTR_CSW); 562 if (req == NULL) 563 return; 564 kc = &req->ktr_data.ktr_csw; 565 kc->out = out; 566 kc->user = user; 567 ktr_enqueuerequest(curthread, req); 568 } 569 #endif /* KTRACE */ 570 571 /* Interface and common routines */ 572 573 /* 574 * ktrace system call 575 * 576 * MPSAFE 577 */ 578 #ifndef _SYS_SYSPROTO_H_ 579 struct ktrace_args { 580 char *fname; 581 int ops; 582 int facs; 583 int pid; 584 }; 585 #endif 586 /* ARGSUSED */ 587 int 588 ktrace(td, uap) 589 struct thread *td; 590 register struct ktrace_args *uap; 591 { 592 #ifdef KTRACE 593 register struct vnode *vp = NULL; 594 register struct proc *p; 595 struct pgrp *pg; 596 int facs = uap->facs & ~KTRFAC_ROOT; 597 int ops = KTROP(uap->ops); 598 int descend = uap->ops & KTRFLAG_DESCEND; 599 int nfound, ret = 0; 600 int flags, error = 0; 601 struct nameidata nd; 602 struct ucred *cred; 603 604 /* 605 * Need something to (un)trace. 606 */ 607 if (ops != KTROP_CLEARFILE && facs == 0) 608 return (EINVAL); 609 610 ktrace_enter(td); 611 if (ops != KTROP_CLEAR) { 612 /* 613 * an operation which requires a file argument. 614 */ 615 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td); 616 flags = FREAD | FWRITE | O_NOFOLLOW; 617 mtx_lock(&Giant); 618 error = vn_open(&nd, &flags, 0, -1); 619 if (error) { 620 mtx_unlock(&Giant); 621 ktrace_exit(td); 622 return (error); 623 } 624 NDFREE(&nd, NDF_ONLY_PNBUF); 625 vp = nd.ni_vp; 626 VOP_UNLOCK(vp, 0, td); 627 if (vp->v_type != VREG) { 628 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 629 mtx_unlock(&Giant); 630 ktrace_exit(td); 631 return (EACCES); 632 } 633 mtx_unlock(&Giant); 634 } 635 /* 636 * Clear all uses of the tracefile. 637 */ 638 if (ops == KTROP_CLEARFILE) { 639 sx_slock(&allproc_lock); 640 LIST_FOREACH(p, &allproc, p_list) { 641 PROC_LOCK(p); 642 if (p->p_tracevp == vp) { 643 if (ktrcanset(td, p)) { 644 mtx_lock(&ktrace_mtx); 645 cred = p->p_tracecred; 646 p->p_tracecred = NULL; 647 p->p_tracevp = NULL; 648 p->p_traceflag = 0; 649 mtx_unlock(&ktrace_mtx); 650 PROC_UNLOCK(p); 651 mtx_lock(&Giant); 652 (void) vn_close(vp, FREAD|FWRITE, 653 cred, td); 654 mtx_unlock(&Giant); 655 crfree(cred); 656 } else { 657 PROC_UNLOCK(p); 658 error = EPERM; 659 } 660 } else 661 PROC_UNLOCK(p); 662 } 663 sx_sunlock(&allproc_lock); 664 goto done; 665 } 666 /* 667 * do it 668 */ 669 sx_slock(&proctree_lock); 670 if (uap->pid < 0) { 671 /* 672 * by process group 673 */ 674 pg = pgfind(-uap->pid); 675 if (pg == NULL) { 676 sx_sunlock(&proctree_lock); 677 error = ESRCH; 678 goto done; 679 } 680 /* 681 * ktrops() may call vrele(). Lock pg_members 682 * by the proctree_lock rather than pg_mtx. 683 */ 684 PGRP_UNLOCK(pg); 685 nfound = 0; 686 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 687 PROC_LOCK(p); 688 if (p_cansee(td, p) != 0) { 689 PROC_UNLOCK(p); 690 continue; 691 } 692 PROC_UNLOCK(p); 693 nfound++; 694 if (descend) 695 ret |= ktrsetchildren(td, p, ops, facs, vp); 696 else 697 ret |= ktrops(td, p, ops, facs, vp); 698 } 699 if (nfound == 0) { 700 sx_sunlock(&proctree_lock); 701 error = ESRCH; 702 goto done; 703 } 704 } else { 705 /* 706 * by pid 707 */ 708 p = pfind(uap->pid); 709 if (p == NULL) { 710 sx_sunlock(&proctree_lock); 711 error = ESRCH; 712 goto done; 713 } 714 error = p_cansee(td, p); 715 /* 716 * The slock of the proctree lock will keep this process 717 * from going away, so unlocking the proc here is ok. 718 */ 719 PROC_UNLOCK(p); 720 if (error) { 721 sx_sunlock(&proctree_lock); 722 goto done; 723 } 724 if (descend) 725 ret |= ktrsetchildren(td, p, ops, facs, vp); 726 else 727 ret |= ktrops(td, p, ops, facs, vp); 728 } 729 sx_sunlock(&proctree_lock); 730 if (!ret) 731 error = EPERM; 732 done: 733 if (vp != NULL) { 734 mtx_lock(&Giant); 735 (void) vn_close(vp, FWRITE, td->td_ucred, td); 736 mtx_unlock(&Giant); 737 } 738 ktrace_exit(td); 739 return (error); 740 #else /* !KTRACE */ 741 return (ENOSYS); 742 #endif /* KTRACE */ 743 } 744 745 /* 746 * utrace system call 747 * 748 * MPSAFE 749 */ 750 /* ARGSUSED */ 751 int 752 utrace(td, uap) 753 struct thread *td; 754 register struct utrace_args *uap; 755 { 756 757 #ifdef KTRACE 758 struct ktr_request *req; 759 void *cp; 760 int error; 761 762 if (!KTRPOINT(td, KTR_USER)) 763 return (0); 764 if (uap->len > KTR_USER_MAXLEN) 765 return (EINVAL); 766 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 767 error = copyin(uap->addr, cp, uap->len); 768 if (error) { 769 free(cp, M_KTRACE); 770 return (error); 771 } 772 req = ktr_getrequest(KTR_USER); 773 if (req == NULL) { 774 free(cp, M_KTRACE); 775 return (ENOMEM); 776 } 777 req->ktr_buffer = cp; 778 req->ktr_header.ktr_len = uap->len; 779 ktr_submitrequest(td, req); 780 return (0); 781 #else /* !KTRACE */ 782 return (ENOSYS); 783 #endif /* KTRACE */ 784 } 785 786 #ifdef KTRACE 787 static int 788 ktrops(td, p, ops, facs, vp) 789 struct thread *td; 790 struct proc *p; 791 int ops, facs; 792 struct vnode *vp; 793 { 794 struct vnode *tracevp = NULL; 795 struct ucred *tracecred = NULL; 796 797 PROC_LOCK(p); 798 if (!ktrcanset(td, p)) { 799 PROC_UNLOCK(p); 800 return (0); 801 } 802 mtx_lock(&ktrace_mtx); 803 if (ops == KTROP_SET) { 804 if (p->p_tracevp != vp) { 805 /* 806 * if trace file already in use, relinquish below 807 */ 808 tracevp = p->p_tracevp; 809 VREF(vp); 810 p->p_tracevp = vp; 811 } 812 if (p->p_tracecred != td->td_ucred) { 813 tracecred = p->p_tracecred; 814 p->p_tracecred = crhold(td->td_ucred); 815 } 816 p->p_traceflag |= facs; 817 if (td->td_ucred->cr_uid == 0) 818 p->p_traceflag |= KTRFAC_ROOT; 819 } else { 820 /* KTROP_CLEAR */ 821 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 822 /* no more tracing */ 823 p->p_traceflag = 0; 824 tracevp = p->p_tracevp; 825 p->p_tracevp = NULL; 826 tracecred = p->p_tracecred; 827 p->p_tracecred = NULL; 828 } 829 } 830 mtx_unlock(&ktrace_mtx); 831 PROC_UNLOCK(p); 832 if (tracevp != NULL) { 833 mtx_lock(&Giant); 834 vrele(tracevp); 835 mtx_unlock(&Giant); 836 } 837 if (tracecred != NULL) 838 crfree(tracecred); 839 840 return (1); 841 } 842 843 static int 844 ktrsetchildren(td, top, ops, facs, vp) 845 struct thread *td; 846 struct proc *top; 847 int ops, facs; 848 struct vnode *vp; 849 { 850 register struct proc *p; 851 register int ret = 0; 852 853 p = top; 854 sx_assert(&proctree_lock, SX_LOCKED); 855 for (;;) { 856 ret |= ktrops(td, p, ops, facs, vp); 857 /* 858 * If this process has children, descend to them next, 859 * otherwise do any siblings, and if done with this level, 860 * follow back up the tree (but not past top). 861 */ 862 if (!LIST_EMPTY(&p->p_children)) 863 p = LIST_FIRST(&p->p_children); 864 else for (;;) { 865 if (p == top) 866 return (ret); 867 if (LIST_NEXT(p, p_sibling)) { 868 p = LIST_NEXT(p, p_sibling); 869 break; 870 } 871 p = p->p_pptr; 872 } 873 } 874 /*NOTREACHED*/ 875 } 876 877 static void 878 ktr_writerequest(struct thread *td, struct ktr_request *req) 879 { 880 struct ktr_header *kth; 881 struct vnode *vp; 882 struct proc *p; 883 struct ucred *cred; 884 struct uio auio; 885 struct iovec aiov[3]; 886 struct mount *mp; 887 int datalen, buflen, vrele_count; 888 int error; 889 890 /* 891 * We hold the vnode and credential for use in I/O in case ktrace is 892 * disabled on the process as we write out the request. 893 * 894 * XXXRW: This is not ideal: we could end up performing a write after 895 * the vnode has been closed. 896 */ 897 mtx_lock(&ktrace_mtx); 898 vp = td->td_proc->p_tracevp; 899 if (vp != NULL) 900 VREF(vp); 901 cred = td->td_proc->p_tracecred; 902 if (cred != NULL) 903 crhold(cred); 904 mtx_unlock(&ktrace_mtx); 905 906 /* 907 * If vp is NULL, the vp has been cleared out from under this 908 * request, so just drop it. Make sure the credential and vnode are 909 * in sync: we should have both or neither. 910 */ 911 if (vp == NULL) { 912 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 913 return; 914 } 915 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 916 917 kth = &req->ktr_header; 918 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 919 buflen = kth->ktr_len; 920 auio.uio_iov = &aiov[0]; 921 auio.uio_offset = 0; 922 auio.uio_segflg = UIO_SYSSPACE; 923 auio.uio_rw = UIO_WRITE; 924 aiov[0].iov_base = (caddr_t)kth; 925 aiov[0].iov_len = sizeof(struct ktr_header); 926 auio.uio_resid = sizeof(struct ktr_header); 927 auio.uio_iovcnt = 1; 928 auio.uio_td = td; 929 if (datalen != 0) { 930 aiov[1].iov_base = (caddr_t)&req->ktr_data; 931 aiov[1].iov_len = datalen; 932 auio.uio_resid += datalen; 933 auio.uio_iovcnt++; 934 kth->ktr_len += datalen; 935 } 936 if (buflen != 0) { 937 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 938 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 939 aiov[auio.uio_iovcnt].iov_len = buflen; 940 auio.uio_resid += buflen; 941 auio.uio_iovcnt++; 942 } 943 944 mtx_lock(&Giant); 945 vn_start_write(vp, &mp, V_WAIT); 946 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 947 (void)VOP_LEASE(vp, td, cred, LEASE_WRITE); 948 #ifdef MAC 949 error = mac_check_vnode_write(cred, NOCRED, vp); 950 if (error == 0) 951 #endif 952 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 953 VOP_UNLOCK(vp, 0, td); 954 vn_finished_write(mp); 955 mtx_unlock(&Giant); 956 if (!error) 957 return; 958 /* 959 * If error encountered, give up tracing on this vnode. We defer 960 * all the vrele()'s on the vnode until after we are finished walking 961 * the various lists to avoid needlessly holding locks. 962 */ 963 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 964 error); 965 vrele_count = 0; 966 /* 967 * First, clear this vnode from being used by any processes in the 968 * system. 969 * XXX - If one process gets an EPERM writing to the vnode, should 970 * we really do this? Other processes might have suitable 971 * credentials for the operation. 972 */ 973 cred = NULL; 974 sx_slock(&allproc_lock); 975 LIST_FOREACH(p, &allproc, p_list) { 976 PROC_LOCK(p); 977 if (p->p_tracevp == vp) { 978 mtx_lock(&ktrace_mtx); 979 p->p_tracevp = NULL; 980 p->p_traceflag = 0; 981 cred = p->p_tracecred; 982 p->p_tracecred = NULL; 983 mtx_unlock(&ktrace_mtx); 984 vrele_count++; 985 } 986 PROC_UNLOCK(p); 987 if (cred != NULL) { 988 crfree(cred); 989 cred = NULL; 990 } 991 } 992 sx_sunlock(&allproc_lock); 993 994 /* 995 * We can't clear any pending requests in threads that have cached 996 * them but not yet committed them, as those are per-thread. The 997 * thread will have to clear it itself on system call return. 998 */ 999 mtx_lock(&Giant); 1000 while (vrele_count-- > 0) 1001 vrele(vp); 1002 mtx_unlock(&Giant); 1003 } 1004 1005 /* 1006 * Return true if caller has permission to set the ktracing state 1007 * of target. Essentially, the target can't possess any 1008 * more permissions than the caller. KTRFAC_ROOT signifies that 1009 * root previously set the tracing status on the target process, and 1010 * so, only root may further change it. 1011 */ 1012 static int 1013 ktrcanset(td, targetp) 1014 struct thread *td; 1015 struct proc *targetp; 1016 { 1017 1018 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1019 if (targetp->p_traceflag & KTRFAC_ROOT && 1020 suser_cred(td->td_ucred, SUSER_ALLOWJAIL)) 1021 return (0); 1022 1023 if (p_candebug(td, targetp) != 0) 1024 return (0); 1025 1026 return (1); 1027 } 1028 1029 #endif /* KTRACE */ 1030