/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */

/* Malloc type used for request objects and their payload buffers. */
static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

/* Default number of preallocated request objects. */
#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

/*
 * In-kernel representation of one trace record.  A request lives either on
 * the global free list (ktr_free) or on a process's pending queue (p_ktr),
 * or is privately owned by the thread that is filling it in or writing it.
 */
struct ktr_request {
	/* Record header; written to the trace file first. */
	struct	ktr_header ktr_header;
	/*
	 * Optional variable-length payload allocated from M_KTRACE, or NULL.
	 * ktr_freerequest() frees it when the request is recycled.
	 */
	void	*ktr_buffer;
	/* Fixed-size, type-specific part of the record. */
	union {
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	/* Linkage for ktr_free, p_ktr, or a local drain queue. */
	STAILQ_ENTRY(ktr_request) ktr_list;
};

/*
 * Number of bytes of ktr_data to write for each record type; indexed by the
 * record type with KTR_DROP masked off (see ktr_writerequest()).
 */
static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0					/* KTR_USER */
};

/* Pool of unused request objects; manipulated under ktrace_mtx. */
static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

/* Current size of the request pool; adjusted by ktrace_resize_pool(). */
static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

/* Upper bound on the number of I/O bytes captured by ktrgenio(). */
static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

/*
 * Non-zero when the next pool exhaustion should be reported on the console;
 * cleared by ktr_getrequest() once reported and re-armed whenever
 * ktrace_resize_pool() is called.
 */
static int print_message = 1;
/* Taken around the free list, pending queues and p_trace* updates. */
struct mtx ktrace_mtx;
/* Held exclusively while pending records are drained and written. */
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 139 140 /* 141 * ktrace itself generates events, such as context switches, which we do not 142 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 143 * whether or not it is in a region where tracing of events should be 144 * suppressed. 145 */ 146 static void 147 ktrace_enter(struct thread *td) 148 { 149 150 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 151 td->td_pflags |= TDP_INKTRACE; 152 } 153 154 static void 155 ktrace_exit(struct thread *td) 156 { 157 158 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 159 td->td_pflags &= ~TDP_INKTRACE; 160 } 161 162 static void 163 ktrace_assert(struct thread *td) 164 { 165 166 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 167 } 168 169 static void 170 ktrace_init(void *dummy) 171 { 172 struct ktr_request *req; 173 int i; 174 175 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 176 sx_init(&ktrace_sx, "ktrace_sx"); 177 STAILQ_INIT(&ktr_free); 178 for (i = 0; i < ktr_requestpool; i++) { 179 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 180 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 181 } 182 } 183 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 184 185 static int 186 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 187 { 188 struct thread *td; 189 u_int newsize, oldsize, wantsize; 190 int error; 191 192 /* Handle easy read-only case first to avoid warnings from GCC. 
*/ 193 if (!req->newptr) { 194 mtx_lock(&ktrace_mtx); 195 oldsize = ktr_requestpool; 196 mtx_unlock(&ktrace_mtx); 197 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 198 } 199 200 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 201 if (error) 202 return (error); 203 td = curthread; 204 ktrace_enter(td); 205 mtx_lock(&ktrace_mtx); 206 oldsize = ktr_requestpool; 207 newsize = ktrace_resize_pool(wantsize); 208 mtx_unlock(&ktrace_mtx); 209 ktrace_exit(td); 210 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 211 if (error) 212 return (error); 213 if (wantsize > oldsize && newsize < wantsize) 214 return (ENOSPC); 215 return (0); 216 } 217 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 218 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", ""); 219 220 static u_int 221 ktrace_resize_pool(u_int newsize) 222 { 223 struct ktr_request *req; 224 int bound; 225 226 mtx_assert(&ktrace_mtx, MA_OWNED); 227 print_message = 1; 228 bound = newsize - ktr_requestpool; 229 if (bound == 0) 230 return (ktr_requestpool); 231 if (bound < 0) 232 /* Shrink pool down to newsize if possible. */ 233 while (bound++ < 0) { 234 req = STAILQ_FIRST(&ktr_free); 235 if (req == NULL) 236 return (ktr_requestpool); 237 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 238 ktr_requestpool--; 239 mtx_unlock(&ktrace_mtx); 240 free(req, M_KTRACE); 241 mtx_lock(&ktrace_mtx); 242 } 243 else 244 /* Grow pool up to newsize. */ 245 while (bound-- > 0) { 246 mtx_unlock(&ktrace_mtx); 247 req = malloc(sizeof(struct ktr_request), M_KTRACE, 248 M_WAITOK); 249 mtx_lock(&ktrace_mtx); 250 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 251 ktr_requestpool++; 252 } 253 return (ktr_requestpool); 254 } 255 256 static struct ktr_request * 257 ktr_getrequest(int type) 258 { 259 struct ktr_request *req; 260 struct thread *td = curthread; 261 struct proc *p = td->td_proc; 262 int pm; 263 264 ktrace_enter(td); /* XXX: In caller instead? 
*/ 265 mtx_lock(&ktrace_mtx); 266 if (!KTRCHECK(td, type)) { 267 mtx_unlock(&ktrace_mtx); 268 ktrace_exit(td); 269 return (NULL); 270 } 271 req = STAILQ_FIRST(&ktr_free); 272 if (req != NULL) { 273 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 274 req->ktr_header.ktr_type = type; 275 if (p->p_traceflag & KTRFAC_DROP) { 276 req->ktr_header.ktr_type |= KTR_DROP; 277 p->p_traceflag &= ~KTRFAC_DROP; 278 } 279 mtx_unlock(&ktrace_mtx); 280 microtime(&req->ktr_header.ktr_time); 281 req->ktr_header.ktr_pid = p->p_pid; 282 req->ktr_header.ktr_tid = td->td_tid; 283 bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1); 284 req->ktr_buffer = NULL; 285 req->ktr_header.ktr_len = 0; 286 } else { 287 p->p_traceflag |= KTRFAC_DROP; 288 pm = print_message; 289 print_message = 0; 290 mtx_unlock(&ktrace_mtx); 291 if (pm) 292 printf("Out of ktrace request objects.\n"); 293 ktrace_exit(td); 294 } 295 return (req); 296 } 297 298 /* 299 * Some trace generation environments don't permit direct access to VFS, 300 * such as during a context switch where sleeping is not allowed. Under these 301 * circumstances, queue a request to the thread to be written asynchronously 302 * later. 303 */ 304 static void 305 ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 306 { 307 308 mtx_lock(&ktrace_mtx); 309 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 310 mtx_unlock(&ktrace_mtx); 311 ktrace_exit(td); 312 } 313 314 /* 315 * Drain any pending ktrace records from the per-thread queue to disk. This 316 * is used both internally before committing other records, and also on 317 * system call return. We drain all the ones we can find at the time when 318 * drain is requested, but don't keep draining after that as those events 319 * may me approximately "after" the current event. 
320 */ 321 static void 322 ktr_drain(struct thread *td) 323 { 324 struct ktr_request *queued_req; 325 STAILQ_HEAD(, ktr_request) local_queue; 326 327 ktrace_assert(td); 328 sx_assert(&ktrace_sx, SX_XLOCKED); 329 330 STAILQ_INIT(&local_queue); /* XXXRW: needed? */ 331 332 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 333 mtx_lock(&ktrace_mtx); 334 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 335 mtx_unlock(&ktrace_mtx); 336 337 while ((queued_req = STAILQ_FIRST(&local_queue))) { 338 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 339 ktr_writerequest(td, queued_req); 340 ktr_freerequest(queued_req); 341 } 342 } 343 } 344 345 /* 346 * Submit a trace record for immediate commit to disk -- to be used only 347 * where entering VFS is OK. First drain any pending records that may have 348 * been cached in the thread. 349 */ 350 static void 351 ktr_submitrequest(struct thread *td, struct ktr_request *req) 352 { 353 354 ktrace_assert(td); 355 356 sx_xlock(&ktrace_sx); 357 ktr_drain(td); 358 ktr_writerequest(td, req); 359 ktr_freerequest(req); 360 sx_xunlock(&ktrace_sx); 361 362 ktrace_exit(td); 363 } 364 365 static void 366 ktr_freerequest(struct ktr_request *req) 367 { 368 369 if (req->ktr_buffer != NULL) 370 free(req->ktr_buffer, M_KTRACE); 371 mtx_lock(&ktrace_mtx); 372 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 373 mtx_unlock(&ktrace_mtx); 374 } 375 376 void 377 ktrsyscall(code, narg, args) 378 int code, narg; 379 register_t args[]; 380 { 381 struct ktr_request *req; 382 struct ktr_syscall *ktp; 383 size_t buflen; 384 char *buf = NULL; 385 386 buflen = sizeof(register_t) * narg; 387 if (buflen > 0) { 388 buf = malloc(buflen, M_KTRACE, M_WAITOK); 389 bcopy(args, buf, buflen); 390 } 391 req = ktr_getrequest(KTR_SYSCALL); 392 if (req == NULL) { 393 if (buf != NULL) 394 free(buf, M_KTRACE); 395 return; 396 } 397 ktp = &req->ktr_data.ktr_syscall; 398 ktp->ktr_code = code; 399 ktp->ktr_narg = narg; 400 if (buflen > 0) { 401 req->ktr_header.ktr_len = buflen; 402 
req->ktr_buffer = buf; 403 } 404 ktr_submitrequest(curthread, req); 405 } 406 407 void 408 ktrsysret(code, error, retval) 409 int code, error; 410 register_t retval; 411 { 412 struct ktr_request *req; 413 struct ktr_sysret *ktp; 414 415 req = ktr_getrequest(KTR_SYSRET); 416 if (req == NULL) 417 return; 418 ktp = &req->ktr_data.ktr_sysret; 419 ktp->ktr_code = code; 420 ktp->ktr_error = error; 421 ktp->ktr_retval = retval; /* what about val2 ? */ 422 ktr_submitrequest(curthread, req); 423 } 424 425 /* 426 * When a process exits, drain per-process asynchronous trace records. 427 */ 428 void 429 ktrprocexit(struct thread *td) 430 { 431 432 ktrace_enter(td); 433 sx_xlock(&ktrace_sx); 434 ktr_drain(td); 435 sx_xunlock(&ktrace_sx); 436 ktrace_exit(td); 437 } 438 439 /* 440 * When a thread returns, drain any asynchronous records generated by the 441 * system call. 442 */ 443 void 444 ktruserret(struct thread *td) 445 { 446 447 ktrace_enter(td); 448 sx_xlock(&ktrace_sx); 449 ktr_drain(td); 450 sx_xunlock(&ktrace_sx); 451 ktrace_exit(td); 452 } 453 454 void 455 ktrnamei(path) 456 char *path; 457 { 458 struct ktr_request *req; 459 int namelen; 460 char *buf = NULL; 461 462 namelen = strlen(path); 463 if (namelen > 0) { 464 buf = malloc(namelen, M_KTRACE, M_WAITOK); 465 bcopy(path, buf, namelen); 466 } 467 req = ktr_getrequest(KTR_NAMEI); 468 if (req == NULL) { 469 if (buf != NULL) 470 free(buf, M_KTRACE); 471 return; 472 } 473 if (namelen > 0) { 474 req->ktr_header.ktr_len = namelen; 475 req->ktr_buffer = buf; 476 } 477 ktr_submitrequest(curthread, req); 478 } 479 480 void 481 ktrgenio(fd, rw, uio, error) 482 int fd; 483 enum uio_rw rw; 484 struct uio *uio; 485 int error; 486 { 487 struct ktr_request *req; 488 struct ktr_genio *ktg; 489 int datalen; 490 char *buf; 491 492 if (error) { 493 free(uio, M_IOV); 494 return; 495 } 496 uio->uio_offset = 0; 497 uio->uio_rw = UIO_WRITE; 498 datalen = imin(uio->uio_resid, ktr_geniosize); 499 buf = malloc(datalen, M_KTRACE, M_WAITOK); 
500 error = uiomove(buf, datalen, uio); 501 free(uio, M_IOV); 502 if (error) { 503 free(buf, M_KTRACE); 504 return; 505 } 506 req = ktr_getrequest(KTR_GENIO); 507 if (req == NULL) { 508 free(buf, M_KTRACE); 509 return; 510 } 511 ktg = &req->ktr_data.ktr_genio; 512 ktg->ktr_fd = fd; 513 ktg->ktr_rw = rw; 514 req->ktr_header.ktr_len = datalen; 515 req->ktr_buffer = buf; 516 ktr_submitrequest(curthread, req); 517 } 518 519 void 520 ktrpsig(sig, action, mask, code) 521 int sig; 522 sig_t action; 523 sigset_t *mask; 524 int code; 525 { 526 struct ktr_request *req; 527 struct ktr_psig *kp; 528 529 req = ktr_getrequest(KTR_PSIG); 530 if (req == NULL) 531 return; 532 kp = &req->ktr_data.ktr_psig; 533 kp->signo = (char)sig; 534 kp->action = action; 535 kp->mask = *mask; 536 kp->code = code; 537 ktr_enqueuerequest(curthread, req); 538 } 539 540 void 541 ktrcsw(out, user) 542 int out, user; 543 { 544 struct ktr_request *req; 545 struct ktr_csw *kc; 546 547 req = ktr_getrequest(KTR_CSW); 548 if (req == NULL) 549 return; 550 kc = &req->ktr_data.ktr_csw; 551 kc->out = out; 552 kc->user = user; 553 ktr_enqueuerequest(curthread, req); 554 } 555 #endif /* KTRACE */ 556 557 /* Interface and common routines */ 558 559 #ifndef _SYS_SYSPROTO_H_ 560 struct ktrace_args { 561 char *fname; 562 int ops; 563 int facs; 564 int pid; 565 }; 566 #endif 567 /* ARGSUSED */ 568 int 569 ktrace(td, uap) 570 struct thread *td; 571 register struct ktrace_args *uap; 572 { 573 #ifdef KTRACE 574 register struct vnode *vp = NULL; 575 register struct proc *p; 576 struct pgrp *pg; 577 int facs = uap->facs & ~KTRFAC_ROOT; 578 int ops = KTROP(uap->ops); 579 int descend = uap->ops & KTRFLAG_DESCEND; 580 int nfound, ret = 0; 581 int flags, error = 0, vfslocked; 582 struct nameidata nd; 583 struct ucred *cred; 584 585 /* 586 * Need something to (un)trace. 
587 */ 588 if (ops != KTROP_CLEARFILE && facs == 0) 589 return (EINVAL); 590 591 ktrace_enter(td); 592 if (ops != KTROP_CLEAR) { 593 /* 594 * an operation which requires a file argument. 595 */ 596 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 597 uap->fname, td); 598 flags = FREAD | FWRITE | O_NOFOLLOW; 599 error = vn_open(&nd, &flags, 0, NULL); 600 if (error) { 601 ktrace_exit(td); 602 return (error); 603 } 604 vfslocked = NDHASGIANT(&nd); 605 NDFREE(&nd, NDF_ONLY_PNBUF); 606 vp = nd.ni_vp; 607 VOP_UNLOCK(vp, 0, td); 608 if (vp->v_type != VREG) { 609 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 610 VFS_UNLOCK_GIANT(vfslocked); 611 ktrace_exit(td); 612 return (EACCES); 613 } 614 VFS_UNLOCK_GIANT(vfslocked); 615 } 616 /* 617 * Clear all uses of the tracefile. 618 */ 619 if (ops == KTROP_CLEARFILE) { 620 int vrele_count; 621 622 vrele_count = 0; 623 sx_slock(&allproc_lock); 624 FOREACH_PROC_IN_SYSTEM(p) { 625 PROC_LOCK(p); 626 if (p->p_tracevp == vp) { 627 if (ktrcanset(td, p)) { 628 mtx_lock(&ktrace_mtx); 629 cred = p->p_tracecred; 630 p->p_tracecred = NULL; 631 p->p_tracevp = NULL; 632 p->p_traceflag = 0; 633 mtx_unlock(&ktrace_mtx); 634 vrele_count++; 635 crfree(cred); 636 } else 637 error = EPERM; 638 } 639 PROC_UNLOCK(p); 640 } 641 sx_sunlock(&allproc_lock); 642 if (vrele_count > 0) { 643 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 644 while (vrele_count-- > 0) 645 vrele(vp); 646 VFS_UNLOCK_GIANT(vfslocked); 647 } 648 goto done; 649 } 650 /* 651 * do it 652 */ 653 sx_slock(&proctree_lock); 654 if (uap->pid < 0) { 655 /* 656 * by process group 657 */ 658 pg = pgfind(-uap->pid); 659 if (pg == NULL) { 660 sx_sunlock(&proctree_lock); 661 error = ESRCH; 662 goto done; 663 } 664 /* 665 * ktrops() may call vrele(). Lock pg_members 666 * by the proctree_lock rather than pg_mtx. 
667 */ 668 PGRP_UNLOCK(pg); 669 nfound = 0; 670 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 671 PROC_LOCK(p); 672 if (p_cansee(td, p) != 0) { 673 PROC_UNLOCK(p); 674 continue; 675 } 676 PROC_UNLOCK(p); 677 nfound++; 678 if (descend) 679 ret |= ktrsetchildren(td, p, ops, facs, vp); 680 else 681 ret |= ktrops(td, p, ops, facs, vp); 682 } 683 if (nfound == 0) { 684 sx_sunlock(&proctree_lock); 685 error = ESRCH; 686 goto done; 687 } 688 } else { 689 /* 690 * by pid 691 */ 692 p = pfind(uap->pid); 693 if (p == NULL) { 694 sx_sunlock(&proctree_lock); 695 error = ESRCH; 696 goto done; 697 } 698 error = p_cansee(td, p); 699 /* 700 * The slock of the proctree lock will keep this process 701 * from going away, so unlocking the proc here is ok. 702 */ 703 PROC_UNLOCK(p); 704 if (error) { 705 sx_sunlock(&proctree_lock); 706 goto done; 707 } 708 if (descend) 709 ret |= ktrsetchildren(td, p, ops, facs, vp); 710 else 711 ret |= ktrops(td, p, ops, facs, vp); 712 } 713 sx_sunlock(&proctree_lock); 714 if (!ret) 715 error = EPERM; 716 done: 717 if (vp != NULL) { 718 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 719 (void) vn_close(vp, FWRITE, td->td_ucred, td); 720 VFS_UNLOCK_GIANT(vfslocked); 721 } 722 ktrace_exit(td); 723 return (error); 724 #else /* !KTRACE */ 725 return (ENOSYS); 726 #endif /* KTRACE */ 727 } 728 729 /* ARGSUSED */ 730 int 731 utrace(td, uap) 732 struct thread *td; 733 register struct utrace_args *uap; 734 { 735 736 #ifdef KTRACE 737 struct ktr_request *req; 738 void *cp; 739 int error; 740 741 if (!KTRPOINT(td, KTR_USER)) 742 return (0); 743 if (uap->len > KTR_USER_MAXLEN) 744 return (EINVAL); 745 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 746 error = copyin(uap->addr, cp, uap->len); 747 if (error) { 748 free(cp, M_KTRACE); 749 return (error); 750 } 751 req = ktr_getrequest(KTR_USER); 752 if (req == NULL) { 753 free(cp, M_KTRACE); 754 return (ENOMEM); 755 } 756 req->ktr_buffer = cp; 757 req->ktr_header.ktr_len = uap->len; 758 ktr_submitrequest(td, req); 759 return 
(0); 760 #else /* !KTRACE */ 761 return (ENOSYS); 762 #endif /* KTRACE */ 763 } 764 765 #ifdef KTRACE 766 static int 767 ktrops(td, p, ops, facs, vp) 768 struct thread *td; 769 struct proc *p; 770 int ops, facs; 771 struct vnode *vp; 772 { 773 struct vnode *tracevp = NULL; 774 struct ucred *tracecred = NULL; 775 776 PROC_LOCK(p); 777 if (!ktrcanset(td, p)) { 778 PROC_UNLOCK(p); 779 return (0); 780 } 781 mtx_lock(&ktrace_mtx); 782 if (ops == KTROP_SET) { 783 if (p->p_tracevp != vp) { 784 /* 785 * if trace file already in use, relinquish below 786 */ 787 tracevp = p->p_tracevp; 788 VREF(vp); 789 p->p_tracevp = vp; 790 } 791 if (p->p_tracecred != td->td_ucred) { 792 tracecred = p->p_tracecred; 793 p->p_tracecred = crhold(td->td_ucred); 794 } 795 p->p_traceflag |= facs; 796 if (priv_check(td, PRIV_KTRACE) == 0) 797 p->p_traceflag |= KTRFAC_ROOT; 798 } else { 799 /* KTROP_CLEAR */ 800 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 801 /* no more tracing */ 802 p->p_traceflag = 0; 803 tracevp = p->p_tracevp; 804 p->p_tracevp = NULL; 805 tracecred = p->p_tracecred; 806 p->p_tracecred = NULL; 807 } 808 } 809 mtx_unlock(&ktrace_mtx); 810 PROC_UNLOCK(p); 811 if (tracevp != NULL) { 812 int vfslocked; 813 814 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 815 vrele(tracevp); 816 VFS_UNLOCK_GIANT(vfslocked); 817 } 818 if (tracecred != NULL) 819 crfree(tracecred); 820 821 return (1); 822 } 823 824 static int 825 ktrsetchildren(td, top, ops, facs, vp) 826 struct thread *td; 827 struct proc *top; 828 int ops, facs; 829 struct vnode *vp; 830 { 831 register struct proc *p; 832 register int ret = 0; 833 834 p = top; 835 sx_assert(&proctree_lock, SX_LOCKED); 836 for (;;) { 837 ret |= ktrops(td, p, ops, facs, vp); 838 /* 839 * If this process has children, descend to them next, 840 * otherwise do any siblings, and if done with this level, 841 * follow back up the tree (but not past top). 
842 */ 843 if (!LIST_EMPTY(&p->p_children)) 844 p = LIST_FIRST(&p->p_children); 845 else for (;;) { 846 if (p == top) 847 return (ret); 848 if (LIST_NEXT(p, p_sibling)) { 849 p = LIST_NEXT(p, p_sibling); 850 break; 851 } 852 p = p->p_pptr; 853 } 854 } 855 /*NOTREACHED*/ 856 } 857 858 static void 859 ktr_writerequest(struct thread *td, struct ktr_request *req) 860 { 861 struct ktr_header *kth; 862 struct vnode *vp; 863 struct proc *p; 864 struct ucred *cred; 865 struct uio auio; 866 struct iovec aiov[3]; 867 struct mount *mp; 868 int datalen, buflen, vrele_count; 869 int error, vfslocked; 870 871 /* 872 * We hold the vnode and credential for use in I/O in case ktrace is 873 * disabled on the process as we write out the request. 874 * 875 * XXXRW: This is not ideal: we could end up performing a write after 876 * the vnode has been closed. 877 */ 878 mtx_lock(&ktrace_mtx); 879 vp = td->td_proc->p_tracevp; 880 if (vp != NULL) 881 VREF(vp); 882 cred = td->td_proc->p_tracecred; 883 if (cred != NULL) 884 crhold(cred); 885 mtx_unlock(&ktrace_mtx); 886 887 /* 888 * If vp is NULL, the vp has been cleared out from under this 889 * request, so just drop it. Make sure the credential and vnode are 890 * in sync: we should have both or neither. 
891 */ 892 if (vp == NULL) { 893 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 894 return; 895 } 896 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 897 898 kth = &req->ktr_header; 899 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 900 buflen = kth->ktr_len; 901 auio.uio_iov = &aiov[0]; 902 auio.uio_offset = 0; 903 auio.uio_segflg = UIO_SYSSPACE; 904 auio.uio_rw = UIO_WRITE; 905 aiov[0].iov_base = (caddr_t)kth; 906 aiov[0].iov_len = sizeof(struct ktr_header); 907 auio.uio_resid = sizeof(struct ktr_header); 908 auio.uio_iovcnt = 1; 909 auio.uio_td = td; 910 if (datalen != 0) { 911 aiov[1].iov_base = (caddr_t)&req->ktr_data; 912 aiov[1].iov_len = datalen; 913 auio.uio_resid += datalen; 914 auio.uio_iovcnt++; 915 kth->ktr_len += datalen; 916 } 917 if (buflen != 0) { 918 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 919 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 920 aiov[auio.uio_iovcnt].iov_len = buflen; 921 auio.uio_resid += buflen; 922 auio.uio_iovcnt++; 923 } 924 925 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 926 vn_start_write(vp, &mp, V_WAIT); 927 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 928 (void)VOP_LEASE(vp, td, cred, LEASE_WRITE); 929 #ifdef MAC 930 error = mac_check_vnode_write(cred, NOCRED, vp); 931 if (error == 0) 932 #endif 933 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 934 VOP_UNLOCK(vp, 0, td); 935 vn_finished_write(mp); 936 vrele(vp); 937 VFS_UNLOCK_GIANT(vfslocked); 938 if (!error) 939 return; 940 /* 941 * If error encountered, give up tracing on this vnode. We defer 942 * all the vrele()'s on the vnode until after we are finished walking 943 * the various lists to avoid needlessly holding locks. 944 */ 945 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 946 error); 947 vrele_count = 0; 948 /* 949 * First, clear this vnode from being used by any processes in the 950 * system. 
951 * XXX - If one process gets an EPERM writing to the vnode, should 952 * we really do this? Other processes might have suitable 953 * credentials for the operation. 954 */ 955 cred = NULL; 956 sx_slock(&allproc_lock); 957 FOREACH_PROC_IN_SYSTEM(p) { 958 PROC_LOCK(p); 959 if (p->p_tracevp == vp) { 960 mtx_lock(&ktrace_mtx); 961 p->p_tracevp = NULL; 962 p->p_traceflag = 0; 963 cred = p->p_tracecred; 964 p->p_tracecred = NULL; 965 mtx_unlock(&ktrace_mtx); 966 vrele_count++; 967 } 968 PROC_UNLOCK(p); 969 if (cred != NULL) { 970 crfree(cred); 971 cred = NULL; 972 } 973 } 974 sx_sunlock(&allproc_lock); 975 976 /* 977 * We can't clear any pending requests in threads that have cached 978 * them but not yet committed them, as those are per-thread. The 979 * thread will have to clear it itself on system call return. 980 */ 981 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 982 while (vrele_count-- > 0) 983 vrele(vp); 984 VFS_UNLOCK_GIANT(vfslocked); 985 } 986 987 /* 988 * Return true if caller has permission to set the ktracing state 989 * of target. Essentially, the target can't possess any 990 * more permissions than the caller. KTRFAC_ROOT signifies that 991 * root previously set the tracing status on the target process, and 992 * so, only root may further change it. 993 */ 994 static int 995 ktrcanset(td, targetp) 996 struct thread *td; 997 struct proc *targetp; 998 { 999 1000 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1001 if (targetp->p_traceflag & KTRFAC_ROOT && 1002 priv_check(td, PRIV_KTRACE)) 1003 return (0); 1004 1005 if (p_candebug(td, targetp) != 0) 1006 return (0); 1007 1008 return (1); 1009 } 1010 1011 #endif /* KTRACE */ 1012