/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user-generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, at
 * the next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

#ifndef KTRACE_REQUEST_POOL
#define KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
        struct ktr_header ktr_header;
        void *ktr_buffer;
        union {
                struct ktr_syscall ktr_syscall;
                struct ktr_sysret ktr_sysret;
                struct ktr_genio ktr_genio;
                struct ktr_psig ktr_psig;
                struct ktr_csw ktr_csw;
        } ktr_data;
        STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
        0,                                      /* none */
        offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */
        sizeof(struct ktr_sysret),              /* KTR_SYSRET */
        0,                                      /* KTR_NAMEI */
        sizeof(struct ktr_genio),               /* KTR_GENIO */
        sizeof(struct ktr_psig),                /* KTR_PSIG */
        sizeof(struct ktr_csw),                 /* KTR_CSW */
        0                                       /* KTR_USER */
};

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
struct mtx ktrace_mtx;
static struct cv ktrace_cv;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *, struct proc *);
static int ktrsetchildren(struct thread *, struct proc *, int, int,
    struct vnode *);
static int ktrops(struct thread *, struct proc *, int, int, struct vnode *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
static void
ktrace_enter(struct thread *td)
{

        KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
        td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

        KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
        td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

        KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

static void
ktrace_init(void *dummy)
{
        struct ktr_request *req;
        int i;

        mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
        sx_init(&ktrace_sx, "ktrace_sx");
        cv_init(&ktrace_cv, "ktrace");
        STAILQ_INIT(&ktr_free);
        for (i = 0; i < ktr_requestpool; i++) {
                req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
                STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
        }
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
        struct thread *td;
        u_int newsize, oldsize, wantsize;
        int error;

        /* Handle easy read-only case first to avoid warnings from GCC. */
        if (!req->newptr) {
                mtx_lock(&ktrace_mtx);
                oldsize = ktr_requestpool;
                mtx_unlock(&ktrace_mtx);
                return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
        }

        error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
        if (error)
                return (error);
        td = curthread;
        ktrace_enter(td);
        mtx_lock(&ktrace_mtx);
        oldsize = ktr_requestpool;
        newsize = ktrace_resize_pool(wantsize);
        mtx_unlock(&ktrace_mtx);
        ktrace_exit(td);
        error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
        if (error)
                return (error);
        if (wantsize > oldsize && newsize < wantsize)
                return (ENOSPC);
        return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "");

static u_int
ktrace_resize_pool(u_int newsize)
{
        struct ktr_request *req;
        int bound;

        mtx_assert(&ktrace_mtx, MA_OWNED);
        print_message = 1;
        bound = newsize - ktr_requestpool;
        if (bound == 0)
                return (ktr_requestpool);
        if (bound < 0)
                /* Shrink pool down to newsize if possible. */
                while (bound++ < 0) {
                        req = STAILQ_FIRST(&ktr_free);
                        if (req == NULL)
                                return (ktr_requestpool);
                        STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
                        ktr_requestpool--;
                        mtx_unlock(&ktrace_mtx);
                        free(req, M_KTRACE);
                        mtx_lock(&ktrace_mtx);
                }
        else
                /* Grow pool up to newsize. */
                while (bound-- > 0) {
                        mtx_unlock(&ktrace_mtx);
                        req = malloc(sizeof(struct ktr_request), M_KTRACE,
                            M_WAITOK);
                        mtx_lock(&ktrace_mtx);
                        STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
                        ktr_requestpool++;
                }
        return (ktr_requestpool);
}

static struct ktr_request *
ktr_getrequest(int type)
{
        struct ktr_request *req;
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        int pm;

        ktrace_enter(td);       /* XXX: In caller instead? */
        mtx_lock(&ktrace_mtx);
        if (!KTRCHECK(td, type)) {
                mtx_unlock(&ktrace_mtx);
                ktrace_exit(td);
                return (NULL);
        }
        req = STAILQ_FIRST(&ktr_free);
        if (req != NULL) {
                STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
                req->ktr_header.ktr_type = type;
                if (p->p_traceflag & KTRFAC_DROP) {
                        req->ktr_header.ktr_type |= KTR_DROP;
                        p->p_traceflag &= ~KTRFAC_DROP;
                }
                mtx_unlock(&ktrace_mtx);
                microtime(&req->ktr_header.ktr_time);
                req->ktr_header.ktr_pid = p->p_pid;
                req->ktr_header.ktr_tid = td->td_tid;
                bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1);
                req->ktr_buffer = NULL;
                req->ktr_header.ktr_len = 0;
        } else {
                p->p_traceflag |= KTRFAC_DROP;
                pm = print_message;
                print_message = 0;
                mtx_unlock(&ktrace_mtx);
                if (pm)
                        printf("Out of ktrace request objects.\n");
                ktrace_exit(td);
        }
        return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under
 * these circumstances, queue a request to the thread to be written
 * asynchronously later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

        mtx_lock(&ktrace_mtx);
        STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
        mtx_unlock(&ktrace_mtx);
        ktrace_exit(td);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.  This
 * is used both internally before committing other records, and also on
 * system call return.  We drain all the ones we can find at the time when
 * drain is requested, but don't keep draining after that as those events
 * may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
        struct ktr_request *queued_req;
        STAILQ_HEAD(, ktr_request) local_queue;

        ktrace_assert(td);
        sx_assert(&ktrace_sx, SX_XLOCKED);

        STAILQ_INIT(&local_queue);      /* XXXRW: needed? */

        if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
                mtx_lock(&ktrace_mtx);
                STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
                mtx_unlock(&ktrace_mtx);

                while ((queued_req = STAILQ_FIRST(&local_queue))) {
                        STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
                        ktr_writerequest(td, queued_req);
                        ktr_freerequest(queued_req);
                }
        }
}

/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

        ktrace_assert(td);

        sx_xlock(&ktrace_sx);
        ktr_drain(td);
        ktr_writerequest(td, req);
        ktr_freerequest(req);
        sx_xunlock(&ktrace_sx);

        ktrace_exit(td);
}

/*
 * Return a request to the free pool, releasing any attached payload buffer.
 */
static void
ktr_freerequest(struct ktr_request *req)
{

        if (req->ktr_buffer != NULL)
                free(req->ktr_buffer, M_KTRACE);
        mtx_lock(&ktrace_mtx);
        STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
        mtx_unlock(&ktrace_mtx);
}

/*
 * MPSAFE
 */
void
ktrsyscall(code, narg, args)
        int code, narg;
        register_t args[];
{
        struct ktr_request *req;
        struct ktr_syscall *ktp;
        size_t buflen;
        char *buf = NULL;

        buflen = sizeof(register_t) * narg;
        if (buflen > 0) {
                buf = malloc(buflen, M_KTRACE, M_WAITOK);
                bcopy(args, buf, buflen);
        }
        req = ktr_getrequest(KTR_SYSCALL);
        if (req == NULL) {
                if (buf != NULL)
                        free(buf, M_KTRACE);
                return;
        }
        ktp = &req->ktr_data.ktr_syscall;
        ktp->ktr_code = code;
        ktp->ktr_narg = narg;
        if (buflen > 0) {
                req->ktr_header.ktr_len = buflen;
                req->ktr_buffer = buf;
        }
        ktr_submitrequest(curthread, req);
}

/*
 * MPSAFE
 */
void
ktrsysret(code, error, retval)
        int code, error;
        register_t retval;
{
        struct ktr_request *req;
        struct ktr_sysret *ktp;

        req = ktr_getrequest(KTR_SYSRET);
        if (req == NULL)
                return;
        ktp = &req->ktr_data.ktr_sysret;
        ktp->ktr_code = code;
        ktp->ktr_error = error;
        ktp->ktr_retval = retval;               /* what about val2 ? */
        ktr_submitrequest(curthread, req);
}

/*
 * When a process exits, drain per-process asynchronous trace records.
 */
void
ktrprocexit(struct thread *td)
{

        ktrace_enter(td);
        sx_xlock(&ktrace_sx);
        ktr_drain(td);
        sx_xunlock(&ktrace_sx);
        ktrace_exit(td);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

        ktrace_enter(td);
        sx_xlock(&ktrace_sx);
        ktr_drain(td);
        sx_xunlock(&ktrace_sx);
        ktrace_exit(td);
}

void
ktrnamei(path)
        char *path;
{
        struct ktr_request *req;
        int namelen;
        char *buf = NULL;

        namelen = strlen(path);
        if (namelen > 0) {
                buf = malloc(namelen, M_KTRACE, M_WAITOK);
                bcopy(path, buf, namelen);
        }
        req = ktr_getrequest(KTR_NAMEI);
        if (req == NULL) {
                if (buf != NULL)
                        free(buf, M_KTRACE);
                return;
        }
        if (namelen > 0) {
                req->ktr_header.ktr_len = namelen;
                req->ktr_buffer = buf;
        }
        ktr_submitrequest(curthread, req);
}

void
ktrgenio(fd, rw, uio, error)
        int fd;
        enum uio_rw rw;
        struct uio *uio;
        int error;
{
        struct ktr_request *req;
        struct ktr_genio *ktg;
        int datalen;
        char *buf;

        if (error) {
                free(uio, M_IOV);
                return;
        }
        uio->uio_offset = 0;
        uio->uio_rw = UIO_WRITE;
        datalen = imin(uio->uio_resid, ktr_geniosize);
        buf = malloc(datalen, M_KTRACE, M_WAITOK);
        error = uiomove(buf, datalen, uio);
        free(uio, M_IOV);
        if (error) {
                free(buf, M_KTRACE);
                return;
        }
        req = ktr_getrequest(KTR_GENIO);
        if (req == NULL) {
                free(buf, M_KTRACE);
                return;
        }
        ktg = &req->ktr_data.ktr_genio;
        ktg->ktr_fd = fd;
        ktg->ktr_rw = rw;
        req->ktr_header.ktr_len = datalen;
        req->ktr_buffer = buf;
        ktr_submitrequest(curthread, req);
}

void
ktrpsig(sig, action, mask, code)
        int sig;
        sig_t action;
        sigset_t *mask;
        int code;
{
        struct ktr_request *req;
        struct ktr_psig *kp;

        req = ktr_getrequest(KTR_PSIG);
        if (req == NULL)
                return;
        kp = &req->ktr_data.ktr_psig;
        kp->signo = (char)sig;
        kp->action = action;
        kp->mask = *mask;
        kp->code = code;
        ktr_enqueuerequest(curthread, req);
}

void
ktrcsw(out, user)
        int out, user;
{
        struct ktr_request *req;
        struct ktr_csw *kc;

        req = ktr_getrequest(KTR_CSW);
        if (req == NULL)
                return;
        kc = &req->ktr_data.ktr_csw;
        kc->out = out;
        kc->user = user;
        ktr_enqueuerequest(curthread, req);
}
#endif /* KTRACE */

/* Interface and common routines */

/*
 * ktrace system call
 *
 * MPSAFE
 */
#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
        char    *fname;
        int     ops;
        int     facs;
        int     pid;
};
#endif
/* ARGSUSED */
int
ktrace(td, uap)
        struct thread *td;
        register struct ktrace_args *uap;
{
#ifdef KTRACE
        register struct vnode *vp = NULL;
        register struct proc *p;
        struct pgrp *pg;
        int facs = uap->facs & ~KTRFAC_ROOT;
        int ops = KTROP(uap->ops);
        int descend = uap->ops & KTRFLAG_DESCEND;
        int nfound, ret = 0;
        int flags, error = 0, vfslocked;
        struct nameidata nd;
        struct ucred *cred;

        /*
         * Need something to (un)trace.
         */
        if (ops != KTROP_CLEARFILE && facs == 0)
                return (EINVAL);

        ktrace_enter(td);
        if (ops != KTROP_CLEAR) {
                /*
                 * An operation which requires a file argument.
                 */
                NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
                    uap->fname, td);
                flags = FREAD | FWRITE | O_NOFOLLOW;
                error = vn_open(&nd, &flags, 0, -1);
                if (error) {
                        ktrace_exit(td);
                        return (error);
                }
                vfslocked = NDHASGIANT(&nd);
                NDFREE(&nd, NDF_ONLY_PNBUF);
                vp = nd.ni_vp;
                VOP_UNLOCK(vp, 0, td);
                if (vp->v_type != VREG) {
                        (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
                        VFS_UNLOCK_GIANT(vfslocked);
                        ktrace_exit(td);
                        return (EACCES);
                }
                VFS_UNLOCK_GIANT(vfslocked);
        }
        /*
         * Clear all uses of the tracefile.
         */
        if (ops == KTROP_CLEARFILE) {
                sx_slock(&allproc_lock);
                LIST_FOREACH(p, &allproc, p_list) {
                        PROC_LOCK(p);
                        if (p->p_tracevp == vp) {
                                if (ktrcanset(td, p)) {
                                        mtx_lock(&ktrace_mtx);
                                        cred = p->p_tracecred;
                                        p->p_tracecred = NULL;
                                        p->p_tracevp = NULL;
                                        p->p_traceflag = 0;
                                        mtx_unlock(&ktrace_mtx);
                                        PROC_UNLOCK(p);
                                        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
                                        (void) vn_close(vp, FREAD|FWRITE,
                                            cred, td);
                                        VFS_UNLOCK_GIANT(vfslocked);
                                        crfree(cred);
                                } else {
                                        PROC_UNLOCK(p);
                                        error = EPERM;
                                }
                        } else
                                PROC_UNLOCK(p);
                }
                sx_sunlock(&allproc_lock);
                goto done;
        }
        /*
         * do it
         */
        sx_slock(&proctree_lock);
        if (uap->pid < 0) {
                /*
                 * by process group
                 */
                pg = pgfind(-uap->pid);
                if (pg == NULL) {
                        sx_sunlock(&proctree_lock);
                        error = ESRCH;
                        goto done;
                }
                /*
                 * ktrops() may call vrele().  Lock pg_members
                 * by the proctree_lock rather than pg_mtx.
                 */
                PGRP_UNLOCK(pg);
                nfound = 0;
                LIST_FOREACH(p, &pg->pg_members, p_pglist) {
                        PROC_LOCK(p);
                        if (p_cansee(td, p) != 0) {
                                PROC_UNLOCK(p);
                                continue;
                        }
                        PROC_UNLOCK(p);
                        nfound++;
                        if (descend)
                                ret |= ktrsetchildren(td, p, ops, facs, vp);
                        else
                                ret |= ktrops(td, p, ops, facs, vp);
                }
                if (nfound == 0) {
                        sx_sunlock(&proctree_lock);
                        error = ESRCH;
                        goto done;
                }
        } else {
                /*
                 * by pid
                 */
                p = pfind(uap->pid);
                if (p == NULL) {
                        sx_sunlock(&proctree_lock);
                        error = ESRCH;
                        goto done;
                }
                error = p_cansee(td, p);
                /*
                 * The slock of the proctree lock will keep this process
                 * from going away, so unlocking the proc here is ok.
                 */
                PROC_UNLOCK(p);
                if (error) {
                        sx_sunlock(&proctree_lock);
                        goto done;
                }
                if (descend)
                        ret |= ktrsetchildren(td, p, ops, facs, vp);
                else
                        ret |= ktrops(td, p, ops, facs, vp);
        }
        sx_sunlock(&proctree_lock);
        if (!ret)
                error = EPERM;
done:
        if (vp != NULL) {
                vfslocked = VFS_LOCK_GIANT(vp->v_mount);
                (void) vn_close(vp, FWRITE, td->td_ucred, td);
                VFS_UNLOCK_GIANT(vfslocked);
        }
        ktrace_exit(td);
        return (error);
#else /* !KTRACE */
        return (ENOSYS);
#endif /* KTRACE */
}

/*
 * utrace system call
 *
 * MPSAFE
 */
/* ARGSUSED */
int
utrace(td, uap)
        struct thread *td;
        register struct utrace_args *uap;
{

#ifdef KTRACE
        struct ktr_request *req;
        void *cp;
        int error;

        if (!KTRPOINT(td, KTR_USER))
                return (0);
        if (uap->len > KTR_USER_MAXLEN)
                return (EINVAL);
        cp = malloc(uap->len, M_KTRACE, M_WAITOK);
        error = copyin(uap->addr, cp, uap->len);
        if (error) {
                free(cp, M_KTRACE);
                return (error);
        }
        req = ktr_getrequest(KTR_USER);
        if (req == NULL) {
                free(cp, M_KTRACE);
                return (ENOMEM);
        }
        req->ktr_buffer = cp;
        req->ktr_header.ktr_len = uap->len;
        ktr_submitrequest(td, req);
        return (0);
#else /* !KTRACE */
        return (ENOSYS);
#endif /* KTRACE */
}

#ifdef KTRACE
static int
ktrops(td, p, ops, facs, vp)
        struct thread *td;
        struct proc *p;
        int ops, facs;
        struct vnode *vp;
{
        struct vnode *tracevp = NULL;
        struct ucred *tracecred = NULL;

        PROC_LOCK(p);
        if (!ktrcanset(td, p)) {
                PROC_UNLOCK(p);
                return (0);
        }
        mtx_lock(&ktrace_mtx);
        if (ops == KTROP_SET) {
                if (p->p_tracevp != vp) {
                        /*
                         * if trace file already in use, relinquish below
                         */
                        tracevp = p->p_tracevp;
                        VREF(vp);
                        p->p_tracevp = vp;
                }
                if (p->p_tracecred != td->td_ucred) {
                        tracecred = p->p_tracecred;
                        p->p_tracecred = crhold(td->td_ucred);
                }
                p->p_traceflag |= facs;
                if (priv_check_cred(td->td_ucred, PRIV_KTRACE,
                    SUSER_ALLOWJAIL) == 0)
                        p->p_traceflag |= KTRFAC_ROOT;
        } else {
                /* KTROP_CLEAR */
                if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
                        /* no more tracing */
                        p->p_traceflag = 0;
                        tracevp = p->p_tracevp;
                        p->p_tracevp = NULL;
                        tracecred = p->p_tracecred;
                        p->p_tracecred = NULL;
                }
        }
        mtx_unlock(&ktrace_mtx);
        PROC_UNLOCK(p);
        if (tracevp != NULL) {
                int vfslocked;

                vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
                vrele(tracevp);
                VFS_UNLOCK_GIANT(vfslocked);
        }
        if (tracecred != NULL)
                crfree(tracecred);

        return (1);
}

static int
ktrsetchildren(td, top, ops, facs, vp)
        struct thread *td;
        struct proc *top;
        int ops, facs;
        struct vnode *vp;
{
        register struct proc *p;
        register int ret = 0;

        p = top;
        sx_assert(&proctree_lock, SX_LOCKED);
        for (;;) {
                ret |= ktrops(td, p, ops, facs, vp);
                /*
                 * If this process has children, descend to them next,
                 * otherwise do any siblings, and if done with this level,
                 * follow back up the tree (but not past top).
                 */
                if (!LIST_EMPTY(&p->p_children))
                        p = LIST_FIRST(&p->p_children);
                else for (;;) {
                        if (p == top)
                                return (ret);
                        if (LIST_NEXT(p, p_sibling)) {
                                p = LIST_NEXT(p, p_sibling);
                                break;
                        }
                        p = p->p_pptr;
                }
        }
        /*NOTREACHED*/
}

static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
        struct ktr_header *kth;
        struct vnode *vp;
        struct proc *p;
        struct ucred *cred;
        struct uio auio;
        struct iovec aiov[3];
        struct mount *mp;
        int datalen, buflen, vrele_count;
        int error, vfslocked;

        /*
         * We hold the vnode and credential for use in I/O in case ktrace is
         * disabled on the process as we write out the request.
         *
         * XXXRW: This is not ideal: we could end up performing a write after
         * the vnode has been closed.
         */
        mtx_lock(&ktrace_mtx);
        vp = td->td_proc->p_tracevp;
        if (vp != NULL)
                VREF(vp);
        cred = td->td_proc->p_tracecred;
        if (cred != NULL)
                crhold(cred);
        mtx_unlock(&ktrace_mtx);

        /*
         * If vp is NULL, the vp has been cleared out from under this
         * request, so just drop it.  Make sure the credential and vnode are
         * in sync: we should have both or neither.
         */
        if (vp == NULL) {
                KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
                return;
        }
        KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));

        kth = &req->ktr_header;
        datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
        buflen = kth->ktr_len;
        auio.uio_iov = &aiov[0];
        auio.uio_offset = 0;
        auio.uio_segflg = UIO_SYSSPACE;
        auio.uio_rw = UIO_WRITE;
        aiov[0].iov_base = (caddr_t)kth;
        aiov[0].iov_len = sizeof(struct ktr_header);
        auio.uio_resid = sizeof(struct ktr_header);
        auio.uio_iovcnt = 1;
        auio.uio_td = td;
        if (datalen != 0) {
                aiov[1].iov_base = (caddr_t)&req->ktr_data;
                aiov[1].iov_len = datalen;
                auio.uio_resid += datalen;
                auio.uio_iovcnt++;
                kth->ktr_len += datalen;
        }
        if (buflen != 0) {
                KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
                aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
                aiov[auio.uio_iovcnt].iov_len = buflen;
                auio.uio_resid += buflen;
                auio.uio_iovcnt++;
        }

        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        vn_start_write(vp, &mp, V_WAIT);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
        (void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
#ifdef MAC
        error = mac_check_vnode_write(cred, NOCRED, vp);
        if (error == 0)
#endif
                error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
        VOP_UNLOCK(vp, 0, td);
        vn_finished_write(mp);
        vrele(vp);
        VFS_UNLOCK_GIANT(vfslocked);
        if (!error)
                return;
        /*
         * If error encountered, give up tracing on this vnode.  We defer
         * all the vrele()'s on the vnode until after we are finished walking
         * the various lists to avoid needlessly holding locks.
         */
        log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
            error);
        vrele_count = 0;
        /*
         * First, clear this vnode from being used by any processes in the
         * system.
         * XXX - If one process gets an EPERM writing to the vnode, should
         *       we really do this?  Other processes might have suitable
         *       credentials for the operation.
         */
        cred = NULL;
        sx_slock(&allproc_lock);
        LIST_FOREACH(p, &allproc, p_list) {
                PROC_LOCK(p);
                if (p->p_tracevp == vp) {
                        mtx_lock(&ktrace_mtx);
                        p->p_tracevp = NULL;
                        p->p_traceflag = 0;
                        cred = p->p_tracecred;
                        p->p_tracecred = NULL;
                        mtx_unlock(&ktrace_mtx);
                        vrele_count++;
                }
                PROC_UNLOCK(p);
                if (cred != NULL) {
                        crfree(cred);
                        cred = NULL;
                }
        }
        sx_sunlock(&allproc_lock);

        /*
         * We can't clear any pending requests in threads that have cached
         * them but not yet committed them, as those are per-thread.  The
         * thread will have to clear it itself on system call return.
         */
        vfslocked = VFS_LOCK_GIANT(vp->v_mount);
        while (vrele_count-- > 0)
                vrele(vp);
        VFS_UNLOCK_GIANT(vfslocked);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(td, targetp)
        struct thread *td;
        struct proc *targetp;
{

        PROC_LOCK_ASSERT(targetp, MA_OWNED);
        if (targetp->p_traceflag & KTRFAC_ROOT &&
            priv_check_cred(td->td_ucred, PRIV_KTRACE, SUSER_ALLOWJAIL))
                return (0);

        if (p_candebug(td, targetp) != 0)
                return (0);

        return (1);
}

#endif /* KTRACE */