1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_ktrace.h" 38 #include "opt_mac.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/fcntl.h> 43 #include <sys/kernel.h> 44 #include <sys/kthread.h> 45 #include <sys/lock.h> 46 #include <sys/mutex.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/namei.h> 50 #include <sys/priv.h> 51 #include <sys/proc.h> 52 #include <sys/unistd.h> 53 #include <sys/vnode.h> 54 #include <sys/ktrace.h> 55 #include <sys/sx.h> 56 #include <sys/sysctl.h> 57 #include <sys/syslog.h> 58 #include <sys/sysproto.h> 59 60 #include <security/mac/mac_framework.h> 61 62 /* 63 * The ktrace facility allows the tracing of certain key events in user space 64 * processes, such as system calls, signal delivery, context switches, and 65 * user generated events using utrace(2). It works by streaming event 66 * records and data to a vnode associated with the process using the 67 * ktrace(2) system call. In general, records can be written directly from 68 * the context that generates the event. One important exception to this is 69 * during a context switch, where sleeping is not permitted. To handle this 70 * case, trace events are generated using in-kernel ktr_request records, and 71 * then delivered to disk at a convenient moment -- either immediately, the 72 * next traceable event, at system call return, or at process exit. 73 * 74 * When dealing with multiple threads or processes writing to the same event 75 * log, ordering guarantees are weak: specifically, if an event has multiple 76 * records (i.e., system call enter and return), they may be interlaced with 77 * records from another event. Process and thread ID information is provided 78 * in the record, and user applications can de-interlace events if required. 79 */ 80 81 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 82 83 #ifdef KTRACE 84 85 #ifndef KTRACE_REQUEST_POOL 86 #define KTRACE_REQUEST_POOL 100 87 #endif 88 89 struct ktr_request { 90 struct ktr_header ktr_header; 91 void *ktr_buffer; 92 union { 93 struct ktr_syscall ktr_syscall; 94 struct ktr_sysret ktr_sysret; 95 struct ktr_genio ktr_genio; 96 struct ktr_psig ktr_psig; 97 struct ktr_csw ktr_csw; 98 } ktr_data; 99 STAILQ_ENTRY(ktr_request) ktr_list; 100 }; 101 102 static int data_lengths[] = { 103 0, /* none */ 104 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 105 sizeof(struct ktr_sysret), /* KTR_SYSRET */ 106 0, /* KTR_NAMEI */ 107 sizeof(struct ktr_genio), /* KTR_GENIO */ 108 sizeof(struct ktr_psig), /* KTR_PSIG */ 109 sizeof(struct ktr_csw), /* KTR_CSW */ 110 0 /* KTR_USER */ 111 }; 112 113 static STAILQ_HEAD(, ktr_request) ktr_free; 114 115 static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 116 117 static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 118 TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 119 120 static u_int ktr_geniosize = PAGE_SIZE; 121 TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 122 SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 123 0, "Maximum size of genio event payload"); 124 125 static int print_message = 1; 126 struct mtx ktrace_mtx; 127 static struct sx ktrace_sx; 128 129 static void ktrace_init(void *dummy); 130 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 131 static u_int ktrace_resize_pool(u_int newsize); 132 static struct ktr_request *ktr_getrequest(int type); 133 static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 134 static void ktr_freerequest(struct ktr_request *req); 135 static void ktr_writerequest(struct thread *td, struct ktr_request *req); 136 static int ktrcanset(struct thread *,struct proc *); 137 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 138 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 139 140 /* 141 * ktrace itself generates events, such as context switches, which we do not 142 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 143 * whether or not it is in a region where tracing of events should be 144 * suppressed. 145 */ 146 static void 147 ktrace_enter(struct thread *td) 148 { 149 150 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 151 td->td_pflags |= TDP_INKTRACE; 152 } 153 154 static void 155 ktrace_exit(struct thread *td) 156 { 157 158 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 159 td->td_pflags &= ~TDP_INKTRACE; 160 } 161 162 static void 163 ktrace_assert(struct thread *td) 164 { 165 166 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 167 } 168 169 static void 170 ktrace_init(void *dummy) 171 { 172 struct ktr_request *req; 173 int i; 174 175 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 176 sx_init(&ktrace_sx, "ktrace_sx"); 177 STAILQ_INIT(&ktr_free); 178 for (i = 0; i < ktr_requestpool; i++) { 179 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 180 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 181 } 182 } 183 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 184 185 static int 186 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 187 { 188 struct thread *td; 189 u_int newsize, oldsize, wantsize; 190 int error; 191 192 /* Handle easy read-only case first to avoid warnings from GCC. */ 193 if (!req->newptr) { 194 mtx_lock(&ktrace_mtx); 195 oldsize = ktr_requestpool; 196 mtx_unlock(&ktrace_mtx); 197 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 198 } 199 200 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 201 if (error) 202 return (error); 203 td = curthread; 204 ktrace_enter(td); 205 mtx_lock(&ktrace_mtx); 206 oldsize = ktr_requestpool; 207 newsize = ktrace_resize_pool(wantsize); 208 mtx_unlock(&ktrace_mtx); 209 ktrace_exit(td); 210 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 211 if (error) 212 return (error); 213 if (wantsize > oldsize && newsize < wantsize) 214 return (ENOSPC); 215 return (0); 216 } 217 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 218 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", ""); 219 220 static u_int 221 ktrace_resize_pool(u_int newsize) 222 { 223 struct ktr_request *req; 224 int bound; 225 226 mtx_assert(&ktrace_mtx, MA_OWNED); 227 print_message = 1; 228 bound = newsize - ktr_requestpool; 229 if (bound == 0) 230 return (ktr_requestpool); 231 if (bound < 0) 232 /* Shrink pool down to newsize if possible. */ 233 while (bound++ < 0) { 234 req = STAILQ_FIRST(&ktr_free); 235 if (req == NULL) 236 return (ktr_requestpool); 237 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 238 ktr_requestpool--; 239 mtx_unlock(&ktrace_mtx); 240 free(req, M_KTRACE); 241 mtx_lock(&ktrace_mtx); 242 } 243 else 244 /* Grow pool up to newsize. */ 245 while (bound-- > 0) { 246 mtx_unlock(&ktrace_mtx); 247 req = malloc(sizeof(struct ktr_request), M_KTRACE, 248 M_WAITOK); 249 mtx_lock(&ktrace_mtx); 250 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 251 ktr_requestpool++; 252 } 253 return (ktr_requestpool); 254 } 255 256 static struct ktr_request * 257 ktr_getrequest(int type) 258 { 259 struct ktr_request *req; 260 struct thread *td = curthread; 261 struct proc *p = td->td_proc; 262 int pm; 263 264 ktrace_enter(td); /* XXX: In caller instead? */ 265 mtx_lock(&ktrace_mtx); 266 if (!KTRCHECK(td, type)) { 267 mtx_unlock(&ktrace_mtx); 268 ktrace_exit(td); 269 return (NULL); 270 } 271 req = STAILQ_FIRST(&ktr_free); 272 if (req != NULL) { 273 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 274 req->ktr_header.ktr_type = type; 275 if (p->p_traceflag & KTRFAC_DROP) { 276 req->ktr_header.ktr_type |= KTR_DROP; 277 p->p_traceflag &= ~KTRFAC_DROP; 278 } 279 mtx_unlock(&ktrace_mtx); 280 microtime(&req->ktr_header.ktr_time); 281 req->ktr_header.ktr_pid = p->p_pid; 282 req->ktr_header.ktr_tid = td->td_tid; 283 bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1); 284 req->ktr_buffer = NULL; 285 req->ktr_header.ktr_len = 0; 286 } else { 287 p->p_traceflag |= KTRFAC_DROP; 288 pm = print_message; 289 print_message = 0; 290 mtx_unlock(&ktrace_mtx); 291 if (pm) 292 printf("Out of ktrace request objects.\n"); 293 ktrace_exit(td); 294 } 295 return (req); 296 } 297 298 /* 299 * Some trace generation environments don't permit direct access to VFS, 300 * such as during a context switch where sleeping is not allowed. Under these 301 * circumstances, queue a request to the thread to be written asynchronously 302 * later. 303 */ 304 static void 305 ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 306 { 307 308 mtx_lock(&ktrace_mtx); 309 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 310 mtx_unlock(&ktrace_mtx); 311 ktrace_exit(td); 312 } 313 314 /* 315 * Drain any pending ktrace records from the per-thread queue to disk. This 316 * is used both internally before committing other records, and also on 317 * system call return. We drain all the ones we can find at the time when 318 * drain is requested, but don't keep draining after that as those events 319 * may me approximately "after" the current event. 320 */ 321 static void 322 ktr_drain(struct thread *td) 323 { 324 struct ktr_request *queued_req; 325 STAILQ_HEAD(, ktr_request) local_queue; 326 327 ktrace_assert(td); 328 sx_assert(&ktrace_sx, SX_XLOCKED); 329 330 STAILQ_INIT(&local_queue); /* XXXRW: needed? */ 331 332 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 333 mtx_lock(&ktrace_mtx); 334 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 335 mtx_unlock(&ktrace_mtx); 336 337 while ((queued_req = STAILQ_FIRST(&local_queue))) { 338 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 339 ktr_writerequest(td, queued_req); 340 ktr_freerequest(queued_req); 341 } 342 } 343 } 344 345 /* 346 * Submit a trace record for immediate commit to disk -- to be used only 347 * where entering VFS is OK. First drain any pending records that may have 348 * been cached in the thread. 349 */ 350 static void 351 ktr_submitrequest(struct thread *td, struct ktr_request *req) 352 { 353 354 ktrace_assert(td); 355 356 sx_xlock(&ktrace_sx); 357 ktr_drain(td); 358 ktr_writerequest(td, req); 359 ktr_freerequest(req); 360 sx_xunlock(&ktrace_sx); 361 362 ktrace_exit(td); 363 } 364 365 static void 366 ktr_freerequest(struct ktr_request *req) 367 { 368 369 if (req->ktr_buffer != NULL) 370 free(req->ktr_buffer, M_KTRACE); 371 mtx_lock(&ktrace_mtx); 372 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 373 mtx_unlock(&ktrace_mtx); 374 } 375 376 void 377 ktrsyscall(code, narg, args) 378 int code, narg; 379 register_t args[]; 380 { 381 struct ktr_request *req; 382 struct ktr_syscall *ktp; 383 size_t buflen; 384 char *buf = NULL; 385 386 buflen = sizeof(register_t) * narg; 387 if (buflen > 0) { 388 buf = malloc(buflen, M_KTRACE, M_WAITOK); 389 bcopy(args, buf, buflen); 390 } 391 req = ktr_getrequest(KTR_SYSCALL); 392 if (req == NULL) { 393 if (buf != NULL) 394 free(buf, M_KTRACE); 395 return; 396 } 397 ktp = &req->ktr_data.ktr_syscall; 398 ktp->ktr_code = code; 399 ktp->ktr_narg = narg; 400 if (buflen > 0) { 401 req->ktr_header.ktr_len = buflen; 402 req->ktr_buffer = buf; 403 } 404 ktr_submitrequest(curthread, req); 405 } 406 407 void 408 ktrsysret(code, error, retval) 409 int code, error; 410 register_t retval; 411 { 412 struct ktr_request *req; 413 struct ktr_sysret *ktp; 414 415 req = ktr_getrequest(KTR_SYSRET); 416 if (req == NULL) 417 return; 418 ktp = &req->ktr_data.ktr_sysret; 419 ktp->ktr_code = code; 420 ktp->ktr_error = error; 421 ktp->ktr_retval = retval; /* what about val2 ? */ 422 ktr_submitrequest(curthread, req); 423 } 424 425 /* 426 * When a process exits, drain per-process asynchronous trace records. 427 */ 428 void 429 ktrprocexit(struct thread *td) 430 { 431 432 ktrace_enter(td); 433 sx_xlock(&ktrace_sx); 434 ktr_drain(td); 435 sx_xunlock(&ktrace_sx); 436 ktrace_exit(td); 437 } 438 439 /* 440 * When a thread returns, drain any asynchronous records generated by the 441 * system call. 442 */ 443 void 444 ktruserret(struct thread *td) 445 { 446 447 if (STAILQ_EMPTY(&td->td_proc->p_ktr)) 448 return; 449 ktrace_enter(td); 450 sx_xlock(&ktrace_sx); 451 ktr_drain(td); 452 sx_xunlock(&ktrace_sx); 453 ktrace_exit(td); 454 } 455 456 void 457 ktrnamei(path) 458 char *path; 459 { 460 struct ktr_request *req; 461 int namelen; 462 char *buf = NULL; 463 464 namelen = strlen(path); 465 if (namelen > 0) { 466 buf = malloc(namelen, M_KTRACE, M_WAITOK); 467 bcopy(path, buf, namelen); 468 } 469 req = ktr_getrequest(KTR_NAMEI); 470 if (req == NULL) { 471 if (buf != NULL) 472 free(buf, M_KTRACE); 473 return; 474 } 475 if (namelen > 0) { 476 req->ktr_header.ktr_len = namelen; 477 req->ktr_buffer = buf; 478 } 479 ktr_submitrequest(curthread, req); 480 } 481 482 void 483 ktrgenio(fd, rw, uio, error) 484 int fd; 485 enum uio_rw rw; 486 struct uio *uio; 487 int error; 488 { 489 struct ktr_request *req; 490 struct ktr_genio *ktg; 491 int datalen; 492 char *buf; 493 494 if (error) { 495 free(uio, M_IOV); 496 return; 497 } 498 uio->uio_offset = 0; 499 uio->uio_rw = UIO_WRITE; 500 datalen = imin(uio->uio_resid, ktr_geniosize); 501 buf = malloc(datalen, M_KTRACE, M_WAITOK); 502 error = uiomove(buf, datalen, uio); 503 free(uio, M_IOV); 504 if (error) { 505 free(buf, M_KTRACE); 506 return; 507 } 508 req = ktr_getrequest(KTR_GENIO); 509 if (req == NULL) { 510 free(buf, M_KTRACE); 511 return; 512 } 513 ktg = &req->ktr_data.ktr_genio; 514 ktg->ktr_fd = fd; 515 ktg->ktr_rw = rw; 516 req->ktr_header.ktr_len = datalen; 517 req->ktr_buffer = buf; 518 ktr_submitrequest(curthread, req); 519 } 520 521 void 522 ktrpsig(sig, action, mask, code) 523 int sig; 524 sig_t action; 525 sigset_t *mask; 526 int code; 527 { 528 struct ktr_request *req; 529 struct ktr_psig *kp; 530 531 req = ktr_getrequest(KTR_PSIG); 532 if (req == NULL) 533 return; 534 kp = &req->ktr_data.ktr_psig; 535 kp->signo = (char)sig; 536 kp->action = action; 537 kp->mask = *mask; 538 kp->code = code; 539 ktr_enqueuerequest(curthread, req); 540 } 541 542 void 543 ktrcsw(out, user) 544 int out, user; 545 { 546 struct ktr_request *req; 547 struct ktr_csw *kc; 548 549 req = ktr_getrequest(KTR_CSW); 550 if (req == NULL) 551 return; 552 kc = &req->ktr_data.ktr_csw; 553 kc->out = out; 554 kc->user = user; 555 ktr_enqueuerequest(curthread, req); 556 } 557 #endif /* KTRACE */ 558 559 /* Interface and common routines */ 560 561 #ifndef _SYS_SYSPROTO_H_ 562 struct ktrace_args { 563 char *fname; 564 int ops; 565 int facs; 566 int pid; 567 }; 568 #endif 569 /* ARGSUSED */ 570 int 571 ktrace(td, uap) 572 struct thread *td; 573 register struct ktrace_args *uap; 574 { 575 #ifdef KTRACE 576 register struct vnode *vp = NULL; 577 register struct proc *p; 578 struct pgrp *pg; 579 int facs = uap->facs & ~KTRFAC_ROOT; 580 int ops = KTROP(uap->ops); 581 int descend = uap->ops & KTRFLAG_DESCEND; 582 int nfound, ret = 0; 583 int flags, error = 0, vfslocked; 584 struct nameidata nd; 585 struct ucred *cred; 586 587 /* 588 * Need something to (un)trace. 589 */ 590 if (ops != KTROP_CLEARFILE && facs == 0) 591 return (EINVAL); 592 593 ktrace_enter(td); 594 if (ops != KTROP_CLEAR) { 595 /* 596 * an operation which requires a file argument. 597 */ 598 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 599 uap->fname, td); 600 flags = FREAD | FWRITE | O_NOFOLLOW; 601 error = vn_open(&nd, &flags, 0, NULL); 602 if (error) { 603 ktrace_exit(td); 604 return (error); 605 } 606 vfslocked = NDHASGIANT(&nd); 607 NDFREE(&nd, NDF_ONLY_PNBUF); 608 vp = nd.ni_vp; 609 VOP_UNLOCK(vp, 0, td); 610 if (vp->v_type != VREG) { 611 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 612 VFS_UNLOCK_GIANT(vfslocked); 613 ktrace_exit(td); 614 return (EACCES); 615 } 616 VFS_UNLOCK_GIANT(vfslocked); 617 } 618 /* 619 * Clear all uses of the tracefile. 620 */ 621 if (ops == KTROP_CLEARFILE) { 622 int vrele_count; 623 624 vrele_count = 0; 625 sx_slock(&allproc_lock); 626 FOREACH_PROC_IN_SYSTEM(p) { 627 PROC_LOCK(p); 628 if (p->p_tracevp == vp) { 629 if (ktrcanset(td, p)) { 630 mtx_lock(&ktrace_mtx); 631 cred = p->p_tracecred; 632 p->p_tracecred = NULL; 633 p->p_tracevp = NULL; 634 p->p_traceflag = 0; 635 mtx_unlock(&ktrace_mtx); 636 vrele_count++; 637 crfree(cred); 638 } else 639 error = EPERM; 640 } 641 PROC_UNLOCK(p); 642 } 643 sx_sunlock(&allproc_lock); 644 if (vrele_count > 0) { 645 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 646 while (vrele_count-- > 0) 647 vrele(vp); 648 VFS_UNLOCK_GIANT(vfslocked); 649 } 650 goto done; 651 } 652 /* 653 * do it 654 */ 655 sx_slock(&proctree_lock); 656 if (uap->pid < 0) { 657 /* 658 * by process group 659 */ 660 pg = pgfind(-uap->pid); 661 if (pg == NULL) { 662 sx_sunlock(&proctree_lock); 663 error = ESRCH; 664 goto done; 665 } 666 /* 667 * ktrops() may call vrele(). Lock pg_members 668 * by the proctree_lock rather than pg_mtx. 669 */ 670 PGRP_UNLOCK(pg); 671 nfound = 0; 672 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 673 PROC_LOCK(p); 674 if (p_cansee(td, p) != 0) { 675 PROC_UNLOCK(p); 676 continue; 677 } 678 PROC_UNLOCK(p); 679 nfound++; 680 if (descend) 681 ret |= ktrsetchildren(td, p, ops, facs, vp); 682 else 683 ret |= ktrops(td, p, ops, facs, vp); 684 } 685 if (nfound == 0) { 686 sx_sunlock(&proctree_lock); 687 error = ESRCH; 688 goto done; 689 } 690 } else { 691 /* 692 * by pid 693 */ 694 p = pfind(uap->pid); 695 if (p == NULL) { 696 sx_sunlock(&proctree_lock); 697 error = ESRCH; 698 goto done; 699 } 700 error = p_cansee(td, p); 701 /* 702 * The slock of the proctree lock will keep this process 703 * from going away, so unlocking the proc here is ok. 704 */ 705 PROC_UNLOCK(p); 706 if (error) { 707 sx_sunlock(&proctree_lock); 708 goto done; 709 } 710 if (descend) 711 ret |= ktrsetchildren(td, p, ops, facs, vp); 712 else 713 ret |= ktrops(td, p, ops, facs, vp); 714 } 715 sx_sunlock(&proctree_lock); 716 if (!ret) 717 error = EPERM; 718 done: 719 if (vp != NULL) { 720 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 721 (void) vn_close(vp, FWRITE, td->td_ucred, td); 722 VFS_UNLOCK_GIANT(vfslocked); 723 } 724 ktrace_exit(td); 725 return (error); 726 #else /* !KTRACE */ 727 return (ENOSYS); 728 #endif /* KTRACE */ 729 } 730 731 /* ARGSUSED */ 732 int 733 utrace(td, uap) 734 struct thread *td; 735 register struct utrace_args *uap; 736 { 737 738 #ifdef KTRACE 739 struct ktr_request *req; 740 void *cp; 741 int error; 742 743 if (!KTRPOINT(td, KTR_USER)) 744 return (0); 745 if (uap->len > KTR_USER_MAXLEN) 746 return (EINVAL); 747 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 748 error = copyin(uap->addr, cp, uap->len); 749 if (error) { 750 free(cp, M_KTRACE); 751 return (error); 752 } 753 req = ktr_getrequest(KTR_USER); 754 if (req == NULL) { 755 free(cp, M_KTRACE); 756 return (ENOMEM); 757 } 758 req->ktr_buffer = cp; 759 req->ktr_header.ktr_len = uap->len; 760 ktr_submitrequest(td, req); 761 return (0); 762 #else /* !KTRACE */ 763 return (ENOSYS); 764 #endif /* KTRACE */ 765 } 766 767 #ifdef KTRACE 768 static int 769 ktrops(td, p, ops, facs, vp) 770 struct thread *td; 771 struct proc *p; 772 int ops, facs; 773 struct vnode *vp; 774 { 775 struct vnode *tracevp = NULL; 776 struct ucred *tracecred = NULL; 777 778 PROC_LOCK(p); 779 if (!ktrcanset(td, p)) { 780 PROC_UNLOCK(p); 781 return (0); 782 } 783 mtx_lock(&ktrace_mtx); 784 if (ops == KTROP_SET) { 785 if (p->p_tracevp != vp) { 786 /* 787 * if trace file already in use, relinquish below 788 */ 789 tracevp = p->p_tracevp; 790 VREF(vp); 791 p->p_tracevp = vp; 792 } 793 if (p->p_tracecred != td->td_ucred) { 794 tracecred = p->p_tracecred; 795 p->p_tracecred = crhold(td->td_ucred); 796 } 797 p->p_traceflag |= facs; 798 if (priv_check(td, PRIV_KTRACE) == 0) 799 p->p_traceflag |= KTRFAC_ROOT; 800 } else { 801 /* KTROP_CLEAR */ 802 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 803 /* no more tracing */ 804 p->p_traceflag = 0; 805 tracevp = p->p_tracevp; 806 p->p_tracevp = NULL; 807 tracecred = p->p_tracecred; 808 p->p_tracecred = NULL; 809 } 810 } 811 mtx_unlock(&ktrace_mtx); 812 PROC_UNLOCK(p); 813 if (tracevp != NULL) { 814 int vfslocked; 815 816 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 817 vrele(tracevp); 818 VFS_UNLOCK_GIANT(vfslocked); 819 } 820 if (tracecred != NULL) 821 crfree(tracecred); 822 823 return (1); 824 } 825 826 static int 827 ktrsetchildren(td, top, ops, facs, vp) 828 struct thread *td; 829 struct proc *top; 830 int ops, facs; 831 struct vnode *vp; 832 { 833 register struct proc *p; 834 register int ret = 0; 835 836 p = top; 837 sx_assert(&proctree_lock, SX_LOCKED); 838 for (;;) { 839 ret |= ktrops(td, p, ops, facs, vp); 840 /* 841 * If this process has children, descend to them next, 842 * otherwise do any siblings, and if done with this level, 843 * follow back up the tree (but not past top). 844 */ 845 if (!LIST_EMPTY(&p->p_children)) 846 p = LIST_FIRST(&p->p_children); 847 else for (;;) { 848 if (p == top) 849 return (ret); 850 if (LIST_NEXT(p, p_sibling)) { 851 p = LIST_NEXT(p, p_sibling); 852 break; 853 } 854 p = p->p_pptr; 855 } 856 } 857 /*NOTREACHED*/ 858 } 859 860 static void 861 ktr_writerequest(struct thread *td, struct ktr_request *req) 862 { 863 struct ktr_header *kth; 864 struct vnode *vp; 865 struct proc *p; 866 struct ucred *cred; 867 struct uio auio; 868 struct iovec aiov[3]; 869 struct mount *mp; 870 int datalen, buflen, vrele_count; 871 int error, vfslocked; 872 873 /* 874 * We hold the vnode and credential for use in I/O in case ktrace is 875 * disabled on the process as we write out the request. 876 * 877 * XXXRW: This is not ideal: we could end up performing a write after 878 * the vnode has been closed. 879 */ 880 mtx_lock(&ktrace_mtx); 881 vp = td->td_proc->p_tracevp; 882 if (vp != NULL) 883 VREF(vp); 884 cred = td->td_proc->p_tracecred; 885 if (cred != NULL) 886 crhold(cred); 887 mtx_unlock(&ktrace_mtx); 888 889 /* 890 * If vp is NULL, the vp has been cleared out from under this 891 * request, so just drop it. Make sure the credential and vnode are 892 * in sync: we should have both or neither. 893 */ 894 if (vp == NULL) { 895 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 896 return; 897 } 898 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 899 900 kth = &req->ktr_header; 901 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 902 buflen = kth->ktr_len; 903 auio.uio_iov = &aiov[0]; 904 auio.uio_offset = 0; 905 auio.uio_segflg = UIO_SYSSPACE; 906 auio.uio_rw = UIO_WRITE; 907 aiov[0].iov_base = (caddr_t)kth; 908 aiov[0].iov_len = sizeof(struct ktr_header); 909 auio.uio_resid = sizeof(struct ktr_header); 910 auio.uio_iovcnt = 1; 911 auio.uio_td = td; 912 if (datalen != 0) { 913 aiov[1].iov_base = (caddr_t)&req->ktr_data; 914 aiov[1].iov_len = datalen; 915 auio.uio_resid += datalen; 916 auio.uio_iovcnt++; 917 kth->ktr_len += datalen; 918 } 919 if (buflen != 0) { 920 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 921 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 922 aiov[auio.uio_iovcnt].iov_len = buflen; 923 auio.uio_resid += buflen; 924 auio.uio_iovcnt++; 925 } 926 927 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 928 vn_start_write(vp, &mp, V_WAIT); 929 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 930 (void)VOP_LEASE(vp, td, cred, LEASE_WRITE); 931 #ifdef MAC 932 error = mac_check_vnode_write(cred, NOCRED, vp); 933 if (error == 0) 934 #endif 935 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 936 VOP_UNLOCK(vp, 0, td); 937 vn_finished_write(mp); 938 vrele(vp); 939 VFS_UNLOCK_GIANT(vfslocked); 940 if (!error) 941 return; 942 /* 943 * If error encountered, give up tracing on this vnode. We defer 944 * all the vrele()'s on the vnode until after we are finished walking 945 * the various lists to avoid needlessly holding locks. 946 */ 947 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 948 error); 949 vrele_count = 0; 950 /* 951 * First, clear this vnode from being used by any processes in the 952 * system. 953 * XXX - If one process gets an EPERM writing to the vnode, should 954 * we really do this? Other processes might have suitable 955 * credentials for the operation. 956 */ 957 cred = NULL; 958 sx_slock(&allproc_lock); 959 FOREACH_PROC_IN_SYSTEM(p) { 960 PROC_LOCK(p); 961 if (p->p_tracevp == vp) { 962 mtx_lock(&ktrace_mtx); 963 p->p_tracevp = NULL; 964 p->p_traceflag = 0; 965 cred = p->p_tracecred; 966 p->p_tracecred = NULL; 967 mtx_unlock(&ktrace_mtx); 968 vrele_count++; 969 } 970 PROC_UNLOCK(p); 971 if (cred != NULL) { 972 crfree(cred); 973 cred = NULL; 974 } 975 } 976 sx_sunlock(&allproc_lock); 977 978 /* 979 * We can't clear any pending requests in threads that have cached 980 * them but not yet committed them, as those are per-thread. The 981 * thread will have to clear it itself on system call return. 982 */ 983 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 984 while (vrele_count-- > 0) 985 vrele(vp); 986 VFS_UNLOCK_GIANT(vfslocked); 987 } 988 989 /* 990 * Return true if caller has permission to set the ktracing state 991 * of target. Essentially, the target can't possess any 992 * more permissions than the caller. KTRFAC_ROOT signifies that 993 * root previously set the tracing status on the target process, and 994 * so, only root may further change it. 995 */ 996 static int 997 ktrcanset(td, targetp) 998 struct thread *td; 999 struct proc *targetp; 1000 { 1001 1002 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1003 if (targetp->p_traceflag & KTRFAC_ROOT && 1004 priv_check(td, PRIV_KTRACE)) 1005 return (0); 1006 1007 if (p_candebug(td, targetp) != 0) 1008 return (0); 1009 1010 return (1); 1011 } 1012 1013 #endif /* KTRACE */ 1014