1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_ktrace.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/fcntl.h> 42 #include <sys/kernel.h> 43 #include <sys/kthread.h> 44 #include <sys/lock.h> 45 #include <sys/mutex.h> 46 #include <sys/malloc.h> 47 #include <sys/mount.h> 48 #include <sys/namei.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/unistd.h> 52 #include <sys/vnode.h> 53 #include <sys/socket.h> 54 #include <sys/stat.h> 55 #include <sys/ktrace.h> 56 #include <sys/sx.h> 57 #include <sys/sysctl.h> 58 #include <sys/syslog.h> 59 #include <sys/sysproto.h> 60 61 #include <security/mac/mac_framework.h> 62 63 /* 64 * The ktrace facility allows the tracing of certain key events in user space 65 * processes, such as system calls, signal delivery, context switches, and 66 * user generated events using utrace(2). It works by streaming event 67 * records and data to a vnode associated with the process using the 68 * ktrace(2) system call. In general, records can be written directly from 69 * the context that generates the event. One important exception to this is 70 * during a context switch, where sleeping is not permitted. To handle this 71 * case, trace events are generated using in-kernel ktr_request records, and 72 * then delivered to disk at a convenient moment -- either immediately, the 73 * next traceable event, at system call return, or at process exit. 74 * 75 * When dealing with multiple threads or processes writing to the same event 76 * log, ordering guarantees are weak: specifically, if an event has multiple 77 * records (i.e., system call enter and return), they may be interlaced with 78 * records from another event. Process and thread ID information is provided 79 * in the record, and user applications can de-interlace events if required. 80 */ 81 82 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 83 84 #ifdef KTRACE 85 86 #ifndef KTRACE_REQUEST_POOL 87 #define KTRACE_REQUEST_POOL 100 88 #endif 89 90 struct ktr_request { 91 struct ktr_header ktr_header; 92 void *ktr_buffer; 93 union { 94 struct ktr_syscall ktr_syscall; 95 struct ktr_sysret ktr_sysret; 96 struct ktr_genio ktr_genio; 97 struct ktr_psig ktr_psig; 98 struct ktr_csw ktr_csw; 99 } ktr_data; 100 STAILQ_ENTRY(ktr_request) ktr_list; 101 }; 102 103 static int data_lengths[] = { 104 0, /* none */ 105 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 106 sizeof(struct ktr_sysret), /* KTR_SYSRET */ 107 0, /* KTR_NAMEI */ 108 sizeof(struct ktr_genio), /* KTR_GENIO */ 109 sizeof(struct ktr_psig), /* KTR_PSIG */ 110 sizeof(struct ktr_csw), /* KTR_CSW */ 111 0, /* KTR_USER */ 112 0, /* KTR_STRUCT */ 113 0, /* KTR_SYSCTL */ 114 }; 115 116 static STAILQ_HEAD(, ktr_request) ktr_free; 117 118 static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 119 120 static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 121 TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 122 123 static u_int ktr_geniosize = PAGE_SIZE; 124 TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 125 SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 126 0, "Maximum size of genio event payload"); 127 128 static int print_message = 1; 129 struct mtx ktrace_mtx; 130 static struct sx ktrace_sx; 131 132 static void ktrace_init(void *dummy); 133 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 134 static u_int ktrace_resize_pool(u_int newsize); 135 static struct ktr_request *ktr_getrequest(int type); 136 static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 137 static void ktr_freerequest(struct ktr_request *req); 138 static void ktr_writerequest(struct thread *td, struct ktr_request *req); 139 static int ktrcanset(struct thread *,struct proc *); 140 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 141 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 142 143 /* 144 * ktrace itself generates events, such as context switches, which we do not 145 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 146 * whether or not it is in a region where tracing of events should be 147 * suppressed. 148 */ 149 static void 150 ktrace_enter(struct thread *td) 151 { 152 153 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 154 td->td_pflags |= TDP_INKTRACE; 155 } 156 157 static void 158 ktrace_exit(struct thread *td) 159 { 160 161 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 162 td->td_pflags &= ~TDP_INKTRACE; 163 } 164 165 static void 166 ktrace_assert(struct thread *td) 167 { 168 169 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 170 } 171 172 static void 173 ktrace_init(void *dummy) 174 { 175 struct ktr_request *req; 176 int i; 177 178 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 179 sx_init(&ktrace_sx, "ktrace_sx"); 180 STAILQ_INIT(&ktr_free); 181 for (i = 0; i < ktr_requestpool; i++) { 182 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 183 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 184 } 185 } 186 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 187 188 static int 189 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 190 { 191 struct thread *td; 192 u_int newsize, oldsize, wantsize; 193 int error; 194 195 /* Handle easy read-only case first to avoid warnings from GCC. */ 196 if (!req->newptr) { 197 mtx_lock(&ktrace_mtx); 198 oldsize = ktr_requestpool; 199 mtx_unlock(&ktrace_mtx); 200 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 201 } 202 203 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 204 if (error) 205 return (error); 206 td = curthread; 207 ktrace_enter(td); 208 mtx_lock(&ktrace_mtx); 209 oldsize = ktr_requestpool; 210 newsize = ktrace_resize_pool(wantsize); 211 mtx_unlock(&ktrace_mtx); 212 ktrace_exit(td); 213 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 214 if (error) 215 return (error); 216 if (wantsize > oldsize && newsize < wantsize) 217 return (ENOSPC); 218 return (0); 219 } 220 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 221 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", ""); 222 223 static u_int 224 ktrace_resize_pool(u_int newsize) 225 { 226 struct ktr_request *req; 227 int bound; 228 229 mtx_assert(&ktrace_mtx, MA_OWNED); 230 print_message = 1; 231 bound = newsize - ktr_requestpool; 232 if (bound == 0) 233 return (ktr_requestpool); 234 if (bound < 0) 235 /* Shrink pool down to newsize if possible. */ 236 while (bound++ < 0) { 237 req = STAILQ_FIRST(&ktr_free); 238 if (req == NULL) 239 return (ktr_requestpool); 240 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 241 ktr_requestpool--; 242 mtx_unlock(&ktrace_mtx); 243 free(req, M_KTRACE); 244 mtx_lock(&ktrace_mtx); 245 } 246 else 247 /* Grow pool up to newsize. */ 248 while (bound-- > 0) { 249 mtx_unlock(&ktrace_mtx); 250 req = malloc(sizeof(struct ktr_request), M_KTRACE, 251 M_WAITOK); 252 mtx_lock(&ktrace_mtx); 253 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 254 ktr_requestpool++; 255 } 256 return (ktr_requestpool); 257 } 258 259 static struct ktr_request * 260 ktr_getrequest(int type) 261 { 262 struct ktr_request *req; 263 struct thread *td = curthread; 264 struct proc *p = td->td_proc; 265 int pm; 266 267 ktrace_enter(td); /* XXX: In caller instead? */ 268 mtx_lock(&ktrace_mtx); 269 if (!KTRCHECK(td, type)) { 270 mtx_unlock(&ktrace_mtx); 271 ktrace_exit(td); 272 return (NULL); 273 } 274 req = STAILQ_FIRST(&ktr_free); 275 if (req != NULL) { 276 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 277 req->ktr_header.ktr_type = type; 278 if (p->p_traceflag & KTRFAC_DROP) { 279 req->ktr_header.ktr_type |= KTR_DROP; 280 p->p_traceflag &= ~KTRFAC_DROP; 281 } 282 mtx_unlock(&ktrace_mtx); 283 microtime(&req->ktr_header.ktr_time); 284 req->ktr_header.ktr_pid = p->p_pid; 285 req->ktr_header.ktr_tid = td->td_tid; 286 bcopy(td->td_name, req->ktr_header.ktr_comm, MAXCOMLEN + 1); 287 req->ktr_buffer = NULL; 288 req->ktr_header.ktr_len = 0; 289 } else { 290 p->p_traceflag |= KTRFAC_DROP; 291 pm = print_message; 292 print_message = 0; 293 mtx_unlock(&ktrace_mtx); 294 if (pm) 295 printf("Out of ktrace request objects.\n"); 296 ktrace_exit(td); 297 } 298 return (req); 299 } 300 301 /* 302 * Some trace generation environments don't permit direct access to VFS, 303 * such as during a context switch where sleeping is not allowed. Under these 304 * circumstances, queue a request to the thread to be written asynchronously 305 * later. 306 */ 307 static void 308 ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 309 { 310 311 mtx_lock(&ktrace_mtx); 312 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 313 mtx_unlock(&ktrace_mtx); 314 ktrace_exit(td); 315 } 316 317 /* 318 * Drain any pending ktrace records from the per-thread queue to disk. This 319 * is used both internally before committing other records, and also on 320 * system call return. We drain all the ones we can find at the time when 321 * drain is requested, but don't keep draining after that as those events 322 * may be approximately "after" the current event. 323 */ 324 static void 325 ktr_drain(struct thread *td) 326 { 327 struct ktr_request *queued_req; 328 STAILQ_HEAD(, ktr_request) local_queue; 329 330 ktrace_assert(td); 331 sx_assert(&ktrace_sx, SX_XLOCKED); 332 333 STAILQ_INIT(&local_queue); /* XXXRW: needed? */ 334 335 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 336 mtx_lock(&ktrace_mtx); 337 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 338 mtx_unlock(&ktrace_mtx); 339 340 while ((queued_req = STAILQ_FIRST(&local_queue))) { 341 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 342 ktr_writerequest(td, queued_req); 343 ktr_freerequest(queued_req); 344 } 345 } 346 } 347 348 /* 349 * Submit a trace record for immediate commit to disk -- to be used only 350 * where entering VFS is OK. First drain any pending records that may have 351 * been cached in the thread. 352 */ 353 static void 354 ktr_submitrequest(struct thread *td, struct ktr_request *req) 355 { 356 357 ktrace_assert(td); 358 359 sx_xlock(&ktrace_sx); 360 ktr_drain(td); 361 ktr_writerequest(td, req); 362 ktr_freerequest(req); 363 sx_xunlock(&ktrace_sx); 364 365 ktrace_exit(td); 366 } 367 368 static void 369 ktr_freerequest(struct ktr_request *req) 370 { 371 372 if (req->ktr_buffer != NULL) 373 free(req->ktr_buffer, M_KTRACE); 374 mtx_lock(&ktrace_mtx); 375 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 376 mtx_unlock(&ktrace_mtx); 377 } 378 379 void 380 ktrsyscall(code, narg, args) 381 int code, narg; 382 register_t args[]; 383 { 384 struct ktr_request *req; 385 struct ktr_syscall *ktp; 386 size_t buflen; 387 char *buf = NULL; 388 389 buflen = sizeof(register_t) * narg; 390 if (buflen > 0) { 391 buf = malloc(buflen, M_KTRACE, M_WAITOK); 392 bcopy(args, buf, buflen); 393 } 394 req = ktr_getrequest(KTR_SYSCALL); 395 if (req == NULL) { 396 if (buf != NULL) 397 free(buf, M_KTRACE); 398 return; 399 } 400 ktp = &req->ktr_data.ktr_syscall; 401 ktp->ktr_code = code; 402 ktp->ktr_narg = narg; 403 if (buflen > 0) { 404 req->ktr_header.ktr_len = buflen; 405 req->ktr_buffer = buf; 406 } 407 ktr_submitrequest(curthread, req); 408 } 409 410 void 411 ktrsysret(code, error, retval) 412 int code, error; 413 register_t retval; 414 { 415 struct ktr_request *req; 416 struct ktr_sysret *ktp; 417 418 req = ktr_getrequest(KTR_SYSRET); 419 if (req == NULL) 420 return; 421 ktp = &req->ktr_data.ktr_sysret; 422 ktp->ktr_code = code; 423 ktp->ktr_error = error; 424 ktp->ktr_retval = retval; /* what about val2 ? */ 425 ktr_submitrequest(curthread, req); 426 } 427 428 /* 429 * When a process exits, drain per-process asynchronous trace records. 430 */ 431 void 432 ktrprocexit(struct thread *td) 433 { 434 435 ktrace_enter(td); 436 sx_xlock(&ktrace_sx); 437 ktr_drain(td); 438 sx_xunlock(&ktrace_sx); 439 ktrace_exit(td); 440 } 441 442 /* 443 * When a thread returns, drain any asynchronous records generated by the 444 * system call. 445 */ 446 void 447 ktruserret(struct thread *td) 448 { 449 450 ktrace_enter(td); 451 sx_xlock(&ktrace_sx); 452 ktr_drain(td); 453 sx_xunlock(&ktrace_sx); 454 ktrace_exit(td); 455 } 456 457 void 458 ktrnamei(path) 459 char *path; 460 { 461 struct ktr_request *req; 462 int namelen; 463 char *buf = NULL; 464 465 namelen = strlen(path); 466 if (namelen > 0) { 467 buf = malloc(namelen, M_KTRACE, M_WAITOK); 468 bcopy(path, buf, namelen); 469 } 470 req = ktr_getrequest(KTR_NAMEI); 471 if (req == NULL) { 472 if (buf != NULL) 473 free(buf, M_KTRACE); 474 return; 475 } 476 if (namelen > 0) { 477 req->ktr_header.ktr_len = namelen; 478 req->ktr_buffer = buf; 479 } 480 ktr_submitrequest(curthread, req); 481 } 482 483 void 484 ktrsysctl(name, namelen) 485 int *name; 486 u_int namelen; 487 { 488 struct ktr_request *req; 489 u_int mib[CTL_MAXNAME + 2]; 490 char *mibname; 491 size_t mibnamelen; 492 int error; 493 494 /* Lookup name of mib. */ 495 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 496 mib[0] = 0; 497 mib[1] = 1; 498 bcopy(name, mib + 2, namelen * sizeof(*name)); 499 mibnamelen = 128; 500 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 501 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 502 NULL, 0, &mibnamelen, 0); 503 if (error) { 504 free(mibname, M_KTRACE); 505 return; 506 } 507 req = ktr_getrequest(KTR_SYSCTL); 508 if (req == NULL) { 509 free(mibname, M_KTRACE); 510 return; 511 } 512 req->ktr_header.ktr_len = mibnamelen; 513 req->ktr_buffer = mibname; 514 ktr_submitrequest(curthread, req); 515 } 516 517 void 518 ktrgenio(fd, rw, uio, error) 519 int fd; 520 enum uio_rw rw; 521 struct uio *uio; 522 int error; 523 { 524 struct ktr_request *req; 525 struct ktr_genio *ktg; 526 int datalen; 527 char *buf; 528 529 if (error) { 530 free(uio, M_IOV); 531 return; 532 } 533 uio->uio_offset = 0; 534 uio->uio_rw = UIO_WRITE; 535 datalen = imin(uio->uio_resid, ktr_geniosize); 536 buf = malloc(datalen, M_KTRACE, M_WAITOK); 537 error = uiomove(buf, datalen, uio); 538 free(uio, M_IOV); 539 if (error) { 540 free(buf, M_KTRACE); 541 return; 542 } 543 req = ktr_getrequest(KTR_GENIO); 544 if (req == NULL) { 545 free(buf, M_KTRACE); 546 return; 547 } 548 ktg = &req->ktr_data.ktr_genio; 549 ktg->ktr_fd = fd; 550 ktg->ktr_rw = rw; 551 req->ktr_header.ktr_len = datalen; 552 req->ktr_buffer = buf; 553 ktr_submitrequest(curthread, req); 554 } 555 556 void 557 ktrpsig(sig, action, mask, code) 558 int sig; 559 sig_t action; 560 sigset_t *mask; 561 int code; 562 { 563 struct ktr_request *req; 564 struct ktr_psig *kp; 565 566 req = ktr_getrequest(KTR_PSIG); 567 if (req == NULL) 568 return; 569 kp = &req->ktr_data.ktr_psig; 570 kp->signo = (char)sig; 571 kp->action = action; 572 kp->mask = *mask; 573 kp->code = code; 574 ktr_enqueuerequest(curthread, req); 575 } 576 577 void 578 ktrcsw(out, user) 579 int out, user; 580 { 581 struct ktr_request *req; 582 struct ktr_csw *kc; 583 584 req = ktr_getrequest(KTR_CSW); 585 if (req == NULL) 586 return; 587 kc = &req->ktr_data.ktr_csw; 588 kc->out = out; 589 kc->user = user; 590 ktr_enqueuerequest(curthread, req); 591 } 592 593 void 594 ktrstruct(name, namelen, data, datalen) 595 const char *name; 596 size_t namelen; 597 void *data; 598 size_t datalen; 599 { 600 struct ktr_request *req; 601 char *buf = NULL; 602 size_t buflen; 603 604 if (!data) 605 datalen = 0; 606 buflen = namelen + 1 + datalen; 607 buf = malloc(buflen, M_KTRACE, M_WAITOK); 608 bcopy(name, buf, namelen); 609 buf[namelen] = '\0'; 610 bcopy(data, buf + namelen + 1, datalen); 611 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 612 free(buf, M_KTRACE); 613 return; 614 } 615 req->ktr_buffer = buf; 616 req->ktr_header.ktr_len = buflen; 617 ktr_submitrequest(curthread, req); 618 } 619 #endif /* KTRACE */ 620 621 /* Interface and common routines */ 622 623 #ifndef _SYS_SYSPROTO_H_ 624 struct ktrace_args { 625 char *fname; 626 int ops; 627 int facs; 628 int pid; 629 }; 630 #endif 631 /* ARGSUSED */ 632 int 633 ktrace(td, uap) 634 struct thread *td; 635 register struct ktrace_args *uap; 636 { 637 #ifdef KTRACE 638 register struct vnode *vp = NULL; 639 register struct proc *p; 640 struct pgrp *pg; 641 int facs = uap->facs & ~KTRFAC_ROOT; 642 int ops = KTROP(uap->ops); 643 int descend = uap->ops & KTRFLAG_DESCEND; 644 int nfound, ret = 0; 645 int flags, error = 0, vfslocked; 646 struct nameidata nd; 647 struct ucred *cred; 648 649 /* 650 * Need something to (un)trace. 651 */ 652 if (ops != KTROP_CLEARFILE && facs == 0) 653 return (EINVAL); 654 655 ktrace_enter(td); 656 if (ops != KTROP_CLEAR) { 657 /* 658 * an operation which requires a file argument. 659 */ 660 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 661 uap->fname, td); 662 flags = FREAD | FWRITE | O_NOFOLLOW; 663 error = vn_open(&nd, &flags, 0, NULL); 664 if (error) { 665 ktrace_exit(td); 666 return (error); 667 } 668 vfslocked = NDHASGIANT(&nd); 669 NDFREE(&nd, NDF_ONLY_PNBUF); 670 vp = nd.ni_vp; 671 VOP_UNLOCK(vp, 0); 672 if (vp->v_type != VREG) { 673 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 674 VFS_UNLOCK_GIANT(vfslocked); 675 ktrace_exit(td); 676 return (EACCES); 677 } 678 VFS_UNLOCK_GIANT(vfslocked); 679 } 680 /* 681 * Clear all uses of the tracefile. 682 */ 683 if (ops == KTROP_CLEARFILE) { 684 int vrele_count; 685 686 vrele_count = 0; 687 sx_slock(&allproc_lock); 688 FOREACH_PROC_IN_SYSTEM(p) { 689 PROC_LOCK(p); 690 if (p->p_tracevp == vp) { 691 if (ktrcanset(td, p)) { 692 mtx_lock(&ktrace_mtx); 693 cred = p->p_tracecred; 694 p->p_tracecred = NULL; 695 p->p_tracevp = NULL; 696 p->p_traceflag = 0; 697 mtx_unlock(&ktrace_mtx); 698 vrele_count++; 699 crfree(cred); 700 } else 701 error = EPERM; 702 } 703 PROC_UNLOCK(p); 704 } 705 sx_sunlock(&allproc_lock); 706 if (vrele_count > 0) { 707 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 708 while (vrele_count-- > 0) 709 vrele(vp); 710 VFS_UNLOCK_GIANT(vfslocked); 711 } 712 goto done; 713 } 714 /* 715 * do it 716 */ 717 sx_slock(&proctree_lock); 718 if (uap->pid < 0) { 719 /* 720 * by process group 721 */ 722 pg = pgfind(-uap->pid); 723 if (pg == NULL) { 724 sx_sunlock(&proctree_lock); 725 error = ESRCH; 726 goto done; 727 } 728 /* 729 * ktrops() may call vrele(). Lock pg_members 730 * by the proctree_lock rather than pg_mtx. 731 */ 732 PGRP_UNLOCK(pg); 733 nfound = 0; 734 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 735 PROC_LOCK(p); 736 if (p_cansee(td, p) != 0) { 737 PROC_UNLOCK(p); 738 continue; 739 } 740 PROC_UNLOCK(p); 741 nfound++; 742 if (descend) 743 ret |= ktrsetchildren(td, p, ops, facs, vp); 744 else 745 ret |= ktrops(td, p, ops, facs, vp); 746 } 747 if (nfound == 0) { 748 sx_sunlock(&proctree_lock); 749 error = ESRCH; 750 goto done; 751 } 752 } else { 753 /* 754 * by pid 755 */ 756 p = pfind(uap->pid); 757 if (p == NULL) { 758 sx_sunlock(&proctree_lock); 759 error = ESRCH; 760 goto done; 761 } 762 error = p_cansee(td, p); 763 /* 764 * The slock of the proctree lock will keep this process 765 * from going away, so unlocking the proc here is ok. 766 */ 767 PROC_UNLOCK(p); 768 if (error) { 769 sx_sunlock(&proctree_lock); 770 goto done; 771 } 772 if (descend) 773 ret |= ktrsetchildren(td, p, ops, facs, vp); 774 else 775 ret |= ktrops(td, p, ops, facs, vp); 776 } 777 sx_sunlock(&proctree_lock); 778 if (!ret) 779 error = EPERM; 780 done: 781 if (vp != NULL) { 782 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 783 (void) vn_close(vp, FWRITE, td->td_ucred, td); 784 VFS_UNLOCK_GIANT(vfslocked); 785 } 786 ktrace_exit(td); 787 return (error); 788 #else /* !KTRACE */ 789 return (ENOSYS); 790 #endif /* KTRACE */ 791 } 792 793 /* ARGSUSED */ 794 int 795 utrace(td, uap) 796 struct thread *td; 797 register struct utrace_args *uap; 798 { 799 800 #ifdef KTRACE 801 struct ktr_request *req; 802 void *cp; 803 int error; 804 805 if (!KTRPOINT(td, KTR_USER)) 806 return (0); 807 if (uap->len > KTR_USER_MAXLEN) 808 return (EINVAL); 809 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 810 error = copyin(uap->addr, cp, uap->len); 811 if (error) { 812 free(cp, M_KTRACE); 813 return (error); 814 } 815 req = ktr_getrequest(KTR_USER); 816 if (req == NULL) { 817 free(cp, M_KTRACE); 818 return (ENOMEM); 819 } 820 req->ktr_buffer = cp; 821 req->ktr_header.ktr_len = uap->len; 822 ktr_submitrequest(td, req); 823 return (0); 824 #else /* !KTRACE */ 825 return (ENOSYS); 826 #endif /* KTRACE */ 827 } 828 829 #ifdef KTRACE 830 static int 831 ktrops(td, p, ops, facs, vp) 832 struct thread *td; 833 struct proc *p; 834 int ops, facs; 835 struct vnode *vp; 836 { 837 struct vnode *tracevp = NULL; 838 struct ucred *tracecred = NULL; 839 840 PROC_LOCK(p); 841 if (!ktrcanset(td, p)) { 842 PROC_UNLOCK(p); 843 return (0); 844 } 845 mtx_lock(&ktrace_mtx); 846 if (ops == KTROP_SET) { 847 if (p->p_tracevp != vp) { 848 /* 849 * if trace file already in use, relinquish below 850 */ 851 tracevp = p->p_tracevp; 852 VREF(vp); 853 p->p_tracevp = vp; 854 } 855 if (p->p_tracecred != td->td_ucred) { 856 tracecred = p->p_tracecred; 857 p->p_tracecred = crhold(td->td_ucred); 858 } 859 p->p_traceflag |= facs; 860 if (priv_check(td, PRIV_KTRACE) == 0) 861 p->p_traceflag |= KTRFAC_ROOT; 862 } else { 863 /* KTROP_CLEAR */ 864 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 865 /* no more tracing */ 866 p->p_traceflag = 0; 867 tracevp = p->p_tracevp; 868 p->p_tracevp = NULL; 869 tracecred = p->p_tracecred; 870 p->p_tracecred = NULL; 871 } 872 } 873 mtx_unlock(&ktrace_mtx); 874 PROC_UNLOCK(p); 875 if (tracevp != NULL) { 876 int vfslocked; 877 878 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 879 vrele(tracevp); 880 VFS_UNLOCK_GIANT(vfslocked); 881 } 882 if (tracecred != NULL) 883 crfree(tracecred); 884 885 return (1); 886 } 887 888 static int 889 ktrsetchildren(td, top, ops, facs, vp) 890 struct thread *td; 891 struct proc *top; 892 int ops, facs; 893 struct vnode *vp; 894 { 895 register struct proc *p; 896 register int ret = 0; 897 898 p = top; 899 sx_assert(&proctree_lock, SX_LOCKED); 900 for (;;) { 901 ret |= ktrops(td, p, ops, facs, vp); 902 /* 903 * If this process has children, descend to them next, 904 * otherwise do any siblings, and if done with this level, 905 * follow back up the tree (but not past top). 906 */ 907 if (!LIST_EMPTY(&p->p_children)) 908 p = LIST_FIRST(&p->p_children); 909 else for (;;) { 910 if (p == top) 911 return (ret); 912 if (LIST_NEXT(p, p_sibling)) { 913 p = LIST_NEXT(p, p_sibling); 914 break; 915 } 916 p = p->p_pptr; 917 } 918 } 919 /*NOTREACHED*/ 920 } 921 922 static void 923 ktr_writerequest(struct thread *td, struct ktr_request *req) 924 { 925 struct ktr_header *kth; 926 struct vnode *vp; 927 struct proc *p; 928 struct ucred *cred; 929 struct uio auio; 930 struct iovec aiov[3]; 931 struct mount *mp; 932 int datalen, buflen, vrele_count; 933 int error, vfslocked; 934 935 /* 936 * We hold the vnode and credential for use in I/O in case ktrace is 937 * disabled on the process as we write out the request. 938 * 939 * XXXRW: This is not ideal: we could end up performing a write after 940 * the vnode has been closed. 941 */ 942 mtx_lock(&ktrace_mtx); 943 vp = td->td_proc->p_tracevp; 944 cred = td->td_proc->p_tracecred; 945 946 /* 947 * If vp is NULL, the vp has been cleared out from under this 948 * request, so just drop it. Make sure the credential and vnode are 949 * in sync: we should have both or neither. 950 */ 951 if (vp == NULL) { 952 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 953 mtx_unlock(&ktrace_mtx); 954 return; 955 } 956 VREF(vp); 957 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 958 crhold(cred); 959 mtx_unlock(&ktrace_mtx); 960 961 kth = &req->ktr_header; 962 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < 963 sizeof(data_lengths) / sizeof(data_lengths[0]), 964 ("data_lengths array overflow")); 965 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 966 buflen = kth->ktr_len; 967 auio.uio_iov = &aiov[0]; 968 auio.uio_offset = 0; 969 auio.uio_segflg = UIO_SYSSPACE; 970 auio.uio_rw = UIO_WRITE; 971 aiov[0].iov_base = (caddr_t)kth; 972 aiov[0].iov_len = sizeof(struct ktr_header); 973 auio.uio_resid = sizeof(struct ktr_header); 974 auio.uio_iovcnt = 1; 975 auio.uio_td = td; 976 if (datalen != 0) { 977 aiov[1].iov_base = (caddr_t)&req->ktr_data; 978 aiov[1].iov_len = datalen; 979 auio.uio_resid += datalen; 980 auio.uio_iovcnt++; 981 kth->ktr_len += datalen; 982 } 983 if (buflen != 0) { 984 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 985 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 986 aiov[auio.uio_iovcnt].iov_len = buflen; 987 auio.uio_resid += buflen; 988 auio.uio_iovcnt++; 989 } 990 991 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 992 vn_start_write(vp, &mp, V_WAIT); 993 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 994 #ifdef MAC 995 error = mac_vnode_check_write(cred, NOCRED, vp); 996 if (error == 0) 997 #endif 998 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 999 VOP_UNLOCK(vp, 0); 1000 vn_finished_write(mp); 1001 crfree(cred); 1002 if (!error) { 1003 vrele(vp); 1004 VFS_UNLOCK_GIANT(vfslocked); 1005 return; 1006 } 1007 VFS_UNLOCK_GIANT(vfslocked); 1008 1009 /* 1010 * If error encountered, give up tracing on this vnode. We defer 1011 * all the vrele()'s on the vnode until after we are finished walking 1012 * the various lists to avoid needlessly holding locks. 1013 * NB: at this point we still hold the vnode reference that must 1014 * not go away as we need the valid vnode to compare with. Thus let 1015 * vrele_count start at 1 and the reference will be freed 1016 * by the loop at the end after our last use of vp. 1017 */ 1018 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1019 error); 1020 vrele_count = 1; 1021 /* 1022 * First, clear this vnode from being used by any processes in the 1023 * system. 1024 * XXX - If one process gets an EPERM writing to the vnode, should 1025 * we really do this? Other processes might have suitable 1026 * credentials for the operation. 1027 */ 1028 cred = NULL; 1029 sx_slock(&allproc_lock); 1030 FOREACH_PROC_IN_SYSTEM(p) { 1031 PROC_LOCK(p); 1032 if (p->p_tracevp == vp) { 1033 mtx_lock(&ktrace_mtx); 1034 p->p_tracevp = NULL; 1035 p->p_traceflag = 0; 1036 cred = p->p_tracecred; 1037 p->p_tracecred = NULL; 1038 mtx_unlock(&ktrace_mtx); 1039 vrele_count++; 1040 } 1041 PROC_UNLOCK(p); 1042 if (cred != NULL) { 1043 crfree(cred); 1044 cred = NULL; 1045 } 1046 } 1047 sx_sunlock(&allproc_lock); 1048 1049 /* 1050 * We can't clear any pending requests in threads that have cached 1051 * them but not yet committed them, as those are per-thread. The 1052 * thread will have to clear it itself on system call return. 1053 */ 1054 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1055 while (vrele_count-- > 0) 1056 vrele(vp); 1057 VFS_UNLOCK_GIANT(vfslocked); 1058 } 1059 1060 /* 1061 * Return true if caller has permission to set the ktracing state 1062 * of target. Essentially, the target can't possess any 1063 * more permissions than the caller. KTRFAC_ROOT signifies that 1064 * root previously set the tracing status on the target process, and 1065 * so, only root may further change it. 1066 */ 1067 static int 1068 ktrcanset(td, targetp) 1069 struct thread *td; 1070 struct proc *targetp; 1071 { 1072 1073 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1074 if (targetp->p_traceflag & KTRFAC_ROOT && 1075 priv_check(td, PRIV_KTRACE)) 1076 return (0); 1077 1078 if (p_candebug(td, targetp) != 0) 1079 return (0); 1080 1081 return (1); 1082 } 1083 1084 #endif /* KTRACE */ 1085