1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_ktrace.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/fcntl.h> 42 #include <sys/kernel.h> 43 #include <sys/kthread.h> 44 #include <sys/lock.h> 45 #include <sys/mutex.h> 46 #include <sys/malloc.h> 47 #include <sys/mount.h> 48 #include <sys/namei.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/unistd.h> 52 #include <sys/vnode.h> 53 #include <sys/socket.h> 54 #include <sys/stat.h> 55 #include <sys/ktrace.h> 56 #include <sys/sx.h> 57 #include <sys/sysctl.h> 58 #include <sys/syslog.h> 59 #include <sys/sysproto.h> 60 61 #include <security/mac/mac_framework.h> 62 63 /* 64 * The ktrace facility allows the tracing of certain key events in user space 65 * processes, such as system calls, signal delivery, context switches, and 66 * user generated events using utrace(2). It works by streaming event 67 * records and data to a vnode associated with the process using the 68 * ktrace(2) system call. In general, records can be written directly from 69 * the context that generates the event. One important exception to this is 70 * during a context switch, where sleeping is not permitted. To handle this 71 * case, trace events are generated using in-kernel ktr_request records, and 72 * then delivered to disk at a convenient moment -- either immediately, the 73 * next traceable event, at system call return, or at process exit. 74 * 75 * When dealing with multiple threads or processes writing to the same event 76 * log, ordering guarantees are weak: specifically, if an event has multiple 77 * records (i.e., system call enter and return), they may be interlaced with 78 * records from another event. Process and thread ID information is provided 79 * in the record, and user applications can de-interlace events if required. 
 */

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

/*
 * One in-kernel trace record.  ktr_header is written to the trace vnode
 * first, followed by the type-specific fixed part held in ktr_data (its
 * size comes from data_lengths[]), followed by ktr_header.ktr_len bytes
 * from ktr_buffer, if any (see ktr_writerequest()).  ktr_buffer is either
 * NULL or an M_KTRACE allocation that is released by
 * ktr_freerequest_locked().  ktr_list links the request onto either the
 * ktr_free pool or a process's p_ktr queue.
 */
struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

/*
 * Number of bytes of ktr_data that ktr_writerequest() emits for each record
 * type; indexed by the record type with KTR_DROP masked off.
 */
static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0,					/* KTR_USER */
	0,					/* KTR_STRUCT */
	0,					/* KTR_SYSCTL */
};

/* Pool of unused requests; manipulated with ktrace_mtx held. */
static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

/* Current number of requests owned by the pool (free or in use). */
static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

/* Upper bound on the number of I/O bytes copied by ktrgenio(). */
static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

/*
 * Non-zero when the next pool exhaustion should be reported on the console;
 * cleared by ktr_getrequest() when it prints, re-armed by
 * ktrace_resize_pool().
 */
static int print_message = 1;
/* Taken around the free pool, the p_ktr queues and the p_trace* fields. */
static struct mtx ktrace_mtx;
/* Held exclusively while queued records are drained and written out. */
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
/* Mark td as being inside ktrace; the flag must not already be set. */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

/* Mark td as having left ktrace; the flag must currently be set. */
static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

/* Assert that td is currently inside a ktrace_enter()/ktrace_exit() pair. */
static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

/*
 * SYSINIT hook: initialize the ktrace locks and pre-allocate
 * ktr_requestpool request objects onto the free list.
 */
static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

/*
 * Handler for kern.ktrace.request_pool.  A read reports the current pool
 * size.  A write asks ktrace_resize_pool() for the new size and reports the
 * previous size back; ENOSPC is returned when growth was requested but the
 * resulting pool is still smaller than asked for.
 */
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		mtx_lock(&ktrace_mtx);
		oldsize = ktr_requestpool;
		mtx_unlock(&ktrace_mtx);
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	/* The resize runs with TDP_INKTRACE set on the calling thread. */
	ktrace_enter(td);
	mtx_lock(&ktrace_mtx);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(wantsize);
	mtx_unlock(&ktrace_mtx);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");

/*
 * Grow or shrink the request pool towards newsize and return the size
 * actually reached.  Called and returns with ktrace_mtx held, but the mutex
 * is dropped around every malloc()/free(), so other threads may use the pool
 * while a resize is in progress.  Shrinking only reclaims requests that are
 * on the free list and stops early once that list is empty.  Also re-arms
 * the "out of request objects" console message.
 */
static u_int
ktrace_resize_pool(u_int newsize)
{
	struct ktr_request *req;
	int bound;

	mtx_assert(&ktrace_mtx, MA_OWNED);
	print_message = 1;
	/* Signed difference: negative means shrink, positive means grow. */
	bound = newsize - ktr_requestpool;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0)
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				return (ktr_requestpool);
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			mtx_unlock(&ktrace_mtx);
			free(req, M_KTRACE);
			mtx_lock(&ktrace_mtx);
		}
	else
		/* Grow pool up to newsize. */
		while (bound-- > 0) {
			mtx_unlock(&ktrace_mtx);
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			mtx_lock(&ktrace_mtx);
			STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
			ktr_requestpool++;
		}
	return (ktr_requestpool);
}

/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[].
 */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    (sizeof((struct thread *)NULL)->td_name));

/*
 * Take a request object from the free pool for a record of the given type
 * generated by curthread, and fill in its header (type, time, pid, tid and
 * thread name); the payload buffer and length start out empty.
 *
 * On success the thread is left inside ktrace (TDP_INKTRACE set); the
 * caller must hand the request to ktr_submitrequest() or
 * ktr_enqueuerequest(), both of which call ktrace_exit().  NULL is returned,
 * with TDP_INKTRACE cleared again, when KTRCHECK() rejects the type for this
 * thread or when the pool is empty.  In the latter case KTRFAC_DROP is set
 * on the process so that the next record obtained carries KTR_DROP, and a
 * console message is printed if print_message was armed.
 */
static struct ktr_request *
ktr_getrequest(int type)
{
	struct ktr_request *req;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int pm;

	ktrace_enter(td);	/* XXX: In caller instead? */
	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		ktrace_exit(td);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		/* Report an earlier pool exhaustion on this record. */
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		/* Sample and clear the flag under the mutex, print after. */
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
		ktrace_exit(td);
	}
	return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under these
 * circumstances, queue the request on the thread's process (p_ktr) to be
 * written asynchronously later.  Leaves ktrace on behalf of the caller.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
	ktrace_exit(td);
}

/*
 * Drain any pending ktrace records from the per-process queue to disk.  This
 * is used both internally before committing other records, and also on
 * system call return.
We drain all the ones we can find at the time when 330 * drain is requested, but don't keep draining after that as those events 331 * may be approximately "after" the current event. 332 */ 333 static void 334 ktr_drain(struct thread *td) 335 { 336 struct ktr_request *queued_req; 337 STAILQ_HEAD(, ktr_request) local_queue; 338 339 ktrace_assert(td); 340 sx_assert(&ktrace_sx, SX_XLOCKED); 341 342 STAILQ_INIT(&local_queue); 343 344 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 345 mtx_lock(&ktrace_mtx); 346 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 347 mtx_unlock(&ktrace_mtx); 348 349 while ((queued_req = STAILQ_FIRST(&local_queue))) { 350 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 351 ktr_writerequest(td, queued_req); 352 ktr_freerequest(queued_req); 353 } 354 } 355 } 356 357 /* 358 * Submit a trace record for immediate commit to disk -- to be used only 359 * where entering VFS is OK. First drain any pending records that may have 360 * been cached in the thread. 361 */ 362 static void 363 ktr_submitrequest(struct thread *td, struct ktr_request *req) 364 { 365 366 ktrace_assert(td); 367 368 sx_xlock(&ktrace_sx); 369 ktr_drain(td); 370 ktr_writerequest(td, req); 371 ktr_freerequest(req); 372 sx_xunlock(&ktrace_sx); 373 374 ktrace_exit(td); 375 } 376 377 static void 378 ktr_freerequest(struct ktr_request *req) 379 { 380 381 mtx_lock(&ktrace_mtx); 382 ktr_freerequest_locked(req); 383 mtx_unlock(&ktrace_mtx); 384 } 385 386 static void 387 ktr_freerequest_locked(struct ktr_request *req) 388 { 389 390 mtx_assert(&ktrace_mtx, MA_OWNED); 391 if (req->ktr_buffer != NULL) 392 free(req->ktr_buffer, M_KTRACE); 393 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 394 } 395 396 /* 397 * Disable tracing for a process and release all associated resources. 398 * The caller is responsible for releasing a reference on the returned 399 * vnode and credentials. 
400 */ 401 static void 402 ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp) 403 { 404 struct ktr_request *req; 405 406 PROC_LOCK_ASSERT(p, MA_OWNED); 407 mtx_assert(&ktrace_mtx, MA_OWNED); 408 *uc = p->p_tracecred; 409 p->p_tracecred = NULL; 410 if (vp != NULL) 411 *vp = p->p_tracevp; 412 p->p_tracevp = NULL; 413 p->p_traceflag = 0; 414 while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { 415 STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); 416 ktr_freerequest_locked(req); 417 } 418 } 419 420 void 421 ktrsyscall(code, narg, args) 422 int code, narg; 423 register_t args[]; 424 { 425 struct ktr_request *req; 426 struct ktr_syscall *ktp; 427 size_t buflen; 428 char *buf = NULL; 429 430 buflen = sizeof(register_t) * narg; 431 if (buflen > 0) { 432 buf = malloc(buflen, M_KTRACE, M_WAITOK); 433 bcopy(args, buf, buflen); 434 } 435 req = ktr_getrequest(KTR_SYSCALL); 436 if (req == NULL) { 437 if (buf != NULL) 438 free(buf, M_KTRACE); 439 return; 440 } 441 ktp = &req->ktr_data.ktr_syscall; 442 ktp->ktr_code = code; 443 ktp->ktr_narg = narg; 444 if (buflen > 0) { 445 req->ktr_header.ktr_len = buflen; 446 req->ktr_buffer = buf; 447 } 448 ktr_submitrequest(curthread, req); 449 } 450 451 void 452 ktrsysret(code, error, retval) 453 int code, error; 454 register_t retval; 455 { 456 struct ktr_request *req; 457 struct ktr_sysret *ktp; 458 459 req = ktr_getrequest(KTR_SYSRET); 460 if (req == NULL) 461 return; 462 ktp = &req->ktr_data.ktr_sysret; 463 ktp->ktr_code = code; 464 ktp->ktr_error = error; 465 ktp->ktr_retval = retval; /* what about val2 ? */ 466 ktr_submitrequest(curthread, req); 467 } 468 469 /* 470 * When a setuid process execs, disable tracing. 471 * 472 * XXX: We toss any pending asynchronous records. 
473 */ 474 void 475 ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp) 476 { 477 478 PROC_LOCK_ASSERT(p, MA_OWNED); 479 mtx_lock(&ktrace_mtx); 480 ktr_freeproc(p, uc, vp); 481 mtx_unlock(&ktrace_mtx); 482 } 483 484 /* 485 * When a process exits, drain per-process asynchronous trace records 486 * and disable tracing. 487 */ 488 void 489 ktrprocexit(struct thread *td) 490 { 491 struct proc *p; 492 struct ucred *cred; 493 struct vnode *vp; 494 int vfslocked; 495 496 p = td->td_proc; 497 if (p->p_traceflag == 0) 498 return; 499 500 ktrace_enter(td); 501 sx_xlock(&ktrace_sx); 502 ktr_drain(td); 503 sx_xunlock(&ktrace_sx); 504 PROC_LOCK(p); 505 mtx_lock(&ktrace_mtx); 506 ktr_freeproc(p, &cred, &vp); 507 mtx_unlock(&ktrace_mtx); 508 PROC_UNLOCK(p); 509 if (vp != NULL) { 510 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 511 vrele(vp); 512 VFS_UNLOCK_GIANT(vfslocked); 513 } 514 if (cred != NULL) 515 crfree(cred); 516 ktrace_exit(td); 517 } 518 519 /* 520 * When a process forks, enable tracing in the new process if needed. 521 */ 522 void 523 ktrprocfork(struct proc *p1, struct proc *p2) 524 { 525 526 PROC_LOCK_ASSERT(p1, MA_OWNED); 527 PROC_LOCK_ASSERT(p2, MA_OWNED); 528 mtx_lock(&ktrace_mtx); 529 KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode")); 530 if (p1->p_traceflag & KTRFAC_INHERIT) { 531 p2->p_traceflag = p1->p_traceflag; 532 if ((p2->p_tracevp = p1->p_tracevp) != NULL) { 533 VREF(p2->p_tracevp); 534 KASSERT(p1->p_tracecred != NULL, 535 ("ktrace vnode with no cred")); 536 p2->p_tracecred = crhold(p1->p_tracecred); 537 } 538 } 539 mtx_unlock(&ktrace_mtx); 540 } 541 542 /* 543 * When a thread returns, drain any asynchronous records generated by the 544 * system call. 
545 */ 546 void 547 ktruserret(struct thread *td) 548 { 549 550 ktrace_enter(td); 551 sx_xlock(&ktrace_sx); 552 ktr_drain(td); 553 sx_xunlock(&ktrace_sx); 554 ktrace_exit(td); 555 } 556 557 void 558 ktrnamei(path) 559 char *path; 560 { 561 struct ktr_request *req; 562 int namelen; 563 char *buf = NULL; 564 565 namelen = strlen(path); 566 if (namelen > 0) { 567 buf = malloc(namelen, M_KTRACE, M_WAITOK); 568 bcopy(path, buf, namelen); 569 } 570 req = ktr_getrequest(KTR_NAMEI); 571 if (req == NULL) { 572 if (buf != NULL) 573 free(buf, M_KTRACE); 574 return; 575 } 576 if (namelen > 0) { 577 req->ktr_header.ktr_len = namelen; 578 req->ktr_buffer = buf; 579 } 580 ktr_submitrequest(curthread, req); 581 } 582 583 void 584 ktrsysctl(name, namelen) 585 int *name; 586 u_int namelen; 587 { 588 struct ktr_request *req; 589 u_int mib[CTL_MAXNAME + 2]; 590 char *mibname; 591 size_t mibnamelen; 592 int error; 593 594 /* Lookup name of mib. */ 595 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 596 mib[0] = 0; 597 mib[1] = 1; 598 bcopy(name, mib + 2, namelen * sizeof(*name)); 599 mibnamelen = 128; 600 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 601 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 602 NULL, 0, &mibnamelen, 0); 603 if (error) { 604 free(mibname, M_KTRACE); 605 return; 606 } 607 req = ktr_getrequest(KTR_SYSCTL); 608 if (req == NULL) { 609 free(mibname, M_KTRACE); 610 return; 611 } 612 req->ktr_header.ktr_len = mibnamelen; 613 req->ktr_buffer = mibname; 614 ktr_submitrequest(curthread, req); 615 } 616 617 void 618 ktrgenio(fd, rw, uio, error) 619 int fd; 620 enum uio_rw rw; 621 struct uio *uio; 622 int error; 623 { 624 struct ktr_request *req; 625 struct ktr_genio *ktg; 626 int datalen; 627 char *buf; 628 629 if (error) { 630 free(uio, M_IOV); 631 return; 632 } 633 uio->uio_offset = 0; 634 uio->uio_rw = UIO_WRITE; 635 datalen = imin(uio->uio_resid, ktr_geniosize); 636 buf = malloc(datalen, M_KTRACE, M_WAITOK); 637 error = 
uiomove(buf, datalen, uio); 638 free(uio, M_IOV); 639 if (error) { 640 free(buf, M_KTRACE); 641 return; 642 } 643 req = ktr_getrequest(KTR_GENIO); 644 if (req == NULL) { 645 free(buf, M_KTRACE); 646 return; 647 } 648 ktg = &req->ktr_data.ktr_genio; 649 ktg->ktr_fd = fd; 650 ktg->ktr_rw = rw; 651 req->ktr_header.ktr_len = datalen; 652 req->ktr_buffer = buf; 653 ktr_submitrequest(curthread, req); 654 } 655 656 void 657 ktrpsig(sig, action, mask, code) 658 int sig; 659 sig_t action; 660 sigset_t *mask; 661 int code; 662 { 663 struct ktr_request *req; 664 struct ktr_psig *kp; 665 666 req = ktr_getrequest(KTR_PSIG); 667 if (req == NULL) 668 return; 669 kp = &req->ktr_data.ktr_psig; 670 kp->signo = (char)sig; 671 kp->action = action; 672 kp->mask = *mask; 673 kp->code = code; 674 ktr_enqueuerequest(curthread, req); 675 } 676 677 void 678 ktrcsw(out, user) 679 int out, user; 680 { 681 struct ktr_request *req; 682 struct ktr_csw *kc; 683 684 req = ktr_getrequest(KTR_CSW); 685 if (req == NULL) 686 return; 687 kc = &req->ktr_data.ktr_csw; 688 kc->out = out; 689 kc->user = user; 690 ktr_enqueuerequest(curthread, req); 691 } 692 693 void 694 ktrstruct(name, data, datalen) 695 const char *name; 696 void *data; 697 size_t datalen; 698 { 699 struct ktr_request *req; 700 char *buf = NULL; 701 size_t buflen; 702 703 if (!data) 704 datalen = 0; 705 buflen = strlen(name) + 1 + datalen; 706 buf = malloc(buflen, M_KTRACE, M_WAITOK); 707 strcpy(buf, name); 708 bcopy(data, buf + strlen(name) + 1, datalen); 709 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 710 free(buf, M_KTRACE); 711 return; 712 } 713 req->ktr_buffer = buf; 714 req->ktr_header.ktr_len = buflen; 715 ktr_submitrequest(curthread, req); 716 } 717 #endif /* KTRACE */ 718 719 /* Interface and common routines */ 720 721 #ifndef _SYS_SYSPROTO_H_ 722 struct ktrace_args { 723 char *fname; 724 int ops; 725 int facs; 726 int pid; 727 }; 728 #endif 729 /* ARGSUSED */ 730 int 731 ktrace(td, uap) 732 struct thread *td; 733 register 
struct ktrace_args *uap; 734 { 735 #ifdef KTRACE 736 register struct vnode *vp = NULL; 737 register struct proc *p; 738 struct pgrp *pg; 739 int facs = uap->facs & ~KTRFAC_ROOT; 740 int ops = KTROP(uap->ops); 741 int descend = uap->ops & KTRFLAG_DESCEND; 742 int nfound, ret = 0; 743 int flags, error = 0, vfslocked; 744 struct nameidata nd; 745 struct ucred *cred; 746 747 /* 748 * Need something to (un)trace. 749 */ 750 if (ops != KTROP_CLEARFILE && facs == 0) 751 return (EINVAL); 752 753 ktrace_enter(td); 754 if (ops != KTROP_CLEAR) { 755 /* 756 * an operation which requires a file argument. 757 */ 758 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 759 uap->fname, td); 760 flags = FREAD | FWRITE | O_NOFOLLOW; 761 error = vn_open(&nd, &flags, 0, NULL); 762 if (error) { 763 ktrace_exit(td); 764 return (error); 765 } 766 vfslocked = NDHASGIANT(&nd); 767 NDFREE(&nd, NDF_ONLY_PNBUF); 768 vp = nd.ni_vp; 769 VOP_UNLOCK(vp, 0); 770 if (vp->v_type != VREG) { 771 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 772 VFS_UNLOCK_GIANT(vfslocked); 773 ktrace_exit(td); 774 return (EACCES); 775 } 776 VFS_UNLOCK_GIANT(vfslocked); 777 } 778 /* 779 * Clear all uses of the tracefile. 
780 */ 781 if (ops == KTROP_CLEARFILE) { 782 int vrele_count; 783 784 vrele_count = 0; 785 sx_slock(&allproc_lock); 786 FOREACH_PROC_IN_SYSTEM(p) { 787 PROC_LOCK(p); 788 if (p->p_tracevp == vp) { 789 if (ktrcanset(td, p)) { 790 mtx_lock(&ktrace_mtx); 791 ktr_freeproc(p, &cred, NULL); 792 mtx_unlock(&ktrace_mtx); 793 vrele_count++; 794 crfree(cred); 795 } else 796 error = EPERM; 797 } 798 PROC_UNLOCK(p); 799 } 800 sx_sunlock(&allproc_lock); 801 if (vrele_count > 0) { 802 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 803 while (vrele_count-- > 0) 804 vrele(vp); 805 VFS_UNLOCK_GIANT(vfslocked); 806 } 807 goto done; 808 } 809 /* 810 * do it 811 */ 812 sx_slock(&proctree_lock); 813 if (uap->pid < 0) { 814 /* 815 * by process group 816 */ 817 pg = pgfind(-uap->pid); 818 if (pg == NULL) { 819 sx_sunlock(&proctree_lock); 820 error = ESRCH; 821 goto done; 822 } 823 /* 824 * ktrops() may call vrele(). Lock pg_members 825 * by the proctree_lock rather than pg_mtx. 826 */ 827 PGRP_UNLOCK(pg); 828 nfound = 0; 829 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 830 PROC_LOCK(p); 831 if (p_cansee(td, p) != 0) { 832 PROC_UNLOCK(p); 833 continue; 834 } 835 nfound++; 836 if (descend) 837 ret |= ktrsetchildren(td, p, ops, facs, vp); 838 else 839 ret |= ktrops(td, p, ops, facs, vp); 840 } 841 if (nfound == 0) { 842 sx_sunlock(&proctree_lock); 843 error = ESRCH; 844 goto done; 845 } 846 } else { 847 /* 848 * by pid 849 */ 850 p = pfind(uap->pid); 851 if (p == NULL) 852 error = ESRCH; 853 else 854 error = p_cansee(td, p); 855 if (error) { 856 if (p != NULL) 857 PROC_UNLOCK(p); 858 sx_sunlock(&proctree_lock); 859 goto done; 860 } 861 if (descend) 862 ret |= ktrsetchildren(td, p, ops, facs, vp); 863 else 864 ret |= ktrops(td, p, ops, facs, vp); 865 } 866 sx_sunlock(&proctree_lock); 867 if (!ret) 868 error = EPERM; 869 done: 870 if (vp != NULL) { 871 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 872 (void) vn_close(vp, FWRITE, td->td_ucred, td); 873 VFS_UNLOCK_GIANT(vfslocked); 874 } 875 
ktrace_exit(td); 876 return (error); 877 #else /* !KTRACE */ 878 return (ENOSYS); 879 #endif /* KTRACE */ 880 } 881 882 /* ARGSUSED */ 883 int 884 utrace(td, uap) 885 struct thread *td; 886 register struct utrace_args *uap; 887 { 888 889 #ifdef KTRACE 890 struct ktr_request *req; 891 void *cp; 892 int error; 893 894 if (!KTRPOINT(td, KTR_USER)) 895 return (0); 896 if (uap->len > KTR_USER_MAXLEN) 897 return (EINVAL); 898 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 899 error = copyin(uap->addr, cp, uap->len); 900 if (error) { 901 free(cp, M_KTRACE); 902 return (error); 903 } 904 req = ktr_getrequest(KTR_USER); 905 if (req == NULL) { 906 free(cp, M_KTRACE); 907 return (ENOMEM); 908 } 909 req->ktr_buffer = cp; 910 req->ktr_header.ktr_len = uap->len; 911 ktr_submitrequest(td, req); 912 return (0); 913 #else /* !KTRACE */ 914 return (ENOSYS); 915 #endif /* KTRACE */ 916 } 917 918 #ifdef KTRACE 919 static int 920 ktrops(td, p, ops, facs, vp) 921 struct thread *td; 922 struct proc *p; 923 int ops, facs; 924 struct vnode *vp; 925 { 926 struct vnode *tracevp = NULL; 927 struct ucred *tracecred = NULL; 928 929 PROC_LOCK_ASSERT(p, MA_OWNED); 930 if (!ktrcanset(td, p)) { 931 PROC_UNLOCK(p); 932 return (0); 933 } 934 if (p->p_flag & P_WEXIT) { 935 /* If the process is exiting, just ignore it. 
*/ 936 PROC_UNLOCK(p); 937 return (1); 938 } 939 mtx_lock(&ktrace_mtx); 940 if (ops == KTROP_SET) { 941 if (p->p_tracevp != vp) { 942 /* 943 * if trace file already in use, relinquish below 944 */ 945 tracevp = p->p_tracevp; 946 VREF(vp); 947 p->p_tracevp = vp; 948 } 949 if (p->p_tracecred != td->td_ucred) { 950 tracecred = p->p_tracecred; 951 p->p_tracecred = crhold(td->td_ucred); 952 } 953 p->p_traceflag |= facs; 954 if (priv_check(td, PRIV_KTRACE) == 0) 955 p->p_traceflag |= KTRFAC_ROOT; 956 } else { 957 /* KTROP_CLEAR */ 958 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) 959 /* no more tracing */ 960 ktr_freeproc(p, &tracecred, &tracevp); 961 } 962 mtx_unlock(&ktrace_mtx); 963 PROC_UNLOCK(p); 964 if (tracevp != NULL) { 965 int vfslocked; 966 967 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 968 vrele(tracevp); 969 VFS_UNLOCK_GIANT(vfslocked); 970 } 971 if (tracecred != NULL) 972 crfree(tracecred); 973 974 return (1); 975 } 976 977 static int 978 ktrsetchildren(td, top, ops, facs, vp) 979 struct thread *td; 980 struct proc *top; 981 int ops, facs; 982 struct vnode *vp; 983 { 984 register struct proc *p; 985 register int ret = 0; 986 987 p = top; 988 PROC_LOCK_ASSERT(p, MA_OWNED); 989 sx_assert(&proctree_lock, SX_LOCKED); 990 for (;;) { 991 ret |= ktrops(td, p, ops, facs, vp); 992 /* 993 * If this process has children, descend to them next, 994 * otherwise do any siblings, and if done with this level, 995 * follow back up the tree (but not past top). 
996 */ 997 if (!LIST_EMPTY(&p->p_children)) 998 p = LIST_FIRST(&p->p_children); 999 else for (;;) { 1000 if (p == top) 1001 return (ret); 1002 if (LIST_NEXT(p, p_sibling)) { 1003 p = LIST_NEXT(p, p_sibling); 1004 break; 1005 } 1006 p = p->p_pptr; 1007 } 1008 PROC_LOCK(p); 1009 } 1010 /*NOTREACHED*/ 1011 } 1012 1013 static void 1014 ktr_writerequest(struct thread *td, struct ktr_request *req) 1015 { 1016 struct ktr_header *kth; 1017 struct vnode *vp; 1018 struct proc *p; 1019 struct ucred *cred; 1020 struct uio auio; 1021 struct iovec aiov[3]; 1022 struct mount *mp; 1023 int datalen, buflen, vrele_count; 1024 int error, vfslocked; 1025 1026 /* 1027 * We hold the vnode and credential for use in I/O in case ktrace is 1028 * disabled on the process as we write out the request. 1029 * 1030 * XXXRW: This is not ideal: we could end up performing a write after 1031 * the vnode has been closed. 1032 */ 1033 mtx_lock(&ktrace_mtx); 1034 vp = td->td_proc->p_tracevp; 1035 cred = td->td_proc->p_tracecred; 1036 1037 /* 1038 * If vp is NULL, the vp has been cleared out from under this 1039 * request, so just drop it. Make sure the credential and vnode are 1040 * in sync: we should have both or neither. 
1041 */ 1042 if (vp == NULL) { 1043 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 1044 mtx_unlock(&ktrace_mtx); 1045 return; 1046 } 1047 VREF(vp); 1048 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 1049 crhold(cred); 1050 mtx_unlock(&ktrace_mtx); 1051 1052 kth = &req->ktr_header; 1053 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < 1054 sizeof(data_lengths) / sizeof(data_lengths[0]), 1055 ("data_lengths array overflow")); 1056 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 1057 buflen = kth->ktr_len; 1058 auio.uio_iov = &aiov[0]; 1059 auio.uio_offset = 0; 1060 auio.uio_segflg = UIO_SYSSPACE; 1061 auio.uio_rw = UIO_WRITE; 1062 aiov[0].iov_base = (caddr_t)kth; 1063 aiov[0].iov_len = sizeof(struct ktr_header); 1064 auio.uio_resid = sizeof(struct ktr_header); 1065 auio.uio_iovcnt = 1; 1066 auio.uio_td = td; 1067 if (datalen != 0) { 1068 aiov[1].iov_base = (caddr_t)&req->ktr_data; 1069 aiov[1].iov_len = datalen; 1070 auio.uio_resid += datalen; 1071 auio.uio_iovcnt++; 1072 kth->ktr_len += datalen; 1073 } 1074 if (buflen != 0) { 1075 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 1076 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 1077 aiov[auio.uio_iovcnt].iov_len = buflen; 1078 auio.uio_resid += buflen; 1079 auio.uio_iovcnt++; 1080 } 1081 1082 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1083 vn_start_write(vp, &mp, V_WAIT); 1084 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1085 #ifdef MAC 1086 error = mac_vnode_check_write(cred, NOCRED, vp); 1087 if (error == 0) 1088 #endif 1089 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 1090 VOP_UNLOCK(vp, 0); 1091 vn_finished_write(mp); 1092 crfree(cred); 1093 if (!error) { 1094 vrele(vp); 1095 VFS_UNLOCK_GIANT(vfslocked); 1096 return; 1097 } 1098 VFS_UNLOCK_GIANT(vfslocked); 1099 1100 /* 1101 * If error encountered, give up tracing on this vnode. 
We defer 1102 * all the vrele()'s on the vnode until after we are finished walking 1103 * the various lists to avoid needlessly holding locks. 1104 * NB: at this point we still hold the vnode reference that must 1105 * not go away as we need the valid vnode to compare with. Thus let 1106 * vrele_count start at 1 and the reference will be freed 1107 * by the loop at the end after our last use of vp. 1108 */ 1109 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1110 error); 1111 vrele_count = 1; 1112 /* 1113 * First, clear this vnode from being used by any processes in the 1114 * system. 1115 * XXX - If one process gets an EPERM writing to the vnode, should 1116 * we really do this? Other processes might have suitable 1117 * credentials for the operation. 1118 */ 1119 cred = NULL; 1120 sx_slock(&allproc_lock); 1121 FOREACH_PROC_IN_SYSTEM(p) { 1122 PROC_LOCK(p); 1123 if (p->p_tracevp == vp) { 1124 mtx_lock(&ktrace_mtx); 1125 ktr_freeproc(p, &cred, NULL); 1126 mtx_unlock(&ktrace_mtx); 1127 vrele_count++; 1128 } 1129 PROC_UNLOCK(p); 1130 if (cred != NULL) { 1131 crfree(cred); 1132 cred = NULL; 1133 } 1134 } 1135 sx_sunlock(&allproc_lock); 1136 1137 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1138 while (vrele_count-- > 0) 1139 vrele(vp); 1140 VFS_UNLOCK_GIANT(vfslocked); 1141 } 1142 1143 /* 1144 * Return true if caller has permission to set the ktracing state 1145 * of target. Essentially, the target can't possess any 1146 * more permissions than the caller. KTRFAC_ROOT signifies that 1147 * root previously set the tracing status on the target process, and 1148 * so, only root may further change it. 
1149 */ 1150 static int 1151 ktrcanset(td, targetp) 1152 struct thread *td; 1153 struct proc *targetp; 1154 { 1155 1156 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1157 if (targetp->p_traceflag & KTRFAC_ROOT && 1158 priv_check(td, PRIV_KTRACE)) 1159 return (0); 1160 1161 if (p_candebug(td, targetp) != 0) 1162 return (0); 1163 1164 return (1); 1165 } 1166 1167 #endif /* KTRACE */ 1168