1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_ktrace.h" 38 39 #include <sys/param.h> 40 #include <sys/capsicum.h> 41 #include <sys/systm.h> 42 #include <sys/fcntl.h> 43 #include <sys/kernel.h> 44 #include <sys/kthread.h> 45 #include <sys/lock.h> 46 #include <sys/mutex.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/namei.h> 50 #include <sys/priv.h> 51 #include <sys/proc.h> 52 #include <sys/unistd.h> 53 #include <sys/vnode.h> 54 #include <sys/socket.h> 55 #include <sys/stat.h> 56 #include <sys/ktrace.h> 57 #include <sys/sx.h> 58 #include <sys/sysctl.h> 59 #include <sys/sysent.h> 60 #include <sys/syslog.h> 61 #include <sys/sysproto.h> 62 63 #include <security/mac/mac_framework.h> 64 65 /* 66 * The ktrace facility allows the tracing of certain key events in user space 67 * processes, such as system calls, signal delivery, context switches, and 68 * user generated events using utrace(2). It works by streaming event 69 * records and data to a vnode associated with the process using the 70 * ktrace(2) system call. In general, records can be written directly from 71 * the context that generates the event. One important exception to this is 72 * during a context switch, where sleeping is not permitted. To handle this 73 * case, trace events are generated using in-kernel ktr_request records, and 74 * then delivered to disk at a convenient moment -- either immediately, the 75 * next traceable event, at system call return, or at process exit. 76 * 77 * When dealing with multiple threads or processes writing to the same event 78 * log, ordering guarantees are weak: specifically, if an event has multiple 79 * records (i.e., system call enter and return), they may be interlaced with 80 * records from another event. Process and thread ID information is provided 81 * in the record, and user applications can de-interlace events if required. 82 */ 83 84 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 85 86 #ifdef KTRACE 87 88 FEATURE(ktrace, "Kernel support for system-call tracing"); 89 90 #ifndef KTRACE_REQUEST_POOL 91 #define KTRACE_REQUEST_POOL 100 92 #endif 93 94 struct ktr_request { 95 struct ktr_header ktr_header; 96 void *ktr_buffer; 97 union { 98 struct ktr_proc_ctor ktr_proc_ctor; 99 struct ktr_cap_fail ktr_cap_fail; 100 struct ktr_syscall ktr_syscall; 101 struct ktr_sysret ktr_sysret; 102 struct ktr_genio ktr_genio; 103 struct ktr_psig ktr_psig; 104 struct ktr_csw ktr_csw; 105 struct ktr_fault ktr_fault; 106 struct ktr_faultend ktr_faultend; 107 } ktr_data; 108 STAILQ_ENTRY(ktr_request) ktr_list; 109 }; 110 111 static int data_lengths[] = { 112 [KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args), 113 [KTR_SYSRET] = sizeof(struct ktr_sysret), 114 [KTR_NAMEI] = 0, 115 [KTR_GENIO] = sizeof(struct ktr_genio), 116 [KTR_PSIG] = sizeof(struct ktr_psig), 117 [KTR_CSW] = sizeof(struct ktr_csw), 118 [KTR_USER] = 0, 119 [KTR_STRUCT] = 0, 120 [KTR_SYSCTL] = 0, 121 [KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor), 122 [KTR_PROCDTOR] = 0, 123 [KTR_CAPFAIL] = sizeof(struct ktr_cap_fail), 124 [KTR_FAULT] = sizeof(struct ktr_fault), 125 [KTR_FAULTEND] = sizeof(struct ktr_faultend), 126 }; 127 128 static STAILQ_HEAD(, ktr_request) ktr_free; 129 130 static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 131 132 static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 133 TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 134 135 static u_int ktr_geniosize = PAGE_SIZE; 136 SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RWTUN, &ktr_geniosize, 137 0, "Maximum size of genio event payload"); 138 139 static int print_message = 1; 140 static struct mtx ktrace_mtx; 141 static struct sx ktrace_sx; 142 143 static void ktrace_init(void *dummy); 144 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 145 static u_int ktrace_resize_pool(u_int oldsize, u_int newsize); 146 static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type); 147 static struct ktr_request *ktr_getrequest(int type); 148 static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 149 static void ktr_freeproc(struct proc *p, struct ucred **uc, 150 struct vnode **vp); 151 static void ktr_freerequest(struct ktr_request *req); 152 static void ktr_freerequest_locked(struct ktr_request *req); 153 static void ktr_writerequest(struct thread *td, struct ktr_request *req); 154 static int ktrcanset(struct thread *,struct proc *); 155 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 156 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 157 static void ktrprocctor_entered(struct thread *, struct proc *); 158 159 /* 160 * ktrace itself generates events, such as context switches, which we do not 161 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 162 * whether or not it is in a region where tracing of events should be 163 * suppressed. 164 */ 165 static void 166 ktrace_enter(struct thread *td) 167 { 168 169 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 170 td->td_pflags |= TDP_INKTRACE; 171 } 172 173 static void 174 ktrace_exit(struct thread *td) 175 { 176 177 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 178 td->td_pflags &= ~TDP_INKTRACE; 179 } 180 181 static void 182 ktrace_assert(struct thread *td) 183 { 184 185 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 186 } 187 188 static void 189 ktrace_init(void *dummy) 190 { 191 struct ktr_request *req; 192 int i; 193 194 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 195 sx_init(&ktrace_sx, "ktrace_sx"); 196 STAILQ_INIT(&ktr_free); 197 for (i = 0; i < ktr_requestpool; i++) { 198 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 199 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 200 } 201 } 202 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 203 204 static int 205 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 206 { 207 struct thread *td; 208 u_int newsize, oldsize, wantsize; 209 int error; 210 211 /* Handle easy read-only case first to avoid warnings from GCC. */ 212 if (!req->newptr) { 213 oldsize = ktr_requestpool; 214 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 215 } 216 217 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 218 if (error) 219 return (error); 220 td = curthread; 221 ktrace_enter(td); 222 oldsize = ktr_requestpool; 223 newsize = ktrace_resize_pool(oldsize, wantsize); 224 ktrace_exit(td); 225 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 226 if (error) 227 return (error); 228 if (wantsize > oldsize && newsize < wantsize) 229 return (ENOSPC); 230 return (0); 231 } 232 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 233 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", 234 "Pool buffer size for ktrace(1)"); 235 236 static u_int 237 ktrace_resize_pool(u_int oldsize, u_int newsize) 238 { 239 STAILQ_HEAD(, ktr_request) ktr_new; 240 struct ktr_request *req; 241 int bound; 242 243 print_message = 1; 244 bound = newsize - oldsize; 245 if (bound == 0) 246 return (ktr_requestpool); 247 if (bound < 0) { 248 mtx_lock(&ktrace_mtx); 249 /* Shrink pool down to newsize if possible. */ 250 while (bound++ < 0) { 251 req = STAILQ_FIRST(&ktr_free); 252 if (req == NULL) 253 break; 254 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 255 ktr_requestpool--; 256 free(req, M_KTRACE); 257 } 258 } else { 259 /* Grow pool up to newsize. */ 260 STAILQ_INIT(&ktr_new); 261 while (bound-- > 0) { 262 req = malloc(sizeof(struct ktr_request), M_KTRACE, 263 M_WAITOK); 264 STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list); 265 } 266 mtx_lock(&ktrace_mtx); 267 STAILQ_CONCAT(&ktr_free, &ktr_new); 268 ktr_requestpool += (newsize - oldsize); 269 } 270 mtx_unlock(&ktrace_mtx); 271 return (ktr_requestpool); 272 } 273 274 /* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */ 275 CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == 276 (sizeof((struct thread *)NULL)->td_name)); 277 278 static struct ktr_request * 279 ktr_getrequest_entered(struct thread *td, int type) 280 { 281 struct ktr_request *req; 282 struct proc *p = td->td_proc; 283 int pm; 284 285 mtx_lock(&ktrace_mtx); 286 if (!KTRCHECK(td, type)) { 287 mtx_unlock(&ktrace_mtx); 288 return (NULL); 289 } 290 req = STAILQ_FIRST(&ktr_free); 291 if (req != NULL) { 292 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 293 req->ktr_header.ktr_type = type; 294 if (p->p_traceflag & KTRFAC_DROP) { 295 req->ktr_header.ktr_type |= KTR_DROP; 296 p->p_traceflag &= ~KTRFAC_DROP; 297 } 298 mtx_unlock(&ktrace_mtx); 299 microtime(&req->ktr_header.ktr_time); 300 req->ktr_header.ktr_pid = p->p_pid; 301 req->ktr_header.ktr_tid = td->td_tid; 302 bcopy(td->td_name, req->ktr_header.ktr_comm, 303 sizeof(req->ktr_header.ktr_comm)); 304 req->ktr_buffer = NULL; 305 req->ktr_header.ktr_len = 0; 306 } else { 307 p->p_traceflag |= KTRFAC_DROP; 308 pm = print_message; 309 print_message = 0; 310 mtx_unlock(&ktrace_mtx); 311 if (pm) 312 printf("Out of ktrace request objects.\n"); 313 } 314 return (req); 315 } 316 317 static struct ktr_request * 318 ktr_getrequest(int type) 319 { 320 struct thread *td = curthread; 321 struct ktr_request *req; 322 323 ktrace_enter(td); 324 req = ktr_getrequest_entered(td, type); 325 if (req == NULL) 326 ktrace_exit(td); 327 328 return (req); 329 } 330 331 /* 332 * Some trace generation environments don't permit direct access to VFS, 333 * such as during a context switch where sleeping is not allowed. Under these 334 * circumstances, queue a request to the thread to be written asynchronously 335 * later. 336 */ 337 static void 338 ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 339 { 340 341 mtx_lock(&ktrace_mtx); 342 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 343 mtx_unlock(&ktrace_mtx); 344 } 345 346 /* 347 * Drain any pending ktrace records from the per-thread queue to disk. This 348 * is used both internally before committing other records, and also on 349 * system call return. We drain all the ones we can find at the time when 350 * drain is requested, but don't keep draining after that as those events 351 * may be approximately "after" the current event. 352 */ 353 static void 354 ktr_drain(struct thread *td) 355 { 356 struct ktr_request *queued_req; 357 STAILQ_HEAD(, ktr_request) local_queue; 358 359 ktrace_assert(td); 360 sx_assert(&ktrace_sx, SX_XLOCKED); 361 362 STAILQ_INIT(&local_queue); 363 364 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 365 mtx_lock(&ktrace_mtx); 366 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 367 mtx_unlock(&ktrace_mtx); 368 369 while ((queued_req = STAILQ_FIRST(&local_queue))) { 370 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 371 ktr_writerequest(td, queued_req); 372 ktr_freerequest(queued_req); 373 } 374 } 375 } 376 377 /* 378 * Submit a trace record for immediate commit to disk -- to be used only 379 * where entering VFS is OK. First drain any pending records that may have 380 * been cached in the thread. 381 */ 382 static void 383 ktr_submitrequest(struct thread *td, struct ktr_request *req) 384 { 385 386 ktrace_assert(td); 387 388 sx_xlock(&ktrace_sx); 389 ktr_drain(td); 390 ktr_writerequest(td, req); 391 ktr_freerequest(req); 392 sx_xunlock(&ktrace_sx); 393 ktrace_exit(td); 394 } 395 396 static void 397 ktr_freerequest(struct ktr_request *req) 398 { 399 400 mtx_lock(&ktrace_mtx); 401 ktr_freerequest_locked(req); 402 mtx_unlock(&ktrace_mtx); 403 } 404 405 static void 406 ktr_freerequest_locked(struct ktr_request *req) 407 { 408 409 mtx_assert(&ktrace_mtx, MA_OWNED); 410 if (req->ktr_buffer != NULL) 411 free(req->ktr_buffer, M_KTRACE); 412 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 413 } 414 415 /* 416 * Disable tracing for a process and release all associated resources. 417 * The caller is responsible for releasing a reference on the returned 418 * vnode and credentials. 419 */ 420 static void 421 ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp) 422 { 423 struct ktr_request *req; 424 425 PROC_LOCK_ASSERT(p, MA_OWNED); 426 mtx_assert(&ktrace_mtx, MA_OWNED); 427 *uc = p->p_tracecred; 428 p->p_tracecred = NULL; 429 if (vp != NULL) 430 *vp = p->p_tracevp; 431 p->p_tracevp = NULL; 432 p->p_traceflag = 0; 433 while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { 434 STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); 435 ktr_freerequest_locked(req); 436 } 437 } 438 439 void 440 ktrsyscall(code, narg, args) 441 int code, narg; 442 register_t args[]; 443 { 444 struct ktr_request *req; 445 struct ktr_syscall *ktp; 446 size_t buflen; 447 char *buf = NULL; 448 449 buflen = sizeof(register_t) * narg; 450 if (buflen > 0) { 451 buf = malloc(buflen, M_KTRACE, M_WAITOK); 452 bcopy(args, buf, buflen); 453 } 454 req = ktr_getrequest(KTR_SYSCALL); 455 if (req == NULL) { 456 if (buf != NULL) 457 free(buf, M_KTRACE); 458 return; 459 } 460 ktp = &req->ktr_data.ktr_syscall; 461 ktp->ktr_code = code; 462 ktp->ktr_narg = narg; 463 if (buflen > 0) { 464 req->ktr_header.ktr_len = buflen; 465 req->ktr_buffer = buf; 466 } 467 ktr_submitrequest(curthread, req); 468 } 469 470 void 471 ktrsysret(code, error, retval) 472 int code, error; 473 register_t retval; 474 { 475 struct ktr_request *req; 476 struct ktr_sysret *ktp; 477 478 req = ktr_getrequest(KTR_SYSRET); 479 if (req == NULL) 480 return; 481 ktp = &req->ktr_data.ktr_sysret; 482 ktp->ktr_code = code; 483 ktp->ktr_error = error; 484 ktp->ktr_retval = ((error == 0) ? retval: 0); /* what about val2 ? */ 485 ktr_submitrequest(curthread, req); 486 } 487 488 /* 489 * When a setuid process execs, disable tracing. 490 * 491 * XXX: We toss any pending asynchronous records. 492 */ 493 void 494 ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp) 495 { 496 497 PROC_LOCK_ASSERT(p, MA_OWNED); 498 mtx_lock(&ktrace_mtx); 499 ktr_freeproc(p, uc, vp); 500 mtx_unlock(&ktrace_mtx); 501 } 502 503 /* 504 * When a process exits, drain per-process asynchronous trace records 505 * and disable tracing. 506 */ 507 void 508 ktrprocexit(struct thread *td) 509 { 510 struct ktr_request *req; 511 struct proc *p; 512 struct ucred *cred; 513 struct vnode *vp; 514 515 p = td->td_proc; 516 if (p->p_traceflag == 0) 517 return; 518 519 ktrace_enter(td); 520 req = ktr_getrequest_entered(td, KTR_PROCDTOR); 521 if (req != NULL) 522 ktr_enqueuerequest(td, req); 523 sx_xlock(&ktrace_sx); 524 ktr_drain(td); 525 sx_xunlock(&ktrace_sx); 526 PROC_LOCK(p); 527 mtx_lock(&ktrace_mtx); 528 ktr_freeproc(p, &cred, &vp); 529 mtx_unlock(&ktrace_mtx); 530 PROC_UNLOCK(p); 531 if (vp != NULL) 532 vrele(vp); 533 if (cred != NULL) 534 crfree(cred); 535 ktrace_exit(td); 536 } 537 538 static void 539 ktrprocctor_entered(struct thread *td, struct proc *p) 540 { 541 struct ktr_proc_ctor *ktp; 542 struct ktr_request *req; 543 struct thread *td2; 544 545 ktrace_assert(td); 546 td2 = FIRST_THREAD_IN_PROC(p); 547 req = ktr_getrequest_entered(td2, KTR_PROCCTOR); 548 if (req == NULL) 549 return; 550 ktp = &req->ktr_data.ktr_proc_ctor; 551 ktp->sv_flags = p->p_sysent->sv_flags; 552 ktr_enqueuerequest(td2, req); 553 } 554 555 void 556 ktrprocctor(struct proc *p) 557 { 558 struct thread *td = curthread; 559 560 if ((p->p_traceflag & KTRFAC_MASK) == 0) 561 return; 562 563 ktrace_enter(td); 564 ktrprocctor_entered(td, p); 565 ktrace_exit(td); 566 } 567 568 /* 569 * When a process forks, enable tracing in the new process if needed. 570 */ 571 void 572 ktrprocfork(struct proc *p1, struct proc *p2) 573 { 574 575 MPASS(p2->p_tracevp == NULL); 576 MPASS(p2->p_traceflag == 0); 577 578 if (p1->p_traceflag == 0) 579 return; 580 581 PROC_LOCK(p1); 582 mtx_lock(&ktrace_mtx); 583 if (p1->p_traceflag & KTRFAC_INHERIT) { 584 p2->p_traceflag = p1->p_traceflag; 585 if ((p2->p_tracevp = p1->p_tracevp) != NULL) { 586 VREF(p2->p_tracevp); 587 KASSERT(p1->p_tracecred != NULL, 588 ("ktrace vnode with no cred")); 589 p2->p_tracecred = crhold(p1->p_tracecred); 590 } 591 } 592 mtx_unlock(&ktrace_mtx); 593 PROC_UNLOCK(p1); 594 595 ktrprocctor(p2); 596 } 597 598 /* 599 * When a thread returns, drain any asynchronous records generated by the 600 * system call. 601 */ 602 void 603 ktruserret(struct thread *td) 604 { 605 606 ktrace_enter(td); 607 sx_xlock(&ktrace_sx); 608 ktr_drain(td); 609 sx_xunlock(&ktrace_sx); 610 ktrace_exit(td); 611 } 612 613 void 614 ktrnamei(path) 615 char *path; 616 { 617 struct ktr_request *req; 618 int namelen; 619 char *buf = NULL; 620 621 namelen = strlen(path); 622 if (namelen > 0) { 623 buf = malloc(namelen, M_KTRACE, M_WAITOK); 624 bcopy(path, buf, namelen); 625 } 626 req = ktr_getrequest(KTR_NAMEI); 627 if (req == NULL) { 628 if (buf != NULL) 629 free(buf, M_KTRACE); 630 return; 631 } 632 if (namelen > 0) { 633 req->ktr_header.ktr_len = namelen; 634 req->ktr_buffer = buf; 635 } 636 ktr_submitrequest(curthread, req); 637 } 638 639 void 640 ktrsysctl(name, namelen) 641 int *name; 642 u_int namelen; 643 { 644 struct ktr_request *req; 645 u_int mib[CTL_MAXNAME + 2]; 646 char *mibname; 647 size_t mibnamelen; 648 int error; 649 650 /* Lookup name of mib. */ 651 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 652 mib[0] = 0; 653 mib[1] = 1; 654 bcopy(name, mib + 2, namelen * sizeof(*name)); 655 mibnamelen = 128; 656 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 657 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 658 NULL, 0, &mibnamelen, 0); 659 if (error) { 660 free(mibname, M_KTRACE); 661 return; 662 } 663 req = ktr_getrequest(KTR_SYSCTL); 664 if (req == NULL) { 665 free(mibname, M_KTRACE); 666 return; 667 } 668 req->ktr_header.ktr_len = mibnamelen; 669 req->ktr_buffer = mibname; 670 ktr_submitrequest(curthread, req); 671 } 672 673 void 674 ktrgenio(fd, rw, uio, error) 675 int fd; 676 enum uio_rw rw; 677 struct uio *uio; 678 int error; 679 { 680 struct ktr_request *req; 681 struct ktr_genio *ktg; 682 int datalen; 683 char *buf; 684 685 if (error) { 686 free(uio, M_IOV); 687 return; 688 } 689 uio->uio_offset = 0; 690 uio->uio_rw = UIO_WRITE; 691 datalen = MIN(uio->uio_resid, ktr_geniosize); 692 buf = malloc(datalen, M_KTRACE, M_WAITOK); 693 error = uiomove(buf, datalen, uio); 694 free(uio, M_IOV); 695 if (error) { 696 free(buf, M_KTRACE); 697 return; 698 } 699 req = ktr_getrequest(KTR_GENIO); 700 if (req == NULL) { 701 free(buf, M_KTRACE); 702 return; 703 } 704 ktg = &req->ktr_data.ktr_genio; 705 ktg->ktr_fd = fd; 706 ktg->ktr_rw = rw; 707 req->ktr_header.ktr_len = datalen; 708 req->ktr_buffer = buf; 709 ktr_submitrequest(curthread, req); 710 } 711 712 void 713 ktrpsig(sig, action, mask, code) 714 int sig; 715 sig_t action; 716 sigset_t *mask; 717 int code; 718 { 719 struct thread *td = curthread; 720 struct ktr_request *req; 721 struct ktr_psig *kp; 722 723 req = ktr_getrequest(KTR_PSIG); 724 if (req == NULL) 725 return; 726 kp = &req->ktr_data.ktr_psig; 727 kp->signo = (char)sig; 728 kp->action = action; 729 kp->mask = *mask; 730 kp->code = code; 731 ktr_enqueuerequest(td, req); 732 ktrace_exit(td); 733 } 734 735 void 736 ktrcsw(out, user, wmesg) 737 int out, user; 738 const char *wmesg; 739 { 740 struct thread *td = curthread; 741 struct ktr_request *req; 742 struct ktr_csw *kc; 743 744 req = ktr_getrequest(KTR_CSW); 745 if (req == NULL) 746 return; 747 kc = &req->ktr_data.ktr_csw; 748 kc->out = out; 749 kc->user = user; 750 if (wmesg != NULL) 751 strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg)); 752 else 753 bzero(kc->wmesg, sizeof(kc->wmesg)); 754 ktr_enqueuerequest(td, req); 755 ktrace_exit(td); 756 } 757 758 void 759 ktrstruct(name, data, datalen) 760 const char *name; 761 void *data; 762 size_t datalen; 763 { 764 struct ktr_request *req; 765 char *buf; 766 size_t buflen, namelen; 767 768 if (data == NULL) 769 datalen = 0; 770 namelen = strlen(name) + 1; 771 buflen = namelen + datalen; 772 buf = malloc(buflen, M_KTRACE, M_WAITOK); 773 strcpy(buf, name); 774 bcopy(data, buf + namelen, datalen); 775 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 776 free(buf, M_KTRACE); 777 return; 778 } 779 req->ktr_buffer = buf; 780 req->ktr_header.ktr_len = buflen; 781 ktr_submitrequest(curthread, req); 782 } 783 784 void 785 ktrcapfail(type, needed, held) 786 enum ktr_cap_fail_type type; 787 const cap_rights_t *needed; 788 const cap_rights_t *held; 789 { 790 struct thread *td = curthread; 791 struct ktr_request *req; 792 struct ktr_cap_fail *kcf; 793 794 req = ktr_getrequest(KTR_CAPFAIL); 795 if (req == NULL) 796 return; 797 kcf = &req->ktr_data.ktr_cap_fail; 798 kcf->cap_type = type; 799 if (needed != NULL) 800 kcf->cap_needed = *needed; 801 else 802 cap_rights_init(&kcf->cap_needed); 803 if (held != NULL) 804 kcf->cap_held = *held; 805 else 806 cap_rights_init(&kcf->cap_held); 807 ktr_enqueuerequest(td, req); 808 ktrace_exit(td); 809 } 810 811 void 812 ktrfault(vaddr, type) 813 vm_offset_t vaddr; 814 int type; 815 { 816 struct thread *td = curthread; 817 struct ktr_request *req; 818 struct ktr_fault *kf; 819 820 req = ktr_getrequest(KTR_FAULT); 821 if (req == NULL) 822 return; 823 kf = &req->ktr_data.ktr_fault; 824 kf->vaddr = vaddr; 825 kf->type = type; 826 ktr_enqueuerequest(td, req); 827 ktrace_exit(td); 828 } 829 830 void 831 ktrfaultend(result) 832 int result; 833 { 834 struct thread *td = curthread; 835 struct ktr_request *req; 836 struct ktr_faultend *kf; 837 838 req = ktr_getrequest(KTR_FAULTEND); 839 if (req == NULL) 840 return; 841 kf = &req->ktr_data.ktr_faultend; 842 kf->result = result; 843 ktr_enqueuerequest(td, req); 844 ktrace_exit(td); 845 } 846 #endif /* KTRACE */ 847 848 /* Interface and common routines */ 849 850 #ifndef _SYS_SYSPROTO_H_ 851 struct ktrace_args { 852 char *fname; 853 int ops; 854 int facs; 855 int pid; 856 }; 857 #endif 858 /* ARGSUSED */ 859 int 860 sys_ktrace(td, uap) 861 struct thread *td; 862 register struct ktrace_args *uap; 863 { 864 #ifdef KTRACE 865 register struct vnode *vp = NULL; 866 register struct proc *p; 867 struct pgrp *pg; 868 int facs = uap->facs & ~KTRFAC_ROOT; 869 int ops = KTROP(uap->ops); 870 int descend = uap->ops & KTRFLAG_DESCEND; 871 int nfound, ret = 0; 872 int flags, error = 0; 873 struct nameidata nd; 874 struct ucred *cred; 875 876 /* 877 * Need something to (un)trace. 878 */ 879 if (ops != KTROP_CLEARFILE && facs == 0) 880 return (EINVAL); 881 882 ktrace_enter(td); 883 if (ops != KTROP_CLEAR) { 884 /* 885 * an operation which requires a file argument. 886 */ 887 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td); 888 flags = FREAD | FWRITE | O_NOFOLLOW; 889 error = vn_open(&nd, &flags, 0, NULL); 890 if (error) { 891 ktrace_exit(td); 892 return (error); 893 } 894 NDFREE(&nd, NDF_ONLY_PNBUF); 895 vp = nd.ni_vp; 896 VOP_UNLOCK(vp, 0); 897 if (vp->v_type != VREG) { 898 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 899 ktrace_exit(td); 900 return (EACCES); 901 } 902 } 903 /* 904 * Clear all uses of the tracefile. 905 */ 906 if (ops == KTROP_CLEARFILE) { 907 int vrele_count; 908 909 vrele_count = 0; 910 sx_slock(&allproc_lock); 911 FOREACH_PROC_IN_SYSTEM(p) { 912 PROC_LOCK(p); 913 if (p->p_tracevp == vp) { 914 if (ktrcanset(td, p)) { 915 mtx_lock(&ktrace_mtx); 916 ktr_freeproc(p, &cred, NULL); 917 mtx_unlock(&ktrace_mtx); 918 vrele_count++; 919 crfree(cred); 920 } else 921 error = EPERM; 922 } 923 PROC_UNLOCK(p); 924 } 925 sx_sunlock(&allproc_lock); 926 if (vrele_count > 0) { 927 while (vrele_count-- > 0) 928 vrele(vp); 929 } 930 goto done; 931 } 932 /* 933 * do it 934 */ 935 sx_slock(&proctree_lock); 936 if (uap->pid < 0) { 937 /* 938 * by process group 939 */ 940 pg = pgfind(-uap->pid); 941 if (pg == NULL) { 942 sx_sunlock(&proctree_lock); 943 error = ESRCH; 944 goto done; 945 } 946 /* 947 * ktrops() may call vrele(). Lock pg_members 948 * by the proctree_lock rather than pg_mtx. 949 */ 950 PGRP_UNLOCK(pg); 951 nfound = 0; 952 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 953 PROC_LOCK(p); 954 if (p->p_state == PRS_NEW || 955 p_cansee(td, p) != 0) { 956 PROC_UNLOCK(p); 957 continue; 958 } 959 nfound++; 960 if (descend) 961 ret |= ktrsetchildren(td, p, ops, facs, vp); 962 else 963 ret |= ktrops(td, p, ops, facs, vp); 964 } 965 if (nfound == 0) { 966 sx_sunlock(&proctree_lock); 967 error = ESRCH; 968 goto done; 969 } 970 } else { 971 /* 972 * by pid 973 */ 974 p = pfind(uap->pid); 975 if (p == NULL) 976 error = ESRCH; 977 else 978 error = p_cansee(td, p); 979 if (error) { 980 if (p != NULL) 981 PROC_UNLOCK(p); 982 sx_sunlock(&proctree_lock); 983 goto done; 984 } 985 if (descend) 986 ret |= ktrsetchildren(td, p, ops, facs, vp); 987 else 988 ret |= ktrops(td, p, ops, facs, vp); 989 } 990 sx_sunlock(&proctree_lock); 991 if (!ret) 992 error = EPERM; 993 done: 994 if (vp != NULL) 995 (void) vn_close(vp, FWRITE, td->td_ucred, td); 996 ktrace_exit(td); 997 return (error); 998 #else /* !KTRACE */ 999 return (ENOSYS); 1000 #endif /* KTRACE */ 1001 } 1002 1003 /* ARGSUSED */ 1004 int 1005 sys_utrace(td, uap) 1006 struct thread *td; 1007 register struct utrace_args *uap; 1008 { 1009 1010 #ifdef KTRACE 1011 struct ktr_request *req; 1012 void *cp; 1013 int error; 1014 1015 if (!KTRPOINT(td, KTR_USER)) 1016 return (0); 1017 if (uap->len > KTR_USER_MAXLEN) 1018 return (EINVAL); 1019 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 1020 error = copyin(uap->addr, cp, uap->len); 1021 if (error) { 1022 free(cp, M_KTRACE); 1023 return (error); 1024 } 1025 req = ktr_getrequest(KTR_USER); 1026 if (req == NULL) { 1027 free(cp, M_KTRACE); 1028 return (ENOMEM); 1029 } 1030 req->ktr_buffer = cp; 1031 req->ktr_header.ktr_len = uap->len; 1032 ktr_submitrequest(td, req); 1033 return (0); 1034 #else /* !KTRACE */ 1035 return (ENOSYS); 1036 #endif /* KTRACE */ 1037 } 1038 1039 #ifdef KTRACE 1040 static int 1041 ktrops(td, p, ops, facs, vp) 1042 struct thread *td; 1043 struct proc *p; 1044 int ops, facs; 1045 struct vnode *vp; 1046 { 1047 struct vnode *tracevp = NULL; 1048 struct ucred *tracecred = NULL; 1049 1050 PROC_LOCK_ASSERT(p, MA_OWNED); 1051 if (!ktrcanset(td, p)) { 1052 PROC_UNLOCK(p); 1053 return (0); 1054 } 1055 if (p->p_flag & P_WEXIT) { 1056 /* If the process is exiting, just ignore it. */ 1057 PROC_UNLOCK(p); 1058 return (1); 1059 } 1060 mtx_lock(&ktrace_mtx); 1061 if (ops == KTROP_SET) { 1062 if (p->p_tracevp != vp) { 1063 /* 1064 * if trace file already in use, relinquish below 1065 */ 1066 tracevp = p->p_tracevp; 1067 VREF(vp); 1068 p->p_tracevp = vp; 1069 } 1070 if (p->p_tracecred != td->td_ucred) { 1071 tracecred = p->p_tracecred; 1072 p->p_tracecred = crhold(td->td_ucred); 1073 } 1074 p->p_traceflag |= facs; 1075 if (priv_check(td, PRIV_KTRACE) == 0) 1076 p->p_traceflag |= KTRFAC_ROOT; 1077 } else { 1078 /* KTROP_CLEAR */ 1079 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) 1080 /* no more tracing */ 1081 ktr_freeproc(p, &tracecred, &tracevp); 1082 } 1083 mtx_unlock(&ktrace_mtx); 1084 if ((p->p_traceflag & KTRFAC_MASK) != 0) 1085 ktrprocctor_entered(td, p); 1086 PROC_UNLOCK(p); 1087 if (tracevp != NULL) 1088 vrele(tracevp); 1089 if (tracecred != NULL) 1090 crfree(tracecred); 1091 1092 return (1); 1093 } 1094 1095 static int 1096 ktrsetchildren(td, top, ops, facs, vp) 1097 struct thread *td; 1098 struct proc *top; 1099 int ops, facs; 1100 struct vnode *vp; 1101 { 1102 register struct proc *p; 1103 register int ret = 0; 1104 1105 p = top; 1106 PROC_LOCK_ASSERT(p, MA_OWNED); 1107 sx_assert(&proctree_lock, SX_LOCKED); 1108 for (;;) { 1109 ret |= ktrops(td, p, ops, facs, vp); 1110 /* 1111 * If this process has children, descend to them next, 1112 * otherwise do any siblings, and if done with this level, 1113 * follow back up the tree (but not past top). 1114 */ 1115 if (!LIST_EMPTY(&p->p_children)) 1116 p = LIST_FIRST(&p->p_children); 1117 else for (;;) { 1118 if (p == top) 1119 return (ret); 1120 if (LIST_NEXT(p, p_sibling)) { 1121 p = LIST_NEXT(p, p_sibling); 1122 break; 1123 } 1124 p = p->p_pptr; 1125 } 1126 PROC_LOCK(p); 1127 } 1128 /*NOTREACHED*/ 1129 } 1130 1131 static void 1132 ktr_writerequest(struct thread *td, struct ktr_request *req) 1133 { 1134 struct ktr_header *kth; 1135 struct vnode *vp; 1136 struct proc *p; 1137 struct ucred *cred; 1138 struct uio auio; 1139 struct iovec aiov[3]; 1140 struct mount *mp; 1141 int datalen, buflen, vrele_count; 1142 int error; 1143 1144 /* 1145 * We hold the vnode and credential for use in I/O in case ktrace is 1146 * disabled on the process as we write out the request. 1147 * 1148 * XXXRW: This is not ideal: we could end up performing a write after 1149 * the vnode has been closed. 1150 */ 1151 mtx_lock(&ktrace_mtx); 1152 vp = td->td_proc->p_tracevp; 1153 cred = td->td_proc->p_tracecred; 1154 1155 /* 1156 * If vp is NULL, the vp has been cleared out from under this 1157 * request, so just drop it. Make sure the credential and vnode are 1158 * in sync: we should have both or neither. 1159 */ 1160 if (vp == NULL) { 1161 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 1162 mtx_unlock(&ktrace_mtx); 1163 return; 1164 } 1165 VREF(vp); 1166 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 1167 crhold(cred); 1168 mtx_unlock(&ktrace_mtx); 1169 1170 kth = &req->ktr_header; 1171 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < nitems(data_lengths), 1172 ("data_lengths array overflow")); 1173 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 1174 buflen = kth->ktr_len; 1175 auio.uio_iov = &aiov[0]; 1176 auio.uio_offset = 0; 1177 auio.uio_segflg = UIO_SYSSPACE; 1178 auio.uio_rw = UIO_WRITE; 1179 aiov[0].iov_base = (caddr_t)kth; 1180 aiov[0].iov_len = sizeof(struct ktr_header); 1181 auio.uio_resid = sizeof(struct ktr_header); 1182 auio.uio_iovcnt = 1; 1183 auio.uio_td = td; 1184 if (datalen != 0) { 1185 aiov[1].iov_base = (caddr_t)&req->ktr_data; 1186 aiov[1].iov_len = datalen; 1187 auio.uio_resid += datalen; 1188 auio.uio_iovcnt++; 1189 kth->ktr_len += datalen; 1190 } 1191 if (buflen != 0) { 1192 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 1193 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 1194 aiov[auio.uio_iovcnt].iov_len = buflen; 1195 auio.uio_resid += buflen; 1196 auio.uio_iovcnt++; 1197 } 1198 1199 vn_start_write(vp, &mp, V_WAIT); 1200 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1201 #ifdef MAC 1202 error = mac_vnode_check_write(cred, NOCRED, vp); 1203 if (error == 0) 1204 #endif 1205 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 1206 VOP_UNLOCK(vp, 0); 1207 vn_finished_write(mp); 1208 crfree(cred); 1209 if (!error) { 1210 vrele(vp); 1211 return; 1212 } 1213 1214 /* 1215 * If error encountered, give up tracing on this vnode. We defer 1216 * all the vrele()'s on the vnode until after we are finished walking 1217 * the various lists to avoid needlessly holding locks. 1218 * NB: at this point we still hold the vnode reference that must 1219 * not go away as we need the valid vnode to compare with. Thus let 1220 * vrele_count start at 1 and the reference will be freed 1221 * by the loop at the end after our last use of vp. 1222 */ 1223 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1224 error); 1225 vrele_count = 1; 1226 /* 1227 * First, clear this vnode from being used by any processes in the 1228 * system. 1229 * XXX - If one process gets an EPERM writing to the vnode, should 1230 * we really do this? Other processes might have suitable 1231 * credentials for the operation. 1232 */ 1233 cred = NULL; 1234 sx_slock(&allproc_lock); 1235 FOREACH_PROC_IN_SYSTEM(p) { 1236 PROC_LOCK(p); 1237 if (p->p_tracevp == vp) { 1238 mtx_lock(&ktrace_mtx); 1239 ktr_freeproc(p, &cred, NULL); 1240 mtx_unlock(&ktrace_mtx); 1241 vrele_count++; 1242 } 1243 PROC_UNLOCK(p); 1244 if (cred != NULL) { 1245 crfree(cred); 1246 cred = NULL; 1247 } 1248 } 1249 sx_sunlock(&allproc_lock); 1250 1251 while (vrele_count-- > 0) 1252 vrele(vp); 1253 } 1254 1255 /* 1256 * Return true if caller has permission to set the ktracing state 1257 * of target. Essentially, the target can't possess any 1258 * more permissions than the caller. KTRFAC_ROOT signifies that 1259 * root previously set the tracing status on the target process, and 1260 * so, only root may further change it. 1261 */ 1262 static int 1263 ktrcanset(td, targetp) 1264 struct thread *td; 1265 struct proc *targetp; 1266 { 1267 1268 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1269 if (targetp->p_traceflag & KTRFAC_ROOT && 1270 priv_check(td, PRIV_KTRACE)) 1271 return (0); 1272 1273 if (p_candebug(td, targetp) != 0) 1274 return (0); 1275 1276 return (1); 1277 } 1278 1279 #endif /* KTRACE */ 1280