1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_ktrace.h" 38 39 #include <sys/param.h> 40 #include <sys/capability.h> 41 #include <sys/systm.h> 42 #include <sys/fcntl.h> 43 #include <sys/kernel.h> 44 #include <sys/kthread.h> 45 #include <sys/lock.h> 46 #include <sys/mutex.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/namei.h> 50 #include <sys/priv.h> 51 #include <sys/proc.h> 52 #include <sys/unistd.h> 53 #include <sys/vnode.h> 54 #include <sys/socket.h> 55 #include <sys/stat.h> 56 #include <sys/ktrace.h> 57 #include <sys/sx.h> 58 #include <sys/sysctl.h> 59 #include <sys/sysent.h> 60 #include <sys/syslog.h> 61 #include <sys/sysproto.h> 62 63 #include <security/mac/mac_framework.h> 64 65 /* 66 * The ktrace facility allows the tracing of certain key events in user space 67 * processes, such as system calls, signal delivery, context switches, and 68 * user generated events using utrace(2). It works by streaming event 69 * records and data to a vnode associated with the process using the 70 * ktrace(2) system call. In general, records can be written directly from 71 * the context that generates the event. One important exception to this is 72 * during a context switch, where sleeping is not permitted. To handle this 73 * case, trace events are generated using in-kernel ktr_request records, and 74 * then delivered to disk at a convenient moment -- either immediately, the 75 * next traceable event, at system call return, or at process exit. 76 * 77 * When dealing with multiple threads or processes writing to the same event 78 * log, ordering guarantees are weak: specifically, if an event has multiple 79 * records (i.e., system call enter and return), they may be interlaced with 80 * records from another event. Process and thread ID information is provided 81 * in the record, and user applications can de-interlace events if required. 82 */ 83 84 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 85 86 #ifdef KTRACE 87 88 FEATURE(ktrace, "Kernel support for system-call tracing"); 89 90 #ifndef KTRACE_REQUEST_POOL 91 #define KTRACE_REQUEST_POOL 100 92 #endif 93 94 struct ktr_request { 95 struct ktr_header ktr_header; 96 void *ktr_buffer; 97 union { 98 struct ktr_proc_ctor ktr_proc_ctor; 99 struct ktr_cap_fail ktr_cap_fail; 100 struct ktr_syscall ktr_syscall; 101 struct ktr_sysret ktr_sysret; 102 struct ktr_genio ktr_genio; 103 struct ktr_psig ktr_psig; 104 struct ktr_csw ktr_csw; 105 struct ktr_fault ktr_fault; 106 struct ktr_faultend ktr_faultend; 107 } ktr_data; 108 STAILQ_ENTRY(ktr_request) ktr_list; 109 }; 110 111 static int data_lengths[] = { 112 0, /* none */ 113 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 114 sizeof(struct ktr_sysret), /* KTR_SYSRET */ 115 0, /* KTR_NAMEI */ 116 sizeof(struct ktr_genio), /* KTR_GENIO */ 117 sizeof(struct ktr_psig), /* KTR_PSIG */ 118 sizeof(struct ktr_csw), /* KTR_CSW */ 119 0, /* KTR_USER */ 120 0, /* KTR_STRUCT */ 121 0, /* KTR_SYSCTL */ 122 sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */ 123 0, /* KTR_PROCDTOR */ 124 sizeof(struct ktr_cap_fail), /* KTR_CAPFAIL */ 125 sizeof(struct ktr_fault), /* KTR_FAULT */ 126 sizeof(struct ktr_faultend), /* KTR_FAULTEND */ 127 }; 128 129 static STAILQ_HEAD(, ktr_request) ktr_free; 130 131 static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 132 133 static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 134 TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 135 136 static u_int ktr_geniosize = PAGE_SIZE; 137 TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 138 SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 139 0, "Maximum size of genio event payload"); 140 141 static int print_message = 1; 142 static struct mtx ktrace_mtx; 143 static struct sx ktrace_sx; 144 145 static void ktrace_init(void *dummy); 146 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 147 static u_int ktrace_resize_pool(u_int oldsize, u_int newsize); 148 static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type); 149 static struct ktr_request *ktr_getrequest(int type); 150 static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 151 static void ktr_freeproc(struct proc *p, struct ucred **uc, 152 struct vnode **vp); 153 static void ktr_freerequest(struct ktr_request *req); 154 static void ktr_freerequest_locked(struct ktr_request *req); 155 static void ktr_writerequest(struct thread *td, struct ktr_request *req); 156 static int ktrcanset(struct thread *,struct proc *); 157 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 158 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 159 static void ktrprocctor_entered(struct thread *, struct proc *); 160 161 /* 162 * ktrace itself generates events, such as context switches, which we do not 163 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 164 * whether or not it is in a region where tracing of events should be 165 * suppressed. 166 */ 167 static void 168 ktrace_enter(struct thread *td) 169 { 170 171 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 172 td->td_pflags |= TDP_INKTRACE; 173 } 174 175 static void 176 ktrace_exit(struct thread *td) 177 { 178 179 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 180 td->td_pflags &= ~TDP_INKTRACE; 181 } 182 183 static void 184 ktrace_assert(struct thread *td) 185 { 186 187 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 188 } 189 190 static void 191 ktrace_init(void *dummy) 192 { 193 struct ktr_request *req; 194 int i; 195 196 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 197 sx_init(&ktrace_sx, "ktrace_sx"); 198 STAILQ_INIT(&ktr_free); 199 for (i = 0; i < ktr_requestpool; i++) { 200 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 201 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 202 } 203 } 204 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 205 206 static int 207 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 208 { 209 struct thread *td; 210 u_int newsize, oldsize, wantsize; 211 int error; 212 213 /* Handle easy read-only case first to avoid warnings from GCC. */ 214 if (!req->newptr) { 215 oldsize = ktr_requestpool; 216 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 217 } 218 219 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 220 if (error) 221 return (error); 222 td = curthread; 223 ktrace_enter(td); 224 oldsize = ktr_requestpool; 225 newsize = ktrace_resize_pool(oldsize, wantsize); 226 ktrace_exit(td); 227 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 228 if (error) 229 return (error); 230 if (wantsize > oldsize && newsize < wantsize) 231 return (ENOSPC); 232 return (0); 233 } 234 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 235 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", 236 "Pool buffer size for ktrace(1)"); 237 238 static u_int 239 ktrace_resize_pool(u_int oldsize, u_int newsize) 240 { 241 STAILQ_HEAD(, ktr_request) ktr_new; 242 struct ktr_request *req; 243 int bound; 244 245 print_message = 1; 246 bound = newsize - oldsize; 247 if (bound == 0) 248 return (ktr_requestpool); 249 if (bound < 0) { 250 mtx_lock(&ktrace_mtx); 251 /* Shrink pool down to newsize if possible. */ 252 while (bound++ < 0) { 253 req = STAILQ_FIRST(&ktr_free); 254 if (req == NULL) 255 break; 256 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 257 ktr_requestpool--; 258 free(req, M_KTRACE); 259 } 260 } else { 261 /* Grow pool up to newsize. */ 262 STAILQ_INIT(&ktr_new); 263 while (bound-- > 0) { 264 req = malloc(sizeof(struct ktr_request), M_KTRACE, 265 M_WAITOK); 266 STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list); 267 } 268 mtx_lock(&ktrace_mtx); 269 STAILQ_CONCAT(&ktr_free, &ktr_new); 270 ktr_requestpool += (newsize - oldsize); 271 } 272 mtx_unlock(&ktrace_mtx); 273 return (ktr_requestpool); 274 } 275 276 /* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */ 277 CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == 278 (sizeof((struct thread *)NULL)->td_name)); 279 280 static struct ktr_request * 281 ktr_getrequest_entered(struct thread *td, int type) 282 { 283 struct ktr_request *req; 284 struct proc *p = td->td_proc; 285 int pm; 286 287 mtx_lock(&ktrace_mtx); 288 if (!KTRCHECK(td, type)) { 289 mtx_unlock(&ktrace_mtx); 290 return (NULL); 291 } 292 req = STAILQ_FIRST(&ktr_free); 293 if (req != NULL) { 294 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 295 req->ktr_header.ktr_type = type; 296 if (p->p_traceflag & KTRFAC_DROP) { 297 req->ktr_header.ktr_type |= KTR_DROP; 298 p->p_traceflag &= ~KTRFAC_DROP; 299 } 300 mtx_unlock(&ktrace_mtx); 301 microtime(&req->ktr_header.ktr_time); 302 req->ktr_header.ktr_pid = p->p_pid; 303 req->ktr_header.ktr_tid = td->td_tid; 304 bcopy(td->td_name, req->ktr_header.ktr_comm, 305 sizeof(req->ktr_header.ktr_comm)); 306 req->ktr_buffer = NULL; 307 req->ktr_header.ktr_len = 0; 308 } else { 309 p->p_traceflag |= KTRFAC_DROP; 310 pm = print_message; 311 print_message = 0; 312 mtx_unlock(&ktrace_mtx); 313 if (pm) 314 printf("Out of ktrace request objects.\n"); 315 } 316 return (req); 317 } 318 319 static struct ktr_request * 320 ktr_getrequest(int type) 321 { 322 struct thread *td = curthread; 323 struct ktr_request *req; 324 325 ktrace_enter(td); 326 req = ktr_getrequest_entered(td, type); 327 if (req == NULL) 328 ktrace_exit(td); 329 330 return (req); 331 } 332 333 /* 334 * Some trace generation environments don't permit direct access to VFS, 335 * such as during a context switch where sleeping is not allowed. Under these 336 * circumstances, queue a request to the thread to be written asynchronously 337 * later. 338 */ 339 static void 340 ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 341 { 342 343 mtx_lock(&ktrace_mtx); 344 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 345 mtx_unlock(&ktrace_mtx); 346 } 347 348 /* 349 * Drain any pending ktrace records from the per-thread queue to disk. This 350 * is used both internally before committing other records, and also on 351 * system call return. We drain all the ones we can find at the time when 352 * drain is requested, but don't keep draining after that as those events 353 * may be approximately "after" the current event. 354 */ 355 static void 356 ktr_drain(struct thread *td) 357 { 358 struct ktr_request *queued_req; 359 STAILQ_HEAD(, ktr_request) local_queue; 360 361 ktrace_assert(td); 362 sx_assert(&ktrace_sx, SX_XLOCKED); 363 364 STAILQ_INIT(&local_queue); 365 366 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 367 mtx_lock(&ktrace_mtx); 368 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 369 mtx_unlock(&ktrace_mtx); 370 371 while ((queued_req = STAILQ_FIRST(&local_queue))) { 372 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 373 ktr_writerequest(td, queued_req); 374 ktr_freerequest(queued_req); 375 } 376 } 377 } 378 379 /* 380 * Submit a trace record for immediate commit to disk -- to be used only 381 * where entering VFS is OK. First drain any pending records that may have 382 * been cached in the thread. 383 */ 384 static void 385 ktr_submitrequest(struct thread *td, struct ktr_request *req) 386 { 387 388 ktrace_assert(td); 389 390 sx_xlock(&ktrace_sx); 391 ktr_drain(td); 392 ktr_writerequest(td, req); 393 ktr_freerequest(req); 394 sx_xunlock(&ktrace_sx); 395 ktrace_exit(td); 396 } 397 398 static void 399 ktr_freerequest(struct ktr_request *req) 400 { 401 402 mtx_lock(&ktrace_mtx); 403 ktr_freerequest_locked(req); 404 mtx_unlock(&ktrace_mtx); 405 } 406 407 static void 408 ktr_freerequest_locked(struct ktr_request *req) 409 { 410 411 mtx_assert(&ktrace_mtx, MA_OWNED); 412 if (req->ktr_buffer != NULL) 413 free(req->ktr_buffer, M_KTRACE); 414 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 415 } 416 417 /* 418 * Disable tracing for a process and release all associated resources. 419 * The caller is responsible for releasing a reference on the returned 420 * vnode and credentials. 421 */ 422 static void 423 ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp) 424 { 425 struct ktr_request *req; 426 427 PROC_LOCK_ASSERT(p, MA_OWNED); 428 mtx_assert(&ktrace_mtx, MA_OWNED); 429 *uc = p->p_tracecred; 430 p->p_tracecred = NULL; 431 if (vp != NULL) 432 *vp = p->p_tracevp; 433 p->p_tracevp = NULL; 434 p->p_traceflag = 0; 435 while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { 436 STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); 437 ktr_freerequest_locked(req); 438 } 439 } 440 441 void 442 ktrsyscall(code, narg, args) 443 int code, narg; 444 register_t args[]; 445 { 446 struct ktr_request *req; 447 struct ktr_syscall *ktp; 448 size_t buflen; 449 char *buf = NULL; 450 451 buflen = sizeof(register_t) * narg; 452 if (buflen > 0) { 453 buf = malloc(buflen, M_KTRACE, M_WAITOK); 454 bcopy(args, buf, buflen); 455 } 456 req = ktr_getrequest(KTR_SYSCALL); 457 if (req == NULL) { 458 if (buf != NULL) 459 free(buf, M_KTRACE); 460 return; 461 } 462 ktp = &req->ktr_data.ktr_syscall; 463 ktp->ktr_code = code; 464 ktp->ktr_narg = narg; 465 if (buflen > 0) { 466 req->ktr_header.ktr_len = buflen; 467 req->ktr_buffer = buf; 468 } 469 ktr_submitrequest(curthread, req); 470 } 471 472 void 473 ktrsysret(code, error, retval) 474 int code, error; 475 register_t retval; 476 { 477 struct ktr_request *req; 478 struct ktr_sysret *ktp; 479 480 req = ktr_getrequest(KTR_SYSRET); 481 if (req == NULL) 482 return; 483 ktp = &req->ktr_data.ktr_sysret; 484 ktp->ktr_code = code; 485 ktp->ktr_error = error; 486 ktp->ktr_retval = ((error == 0) ? retval: 0); /* what about val2 ? */ 487 ktr_submitrequest(curthread, req); 488 } 489 490 /* 491 * When a setuid process execs, disable tracing. 492 * 493 * XXX: We toss any pending asynchronous records. 494 */ 495 void 496 ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp) 497 { 498 499 PROC_LOCK_ASSERT(p, MA_OWNED); 500 mtx_lock(&ktrace_mtx); 501 ktr_freeproc(p, uc, vp); 502 mtx_unlock(&ktrace_mtx); 503 } 504 505 /* 506 * When a process exits, drain per-process asynchronous trace records 507 * and disable tracing. 508 */ 509 void 510 ktrprocexit(struct thread *td) 511 { 512 struct ktr_request *req; 513 struct proc *p; 514 struct ucred *cred; 515 struct vnode *vp; 516 517 p = td->td_proc; 518 if (p->p_traceflag == 0) 519 return; 520 521 ktrace_enter(td); 522 req = ktr_getrequest_entered(td, KTR_PROCDTOR); 523 if (req != NULL) 524 ktr_enqueuerequest(td, req); 525 sx_xlock(&ktrace_sx); 526 ktr_drain(td); 527 sx_xunlock(&ktrace_sx); 528 PROC_LOCK(p); 529 mtx_lock(&ktrace_mtx); 530 ktr_freeproc(p, &cred, &vp); 531 mtx_unlock(&ktrace_mtx); 532 PROC_UNLOCK(p); 533 if (vp != NULL) 534 vrele(vp); 535 if (cred != NULL) 536 crfree(cred); 537 ktrace_exit(td); 538 } 539 540 static void 541 ktrprocctor_entered(struct thread *td, struct proc *p) 542 { 543 struct ktr_proc_ctor *ktp; 544 struct ktr_request *req; 545 struct thread *td2; 546 547 ktrace_assert(td); 548 td2 = FIRST_THREAD_IN_PROC(p); 549 req = ktr_getrequest_entered(td2, KTR_PROCCTOR); 550 if (req == NULL) 551 return; 552 ktp = &req->ktr_data.ktr_proc_ctor; 553 ktp->sv_flags = p->p_sysent->sv_flags; 554 ktr_enqueuerequest(td2, req); 555 } 556 557 void 558 ktrprocctor(struct proc *p) 559 { 560 struct thread *td = curthread; 561 562 if ((p->p_traceflag & KTRFAC_MASK) == 0) 563 return; 564 565 ktrace_enter(td); 566 ktrprocctor_entered(td, p); 567 ktrace_exit(td); 568 } 569 570 /* 571 * When a process forks, enable tracing in the new process if needed. 572 */ 573 void 574 ktrprocfork(struct proc *p1, struct proc *p2) 575 { 576 577 PROC_LOCK(p1); 578 mtx_lock(&ktrace_mtx); 579 KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode")); 580 if (p1->p_traceflag & KTRFAC_INHERIT) { 581 p2->p_traceflag = p1->p_traceflag; 582 if ((p2->p_tracevp = p1->p_tracevp) != NULL) { 583 VREF(p2->p_tracevp); 584 KASSERT(p1->p_tracecred != NULL, 585 ("ktrace vnode with no cred")); 586 p2->p_tracecred = crhold(p1->p_tracecred); 587 } 588 } 589 mtx_unlock(&ktrace_mtx); 590 PROC_UNLOCK(p1); 591 592 ktrprocctor(p2); 593 } 594 595 /* 596 * When a thread returns, drain any asynchronous records generated by the 597 * system call. 598 */ 599 void 600 ktruserret(struct thread *td) 601 { 602 603 ktrace_enter(td); 604 sx_xlock(&ktrace_sx); 605 ktr_drain(td); 606 sx_xunlock(&ktrace_sx); 607 ktrace_exit(td); 608 } 609 610 void 611 ktrnamei(path) 612 char *path; 613 { 614 struct ktr_request *req; 615 int namelen; 616 char *buf = NULL; 617 618 namelen = strlen(path); 619 if (namelen > 0) { 620 buf = malloc(namelen, M_KTRACE, M_WAITOK); 621 bcopy(path, buf, namelen); 622 } 623 req = ktr_getrequest(KTR_NAMEI); 624 if (req == NULL) { 625 if (buf != NULL) 626 free(buf, M_KTRACE); 627 return; 628 } 629 if (namelen > 0) { 630 req->ktr_header.ktr_len = namelen; 631 req->ktr_buffer = buf; 632 } 633 ktr_submitrequest(curthread, req); 634 } 635 636 void 637 ktrsysctl(name, namelen) 638 int *name; 639 u_int namelen; 640 { 641 struct ktr_request *req; 642 u_int mib[CTL_MAXNAME + 2]; 643 char *mibname; 644 size_t mibnamelen; 645 int error; 646 647 /* Lookup name of mib. */ 648 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 649 mib[0] = 0; 650 mib[1] = 1; 651 bcopy(name, mib + 2, namelen * sizeof(*name)); 652 mibnamelen = 128; 653 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 654 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 655 NULL, 0, &mibnamelen, 0); 656 if (error) { 657 free(mibname, M_KTRACE); 658 return; 659 } 660 req = ktr_getrequest(KTR_SYSCTL); 661 if (req == NULL) { 662 free(mibname, M_KTRACE); 663 return; 664 } 665 req->ktr_header.ktr_len = mibnamelen; 666 req->ktr_buffer = mibname; 667 ktr_submitrequest(curthread, req); 668 } 669 670 void 671 ktrgenio(fd, rw, uio, error) 672 int fd; 673 enum uio_rw rw; 674 struct uio *uio; 675 int error; 676 { 677 struct ktr_request *req; 678 struct ktr_genio *ktg; 679 int datalen; 680 char *buf; 681 682 if (error) { 683 free(uio, M_IOV); 684 return; 685 } 686 uio->uio_offset = 0; 687 uio->uio_rw = UIO_WRITE; 688 datalen = MIN(uio->uio_resid, ktr_geniosize); 689 buf = malloc(datalen, M_KTRACE, M_WAITOK); 690 error = uiomove(buf, datalen, uio); 691 free(uio, M_IOV); 692 if (error) { 693 free(buf, M_KTRACE); 694 return; 695 } 696 req = ktr_getrequest(KTR_GENIO); 697 if (req == NULL) { 698 free(buf, M_KTRACE); 699 return; 700 } 701 ktg = &req->ktr_data.ktr_genio; 702 ktg->ktr_fd = fd; 703 ktg->ktr_rw = rw; 704 req->ktr_header.ktr_len = datalen; 705 req->ktr_buffer = buf; 706 ktr_submitrequest(curthread, req); 707 } 708 709 void 710 ktrpsig(sig, action, mask, code) 711 int sig; 712 sig_t action; 713 sigset_t *mask; 714 int code; 715 { 716 struct thread *td = curthread; 717 struct ktr_request *req; 718 struct ktr_psig *kp; 719 720 req = ktr_getrequest(KTR_PSIG); 721 if (req == NULL) 722 return; 723 kp = &req->ktr_data.ktr_psig; 724 kp->signo = (char)sig; 725 kp->action = action; 726 kp->mask = *mask; 727 kp->code = code; 728 ktr_enqueuerequest(td, req); 729 ktrace_exit(td); 730 } 731 732 void 733 ktrcsw(out, user, wmesg) 734 int out, user; 735 const char *wmesg; 736 { 737 struct thread *td = curthread; 738 struct ktr_request *req; 739 struct ktr_csw *kc; 740 741 req = ktr_getrequest(KTR_CSW); 742 if (req == NULL) 743 return; 744 kc = &req->ktr_data.ktr_csw; 745 kc->out = out; 746 kc->user = user; 747 if (wmesg != NULL) 748 strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg)); 749 else 750 bzero(kc->wmesg, sizeof(kc->wmesg)); 751 ktr_enqueuerequest(td, req); 752 ktrace_exit(td); 753 } 754 755 void 756 ktrstruct(name, data, datalen) 757 const char *name; 758 void *data; 759 size_t datalen; 760 { 761 struct ktr_request *req; 762 char *buf = NULL; 763 size_t buflen; 764 765 if (!data) 766 datalen = 0; 767 buflen = strlen(name) + 1 + datalen; 768 buf = malloc(buflen, M_KTRACE, M_WAITOK); 769 strcpy(buf, name); 770 bcopy(data, buf + strlen(name) + 1, datalen); 771 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 772 free(buf, M_KTRACE); 773 return; 774 } 775 req->ktr_buffer = buf; 776 req->ktr_header.ktr_len = buflen; 777 ktr_submitrequest(curthread, req); 778 } 779 780 void 781 ktrcapfail(type, needed, held) 782 enum ktr_cap_fail_type type; 783 const cap_rights_t *needed; 784 const cap_rights_t *held; 785 { 786 struct thread *td = curthread; 787 struct ktr_request *req; 788 struct ktr_cap_fail *kcf; 789 790 req = ktr_getrequest(KTR_CAPFAIL); 791 if (req == NULL) 792 return; 793 kcf = &req->ktr_data.ktr_cap_fail; 794 kcf->cap_type = type; 795 if (needed != NULL) 796 kcf->cap_needed = *needed; 797 else 798 cap_rights_init(&kcf->cap_needed); 799 if (held != NULL) 800 kcf->cap_held = *held; 801 else 802 cap_rights_init(&kcf->cap_held); 803 ktr_enqueuerequest(td, req); 804 ktrace_exit(td); 805 } 806 807 void 808 ktrfault(vaddr, type) 809 vm_offset_t vaddr; 810 int type; 811 { 812 struct thread *td = curthread; 813 struct ktr_request *req; 814 struct ktr_fault *kf; 815 816 req = ktr_getrequest(KTR_FAULT); 817 if (req == NULL) 818 return; 819 kf = &req->ktr_data.ktr_fault; 820 kf->vaddr = vaddr; 821 kf->type = type; 822 ktr_enqueuerequest(td, req); 823 ktrace_exit(td); 824 } 825 826 void 827 ktrfaultend(result) 828 int result; 829 { 830 struct thread *td = curthread; 831 struct ktr_request *req; 832 struct ktr_faultend *kf; 833 834 req = ktr_getrequest(KTR_FAULTEND); 835 if (req == NULL) 836 return; 837 kf = &req->ktr_data.ktr_faultend; 838 kf->result = result; 839 ktr_enqueuerequest(td, req); 840 ktrace_exit(td); 841 } 842 #endif /* KTRACE */ 843 844 /* Interface and common routines */ 845 846 #ifndef _SYS_SYSPROTO_H_ 847 struct ktrace_args { 848 char *fname; 849 int ops; 850 int facs; 851 int pid; 852 }; 853 #endif 854 /* ARGSUSED */ 855 int 856 sys_ktrace(td, uap) 857 struct thread *td; 858 register struct ktrace_args *uap; 859 { 860 #ifdef KTRACE 861 register struct vnode *vp = NULL; 862 register struct proc *p; 863 struct pgrp *pg; 864 int facs = uap->facs & ~KTRFAC_ROOT; 865 int ops = KTROP(uap->ops); 866 int descend = uap->ops & KTRFLAG_DESCEND; 867 int nfound, ret = 0; 868 int flags, error = 0; 869 struct nameidata nd; 870 struct ucred *cred; 871 872 /* 873 * Need something to (un)trace. 874 */ 875 if (ops != KTROP_CLEARFILE && facs == 0) 876 return (EINVAL); 877 878 ktrace_enter(td); 879 if (ops != KTROP_CLEAR) { 880 /* 881 * an operation which requires a file argument. 882 */ 883 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td); 884 flags = FREAD | FWRITE | O_NOFOLLOW; 885 error = vn_open(&nd, &flags, 0, NULL); 886 if (error) { 887 ktrace_exit(td); 888 return (error); 889 } 890 NDFREE(&nd, NDF_ONLY_PNBUF); 891 vp = nd.ni_vp; 892 VOP_UNLOCK(vp, 0); 893 if (vp->v_type != VREG) { 894 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 895 ktrace_exit(td); 896 return (EACCES); 897 } 898 } 899 /* 900 * Clear all uses of the tracefile. 901 */ 902 if (ops == KTROP_CLEARFILE) { 903 int vrele_count; 904 905 vrele_count = 0; 906 sx_slock(&allproc_lock); 907 FOREACH_PROC_IN_SYSTEM(p) { 908 PROC_LOCK(p); 909 if (p->p_tracevp == vp) { 910 if (ktrcanset(td, p)) { 911 mtx_lock(&ktrace_mtx); 912 ktr_freeproc(p, &cred, NULL); 913 mtx_unlock(&ktrace_mtx); 914 vrele_count++; 915 crfree(cred); 916 } else 917 error = EPERM; 918 } 919 PROC_UNLOCK(p); 920 } 921 sx_sunlock(&allproc_lock); 922 if (vrele_count > 0) { 923 while (vrele_count-- > 0) 924 vrele(vp); 925 } 926 goto done; 927 } 928 /* 929 * do it 930 */ 931 sx_slock(&proctree_lock); 932 if (uap->pid < 0) { 933 /* 934 * by process group 935 */ 936 pg = pgfind(-uap->pid); 937 if (pg == NULL) { 938 sx_sunlock(&proctree_lock); 939 error = ESRCH; 940 goto done; 941 } 942 /* 943 * ktrops() may call vrele(). Lock pg_members 944 * by the proctree_lock rather than pg_mtx. 945 */ 946 PGRP_UNLOCK(pg); 947 nfound = 0; 948 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 949 PROC_LOCK(p); 950 if (p->p_state == PRS_NEW || 951 p_cansee(td, p) != 0) { 952 PROC_UNLOCK(p); 953 continue; 954 } 955 nfound++; 956 if (descend) 957 ret |= ktrsetchildren(td, p, ops, facs, vp); 958 else 959 ret |= ktrops(td, p, ops, facs, vp); 960 } 961 if (nfound == 0) { 962 sx_sunlock(&proctree_lock); 963 error = ESRCH; 964 goto done; 965 } 966 } else { 967 /* 968 * by pid 969 */ 970 p = pfind(uap->pid); 971 if (p == NULL) 972 error = ESRCH; 973 else 974 error = p_cansee(td, p); 975 if (error) { 976 if (p != NULL) 977 PROC_UNLOCK(p); 978 sx_sunlock(&proctree_lock); 979 goto done; 980 } 981 if (descend) 982 ret |= ktrsetchildren(td, p, ops, facs, vp); 983 else 984 ret |= ktrops(td, p, ops, facs, vp); 985 } 986 sx_sunlock(&proctree_lock); 987 if (!ret) 988 error = EPERM; 989 done: 990 if (vp != NULL) 991 (void) vn_close(vp, FWRITE, td->td_ucred, td); 992 ktrace_exit(td); 993 return (error); 994 #else /* !KTRACE */ 995 return (ENOSYS); 996 #endif /* KTRACE */ 997 } 998 999 /* ARGSUSED */ 1000 int 1001 sys_utrace(td, uap) 1002 struct thread *td; 1003 register struct utrace_args *uap; 1004 { 1005 1006 #ifdef KTRACE 1007 struct ktr_request *req; 1008 void *cp; 1009 int error; 1010 1011 if (!KTRPOINT(td, KTR_USER)) 1012 return (0); 1013 if (uap->len > KTR_USER_MAXLEN) 1014 return (EINVAL); 1015 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 1016 error = copyin(uap->addr, cp, uap->len); 1017 if (error) { 1018 free(cp, M_KTRACE); 1019 return (error); 1020 } 1021 req = ktr_getrequest(KTR_USER); 1022 if (req == NULL) { 1023 free(cp, M_KTRACE); 1024 return (ENOMEM); 1025 } 1026 req->ktr_buffer = cp; 1027 req->ktr_header.ktr_len = uap->len; 1028 ktr_submitrequest(td, req); 1029 return (0); 1030 #else /* !KTRACE */ 1031 return (ENOSYS); 1032 #endif /* KTRACE */ 1033 } 1034 1035 #ifdef KTRACE 1036 static int 1037 ktrops(td, p, ops, facs, vp) 1038 struct thread *td; 1039 struct proc *p; 1040 int ops, facs; 1041 struct vnode *vp; 1042 { 1043 struct vnode *tracevp = NULL; 1044 struct ucred *tracecred = NULL; 1045 1046 PROC_LOCK_ASSERT(p, MA_OWNED); 1047 if (!ktrcanset(td, p)) { 1048 PROC_UNLOCK(p); 1049 return (0); 1050 } 1051 if (p->p_flag & P_WEXIT) { 1052 /* If the process is exiting, just ignore it. */ 1053 PROC_UNLOCK(p); 1054 return (1); 1055 } 1056 mtx_lock(&ktrace_mtx); 1057 if (ops == KTROP_SET) { 1058 if (p->p_tracevp != vp) { 1059 /* 1060 * if trace file already in use, relinquish below 1061 */ 1062 tracevp = p->p_tracevp; 1063 VREF(vp); 1064 p->p_tracevp = vp; 1065 } 1066 if (p->p_tracecred != td->td_ucred) { 1067 tracecred = p->p_tracecred; 1068 p->p_tracecred = crhold(td->td_ucred); 1069 } 1070 p->p_traceflag |= facs; 1071 if (priv_check(td, PRIV_KTRACE) == 0) 1072 p->p_traceflag |= KTRFAC_ROOT; 1073 } else { 1074 /* KTROP_CLEAR */ 1075 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) 1076 /* no more tracing */ 1077 ktr_freeproc(p, &tracecred, &tracevp); 1078 } 1079 mtx_unlock(&ktrace_mtx); 1080 if ((p->p_traceflag & KTRFAC_MASK) != 0) 1081 ktrprocctor_entered(td, p); 1082 PROC_UNLOCK(p); 1083 if (tracevp != NULL) 1084 vrele(tracevp); 1085 if (tracecred != NULL) 1086 crfree(tracecred); 1087 1088 return (1); 1089 } 1090 1091 static int 1092 ktrsetchildren(td, top, ops, facs, vp) 1093 struct thread *td; 1094 struct proc *top; 1095 int ops, facs; 1096 struct vnode *vp; 1097 { 1098 register struct proc *p; 1099 register int ret = 0; 1100 1101 p = top; 1102 PROC_LOCK_ASSERT(p, MA_OWNED); 1103 sx_assert(&proctree_lock, SX_LOCKED); 1104 for (;;) { 1105 ret |= ktrops(td, p, ops, facs, vp); 1106 /* 1107 * If this process has children, descend to them next, 1108 * otherwise do any siblings, and if done with this level, 1109 * follow back up the tree (but not past top). 1110 */ 1111 if (!LIST_EMPTY(&p->p_children)) 1112 p = LIST_FIRST(&p->p_children); 1113 else for (;;) { 1114 if (p == top) 1115 return (ret); 1116 if (LIST_NEXT(p, p_sibling)) { 1117 p = LIST_NEXT(p, p_sibling); 1118 break; 1119 } 1120 p = p->p_pptr; 1121 } 1122 PROC_LOCK(p); 1123 } 1124 /*NOTREACHED*/ 1125 } 1126 1127 static void 1128 ktr_writerequest(struct thread *td, struct ktr_request *req) 1129 { 1130 struct ktr_header *kth; 1131 struct vnode *vp; 1132 struct proc *p; 1133 struct ucred *cred; 1134 struct uio auio; 1135 struct iovec aiov[3]; 1136 struct mount *mp; 1137 int datalen, buflen, vrele_count; 1138 int error; 1139 1140 /* 1141 * We hold the vnode and credential for use in I/O in case ktrace is 1142 * disabled on the process as we write out the request. 1143 * 1144 * XXXRW: This is not ideal: we could end up performing a write after 1145 * the vnode has been closed. 1146 */ 1147 mtx_lock(&ktrace_mtx); 1148 vp = td->td_proc->p_tracevp; 1149 cred = td->td_proc->p_tracecred; 1150 1151 /* 1152 * If vp is NULL, the vp has been cleared out from under this 1153 * request, so just drop it. Make sure the credential and vnode are 1154 * in sync: we should have both or neither. 1155 */ 1156 if (vp == NULL) { 1157 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 1158 mtx_unlock(&ktrace_mtx); 1159 return; 1160 } 1161 VREF(vp); 1162 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 1163 crhold(cred); 1164 mtx_unlock(&ktrace_mtx); 1165 1166 kth = &req->ktr_header; 1167 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < 1168 sizeof(data_lengths) / sizeof(data_lengths[0]), 1169 ("data_lengths array overflow")); 1170 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 1171 buflen = kth->ktr_len; 1172 auio.uio_iov = &aiov[0]; 1173 auio.uio_offset = 0; 1174 auio.uio_segflg = UIO_SYSSPACE; 1175 auio.uio_rw = UIO_WRITE; 1176 aiov[0].iov_base = (caddr_t)kth; 1177 aiov[0].iov_len = sizeof(struct ktr_header); 1178 auio.uio_resid = sizeof(struct ktr_header); 1179 auio.uio_iovcnt = 1; 1180 auio.uio_td = td; 1181 if (datalen != 0) { 1182 aiov[1].iov_base = (caddr_t)&req->ktr_data; 1183 aiov[1].iov_len = datalen; 1184 auio.uio_resid += datalen; 1185 auio.uio_iovcnt++; 1186 kth->ktr_len += datalen; 1187 } 1188 if (buflen != 0) { 1189 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 1190 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 1191 aiov[auio.uio_iovcnt].iov_len = buflen; 1192 auio.uio_resid += buflen; 1193 auio.uio_iovcnt++; 1194 } 1195 1196 vn_start_write(vp, &mp, V_WAIT); 1197 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1198 #ifdef MAC 1199 error = mac_vnode_check_write(cred, NOCRED, vp); 1200 if (error == 0) 1201 #endif 1202 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 1203 VOP_UNLOCK(vp, 0); 1204 vn_finished_write(mp); 1205 crfree(cred); 1206 if (!error) { 1207 vrele(vp); 1208 return; 1209 } 1210 1211 /* 1212 * If error encountered, give up tracing on this vnode. We defer 1213 * all the vrele()'s on the vnode until after we are finished walking 1214 * the various lists to avoid needlessly holding locks. 1215 * NB: at this point we still hold the vnode reference that must 1216 * not go away as we need the valid vnode to compare with. Thus let 1217 * vrele_count start at 1 and the reference will be freed 1218 * by the loop at the end after our last use of vp. 1219 */ 1220 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1221 error); 1222 vrele_count = 1; 1223 /* 1224 * First, clear this vnode from being used by any processes in the 1225 * system. 1226 * XXX - If one process gets an EPERM writing to the vnode, should 1227 * we really do this? Other processes might have suitable 1228 * credentials for the operation. 1229 */ 1230 cred = NULL; 1231 sx_slock(&allproc_lock); 1232 FOREACH_PROC_IN_SYSTEM(p) { 1233 PROC_LOCK(p); 1234 if (p->p_tracevp == vp) { 1235 mtx_lock(&ktrace_mtx); 1236 ktr_freeproc(p, &cred, NULL); 1237 mtx_unlock(&ktrace_mtx); 1238 vrele_count++; 1239 } 1240 PROC_UNLOCK(p); 1241 if (cred != NULL) { 1242 crfree(cred); 1243 cred = NULL; 1244 } 1245 } 1246 sx_sunlock(&allproc_lock); 1247 1248 while (vrele_count-- > 0) 1249 vrele(vp); 1250 } 1251 1252 /* 1253 * Return true if caller has permission to set the ktracing state 1254 * of target. Essentially, the target can't possess any 1255 * more permissions than the caller. KTRFAC_ROOT signifies that 1256 * root previously set the tracing status on the target process, and 1257 * so, only root may further change it. 1258 */ 1259 static int 1260 ktrcanset(td, targetp) 1261 struct thread *td; 1262 struct proc *targetp; 1263 { 1264 1265 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1266 if (targetp->p_traceflag & KTRFAC_ROOT && 1267 priv_check(td, PRIV_KTRACE)) 1268 return (0); 1269 1270 if (p_candebug(td, targetp) != 0) 1271 return (0); 1272 1273 return (1); 1274 } 1275 1276 #endif /* KTRACE */ 1277