1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_ktrace.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/fcntl.h> 42 #include <sys/kernel.h> 43 #include <sys/kthread.h> 44 #include <sys/lock.h> 45 #include <sys/mutex.h> 46 #include <sys/malloc.h> 47 #include <sys/mount.h> 48 #include <sys/namei.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/unistd.h> 52 #include <sys/vnode.h> 53 #include <sys/socket.h> 54 #include <sys/stat.h> 55 #include <sys/ktrace.h> 56 #include <sys/sx.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/syslog.h> 60 #include <sys/sysproto.h> 61 62 #include <security/mac/mac_framework.h> 63 64 /* 65 * The ktrace facility allows the tracing of certain key events in user space 66 * processes, such as system calls, signal delivery, context switches, and 67 * user generated events using utrace(2). It works by streaming event 68 * records and data to a vnode associated with the process using the 69 * ktrace(2) system call. In general, records can be written directly from 70 * the context that generates the event. One important exception to this is 71 * during a context switch, where sleeping is not permitted. To handle this 72 * case, trace events are generated using in-kernel ktr_request records, and 73 * then delivered to disk at a convenient moment -- either immediately, the 74 * next traceable event, at system call return, or at process exit. 75 * 76 * When dealing with multiple threads or processes writing to the same event 77 * log, ordering guarantees are weak: specifically, if an event has multiple 78 * records (i.e., system call enter and return), they may be interlaced with 79 * records from another event. Process and thread ID information is provided 80 * in the record, and user applications can de-interlace events if required. 81 */ 82 83 static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 84 85 #ifdef KTRACE 86 87 FEATURE(ktrace, "Kernel support for system-call tracing"); 88 89 #ifndef KTRACE_REQUEST_POOL 90 #define KTRACE_REQUEST_POOL 100 91 #endif 92 93 struct ktr_request { 94 struct ktr_header ktr_header; 95 void *ktr_buffer; 96 union { 97 struct ktr_proc_ctor ktr_proc_ctor; 98 struct ktr_cap_fail ktr_cap_fail; 99 struct ktr_syscall ktr_syscall; 100 struct ktr_sysret ktr_sysret; 101 struct ktr_genio ktr_genio; 102 struct ktr_psig ktr_psig; 103 struct ktr_csw ktr_csw; 104 struct ktr_fault ktr_fault; 105 struct ktr_faultend ktr_faultend; 106 } ktr_data; 107 STAILQ_ENTRY(ktr_request) ktr_list; 108 }; 109 110 static int data_lengths[] = { 111 0, /* none */ 112 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 113 sizeof(struct ktr_sysret), /* KTR_SYSRET */ 114 0, /* KTR_NAMEI */ 115 sizeof(struct ktr_genio), /* KTR_GENIO */ 116 sizeof(struct ktr_psig), /* KTR_PSIG */ 117 sizeof(struct ktr_csw), /* KTR_CSW */ 118 0, /* KTR_USER */ 119 0, /* KTR_STRUCT */ 120 0, /* KTR_SYSCTL */ 121 sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */ 122 0, /* KTR_PROCDTOR */ 123 sizeof(struct ktr_cap_fail), /* KTR_CAPFAIL */ 124 sizeof(struct ktr_fault), /* KTR_FAULT */ 125 sizeof(struct ktr_faultend), /* KTR_FAULTEND */ 126 }; 127 128 static STAILQ_HEAD(, ktr_request) ktr_free; 129 130 static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 131 132 static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 133 TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 134 135 static u_int ktr_geniosize = PAGE_SIZE; 136 TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 137 SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 138 0, "Maximum size of genio event payload"); 139 140 static int print_message = 1; 141 static struct mtx ktrace_mtx; 142 static struct sx ktrace_sx; 143 144 static void ktrace_init(void *dummy); 145 static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 146 static u_int ktrace_resize_pool(u_int oldsize, u_int newsize); 147 static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type); 148 static struct ktr_request *ktr_getrequest(int type); 149 static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 150 static void ktr_freeproc(struct proc *p, struct ucred **uc, 151 struct vnode **vp); 152 static void ktr_freerequest(struct ktr_request *req); 153 static void ktr_freerequest_locked(struct ktr_request *req); 154 static void ktr_writerequest(struct thread *td, struct ktr_request *req); 155 static int ktrcanset(struct thread *,struct proc *); 156 static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 157 static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 158 static void ktrprocctor_entered(struct thread *, struct proc *); 159 160 /* 161 * ktrace itself generates events, such as context switches, which we do not 162 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 163 * whether or not it is in a region where tracing of events should be 164 * suppressed. 165 */ 166 static void 167 ktrace_enter(struct thread *td) 168 { 169 170 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 171 td->td_pflags |= TDP_INKTRACE; 172 } 173 174 static void 175 ktrace_exit(struct thread *td) 176 { 177 178 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 179 td->td_pflags &= ~TDP_INKTRACE; 180 } 181 182 static void 183 ktrace_assert(struct thread *td) 184 { 185 186 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 187 } 188 189 static void 190 ktrace_init(void *dummy) 191 { 192 struct ktr_request *req; 193 int i; 194 195 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 196 sx_init(&ktrace_sx, "ktrace_sx"); 197 STAILQ_INIT(&ktr_free); 198 for (i = 0; i < ktr_requestpool; i++) { 199 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 200 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 201 } 202 } 203 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 204 205 static int 206 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 207 { 208 struct thread *td; 209 u_int newsize, oldsize, wantsize; 210 int error; 211 212 /* Handle easy read-only case first to avoid warnings from GCC. */ 213 if (!req->newptr) { 214 oldsize = ktr_requestpool; 215 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 216 } 217 218 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 219 if (error) 220 return (error); 221 td = curthread; 222 ktrace_enter(td); 223 oldsize = ktr_requestpool; 224 newsize = ktrace_resize_pool(oldsize, wantsize); 225 ktrace_exit(td); 226 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 227 if (error) 228 return (error); 229 if (wantsize > oldsize && newsize < wantsize) 230 return (ENOSPC); 231 return (0); 232 } 233 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 234 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", 235 "Pool buffer size for ktrace(1)"); 236 237 static u_int 238 ktrace_resize_pool(u_int oldsize, u_int newsize) 239 { 240 STAILQ_HEAD(, ktr_request) ktr_new; 241 struct ktr_request *req; 242 int bound; 243 244 print_message = 1; 245 bound = newsize - oldsize; 246 if (bound == 0) 247 return (ktr_requestpool); 248 if (bound < 0) { 249 mtx_lock(&ktrace_mtx); 250 /* Shrink pool down to newsize if possible. */ 251 while (bound++ < 0) { 252 req = STAILQ_FIRST(&ktr_free); 253 if (req == NULL) 254 break; 255 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 256 ktr_requestpool--; 257 free(req, M_KTRACE); 258 } 259 } else { 260 /* Grow pool up to newsize. */ 261 STAILQ_INIT(&ktr_new); 262 while (bound-- > 0) { 263 req = malloc(sizeof(struct ktr_request), M_KTRACE, 264 M_WAITOK); 265 STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list); 266 } 267 mtx_lock(&ktrace_mtx); 268 STAILQ_CONCAT(&ktr_free, &ktr_new); 269 ktr_requestpool += (newsize - oldsize); 270 } 271 mtx_unlock(&ktrace_mtx); 272 return (ktr_requestpool); 273 } 274 275 /* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */ 276 CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == 277 (sizeof((struct thread *)NULL)->td_name)); 278 279 static struct ktr_request * 280 ktr_getrequest_entered(struct thread *td, int type) 281 { 282 struct ktr_request *req; 283 struct proc *p = td->td_proc; 284 int pm; 285 286 mtx_lock(&ktrace_mtx); 287 if (!KTRCHECK(td, type)) { 288 mtx_unlock(&ktrace_mtx); 289 return (NULL); 290 } 291 req = STAILQ_FIRST(&ktr_free); 292 if (req != NULL) { 293 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 294 req->ktr_header.ktr_type = type; 295 if (p->p_traceflag & KTRFAC_DROP) { 296 req->ktr_header.ktr_type |= KTR_DROP; 297 p->p_traceflag &= ~KTRFAC_DROP; 298 } 299 mtx_unlock(&ktrace_mtx); 300 microtime(&req->ktr_header.ktr_time); 301 req->ktr_header.ktr_pid = p->p_pid; 302 req->ktr_header.ktr_tid = td->td_tid; 303 bcopy(td->td_name, req->ktr_header.ktr_comm, 304 sizeof(req->ktr_header.ktr_comm)); 305 req->ktr_buffer = NULL; 306 req->ktr_header.ktr_len = 0; 307 } else { 308 p->p_traceflag |= KTRFAC_DROP; 309 pm = print_message; 310 print_message = 0; 311 mtx_unlock(&ktrace_mtx); 312 if (pm) 313 printf("Out of ktrace request objects.\n"); 314 } 315 return (req); 316 } 317 318 static struct ktr_request * 319 ktr_getrequest(int type) 320 { 321 struct thread *td = curthread; 322 struct ktr_request *req; 323 324 ktrace_enter(td); 325 req = ktr_getrequest_entered(td, type); 326 if (req == NULL) 327 ktrace_exit(td); 328 329 return (req); 330 } 331 332 /* 333 * Some trace generation environments don't permit direct access to VFS, 334 * such as during a context switch where sleeping is not allowed. Under these 335 * circumstances, queue a request to the thread to be written asynchronously 336 * later. 337 */ 338 static void 339 ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 340 { 341 342 mtx_lock(&ktrace_mtx); 343 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 344 mtx_unlock(&ktrace_mtx); 345 } 346 347 /* 348 * Drain any pending ktrace records from the per-thread queue to disk. This 349 * is used both internally before committing other records, and also on 350 * system call return. We drain all the ones we can find at the time when 351 * drain is requested, but don't keep draining after that as those events 352 * may be approximately "after" the current event. 353 */ 354 static void 355 ktr_drain(struct thread *td) 356 { 357 struct ktr_request *queued_req; 358 STAILQ_HEAD(, ktr_request) local_queue; 359 360 ktrace_assert(td); 361 sx_assert(&ktrace_sx, SX_XLOCKED); 362 363 STAILQ_INIT(&local_queue); 364 365 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 366 mtx_lock(&ktrace_mtx); 367 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 368 mtx_unlock(&ktrace_mtx); 369 370 while ((queued_req = STAILQ_FIRST(&local_queue))) { 371 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 372 ktr_writerequest(td, queued_req); 373 ktr_freerequest(queued_req); 374 } 375 } 376 } 377 378 /* 379 * Submit a trace record for immediate commit to disk -- to be used only 380 * where entering VFS is OK. First drain any pending records that may have 381 * been cached in the thread. 382 */ 383 static void 384 ktr_submitrequest(struct thread *td, struct ktr_request *req) 385 { 386 387 ktrace_assert(td); 388 389 sx_xlock(&ktrace_sx); 390 ktr_drain(td); 391 ktr_writerequest(td, req); 392 ktr_freerequest(req); 393 sx_xunlock(&ktrace_sx); 394 ktrace_exit(td); 395 } 396 397 static void 398 ktr_freerequest(struct ktr_request *req) 399 { 400 401 mtx_lock(&ktrace_mtx); 402 ktr_freerequest_locked(req); 403 mtx_unlock(&ktrace_mtx); 404 } 405 406 static void 407 ktr_freerequest_locked(struct ktr_request *req) 408 { 409 410 mtx_assert(&ktrace_mtx, MA_OWNED); 411 if (req->ktr_buffer != NULL) 412 free(req->ktr_buffer, M_KTRACE); 413 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 414 } 415 416 /* 417 * Disable tracing for a process and release all associated resources. 418 * The caller is responsible for releasing a reference on the returned 419 * vnode and credentials. 420 */ 421 static void 422 ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp) 423 { 424 struct ktr_request *req; 425 426 PROC_LOCK_ASSERT(p, MA_OWNED); 427 mtx_assert(&ktrace_mtx, MA_OWNED); 428 *uc = p->p_tracecred; 429 p->p_tracecred = NULL; 430 if (vp != NULL) 431 *vp = p->p_tracevp; 432 p->p_tracevp = NULL; 433 p->p_traceflag = 0; 434 while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { 435 STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); 436 ktr_freerequest_locked(req); 437 } 438 } 439 440 void 441 ktrsyscall(code, narg, args) 442 int code, narg; 443 register_t args[]; 444 { 445 struct ktr_request *req; 446 struct ktr_syscall *ktp; 447 size_t buflen; 448 char *buf = NULL; 449 450 buflen = sizeof(register_t) * narg; 451 if (buflen > 0) { 452 buf = malloc(buflen, M_KTRACE, M_WAITOK); 453 bcopy(args, buf, buflen); 454 } 455 req = ktr_getrequest(KTR_SYSCALL); 456 if (req == NULL) { 457 if (buf != NULL) 458 free(buf, M_KTRACE); 459 return; 460 } 461 ktp = &req->ktr_data.ktr_syscall; 462 ktp->ktr_code = code; 463 ktp->ktr_narg = narg; 464 if (buflen > 0) { 465 req->ktr_header.ktr_len = buflen; 466 req->ktr_buffer = buf; 467 } 468 ktr_submitrequest(curthread, req); 469 } 470 471 void 472 ktrsysret(code, error, retval) 473 int code, error; 474 register_t retval; 475 { 476 struct ktr_request *req; 477 struct ktr_sysret *ktp; 478 479 req = ktr_getrequest(KTR_SYSRET); 480 if (req == NULL) 481 return; 482 ktp = &req->ktr_data.ktr_sysret; 483 ktp->ktr_code = code; 484 ktp->ktr_error = error; 485 ktp->ktr_retval = ((error == 0) ? retval: 0); /* what about val2 ? */ 486 ktr_submitrequest(curthread, req); 487 } 488 489 /* 490 * When a setuid process execs, disable tracing. 491 * 492 * XXX: We toss any pending asynchronous records. 493 */ 494 void 495 ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp) 496 { 497 498 PROC_LOCK_ASSERT(p, MA_OWNED); 499 mtx_lock(&ktrace_mtx); 500 ktr_freeproc(p, uc, vp); 501 mtx_unlock(&ktrace_mtx); 502 } 503 504 /* 505 * When a process exits, drain per-process asynchronous trace records 506 * and disable tracing. 507 */ 508 void 509 ktrprocexit(struct thread *td) 510 { 511 struct ktr_request *req; 512 struct proc *p; 513 struct ucred *cred; 514 struct vnode *vp; 515 int vfslocked; 516 517 p = td->td_proc; 518 if (p->p_traceflag == 0) 519 return; 520 521 ktrace_enter(td); 522 req = ktr_getrequest_entered(td, KTR_PROCDTOR); 523 if (req != NULL) 524 ktr_enqueuerequest(td, req); 525 sx_xlock(&ktrace_sx); 526 ktr_drain(td); 527 sx_xunlock(&ktrace_sx); 528 PROC_LOCK(p); 529 mtx_lock(&ktrace_mtx); 530 ktr_freeproc(p, &cred, &vp); 531 mtx_unlock(&ktrace_mtx); 532 PROC_UNLOCK(p); 533 if (vp != NULL) { 534 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 535 vrele(vp); 536 VFS_UNLOCK_GIANT(vfslocked); 537 } 538 if (cred != NULL) 539 crfree(cred); 540 ktrace_exit(td); 541 } 542 543 static void 544 ktrprocctor_entered(struct thread *td, struct proc *p) 545 { 546 struct ktr_proc_ctor *ktp; 547 struct ktr_request *req; 548 struct thread *td2; 549 550 ktrace_assert(td); 551 td2 = FIRST_THREAD_IN_PROC(p); 552 req = ktr_getrequest_entered(td2, KTR_PROCCTOR); 553 if (req == NULL) 554 return; 555 ktp = &req->ktr_data.ktr_proc_ctor; 556 ktp->sv_flags = p->p_sysent->sv_flags; 557 ktr_enqueuerequest(td2, req); 558 } 559 560 void 561 ktrprocctor(struct proc *p) 562 { 563 struct thread *td = curthread; 564 565 if ((p->p_traceflag & KTRFAC_MASK) == 0) 566 return; 567 568 ktrace_enter(td); 569 ktrprocctor_entered(td, p); 570 ktrace_exit(td); 571 } 572 573 /* 574 * When a process forks, enable tracing in the new process if needed. 575 */ 576 void 577 ktrprocfork(struct proc *p1, struct proc *p2) 578 { 579 580 PROC_LOCK(p1); 581 mtx_lock(&ktrace_mtx); 582 KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode")); 583 if (p1->p_traceflag & KTRFAC_INHERIT) { 584 p2->p_traceflag = p1->p_traceflag; 585 if ((p2->p_tracevp = p1->p_tracevp) != NULL) { 586 VREF(p2->p_tracevp); 587 KASSERT(p1->p_tracecred != NULL, 588 ("ktrace vnode with no cred")); 589 p2->p_tracecred = crhold(p1->p_tracecred); 590 } 591 } 592 mtx_unlock(&ktrace_mtx); 593 PROC_UNLOCK(p1); 594 595 ktrprocctor(p2); 596 } 597 598 /* 599 * When a thread returns, drain any asynchronous records generated by the 600 * system call. 601 */ 602 void 603 ktruserret(struct thread *td) 604 { 605 606 ktrace_enter(td); 607 sx_xlock(&ktrace_sx); 608 ktr_drain(td); 609 sx_xunlock(&ktrace_sx); 610 ktrace_exit(td); 611 } 612 613 void 614 ktrnamei(path) 615 char *path; 616 { 617 struct ktr_request *req; 618 int namelen; 619 char *buf = NULL; 620 621 namelen = strlen(path); 622 if (namelen > 0) { 623 buf = malloc(namelen, M_KTRACE, M_WAITOK); 624 bcopy(path, buf, namelen); 625 } 626 req = ktr_getrequest(KTR_NAMEI); 627 if (req == NULL) { 628 if (buf != NULL) 629 free(buf, M_KTRACE); 630 return; 631 } 632 if (namelen > 0) { 633 req->ktr_header.ktr_len = namelen; 634 req->ktr_buffer = buf; 635 } 636 ktr_submitrequest(curthread, req); 637 } 638 639 void 640 ktrsysctl(name, namelen) 641 int *name; 642 u_int namelen; 643 { 644 struct ktr_request *req; 645 u_int mib[CTL_MAXNAME + 2]; 646 char *mibname; 647 size_t mibnamelen; 648 int error; 649 650 /* Lookup name of mib. */ 651 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 652 mib[0] = 0; 653 mib[1] = 1; 654 bcopy(name, mib + 2, namelen * sizeof(*name)); 655 mibnamelen = 128; 656 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 657 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 658 NULL, 0, &mibnamelen, 0); 659 if (error) { 660 free(mibname, M_KTRACE); 661 return; 662 } 663 req = ktr_getrequest(KTR_SYSCTL); 664 if (req == NULL) { 665 free(mibname, M_KTRACE); 666 return; 667 } 668 req->ktr_header.ktr_len = mibnamelen; 669 req->ktr_buffer = mibname; 670 ktr_submitrequest(curthread, req); 671 } 672 673 void 674 ktrgenio(fd, rw, uio, error) 675 int fd; 676 enum uio_rw rw; 677 struct uio *uio; 678 int error; 679 { 680 struct ktr_request *req; 681 struct ktr_genio *ktg; 682 int datalen; 683 char *buf; 684 685 if (error) { 686 free(uio, M_IOV); 687 return; 688 } 689 uio->uio_offset = 0; 690 uio->uio_rw = UIO_WRITE; 691 datalen = MIN(uio->uio_resid, ktr_geniosize); 692 buf = malloc(datalen, M_KTRACE, M_WAITOK); 693 error = uiomove(buf, datalen, uio); 694 free(uio, M_IOV); 695 if (error) { 696 free(buf, M_KTRACE); 697 return; 698 } 699 req = ktr_getrequest(KTR_GENIO); 700 if (req == NULL) { 701 free(buf, M_KTRACE); 702 return; 703 } 704 ktg = &req->ktr_data.ktr_genio; 705 ktg->ktr_fd = fd; 706 ktg->ktr_rw = rw; 707 req->ktr_header.ktr_len = datalen; 708 req->ktr_buffer = buf; 709 ktr_submitrequest(curthread, req); 710 } 711 712 void 713 ktrpsig(sig, action, mask, code) 714 int sig; 715 sig_t action; 716 sigset_t *mask; 717 int code; 718 { 719 struct thread *td = curthread; 720 struct ktr_request *req; 721 struct ktr_psig *kp; 722 723 req = ktr_getrequest(KTR_PSIG); 724 if (req == NULL) 725 return; 726 kp = &req->ktr_data.ktr_psig; 727 kp->signo = (char)sig; 728 kp->action = action; 729 kp->mask = *mask; 730 kp->code = code; 731 ktr_enqueuerequest(td, req); 732 ktrace_exit(td); 733 } 734 735 void 736 ktrcsw(out, user, wmesg) 737 int out, user; 738 const char *wmesg; 739 { 740 struct thread *td = curthread; 741 struct ktr_request *req; 742 struct ktr_csw *kc; 743 744 req = ktr_getrequest(KTR_CSW); 745 if (req == NULL) 746 return; 747 kc = &req->ktr_data.ktr_csw; 748 kc->out = out; 749 kc->user = user; 750 if (wmesg != NULL) 751 strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg)); 752 else 753 bzero(kc->wmesg, sizeof(kc->wmesg)); 754 ktr_enqueuerequest(td, req); 755 ktrace_exit(td); 756 } 757 758 void 759 ktrstruct(name, data, datalen) 760 const char *name; 761 void *data; 762 size_t datalen; 763 { 764 struct ktr_request *req; 765 char *buf = NULL; 766 size_t buflen; 767 768 if (!data) 769 datalen = 0; 770 buflen = strlen(name) + 1 + datalen; 771 buf = malloc(buflen, M_KTRACE, M_WAITOK); 772 strcpy(buf, name); 773 bcopy(data, buf + strlen(name) + 1, datalen); 774 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 775 free(buf, M_KTRACE); 776 return; 777 } 778 req->ktr_buffer = buf; 779 req->ktr_header.ktr_len = buflen; 780 ktr_submitrequest(curthread, req); 781 } 782 783 void 784 ktrcapfail(type, needed, held) 785 enum ktr_cap_fail_type type; 786 cap_rights_t needed; 787 cap_rights_t held; 788 { 789 struct thread *td = curthread; 790 struct ktr_request *req; 791 struct ktr_cap_fail *kcf; 792 793 req = ktr_getrequest(KTR_CAPFAIL); 794 if (req == NULL) 795 return; 796 kcf = &req->ktr_data.ktr_cap_fail; 797 kcf->cap_type = type; 798 kcf->cap_needed = needed; 799 kcf->cap_held = held; 800 ktr_enqueuerequest(td, req); 801 ktrace_exit(td); 802 } 803 804 void 805 ktrfault(vaddr, type) 806 vm_offset_t vaddr; 807 int type; 808 { 809 struct thread *td = curthread; 810 struct ktr_request *req; 811 struct ktr_fault *kf; 812 813 req = ktr_getrequest(KTR_FAULT); 814 if (req == NULL) 815 return; 816 kf = &req->ktr_data.ktr_fault; 817 kf->vaddr = vaddr; 818 kf->type = type; 819 ktr_enqueuerequest(td, req); 820 ktrace_exit(td); 821 } 822 823 void 824 ktrfaultend(result) 825 int result; 826 { 827 struct thread *td = curthread; 828 struct ktr_request *req; 829 struct ktr_faultend *kf; 830 831 req = ktr_getrequest(KTR_FAULTEND); 832 if (req == NULL) 833 return; 834 kf = &req->ktr_data.ktr_faultend; 835 kf->result = result; 836 ktr_enqueuerequest(td, req); 837 ktrace_exit(td); 838 } 839 #endif /* KTRACE */ 840 841 /* Interface and common routines */ 842 843 #ifndef _SYS_SYSPROTO_H_ 844 struct ktrace_args { 845 char *fname; 846 int ops; 847 int facs; 848 int pid; 849 }; 850 #endif 851 /* ARGSUSED */ 852 int 853 sys_ktrace(td, uap) 854 struct thread *td; 855 register struct ktrace_args *uap; 856 { 857 #ifdef KTRACE 858 register struct vnode *vp = NULL; 859 register struct proc *p; 860 struct pgrp *pg; 861 int facs = uap->facs & ~KTRFAC_ROOT; 862 int ops = KTROP(uap->ops); 863 int descend = uap->ops & KTRFLAG_DESCEND; 864 int nfound, ret = 0; 865 int flags, error = 0, vfslocked; 866 struct nameidata nd; 867 struct ucred *cred; 868 869 /* 870 * Need something to (un)trace. 871 */ 872 if (ops != KTROP_CLEARFILE && facs == 0) 873 return (EINVAL); 874 875 ktrace_enter(td); 876 if (ops != KTROP_CLEAR) { 877 /* 878 * an operation which requires a file argument. 879 */ 880 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 881 uap->fname, td); 882 flags = FREAD | FWRITE | O_NOFOLLOW; 883 error = vn_open(&nd, &flags, 0, NULL); 884 if (error) { 885 ktrace_exit(td); 886 return (error); 887 } 888 vfslocked = NDHASGIANT(&nd); 889 NDFREE(&nd, NDF_ONLY_PNBUF); 890 vp = nd.ni_vp; 891 VOP_UNLOCK(vp, 0); 892 if (vp->v_type != VREG) { 893 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 894 VFS_UNLOCK_GIANT(vfslocked); 895 ktrace_exit(td); 896 return (EACCES); 897 } 898 VFS_UNLOCK_GIANT(vfslocked); 899 } 900 /* 901 * Clear all uses of the tracefile. 902 */ 903 if (ops == KTROP_CLEARFILE) { 904 int vrele_count; 905 906 vrele_count = 0; 907 sx_slock(&allproc_lock); 908 FOREACH_PROC_IN_SYSTEM(p) { 909 PROC_LOCK(p); 910 if (p->p_tracevp == vp) { 911 if (ktrcanset(td, p)) { 912 mtx_lock(&ktrace_mtx); 913 ktr_freeproc(p, &cred, NULL); 914 mtx_unlock(&ktrace_mtx); 915 vrele_count++; 916 crfree(cred); 917 } else 918 error = EPERM; 919 } 920 PROC_UNLOCK(p); 921 } 922 sx_sunlock(&allproc_lock); 923 if (vrele_count > 0) { 924 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 925 while (vrele_count-- > 0) 926 vrele(vp); 927 VFS_UNLOCK_GIANT(vfslocked); 928 } 929 goto done; 930 } 931 /* 932 * do it 933 */ 934 sx_slock(&proctree_lock); 935 if (uap->pid < 0) { 936 /* 937 * by process group 938 */ 939 pg = pgfind(-uap->pid); 940 if (pg == NULL) { 941 sx_sunlock(&proctree_lock); 942 error = ESRCH; 943 goto done; 944 } 945 /* 946 * ktrops() may call vrele(). Lock pg_members 947 * by the proctree_lock rather than pg_mtx. 948 */ 949 PGRP_UNLOCK(pg); 950 nfound = 0; 951 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 952 PROC_LOCK(p); 953 if (p->p_state == PRS_NEW || 954 p_cansee(td, p) != 0) { 955 PROC_UNLOCK(p); 956 continue; 957 } 958 nfound++; 959 if (descend) 960 ret |= ktrsetchildren(td, p, ops, facs, vp); 961 else 962 ret |= ktrops(td, p, ops, facs, vp); 963 } 964 if (nfound == 0) { 965 sx_sunlock(&proctree_lock); 966 error = ESRCH; 967 goto done; 968 } 969 } else { 970 /* 971 * by pid 972 */ 973 p = pfind(uap->pid); 974 if (p == NULL) 975 error = ESRCH; 976 else 977 error = p_cansee(td, p); 978 if (error) { 979 if (p != NULL) 980 PROC_UNLOCK(p); 981 sx_sunlock(&proctree_lock); 982 goto done; 983 } 984 if (descend) 985 ret |= ktrsetchildren(td, p, ops, facs, vp); 986 else 987 ret |= ktrops(td, p, ops, facs, vp); 988 } 989 sx_sunlock(&proctree_lock); 990 if (!ret) 991 error = EPERM; 992 done: 993 if (vp != NULL) { 994 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 995 (void) vn_close(vp, FWRITE, td->td_ucred, td); 996 VFS_UNLOCK_GIANT(vfslocked); 997 } 998 ktrace_exit(td); 999 return (error); 1000 #else /* !KTRACE */ 1001 return (ENOSYS); 1002 #endif /* KTRACE */ 1003 } 1004 1005 /* ARGSUSED */ 1006 int 1007 sys_utrace(td, uap) 1008 struct thread *td; 1009 register struct utrace_args *uap; 1010 { 1011 1012 #ifdef KTRACE 1013 struct ktr_request *req; 1014 void *cp; 1015 int error; 1016 1017 if (!KTRPOINT(td, KTR_USER)) 1018 return (0); 1019 if (uap->len > KTR_USER_MAXLEN) 1020 return (EINVAL); 1021 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 1022 error = copyin(uap->addr, cp, uap->len); 1023 if (error) { 1024 free(cp, M_KTRACE); 1025 return (error); 1026 } 1027 req = ktr_getrequest(KTR_USER); 1028 if (req == NULL) { 1029 free(cp, M_KTRACE); 1030 return (ENOMEM); 1031 } 1032 req->ktr_buffer = cp; 1033 req->ktr_header.ktr_len = uap->len; 1034 ktr_submitrequest(td, req); 1035 return (0); 1036 #else /* !KTRACE */ 1037 return (ENOSYS); 1038 #endif /* KTRACE */ 1039 } 1040 1041 #ifdef KTRACE 1042 static int 1043 ktrops(td, p, ops, facs, vp) 1044 struct thread *td; 1045 struct proc *p; 1046 int ops, facs; 1047 struct vnode *vp; 1048 { 1049 struct vnode *tracevp = NULL; 1050 struct ucred *tracecred = NULL; 1051 1052 PROC_LOCK_ASSERT(p, MA_OWNED); 1053 if (!ktrcanset(td, p)) { 1054 PROC_UNLOCK(p); 1055 return (0); 1056 } 1057 if (p->p_flag & P_WEXIT) { 1058 /* If the process is exiting, just ignore it. */ 1059 PROC_UNLOCK(p); 1060 return (1); 1061 } 1062 mtx_lock(&ktrace_mtx); 1063 if (ops == KTROP_SET) { 1064 if (p->p_tracevp != vp) { 1065 /* 1066 * if trace file already in use, relinquish below 1067 */ 1068 tracevp = p->p_tracevp; 1069 VREF(vp); 1070 p->p_tracevp = vp; 1071 } 1072 if (p->p_tracecred != td->td_ucred) { 1073 tracecred = p->p_tracecred; 1074 p->p_tracecred = crhold(td->td_ucred); 1075 } 1076 p->p_traceflag |= facs; 1077 if (priv_check(td, PRIV_KTRACE) == 0) 1078 p->p_traceflag |= KTRFAC_ROOT; 1079 } else { 1080 /* KTROP_CLEAR */ 1081 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) 1082 /* no more tracing */ 1083 ktr_freeproc(p, &tracecred, &tracevp); 1084 } 1085 mtx_unlock(&ktrace_mtx); 1086 if ((p->p_traceflag & KTRFAC_MASK) != 0) 1087 ktrprocctor_entered(td, p); 1088 PROC_UNLOCK(p); 1089 if (tracevp != NULL) { 1090 int vfslocked; 1091 1092 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 1093 vrele(tracevp); 1094 VFS_UNLOCK_GIANT(vfslocked); 1095 } 1096 if (tracecred != NULL) 1097 crfree(tracecred); 1098 1099 return (1); 1100 } 1101 1102 static int 1103 ktrsetchildren(td, top, ops, facs, vp) 1104 struct thread *td; 1105 struct proc *top; 1106 int ops, facs; 1107 struct vnode *vp; 1108 { 1109 register struct proc *p; 1110 register int ret = 0; 1111 1112 p = top; 1113 PROC_LOCK_ASSERT(p, MA_OWNED); 1114 sx_assert(&proctree_lock, SX_LOCKED); 1115 for (;;) { 1116 ret |= ktrops(td, p, ops, facs, vp); 1117 /* 1118 * If this process has children, descend to them next, 1119 * otherwise do any siblings, and if done with this level, 1120 * follow back up the tree (but not past top). 1121 */ 1122 if (!LIST_EMPTY(&p->p_children)) 1123 p = LIST_FIRST(&p->p_children); 1124 else for (;;) { 1125 if (p == top) 1126 return (ret); 1127 if (LIST_NEXT(p, p_sibling)) { 1128 p = LIST_NEXT(p, p_sibling); 1129 break; 1130 } 1131 p = p->p_pptr; 1132 } 1133 PROC_LOCK(p); 1134 } 1135 /*NOTREACHED*/ 1136 } 1137 1138 static void 1139 ktr_writerequest(struct thread *td, struct ktr_request *req) 1140 { 1141 struct ktr_header *kth; 1142 struct vnode *vp; 1143 struct proc *p; 1144 struct ucred *cred; 1145 struct uio auio; 1146 struct iovec aiov[3]; 1147 struct mount *mp; 1148 int datalen, buflen, vrele_count; 1149 int error, vfslocked; 1150 1151 /* 1152 * We hold the vnode and credential for use in I/O in case ktrace is 1153 * disabled on the process as we write out the request. 1154 * 1155 * XXXRW: This is not ideal: we could end up performing a write after 1156 * the vnode has been closed. 1157 */ 1158 mtx_lock(&ktrace_mtx); 1159 vp = td->td_proc->p_tracevp; 1160 cred = td->td_proc->p_tracecred; 1161 1162 /* 1163 * If vp is NULL, the vp has been cleared out from under this 1164 * request, so just drop it. Make sure the credential and vnode are 1165 * in sync: we should have both or neither. 1166 */ 1167 if (vp == NULL) { 1168 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 1169 mtx_unlock(&ktrace_mtx); 1170 return; 1171 } 1172 VREF(vp); 1173 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 1174 crhold(cred); 1175 mtx_unlock(&ktrace_mtx); 1176 1177 kth = &req->ktr_header; 1178 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < 1179 sizeof(data_lengths) / sizeof(data_lengths[0]), 1180 ("data_lengths array overflow")); 1181 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 1182 buflen = kth->ktr_len; 1183 auio.uio_iov = &aiov[0]; 1184 auio.uio_offset = 0; 1185 auio.uio_segflg = UIO_SYSSPACE; 1186 auio.uio_rw = UIO_WRITE; 1187 aiov[0].iov_base = (caddr_t)kth; 1188 aiov[0].iov_len = sizeof(struct ktr_header); 1189 auio.uio_resid = sizeof(struct ktr_header); 1190 auio.uio_iovcnt = 1; 1191 auio.uio_td = td; 1192 if (datalen != 0) { 1193 aiov[1].iov_base = (caddr_t)&req->ktr_data; 1194 aiov[1].iov_len = datalen; 1195 auio.uio_resid += datalen; 1196 auio.uio_iovcnt++; 1197 kth->ktr_len += datalen; 1198 } 1199 if (buflen != 0) { 1200 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 1201 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 1202 aiov[auio.uio_iovcnt].iov_len = buflen; 1203 auio.uio_resid += buflen; 1204 auio.uio_iovcnt++; 1205 } 1206 1207 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1208 vn_start_write(vp, &mp, V_WAIT); 1209 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1210 #ifdef MAC 1211 error = mac_vnode_check_write(cred, NOCRED, vp); 1212 if (error == 0) 1213 #endif 1214 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 1215 VOP_UNLOCK(vp, 0); 1216 vn_finished_write(mp); 1217 crfree(cred); 1218 if (!error) { 1219 vrele(vp); 1220 VFS_UNLOCK_GIANT(vfslocked); 1221 return; 1222 } 1223 VFS_UNLOCK_GIANT(vfslocked); 1224 1225 /* 1226 * If error encountered, give up tracing on this vnode. We defer 1227 * all the vrele()'s on the vnode until after we are finished walking 1228 * the various lists to avoid needlessly holding locks. 1229 * NB: at this point we still hold the vnode reference that must 1230 * not go away as we need the valid vnode to compare with. Thus let 1231 * vrele_count start at 1 and the reference will be freed 1232 * by the loop at the end after our last use of vp. 1233 */ 1234 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1235 error); 1236 vrele_count = 1; 1237 /* 1238 * First, clear this vnode from being used by any processes in the 1239 * system. 1240 * XXX - If one process gets an EPERM writing to the vnode, should 1241 * we really do this? Other processes might have suitable 1242 * credentials for the operation. 1243 */ 1244 cred = NULL; 1245 sx_slock(&allproc_lock); 1246 FOREACH_PROC_IN_SYSTEM(p) { 1247 PROC_LOCK(p); 1248 if (p->p_tracevp == vp) { 1249 mtx_lock(&ktrace_mtx); 1250 ktr_freeproc(p, &cred, NULL); 1251 mtx_unlock(&ktrace_mtx); 1252 vrele_count++; 1253 } 1254 PROC_UNLOCK(p); 1255 if (cred != NULL) { 1256 crfree(cred); 1257 cred = NULL; 1258 } 1259 } 1260 sx_sunlock(&allproc_lock); 1261 1262 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1263 while (vrele_count-- > 0) 1264 vrele(vp); 1265 VFS_UNLOCK_GIANT(vfslocked); 1266 } 1267 1268 /* 1269 * Return true if caller has permission to set the ktracing state 1270 * of target. Essentially, the target can't possess any 1271 * more permissions than the caller. KTRFAC_ROOT signifies that 1272 * root previously set the tracing status on the target process, and 1273 * so, only root may further change it. 1274 */ 1275 static int 1276 ktrcanset(td, targetp) 1277 struct thread *td; 1278 struct proc *targetp; 1279 { 1280 1281 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1282 if (targetp->p_traceflag & KTRFAC_ROOT && 1283 priv_check(td, PRIV_KTRACE)) 1284 return (0); 1285 1286 if (p_candebug(td, targetp) != 0) 1287 return (0); 1288 1289 return (1); 1290 } 1291 1292 #endif /* KTRACE */ 1293