/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

#ifndef	KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0,					/* KTR_USER */
	0,					/* KTR_STRUCT */
	0,					/* KTR_SYSCTL */
	sizeof(struct ktr_proc_ctor),		/* KTR_PROCCTOR */
	0,					/* KTR_PROCDTOR */
};

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_ne(struct thread *, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest_ne(struct thread *td, struct ktr_request *req);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
static void ktrprocctor_ne(struct thread *, struct proc *p);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");

static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}

/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    (sizeof((struct thread *)NULL)->td_name));

static struct ktr_request *
ktr_getrequest_ne(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}

static struct ktr_request *
ktr_getrequest(int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;

	ktrace_enter(td);
	req = ktr_getrequest_ne(td, type);
	if (req == NULL)
		ktrace_exit(td);

	return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under these
 * circumstances, queue a request to the thread to be written asynchronously
 * later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
	ktrace_exit(td);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.  This
 * is used both internally before committing other records, and also on
 * system call return.  We drain all the ones we can find at the time when
 * drain is requested, but don't keep draining after that as those events
 * may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}

/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest_ne(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
}

static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);
	ktr_submitrequest_ne(td, req);
	ktrace_exit(td);
}

static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}

static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}

/*
 * Disable tracing for a process and release all associated resources.
 * The caller is responsible for releasing a reference on the returned
 * vnode and credentials.
 */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}

void
ktrsyscall(code, narg, args)
	int code, narg;
	register_t args[];
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysret(code, error, retval)
	int code, error;
	register_t retval;
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = retval;		/* what about val2 ? */
	ktr_submitrequest(curthread, req);
}

/*
 * When a setuid process execs, disable tracing.
 *
 * XXX: We toss any pending asynchronous records.
 */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}

/*
 * When a process exits, drain per-process asynchronous trace records
 * and disable tracing.
 */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;
	int vfslocked;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	req = ktr_getrequest_ne(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_submitrequest_ne(td, req);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}

static void
ktrprocctor_ne(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_ne(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;

	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_submitrequest_ne(td, req);
}

void
ktrprocctor(struct proc *p)
{
	struct thread *td = curthread;

	if ((p->p_traceflag & KTRFAC_MASK) == 0)
		return;

	ktrace_enter(td);
	ktrprocctor_ne(td, p);
	ktrace_exit(td);
}

/*
 * When a process forks, enable tracing in the new process if needed.
 */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	ktrprocctor(p2);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

void
ktrnamei(path)
	char *path;
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysctl(name, namelen)
	int *name;
	u_int namelen;
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/* Lookup name of mib. */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
	    NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}

void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = imin(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}

void
ktrpsig(sig, action, mask, code)
	int sig;
	sig_t action;
	sigset_t *mask;
	int code;
{
	struct ktr_request *req;
	struct ktr_psig	*kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_enqueuerequest(curthread, req);
}

void
ktrcsw(out, user)
	int out, user;
{
	struct ktr_request *req;
	struct ktr_csw *kc;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	ktr_enqueuerequest(curthread, req);
}

void
ktrstruct(name, data, datalen)
	const char *name;
	void *data;
	size_t datalen;
{
	struct ktr_request *req;
	char *buf = NULL;
	size_t buflen;

	if (!data)
		datalen = 0;
	buflen = strlen(name) + 1 + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	bcopy(data, buf + strlen(name) + 1, datalen);
	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/* ARGSUSED */
int
ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0, vfslocked;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
		    uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		vfslocked = NDHASGIANT(&nd);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			VFS_UNLOCK_GIANT(vfslocked);
			ktrace_exit(td);
			return (EACCES);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
			while (vrele_count-- > 0)
				vrele(vp);
			VFS_UNLOCK_GIANT(vfslocked);
		}
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/* ARGSUSED */
int
utrace(td, uap)
	struct thread *td;
	register struct utrace_args *uap;
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	req->ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(td, req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

#ifdef KTRACE
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			ktr_freeproc(p, &tracecred, &tracevp);
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (tracevp != NULL) {
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (tracecred != NULL)
		crfree(tracecred);

	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_ne(td, p);

	return (1);
}

static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}

static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error, vfslocked;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
	    sizeof(data_lengths) / sizeof(data_lengths[0]),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return;
	}
	VFS_UNLOCK_GIANT(vfslocked);

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with.  Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	while (vrele_count-- > 0)
		vrele(vp);
	VFS_UNLOCK_GIANT(vfslocked);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(td, targetp)
	struct thread *td;
	struct proc *targetp;
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */