/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */
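
/*
 * Illustrative sketch only (not compiled here): a user space process can
 * enable tracing on itself with ktrace(2); the file name and facility flags
 * below are example values, not anything mandated by this file:
 *
 *	#include <sys/ktrace.h>
 *
 *	ktrace("ktrace.out", KTROP_SET,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_NAMEI, getpid());
 *
 * Tracing is later disabled with KTROP_CLEAR, and the resulting file is
 * decoded with kdump(1).
 */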

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_cap_fail ktr_cap_fail;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
		struct	ktr_fault ktr_fault;
		struct	ktr_faultend ktr_faultend;
		struct	ktr_struct_array ktr_struct_array;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
	[KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args),
	[KTR_SYSRET] = sizeof(struct ktr_sysret),
	[KTR_NAMEI] = 0,
	[KTR_GENIO] = sizeof(struct ktr_genio),
	[KTR_PSIG] = sizeof(struct ktr_psig),
	[KTR_CSW] = sizeof(struct ktr_csw),
	[KTR_USER] = 0,
	[KTR_STRUCT] = 0,
	[KTR_SYSCTL] = 0,
	[KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor),
	[KTR_PROCDTOR] = 0,
	[KTR_CAPFAIL] = sizeof(struct ktr_cap_fail),
	[KTR_FAULT] = sizeof(struct ktr_fault),
	[KTR_FAULTEND] = sizeof(struct ktr_faultend),
	[KTR_STRUCT_ARRAY] = sizeof(struct ktr_struct_array),
};
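
/*
 * Note on record layout: as ktr_writerequest() shows below, each record
 * written to the trace vnode is a ktr_header followed by the fixed-size
 * portion of ktr_data for that record type (the sizes in data_lengths[]
 * above) and then any variable-length payload from ktr_buffer; the header's
 * ktr_len field counts both payload parts.
 */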

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

u_int ktr_geniosize = PAGE_SIZE;
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RWTUN, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
static void ktrprocctor_entered(struct thread *, struct proc *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ktr_requestpool, 0,
    sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");

static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}
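
/*
 * Both knobs defined above are run-time tunable, for example (illustrative
 * values only):
 *
 *	# sysctl kern.ktrace.request_pool=200
 *	# sysctl kern.ktrace.genio_size=65536
 *
 * request_pool bounds the number of deferred trace records available;
 * genio_size caps how much I/O data a single KTR_GENIO record captures.
 */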

/*
 * ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[].
 */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    (sizeof((struct thread *)NULL)->td_name));

static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}

static struct ktr_request *
ktr_getrequest(int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, type);
	if (req == NULL)
		ktrace_exit(td);

	return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under
 * these circumstances, queue a request to the thread to be written
 * asynchronously later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.  This
 * is used both internally before committing other records, and also on
 * system call return.  We drain all the ones we can find at the time when
 * drain is requested, but don't keep draining after that as those events
 * may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}

/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}

static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}

/*
 * Disable tracing for a process and release all associated resources.
 * The caller is responsible for releasing a reference on the returned
 * vnode and credentials.
 */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}

void
ktrsyscall(int code, int narg, register_t args[])
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysret(int code, int error, register_t retval)
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = ((error == 0) ? retval: 0);	/* what about val2 ? */
	ktr_submitrequest(curthread, req);
}
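
/*
 * The two records above are what kdump(1) renders as paired "CALL" and
 * "RET" lines, roughly like the following (illustrative output, exact
 * formatting may differ):
 *
 *	  1234 cat      CALL  read(0x3,0x7fff5000,0x1000)
 *	  1234 cat      RET   read 4096/0x1000
 *
 * Because the records are written independently, lines from other threads
 * or processes tracing to the same file may appear between them.
 */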

/*
 * When a setuid process execs, disable tracing.
 *
 * XXX: We toss any pending asynchronous records.
 */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}

/*
 * When a process exits, drain per-process asynchronous trace records
 * and disable tracing.
 */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (vp != NULL)
		vrele(vp);
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}

static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}

void
ktrprocctor(struct proc *p)
{
	struct thread *td = curthread;

	if ((p->p_traceflag & KTRFAC_MASK) == 0)
		return;

	ktrace_enter(td);
	ktrprocctor_entered(td, p);
	ktrace_exit(td);
}

/*
 * When a process forks, enable tracing in the new process if needed.
 */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	MPASS(p2->p_tracevp == NULL);
	MPASS(p2->p_traceflag == 0);

	if (p1->p_traceflag == 0)
		return;

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	ktrprocctor(p2);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

void
ktrnamei(char *path)
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysctl(int *name, u_int namelen)
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/* Lookup name of mib. */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
	    NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}

void
ktrgenio(int fd, enum uio_rw rw, struct uio *uio, int error)
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = MIN(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}

void
ktrpsig(int sig, sig_t action, sigset_t *mask, int code)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_psig	*kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrcsw(int out, int user, const char *wmesg)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_csw *kc;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	if (wmesg != NULL)
		strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg));
	else
		bzero(kc->wmesg, sizeof(kc->wmesg));
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrstruct(const char *name, const void *data, size_t datalen)
{
	struct ktr_request *req;
	char *buf;
	size_t buflen, namelen;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	if (data == NULL)
		datalen = 0;
	namelen = strlen(name) + 1;
	buflen = namelen + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	bcopy(data, buf + namelen, datalen);
	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}

void
ktrstruct_error(const char *name, const void *data, size_t datalen, int error)
{

	if (error == 0)
		ktrstruct(name, data, datalen);
}
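
/*
 * Callers typically pass a well-known structure name so that kdump(1) can
 * decode the payload, e.g. something like (illustrative only):
 *
 *	ktrstruct("stat", &sb, sizeof(sb));
 *
 * where "sb" is a struct stat the kernel has just copied out to user space.
 */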

void
ktrstructarray(const char *name, enum uio_seg seg, const void *data,
    int num_items, size_t struct_size)
{
	struct ktr_request *req;
	struct ktr_struct_array *ksa;
	char *buf;
	size_t buflen, datalen, namelen;
	int max_items;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	/* Trim array length to genio size. */
	max_items = ktr_geniosize / struct_size;
	if (num_items > max_items) {
		if (max_items == 0)
			num_items = 1;
		else
			num_items = max_items;
	}
	datalen = num_items * struct_size;

	if (data == NULL)
		datalen = 0;

	namelen = strlen(name) + 1;
	buflen = namelen + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	if (seg == UIO_SYSSPACE)
		bcopy(data, buf + namelen, datalen);
	else {
		if (copyin(data, buf + namelen, datalen) != 0) {
			free(buf, M_KTRACE);
			return;
		}
	}
	if ((req = ktr_getrequest(KTR_STRUCT_ARRAY)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ksa = &req->ktr_data.ktr_struct_array;
	ksa->struct_size = struct_size;
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}

void
ktrcapfail(enum ktr_cap_fail_type type, const cap_rights_t *needed,
    const cap_rights_t *held)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_cap_fail *kcf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_CAPFAIL);
	if (req == NULL)
		return;
	kcf = &req->ktr_data.ktr_cap_fail;
	kcf->cap_type = type;
	if (needed != NULL)
		kcf->cap_needed = *needed;
	else
		cap_rights_init(&kcf->cap_needed);
	if (held != NULL)
		kcf->cap_held = *held;
	else
		cap_rights_init(&kcf->cap_held);
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrfault(vm_offset_t vaddr, int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_fault *kf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_FAULT);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_fault;
	kf->vaddr = vaddr;
	kf->type = type;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrfaultend(int result)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_faultend *kf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_FAULTEND);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_faultend;
	kf->result = result;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/* ARGSUSED */
int
sys_ktrace(struct thread *td, struct ktrace_args *uap)
{
#ifdef KTRACE
	struct vnode *vp = NULL;
	struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp);
		if (vp->v_type != VREG) {
			(void)vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			ktrace_exit(td);
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			while (vrele_count-- > 0)
				vrele(vp);
		}
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW ||
			    p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void)vn_close(vp, FWRITE, td->td_ucred, td);
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/* ARGSUSED */
int
sys_utrace(struct thread *td, struct utrace_args *uap)
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	req->ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(td, req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}
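
/*
 * From user space, sys_utrace() above is reached via utrace(2); a minimal
 * sketch (illustrative only), assuming the record fits in KTR_USER_MAXLEN:
 *
 *	#include <sys/ktrace.h>
 *
 *	struct my_marker { int phase; } m = { 42 };
 *	utrace(&m, sizeof(m));
 *
 * The record shows up as a KTR_USER entry in the kdump(1) output.
 */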

#ifdef KTRACE
static int
ktrops(struct thread *td, struct proc *p, int ops, int facs, struct vnode *vp)
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			ktr_freeproc(p, &tracecred, &tracevp);
	}
	mtx_unlock(&ktrace_mtx);
	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_entered(td, p);
	PROC_UNLOCK(p);
	if (tracevp != NULL)
		vrele(tracevp);
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}

static int
ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs,
    struct vnode *vp)
{
	struct proc *p;
	int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}

static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < nitems(data_lengths),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		return;
	}

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with.  Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	while (vrele_count-- > 0)
		vrele(vp);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(struct thread *td, struct proc *targetp)
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */