1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. 6 * Copyright (c) 2005 Robert N. M. Watson 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include "opt_ktrace.h" 40 41 #include <sys/param.h> 42 #include <sys/capsicum.h> 43 #include <sys/systm.h> 44 #include <sys/fcntl.h> 45 #include <sys/kernel.h> 46 #include <sys/kthread.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/malloc.h> 50 #include <sys/mount.h> 51 #include <sys/namei.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/unistd.h> 55 #include <sys/vnode.h> 56 #include <sys/socket.h> 57 #include <sys/stat.h> 58 #include <sys/ktrace.h> 59 #include <sys/sx.h> 60 #include <sys/sysctl.h> 61 #include <sys/sysent.h> 62 #include <sys/syslog.h> 63 #include <sys/sysproto.h> 64 65 #include <security/mac/mac_framework.h> 66 67 /* 68 * The ktrace facility allows the tracing of certain key events in user space 69 * processes, such as system calls, signal delivery, context switches, and 70 * user generated events using utrace(2). It works by streaming event 71 * records and data to a vnode associated with the process using the 72 * ktrace(2) system call. In general, records can be written directly from 73 * the context that generates the event. One important exception to this is 74 * during a context switch, where sleeping is not permitted. To handle this 75 * case, trace events are generated using in-kernel ktr_request records, and 76 * then delivered to disk at a convenient moment -- either immediately, the 77 * next traceable event, at system call return, or at process exit. 78 * 79 * When dealing with multiple threads or processes writing to the same event 80 * log, ordering guarantees are weak: specifically, if an event has multiple 81 * records (i.e., system call enter and return), they may be interlaced with 82 * records from another event. Process and thread ID information is provided 83 * in the record, and user applications can de-interlace events if required. 
*/

/* malloc(9) type used for every allocation made by this file. */
static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

/* Default number of preallocated request objects; may be overridden at build. */
#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

/*
 * In-kernel representation of one trace record: the on-disk header, an
 * optional variable-length payload (ktr_buffer, allocated from M_KTRACE and
 * released by ktr_freerequest_locked()), the fixed-size per-type data, and
 * the linkage used for both the free list and the per-process queue.
 */
struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_cap_fail ktr_cap_fail;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
		struct	ktr_fault ktr_fault;
		struct	ktr_faultend ktr_faultend;
		struct	ktr_struct_array ktr_struct_array;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

/*
 * Number of bytes of ktr_data that ktr_writerequest() emits after the header
 * for each record type.  Indexed by the record type with KTR_DROP masked off;
 * a zero entry means the record carries only the ktr_buffer payload (if any).
 */
static int data_lengths[] = {
	[KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args),
	[KTR_SYSRET] = sizeof(struct ktr_sysret),
	[KTR_NAMEI] = 0,
	[KTR_GENIO] = sizeof(struct ktr_genio),
	[KTR_PSIG] = sizeof(struct ktr_psig),
	[KTR_CSW] = sizeof(struct ktr_csw),
	[KTR_USER] = 0,
	[KTR_STRUCT] = 0,
	[KTR_SYSCTL] = 0,
	[KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor),
	[KTR_PROCDTOR] = 0,
	[KTR_CAPFAIL] = sizeof(struct ktr_cap_fail),
	[KTR_FAULT] = sizeof(struct ktr_fault),
	[KTR_FAULTEND] = sizeof(struct ktr_faultend),
	[KTR_STRUCT_ARRAY] = sizeof(struct ktr_struct_array),
};

/* Pool of unused request objects; manipulated with ktrace_mtx held. */
static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

/* Current size of the request pool; seeded from the loader tunable. */
static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

/* Upper bound applied by ktrgenio() and ktrstructarray() to payload sizes. */
u_int ktr_geniosize = PAGE_SIZE;
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RWTUN, &ktr_geniosize,
    0, "Maximum size of genio event payload");

/*
 * Non-zero when the next pool exhaustion should be reported on the console;
 * cleared once the message has been printed and re-armed by a pool resize.
 */
static int print_message = 1;
/*
 * ktrace_mtx is held around updates to the free list, the pool size, the
 * per-process p_ktr queues and the p_tracevp/p_tracecred/p_traceflag fields.
 * ktrace_sx is held exclusively while queued records are drained and written.
 */
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
static void ktrprocctor_entered(struct thread *, struct proc *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
168 */ 169 static void 170 ktrace_enter(struct thread *td) 171 { 172 173 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 174 td->td_pflags |= TDP_INKTRACE; 175 } 176 177 static void 178 ktrace_exit(struct thread *td) 179 { 180 181 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 182 td->td_pflags &= ~TDP_INKTRACE; 183 } 184 185 static void 186 ktrace_assert(struct thread *td) 187 { 188 189 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 190 } 191 192 static void 193 ktrace_init(void *dummy) 194 { 195 struct ktr_request *req; 196 int i; 197 198 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 199 sx_init(&ktrace_sx, "ktrace_sx"); 200 STAILQ_INIT(&ktr_free); 201 for (i = 0; i < ktr_requestpool; i++) { 202 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 203 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 204 } 205 } 206 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 207 208 static int 209 sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 210 { 211 struct thread *td; 212 u_int newsize, oldsize, wantsize; 213 int error; 214 215 /* Handle easy read-only case first to avoid warnings from GCC. 
*/ 216 if (!req->newptr) { 217 oldsize = ktr_requestpool; 218 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 219 } 220 221 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 222 if (error) 223 return (error); 224 td = curthread; 225 ktrace_enter(td); 226 oldsize = ktr_requestpool; 227 newsize = ktrace_resize_pool(oldsize, wantsize); 228 ktrace_exit(td); 229 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 230 if (error) 231 return (error); 232 if (wantsize > oldsize && newsize < wantsize) 233 return (ENOSPC); 234 return (0); 235 } 236 SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 237 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", 238 "Pool buffer size for ktrace(1)"); 239 240 static u_int 241 ktrace_resize_pool(u_int oldsize, u_int newsize) 242 { 243 STAILQ_HEAD(, ktr_request) ktr_new; 244 struct ktr_request *req; 245 int bound; 246 247 print_message = 1; 248 bound = newsize - oldsize; 249 if (bound == 0) 250 return (ktr_requestpool); 251 if (bound < 0) { 252 mtx_lock(&ktrace_mtx); 253 /* Shrink pool down to newsize if possible. */ 254 while (bound++ < 0) { 255 req = STAILQ_FIRST(&ktr_free); 256 if (req == NULL) 257 break; 258 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 259 ktr_requestpool--; 260 free(req, M_KTRACE); 261 } 262 } else { 263 /* Grow pool up to newsize. */ 264 STAILQ_INIT(&ktr_new); 265 while (bound-- > 0) { 266 req = malloc(sizeof(struct ktr_request), M_KTRACE, 267 M_WAITOK); 268 STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list); 269 } 270 mtx_lock(&ktrace_mtx); 271 STAILQ_CONCAT(&ktr_free, &ktr_new); 272 ktr_requestpool += (newsize - oldsize); 273 } 274 mtx_unlock(&ktrace_mtx); 275 return (ktr_requestpool); 276 } 277 278 /* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. 
*/ 279 CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == 280 (sizeof((struct thread *)NULL)->td_name)); 281 282 static struct ktr_request * 283 ktr_getrequest_entered(struct thread *td, int type) 284 { 285 struct ktr_request *req; 286 struct proc *p = td->td_proc; 287 int pm; 288 289 mtx_lock(&ktrace_mtx); 290 if (!KTRCHECK(td, type)) { 291 mtx_unlock(&ktrace_mtx); 292 return (NULL); 293 } 294 req = STAILQ_FIRST(&ktr_free); 295 if (req != NULL) { 296 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 297 req->ktr_header.ktr_type = type; 298 if (p->p_traceflag & KTRFAC_DROP) { 299 req->ktr_header.ktr_type |= KTR_DROP; 300 p->p_traceflag &= ~KTRFAC_DROP; 301 } 302 mtx_unlock(&ktrace_mtx); 303 microtime(&req->ktr_header.ktr_time); 304 req->ktr_header.ktr_pid = p->p_pid; 305 req->ktr_header.ktr_tid = td->td_tid; 306 bcopy(td->td_name, req->ktr_header.ktr_comm, 307 sizeof(req->ktr_header.ktr_comm)); 308 req->ktr_buffer = NULL; 309 req->ktr_header.ktr_len = 0; 310 } else { 311 p->p_traceflag |= KTRFAC_DROP; 312 pm = print_message; 313 print_message = 0; 314 mtx_unlock(&ktrace_mtx); 315 if (pm) 316 printf("Out of ktrace request objects.\n"); 317 } 318 return (req); 319 } 320 321 static struct ktr_request * 322 ktr_getrequest(int type) 323 { 324 struct thread *td = curthread; 325 struct ktr_request *req; 326 327 ktrace_enter(td); 328 req = ktr_getrequest_entered(td, type); 329 if (req == NULL) 330 ktrace_exit(td); 331 332 return (req); 333 } 334 335 /* 336 * Some trace generation environments don't permit direct access to VFS, 337 * such as during a context switch where sleeping is not allowed. Under these 338 * circumstances, queue a request to the thread to be written asynchronously 339 * later. 
340 */ 341 static void 342 ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 343 { 344 345 mtx_lock(&ktrace_mtx); 346 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 347 mtx_unlock(&ktrace_mtx); 348 } 349 350 /* 351 * Drain any pending ktrace records from the per-thread queue to disk. This 352 * is used both internally before committing other records, and also on 353 * system call return. We drain all the ones we can find at the time when 354 * drain is requested, but don't keep draining after that as those events 355 * may be approximately "after" the current event. 356 */ 357 static void 358 ktr_drain(struct thread *td) 359 { 360 struct ktr_request *queued_req; 361 STAILQ_HEAD(, ktr_request) local_queue; 362 363 ktrace_assert(td); 364 sx_assert(&ktrace_sx, SX_XLOCKED); 365 366 STAILQ_INIT(&local_queue); 367 368 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 369 mtx_lock(&ktrace_mtx); 370 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 371 mtx_unlock(&ktrace_mtx); 372 373 while ((queued_req = STAILQ_FIRST(&local_queue))) { 374 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 375 ktr_writerequest(td, queued_req); 376 ktr_freerequest(queued_req); 377 } 378 } 379 } 380 381 /* 382 * Submit a trace record for immediate commit to disk -- to be used only 383 * where entering VFS is OK. First drain any pending records that may have 384 * been cached in the thread. 
385 */ 386 static void 387 ktr_submitrequest(struct thread *td, struct ktr_request *req) 388 { 389 390 ktrace_assert(td); 391 392 sx_xlock(&ktrace_sx); 393 ktr_drain(td); 394 ktr_writerequest(td, req); 395 ktr_freerequest(req); 396 sx_xunlock(&ktrace_sx); 397 ktrace_exit(td); 398 } 399 400 static void 401 ktr_freerequest(struct ktr_request *req) 402 { 403 404 mtx_lock(&ktrace_mtx); 405 ktr_freerequest_locked(req); 406 mtx_unlock(&ktrace_mtx); 407 } 408 409 static void 410 ktr_freerequest_locked(struct ktr_request *req) 411 { 412 413 mtx_assert(&ktrace_mtx, MA_OWNED); 414 if (req->ktr_buffer != NULL) 415 free(req->ktr_buffer, M_KTRACE); 416 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 417 } 418 419 /* 420 * Disable tracing for a process and release all associated resources. 421 * The caller is responsible for releasing a reference on the returned 422 * vnode and credentials. 423 */ 424 static void 425 ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp) 426 { 427 struct ktr_request *req; 428 429 PROC_LOCK_ASSERT(p, MA_OWNED); 430 mtx_assert(&ktrace_mtx, MA_OWNED); 431 *uc = p->p_tracecred; 432 p->p_tracecred = NULL; 433 if (vp != NULL) 434 *vp = p->p_tracevp; 435 p->p_tracevp = NULL; 436 p->p_traceflag = 0; 437 while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { 438 STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); 439 ktr_freerequest_locked(req); 440 } 441 } 442 443 void 444 ktrsyscall(int code, int narg, register_t args[]) 445 { 446 struct ktr_request *req; 447 struct ktr_syscall *ktp; 448 size_t buflen; 449 char *buf = NULL; 450 451 if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) 452 return; 453 454 buflen = sizeof(register_t) * narg; 455 if (buflen > 0) { 456 buf = malloc(buflen, M_KTRACE, M_WAITOK); 457 bcopy(args, buf, buflen); 458 } 459 req = ktr_getrequest(KTR_SYSCALL); 460 if (req == NULL) { 461 if (buf != NULL) 462 free(buf, M_KTRACE); 463 return; 464 } 465 ktp = &req->ktr_data.ktr_syscall; 466 ktp->ktr_code = code; 467 ktp->ktr_narg = narg; 
468 if (buflen > 0) { 469 req->ktr_header.ktr_len = buflen; 470 req->ktr_buffer = buf; 471 } 472 ktr_submitrequest(curthread, req); 473 } 474 475 void 476 ktrsysret(int code, int error, register_t retval) 477 { 478 struct ktr_request *req; 479 struct ktr_sysret *ktp; 480 481 if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) 482 return; 483 484 req = ktr_getrequest(KTR_SYSRET); 485 if (req == NULL) 486 return; 487 ktp = &req->ktr_data.ktr_sysret; 488 ktp->ktr_code = code; 489 ktp->ktr_error = error; 490 ktp->ktr_retval = ((error == 0) ? retval: 0); /* what about val2 ? */ 491 ktr_submitrequest(curthread, req); 492 } 493 494 /* 495 * When a setuid process execs, disable tracing. 496 * 497 * XXX: We toss any pending asynchronous records. 498 */ 499 void 500 ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp) 501 { 502 503 PROC_LOCK_ASSERT(p, MA_OWNED); 504 mtx_lock(&ktrace_mtx); 505 ktr_freeproc(p, uc, vp); 506 mtx_unlock(&ktrace_mtx); 507 } 508 509 /* 510 * When a process exits, drain per-process asynchronous trace records 511 * and disable tracing. 
512 */ 513 void 514 ktrprocexit(struct thread *td) 515 { 516 struct ktr_request *req; 517 struct proc *p; 518 struct ucred *cred; 519 struct vnode *vp; 520 521 p = td->td_proc; 522 if (p->p_traceflag == 0) 523 return; 524 525 ktrace_enter(td); 526 req = ktr_getrequest_entered(td, KTR_PROCDTOR); 527 if (req != NULL) 528 ktr_enqueuerequest(td, req); 529 sx_xlock(&ktrace_sx); 530 ktr_drain(td); 531 sx_xunlock(&ktrace_sx); 532 PROC_LOCK(p); 533 mtx_lock(&ktrace_mtx); 534 ktr_freeproc(p, &cred, &vp); 535 mtx_unlock(&ktrace_mtx); 536 PROC_UNLOCK(p); 537 if (vp != NULL) 538 vrele(vp); 539 if (cred != NULL) 540 crfree(cred); 541 ktrace_exit(td); 542 } 543 544 static void 545 ktrprocctor_entered(struct thread *td, struct proc *p) 546 { 547 struct ktr_proc_ctor *ktp; 548 struct ktr_request *req; 549 struct thread *td2; 550 551 ktrace_assert(td); 552 td2 = FIRST_THREAD_IN_PROC(p); 553 req = ktr_getrequest_entered(td2, KTR_PROCCTOR); 554 if (req == NULL) 555 return; 556 ktp = &req->ktr_data.ktr_proc_ctor; 557 ktp->sv_flags = p->p_sysent->sv_flags; 558 ktr_enqueuerequest(td2, req); 559 } 560 561 void 562 ktrprocctor(struct proc *p) 563 { 564 struct thread *td = curthread; 565 566 if ((p->p_traceflag & KTRFAC_MASK) == 0) 567 return; 568 569 ktrace_enter(td); 570 ktrprocctor_entered(td, p); 571 ktrace_exit(td); 572 } 573 574 /* 575 * When a process forks, enable tracing in the new process if needed. 
576 */ 577 void 578 ktrprocfork(struct proc *p1, struct proc *p2) 579 { 580 581 MPASS(p2->p_tracevp == NULL); 582 MPASS(p2->p_traceflag == 0); 583 584 if (p1->p_traceflag == 0) 585 return; 586 587 PROC_LOCK(p1); 588 mtx_lock(&ktrace_mtx); 589 if (p1->p_traceflag & KTRFAC_INHERIT) { 590 p2->p_traceflag = p1->p_traceflag; 591 if ((p2->p_tracevp = p1->p_tracevp) != NULL) { 592 VREF(p2->p_tracevp); 593 KASSERT(p1->p_tracecred != NULL, 594 ("ktrace vnode with no cred")); 595 p2->p_tracecred = crhold(p1->p_tracecred); 596 } 597 } 598 mtx_unlock(&ktrace_mtx); 599 PROC_UNLOCK(p1); 600 601 ktrprocctor(p2); 602 } 603 604 /* 605 * When a thread returns, drain any asynchronous records generated by the 606 * system call. 607 */ 608 void 609 ktruserret(struct thread *td) 610 { 611 612 ktrace_enter(td); 613 sx_xlock(&ktrace_sx); 614 ktr_drain(td); 615 sx_xunlock(&ktrace_sx); 616 ktrace_exit(td); 617 } 618 619 void 620 ktrnamei(path) 621 char *path; 622 { 623 struct ktr_request *req; 624 int namelen; 625 char *buf = NULL; 626 627 namelen = strlen(path); 628 if (namelen > 0) { 629 buf = malloc(namelen, M_KTRACE, M_WAITOK); 630 bcopy(path, buf, namelen); 631 } 632 req = ktr_getrequest(KTR_NAMEI); 633 if (req == NULL) { 634 if (buf != NULL) 635 free(buf, M_KTRACE); 636 return; 637 } 638 if (namelen > 0) { 639 req->ktr_header.ktr_len = namelen; 640 req->ktr_buffer = buf; 641 } 642 ktr_submitrequest(curthread, req); 643 } 644 645 void 646 ktrsysctl(int *name, u_int namelen) 647 { 648 struct ktr_request *req; 649 u_int mib[CTL_MAXNAME + 2]; 650 char *mibname; 651 size_t mibnamelen; 652 int error; 653 654 /* Lookup name of mib. 
*/ 655 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 656 mib[0] = 0; 657 mib[1] = 1; 658 bcopy(name, mib + 2, namelen * sizeof(*name)); 659 mibnamelen = 128; 660 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 661 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 662 NULL, 0, &mibnamelen, 0); 663 if (error) { 664 free(mibname, M_KTRACE); 665 return; 666 } 667 req = ktr_getrequest(KTR_SYSCTL); 668 if (req == NULL) { 669 free(mibname, M_KTRACE); 670 return; 671 } 672 req->ktr_header.ktr_len = mibnamelen; 673 req->ktr_buffer = mibname; 674 ktr_submitrequest(curthread, req); 675 } 676 677 void 678 ktrgenio(int fd, enum uio_rw rw, struct uio *uio, int error) 679 { 680 struct ktr_request *req; 681 struct ktr_genio *ktg; 682 int datalen; 683 char *buf; 684 685 if (error) { 686 free(uio, M_IOV); 687 return; 688 } 689 uio->uio_offset = 0; 690 uio->uio_rw = UIO_WRITE; 691 datalen = MIN(uio->uio_resid, ktr_geniosize); 692 buf = malloc(datalen, M_KTRACE, M_WAITOK); 693 error = uiomove(buf, datalen, uio); 694 free(uio, M_IOV); 695 if (error) { 696 free(buf, M_KTRACE); 697 return; 698 } 699 req = ktr_getrequest(KTR_GENIO); 700 if (req == NULL) { 701 free(buf, M_KTRACE); 702 return; 703 } 704 ktg = &req->ktr_data.ktr_genio; 705 ktg->ktr_fd = fd; 706 ktg->ktr_rw = rw; 707 req->ktr_header.ktr_len = datalen; 708 req->ktr_buffer = buf; 709 ktr_submitrequest(curthread, req); 710 } 711 712 void 713 ktrpsig(int sig, sig_t action, sigset_t *mask, int code) 714 { 715 struct thread *td = curthread; 716 struct ktr_request *req; 717 struct ktr_psig *kp; 718 719 req = ktr_getrequest(KTR_PSIG); 720 if (req == NULL) 721 return; 722 kp = &req->ktr_data.ktr_psig; 723 kp->signo = (char)sig; 724 kp->action = action; 725 kp->mask = *mask; 726 kp->code = code; 727 ktr_enqueuerequest(td, req); 728 ktrace_exit(td); 729 } 730 731 void 732 ktrcsw(int out, int user, const char *wmesg) 733 { 734 struct thread *td = curthread; 735 struct ktr_request *req; 736 struct 
ktr_csw *kc; 737 738 if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) 739 return; 740 741 req = ktr_getrequest(KTR_CSW); 742 if (req == NULL) 743 return; 744 kc = &req->ktr_data.ktr_csw; 745 kc->out = out; 746 kc->user = user; 747 if (wmesg != NULL) 748 strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg)); 749 else 750 bzero(kc->wmesg, sizeof(kc->wmesg)); 751 ktr_enqueuerequest(td, req); 752 ktrace_exit(td); 753 } 754 755 void 756 ktrstruct(const char *name, const void *data, size_t datalen) 757 { 758 struct ktr_request *req; 759 char *buf; 760 size_t buflen, namelen; 761 762 if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) 763 return; 764 765 if (data == NULL) 766 datalen = 0; 767 namelen = strlen(name) + 1; 768 buflen = namelen + datalen; 769 buf = malloc(buflen, M_KTRACE, M_WAITOK); 770 strcpy(buf, name); 771 bcopy(data, buf + namelen, datalen); 772 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 773 free(buf, M_KTRACE); 774 return; 775 } 776 req->ktr_buffer = buf; 777 req->ktr_header.ktr_len = buflen; 778 ktr_submitrequest(curthread, req); 779 } 780 781 void 782 ktrstruct_error(const char *name, const void *data, size_t datalen, int error) 783 { 784 785 if (error == 0) 786 ktrstruct(name, data, datalen); 787 } 788 789 void 790 ktrstructarray(const char *name, enum uio_seg seg, const void *data, 791 int num_items, size_t struct_size) 792 { 793 struct ktr_request *req; 794 struct ktr_struct_array *ksa; 795 char *buf; 796 size_t buflen, datalen, namelen; 797 int max_items; 798 799 if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) 800 return; 801 802 /* Trim array length to genio size. 
*/ 803 max_items = ktr_geniosize / struct_size; 804 if (num_items > max_items) { 805 if (max_items == 0) 806 num_items = 1; 807 else 808 num_items = max_items; 809 } 810 datalen = num_items * struct_size; 811 812 if (data == NULL) 813 datalen = 0; 814 815 namelen = strlen(name) + 1; 816 buflen = namelen + datalen; 817 buf = malloc(buflen, M_KTRACE, M_WAITOK); 818 strcpy(buf, name); 819 if (seg == UIO_SYSSPACE) 820 bcopy(data, buf + namelen, datalen); 821 else { 822 if (copyin(data, buf + namelen, datalen) != 0) { 823 free(buf, M_KTRACE); 824 return; 825 } 826 } 827 if ((req = ktr_getrequest(KTR_STRUCT_ARRAY)) == NULL) { 828 free(buf, M_KTRACE); 829 return; 830 } 831 ksa = &req->ktr_data.ktr_struct_array; 832 ksa->struct_size = struct_size; 833 req->ktr_buffer = buf; 834 req->ktr_header.ktr_len = buflen; 835 ktr_submitrequest(curthread, req); 836 } 837 838 void 839 ktrcapfail(enum ktr_cap_fail_type type, const cap_rights_t *needed, 840 const cap_rights_t *held) 841 { 842 struct thread *td = curthread; 843 struct ktr_request *req; 844 struct ktr_cap_fail *kcf; 845 846 if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) 847 return; 848 849 req = ktr_getrequest(KTR_CAPFAIL); 850 if (req == NULL) 851 return; 852 kcf = &req->ktr_data.ktr_cap_fail; 853 kcf->cap_type = type; 854 if (needed != NULL) 855 kcf->cap_needed = *needed; 856 else 857 cap_rights_init(&kcf->cap_needed); 858 if (held != NULL) 859 kcf->cap_held = *held; 860 else 861 cap_rights_init(&kcf->cap_held); 862 ktr_enqueuerequest(td, req); 863 ktrace_exit(td); 864 } 865 866 void 867 ktrfault(vm_offset_t vaddr, int type) 868 { 869 struct thread *td = curthread; 870 struct ktr_request *req; 871 struct ktr_fault *kf; 872 873 if (__predict_false(curthread->td_pflags & TDP_INKTRACE)) 874 return; 875 876 req = ktr_getrequest(KTR_FAULT); 877 if (req == NULL) 878 return; 879 kf = &req->ktr_data.ktr_fault; 880 kf->vaddr = vaddr; 881 kf->type = type; 882 ktr_enqueuerequest(td, req); 883 ktrace_exit(td); 884 } 885 
/*
 * Queue a KTR_FAULTEND record carrying result on the current process.  The
 * record is queued rather than written here; it reaches the trace file when
 * the process queue is next drained.  Does nothing when the current thread
 * is already inside ktrace.
 */
void
ktrfaultend(int result)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_faultend *kf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_FAULTEND);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_faultend;
	kf->result = result;
	ktr_enqueuerequest(td, req);
	/* ktr_getrequest() left us inside ktrace; leave it now. */
	ktrace_exit(td);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/*
 * ktrace(2) system call.
 *
 * uap->ops selects the operation (plus the optional KTRFLAG_DESCEND flag),
 * uap->facs the trace facilities, uap->fname the trace file and uap->pid the
 * target: a negative value names a process group, anything else a single
 * process.  Returns 0 or an errno value; without KTRACE compiled in the call
 * always returns ENOSYS.
 */
/* ARGSUSED */
int
sys_ktrace(struct thread *td, struct ktrace_args *uap)
{
#ifdef KTRACE
	struct vnode *vp = NULL;
	struct proc *p;
	struct pgrp *pg;
	/* Callers may not set KTRFAC_ROOT themselves; ktrops() decides that. */
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	/* Everything below runs with tracing of this thread suppressed. */
	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp);
		/* Only regular files are accepted as trace files. */
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			ktrace_exit(td);
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		/*
		 * Walk every process; for each one tracing to vp that the
		 * caller may modify, tear the trace state down.  The vnode
		 * references are counted here and dropped after the walk.
		 */
		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			while (vrele_count-- > 0)
				vrele(vp);
		}
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			/* Skip members that are new or not visible to us. */
			if (p->p_state == PRS_NEW ||
			    p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			/* Both helpers return with p unlocked. */
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	/* ret stays zero only if every target refused the change. */
	if (!ret)
		error = EPERM;
done:
	/*
	 * NOTE(review): the file was opened FREAD | FWRITE but is closed here
	 * with FWRITE only, unlike the early-error path above -- confirm this
	 * asymmetry is intended.
	 */
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/*
 * utrace(2) system call: record uap->len bytes of caller-supplied data from
 * uap->addr as a KTR_USER record.  Returns 0 without doing anything when the
 * KTR_USER trace point is not active for the thread, EINVAL when the length
 * exceeds KTR_USER_MAXLEN, the copyin() error if the data cannot be read, and
 * ENOMEM when no request object could be obtained.  Without KTRACE compiled
 * in the call always returns ENOSYS.
 */
/* ARGSUSED */
int
sys_utrace(struct thread *td, struct utrace_args *uap)
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	/* Ownership of cp passes to the request. */
	req->ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(td, req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

#ifdef KTRACE
/*
 * Apply a set (KTROP_SET) or clear operation for the facilities in facs to
 * process p, using vp as the trace file when setting.
 *
 * Called with p locked and the calling thread inside ktrace; always returns
 * with p unlocked.  Returns 0 if the caller lacks permission to change p's
 * trace state and 1 otherwise (including when p is exiting and is skipped).
 */
static int
ktrops(struct thread *td, struct proc *p, int ops, int facs, struct vnode *vp)
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			/* Swap in our credential; the old one is freed below. */
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		/* Remember that a privileged caller configured the trace. */
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			ktr_freeproc(p, &tracecred, &tracevp);
	}
	mtx_unlock(&ktrace_mtx);
	/* Still traced: queue a process-constructor record for p. */
	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_entered(td, p);
	PROC_UNLOCK(p);
	/* Drop the displaced references only after all locks are released. */
	if (tracevp != NULL)
		vrele(tracevp);
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}

/*
 * Apply ktrops() to top and to every process below it in the process tree.
 * Called with top locked and proctree_lock held; each visited process is
 * locked here and unlocked by ktrops().  Returns the OR of the ktrops()
 * results, i.e. non-zero if the operation was permitted on any process.
 */
static int
ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs,
    struct vnode *vp)
{
	struct proc *p;
	int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		/* Lock the next process for the ktrops() call above. */
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}

/*
 * Append one record to the trace file of td's process.
 *
 * The record is written as up to three pieces in a single VOP_WRITE(): the
 * header, the fixed per-type data (length taken from data_lengths[]), and the
 * variable payload in ktr_buffer.  The request itself is not freed here.  If
 * the process no longer has a trace vnode the record is silently dropped.
 * If the write fails, tracing to that vnode is shut down for every process
 * in the system that uses it.
 */
static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < nitems(data_lengths),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	/* On entry ktr_len counts only the variable payload. */
	buflen = kth->ktr_len;
	/* Piece 0: the header itself. */
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		/* Piece 1: fixed per-type data; fold it into ktr_len. */
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		/* Last piece: the variable payload, at the next free slot. */
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	/* The write is attempted only if the MAC check allows it. */
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		return;
	}

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with. Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		/* Release the detached credential with no locks held. */
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	while (vrele_count-- > 0)
		vrele(vp);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * The target must be locked by the caller.
 */
static int
ktrcanset(struct thread *td, struct proc *targetp)
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */