/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */
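
/*
 * Illustrative sketch, not kernel code: userland normally drives this
 * facility through the ktrace(2) system call (handled by sys_ktrace()
 * below) and decodes the output with kdump(1).  The constants are the
 * standard ones from <sys/ktrace.h>; error handling is omitted, and the
 * trace file is assumed to already exist, since the handler below opens
 * it without O_CREAT.
 *
 *	#include <sys/param.h>
 *	#include <sys/ktrace.h>
 *	#include <unistd.h>
 *
 *	// Record system call entry/return for this process (and, with
 *	// KTRFAC_INHERIT, for any children it forks) into "ktrace.out".
 *	ktrace("ktrace.out", KTROP_SET,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_INHERIT, getpid());
 *	...
 *	// Stop tracing this process.
 *	ktrace("ktrace.out", KTROP_CLEAR,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, getpid());
 */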

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_cap_fail ktr_cap_fail;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
		struct	ktr_fault ktr_fault;
		struct	ktr_faultend ktr_faultend;
		struct	ktr_struct_array ktr_struct_array;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
	[KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args),
	[KTR_SYSRET] = sizeof(struct ktr_sysret),
	[KTR_NAMEI] = 0,
	[KTR_GENIO] = sizeof(struct ktr_genio),
	[KTR_PSIG] = sizeof(struct ktr_psig),
	[KTR_CSW] = sizeof(struct ktr_csw),
	[KTR_USER] = 0,
	[KTR_STRUCT] = 0,
	[KTR_SYSCTL] = 0,
	[KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor),
	[KTR_PROCDTOR] = 0,
	[KTR_CAPFAIL] = sizeof(struct ktr_cap_fail),
	[KTR_FAULT] = sizeof(struct ktr_fault),
	[KTR_FAULTEND] = sizeof(struct ktr_faultend),
	[KTR_STRUCT_ARRAY] = sizeof(struct ktr_struct_array),
};

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

u_int ktr_geniosize = PAGE_SIZE;
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RWTUN, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
static void ktrprocctor_entered(struct thread *, struct proc *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ktr_requestpool, 0,
    sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");

static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}

/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    (sizeof((struct thread *)NULL)->td_name));

static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}

static struct ktr_request *
ktr_getrequest(int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, type);
	if (req == NULL)
		ktrace_exit(td);

	return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under
 * these circumstances, queue a request to the thread to be written
 * asynchronously later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
	thread_lock(td);
	td->td_flags |= TDF_ASTPENDING;
	thread_unlock(td);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.  This
 * is used both internally before committing other records, and also on
 * system call return.  We drain all the ones we can find at the time when
 * drain is requested, but don't keep draining after that as those events
 * may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}

/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}

static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}

/*
 * Disable tracing for a process and release all associated resources.
 * The caller is responsible for releasing a reference on the returned
 * vnode and credentials.
 */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}

void
ktrsyscall(int code, int narg, register_t args[])
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysret(int code, int error, register_t retval)
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = ((error == 0) ? retval: 0);	/* what about val2 ? */
	ktr_submitrequest(curthread, req);
}

/*
 * When a setuid process execs, disable tracing.
 *
 * XXX: We toss any pending asynchronous records.
 */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}

/*
 * When a process exits, drain per-process asynchronous trace records
 * and disable tracing.
 */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (vp != NULL)
		vrele(vp);
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}

static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}

void
ktrprocctor(struct proc *p)
{
	struct thread *td = curthread;

	if ((p->p_traceflag & KTRFAC_MASK) == 0)
		return;

	ktrace_enter(td);
	ktrprocctor_entered(td, p);
	ktrace_exit(td);
}

/*
 * When a process forks, enable tracing in the new process if needed.
 */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	MPASS(p2->p_tracevp == NULL);
	MPASS(p2->p_traceflag == 0);

	if (p1->p_traceflag == 0)
		return;

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	ktrprocctor(p2);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

void
ktrnamei(char *path)
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysctl(int *name, u_int namelen)
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/* Lookup name of mib. */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
	    NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}

void
ktrgenio(int fd, enum uio_rw rw, struct uio *uio, int error)
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = MIN(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}

void
ktrpsig(int sig, sig_t action, sigset_t *mask, int code)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_psig *kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrcsw(int out, int user, const char *wmesg)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_csw *kc;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	if (wmesg != NULL)
		strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg));
	else
		bzero(kc->wmesg, sizeof(kc->wmesg));
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrstruct(const char *name, const void *data, size_t datalen)
{
	struct ktr_request *req;
	char *buf;
	size_t buflen, namelen;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	if (data == NULL)
		datalen = 0;
	namelen = strlen(name) + 1;
	buflen = namelen + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	bcopy(data, buf + namelen, datalen);
	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}

void
ktrstruct_error(const char *name, const void *data, size_t datalen, int error)
{

	if (error == 0)
		ktrstruct(name, data, datalen);
}

void
ktrstructarray(const char *name, enum uio_seg seg, const void *data,
    int num_items, size_t struct_size)
{
	struct ktr_request *req;
	struct ktr_struct_array *ksa;
	char *buf;
	size_t buflen, datalen, namelen;
	int max_items;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	/* Trim array length to genio size. */
	max_items = ktr_geniosize / struct_size;
	if (num_items > max_items) {
		if (max_items == 0)
			num_items = 1;
		else
			num_items = max_items;
	}
	datalen = num_items * struct_size;

	if (data == NULL)
		datalen = 0;

	namelen = strlen(name) + 1;
	buflen = namelen + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	if (seg == UIO_SYSSPACE)
		bcopy(data, buf + namelen, datalen);
	else {
		if (copyin(data, buf + namelen, datalen) != 0) {
			free(buf, M_KTRACE);
			return;
		}
	}
	if ((req = ktr_getrequest(KTR_STRUCT_ARRAY)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ksa = &req->ktr_data.ktr_struct_array;
	ksa->struct_size = struct_size;
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}

void
ktrcapfail(enum ktr_cap_fail_type type, const cap_rights_t *needed,
    const cap_rights_t *held)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_cap_fail *kcf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_CAPFAIL);
	if (req == NULL)
		return;
	kcf = &req->ktr_data.ktr_cap_fail;
	kcf->cap_type = type;
	if (needed != NULL)
		kcf->cap_needed = *needed;
	else
		cap_rights_init(&kcf->cap_needed);
	if (held != NULL)
		kcf->cap_held = *held;
	else
		cap_rights_init(&kcf->cap_held);
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrfault(vm_offset_t vaddr, int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_fault *kf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_FAULT);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_fault;
	kf->vaddr = vaddr;
	kf->type = type;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrfaultend(int result)
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_faultend *kf;

	if (__predict_false(curthread->td_pflags & TDP_INKTRACE))
		return;

	req = ktr_getrequest(KTR_FAULTEND);
	if (req == NULL)
		return;
	kf = &req->ktr_data.ktr_faultend;
	kf->result = result;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/* ARGSUSED */
int
sys_ktrace(struct thread *td, struct ktrace_args *uap)
{
#ifdef KTRACE
	struct vnode *vp = NULL;
	struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			ktrace_exit(td);
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			while (vrele_count-- > 0)
				vrele(vp);
		}
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW ||
			    p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/* ARGSUSED */
int
sys_utrace(struct thread *td, struct utrace_args *uap)
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	req->ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(td, req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

#ifdef KTRACE
static int
ktrops(struct thread *td, struct proc *p, int ops, int facs, struct vnode *vp)
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			ktr_freeproc(p, &tracecred, &tracevp);
	}
	mtx_unlock(&ktrace_mtx);
	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_entered(td, p);
	PROC_UNLOCK(p);
	if (tracevp != NULL)
		vrele(tracevp);
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}

static int
ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs,
    struct vnode *vp)
{
	struct proc *p;
	int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}

static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < nitems(data_lengths),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		return;
	}

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with.  Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	while (vrele_count-- > 0)
		vrele(vp);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(struct thread *td, struct proc *targetp)
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */
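
/*
 * Illustrative sketch, not kernel code: user-generated records (KTR_USER,
 * handled by sys_utrace() above) are emitted from a traced process with
 * utrace(2).  The payload is opaque to the kernel, limited to
 * KTR_USER_MAXLEN bytes, and only recorded when the KTR_USER trace point
 * is enabled for the process; "struct my_marker" is a made-up example type.
 *
 *	#include <sys/param.h>
 *	#include <sys/ktrace.h>
 *
 *	struct my_marker { int phase; long value; } m = { 1, 42 };
 *	utrace(&m, sizeof(m));
 */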