1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2005-2007 Joseph Koshy 5 * Copyright (c) 2007 The FreeBSD Foundation 6 * Copyright (c) 2018 Matthew Macy 7 * All rights reserved. 8 * 9 * Portions of this software were developed by A. Joseph Koshy under 10 * sponsorship from the FreeBSD Foundation and Google, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
 *
 */

/*
 * Logging code for hwpmc(4)
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/pmclog.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/unistd.h>
#include <sys/vnode.h>

/*
 * Domain helpers.  Without NUMA there is exactly one domain and the
 * domain-aware allocator calls collapse onto plain malloc()/free().
 */
#ifdef NUMA
#define NDOMAINS vm_ndomains
#define curdomain PCPU_GET(domain)
#else
#define NDOMAINS 1
#define curdomain 0
#define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags))
#define free_domain(addr, type) free(addr, type)
#endif

/*
 * Sysctl tunables
 */

SYSCTL_DECL(_kern_hwpmc);

/*
 * kern.hwpmc.logbuffersize -- size of each log buffer, in kilobytes.
 */

static int pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
#if (__FreeBSD_version < 1100000)
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "logbuffersize", &pmclog_buffer_size);
#endif
SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_RDTUN,
    &pmclog_buffer_size, 0, "size of log buffers in kilobytes");

/*
 * kern.hwpmc.nbuffers_pcpu -- number of log buffers per cpu
 */

static int pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
#if (__FreeBSD_version < 1100000)
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers_pcpu);
#endif
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers_pcpu, CTLFLAG_RDTUN,
    &pmc_nlogbuffers_pcpu, 0, "number of log buffers per cpu");

/*
 * Sleep mutex shared by the logger helper threads and by the code
 * that starts, flushes and stops them.
 */

static struct mtx pmc_kthread_mtx;	/* sleep lock */

/*
 * Point descriptor D at the storage 'buf' (pmclog_buffer_size kilobytes
 * long), mark it empty and record the domain it belongs to.
 */
#define	PMCLOG_INIT_BUFFER_DESCRIPTOR(D, buf, domain) do {		\
		(D)->plb_fence = ((char *) (buf)) + 1024*pmclog_buffer_size; \
		(D)->plb_base = (D)->plb_ptr = ((char *) (buf));	\
		(D)->plb_domain = domain;				\
	} while (0)

/* Mark the buffer described by D as empty again. */
#define	PMCLOG_RESET_BUFFER_DESCRIPTOR(D) do {				\
		(D)->plb_ptr = (D)->plb_base;				\
	} while (0)

/*
 * Log file record constructors.
 */

/* Build a record header word: magic in bits 31-24, type in 23-16, length in 15-0. */
#define	_PMCLOG_TO_HEADER(T,L)						\
	((PMCLOG_HEADER_MAGIC << 24) |					\
	 (PMCLOG_TYPE_ ## T << 16) |					\
	 ((L) & 0xFFFF))

/*
 * Reserve LEN bytes of space and initialize the entry header.
 *
 * NOTE: this macro opens a 'do {' block and declares '_le' inside it;
 * the block is closed by one of the PMCLOG_DESPATCH* macros, so every
 * reserve must be paired with exactly one despatch in the same scope.
 * This variant does not enter a spinlock section itself.
 */
#define	_PMCLOG_RESERVE_SAFE(PO,TYPE,LEN,ACTION) do {			\
		uint32_t *_le;						\
		int _len = roundup((LEN), sizeof(uint32_t));		\
		if ((_le = pmclog_reserve((PO), _len)) == NULL) {	\
			ACTION;						\
		}							\
		*_le = _PMCLOG_TO_HEADER(TYPE,_len);			\
		_le += 3	/* skip over timestamp */

/*
 * Reserve LEN bytes of space and initialize the entry header.
 *
 * Same as _PMCLOG_RESERVE_SAFE, but enters a spinlock section first;
 * the section is left again either on failure (before ACTION runs) or
 * by PMCLOG_DESPATCH / PMCLOG_DESPATCH_SYNC.
 */
#define	_PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do {			\
		uint32_t *_le;						\
		int _len = roundup((LEN), sizeof(uint32_t));		\
		spinlock_enter();					\
		if ((_le = pmclog_reserve((PO), _len)) == NULL) {	\
			spinlock_exit();				\
			ACTION;						\
		}							\
		*_le = _PMCLOG_TO_HEADER(TYPE,_len);			\
		_le += 3	/* skip over timestamp */


/* On reservation failure these return from the enclosing (void) function ... */
#define	PMCLOG_RESERVE_SAFE(P,T,L)	_PMCLOG_RESERVE_SAFE(P,T,L,return)
#define	PMCLOG_RESERVE(P,T,L)		_PMCLOG_RESERVE(P,T,L,return)
/* ... while this one sets 'error' to ENOMEM and jumps to the caller's 'error' label. */
#define	PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L,		\
	error=ENOMEM;goto error)

/* Append one 32-bit word, or one 64-bit value as two words (low word first). */
#define	PMCLOG_EMIT32(V)	do { *_le++ = (V); } while (0)
#define	PMCLOG_EMIT64(V)	do {					\
		*_le++ = (uint32_t) ((V) & 0xFFFFFFFF);			\
		*_le++ = (uint32_t) (((V) >> 32) & 0xFFFFFFFF);		\
	} while (0)


/* Emit a string.
Caution: does NOT update _le, so needs to be last */ 162 #define PMCLOG_EMITSTRING(S,L) do { bcopy((S), _le, (L)); } while (0) 163 #define PMCLOG_EMITNULLSTRING(L) do { bzero(_le, (L)); } while (0) 164 165 #define PMCLOG_DESPATCH_SAFE(PO) \ 166 pmclog_release((PO)); \ 167 } while (0) 168 169 #define PMCLOG_DESPATCH_SCHED_LOCK(PO) \ 170 pmclog_release_flags((PO), 0); \ 171 } while (0) 172 173 #define PMCLOG_DESPATCH(PO) \ 174 pmclog_release((PO)); \ 175 spinlock_exit(); \ 176 } while (0) 177 178 #define PMCLOG_DESPATCH_SYNC(PO) \ 179 pmclog_schedule_io((PO), 1); \ 180 spinlock_exit(); \ 181 } while (0) 182 183 184 /* 185 * Assertions about the log file format. 186 */ 187 188 CTASSERT(sizeof(struct pmclog_callchain) == 8*4 + 189 PMC_CALLCHAIN_DEPTH_MAX*sizeof(uintfptr_t)); 190 CTASSERT(sizeof(struct pmclog_closelog) == 4*4); 191 CTASSERT(sizeof(struct pmclog_dropnotify) == 4*4); 192 CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX + 193 4*4 + sizeof(uintfptr_t)); 194 CTASSERT(offsetof(struct pmclog_map_in,pl_pathname) == 195 4*4 + sizeof(uintfptr_t)); 196 CTASSERT(sizeof(struct pmclog_map_out) == 4*4 + 2*sizeof(uintfptr_t)); 197 CTASSERT(sizeof(struct pmclog_pmcallocate) == 6*4); 198 CTASSERT(sizeof(struct pmclog_pmcattach) == 6*4 + PATH_MAX); 199 CTASSERT(offsetof(struct pmclog_pmcattach,pl_pathname) == 6*4); 200 CTASSERT(sizeof(struct pmclog_pmcdetach) == 6*4); 201 CTASSERT(sizeof(struct pmclog_proccsw) == 6*4 + 8); 202 CTASSERT(sizeof(struct pmclog_procexec) == 6*4 + PATH_MAX + 203 sizeof(uintfptr_t)); 204 CTASSERT(offsetof(struct pmclog_procexec,pl_pathname) == 6*4 + 205 sizeof(uintfptr_t)); 206 CTASSERT(sizeof(struct pmclog_procexit) == 6*4 + 8); 207 CTASSERT(sizeof(struct pmclog_procfork) == 6*4); 208 CTASSERT(sizeof(struct pmclog_sysexit) == 4*4); 209 CTASSERT(sizeof(struct pmclog_userdata) == 4*4); 210 211 /* 212 * Log buffer structure 213 */ 214 215 struct pmclog_buffer { 216 TAILQ_ENTRY(pmclog_buffer) plb_next; 217 char *plb_base; 218 char *plb_ptr; 219 
char *plb_fence; 220 uint16_t plb_domain; 221 } __aligned(CACHE_LINE_SIZE); 222 223 /* 224 * Prototypes 225 */ 226 227 static int pmclog_get_buffer(struct pmc_owner *po); 228 static void pmclog_loop(void *arg); 229 static void pmclog_release(struct pmc_owner *po); 230 static uint32_t *pmclog_reserve(struct pmc_owner *po, int length); 231 static void pmclog_schedule_io(struct pmc_owner *po, int wakeup); 232 static void pmclog_schedule_all(struct pmc_owner *po); 233 static void pmclog_stop_kthread(struct pmc_owner *po); 234 235 /* 236 * Helper functions 237 */ 238 239 static inline void 240 pmc_plb_rele_unlocked(struct pmclog_buffer *plb) 241 { 242 TAILQ_INSERT_HEAD(&pmc_dom_hdrs[plb->plb_domain]->pdbh_head, plb, plb_next); 243 } 244 245 static inline void 246 pmc_plb_rele(struct pmclog_buffer *plb) 247 { 248 mtx_lock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx); 249 pmc_plb_rele_unlocked(plb); 250 mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx); 251 } 252 253 254 /* 255 * Get a log buffer 256 */ 257 258 static int 259 pmclog_get_buffer(struct pmc_owner *po) 260 { 261 struct pmclog_buffer *plb; 262 int domain; 263 264 KASSERT(po->po_curbuf[curcpu] == NULL, 265 ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po)); 266 267 domain = curdomain; 268 MPASS(pmc_dom_hdrs[domain]); 269 mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx); 270 if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) 271 TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next); 272 mtx_unlock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx); 273 274 PMCDBG2(LOG,GTB,1, "po=%p plb=%p", po, plb); 275 276 #ifdef HWPMC_DEBUG 277 if (plb) 278 KASSERT(plb->plb_ptr == plb->plb_base && 279 plb->plb_base < plb->plb_fence, 280 ("[pmclog,%d] po=%p buffer invariants: ptr=%p " 281 "base=%p fence=%p", __LINE__, po, plb->plb_ptr, 282 plb->plb_base, plb->plb_fence)); 283 #endif 284 285 po->po_curbuf[curcpu] = plb; 286 287 /* update stats */ 288 counter_u64_add(pmc_stats.pm_buffer_requests, 
1); 289 if (plb == NULL) 290 counter_u64_add(pmc_stats.pm_buffer_requests_failed, 1); 291 292 return (plb ? 0 : ENOMEM); 293 } 294 295 struct pmclog_proc_init_args { 296 struct proc *kthr; 297 struct pmc_owner *po; 298 bool exit; 299 bool acted; 300 }; 301 302 int 303 pmclog_proc_create(struct thread *td, void **handlep) 304 { 305 struct pmclog_proc_init_args *ia; 306 int error; 307 308 ia = malloc(sizeof(*ia), M_TEMP, M_WAITOK | M_ZERO); 309 error = kproc_create(pmclog_loop, ia, &ia->kthr, 310 RFHIGHPID, 0, "hwpmc: proc(%d)", td->td_proc->p_pid); 311 if (error == 0) 312 *handlep = ia; 313 return (error); 314 } 315 316 void 317 pmclog_proc_ignite(void *handle, struct pmc_owner *po) 318 { 319 struct pmclog_proc_init_args *ia; 320 321 ia = handle; 322 mtx_lock(&pmc_kthread_mtx); 323 MPASS(!ia->acted); 324 MPASS(ia->po == NULL); 325 MPASS(!ia->exit); 326 MPASS(ia->kthr != NULL); 327 if (po == NULL) { 328 ia->exit = true; 329 } else { 330 ia->po = po; 331 KASSERT(po->po_kthread == NULL, 332 ("[pmclog,%d] po=%p kthread (%p) already present", 333 __LINE__, po, po->po_kthread)); 334 po->po_kthread = ia->kthr; 335 } 336 wakeup(ia); 337 while (!ia->acted) 338 msleep(ia, &pmc_kthread_mtx, PWAIT, "pmclogw", 0); 339 mtx_unlock(&pmc_kthread_mtx); 340 free(ia, M_TEMP); 341 } 342 343 /* 344 * Log handler loop. 345 * 346 * This function is executed by each pmc owner's helper thread. 
 */

static void
pmclog_loop(void *arg)
{
	struct pmclog_proc_init_args *ia;
	struct pmc_owner *po;
	struct pmclog_buffer *lb;
	struct proc *p;
	struct ucred *ownercred;
	struct ucred *mycred;
	struct thread *td;
	sigset_t unb;
	struct uio auio;
	struct iovec aiov;
	size_t nbytes;
	int error;

	td = curthread;

	/* SIGHUP is what pmclog_stop_kthread() sends; make sure it is deliverable. */
	SIGEMPTYSET(unb);
	SIGADDSET(unb, SIGHUP);
	(void)kern_sigprocmask(td, SIG_UNBLOCK, &unb, NULL, 0);

	/*
	 * Wait for pmclog_proc_ignite() to either bind us to an owner
	 * or tell us to exit.
	 */
	ia = arg;
	MPASS(ia->kthr == curproc);
	MPASS(!ia->acted);
	mtx_lock(&pmc_kthread_mtx);
	while (ia->po == NULL && !ia->exit)
		msleep(ia, &pmc_kthread_mtx, PWAIT, "pmclogi", 0);
	if (ia->exit) {
		ia->acted = true;
		wakeup(ia);
		mtx_unlock(&pmc_kthread_mtx);
		kproc_exit(0);
	}
	MPASS(ia->po != NULL);
	po = ia->po;
	ia->acted = true;
	wakeup(ia);
	mtx_unlock(&pmc_kthread_mtx);
	/* pmclog_proc_ignite() frees 'ia' once it sees 'acted'; do not touch it again. */
	ia = NULL;

	p = po->po_owner;
	mycred = td->td_ucred;

	/* Hold the owner's credentials for the lifetime of this thread. */
	PROC_LOCK(p);
	ownercred = crhold(p->p_ucred);
	PROC_UNLOCK(p);

	PMCDBG2(LOG,INI,1, "po=%p kt=%p", po, po->po_kthread);
	KASSERT(po->po_kthread == curthread->td_proc,
	    ("[pmclog,%d] proc mismatch po=%p po/kt=%p curproc=%p", __LINE__,
		po, po->po_kthread, curthread->td_proc));

	lb = NULL;


	/*
	 * Loop waiting for I/O requests to be added to the owner
	 * struct's queue.  The loop is exited when the log file
	 * is deconfigured.
	 *
	 * pmc_kthread_mtx is held at the top of every iteration and
	 * at every 'break'; it is dropped only around the write.
	 */

	mtx_lock(&pmc_kthread_mtx);

	for (;;) {

		/* check if we've been asked to exit */
		if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
			break;

		if (lb == NULL) { /* look for a fresh buffer to write */
			mtx_lock_spin(&po->po_mtx);
			if ((lb = TAILQ_FIRST(&po->po_logbuffers)) == NULL) {
				mtx_unlock_spin(&po->po_mtx);

				/* No more buffers and shutdown required. */
				if (po->po_flags & PMC_PO_SHUTDOWN)
					break;

				/* Sleep until woken or until the 250-tick timeout, then re-check. */
				(void) msleep(po, &pmc_kthread_mtx, PWAIT,
				    "pmcloop", 250);
				continue;
			}

			TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
			mtx_unlock_spin(&po->po_mtx);
		}

		mtx_unlock(&pmc_kthread_mtx);

		/* process the request */
		PMCDBG3(LOG,WRI,2, "po=%p base=%p ptr=%p", po,
		    lb->plb_base, lb->plb_ptr);

		/* describe the filled part of the buffer for fo_write() */
		aiov.iov_base = lb->plb_base;
		aiov.iov_len  = nbytes = lb->plb_ptr - lb->plb_base;

		auio.uio_iov    = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = -1;
		auio.uio_resid  = nbytes;
		auio.uio_rw     = UIO_WRITE;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td     = td;

		/* switch thread credentials -- see kern_ktrace.c */
		td->td_ucred = ownercred;
		error = fo_write(po->po_file, &auio, ownercred, 0, td);
		td->td_ucred = mycred;

		if (error) {
			/* XXX some errors are recoverable */
			/* send a SIGIO to the owner and exit */
			PROC_LOCK(p);
			kern_psignal(p, SIGIO);
			PROC_UNLOCK(p);

			mtx_lock(&pmc_kthread_mtx);

			po->po_error = error; /* save for flush log */

			PMCDBG2(LOG,WRI,2, "po=%p error=%d", po, error);

			/* 'lb' is still set here; it is returned to the pool below. */
			break;
		}

		mtx_lock(&pmc_kthread_mtx);

		/* put the used buffer back into the global pool */
		PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);

		pmc_plb_rele(lb);
		lb = NULL;
	}

	/*
	 * pmclog_stop_kthread() sleeps on the po_kthread value; wake it
	 * before clearing the field it is polling.
	 */
	wakeup_one(po->po_kthread);
	po->po_kthread = NULL;

	mtx_unlock(&pmc_kthread_mtx);

	/* return the current I/O buffer to the global pool */
	if (lb) {
		PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);

		pmc_plb_rele(lb);
	}

	/*
	 * Exit this thread, signalling the waiter
	 */

	crfree(ownercred);

	kproc_exit(0);
}

/*
 * Release a log entry and schedule an I/O if needed.
508 */ 509 510 static void 511 pmclog_release_flags(struct pmc_owner *po, int wakeup) 512 { 513 struct pmclog_buffer *plb; 514 515 plb = po->po_curbuf[curcpu]; 516 KASSERT(plb->plb_ptr >= plb->plb_base, 517 ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__, 518 po, plb->plb_ptr, plb->plb_base)); 519 KASSERT(plb->plb_ptr <= plb->plb_fence, 520 ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__, 521 po, plb->plb_ptr, plb->plb_fence)); 522 523 /* schedule an I/O if we've filled a buffer */ 524 if (plb->plb_ptr >= plb->plb_fence) 525 pmclog_schedule_io(po, wakeup); 526 527 PMCDBG1(LOG,REL,1, "po=%p", po); 528 } 529 530 static void 531 pmclog_release(struct pmc_owner *po) 532 { 533 534 pmclog_release_flags(po, 1); 535 } 536 537 538 /* 539 * Attempt to reserve 'length' bytes of space in an owner's log 540 * buffer. The function returns a pointer to 'length' bytes of space 541 * if there was enough space or returns NULL if no space was 542 * available. Non-null returns do so with the po mutex locked. The 543 * caller must invoke pmclog_release() on the pmc owner structure 544 * when done. 545 */ 546 547 static uint32_t * 548 pmclog_reserve(struct pmc_owner *po, int length) 549 { 550 uintptr_t newptr, oldptr; 551 uint32_t *lh; 552 struct timespec ts; 553 struct pmclog_buffer *plb, **pplb; 554 555 PMCDBG2(LOG,ALL,1, "po=%p len=%d", po, length); 556 557 KASSERT(length % sizeof(uint32_t) == 0, 558 ("[pmclog,%d] length not a multiple of word size", __LINE__)); 559 560 /* No more data when shutdown in progress. 
*/ 561 if (po->po_flags & PMC_PO_SHUTDOWN) 562 return (NULL); 563 564 pplb = &po->po_curbuf[curcpu]; 565 if (*pplb == NULL && pmclog_get_buffer(po) != 0) 566 goto fail; 567 568 KASSERT(*pplb != NULL, 569 ("[pmclog,%d] po=%p no current buffer", __LINE__, po)); 570 571 plb = *pplb; 572 KASSERT(plb->plb_ptr >= plb->plb_base && 573 plb->plb_ptr <= plb->plb_fence, 574 ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p", 575 __LINE__, po, plb->plb_ptr, plb->plb_base, 576 plb->plb_fence)); 577 578 oldptr = (uintptr_t) plb->plb_ptr; 579 newptr = oldptr + length; 580 581 KASSERT(oldptr != (uintptr_t) NULL, 582 ("[pmclog,%d] po=%p Null log buffer pointer", __LINE__, po)); 583 584 /* 585 * If we have space in the current buffer, return a pointer to 586 * available space with the PO structure locked. 587 */ 588 if (newptr <= (uintptr_t) plb->plb_fence) { 589 plb->plb_ptr = (char *) newptr; 590 goto done; 591 } 592 593 /* 594 * Otherwise, schedule the current buffer for output and get a 595 * fresh buffer. 596 */ 597 pmclog_schedule_io(po, 0); 598 599 if (pmclog_get_buffer(po) != 0) 600 goto fail; 601 602 plb = *pplb; 603 KASSERT(plb != NULL, 604 ("[pmclog,%d] po=%p no current buffer", __LINE__, po)); 605 606 KASSERT(plb->plb_ptr != NULL, 607 ("[pmclog,%d] null return from pmc_get_log_buffer", __LINE__)); 608 609 KASSERT(plb->plb_ptr == plb->plb_base && 610 plb->plb_ptr <= plb->plb_fence, 611 ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p", 612 __LINE__, po, plb->plb_ptr, plb->plb_base, 613 plb->plb_fence)); 614 615 oldptr = (uintptr_t) plb->plb_ptr; 616 617 done: 618 lh = (uint32_t *) oldptr; 619 lh++; /* skip header */ 620 getnanotime(&ts); /* fill in the timestamp */ 621 *lh++ = ts.tv_sec & 0xFFFFFFFF; 622 *lh++ = ts.tv_nsec & 0xFFFFFFF; 623 return ((uint32_t *) oldptr); 624 fail: 625 return (NULL); 626 } 627 628 /* 629 * Schedule an I/O. 630 * 631 * Transfer the current buffer to the helper kthread. 
632 */ 633 634 static void 635 pmclog_schedule_io(struct pmc_owner *po, int wakeup) 636 { 637 struct pmclog_buffer *plb; 638 639 plb = po->po_curbuf[curcpu]; 640 po->po_curbuf[curcpu] = NULL; 641 KASSERT(plb != NULL, 642 ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po)); 643 KASSERT(plb->plb_ptr >= plb->plb_base, 644 ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__, 645 po, plb->plb_ptr, plb->plb_base)); 646 KASSERT(plb->plb_ptr <= plb->plb_fence, 647 ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__, 648 po, plb->plb_ptr, plb->plb_fence)); 649 650 PMCDBG1(LOG,SIO, 1, "po=%p", po); 651 652 /* 653 * Add the current buffer to the tail of the buffer list and 654 * wakeup the helper. 655 */ 656 mtx_lock_spin(&po->po_mtx); 657 TAILQ_INSERT_TAIL(&po->po_logbuffers, plb, plb_next); 658 mtx_unlock_spin(&po->po_mtx); 659 if (wakeup) 660 wakeup_one(po); 661 } 662 663 /* 664 * Stop the helper kthread. 665 */ 666 667 static void 668 pmclog_stop_kthread(struct pmc_owner *po) 669 { 670 671 mtx_lock(&pmc_kthread_mtx); 672 po->po_flags &= ~PMC_PO_OWNS_LOGFILE; 673 if (po->po_kthread != NULL) { 674 PROC_LOCK(po->po_kthread); 675 kern_psignal(po->po_kthread, SIGHUP); 676 PROC_UNLOCK(po->po_kthread); 677 } 678 wakeup_one(po); 679 while (po->po_kthread) 680 msleep(po->po_kthread, &pmc_kthread_mtx, PPAUSE, "pmckstp", 0); 681 mtx_unlock(&pmc_kthread_mtx); 682 } 683 684 /* 685 * Public functions 686 */ 687 688 /* 689 * Configure a log file for pmc owner 'po'. 690 * 691 * Parameter 'logfd' is a file handle referencing an open file in the 692 * owner process. This file needs to have been opened for writing. 
693 */ 694 695 int 696 pmclog_configure_log(struct pmc_mdep *md, struct pmc_owner *po, int logfd) 697 { 698 struct proc *p; 699 int error; 700 701 sx_assert(&pmc_sx, SA_XLOCKED); 702 PMCDBG2(LOG,CFG,1, "config po=%p logfd=%d", po, logfd); 703 704 p = po->po_owner; 705 706 /* return EBUSY if a log file was already present */ 707 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 708 return (EBUSY); 709 710 KASSERT(po->po_file == NULL, 711 ("[pmclog,%d] po=%p file (%p) already present", __LINE__, po, 712 po->po_file)); 713 714 /* get a reference to the file state */ 715 error = fget_write(curthread, logfd, &cap_write_rights, &po->po_file); 716 if (error) 717 goto error; 718 719 /* mark process as owning a log file */ 720 po->po_flags |= PMC_PO_OWNS_LOGFILE; 721 722 /* mark process as using HWPMCs */ 723 PROC_LOCK(p); 724 p->p_flag |= P_HWPMC; 725 PROC_UNLOCK(p); 726 727 /* create a log initialization entry */ 728 PMCLOG_RESERVE_WITH_ERROR(po, INITIALIZE, 729 sizeof(struct pmclog_initialize)); 730 PMCLOG_EMIT32(PMC_VERSION); 731 PMCLOG_EMIT32(md->pmd_cputype); 732 PMCLOG_DESPATCH_SYNC(po); 733 734 return (0); 735 736 error: 737 KASSERT(po->po_kthread == NULL, ("[pmclog,%d] po=%p kthread not " 738 "stopped", __LINE__, po)); 739 740 if (po->po_file) 741 (void) fdrop(po->po_file, curthread); 742 po->po_file = NULL; /* clear file and error state */ 743 po->po_error = 0; 744 po->po_flags &= ~PMC_PO_OWNS_LOGFILE; 745 746 return (error); 747 } 748 749 750 /* 751 * De-configure a log file. This will throw away any buffers queued 752 * for this owner process. 
753 */ 754 755 int 756 pmclog_deconfigure_log(struct pmc_owner *po) 757 { 758 int error; 759 struct pmclog_buffer *lb; 760 761 PMCDBG1(LOG,CFG,1, "de-config po=%p", po); 762 763 if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) 764 return (EINVAL); 765 766 KASSERT(po->po_sscount == 0, 767 ("[pmclog,%d] po=%p still owning SS PMCs", __LINE__, po)); 768 KASSERT(po->po_file != NULL, 769 ("[pmclog,%d] po=%p no log file", __LINE__, po)); 770 771 /* stop the kthread, this will reset the 'OWNS_LOGFILE' flag */ 772 pmclog_stop_kthread(po); 773 774 KASSERT(po->po_kthread == NULL, 775 ("[pmclog,%d] po=%p kthread not stopped", __LINE__, po)); 776 777 /* return all queued log buffers to the global pool */ 778 while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) { 779 TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next); 780 PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); 781 pmc_plb_rele(lb); 782 } 783 for (int i = 0; i < mp_ncpus; i++) { 784 thread_lock(curthread); 785 sched_bind(curthread, i); 786 thread_unlock(curthread); 787 /* return the 'current' buffer to the global pool */ 788 if ((lb = po->po_curbuf[curcpu]) != NULL) { 789 PMCLOG_RESET_BUFFER_DESCRIPTOR(lb); 790 pmc_plb_rele(lb); 791 } 792 } 793 thread_lock(curthread); 794 sched_unbind(curthread); 795 thread_unlock(curthread); 796 797 /* drop a reference to the fd */ 798 if (po->po_file != NULL) { 799 error = fdrop(po->po_file, curthread); 800 po->po_file = NULL; 801 } else 802 error = 0; 803 po->po_error = 0; 804 805 return (error); 806 } 807 808 /* 809 * Flush a process' log buffer. 810 */ 811 812 int 813 pmclog_flush(struct pmc_owner *po) 814 { 815 int error; 816 817 PMCDBG1(LOG,FLS,1, "po=%p", po); 818 819 /* 820 * If there is a pending error recorded by the logger thread, 821 * return that. 822 */ 823 if (po->po_error) 824 return (po->po_error); 825 826 error = 0; 827 828 /* 829 * Check that we do have an active log file. 
830 */ 831 mtx_lock(&pmc_kthread_mtx); 832 if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { 833 error = EINVAL; 834 goto error; 835 } 836 837 pmclog_schedule_all(po); 838 error: 839 mtx_unlock(&pmc_kthread_mtx); 840 841 return (error); 842 } 843 844 static void 845 pmclog_schedule_one_cond(void *arg) 846 { 847 struct pmc_owner *po = arg; 848 struct pmclog_buffer *plb; 849 850 spinlock_enter(); 851 /* tell hardclock not to run again */ 852 if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) 853 PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); 854 plb = po->po_curbuf[curcpu]; 855 if (plb && plb->plb_ptr != plb->plb_base) 856 pmclog_schedule_io(po, 1); 857 spinlock_exit(); 858 } 859 860 static void 861 pmclog_schedule_all(struct pmc_owner *po) 862 { 863 /* 864 * Schedule the current buffer if any and not empty. 865 */ 866 for (int i = 0; i < mp_ncpus; i++) { 867 thread_lock(curthread); 868 sched_bind(curthread, i); 869 thread_unlock(curthread); 870 pmclog_schedule_one_cond(po); 871 } 872 thread_lock(curthread); 873 sched_unbind(curthread); 874 thread_unlock(curthread); 875 } 876 877 int 878 pmclog_close(struct pmc_owner *po) 879 { 880 881 PMCDBG1(LOG,CLO,1, "po=%p", po); 882 883 pmclog_process_closelog(po); 884 885 mtx_lock(&pmc_kthread_mtx); 886 /* 887 * Initiate shutdown: no new data queued, 888 * thread will close file on last block. 889 */ 890 po->po_flags |= PMC_PO_SHUTDOWN; 891 /* give time for all to see */ 892 DELAY(50); 893 894 /* 895 * Schedule the current buffer. 
896 */ 897 pmclog_schedule_all(po); 898 wakeup_one(po); 899 900 mtx_unlock(&pmc_kthread_mtx); 901 902 return (0); 903 } 904 905 void 906 pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps) 907 { 908 int n, recordlen; 909 uint32_t flags; 910 struct pmc_owner *po; 911 912 PMCDBG3(LOG,SAM,1,"pm=%p pid=%d n=%d", pm, ps->ps_pid, 913 ps->ps_nsamples); 914 915 recordlen = offsetof(struct pmclog_callchain, pl_pc) + 916 ps->ps_nsamples * sizeof(uintfptr_t); 917 po = pm->pm_owner; 918 flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags); 919 PMCLOG_RESERVE_SAFE(po, CALLCHAIN, recordlen); 920 PMCLOG_EMIT32(ps->ps_pid); 921 PMCLOG_EMIT32(ps->ps_tid); 922 PMCLOG_EMIT32(pm->pm_id); 923 PMCLOG_EMIT32(flags); 924 /* unused for now */ 925 PMCLOG_EMIT32(0); 926 for (n = 0; n < ps->ps_nsamples; n++) 927 PMCLOG_EMITADDR(ps->ps_pc[n]); 928 PMCLOG_DESPATCH_SAFE(po); 929 } 930 931 void 932 pmclog_process_closelog(struct pmc_owner *po) 933 { 934 PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog)); 935 PMCLOG_DESPATCH_SYNC(po); 936 } 937 938 void 939 pmclog_process_dropnotify(struct pmc_owner *po) 940 { 941 PMCLOG_RESERVE(po,DROPNOTIFY,sizeof(struct pmclog_dropnotify)); 942 PMCLOG_DESPATCH(po); 943 } 944 945 void 946 pmclog_process_map_in(struct pmc_owner *po, pid_t pid, uintfptr_t start, 947 const char *path) 948 { 949 int pathlen, recordlen; 950 951 KASSERT(path != NULL, ("[pmclog,%d] map-in, null path", __LINE__)); 952 953 pathlen = strlen(path) + 1; /* #bytes for path name */ 954 recordlen = offsetof(struct pmclog_map_in, pl_pathname) + 955 pathlen; 956 957 PMCLOG_RESERVE(po, MAP_IN, recordlen); 958 PMCLOG_EMIT32(pid); 959 PMCLOG_EMITADDR(start); 960 PMCLOG_EMITSTRING(path,pathlen); 961 PMCLOG_DESPATCH(po); 962 } 963 964 void 965 pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start, 966 uintfptr_t end) 967 { 968 KASSERT(start <= end, ("[pmclog,%d] start > end", __LINE__)); 969 970 PMCLOG_RESERVE(po, MAP_OUT, sizeof(struct pmclog_map_out)); 
971 PMCLOG_EMIT32(pid); 972 PMCLOG_EMITADDR(start); 973 PMCLOG_EMITADDR(end); 974 PMCLOG_DESPATCH(po); 975 } 976 977 void 978 pmclog_process_pmcallocate(struct pmc *pm) 979 { 980 struct pmc_owner *po; 981 struct pmc_soft *ps; 982 983 po = pm->pm_owner; 984 985 PMCDBG1(LOG,ALL,1, "pm=%p", pm); 986 987 if (PMC_TO_CLASS(pm) == PMC_CLASS_SOFT) { 988 PMCLOG_RESERVE(po, PMCALLOCATEDYN, 989 sizeof(struct pmclog_pmcallocatedyn)); 990 PMCLOG_EMIT32(pm->pm_id); 991 PMCLOG_EMIT32(pm->pm_event); 992 PMCLOG_EMIT32(pm->pm_flags); 993 ps = pmc_soft_ev_acquire(pm->pm_event); 994 if (ps != NULL) 995 PMCLOG_EMITSTRING(ps->ps_ev.pm_ev_name,PMC_NAME_MAX); 996 else 997 PMCLOG_EMITNULLSTRING(PMC_NAME_MAX); 998 pmc_soft_ev_release(ps); 999 PMCLOG_DESPATCH_SYNC(po); 1000 } else { 1001 PMCLOG_RESERVE(po, PMCALLOCATE, 1002 sizeof(struct pmclog_pmcallocate)); 1003 PMCLOG_EMIT32(pm->pm_id); 1004 PMCLOG_EMIT32(pm->pm_event); 1005 PMCLOG_EMIT32(pm->pm_flags); 1006 PMCLOG_DESPATCH_SYNC(po); 1007 } 1008 } 1009 1010 void 1011 pmclog_process_pmcattach(struct pmc *pm, pid_t pid, char *path) 1012 { 1013 int pathlen, recordlen; 1014 struct pmc_owner *po; 1015 1016 PMCDBG2(LOG,ATT,1,"pm=%p pid=%d", pm, pid); 1017 1018 po = pm->pm_owner; 1019 1020 pathlen = strlen(path) + 1; /* #bytes for the string */ 1021 recordlen = offsetof(struct pmclog_pmcattach, pl_pathname) + pathlen; 1022 1023 PMCLOG_RESERVE(po, PMCATTACH, recordlen); 1024 PMCLOG_EMIT32(pm->pm_id); 1025 PMCLOG_EMIT32(pid); 1026 PMCLOG_EMIT32(0); 1027 PMCLOG_EMITSTRING(path, pathlen); 1028 PMCLOG_DESPATCH_SYNC(po); 1029 } 1030 1031 void 1032 pmclog_process_pmcdetach(struct pmc *pm, pid_t pid) 1033 { 1034 struct pmc_owner *po; 1035 1036 PMCDBG2(LOG,ATT,1,"!pm=%p pid=%d", pm, pid); 1037 1038 po = pm->pm_owner; 1039 1040 PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach)); 1041 PMCLOG_EMIT32(pm->pm_id); 1042 PMCLOG_EMIT32(pid); 1043 PMCLOG_DESPATCH_SYNC(po); 1044 } 1045 1046 /* 1047 * Log a context switch event to the log file. 
 */

void
pmclog_process_proccsw(struct pmc *pm, struct pmc_process *pp, pmc_value_t v, struct thread *td)
{
	struct pmc_owner *po;

	KASSERT(pm->pm_flags & PMC_F_LOG_PROCCSW,
	    ("[pmclog,%d] log-process-csw called gratuitously", __LINE__));

	PMCDBG3(LOG,SWO,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid,
	    v);

	po = pm->pm_owner;

	/*
	 * Uses the variants that neither enter a spinlock section nor
	 * wake the helper thread when a full buffer is queued.  The
	 * emit order below is the record layout; do not reorder.
	 */
	PMCLOG_RESERVE_SAFE(po, PROCCSW, sizeof(struct pmclog_proccsw));
	PMCLOG_EMIT32(pm->pm_id);
	PMCLOG_EMIT64(v);
	PMCLOG_EMIT32(pp->pp_proc->p_pid);
	PMCLOG_EMIT32(td->td_tid);
	PMCLOG_DESPATCH_SCHED_LOCK(po);
}

/*
 * Log an exec event for 'pid': pid, PMC id, a zero word, the start
 * address and the NUL-terminated path of the new image.
 */
void
pmclog_process_procexec(struct pmc_owner *po, pmc_id_t pmid, pid_t pid,
    uintfptr_t startaddr, char *path)
{
	int pathlen, recordlen;

	PMCDBG3(LOG,EXC,1,"po=%p pid=%d path=\"%s\"", po, pid, path);

	pathlen   = strlen(path) + 1;	/* #bytes for the path */
	recordlen = offsetof(struct pmclog_procexec, pl_pathname) + pathlen;

	PMCLOG_RESERVE(po, PROCEXEC, recordlen);
	PMCLOG_EMIT32(pid);
	PMCLOG_EMIT32(pmid);
	PMCLOG_EMIT32(0);
	PMCLOG_EMITADDR(startaddr);
	/* the string must be emitted last: EMITSTRING does not advance _le */
	PMCLOG_EMITSTRING(path,pathlen);
	PMCLOG_DESPATCH(po);
}

/*
 * Log a process exit event (and accumulated pmc value) to the log file.
 */

void
pmclog_process_procexit(struct pmc *pm, struct pmc_process *pp)
{
	int ri;
	struct pmc_owner *po;

	/* row index selecting this PMC's slot in pp->pp_pmcs[] */
	ri = PMC_TO_ROWINDEX(pm);
	PMCDBG3(LOG,EXT,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid,
	    pp->pp_pmcs[ri].pp_pmcval);

	po = pm->pm_owner;

	PMCLOG_RESERVE(po, PROCEXIT, sizeof(struct pmclog_procexit));
	PMCLOG_EMIT32(pm->pm_id);
	PMCLOG_EMIT32(pp->pp_proc->p_pid);
	PMCLOG_EMIT32(0);
	PMCLOG_EMIT64(pp->pp_pmcs[ri].pp_pmcval);
	PMCLOG_DESPATCH(po);
}

/*
 * Log a fork event.
 */

void
pmclog_process_procfork(struct pmc_owner *po, pid_t oldpid, pid_t newpid)
{
	/* record payload: parent pid followed by child pid */
	PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork));
	PMCLOG_EMIT32(oldpid);
	PMCLOG_EMIT32(newpid);
	PMCLOG_DESPATCH(po);
}

/*
 * Log a process exit event of the form suitable for system-wide PMCs.
 */

void
pmclog_process_sysexit(struct pmc_owner *po, pid_t pid)
{
	PMCLOG_RESERVE(po, SYSEXIT, sizeof(struct pmclog_sysexit));
	PMCLOG_EMIT32(pid);
	PMCLOG_DESPATCH(po);
}

/*
 * Write a user log entry.
 *
 * Returns 0 on success, or ENOMEM when no log space could be
 * reserved for the record.
 */

int
pmclog_process_userlog(struct pmc_owner *po, struct pmc_op_writelog *wl)
{
	int error;

	PMCDBG2(LOG,WRI,1, "writelog po=%p ud=0x%x", po, wl->pm_userdata);

	error = 0;

	/* on failure this sets error = ENOMEM and jumps to the label below */
	PMCLOG_RESERVE_WITH_ERROR(po, USERDATA,
	    sizeof(struct pmclog_userdata));
	PMCLOG_EMIT32(wl->pm_userdata);
	PMCLOG_DESPATCH(po);

 error:
	return (error);
}

/*
 * Initialization.
 *
 * Create a pool of log buffers and initialize mutexes.
1166 */ 1167 1168 void 1169 pmclog_initialize() 1170 { 1171 int domain; 1172 struct pmclog_buffer *plb; 1173 1174 if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) { 1175 (void) printf("hwpmc: tunable logbuffersize=%d must be " 1176 "greater than zero and less than or equal to 16MB.\n", 1177 pmclog_buffer_size); 1178 pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; 1179 } 1180 1181 if (pmc_nlogbuffers_pcpu <= 0) { 1182 (void) printf("hwpmc: tunable nlogbuffers=%d must be greater " 1183 "than zero.\n", pmc_nlogbuffers_pcpu); 1184 pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; 1185 } 1186 if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) { 1187 (void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n", 1188 pmc_nlogbuffers_pcpu*pmclog_buffer_size); 1189 pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU; 1190 pmclog_buffer_size = PMC_LOG_BUFFER_SIZE; 1191 } 1192 for (domain = 0; domain < NDOMAINS; domain++) { 1193 int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus; 1194 int total = ncpus*pmc_nlogbuffers_pcpu; 1195 1196 plb = malloc_domain(sizeof(struct pmclog_buffer)*total, M_PMC, domain, M_WAITOK|M_ZERO); 1197 pmc_dom_hdrs[domain]->pdbh_plbs = plb; 1198 for (int i = 0; i < total; i++, plb++) { 1199 void *buf; 1200 1201 buf = malloc_domain(1024 * pmclog_buffer_size, M_PMC, domain, 1202 M_WAITOK|M_ZERO); 1203 PMCLOG_INIT_BUFFER_DESCRIPTOR(plb, buf, domain); 1204 pmc_plb_rele_unlocked(plb); 1205 } 1206 } 1207 mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc-sleep", MTX_DEF); 1208 } 1209 1210 /* 1211 * Shutdown logging. 1212 * 1213 * Destroy mutexes and release memory back the to free pool. 
1214 */ 1215 1216 void 1217 pmclog_shutdown() 1218 { 1219 struct pmclog_buffer *plb; 1220 int domain; 1221 1222 mtx_destroy(&pmc_kthread_mtx); 1223 1224 for (domain = 0; domain < NDOMAINS; domain++) { 1225 while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) { 1226 TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next); 1227 free(plb->plb_base, M_PMC); 1228 } 1229 free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC); 1230 } 1231 } 1232