xref: /freebsd/sys/dev/hwpmc/hwpmc_logging.c (revision 7bd6fde3951af84ef3b68e4d1eadc1840c2fc1b3)
1 /*-
2  * Copyright (c) 2005 Joseph Koshy
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  * Logging code for hwpmc(4)
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/file.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/lock.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
42 #include <sys/pmc.h>
43 #include <sys/pmclog.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/systm.h>
48 #include <sys/uio.h>
49 #include <sys/unistd.h>
50 #include <sys/vnode.h>
51 
52 /*
53  * Sysctl tunables
54  */
55 
56 SYSCTL_DECL(_kern_hwpmc);
57 
58 /*
59  * kern.hwpmc.logbuffersize -- size of each log buffer, in kilobytes.
60  */
61 
62 static int pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
63 TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "logbuffersize", &pmclog_buffer_size);
64 SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_TUN|CTLFLAG_RD,
65     &pmclog_buffer_size, 0, "size of log buffers in kilobytes");
66 
67 
68 /*
69  * kern.hwpmc.nbuffers -- number of global log buffers
70  */
71 
72 static int pmc_nlogbuffers = PMC_NLOGBUFFERS;
73 TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers);
74 SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers, CTLFLAG_TUN|CTLFLAG_RD,
75     &pmc_nlogbuffers, 0, "number of global log buffers");
76 
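/*
 * Both tunables are loader tunables and read-only sysctls
 * (CTLFLAG_TUN|CTLFLAG_RD), so they are meant to be set from
 * loader.conf(5), e.g. (illustrative values, not the built-in defaults):
 *
 *	kern.hwpmc.logbuffersize=16	# kilobytes per log buffer
 *	kern.hwpmc.nbuffers=64		# buffers in the global pool
 */
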
77 /*
78  * Global log buffer list and associated spin lock.
79  */
80 
81 TAILQ_HEAD(, pmclog_buffer) pmc_bufferlist =
82 	TAILQ_HEAD_INITIALIZER(pmc_bufferlist);
83 static struct mtx pmc_bufferlist_mtx;	/* spin lock */
84 static struct mtx pmc_kthread_mtx;	/* sleep lock */
85 
86 #define	PMCLOG_INIT_BUFFER_DESCRIPTOR(D) do {				\
87 		const int __roundup = roundup(sizeof(*D),		\
88 			sizeof(uint32_t));				\
89 		(D)->plb_fence = ((char *) (D)) +			\
90 			 1024*pmclog_buffer_size;			\
91 		(D)->plb_base  = (D)->plb_ptr = ((char *) (D)) +	\
92 			__roundup;					\
93 	} while (0)
94 
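/*
 * The descriptor initialized above sits at the head of its own
 * allocation (see pmclog_initialize() below):
 *
 *	(D)  plb_base == plb_ptr                              plb_fence
 *	 |    |                                                |
 *	 v    v                                                v
 *	[desc][ log record data ...                            ]
 *
 * plb_base is (D) plus sizeof(*D) rounded up to a 32-bit boundary, and
 * plb_fence is (D) + 1024 * pmclog_buffer_size, the end of the buffer.
 */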
95 
96 /*
97  * Log file record constructors.
98  */
99 
100 #define	_PMCLOG_TO_HEADER(T,L)						\
101 	((PMCLOG_HEADER_MAGIC << 24) |					\
102 	 (PMCLOG_TYPE_ ## T << 16)   |					\
103 	 ((L) & 0xFFFF))
104 
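/*
 * As constructed above, each log record starts with a 32-bit header word:
 *
 *	bits 31-24	PMCLOG_HEADER_MAGIC
 *	bits 23-16	record type (PMCLOG_TYPE_*)
 *	bits 15-0	total record length, in bytes
 *
 * followed by a two-word timestamp that is filled in by pmclog_reserve().
 */
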
105 /* reserve LEN bytes of space and initialize the entry header */
106 #define	_PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do {			\
107 		uint32_t *_le;						\
108 		int _len = roundup((LEN), sizeof(uint32_t));		\
109 		if ((_le = pmclog_reserve((PO), _len)) == NULL) {	\
110 			ACTION;						\
111 		}							\
112 		*_le = _PMCLOG_TO_HEADER(TYPE,_len);			\
113 		_le += 3	/* skip over the header and timestamp */
114 
115 #define	PMCLOG_RESERVE(P,T,L)		_PMCLOG_RESERVE(P,T,L,return)
116 #define	PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L,		\
117 	error=ENOMEM;goto error)
118 
119 #define	PMCLOG_EMIT32(V)	do { *_le++ = (V); } while (0)
120 #define	PMCLOG_EMIT64(V)	do { 					\
121 		*_le++ = (uint32_t) ((V) & 0xFFFFFFFF);			\
122 		*_le++ = (uint32_t) (((V) >> 32) & 0xFFFFFFFF);		\
123 	} while (0)
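/*
 * Note that PMCLOG_EMIT64() stores a 64-bit value as two 32-bit words,
 * low word first, independent of the host byte order.
 */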
124 
125 
126 /* Emit a string.  Caution: does NOT update _le, so needs to be last */
127 #define	PMCLOG_EMITSTRING(S,L)	do { bcopy((S), _le, (L)); } while (0)
128 
129 #define	PMCLOG_DESPATCH(PO)						\
130 		pmclog_release((PO));					\
131 	} while (0)
132 
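/*
 * A record constructor therefore follows a fixed pattern; for example
 * (taken verbatim from pmclog_process_procfork() below):
 *
 *	PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork));
 *	PMCLOG_EMIT32(oldpid);
 *	PMCLOG_EMIT32(newpid);
 *	PMCLOG_DESPATCH(po);
 *
 * PMCLOG_RESERVE() opens a "do {" block and reserves space with the
 * owner's spin mutex held, the EMIT macros append the record payload,
 * and PMCLOG_DESPATCH() calls pmclog_release() and closes the block.
 */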
133 
134 /*
135  * Assertions about the log file format.
136  */
137 
138 CTASSERT(sizeof(struct pmclog_closelog) == 3*4);
139 CTASSERT(sizeof(struct pmclog_dropnotify) == 3*4);
140 CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX +
141     4*4 + sizeof(uintfptr_t));
142 CTASSERT(offsetof(struct pmclog_map_in,pl_pathname) ==
143     4*4 + sizeof(uintfptr_t));
144 CTASSERT(sizeof(struct pmclog_map_out) == 4*4 + 2*sizeof(uintfptr_t));
145 CTASSERT(sizeof(struct pmclog_pcsample) == 6*4 + sizeof(uintfptr_t));
146 CTASSERT(sizeof(struct pmclog_pmcallocate) == 6*4);
147 CTASSERT(sizeof(struct pmclog_pmcattach) == 5*4 + PATH_MAX);
148 CTASSERT(offsetof(struct pmclog_pmcattach,pl_pathname) == 5*4);
149 CTASSERT(sizeof(struct pmclog_pmcdetach) == 5*4);
150 CTASSERT(sizeof(struct pmclog_proccsw) == 5*4 + 8);
151 CTASSERT(sizeof(struct pmclog_procexec) == 5*4 + PATH_MAX +
152     sizeof(uintfptr_t));
153 CTASSERT(offsetof(struct pmclog_procexec,pl_pathname) == 5*4 +
154     sizeof(uintfptr_t));
155 CTASSERT(sizeof(struct pmclog_procexit) == 5*4 + 8);
156 CTASSERT(sizeof(struct pmclog_procfork) == 5*4);
157 CTASSERT(sizeof(struct pmclog_sysexit) == 4*4);
158 CTASSERT(sizeof(struct pmclog_userdata) == 4*4);
159 
160 /*
161  * Log buffer structure
162  */
163 
164 struct pmclog_buffer {
165 	TAILQ_ENTRY(pmclog_buffer) plb_next;
166 	char 		*plb_base;
167 	char		*plb_ptr;
168 	char 		*plb_fence;
169 };
170 
171 /*
172  * Prototypes
173  */
174 
175 static int pmclog_get_buffer(struct pmc_owner *po);
176 static void pmclog_loop(void *arg);
177 static void pmclog_release(struct pmc_owner *po);
178 static uint32_t *pmclog_reserve(struct pmc_owner *po, int length);
179 static void pmclog_schedule_io(struct pmc_owner *po);
180 static void pmclog_stop_kthread(struct pmc_owner *po);
181 
182 /*
183  * Helper functions
184  */
185 
186 /*
187  * Get a log buffer
188  */
189 
190 static int
191 pmclog_get_buffer(struct pmc_owner *po)
192 {
193 	struct pmclog_buffer *plb;
194 
195 	mtx_assert(&po->po_mtx, MA_OWNED);
196 
197 	KASSERT(po->po_curbuf == NULL,
198 	    ("[pmc,%d] po=%p current buffer still valid", __LINE__, po));
199 
200 	mtx_lock_spin(&pmc_bufferlist_mtx);
201 	if ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL)
202 		TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
203 	mtx_unlock_spin(&pmc_bufferlist_mtx);
204 
205 	PMCDBG(LOG,GTB,1, "po=%p plb=%p", po, plb);
206 
207 #ifdef	DEBUG
208 	if (plb)
209 		KASSERT(plb->plb_ptr == plb->plb_base &&
210 		    plb->plb_base < plb->plb_fence,
211 		    ("[pmc,%d] po=%p buffer invariants: ptr=%p "
212 		    "base=%p fence=%p", __LINE__, po, plb->plb_ptr,
213 		    plb->plb_base, plb->plb_fence));
214 #endif
215 
216 	po->po_curbuf = plb;
217 
218 	/* update stats */
219 	atomic_add_int(&pmc_stats.pm_buffer_requests, 1);
220 	if (plb == NULL)
221 		atomic_add_int(&pmc_stats.pm_buffer_requests_failed, 1);
222 
223 	return plb ? 0 : ENOMEM;
224 }
225 
226 /*
227  * Log handler loop.
228  *
229  * This function is executed by each pmc owner's helper thread.
230  */
231 
232 static void
233 pmclog_loop(void *arg)
234 {
235 	int error;
236 	struct pmc_owner *po;
237 	struct pmclog_buffer *lb;
238 	struct ucred *ownercred;
239 	struct ucred *mycred;
240 	struct thread *td;
241 	struct uio auio;
242 	struct iovec aiov;
243 	size_t nbytes;
244 
245 	po = (struct pmc_owner *) arg;
246 	td = curthread;
247 	mycred = td->td_ucred;
248 
249 	PROC_LOCK(po->po_owner);
250 	ownercred = crhold(po->po_owner->p_ucred);
251 	PROC_UNLOCK(po->po_owner);
252 
253 	PMCDBG(LOG,INI,1, "po=%p kt=%p", po, po->po_kthread);
254 	KASSERT(po->po_kthread == curthread->td_proc,
255 	    ("[pmc,%d] proc mismatch po=%p po/kt=%p curproc=%p", __LINE__,
256 		po, po->po_kthread, curthread->td_proc));
257 
258 	lb = NULL;
259 
260 
261 	/*
262 	 * Loop waiting for I/O requests to be added to the owner
263 	 * struct's queue.  The loop is exited when the log file
264 	 * is deconfigured.
265 	 */
266 
267 	mtx_lock(&pmc_kthread_mtx);
268 
269 	for (;;) {
270 
271 		/* check if we've been asked to exit */
272 		if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
273 			break;
274 
275 		if (lb == NULL) { /* look for a fresh buffer to write */
276 			mtx_lock_spin(&po->po_mtx);
277 			if ((lb = TAILQ_FIRST(&po->po_logbuffers)) == NULL) {
278 				mtx_unlock_spin(&po->po_mtx);
279 
280 				/* wakeup any processes waiting for a FLUSH */
281 				if (po->po_flags & PMC_PO_IN_FLUSH) {
282 					po->po_flags &= ~PMC_PO_IN_FLUSH;
283 					wakeup_one(po->po_kthread);
284 				}
285 
286 				(void) msleep(po, &pmc_kthread_mtx, PWAIT,
287 				    "pmcloop", 0);
288 				continue;
289 			}
290 
291 			TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
292 			mtx_unlock_spin(&po->po_mtx);
293 		}
294 
295 		mtx_unlock(&pmc_kthread_mtx);
296 
297 		/* process the request */
298 		PMCDBG(LOG,WRI,2, "po=%p base=%p ptr=%p", po,
299 		    lb->plb_base, lb->plb_ptr);
300 		/* change our thread's credentials before issuing the I/O */
301 
302 		aiov.iov_base = lb->plb_base;
303 		aiov.iov_len  = nbytes = lb->plb_ptr - lb->plb_base;
304 
305 		auio.uio_iov    = &aiov;
306 		auio.uio_iovcnt = 1;
307 		auio.uio_offset = -1;
308 		auio.uio_resid  = nbytes;
309 		auio.uio_rw     = UIO_WRITE;
310 		auio.uio_segflg = UIO_SYSSPACE;
311 		auio.uio_td     = td;
312 
313 		/* switch thread credentials -- see kern_ktrace.c */
314 		td->td_ucred = ownercred;
315 		error = fo_write(po->po_file, &auio, ownercred, 0, td);
316 		td->td_ucred = mycred;
317 
318 		mtx_lock(&pmc_kthread_mtx);
319 
320 		if (error) {
321 			/* XXX some errors are recoverable */
322 			/* XXX also check for SIGPIPE if a socket */
323 
324 			/* send a SIGIO to the owner and exit */
325 			PROC_LOCK(po->po_owner);
326 			psignal(po->po_owner, SIGIO);
327 			PROC_UNLOCK(po->po_owner);
328 
329 			po->po_error = error; /* save for flush log */
330 
331 			PMCDBG(LOG,WRI,2, "po=%p error=%d", po, error);
332 
333 			break;
334 		}
335 
336 		/* put the used buffer back into the global pool */
337 		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
338 
339 		mtx_lock_spin(&pmc_bufferlist_mtx);
340 		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
341 		mtx_unlock_spin(&pmc_bufferlist_mtx);
342 
343 		lb = NULL;
344 	}
345 
346 	po->po_kthread = NULL;
347 
348 	mtx_unlock(&pmc_kthread_mtx);
349 
350 	/* return the current I/O buffer to the global pool */
351 	if (lb) {
352 		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
353 
354 		mtx_lock_spin(&pmc_bufferlist_mtx);
355 		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
356 		mtx_unlock_spin(&pmc_bufferlist_mtx);
357 	}
358 
359 	/*
360 	 * Exit this thread, signalling the waiter
361 	 */
362 
363 	crfree(ownercred);
364 
365 	kthread_exit(0);
366 }
367 
368 /*
369  * Release a log entry and schedule an I/O if needed.
370  */
371 
372 static void
373 pmclog_release(struct pmc_owner *po)
374 {
375 	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
376 	    ("[pmc,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
377 		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
378 	KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
379 	    ("[pmc,%d] buffer invariants po=%p ptr=%p fence=%p", __LINE__,
380 		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
381 
382 	/* schedule an I/O if we've filled a buffer */
383 	if (po->po_curbuf->plb_ptr >= po->po_curbuf->plb_fence)
384 		pmclog_schedule_io(po);
385 
386 	mtx_unlock_spin(&po->po_mtx);
387 
388 	PMCDBG(LOG,REL,1, "po=%p", po);
389 }
390 
391 
392 /*
393  * Attempt to reserve 'length' bytes of space in an owner's log
394  * buffer.  The function returns a pointer to 'length' bytes of space
395  * if there was enough space or returns NULL if no space was
396  * available.  Non-null returns do so with the po mutex locked.  The
397  * caller must invoke pmclog_release() on the pmc owner structure
398  * when done.
399  */
400 
401 static uint32_t *
402 pmclog_reserve(struct pmc_owner *po, int length)
403 {
404 	uintptr_t newptr, oldptr;
405 	uint32_t *lh;
406 	struct timespec ts;
407 
408 	PMCDBG(LOG,ALL,1, "po=%p len=%d", po, length);
409 
410 	KASSERT(length % sizeof(uint32_t) == 0,
411 	    ("[pmclog,%d] length not a multiple of word size", __LINE__));
412 
413 	mtx_lock_spin(&po->po_mtx);
414 
415 	if (po->po_curbuf == NULL)
416 		if (pmclog_get_buffer(po) != 0) {
417 			mtx_unlock_spin(&po->po_mtx);
418 			return NULL;
419 		}
420 
421 	KASSERT(po->po_curbuf != NULL,
422 	    ("[pmc,%d] po=%p no current buffer", __LINE__, po));
423 
424 	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base &&
425 	    po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
426 	    ("[pmc,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
427 		__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
428 		po->po_curbuf->plb_fence));
429 
430 	oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
431 	newptr = oldptr + length;
432 
433 	KASSERT(oldptr != (uintptr_t) NULL,
434 	    ("[pmc,%d] po=%p Null log buffer pointer", __LINE__, po));
435 
436 	/*
437 	 * If we have space in the current buffer, return a pointer to
438 	 * available space with the PO structure locked.
439 	 */
440 	if (newptr <= (uintptr_t) po->po_curbuf->plb_fence) {
441 		po->po_curbuf->plb_ptr = (char *) newptr;
442 		goto done;
443 	}
444 
445 	/*
446 	 * Otherwise, schedule the current buffer for output and get a
447 	 * fresh buffer.
448 	 */
449 	pmclog_schedule_io(po);
450 
451 	if (pmclog_get_buffer(po) != 0) {
452 		mtx_unlock_spin(&po->po_mtx);
453 		return NULL;
454 	}
455 
456 	KASSERT(po->po_curbuf != NULL,
457 	    ("[pmc,%d] po=%p no current buffer", __LINE__, po));
458 
459 	KASSERT(po->po_curbuf->plb_ptr != NULL,
460 	    ("[pmc,%d] null return from pmc_get_log_buffer", __LINE__));
461 
462 	KASSERT(po->po_curbuf->plb_ptr == po->po_curbuf->plb_base &&
463 	    po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
464 	    ("[pmc,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
465 		__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
466 		po->po_curbuf->plb_fence));
467 
468 	oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
469 
470  done:
471 	lh = (uint32_t *) oldptr;
472 	lh++;				/* skip header */
473 	getnanotime(&ts);		/* fill in the timestamp */
474 	*lh++ = ts.tv_sec & 0xFFFFFFFF;
475 	*lh++ = ts.tv_nsec & 0xFFFFFFFF;
476 	return (uint32_t *) oldptr;
477 }
478 
479 /*
480  * Schedule an I/O.
481  *
482  * Transfer the current buffer to the helper kthread.
483  */
484 
485 static void
486 pmclog_schedule_io(struct pmc_owner *po)
487 {
488 	KASSERT(po->po_curbuf != NULL,
489 	    ("[pmc,%d] schedule_io with null buffer po=%p", __LINE__, po));
490 
491 	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
492 	    ("[pmc,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
493 		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
494 	KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
495 	    ("[pmc,%d] buffer invariants po=%p ptr=%p fence=%p", __LINE__,
496 		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
497 
498 	PMCDBG(LOG,SIO, 1, "po=%p", po);
499 
500 	mtx_assert(&po->po_mtx, MA_OWNED);
501 
502 	/*
503 	 * Add the current buffer to the tail of the buffer list and
504 	 * wakeup the helper.
505 	 */
506 	TAILQ_INSERT_TAIL(&po->po_logbuffers, po->po_curbuf, plb_next);
507 	po->po_curbuf = NULL;
508 	wakeup_one(po);
509 }
510 
511 /*
512  * Stop the helper kthread.
513  */
514 
515 static void
516 pmclog_stop_kthread(struct pmc_owner *po)
517 {
518 	/*
519 	 * Clear the flag, wake up the helper thread, and
520 	 * wait for it to exit.
521 	 */
522 
523 	mtx_assert(&pmc_kthread_mtx, MA_OWNED);
524 	po->po_flags &= ~PMC_PO_OWNS_LOGFILE;
525 	wakeup_one(po);
526 	if (po->po_kthread)
527 		msleep(po->po_kthread, &pmc_kthread_mtx, PPAUSE, "pmckstp", 0);
528 }
529 
530 /*
531  * Public functions
532  */
533 
534 /*
535  * Configure a log file for pmc owner 'po'.
536  *
537  * Parameter 'logfd' is a file handle referencing an open file in the
538  * owner process.  This file needs to have been opened for writing.
539  */
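/*
 * For orientation, a minimal sketch of the userland side (assuming the
 * usual libpmc entry point, pmc_configure_logfile(3)):
 *
 *	int fd = open("samples.pmclog", O_WRONLY | O_CREAT | O_TRUNC, 0600);
 *	if (pmc_configure_logfile(fd) < 0)
 *		err(EX_OSERR, "cannot configure log file");
 *
 * The resulting PMC_OP_CONFIGURELOG request is expected to reach this
 * function with 'logfd' set to the descriptor passed in from userland.
 */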
540 
541 int
542 pmclog_configure_log(struct pmc_owner *po, int logfd)
543 {
544 	int error;
545 	struct proc *p;
546 
547 	PMCDBG(LOG,CFG,1, "config po=%p logfd=%d", po, logfd);
548 
549 	p = po->po_owner;
550 
551 	/* return EBUSY if a log file was already present */
552 	if (po->po_flags & PMC_PO_OWNS_LOGFILE)
553 		return EBUSY;
554 
555 	KASSERT(po->po_kthread == NULL,
556 	    ("[pmc,%d] po=%p kthread (%p) already present", __LINE__, po,
557 		po->po_kthread));
558 	KASSERT(po->po_file == NULL,
559 	    ("[pmc,%d] po=%p file (%p) already present", __LINE__, po,
560 		po->po_file));
561 
562 	/* get a reference to the file state */
563 	error = fget_write(curthread, logfd, &po->po_file);
564 	if (error)
565 		goto error;
566 
567 	/* mark process as owning a log file */
568 	po->po_flags |= PMC_PO_OWNS_LOGFILE;
569 	error = kthread_create(pmclog_loop, po, &po->po_kthread,
570 	    RFHIGHPID, 0, "hwpmc: proc(%d)", p->p_pid);
571 	if (error)
572 		goto error;
573 
574 	/* mark process as using HWPMCs */
575 	PROC_LOCK(p);
576 	p->p_flag |= P_HWPMC;
577 	PROC_UNLOCK(p);
578 
579 	/* create a log initialization entry */
580 	PMCLOG_RESERVE_WITH_ERROR(po, INITIALIZE,
581 	    sizeof(struct pmclog_initialize));
582 	PMCLOG_EMIT32(PMC_VERSION);
583 	PMCLOG_EMIT32(md->pmd_cputype);
584 	PMCLOG_DESPATCH(po);
585 
586 	return 0;
587 
588  error:
589 	/* shutdown the thread */
590 	mtx_lock(&pmc_kthread_mtx);
591 	if (po->po_kthread)
592 		pmclog_stop_kthread(po);
593 	mtx_unlock(&pmc_kthread_mtx);
594 
595 	KASSERT(po->po_kthread == NULL, ("[pmc,%d] po=%p kthread not stopped",
596 	    __LINE__, po));
597 
598 	if (po->po_file)
599 		(void) fdrop(po->po_file, curthread);
600 	po->po_file  = NULL;	/* clear file and error state */
601 	po->po_error = 0;
602 
603 	return error;
604 }
605 
606 
607 /*
608  * De-configure a log file.  This will throw away any buffers queued
609  * for this owner process.
610  */
611 
612 int
613 pmclog_deconfigure_log(struct pmc_owner *po)
614 {
615 	int error;
616 	struct pmclog_buffer *lb;
617 
618 	PMCDBG(LOG,CFG,1, "de-config po=%p", po);
619 
620 	if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
621 		return EINVAL;
622 
623 	KASSERT(po->po_sscount == 0,
624 	    ("[pmc,%d] po=%p still owning SS PMCs", __LINE__, po));
625 	KASSERT(po->po_file != NULL,
626 	    ("[pmc,%d] po=%p no log file", __LINE__, po));
627 
628 	/* stop the kthread, this will reset the 'OWNS_LOGFILE' flag */
629 	mtx_lock(&pmc_kthread_mtx);
630 	if (po->po_kthread)
631 		pmclog_stop_kthread(po);
632 	mtx_unlock(&pmc_kthread_mtx);
633 
634 	KASSERT(po->po_kthread == NULL,
635 	    ("[pmc,%d] po=%p kthread not stopped", __LINE__, po));
636 
637 	/* return all queued log buffers to the global pool */
638 	while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) {
639 		TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
640 		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
641 		mtx_lock_spin(&pmc_bufferlist_mtx);
642 		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
643 		mtx_unlock_spin(&pmc_bufferlist_mtx);
644 	}
645 
646 	/* return the 'current' buffer to the global pool */
647 	if ((lb = po->po_curbuf) != NULL) {
648 		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
649 		mtx_lock_spin(&pmc_bufferlist_mtx);
650 		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
651 		mtx_unlock_spin(&pmc_bufferlist_mtx);
652 	}
653 
654 	/* drop a reference to the fd */
655 	error = fdrop(po->po_file, curthread);
656 	po->po_file  = NULL;
657 	po->po_error = 0;
658 
659 	return error;
660 }
661 
662 /*
663  * Flush a process' log buffer.
664  */
665 
666 int
667 pmclog_flush(struct pmc_owner *po)
668 {
669 	int error, has_pending_buffers;
670 
671 	PMCDBG(LOG,FLS,1, "po=%p", po);
672 
673 	/*
674 	 * If there is a pending error recorded by the logger thread,
675 	 * return that.
676 	 */
677 	if (po->po_error)
678 		return po->po_error;
679 
680 	error = 0;
681 
682 	/*
683 	 * Check that we do have an active log file.
684 	 */
685 	mtx_lock(&pmc_kthread_mtx);
686 	if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) {
687 		error = EINVAL;
688 		goto error;
689 	}
690 
691 	/*
692 	 * Schedule the current buffer if any.
693 	 */
694 	mtx_lock_spin(&po->po_mtx);
695 	if (po->po_curbuf)
696 		pmclog_schedule_io(po);
697 	has_pending_buffers = !TAILQ_EMPTY(&po->po_logbuffers);
698 	mtx_unlock_spin(&po->po_mtx);
699 
700 	if (has_pending_buffers) {
701 		po->po_flags |= PMC_PO_IN_FLUSH; /* ask for a wakeup */
702 		error = msleep(po->po_kthread, &pmc_kthread_mtx, PWAIT,
703 		    "pmcflush", 0);
704 	}
705 
706  error:
707 	mtx_unlock(&pmc_kthread_mtx);
708 
709 	return error;
710 }
711 
712 
713 /*
714  * Send a 'close log' event to the log file.
715  */
716 
717 void
718 pmclog_process_closelog(struct pmc_owner *po)
719 {
720 	PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog));
721 	PMCLOG_DESPATCH(po);
722 }
723 
724 void
725 pmclog_process_dropnotify(struct pmc_owner *po)
726 {
727 	PMCLOG_RESERVE(po,DROPNOTIFY,sizeof(struct pmclog_dropnotify));
728 	PMCLOG_DESPATCH(po);
729 }
730 
731 void
732 pmclog_process_map_in(struct pmc_owner *po, pid_t pid, uintfptr_t start,
733     const char *path)
734 {
735 	int pathlen, recordlen;
736 
737 	KASSERT(path != NULL, ("[pmclog,%d] map-in, null path", __LINE__));
738 
739 	pathlen = strlen(path) + 1;	/* #bytes for path name */
740 	recordlen = offsetof(struct pmclog_map_in, pl_pathname) +
741 	    pathlen;
742 
743 	PMCLOG_RESERVE(po, MAP_IN, recordlen);
744 	PMCLOG_EMIT32(pid);
745 	PMCLOG_EMITADDR(start);
746 	PMCLOG_EMITSTRING(path,pathlen);
747 	PMCLOG_DESPATCH(po);
748 }
749 
750 void
751 pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start,
752     uintfptr_t end)
753 {
754 	KASSERT(start <= end, ("[pmclog,%d] start > end", __LINE__));
755 
756 	PMCLOG_RESERVE(po, MAP_OUT, sizeof(struct pmclog_map_out));
757 	PMCLOG_EMIT32(pid);
758 	PMCLOG_EMITADDR(start);
759 	PMCLOG_EMITADDR(end);
760 	PMCLOG_DESPATCH(po);
761 }
762 
763 void
764 pmclog_process_pcsample(struct pmc *pm, struct pmc_sample *ps)
765 {
766 	struct pmc_owner *po;
767 
768 	PMCDBG(LOG,SAM,1,"pm=%p pid=%d pc=%p", pm, ps->ps_pid,
769 	    (void *) ps->ps_pc);
770 
771 	po = pm->pm_owner;
772 
773 	PMCLOG_RESERVE(po, PCSAMPLE, sizeof(struct pmclog_pcsample));
774 	PMCLOG_EMIT32(ps->ps_pid);
775 	PMCLOG_EMITADDR(ps->ps_pc);
776 	PMCLOG_EMIT32(pm->pm_id);
777 	PMCLOG_EMIT32(ps->ps_usermode);
778 	PMCLOG_DESPATCH(po);
779 }
780 
781 void
782 pmclog_process_pmcallocate(struct pmc *pm)
783 {
784 	struct pmc_owner *po;
785 
786 	po = pm->pm_owner;
787 
788 	PMCDBG(LOG,ALL,1, "pm=%p", pm);
789 
790 	PMCLOG_RESERVE(po, PMCALLOCATE, sizeof(struct pmclog_pmcallocate));
791 	PMCLOG_EMIT32(pm->pm_id);
792 	PMCLOG_EMIT32(pm->pm_event);
793 	PMCLOG_EMIT32(pm->pm_flags);
794 	PMCLOG_DESPATCH(po);
795 }
796 
797 void
798 pmclog_process_pmcattach(struct pmc *pm, pid_t pid, char *path)
799 {
800 	int pathlen, recordlen;
801 	struct pmc_owner *po;
802 
803 	PMCDBG(LOG,ATT,1,"pm=%p pid=%d", pm, pid);
804 
805 	po = pm->pm_owner;
806 
807 	pathlen = strlen(path) + 1;	/* #bytes for the string */
808 	recordlen = offsetof(struct pmclog_pmcattach, pl_pathname) + pathlen;
809 
810 	PMCLOG_RESERVE(po, PMCATTACH, recordlen);
811 	PMCLOG_EMIT32(pm->pm_id);
812 	PMCLOG_EMIT32(pid);
813 	PMCLOG_EMITSTRING(path, pathlen);
814 	PMCLOG_DESPATCH(po);
815 }
816 
817 void
818 pmclog_process_pmcdetach(struct pmc *pm, pid_t pid)
819 {
820 	struct pmc_owner *po;
821 
822 	PMCDBG(LOG,ATT,1,"!pm=%p pid=%d", pm, pid);
823 
824 	po = pm->pm_owner;
825 
826 	PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach));
827 	PMCLOG_EMIT32(pm->pm_id);
828 	PMCLOG_EMIT32(pid);
829 	PMCLOG_DESPATCH(po);
830 }
831 
832 /*
833  * Log a context switch event to the log file.
834  */
835 
836 void
837 pmclog_process_proccsw(struct pmc *pm, struct pmc_process *pp, pmc_value_t v)
838 {
839 	struct pmc_owner *po;
840 
841 	KASSERT(pm->pm_flags & PMC_F_LOG_PROCCSW,
842 	    ("[pmclog,%d] log-process-csw called gratuitously", __LINE__));
843 
844 	PMCDBG(LOG,SWO,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid,
845 	    v);
846 
847 	po = pm->pm_owner;
848 
849 	PMCLOG_RESERVE(po, PROCCSW, sizeof(struct pmclog_proccsw));
850 	PMCLOG_EMIT32(pm->pm_id);
851 	PMCLOG_EMIT64(v);
852 	PMCLOG_EMIT32(pp->pp_proc->p_pid);
853 	PMCLOG_DESPATCH(po);
854 }
855 
856 void
857 pmclog_process_procexec(struct pmc_owner *po, pmc_id_t pmid, pid_t pid,
858     uintfptr_t startaddr, char *path)
859 {
860 	int pathlen, recordlen;
861 
862 	PMCDBG(LOG,EXC,1,"po=%p pid=%d path=\"%s\"", po, pid, path);
863 
864 	pathlen   = strlen(path) + 1;	/* #bytes for the path */
865 	recordlen = offsetof(struct pmclog_procexec, pl_pathname) + pathlen;
866 
867 	PMCLOG_RESERVE(po, PROCEXEC, recordlen);
868 	PMCLOG_EMIT32(pid);
869 	PMCLOG_EMITADDR(startaddr);
870 	PMCLOG_EMIT32(pmid);
871 	PMCLOG_EMITSTRING(path,pathlen);
872 	PMCLOG_DESPATCH(po);
873 }
874 
875 /*
876  * Log a process exit event (and accumulated pmc value) to the log file.
877  */
878 
879 void
880 pmclog_process_procexit(struct pmc *pm, struct pmc_process *pp)
881 {
882 	int ri;
883 	struct pmc_owner *po;
884 
885 	ri = PMC_TO_ROWINDEX(pm);
886 	PMCDBG(LOG,EXT,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid,
887 	    pp->pp_pmcs[ri].pp_pmcval);
888 
889 	po = pm->pm_owner;
890 
891 	PMCLOG_RESERVE(po, PROCEXIT, sizeof(struct pmclog_procexit));
892 	PMCLOG_EMIT32(pm->pm_id);
893 	PMCLOG_EMIT64(pp->pp_pmcs[ri].pp_pmcval);
894 	PMCLOG_EMIT32(pp->pp_proc->p_pid);
895 	PMCLOG_DESPATCH(po);
896 }
897 
898 /*
899  * Log a fork event.
900  */
901 
902 void
903 pmclog_process_procfork(struct pmc_owner *po, pid_t oldpid, pid_t newpid)
904 {
905 	PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork));
906 	PMCLOG_EMIT32(oldpid);
907 	PMCLOG_EMIT32(newpid);
908 	PMCLOG_DESPATCH(po);
909 }
910 
911 /*
912  * Log a process exit event in the form used for system-wide PMCs.
913  */
914 
915 void
916 pmclog_process_sysexit(struct pmc_owner *po, pid_t pid)
917 {
918 	PMCLOG_RESERVE(po, SYSEXIT, sizeof(struct pmclog_sysexit));
919 	PMCLOG_EMIT32(pid);
920 	PMCLOG_DESPATCH(po);
921 }
922 
923 /*
924  * Write a user log entry.
925  */
926 
927 int
928 pmclog_process_userlog(struct pmc_owner *po, struct pmc_op_writelog *wl)
929 {
930 	int error;
931 
932 	PMCDBG(LOG,WRI,1, "writelog po=%p ud=0x%x", po, wl->pm_userdata);
933 
934 	error = 0;
935 
936 	PMCLOG_RESERVE_WITH_ERROR(po, USERDATA,
937 	    sizeof(struct pmclog_userdata));
938 	PMCLOG_EMIT32(wl->pm_userdata);
939 	PMCLOG_DESPATCH(po);
940 
941  error:
942 	return error;
943 }
944 
945 /*
946  * Initialization.
947  *
948  * Create a pool of log buffers and initialize mutexes.
949  */
950 
951 void
952 pmclog_initialize()
953 {
954 	int n;
955 	struct pmclog_buffer *plb;
956 
957 	if (pmclog_buffer_size <= 0) {
958 		(void) printf("hwpmc: tunable logbuffersize=%d must be greater "
959 		    "than zero.\n", pmclog_buffer_size);
960 		pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
961 	}
962 
963 	if (pmc_nlogbuffers <= 0) {
964 		(void) printf("hwpmc: tunable nlogbuffers=%d must be greater "
965 		    "than zero.\n", pmc_nlogbuffers);
966 		pmc_nlogbuffers = PMC_NLOGBUFFERS;
967 	}
968 
969 	/* create global pool of log buffers */
970 	for (n = 0; n < pmc_nlogbuffers; n++) {
971 		MALLOC(plb, struct pmclog_buffer *, 1024 * pmclog_buffer_size,
972 		    M_PMC, M_ZERO|M_WAITOK);
973 		PMCLOG_INIT_BUFFER_DESCRIPTOR(plb);
974 		TAILQ_INSERT_HEAD(&pmc_bufferlist, plb, plb_next);
975 	}
976 	mtx_init(&pmc_bufferlist_mtx, "pmc-buffer-list", "pmc", MTX_SPIN);
977 	mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc", MTX_DEF);
978 }
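/*
 * Note that the pool created above consumes roughly pmc_nlogbuffers *
 * pmclog_buffer_size kilobytes of kernel memory: for example, 64
 * buffers of 16KB each would use 1MB.  (Illustrative numbers only; the
 * built-in defaults come from PMC_NLOGBUFFERS and PMC_LOG_BUFFER_SIZE.)
 */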
979 
980 /*
981  * Shutdown logging.
982  *
983  * Destroy mutexes and release memory back to the free pool.
984  */
985 
986 void
987 pmclog_shutdown()
988 {
989 	struct pmclog_buffer *plb;
990 
991 	mtx_destroy(&pmc_kthread_mtx);
992 	mtx_destroy(&pmc_bufferlist_mtx);
993 
994 	while ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL) {
995 		TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
996 		FREE(plb, M_PMC);
997 	}
998 }
999