xref: /freebsd/sys/security/audit/audit_pipe.c (revision b78ee15e9f04ae15c3e1200df974473167524d17)
1 /*-
2  * Copyright (c) 2006 Robert N. M. Watson
3  * Copyright (c) 2008-2009 Apple, Inc.
4  * All rights reserved.
5  *
6  * This software was developed by Robert Watson for the TrustedBSD Project.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/condvar.h>
35 #include <sys/conf.h>
36 #include <sys/eventhandler.h>
37 #include <sys/filio.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/poll.h>
43 #include <sys/proc.h>
44 #include <sys/queue.h>
45 #include <sys/rwlock.h>
46 #include <sys/selinfo.h>
47 #include <sys/sigio.h>
48 #include <sys/signal.h>
49 #include <sys/signalvar.h>
50 #include <sys/sx.h>
51 #include <sys/systm.h>
52 #include <sys/uio.h>
53 
54 #include <security/audit/audit.h>
55 #include <security/audit/audit_ioctl.h>
56 #include <security/audit/audit_private.h>
57 
58 /*
59  * Implementation of a clonable special device providing a live stream of BSM
60  * audit data.  Consumers receive a "tee" of the system audit trail by
61  * default, but may also define alternative event selections using ioctls.
62  * This interface provides unreliable but timely access to audit events.
63  * Consumers should be very careful to avoid introducing event cycles.
64  */
65 
/*
 * Memory types.  M_AUDIT_PIPE is used for struct audit_pipe itself,
 * M_AUDIT_PIPE_ENTRY for queue entries and the record buffers hanging off
 * them, and M_AUDIT_PIPE_PRESELECT for per-auid preselection entries.
 */
static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
    "Audit pipe entries and buffers");
static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel",
    "Audit pipe preselection structure");
74 
/*
 * Audit pipe buffer parameters.  Queue limits are counted in records, not
 * bytes.  A newly allocated pipe starts with the default limit; the minimum
 * and maximum are the values reported to userspace by the
 * AUDITPIPE_GET_QLIMIT_MIN and AUDITPIPE_GET_QLIMIT_MAX ioctls.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	(128)
#define	AUDIT_PIPE_QLIMIT_MIN		(1)
#define	AUDIT_PIPE_QLIMIT_MAX		(1024)
81 
/*
 * Description of an entry in an audit_pipe: a single queued record.
 *
 * ape_record is a pipe-private copy of the record data and ape_record_len
 * is its length in bytes; both the entry and the buffer are allocated from
 * M_AUDIT_PIPE_ENTRY and released together by audit_pipe_entry_free().
 * ape_queue links the entry into the owning pipe's ap_queue.
 */
struct audit_pipe_entry {
	void				*ape_record;
	u_int				 ape_record_len;
	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;
};
90 
/*
 * Audit pipes allow processes to express "interest" in the set of records
 * that are delivered via the pipe.  They do this in a similar manner to the
 * mechanism for audit trail configuration, by expressing two global masks,
 * and optionally expressing per-auid masks.  The following data structure is
 * the per-auid mask description.  The global state is stored in the audit
 * pipe data structure.
 *
 * Entries live on the pipe's ap_preselect_list, which is searched linearly
 * by auid, and are allocated from M_AUDIT_PIPE_PRESELECT.
 *
 * We may want to consider a more space/time-efficient data structure once
 * usage patterns for per-auid specifications are clear.
 */
struct audit_pipe_preselect {
	/* Audit user ID this entry applies to. */
	au_id_t					 app_auid;
	/* Mask used for records attributed to app_auid. */
	au_mask_t				 app_mask;
	/* Linkage on the owning pipe's ap_preselect_list. */
	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;
};
107 
/*
 * Description of an individual audit_pipe.  Consists largely of a bounded
 * length queue.
 *
 * ap_flags is a bitmask of the AUDIT_PIPE_* values below: AUDIT_PIPE_ASYNC
 * requests SIGIO delivery when a record is appended (toggled by FIOASYNC),
 * and AUDIT_PIPE_NBIO makes reads on an empty queue fail with EAGAIN rather
 * than sleep (toggled by FIONBIO).
 */
#define	AUDIT_PIPE_ASYNC	0x00000001
#define	AUDIT_PIPE_NBIO		0x00000002
struct audit_pipe {
	u_int				 ap_flags;

	/*
	 * select/poll/kqueue state and the SIGIO owner used for asynchronous
	 * notification.
	 */
	struct selinfo			 ap_selinfo;
	struct sigio			*ap_sigio;

	/*
	 * Per-pipe mutex protecting most fields in this data structure.
	 */
	struct mtx			 ap_mtx;

	/*
	 * Per-pipe sleep lock serializing user-generated reads and flushes.
	 * uiomove() is called to copy out the current head record's data
	 * while the record remains in the queue, so we prevent other threads
	 * from removing it using this lock.
	 */
	struct sx			 ap_sx;

	/*
	 * Condition variable to signal when data has been delivered to a
	 * pipe.
	 */
	struct cv			 ap_cv;

	/*
	 * Various queue-related variables: qlen and qlimit are a count of
	 * records in the queue; qbyteslen is the number of bytes of data
	 * across all records, and qoffset is the amount read so far of the
	 * first record in the queue.  The number of bytes available for
	 * reading in the queue is qbyteslen - qoffset.
	 */
	u_int				 ap_qlen;
	u_int				 ap_qlimit;
	u_int				 ap_qbyteslen;
	u_int				 ap_qoffset;

	/*
	 * Per-pipe operation statistics.  Note that ap_reads is bumped once
	 * per audit_pipe_read() call that finds data in the queue, not once
	 * per record consumed.
	 */
	u_int64_t			 ap_inserts;	/* Records added. */
	u_int64_t			 ap_reads;	/* Records read. */
	u_int64_t			 ap_drops;	/* Records dropped. */

	/*
	 * Fields relating to pipe interest: global masks for unmatched
	 * processes (attributable, non-attributable), and a list of specific
	 * interest specifications by auid.
	 */
	int				 ap_preselect_mode;
	au_mask_t			 ap_preselect_flags;
	au_mask_t			 ap_preselect_naflags;
	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;

	/*
	 * Current pending record list.  Protected by a combination of ap_mtx
	 * and ap_sx.  Note particularly that *both* locks are required to
	 * remove a record from the head of the queue, as an in-progress read
	 * may sleep while copying and therefore cannot hold ap_mtx.
	 */
	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;

	/*
	 * Global pipe list.
	 */
	TAILQ_ENTRY(audit_pipe)		 ap_list;
};
181 
/*
 * Wrappers for the per-pipe mutex (ap_mtx).  AUDIT_PIPE_MTX() yields the
 * mutex pointer itself for interfaces that take one directly, such as
 * cv_wait_sig() and knlist_init_mtx().
 */
#define	AUDIT_PIPE_LOCK(ap)		mtx_lock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_ASSERT(ap)	mtx_assert(&(ap)->ap_mtx, MA_OWNED)
#define	AUDIT_PIPE_LOCK_DESTROY(ap)	mtx_destroy(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_INIT(ap)	mtx_init(&(ap)->ap_mtx, \
					    "audit_pipe_mtx", NULL, MTX_DEF)
#define	AUDIT_PIPE_UNLOCK(ap)		mtx_unlock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_MTX(ap)		(&(ap)->ap_mtx)

/*
 * Wrappers for the per-pipe sx lock (ap_sx) that serializes reads and
 * flushes.  Only exclusive acquisition is provided, and the acquire variant
 * is interruptible by signals: it returns non-zero if the lock was not
 * obtained.
 */
#define	AUDIT_PIPE_SX_LOCK_DESTROY(ap)	sx_destroy(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_LOCK_INIT(ap)	sx_init(&(ap)->ap_sx, "audit_pipe_sx")
#define	AUDIT_PIPE_SX_XLOCK_ASSERT(ap)	sx_assert(&(ap)->ap_sx, SA_XLOCKED)
#define	AUDIT_PIPE_SX_XLOCK_SIG(ap)	sx_xlock_sig(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_XUNLOCK(ap)	sx_xunlock(&(ap)->ap_sx)
195 
/*
 * Global list of audit pipes, rwlock to protect it.  Individual record
 * queues on pipes are protected by per-pipe locks; these locks synchronize
 * between threads walking the list to deliver to individual pipes and add/
 * remove of pipes, and are mostly acquired for read.
 *
 * Record delivery and preselection take the lock for read; pipe allocation
 * and destruction take it for write.  Where both are needed, the list lock
 * is acquired before the per-pipe mutex.
 */
static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
static struct rwlock		 audit_pipe_lock;

#define	AUDIT_PIPE_LIST_LOCK_INIT()	rw_init(&audit_pipe_lock, \
					    "audit_pipe_list_lock")
#define	AUDIT_PIPE_LIST_LOCK_DESTROY()	rw_destroy(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_RLOCK()		rw_rlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_RUNLOCK()	rw_runlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK()		rw_wlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK_ASSERT()	rw_assert(&audit_pipe_lock, \
					    RA_WLOCKED)
#define	AUDIT_PIPE_LIST_WUNLOCK()	rw_wunlock(&audit_pipe_lock)
214 
/*
 * Audit pipe device.  A single device node is created by audit_pipe_init();
 * each open of it gets its own struct audit_pipe, attached to the open file
 * as cdevpriv data.
 */
static struct cdev	*audit_pipe_dev;

#define AUDIT_PIPE_NAME	"auditpipe"

/*
 * Special device methods and definition.  No write method is provided:
 * the device only delivers records to its consumers.
 */
static d_open_t		audit_pipe_open;
static d_close_t	audit_pipe_close;
static d_read_t		audit_pipe_read;
static d_ioctl_t	audit_pipe_ioctl;
static d_poll_t		audit_pipe_poll;
static d_kqfilter_t	audit_pipe_kqfilter;

static struct cdevsw	audit_pipe_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	audit_pipe_open,
	.d_close =	audit_pipe_close,
	.d_read =	audit_pipe_read,
	.d_ioctl =	audit_pipe_ioctl,
	.d_poll =	audit_pipe_poll,
	.d_kqfilter =	audit_pipe_kqfilter,
	.d_name =	AUDIT_PIPE_NAME,
};
242 
/*
 * kqueue(2) support.  Only EVFILT_READ is offered; audit_pipe_kqfilter()
 * attaches the knote itself, so no f_attach method is needed.
 */
static int	audit_pipe_kqread(struct knote *note, long hint);
static void	audit_pipe_kqdetach(struct knote *note);

static struct filterops audit_pipe_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	audit_pipe_kqdetach,
	.f_event =	audit_pipe_kqread,
};
252 
/*
 * Some global statistics on audit pipes.  audit_pipe_count and
 * audit_pipe_ever are updated with the global list lock held for write;
 * audit_pipe_records is incremented without any lock, and
 * audit_pipe_drops is incremented with only the dropping pipe's mutex held,
 * so both are approximate.
 */
static int		audit_pipe_count;	/* Current number of pipes. */
static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
static u_int64_t	audit_pipe_records;	/* Records seen. */
static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
260 
261 /*
262  * Free an audit pipe entry.
263  */
264 static void
265 audit_pipe_entry_free(struct audit_pipe_entry *ape)
266 {
267 
268 	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
269 	free(ape, M_AUDIT_PIPE_ENTRY);
270 }
271 
272 /*
273  * Find an audit pipe preselection specification for an auid, if any.
274  */
275 static struct audit_pipe_preselect *
276 audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
277 {
278 	struct audit_pipe_preselect *app;
279 
280 	AUDIT_PIPE_LOCK_ASSERT(ap);
281 
282 	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
283 		if (app->app_auid == auid)
284 			return (app);
285 	}
286 	return (NULL);
287 }
288 
289 /*
290  * Query the per-pipe mask for a specific auid.
291  */
292 static int
293 audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
294     au_mask_t *maskp)
295 {
296 	struct audit_pipe_preselect *app;
297 	int error;
298 
299 	AUDIT_PIPE_LOCK(ap);
300 	app = audit_pipe_preselect_find(ap, auid);
301 	if (app != NULL) {
302 		*maskp = app->app_mask;
303 		error = 0;
304 	} else
305 		error = ENOENT;
306 	AUDIT_PIPE_UNLOCK(ap);
307 	return (error);
308 }
309 
310 /*
311  * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
312  * otherwise, update the current entry.
313  */
314 static void
315 audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
316 {
317 	struct audit_pipe_preselect *app, *app_new;
318 
319 	/*
320 	 * Pessimistically assume that the auid doesn't already have a mask
321 	 * set, and allocate.  We will free it if it is unneeded.
322 	 */
323 	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
324 	AUDIT_PIPE_LOCK(ap);
325 	app = audit_pipe_preselect_find(ap, auid);
326 	if (app == NULL) {
327 		app = app_new;
328 		app_new = NULL;
329 		app->app_auid = auid;
330 		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
331 	}
332 	app->app_mask = mask;
333 	AUDIT_PIPE_UNLOCK(ap);
334 	if (app_new != NULL)
335 		free(app_new, M_AUDIT_PIPE_PRESELECT);
336 }
337 
338 /*
339  * Delete a per-auid mask on an audit pipe.
340  */
341 static int
342 audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
343 {
344 	struct audit_pipe_preselect *app;
345 	int error;
346 
347 	AUDIT_PIPE_LOCK(ap);
348 	app = audit_pipe_preselect_find(ap, auid);
349 	if (app != NULL) {
350 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
351 		error = 0;
352 	} else
353 		error = ENOENT;
354 	AUDIT_PIPE_UNLOCK(ap);
355 	if (app != NULL)
356 		free(app, M_AUDIT_PIPE_PRESELECT);
357 	return (error);
358 }
359 
360 /*
361  * Delete all per-auid masks on an audit pipe.
362  */
363 static void
364 audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
365 {
366 	struct audit_pipe_preselect *app;
367 
368 	AUDIT_PIPE_LOCK_ASSERT(ap);
369 
370 	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
371 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
372 		free(app, M_AUDIT_PIPE_PRESELECT);
373 	}
374 }
375 
/*
 * Delete all per-auid masks on an audit pipe.  Unlocked entry point: takes
 * the pipe mutex around audit_pipe_preselect_flush_locked().
 */
static void
audit_pipe_preselect_flush(struct audit_pipe *ap)
{

	AUDIT_PIPE_LOCK(ap);
	audit_pipe_preselect_flush_locked(ap);
	AUDIT_PIPE_UNLOCK(ap);
}
384 
385 /*-
386  * Determine whether a specific audit pipe matches a record with these
387  * properties.  Algorithm is as follows:
388  *
389  * - If the pipe is configured to track the default trail configuration, then
390  *   use the results of global preselection matching.
391  * - If not, search for a specifically configured auid entry matching the
392  *   event.  If an entry is found, use that.
393  * - Otherwise, use the default flags or naflags configured for the pipe.
394  */
395 static int
396 audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
397     au_event_t event, au_class_t class, int sorf, int trail_preselect)
398 {
399 	struct audit_pipe_preselect *app;
400 
401 	AUDIT_PIPE_LOCK_ASSERT(ap);
402 
403 	switch (ap->ap_preselect_mode) {
404 	case AUDITPIPE_PRESELECT_MODE_TRAIL:
405 		return (trail_preselect);
406 
407 	case AUDITPIPE_PRESELECT_MODE_LOCAL:
408 		app = audit_pipe_preselect_find(ap, auid);
409 		if (app == NULL) {
410 			if (auid == AU_DEFAUDITID)
411 				return (au_preselect(event, class,
412 				    &ap->ap_preselect_naflags, sorf));
413 			else
414 				return (au_preselect(event, class,
415 				    &ap->ap_preselect_flags, sorf));
416 		} else
417 			return (au_preselect(event, class, &app->app_mask,
418 			    sorf));
419 
420 	default:
421 		panic("audit_pipe_preselect_check: mode %d",
422 		    ap->ap_preselect_mode);
423 	}
424 
425 	return (0);
426 }
427 
428 /*
429  * Determine whether there exists a pipe interested in a record with specific
430  * properties.
431  */
432 int
433 audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
434     int sorf, int trail_preselect)
435 {
436 	struct audit_pipe *ap;
437 
438 	/* Lockless read to avoid acquiring the global lock if not needed. */
439 	if (TAILQ_EMPTY(&audit_pipe_list))
440 		return (0);
441 
442 	AUDIT_PIPE_LIST_RLOCK();
443 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
444 		AUDIT_PIPE_LOCK(ap);
445 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
446 		    trail_preselect)) {
447 			AUDIT_PIPE_UNLOCK(ap);
448 			AUDIT_PIPE_LIST_RUNLOCK();
449 			return (1);
450 		}
451 		AUDIT_PIPE_UNLOCK(ap);
452 	}
453 	AUDIT_PIPE_LIST_RUNLOCK();
454 	return (0);
455 }
456 
457 /*
458  * Append individual record to a queue -- allocate queue-local buffer, and
459  * add to the queue.  If the queue is full or we can't allocate memory, drop
460  * the newest record.
461  */
462 static void
463 audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
464 {
465 	struct audit_pipe_entry *ape;
466 
467 	AUDIT_PIPE_LOCK_ASSERT(ap);
468 
469 	if (ap->ap_qlen >= ap->ap_qlimit) {
470 		ap->ap_drops++;
471 		audit_pipe_drops++;
472 		return;
473 	}
474 
475 	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
476 	if (ape == NULL) {
477 		ap->ap_drops++;
478 		audit_pipe_drops++;
479 		return;
480 	}
481 
482 	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
483 	if (ape->ape_record == NULL) {
484 		free(ape, M_AUDIT_PIPE_ENTRY);
485 		ap->ap_drops++;
486 		audit_pipe_drops++;
487 		return;
488 	}
489 
490 	bcopy(record, ape->ape_record, record_len);
491 	ape->ape_record_len = record_len;
492 
493 	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
494 	ap->ap_inserts++;
495 	ap->ap_qlen++;
496 	ap->ap_qbyteslen += ape->ape_record_len;
497 	selwakeuppri(&ap->ap_selinfo, PSOCK);
498 	KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
499 	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
500 		pgsigio(&ap->ap_sigio, SIGIO, 0);
501 	cv_broadcast(&ap->ap_cv);
502 }
503 
504 /*
505  * audit_pipe_submit(): audit_worker submits audit records via this
506  * interface, which arranges for them to be delivered to pipe queues.
507  */
508 void
509 audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
510     int trail_select, void *record, u_int record_len)
511 {
512 	struct audit_pipe *ap;
513 
514 	/*
515 	 * Lockless read to avoid lock overhead if pipes are not in use.
516 	 */
517 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
518 		return;
519 
520 	AUDIT_PIPE_LIST_RLOCK();
521 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
522 		AUDIT_PIPE_LOCK(ap);
523 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
524 		    trail_select))
525 			audit_pipe_append(ap, record, record_len);
526 		AUDIT_PIPE_UNLOCK(ap);
527 	}
528 	AUDIT_PIPE_LIST_RUNLOCK();
529 
530 	/* Unlocked increment. */
531 	audit_pipe_records++;
532 }
533 
534 /*
535  * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
536  * since we don't currently have selection information available, it is
537  * delivered to the pipe unconditionally.
538  *
539  * XXXRW: This is a bug.  The BSM check routine for submitting a user record
540  * should parse that information and return it.
541  */
542 void
543 audit_pipe_submit_user(void *record, u_int record_len)
544 {
545 	struct audit_pipe *ap;
546 
547 	/*
548 	 * Lockless read to avoid lock overhead if pipes are not in use.
549 	 */
550 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
551 		return;
552 
553 	AUDIT_PIPE_LIST_RLOCK();
554 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
555 		AUDIT_PIPE_LOCK(ap);
556 		audit_pipe_append(ap, record, record_len);
557 		AUDIT_PIPE_UNLOCK(ap);
558 	}
559 	AUDIT_PIPE_LIST_RUNLOCK();
560 
561 	/* Unlocked increment. */
562 	audit_pipe_records++;
563 }
564 
/*
 * Allocate a new audit pipe.  Connects the pipe, on success, to the global
 * list and updates statistics.
 *
 * Returns the new pipe, or NULL if memory could not be allocated (the
 * allocation is M_NOWAIT and so may fail rather than sleep).  Once this
 * returns, the pipe is visible on the global list and may start receiving
 * records.
 */
static struct audit_pipe *
audit_pipe_alloc(void)
{
	struct audit_pipe *ap;

	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
	if (ap == NULL)
		return (NULL);
	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
	TAILQ_INIT(&ap->ap_queue);
	/* The knote list shares the pipe mutex as its lock. */
	knlist_init_mtx(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap));
	AUDIT_PIPE_LOCK_INIT(ap);
	AUDIT_PIPE_SX_LOCK_INIT(ap);
	cv_init(&ap->ap_cv, "audit_pipe");

	/*
	 * Default flags, naflags, and auid-specific preselection settings to
	 * 0.  Initialize the mode to the global trail so that if praudit(1)
	 * is run on /dev/auditpipe, it sees events associated with the
	 * default trail.  Pipe-aware application can clear the flag, set
	 * custom masks, and flush the pipe as needed.
	 */
	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
	TAILQ_INIT(&ap->ap_preselect_list);
	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;

	/*
	 * Add to global list and update global statistics.
	 */
	AUDIT_PIPE_LIST_WLOCK();
	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
	audit_pipe_count++;
	audit_pipe_ever++;
	AUDIT_PIPE_LIST_WUNLOCK();

	return (ap);
}
607 
608 /*
609  * Flush all records currently present in an audit pipe; assume mutex is held.
610  */
611 static void
612 audit_pipe_flush(struct audit_pipe *ap)
613 {
614 	struct audit_pipe_entry *ape;
615 
616 	AUDIT_PIPE_LOCK_ASSERT(ap);
617 
618 	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
619 		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
620 		ap->ap_qbyteslen -= ape->ape_record_len;
621 		audit_pipe_entry_free(ape);
622 		ap->ap_qlen--;
623 	}
624 	ap->ap_qoffset = 0;
625 
626 	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qbyteslen"));
627 	KASSERT(ap->ap_qbyteslen == 0, ("audit_pipe_flush: ap_qbyteslen"));
628 }
629 
/*
 * Free an audit pipe; this means freeing all preselection state and all
 * records in the pipe.  Assumes global write lock and pipe mutex are held to
 * prevent any new records from being inserted during the free, and that the
 * audit pipe is still on the global list.
 *
 * The pipe mutex is destroyed here while still held, so the caller must not
 * unlock it afterwards; the global list lock remains held on return and is
 * the caller's to release.  'ap' is invalid once this returns.
 */
static void
audit_pipe_free(struct audit_pipe *ap)
{

	AUDIT_PIPE_LIST_WLOCK_ASSERT();
	AUDIT_PIPE_LOCK_ASSERT(ap);

	/* Release everything hanging off the pipe first. */
	audit_pipe_preselect_flush_locked(ap);
	audit_pipe_flush(ap);
	/* Then tear down synchronization and notification state. */
	cv_destroy(&ap->ap_cv);
	AUDIT_PIPE_SX_LOCK_DESTROY(ap);
	AUDIT_PIPE_LOCK_DESTROY(ap);
	seldrain(&ap->ap_selinfo);
	knlist_destroy(&ap->ap_selinfo.si_note);
	/* Finally unlink from the global list and release the memory. */
	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
	free(ap, M_AUDIT_PIPE);
	audit_pipe_count--;
}
654 
/*
 * cdevpriv destructor registered by audit_pipe_open(): tears down the pipe
 * passed as 'arg'.  Acquires the global list write lock and the pipe mutex
 * as audit_pipe_free() requires.  Only the list lock is released afterwards
 * because audit_pipe_free() destroys the pipe mutex along with the pipe.
 */
static void
audit_pipe_dtor(void *arg)
{
	struct audit_pipe *ap;

	ap = arg;
	AUDIT_PIPE_LIST_WLOCK();
	AUDIT_PIPE_LOCK(ap);
	audit_pipe_free(ap);
	AUDIT_PIPE_LIST_WUNLOCK();
}
666 
667 /*
668  * Audit pipe open method.  Explicit privilege check isn't used as this
669  * allows file permissions on the special device to be used to grant audit
670  * review access.  Those file permissions should be managed carefully.
671  */
672 static int
673 audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
674 {
675 	struct audit_pipe *ap;
676 	int error;
677 
678 	ap = audit_pipe_alloc();
679 	if (ap == NULL) {
680 		return (ENOMEM);
681 	}
682 	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
683 	error = devfs_set_cdevpriv(ap, audit_pipe_dtor);
684 	if (error != 0) {
685 		AUDIT_PIPE_LIST_WLOCK();
686 		audit_pipe_free(ap);
687 		AUDIT_PIPE_LIST_WUNLOCK();
688 	}
689 	return (0);
690 }
691 
692 /*
693  * Close audit pipe, tear down all records, etc.
694  */
695 static int
696 audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
697 {
698 	struct audit_pipe *ap;
699 	int error;
700 
701 	error = devfs_get_cdevpriv((void **)&ap);
702 	if (error != 0)
703 		return (error);
704 	funsetown(&ap->ap_sigio);
705 	return (0);
706 }
707 
708 /*
709  * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
710  * commands.
711  */
712 static int
713 audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
714     struct thread *td)
715 {
716 	struct auditpipe_ioctl_preselect *aip;
717 	struct audit_pipe *ap;
718 	au_mask_t *maskp;
719 	int error, mode;
720 	au_id_t auid;
721 
722 	error = devfs_get_cdevpriv((void **)&ap);
723 	if (error != 0)
724 		return (error);
725 
726 	/*
727 	 * Audit pipe ioctls: first come standard device node ioctls, then
728 	 * manipulation of pipe settings, and finally, statistics query
729 	 * ioctls.
730 	 */
731 	switch (cmd) {
732 	case FIONBIO:
733 		AUDIT_PIPE_LOCK(ap);
734 		if (*(int *)data)
735 			ap->ap_flags |= AUDIT_PIPE_NBIO;
736 		else
737 			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
738 		AUDIT_PIPE_UNLOCK(ap);
739 		error = 0;
740 		break;
741 
742 	case FIONREAD:
743 		AUDIT_PIPE_LOCK(ap);
744 		*(int *)data = ap->ap_qbyteslen - ap->ap_qoffset;
745 		AUDIT_PIPE_UNLOCK(ap);
746 		error = 0;
747 		break;
748 
749 	case FIOASYNC:
750 		AUDIT_PIPE_LOCK(ap);
751 		if (*(int *)data)
752 			ap->ap_flags |= AUDIT_PIPE_ASYNC;
753 		else
754 			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
755 		AUDIT_PIPE_UNLOCK(ap);
756 		error = 0;
757 		break;
758 
759 	case FIOSETOWN:
760 		error = fsetown(*(int *)data, &ap->ap_sigio);
761 		break;
762 
763 	case FIOGETOWN:
764 		*(int *)data = fgetown(&ap->ap_sigio);
765 		error = 0;
766 		break;
767 
768 	case AUDITPIPE_GET_QLEN:
769 		*(u_int *)data = ap->ap_qlen;
770 		error = 0;
771 		break;
772 
773 	case AUDITPIPE_GET_QLIMIT:
774 		*(u_int *)data = ap->ap_qlimit;
775 		error = 0;
776 		break;
777 
778 	case AUDITPIPE_SET_QLIMIT:
779 		/* Lockless integer write. */
780 		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
781 		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
782 			ap->ap_qlimit = *(u_int *)data;
783 			error = 0;
784 		} else
785 			error = EINVAL;
786 		break;
787 
788 	case AUDITPIPE_GET_QLIMIT_MIN:
789 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
790 		error = 0;
791 		break;
792 
793 	case AUDITPIPE_GET_QLIMIT_MAX:
794 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
795 		error = 0;
796 		break;
797 
798 	case AUDITPIPE_GET_PRESELECT_FLAGS:
799 		AUDIT_PIPE_LOCK(ap);
800 		maskp = (au_mask_t *)data;
801 		*maskp = ap->ap_preselect_flags;
802 		AUDIT_PIPE_UNLOCK(ap);
803 		error = 0;
804 		break;
805 
806 	case AUDITPIPE_SET_PRESELECT_FLAGS:
807 		AUDIT_PIPE_LOCK(ap);
808 		maskp = (au_mask_t *)data;
809 		ap->ap_preselect_flags = *maskp;
810 		AUDIT_PIPE_UNLOCK(ap);
811 		error = 0;
812 		break;
813 
814 	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
815 		AUDIT_PIPE_LOCK(ap);
816 		maskp = (au_mask_t *)data;
817 		*maskp = ap->ap_preselect_naflags;
818 		AUDIT_PIPE_UNLOCK(ap);
819 		error = 0;
820 		break;
821 
822 	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
823 		AUDIT_PIPE_LOCK(ap);
824 		maskp = (au_mask_t *)data;
825 		ap->ap_preselect_naflags = *maskp;
826 		AUDIT_PIPE_UNLOCK(ap);
827 		error = 0;
828 		break;
829 
830 	case AUDITPIPE_GET_PRESELECT_AUID:
831 		aip = (struct auditpipe_ioctl_preselect *)data;
832 		error = audit_pipe_preselect_get(ap, aip->aip_auid,
833 		    &aip->aip_mask);
834 		break;
835 
836 	case AUDITPIPE_SET_PRESELECT_AUID:
837 		aip = (struct auditpipe_ioctl_preselect *)data;
838 		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
839 		error = 0;
840 		break;
841 
842 	case AUDITPIPE_DELETE_PRESELECT_AUID:
843 		auid = *(au_id_t *)data;
844 		error = audit_pipe_preselect_delete(ap, auid);
845 		break;
846 
847 	case AUDITPIPE_FLUSH_PRESELECT_AUID:
848 		audit_pipe_preselect_flush(ap);
849 		error = 0;
850 		break;
851 
852 	case AUDITPIPE_GET_PRESELECT_MODE:
853 		AUDIT_PIPE_LOCK(ap);
854 		*(int *)data = ap->ap_preselect_mode;
855 		AUDIT_PIPE_UNLOCK(ap);
856 		error = 0;
857 		break;
858 
859 	case AUDITPIPE_SET_PRESELECT_MODE:
860 		mode = *(int *)data;
861 		switch (mode) {
862 		case AUDITPIPE_PRESELECT_MODE_TRAIL:
863 		case AUDITPIPE_PRESELECT_MODE_LOCAL:
864 			AUDIT_PIPE_LOCK(ap);
865 			ap->ap_preselect_mode = mode;
866 			AUDIT_PIPE_UNLOCK(ap);
867 			error = 0;
868 			break;
869 
870 		default:
871 			error = EINVAL;
872 		}
873 		break;
874 
875 	case AUDITPIPE_FLUSH:
876 		if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
877 			return (EINTR);
878 		AUDIT_PIPE_LOCK(ap);
879 		audit_pipe_flush(ap);
880 		AUDIT_PIPE_UNLOCK(ap);
881 		AUDIT_PIPE_SX_XUNLOCK(ap);
882 		error = 0;
883 		break;
884 
885 	case AUDITPIPE_GET_MAXAUDITDATA:
886 		*(u_int *)data = MAXAUDITDATA;
887 		error = 0;
888 		break;
889 
890 	case AUDITPIPE_GET_INSERTS:
891 		*(u_int *)data = ap->ap_inserts;
892 		error = 0;
893 		break;
894 
895 	case AUDITPIPE_GET_READS:
896 		*(u_int *)data = ap->ap_reads;
897 		error = 0;
898 		break;
899 
900 	case AUDITPIPE_GET_DROPS:
901 		*(u_int *)data = ap->ap_drops;
902 		error = 0;
903 		break;
904 
905 	case AUDITPIPE_GET_TRUNCATES:
906 		*(u_int *)data = 0;
907 		error = 0;
908 		break;
909 
910 	default:
911 		error = ENOTTY;
912 	}
913 	return (error);
914 }
915 
/*
 * Audit pipe read.  Read one or more partial or complete records to user
 * memory.
 *
 * Blocks until at least one record is queued unless the pipe is in
 * non-blocking mode.  Returns 0 on success, or:
 *   - the error from devfs_get_cdevpriv() if the pipe cannot be looked up;
 *   - EINTR if interrupted while waiting for the sx lock;
 *   - EAGAIN if the queue is empty and AUDIT_PIPE_NBIO is set;
 *   - the error from cv_wait_sig() if the wait for data is interrupted;
 *   - the error from uiomove() if copying to the caller fails, in which
 *     case the record being copied stays queued and the read offset is not
 *     advanced for the failed chunk.
 */
static int
audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
{
	struct audit_pipe_entry *ape;
	struct audit_pipe *ap;
	u_int toread;
	int error;

	error = devfs_get_cdevpriv((void **)&ap);
	if (error != 0)
		return (error);

	/*
	 * We hold an sx(9) lock over read and flush because we rely on the
	 * stability of a record in the queue during uiomove(9).
	 */
	if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
		return (EINTR);
	AUDIT_PIPE_LOCK(ap);
	/* Wait for data; the sx lock stays held while sleeping on the cv. */
	while (TAILQ_EMPTY(&ap->ap_queue)) {
		if (ap->ap_flags & AUDIT_PIPE_NBIO) {
			AUDIT_PIPE_UNLOCK(ap);
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (EAGAIN);
		}
		error = cv_wait_sig(&ap->ap_cv, AUDIT_PIPE_MTX(ap));
		if (error) {
			AUDIT_PIPE_UNLOCK(ap);
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (error);
		}
	}

	/*
	 * Copy as many remaining bytes from the current record to userspace
	 * as we can.  Keep processing records until we run out of records in
	 * the queue, or until the user buffer runs out of space.
	 *
	 * Note: we rely on the SX lock to maintain ape's stability here.
	 */
	ap->ap_reads++;
	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL &&
	    uio->uio_resid > 0) {
		AUDIT_PIPE_LOCK_ASSERT(ap);

		KASSERT(ape->ape_record_len > ap->ap_qoffset,
		    ("audit_pipe_read: record_len > qoffset (1)"));
		toread = MIN(ape->ape_record_len - ap->ap_qoffset,
		    uio->uio_resid);
		/*
		 * Drop the mutex across the copy; new records may be
		 * appended meanwhile, but the head cannot be removed because
		 * that requires the sx lock we hold.
		 */
		AUDIT_PIPE_UNLOCK(ap);
		error = uiomove((char *)ape->ape_record + ap->ap_qoffset,
		    toread, uio);
		if (error) {
			/* The mutex is already released at this point. */
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (error);
		}

		/*
		 * If the copy succeeded, update book-keeping, and if no
		 * bytes remain in the current record, free it.
		 */
		AUDIT_PIPE_LOCK(ap);
		KASSERT(TAILQ_FIRST(&ap->ap_queue) == ape,
		    ("audit_pipe_read: queue out of sync after uiomove"));
		ap->ap_qoffset += toread;
		KASSERT(ape->ape_record_len >= ap->ap_qoffset,
		    ("audit_pipe_read: record_len >= qoffset (2)"));
		if (ap->ap_qoffset == ape->ape_record_len) {
			TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
			ap->ap_qbyteslen -= ape->ape_record_len;
			audit_pipe_entry_free(ape);
			ap->ap_qlen--;
			ap->ap_qoffset = 0;
		}
	}
	AUDIT_PIPE_UNLOCK(ap);
	AUDIT_PIPE_SX_XUNLOCK(ap);
	return (0);
}
999 
1000 /*
1001  * Audit pipe poll.
1002  */
1003 static int
1004 audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
1005 {
1006 	struct audit_pipe *ap;
1007 	int error, revents;
1008 
1009 	revents = 0;
1010 	error = devfs_get_cdevpriv((void **)&ap);
1011 	if (error != 0)
1012 		return (error);
1013 	if (events & (POLLIN | POLLRDNORM)) {
1014 		AUDIT_PIPE_LOCK(ap);
1015 		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
1016 			revents |= events & (POLLIN | POLLRDNORM);
1017 		else
1018 			selrecord(td, &ap->ap_selinfo);
1019 		AUDIT_PIPE_UNLOCK(ap);
1020 	}
1021 	return (revents);
1022 }
1023 
1024 /*
1025  * Audit pipe kqfilter.
1026  */
1027 static int
1028 audit_pipe_kqfilter(struct cdev *dev, struct knote *kn)
1029 {
1030 	struct audit_pipe *ap;
1031 	int error;
1032 
1033 	error = devfs_get_cdevpriv((void **)&ap);
1034 	if (error != 0)
1035 		return (error);
1036 	if (kn->kn_filter != EVFILT_READ)
1037 		return (EINVAL);
1038 
1039 	kn->kn_fop = &audit_pipe_read_filterops;
1040 	kn->kn_hook = ap;
1041 
1042 	AUDIT_PIPE_LOCK(ap);
1043 	knlist_add(&ap->ap_selinfo.si_note, kn, 1);
1044 	AUDIT_PIPE_UNLOCK(ap);
1045 	return (0);
1046 }
1047 
1048 /*
1049  * Return true if there are records available for reading on the pipe.
1050  */
1051 static int
1052 audit_pipe_kqread(struct knote *kn, long hint)
1053 {
1054 	struct audit_pipe *ap;
1055 
1056 	ap = (struct audit_pipe *)kn->kn_hook;
1057 	AUDIT_PIPE_LOCK_ASSERT(ap);
1058 
1059 	if (ap->ap_qlen != 0) {
1060 		kn->kn_data = ap->ap_qbyteslen - ap->ap_qoffset;
1061 		return (1);
1062 	} else {
1063 		kn->kn_data = 0;
1064 		return (0);
1065 	}
1066 }
1067 
1068 /*
1069  * Detach kqueue state from audit pipe.
1070  */
1071 static void
1072 audit_pipe_kqdetach(struct knote *kn)
1073 {
1074 	struct audit_pipe *ap;
1075 
1076 	ap = (struct audit_pipe *)kn->kn_hook;
1077 	AUDIT_PIPE_LOCK(ap);
1078 	knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
1079 	AUDIT_PIPE_UNLOCK(ap);
1080 }
1081 
1082 /*
1083  * Initialize the audit pipe system.
1084  */
1085 static void
1086 audit_pipe_init(void *unused)
1087 {
1088 
1089 	TAILQ_INIT(&audit_pipe_list);
1090 	AUDIT_PIPE_LIST_LOCK_INIT();
1091 	audit_pipe_dev = make_dev(&audit_pipe_cdevsw, 0, UID_ROOT,
1092 		GID_WHEEL, 0600, "%s", AUDIT_PIPE_NAME);
1093 	if (audit_pipe_dev == NULL) {
1094 		AUDIT_PIPE_LIST_LOCK_DESTROY();
1095 		panic("Can't initialize audit pipe subsystem");
1096 	}
1097 }
1098 
/*
 * Register audit_pipe_init() to run during boot, at the SI_SUB_DRIVERS
 * stage with middle ordering.
 */
SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
    NULL);
1101