xref: /freebsd/sys/security/audit/audit_pipe.c (revision bb15ca603fa442c72dde3f3cb8b46db6970e3950)
1 /*-
2  * Copyright (c) 2006 Robert N. M. Watson
3  * Copyright (c) 2008-2009 Apple, Inc.
4  * All rights reserved.
5  *
6  * This software was developed by Robert Watson for the TrustedBSD Project.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/condvar.h>
35 #include <sys/conf.h>
36 #include <sys/eventhandler.h>
37 #include <sys/filio.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/poll.h>
43 #include <sys/proc.h>
44 #include <sys/queue.h>
45 #include <sys/rwlock.h>
46 #include <sys/selinfo.h>
47 #include <sys/sigio.h>
48 #include <sys/signal.h>
49 #include <sys/signalvar.h>
50 #include <sys/sx.h>
51 #include <sys/systm.h>
52 #include <sys/uio.h>
53 
54 #include <security/audit/audit.h>
55 #include <security/audit/audit_ioctl.h>
56 #include <security/audit/audit_private.h>
57 
58 /*
59  * Implementation of a clonable special device providing a live stream of BSM
60  * audit data.  Consumers receive a "tee" of the system audit trail by
61  * default, but may also define alternative event selections using ioctls.
62  * This interface provides unreliable but timely access to audit events.
63  * Consumers should be very careful to avoid introducing event cycles.
64  */
65 
/*
 * Memory types used for allocations in this file: the audit pipe structures
 * themselves, queued pipe entries together with their record buffers, and
 * per-auid preselection structures.
 */
static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
    "Audit pipe entries and buffers");
static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel",
    "Audit pipe preselection structure");
74 
/*
 * Audit pipe buffer parameters, all counted in records: the queue limit
 * given to a newly allocated pipe, and the minimum and maximum limits that
 * are reported to consumers by the AUDITPIPE_GET_QLIMIT_MIN and
 * AUDITPIPE_GET_QLIMIT_MAX ioctls.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	(128)
#define	AUDIT_PIPE_QLIMIT_MIN		(1)
#define	AUDIT_PIPE_QLIMIT_MAX		(1024)
81 
/*
 * Description of an entry in an audit_pipe.  Each entry owns a private copy
 * of one record; both the entry and the copy are released together by
 * audit_pipe_entry_free().
 */
struct audit_pipe_entry {
	void				*ape_record;	/* Copy of record. */
	u_int				 ape_record_len; /* Length, bytes. */
	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;	/* ap_queue linkage. */
};
90 
/*
 * Audit pipes allow processes to express "interest" in the set of records
 * that are delivered via the pipe.  They do this in a similar manner to the
 * mechanism for audit trail configuration, by expressing two global masks,
 * and optionally expressing per-auid masks.  The following data structure is
 * the per-auid mask description.  The global state is stored in the audit
 * pipe data structure.
 *
 * We may want to consider a more space/time-efficient data structure once
 * usage patterns for per-auid specifications are clear.
 */
struct audit_pipe_preselect {
	/* Audit ID this specification applies to. */
	au_id_t					 app_auid;
	/* Mask used for records carrying that audit ID. */
	au_mask_t				 app_mask;
	/* Linkage on the owning pipe's ap_preselect_list. */
	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;
};
107 
/*
 * Description of an individual audit_pipe.  Consists largely of a bounded
 * length queue.
 *
 * ap_flags bits: AUDIT_PIPE_ASYNC is toggled by the FIOASYNC ioctl and makes
 * each newly queued record raise SIGIO via ap_sigio; AUDIT_PIPE_NBIO is
 * toggled by the FIONBIO ioctl and makes a read of an empty pipe fail with
 * EAGAIN rather than sleep.
 */
#define	AUDIT_PIPE_ASYNC	0x00000001
#define	AUDIT_PIPE_NBIO		0x00000002
struct audit_pipe {
	int				 ap_open;	/* Device open? */
	u_int				 ap_flags;

	struct selinfo			 ap_selinfo;
	struct sigio			*ap_sigio;

	/*
	 * Per-pipe mutex protecting most fields in this data structure.
	 */
	struct mtx			 ap_mtx;

	/*
	 * Per-pipe sleep lock serializing user-generated reads and flushes.
	 * uiomove() is called to copy out the current head record's data
	 * while the record remains in the queue, so we prevent other threads
	 * from removing it using this lock.
	 */
	struct sx			 ap_sx;

	/*
	 * Condition variable to signal when data has been delivered to a
	 * pipe.
	 */
	struct cv			 ap_cv;

	/*
	 * Various queue-related variables: qlen and qlimit are a count of
	 * records in the queue; qbyteslen is the number of bytes of data
	 * across all records, and qoffset is the amount read so far of the
	 * first record in the queue.  The number of bytes available for
	 * reading in the queue is qbyteslen - qoffset.
	 */
	u_int				 ap_qlen;
	u_int				 ap_qlimit;
	u_int				 ap_qbyteslen;
	u_int				 ap_qoffset;

	/*
	 * Per-pipe operation statistics.
	 */
	u_int64_t			 ap_inserts;	/* Records added. */
	u_int64_t			 ap_reads;	/* Records read. */
	u_int64_t			 ap_drops;	/* Records dropped. */

	/*
	 * Fields relating to pipe interest: global masks for unmatched
	 * processes (attributable, non-attributable), and a list of specific
	 * interest specifications by auid.
	 */
	int				 ap_preselect_mode;
	au_mask_t			 ap_preselect_flags;
	au_mask_t			 ap_preselect_naflags;
	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;

	/*
	 * Current pending record list.  Protected by a combination of ap_mtx
	 * and ap_sx.  Note particularly that *both* locks are required to
	 * remove a record from the head of the queue, as an in-progress read
	 * may sleep while copying and therefore cannot hold ap_mtx.
	 */
	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;

	/*
	 * Global pipe list.
	 */
	TAILQ_ENTRY(audit_pipe)		 ap_list;
};
182 
/*
 * Wrappers for the per-pipe mutex (ap_mtx).  AUDIT_PIPE_MTX() yields the
 * mutex pointer itself, for interfaces such as cv_wait_sig() and
 * knlist_init_mtx() that take the lock as an argument.
 */
#define	AUDIT_PIPE_LOCK(ap)		mtx_lock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_ASSERT(ap)	mtx_assert(&(ap)->ap_mtx, MA_OWNED)
#define	AUDIT_PIPE_LOCK_DESTROY(ap)	mtx_destroy(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_INIT(ap)	mtx_init(&(ap)->ap_mtx, \
					    "audit_pipe_mtx", NULL, MTX_DEF)
#define	AUDIT_PIPE_UNLOCK(ap)		mtx_unlock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_MTX(ap)		(&(ap)->ap_mtx)

/*
 * Wrappers for the per-pipe sleepable lock (ap_sx).  The lock is only ever
 * taken exclusively, and acquisition is interruptible by signals, so
 * AUDIT_PIPE_SX_XLOCK_SIG() returns a value that callers must check.
 */
#define	AUDIT_PIPE_SX_LOCK_DESTROY(ap)	sx_destroy(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_LOCK_INIT(ap)	sx_init(&(ap)->ap_sx, "audit_pipe_sx")
#define	AUDIT_PIPE_SX_XLOCK_ASSERT(ap)	sx_assert(&(ap)->ap_sx, SA_XLOCKED)
#define	AUDIT_PIPE_SX_XLOCK_SIG(ap)	sx_xlock_sig(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_XUNLOCK(ap)	sx_xunlock(&(ap)->ap_sx)
196 
/*
 * Global list of audit pipes, rwlock to protect it.  Individual record
 * queues on pipes are protected by per-pipe locks; these locks synchronize
 * between threads walking the list to deliver to individual pipes and add/
 * remove of pipes, and are mostly acquired for read.
 *
 * In this file the read lock is taken by the record submission and
 * preselection paths, and the write lock by device open and close.
 */
static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
static struct rwlock		 audit_pipe_lock;

#define	AUDIT_PIPE_LIST_LOCK_INIT()	rw_init(&audit_pipe_lock, \
					    "audit_pipe_list_lock")
#define	AUDIT_PIPE_LIST_RLOCK()		rw_rlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_RUNLOCK()	rw_runlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK()		rw_wlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK_ASSERT()	rw_assert(&audit_pipe_lock, \
					    RA_WLOCKED)
#define	AUDIT_PIPE_LIST_WUNLOCK()	rw_wunlock(&audit_pipe_lock)
214 
/*
 * Cloning related variables and constants: the base device name, the tag
 * returned when the dev_clone event handler is registered, and the clone
 * state handed to clone_setup() and clone_create().
 */
#define	AUDIT_PIPE_NAME		"auditpipe"
static eventhandler_tag		 audit_pipe_eh_tag;
static struct clonedevs		*audit_pipe_clones;
221 
/*
 * Special device methods and definition.  The device supports open, close,
 * read, ioctl, poll and kqueue filters; no write method is provided.
 */
static d_open_t		audit_pipe_open;
static d_close_t	audit_pipe_close;
static d_read_t		audit_pipe_read;
static d_ioctl_t	audit_pipe_ioctl;
static d_poll_t		audit_pipe_poll;
static d_kqfilter_t	audit_pipe_kqfilter;

static struct cdevsw	audit_pipe_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDMINOR,
	.d_open =	audit_pipe_open,
	.d_close =	audit_pipe_close,
	.d_read =	audit_pipe_read,
	.d_ioctl =	audit_pipe_ioctl,
	.d_poll =	audit_pipe_poll,
	.d_kqfilter =	audit_pipe_kqfilter,
	.d_name =	AUDIT_PIPE_NAME,
};
243 
/*
 * kqueue(2) support: the only filter offered is EVFILT_READ, installed by
 * audit_pipe_kqfilter().  No attach method is needed since the knote is
 * added to the pipe's knlist directly by audit_pipe_kqfilter().
 */
static int	audit_pipe_kqread(struct knote *note, long hint);
static void	audit_pipe_kqdetach(struct knote *note);

static struct filterops audit_pipe_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	audit_pipe_kqdetach,
	.f_event =	audit_pipe_kqread,
};
253 
/*
 * Some global statistics on audit pipes.  audit_pipe_count and
 * audit_pipe_ever are updated with the global list write lock held;
 * audit_pipe_records is incremented without any lock, and audit_pipe_drops
 * under whichever pipe's mutex happened to drop the record, so the latter
 * two are approximate.
 */
static int		audit_pipe_count;	/* Current number of pipes. */
static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
static u_int64_t	audit_pipe_records;	/* Records seen. */
static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
261 
262 /*
263  * Free an audit pipe entry.
264  */
265 static void
266 audit_pipe_entry_free(struct audit_pipe_entry *ape)
267 {
268 
269 	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
270 	free(ape, M_AUDIT_PIPE_ENTRY);
271 }
272 
273 /*
274  * Find an audit pipe preselection specification for an auid, if any.
275  */
276 static struct audit_pipe_preselect *
277 audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
278 {
279 	struct audit_pipe_preselect *app;
280 
281 	AUDIT_PIPE_LOCK_ASSERT(ap);
282 
283 	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
284 		if (app->app_auid == auid)
285 			return (app);
286 	}
287 	return (NULL);
288 }
289 
290 /*
291  * Query the per-pipe mask for a specific auid.
292  */
293 static int
294 audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
295     au_mask_t *maskp)
296 {
297 	struct audit_pipe_preselect *app;
298 	int error;
299 
300 	AUDIT_PIPE_LOCK(ap);
301 	app = audit_pipe_preselect_find(ap, auid);
302 	if (app != NULL) {
303 		*maskp = app->app_mask;
304 		error = 0;
305 	} else
306 		error = ENOENT;
307 	AUDIT_PIPE_UNLOCK(ap);
308 	return (error);
309 }
310 
311 /*
312  * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
313  * otherwise, update the current entry.
314  */
315 static void
316 audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
317 {
318 	struct audit_pipe_preselect *app, *app_new;
319 
320 	/*
321 	 * Pessimistically assume that the auid doesn't already have a mask
322 	 * set, and allocate.  We will free it if it is unneeded.
323 	 */
324 	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
325 	AUDIT_PIPE_LOCK(ap);
326 	app = audit_pipe_preselect_find(ap, auid);
327 	if (app == NULL) {
328 		app = app_new;
329 		app_new = NULL;
330 		app->app_auid = auid;
331 		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
332 	}
333 	app->app_mask = mask;
334 	AUDIT_PIPE_UNLOCK(ap);
335 	if (app_new != NULL)
336 		free(app_new, M_AUDIT_PIPE_PRESELECT);
337 }
338 
339 /*
340  * Delete a per-auid mask on an audit pipe.
341  */
342 static int
343 audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
344 {
345 	struct audit_pipe_preselect *app;
346 	int error;
347 
348 	AUDIT_PIPE_LOCK(ap);
349 	app = audit_pipe_preselect_find(ap, auid);
350 	if (app != NULL) {
351 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
352 		error = 0;
353 	} else
354 		error = ENOENT;
355 	AUDIT_PIPE_UNLOCK(ap);
356 	if (app != NULL)
357 		free(app, M_AUDIT_PIPE_PRESELECT);
358 	return (error);
359 }
360 
361 /*
362  * Delete all per-auid masks on an audit pipe.
363  */
364 static void
365 audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
366 {
367 	struct audit_pipe_preselect *app;
368 
369 	AUDIT_PIPE_LOCK_ASSERT(ap);
370 
371 	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
372 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
373 		free(app, M_AUDIT_PIPE_PRESELECT);
374 	}
375 }
376 
/*
 * Discard every per-auid mask registered on an audit pipe.  This is the
 * variant for callers that do not already hold the pipe mutex: it takes the
 * mutex around the locked flush routine.
 */
static void
audit_pipe_preselect_flush(struct audit_pipe *ap)
{

	AUDIT_PIPE_LOCK(ap);
	audit_pipe_preselect_flush_locked(ap);
	AUDIT_PIPE_UNLOCK(ap);
}
385 
386 /*-
387  * Determine whether a specific audit pipe matches a record with these
388  * properties.  Algorithm is as follows:
389  *
390  * - If the pipe is configured to track the default trail configuration, then
391  *   use the results of global preselection matching.
392  * - If not, search for a specifically configured auid entry matching the
393  *   event.  If an entry is found, use that.
394  * - Otherwise, use the default flags or naflags configured for the pipe.
395  */
396 static int
397 audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
398     au_event_t event, au_class_t class, int sorf, int trail_preselect)
399 {
400 	struct audit_pipe_preselect *app;
401 
402 	AUDIT_PIPE_LOCK_ASSERT(ap);
403 
404 	switch (ap->ap_preselect_mode) {
405 	case AUDITPIPE_PRESELECT_MODE_TRAIL:
406 		return (trail_preselect);
407 
408 	case AUDITPIPE_PRESELECT_MODE_LOCAL:
409 		app = audit_pipe_preselect_find(ap, auid);
410 		if (app == NULL) {
411 			if (auid == AU_DEFAUDITID)
412 				return (au_preselect(event, class,
413 				    &ap->ap_preselect_naflags, sorf));
414 			else
415 				return (au_preselect(event, class,
416 				    &ap->ap_preselect_flags, sorf));
417 		} else
418 			return (au_preselect(event, class, &app->app_mask,
419 			    sorf));
420 
421 	default:
422 		panic("audit_pipe_preselect_check: mode %d",
423 		    ap->ap_preselect_mode);
424 	}
425 
426 	return (0);
427 }
428 
429 /*
430  * Determine whether there exists a pipe interested in a record with specific
431  * properties.
432  */
433 int
434 audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
435     int sorf, int trail_preselect)
436 {
437 	struct audit_pipe *ap;
438 
439 	/* Lockless read to avoid acquiring the global lock if not needed. */
440 	if (TAILQ_EMPTY(&audit_pipe_list))
441 		return (0);
442 
443 	AUDIT_PIPE_LIST_RLOCK();
444 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
445 		AUDIT_PIPE_LOCK(ap);
446 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
447 		    trail_preselect)) {
448 			AUDIT_PIPE_UNLOCK(ap);
449 			AUDIT_PIPE_LIST_RUNLOCK();
450 			return (1);
451 		}
452 		AUDIT_PIPE_UNLOCK(ap);
453 	}
454 	AUDIT_PIPE_LIST_RUNLOCK();
455 	return (0);
456 }
457 
458 /*
459  * Append individual record to a queue -- allocate queue-local buffer, and
460  * add to the queue.  If the queue is full or we can't allocate memory, drop
461  * the newest record.
462  */
463 static void
464 audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
465 {
466 	struct audit_pipe_entry *ape;
467 
468 	AUDIT_PIPE_LOCK_ASSERT(ap);
469 
470 	if (ap->ap_qlen >= ap->ap_qlimit) {
471 		ap->ap_drops++;
472 		audit_pipe_drops++;
473 		return;
474 	}
475 
476 	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
477 	if (ape == NULL) {
478 		ap->ap_drops++;
479 		audit_pipe_drops++;
480 		return;
481 	}
482 
483 	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
484 	if (ape->ape_record == NULL) {
485 		free(ape, M_AUDIT_PIPE_ENTRY);
486 		ap->ap_drops++;
487 		audit_pipe_drops++;
488 		return;
489 	}
490 
491 	bcopy(record, ape->ape_record, record_len);
492 	ape->ape_record_len = record_len;
493 
494 	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
495 	ap->ap_inserts++;
496 	ap->ap_qlen++;
497 	ap->ap_qbyteslen += ape->ape_record_len;
498 	selwakeuppri(&ap->ap_selinfo, PSOCK);
499 	KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
500 	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
501 		pgsigio(&ap->ap_sigio, SIGIO, 0);
502 	cv_broadcast(&ap->ap_cv);
503 }
504 
505 /*
506  * audit_pipe_submit(): audit_worker submits audit records via this
507  * interface, which arranges for them to be delivered to pipe queues.
508  */
509 void
510 audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
511     int trail_select, void *record, u_int record_len)
512 {
513 	struct audit_pipe *ap;
514 
515 	/*
516 	 * Lockless read to avoid lock overhead if pipes are not in use.
517 	 */
518 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
519 		return;
520 
521 	AUDIT_PIPE_LIST_RLOCK();
522 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
523 		AUDIT_PIPE_LOCK(ap);
524 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
525 		    trail_select))
526 			audit_pipe_append(ap, record, record_len);
527 		AUDIT_PIPE_UNLOCK(ap);
528 	}
529 	AUDIT_PIPE_LIST_RUNLOCK();
530 
531 	/* Unlocked increment. */
532 	audit_pipe_records++;
533 }
534 
535 /*
536  * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
537  * since we don't currently have selection information available, it is
538  * delivered to the pipe unconditionally.
539  *
540  * XXXRW: This is a bug.  The BSM check routine for submitting a user record
541  * should parse that information and return it.
542  */
543 void
544 audit_pipe_submit_user(void *record, u_int record_len)
545 {
546 	struct audit_pipe *ap;
547 
548 	/*
549 	 * Lockless read to avoid lock overhead if pipes are not in use.
550 	 */
551 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
552 		return;
553 
554 	AUDIT_PIPE_LIST_RLOCK();
555 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
556 		AUDIT_PIPE_LOCK(ap);
557 		audit_pipe_append(ap, record, record_len);
558 		AUDIT_PIPE_UNLOCK(ap);
559 	}
560 	AUDIT_PIPE_LIST_RUNLOCK();
561 
562 	/* Unlocked increment. */
563 	audit_pipe_records++;
564 }
565 
/*
 * Allocate a new audit pipe.  Connects the pipe, on success, to the global
 * list and updates statistics.
 *
 * The caller must hold the global pipe list write lock.  Returns the new
 * pipe, or NULL if memory could not be allocated; the allocation is
 * non-sleeping (presumably because the list lock is held across it).  The
 * new pipe starts with the default queue limit, an empty record queue, no
 * per-auid masks, and trail preselection mode.
 */
static struct audit_pipe *
audit_pipe_alloc(void)
{
	struct audit_pipe *ap;

	AUDIT_PIPE_LIST_WLOCK_ASSERT();

	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
	if (ap == NULL)
		return (NULL);
	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
	TAILQ_INIT(&ap->ap_queue);
	/* The knote list is tied to the pipe mutex, so KNOTE_LOCKED() works. */
	knlist_init_mtx(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap));
	AUDIT_PIPE_LOCK_INIT(ap);
	AUDIT_PIPE_SX_LOCK_INIT(ap);
	cv_init(&ap->ap_cv, "audit_pipe");

	/*
	 * Default flags, naflags, and auid-specific preselection settings to
	 * 0.  Initialize the mode to the global trail so that if praudit(1)
	 * is run on /dev/auditpipe, it sees events associated with the
	 * default trail.  Pipe-aware application can clear the flag, set
	 * custom masks, and flush the pipe as needed.
	 */
	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
	TAILQ_INIT(&ap->ap_preselect_list);
	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;

	/*
	 * Add to global list and update global statistics.
	 */
	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
	audit_pipe_count++;
	audit_pipe_ever++;

	return (ap);
}
608 
609 /*
610  * Flush all records currently present in an audit pipe; assume mutex is held.
611  */
612 static void
613 audit_pipe_flush(struct audit_pipe *ap)
614 {
615 	struct audit_pipe_entry *ape;
616 
617 	AUDIT_PIPE_LOCK_ASSERT(ap);
618 
619 	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
620 		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
621 		ap->ap_qbyteslen -= ape->ape_record_len;
622 		audit_pipe_entry_free(ape);
623 		ap->ap_qlen--;
624 	}
625 	ap->ap_qoffset = 0;
626 
627 	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qbyteslen"));
628 	KASSERT(ap->ap_qbyteslen == 0, ("audit_pipe_flush: ap_qbyteslen"));
629 }
630 
/*
 * Free an audit pipe; this means freeing all preselection state and all
 * records in the pipe.  Assumes global write lock and pipe mutex are held to
 * prevent any new records from being inserted during the free, and that the
 * audit pipe is still on the global list.
 *
 * Note that the pipe mutex is destroyed here while still held and the pipe
 * memory is released, so the caller must not unlock or otherwise touch ap
 * after this returns.  The global write lock remains held on return.
 */
static void
audit_pipe_free(struct audit_pipe *ap)
{

	AUDIT_PIPE_LIST_WLOCK_ASSERT();
	AUDIT_PIPE_LOCK_ASSERT(ap);

	/* Release everything hanging off the pipe first. */
	audit_pipe_preselect_flush_locked(ap);
	audit_pipe_flush(ap);
	/* Tear down the synchronization and notification state. */
	cv_destroy(&ap->ap_cv);
	AUDIT_PIPE_SX_LOCK_DESTROY(ap);
	AUDIT_PIPE_LOCK_DESTROY(ap);
	seldrain(&ap->ap_selinfo);
	knlist_destroy(&ap->ap_selinfo.si_note);
	/* Finally unhook from the global list and release the memory. */
	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
	free(ap, M_AUDIT_PIPE);
	audit_pipe_count--;
}
655 
656 /*
657  * Audit pipe clone routine -- provide specific requested audit pipe, or a
658  * fresh one if a specific one is not requested.
659  */
660 static void
661 audit_pipe_clone(void *arg, struct ucred *cred, char *name, int namelen,
662     struct cdev **dev)
663 {
664 	int i, u;
665 
666 	if (*dev != NULL)
667 		return;
668 
669 	if (strcmp(name, AUDIT_PIPE_NAME) == 0)
670 		u = -1;
671 	else if (dev_stdclone(name, NULL, AUDIT_PIPE_NAME, &u) != 1)
672 		return;
673 
674 	i = clone_create(&audit_pipe_clones, &audit_pipe_cdevsw, &u, dev, 0);
675 	if (i) {
676 		*dev = make_dev(&audit_pipe_cdevsw, u, UID_ROOT,
677 		    GID_WHEEL, 0600, "%s%d", AUDIT_PIPE_NAME, u);
678 		if (*dev != NULL) {
679 			dev_ref(*dev);
680 			(*dev)->si_flags |= SI_CHEAPCLONE;
681 		}
682 	}
683 }
684 
685 /*
686  * Audit pipe open method.  Explicit privilege check isn't used as this
687  * allows file permissions on the special device to be used to grant audit
688  * review access.  Those file permissions should be managed carefully.
689  */
690 static int
691 audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
692 {
693 	struct audit_pipe *ap;
694 
695 	AUDIT_PIPE_LIST_WLOCK();
696 	ap = dev->si_drv1;
697 	if (ap == NULL) {
698 		ap = audit_pipe_alloc();
699 		if (ap == NULL) {
700 			AUDIT_PIPE_LIST_WUNLOCK();
701 			return (ENOMEM);
702 		}
703 		dev->si_drv1 = ap;
704 	} else {
705 		KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
706 		AUDIT_PIPE_LIST_WUNLOCK();
707 		return (EBUSY);
708 	}
709 	ap->ap_open = 1;	/* No lock required yet. */
710 	AUDIT_PIPE_LIST_WUNLOCK();
711 	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
712 	return (0);
713 }
714 
/*
 * Close audit pipe, tear down all records, etc.
 *
 * The SIGIO ownership is dropped first, then the pipe is freed with both the
 * global list write lock and the pipe mutex held, as audit_pipe_free()
 * requires.  Always returns 0.
 */
static int
audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct audit_pipe *ap;

	ap = dev->si_drv1;
	KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
	KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));

	funsetown(&ap->ap_sigio);
	AUDIT_PIPE_LIST_WLOCK();
	AUDIT_PIPE_LOCK(ap);
	ap->ap_open = 0;
	/*
	 * audit_pipe_free() destroys the pipe mutex and frees ap, so there is
	 * deliberately no matching AUDIT_PIPE_UNLOCK() below.
	 */
	audit_pipe_free(ap);
	/* Clearing si_drv1 lets a later open allocate a fresh pipe. */
	dev->si_drv1 = NULL;
	AUDIT_PIPE_LIST_WUNLOCK();
	return (0);
}
736 
737 /*
738  * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
739  * commands.
740  */
741 static int
742 audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
743     struct thread *td)
744 {
745 	struct auditpipe_ioctl_preselect *aip;
746 	struct audit_pipe *ap;
747 	au_mask_t *maskp;
748 	int error, mode;
749 	au_id_t auid;
750 
751 	ap = dev->si_drv1;
752 	KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
753 
754 	/*
755 	 * Audit pipe ioctls: first come standard device node ioctls, then
756 	 * manipulation of pipe settings, and finally, statistics query
757 	 * ioctls.
758 	 */
759 	switch (cmd) {
760 	case FIONBIO:
761 		AUDIT_PIPE_LOCK(ap);
762 		if (*(int *)data)
763 			ap->ap_flags |= AUDIT_PIPE_NBIO;
764 		else
765 			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
766 		AUDIT_PIPE_UNLOCK(ap);
767 		error = 0;
768 		break;
769 
770 	case FIONREAD:
771 		AUDIT_PIPE_LOCK(ap);
772 		*(int *)data = ap->ap_qbyteslen - ap->ap_qoffset;
773 		AUDIT_PIPE_UNLOCK(ap);
774 		error = 0;
775 		break;
776 
777 	case FIOASYNC:
778 		AUDIT_PIPE_LOCK(ap);
779 		if (*(int *)data)
780 			ap->ap_flags |= AUDIT_PIPE_ASYNC;
781 		else
782 			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
783 		AUDIT_PIPE_UNLOCK(ap);
784 		error = 0;
785 		break;
786 
787 	case FIOSETOWN:
788 		error = fsetown(*(int *)data, &ap->ap_sigio);
789 		break;
790 
791 	case FIOGETOWN:
792 		*(int *)data = fgetown(&ap->ap_sigio);
793 		error = 0;
794 		break;
795 
796 	case AUDITPIPE_GET_QLEN:
797 		*(u_int *)data = ap->ap_qlen;
798 		error = 0;
799 		break;
800 
801 	case AUDITPIPE_GET_QLIMIT:
802 		*(u_int *)data = ap->ap_qlimit;
803 		error = 0;
804 		break;
805 
806 	case AUDITPIPE_SET_QLIMIT:
807 		/* Lockless integer write. */
808 		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
809 		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
810 			ap->ap_qlimit = *(u_int *)data;
811 			error = 0;
812 		} else
813 			error = EINVAL;
814 		break;
815 
816 	case AUDITPIPE_GET_QLIMIT_MIN:
817 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
818 		error = 0;
819 		break;
820 
821 	case AUDITPIPE_GET_QLIMIT_MAX:
822 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
823 		error = 0;
824 		break;
825 
826 	case AUDITPIPE_GET_PRESELECT_FLAGS:
827 		AUDIT_PIPE_LOCK(ap);
828 		maskp = (au_mask_t *)data;
829 		*maskp = ap->ap_preselect_flags;
830 		AUDIT_PIPE_UNLOCK(ap);
831 		error = 0;
832 		break;
833 
834 	case AUDITPIPE_SET_PRESELECT_FLAGS:
835 		AUDIT_PIPE_LOCK(ap);
836 		maskp = (au_mask_t *)data;
837 		ap->ap_preselect_flags = *maskp;
838 		AUDIT_PIPE_UNLOCK(ap);
839 		error = 0;
840 		break;
841 
842 	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
843 		AUDIT_PIPE_LOCK(ap);
844 		maskp = (au_mask_t *)data;
845 		*maskp = ap->ap_preselect_naflags;
846 		AUDIT_PIPE_UNLOCK(ap);
847 		error = 0;
848 		break;
849 
850 	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
851 		AUDIT_PIPE_LOCK(ap);
852 		maskp = (au_mask_t *)data;
853 		ap->ap_preselect_naflags = *maskp;
854 		AUDIT_PIPE_UNLOCK(ap);
855 		error = 0;
856 		break;
857 
858 	case AUDITPIPE_GET_PRESELECT_AUID:
859 		aip = (struct auditpipe_ioctl_preselect *)data;
860 		error = audit_pipe_preselect_get(ap, aip->aip_auid,
861 		    &aip->aip_mask);
862 		break;
863 
864 	case AUDITPIPE_SET_PRESELECT_AUID:
865 		aip = (struct auditpipe_ioctl_preselect *)data;
866 		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
867 		error = 0;
868 		break;
869 
870 	case AUDITPIPE_DELETE_PRESELECT_AUID:
871 		auid = *(au_id_t *)data;
872 		error = audit_pipe_preselect_delete(ap, auid);
873 		break;
874 
875 	case AUDITPIPE_FLUSH_PRESELECT_AUID:
876 		audit_pipe_preselect_flush(ap);
877 		error = 0;
878 		break;
879 
880 	case AUDITPIPE_GET_PRESELECT_MODE:
881 		AUDIT_PIPE_LOCK(ap);
882 		*(int *)data = ap->ap_preselect_mode;
883 		AUDIT_PIPE_UNLOCK(ap);
884 		error = 0;
885 		break;
886 
887 	case AUDITPIPE_SET_PRESELECT_MODE:
888 		mode = *(int *)data;
889 		switch (mode) {
890 		case AUDITPIPE_PRESELECT_MODE_TRAIL:
891 		case AUDITPIPE_PRESELECT_MODE_LOCAL:
892 			AUDIT_PIPE_LOCK(ap);
893 			ap->ap_preselect_mode = mode;
894 			AUDIT_PIPE_UNLOCK(ap);
895 			error = 0;
896 			break;
897 
898 		default:
899 			error = EINVAL;
900 		}
901 		break;
902 
903 	case AUDITPIPE_FLUSH:
904 		if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
905 			return (EINTR);
906 		AUDIT_PIPE_LOCK(ap);
907 		audit_pipe_flush(ap);
908 		AUDIT_PIPE_UNLOCK(ap);
909 		AUDIT_PIPE_SX_XUNLOCK(ap);
910 		error = 0;
911 		break;
912 
913 	case AUDITPIPE_GET_MAXAUDITDATA:
914 		*(u_int *)data = MAXAUDITDATA;
915 		error = 0;
916 		break;
917 
918 	case AUDITPIPE_GET_INSERTS:
919 		*(u_int *)data = ap->ap_inserts;
920 		error = 0;
921 		break;
922 
923 	case AUDITPIPE_GET_READS:
924 		*(u_int *)data = ap->ap_reads;
925 		error = 0;
926 		break;
927 
928 	case AUDITPIPE_GET_DROPS:
929 		*(u_int *)data = ap->ap_drops;
930 		error = 0;
931 		break;
932 
933 	case AUDITPIPE_GET_TRUNCATES:
934 		*(u_int *)data = 0;
935 		error = 0;
936 		break;
937 
938 	default:
939 		error = ENOTTY;
940 	}
941 	return (error);
942 }
943 
/*
 * Audit pipe read.  Read one or more partial or complete records to user
 * memory.
 *
 * Blocks until at least one record is queued, unless the pipe is in
 * non-blocking mode, in which case EAGAIN is returned for an empty queue.
 * Returns EINTR if acquiring the read/flush lock is interrupted, the error
 * from cv_wait_sig() if the wait for data is interrupted, the error from
 * uiomove() if copying out fails, and 0 otherwise.  A record that is only
 * partially copied stays at the head of the queue, with ap_qoffset
 * remembering how much of it has been consumed.
 */
static int
audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
{
	struct audit_pipe_entry *ape;
	struct audit_pipe *ap;
	u_int toread;
	int error;

	ap = dev->si_drv1;
	KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));

	/*
	 * We hold an sx(9) lock over read and flush because we rely on the
	 * stability of a record in the queue during uiomove(9).
	 */
	if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
		return (EINTR);
	AUDIT_PIPE_LOCK(ap);
	while (TAILQ_EMPTY(&ap->ap_queue)) {
		if (ap->ap_flags & AUDIT_PIPE_NBIO) {
			AUDIT_PIPE_UNLOCK(ap);
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (EAGAIN);
		}
		/* Sleeps with the mutex released; the sx lock stays held. */
		error = cv_wait_sig(&ap->ap_cv, AUDIT_PIPE_MTX(ap));
		if (error) {
			AUDIT_PIPE_UNLOCK(ap);
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (error);
		}
	}

	/*
	 * Copy as many remaining bytes from the current record to userspace
	 * as we can.  Keep processing records until we run out of records in
	 * the queue, or until the user buffer runs out of space.
	 *
	 * Note: we rely on the SX lock to maintain ape's stability here.
	 */
	/* Incremented once per read call that found data, not per record. */
	ap->ap_reads++;
	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL &&
	    uio->uio_resid > 0) {
		AUDIT_PIPE_LOCK_ASSERT(ap);

		KASSERT(ape->ape_record_len > ap->ap_qoffset,
		    ("audit_pipe_read: record_len > qoffset (1)"));
		toread = MIN(ape->ape_record_len - ap->ap_qoffset,
		    uio->uio_resid);
		/*
		 * The mutex is dropped across uiomove(), so new records may
		 * be appended to the tail meanwhile; the head cannot change
		 * because removal also requires the sx lock.
		 */
		AUDIT_PIPE_UNLOCK(ap);
		error = uiomove((char *)ape->ape_record + ap->ap_qoffset,
		    toread, uio);
		if (error) {
			/* The mutex is already released at this point. */
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (error);
		}

		/*
		 * If the copy succeeded, update book-keeping, and if no
		 * bytes remain in the current record, free it.
		 */
		AUDIT_PIPE_LOCK(ap);
		KASSERT(TAILQ_FIRST(&ap->ap_queue) == ape,
		    ("audit_pipe_read: queue out of sync after uiomove"));
		ap->ap_qoffset += toread;
		KASSERT(ape->ape_record_len >= ap->ap_qoffset,
		    ("audit_pipe_read: record_len >= qoffset (2)"));
		if (ap->ap_qoffset == ape->ape_record_len) {
			TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
			ap->ap_qbyteslen -= ape->ape_record_len;
			audit_pipe_entry_free(ape);
			ap->ap_qlen--;
			ap->ap_qoffset = 0;
		}
	}
	AUDIT_PIPE_UNLOCK(ap);
	AUDIT_PIPE_SX_XUNLOCK(ap);
	return (0);
}
1026 
1027 /*
1028  * Audit pipe poll.
1029  */
1030 static int
1031 audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
1032 {
1033 	struct audit_pipe *ap;
1034 	int revents;
1035 
1036 	revents = 0;
1037 	ap = dev->si_drv1;
1038 	KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
1039 
1040 	if (events & (POLLIN | POLLRDNORM)) {
1041 		AUDIT_PIPE_LOCK(ap);
1042 		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
1043 			revents |= events & (POLLIN | POLLRDNORM);
1044 		else
1045 			selrecord(td, &ap->ap_selinfo);
1046 		AUDIT_PIPE_UNLOCK(ap);
1047 	}
1048 	return (revents);
1049 }
1050 
1051 /*
1052  * Audit pipe kqfilter.
1053  */
1054 static int
1055 audit_pipe_kqfilter(struct cdev *dev, struct knote *kn)
1056 {
1057 	struct audit_pipe *ap;
1058 
1059 	ap = dev->si_drv1;
1060 	KASSERT(ap != NULL, ("audit_pipe_kqfilter: ap == NULL"));
1061 
1062 	if (kn->kn_filter != EVFILT_READ)
1063 		return (EINVAL);
1064 
1065 	kn->kn_fop = &audit_pipe_read_filterops;
1066 	kn->kn_hook = ap;
1067 
1068 	AUDIT_PIPE_LOCK(ap);
1069 	knlist_add(&ap->ap_selinfo.si_note, kn, 1);
1070 	AUDIT_PIPE_UNLOCK(ap);
1071 	return (0);
1072 }
1073 
1074 /*
1075  * Return true if there are records available for reading on the pipe.
1076  */
1077 static int
1078 audit_pipe_kqread(struct knote *kn, long hint)
1079 {
1080 	struct audit_pipe *ap;
1081 
1082 	ap = (struct audit_pipe *)kn->kn_hook;
1083 	KASSERT(ap != NULL, ("audit_pipe_kqread: ap == NULL"));
1084 	AUDIT_PIPE_LOCK_ASSERT(ap);
1085 
1086 	if (ap->ap_qlen != 0) {
1087 		kn->kn_data = ap->ap_qbyteslen - ap->ap_qoffset;
1088 		return (1);
1089 	} else {
1090 		kn->kn_data = 0;
1091 		return (0);
1092 	}
1093 }
1094 
1095 /*
1096  * Detach kqueue state from audit pipe.
1097  */
1098 static void
1099 audit_pipe_kqdetach(struct knote *kn)
1100 {
1101 	struct audit_pipe *ap;
1102 
1103 	ap = (struct audit_pipe *)kn->kn_hook;
1104 	KASSERT(ap != NULL, ("audit_pipe_kqdetach: ap == NULL"));
1105 
1106 	AUDIT_PIPE_LOCK(ap);
1107 	knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
1108 	AUDIT_PIPE_UNLOCK(ap);
1109 }
1110 
1111 /*
1112  * Initialize the audit pipe system.
1113  */
1114 static void
1115 audit_pipe_init(void *unused)
1116 {
1117 
1118 	TAILQ_INIT(&audit_pipe_list);
1119 	AUDIT_PIPE_LIST_LOCK_INIT();
1120 
1121 	clone_setup(&audit_pipe_clones);
1122 	audit_pipe_eh_tag = EVENTHANDLER_REGISTER(dev_clone,
1123 	    audit_pipe_clone, 0, 1000);
1124 	if (audit_pipe_eh_tag == NULL)
1125 		panic("audit_pipe_init: EVENTHANDLER_REGISTER");
1126 }
1127 
1128 SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
1129     NULL);
1130