xref: /freebsd/sys/security/audit/audit_pipe.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 2006 Robert N. M. Watson
3  * All rights reserved.
4  *
5  * This software was developed by Robert Watson for the TrustedBSD Project.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/conf.h>
34 #include <sys/eventhandler.h>
35 #include <sys/filio.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mutex.h>
40 #include <sys/poll.h>
41 #include <sys/proc.h>
42 #include <sys/queue.h>
43 #include <sys/selinfo.h>
44 #include <sys/sigio.h>
45 #include <sys/signal.h>
46 #include <sys/signalvar.h>
47 #include <sys/systm.h>
48 #include <sys/uio.h>
49 
50 #include <security/audit/audit.h>
51 #include <security/audit/audit_ioctl.h>
52 #include <security/audit/audit_private.h>
53 
54 /*
55  * Implementation of a clonable special device providing a live stream of BSM
56  * audit data.  This is a "tee" of the data going to the file.  It provides
57  * unreliable but timely access to audit events.  Consumers of this interface
58  * should be very careful to avoid introducing event cycles.  Consumers may
59  * express interest via a set of preselection ioctls.
60  */
61 
/*
 * Memory types.  M_AUDIT_PIPE backs struct audit_pipe itself,
 * M_AUDIT_PIPE_ENTRY backs queue entries and their record buffers, and
 * M_AUDIT_PIPE_PRESELECT backs per-auid preselection entries.
 */
static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
    "Audit pipe entries and buffers");
static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_preselect",
    "Audit pipe preselection structure");

/*
 * Audit pipe buffer parameters: the queue limit given to a newly allocated
 * pipe, and the bounds reported by the AUDITPIPE_GET_QLIMIT_MIN and
 * AUDITPIPE_GET_QLIMIT_MAX ioctls and consulted by AUDITPIPE_SET_QLIMIT.
 * All values are counts of queued records.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	(128)
#define	AUDIT_PIPE_QLIMIT_MIN		(0)
#define	AUDIT_PIPE_QLIMIT_MAX		(1024)
77 
/*
 * Description of an entry in an audit_pipe: one queued record.  The record
 * buffer is a private copy made when the record is appended to the pipe and
 * is released together with the entry.
 */
struct audit_pipe_entry {
	void				*ape_record;	/* Copy of the record. */
	u_int				 ape_record_len;	/* Bytes in ape_record. */
	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;	/* Link on ap_queue. */
};
86 
/*
 * Audit pipes allow processes to express "interest" in the set of records
 * that are delivered via the pipe.  They do this in a similar manner to the
 * mechanism for audit trail configuration, by expressing two global masks,
 * and optionally expressing per-auid masks.  The following data structure is
 * the per-auid mask description.  The global state is stored in the audit
 * pipe data structure.
 *
 * Entries are kept on a linear list and looked up by walking it.  We may
 * want to consider a more space/time-efficient data structure once usage
 * patterns for per-auid specifications are clear.
 */
struct audit_pipe_preselect {
	au_id_t					 app_auid;	/* Lookup key. */
	au_mask_t				 app_mask;	/* Mask for auid. */
	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;	/* Per-pipe list. */
};
103 
/*
 * Description of an individual audit_pipe.  Consists largely of a bounded
 * length queue.
 *
 * ap_flags bits:
 *   AUDIT_PIPE_ASYNC - toggled by FIOASYNC; when set, appending a record
 *                      posts SIGIO through ap_sigio.
 *   AUDIT_PIPE_NBIO  - toggled by FIONBIO; when set, a read on an empty
 *                      pipe returns EAGAIN instead of sleeping.
 */
#define	AUDIT_PIPE_ASYNC	0x00000001
#define	AUDIT_PIPE_NBIO		0x00000002
struct audit_pipe {
	int				 ap_open;	/* Device open? */
	u_int				 ap_flags;	/* AUDIT_PIPE_* bits. */

	struct selinfo			 ap_selinfo;	/* poll() bookkeeping. */
	struct sigio			*ap_sigio;	/* SIGIO recipient. */

	u_int				 ap_qlen;	/* Entries on ap_queue. */
	u_int				 ap_qlimit;	/* Queue length limit. */

	u_int64_t			 ap_inserts;	/* Records added. */
	u_int64_t			 ap_reads;	/* Records read. */
	u_int64_t			 ap_drops;	/* Records dropped. */
	u_int64_t			 ap_truncates;	/* Records too long. */

	/*
	 * Fields relating to pipe interest: global masks for unmatched
	 * processes (attributable, non-attributable), and a list of specific
	 * interest specifications by auid.  ap_preselect_mode holds one of
	 * the AUDITPIPE_PRESELECT_MODE_* values and selects whether these
	 * fields or the trail's own preselection decision are used.
	 */
	int				 ap_preselect_mode;
	au_mask_t			 ap_preselect_flags;
	au_mask_t			 ap_preselect_naflags;
	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;

	/*
	 * Current pending record list.
	 */
	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;

	/*
	 * Global pipe list.
	 */
	TAILQ_ENTRY(audit_pipe)		 ap_list;
};
145 
/*
 * Global list of audit pipes, mutex to protect it and the pipes.  Finer
 * grained locking may be desirable at some point.
 */
static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
static struct mtx		 audit_pipe_mtx;

/*
 * This CV is used to wakeup on an audit record write.  Readers of every
 * pipe sleep on this one CV, paired with audit_pipe_mtx.  Eventually, it
 * might be per-pipe to avoid unnecessary wakeups when several pipes with
 * different preselection masks are present.
 */
static struct cv		 audit_pipe_cv;

/*
 * Cloning related variables and constants.  audit_pipe_eh_tag holds the
 * tag returned when the dev_clone event handler is registered, and
 * audit_pipe_clones is the clone state handed to clone_setup() and
 * clone_create().
 */
#define	AUDIT_PIPE_NAME		"auditpipe"
static eventhandler_tag		 audit_pipe_eh_tag;
static struct clonedevs		*audit_pipe_clones;
166 
/*
 * Special device methods and definition.  Only open, close, read, ioctl and
 * poll are provided; no write method is registered for the device.
 */
static d_open_t		audit_pipe_open;
static d_close_t	audit_pipe_close;
static d_read_t		audit_pipe_read;
static d_ioctl_t	audit_pipe_ioctl;
static d_poll_t		audit_pipe_poll;

static struct cdevsw	audit_pipe_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	audit_pipe_open,
	.d_close =	audit_pipe_close,
	.d_read =	audit_pipe_read,
	.d_ioctl =	audit_pipe_ioctl,
	.d_poll =	audit_pipe_poll,
	.d_name =	AUDIT_PIPE_NAME,
};
186 
/*
 * Some global statistics on audit pipes.  The pipe counters are maintained
 * when a pipe is allocated or freed; the record counter advances once per
 * submitted record regardless of how many pipes receive it, and the drop
 * counter advances each time any pipe discards a record.
 */
static int		audit_pipe_count;	/* Current number of pipes. */
static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
static u_int64_t	audit_pipe_records;	/* Records seen. */
static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
194 
195 /*
196  * Free an audit pipe entry.
197  */
198 static void
199 audit_pipe_entry_free(struct audit_pipe_entry *ape)
200 {
201 
202 	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
203 	free(ape, M_AUDIT_PIPE_ENTRY);
204 }
205 
206 /*
207  * Find an audit pipe preselection specification for an auid, if any.
208  */
209 static struct audit_pipe_preselect *
210 audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
211 {
212 	struct audit_pipe_preselect *app;
213 
214 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
215 
216 	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
217 		if (app->app_auid == auid)
218 			return (app);
219 	}
220 	return (NULL);
221 }
222 
223 /*
224  * Query the per-pipe mask for a specific auid.
225  */
226 static int
227 audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
228     au_mask_t *maskp)
229 {
230 	struct audit_pipe_preselect *app;
231 	int error;
232 
233 	mtx_lock(&audit_pipe_mtx);
234 	app = audit_pipe_preselect_find(ap, auid);
235 	if (app != NULL) {
236 		*maskp = app->app_mask;
237 		error = 0;
238 	} else
239 		error = ENOENT;
240 	mtx_unlock(&audit_pipe_mtx);
241 	return (error);
242 }
243 
244 /*
245  * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
246  * otherwise, update the current entry.
247  */
248 static void
249 audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
250 {
251 	struct audit_pipe_preselect *app, *app_new;
252 
253 	/*
254 	 * Pessimistically assume that the auid doesn't already have a mask
255 	 * set, and allocate.  We will free it if it is unneeded.
256 	 */
257 	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
258 	mtx_lock(&audit_pipe_mtx);
259 	app = audit_pipe_preselect_find(ap, auid);
260 	if (app == NULL) {
261 		app = app_new;
262 		app_new = NULL;
263 		app->app_auid = auid;
264 		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
265 	}
266 	app->app_mask = mask;
267 	mtx_unlock(&audit_pipe_mtx);
268 	if (app_new != NULL)
269 		free(app_new, M_AUDIT_PIPE_PRESELECT);
270 }
271 
272 /*
273  * Delete a per-auid mask on an audit pipe.
274  */
275 static int
276 audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
277 {
278 	struct audit_pipe_preselect *app;
279 	int error;
280 
281 	mtx_lock(&audit_pipe_mtx);
282 	app = audit_pipe_preselect_find(ap, auid);
283 	if (app != NULL) {
284 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
285 		error = 0;
286 	} else
287 		error = ENOENT;
288 	mtx_unlock(&audit_pipe_mtx);
289 	if (app != NULL)
290 		free(app, M_AUDIT_PIPE_PRESELECT);
291 	return (error);
292 }
293 
294 /*
295  * Delete all per-auid masks on an audit pipe.
296  */
297 static void
298 audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
299 {
300 	struct audit_pipe_preselect *app;
301 
302 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
303 
304 	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
305 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
306 		free(app, M_AUDIT_PIPE_PRESELECT);
307 	}
308 }
309 
310 static void
311 audit_pipe_preselect_flush(struct audit_pipe *ap)
312 {
313 
314 	mtx_lock(&audit_pipe_mtx);
315 	audit_pipe_preselect_flush_locked(ap);
316 	mtx_unlock(&audit_pipe_mtx);
317 }
318 
319 /*
320  * Determine whether a specific audit pipe matches a record with these
321  * properties.  Algorithm is as follows:
322  *
323  * - If the pipe is configured to track the default trail configuration, then
324  *   use the results of global preselection matching.
325  * - If not, search for a specifically configured auid entry matching the
326  *   event.  If an entry is found, use that.
327  * - Otherwise, use the default flags or naflags configured for the pipe.
328  */
329 static int
330 audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
331     au_event_t event, au_class_t class, int sorf, int trail_preselect)
332 {
333 	struct audit_pipe_preselect *app;
334 
335 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
336 
337 	switch (ap->ap_preselect_mode) {
338 	case AUDITPIPE_PRESELECT_MODE_TRAIL:
339 		return (trail_preselect);
340 
341 	case AUDITPIPE_PRESELECT_MODE_LOCAL:
342 		app = audit_pipe_preselect_find(ap, auid);
343 		if (app == NULL) {
344 			if (auid == AU_DEFAUDITID)
345 				return (au_preselect(event, class,
346 				    &ap->ap_preselect_naflags, sorf));
347 			else
348 				return (au_preselect(event, class,
349 				    &ap->ap_preselect_flags, sorf));
350 		} else
351 			return (au_preselect(event, class, &app->app_mask,
352 			    sorf));
353 
354 	default:
355 		panic("audit_pipe_preselect_check: mode %d",
356 		    ap->ap_preselect_mode);
357 	}
358 
359 	return (0);
360 }
361 
362 /*
363  * Determine whether there exists a pipe interested in a record with specific
364  * properties.
365  */
366 int
367 audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
368     int sorf, int trail_preselect)
369 {
370 	struct audit_pipe *ap;
371 
372 	mtx_lock(&audit_pipe_mtx);
373 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
374 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
375 		    trail_preselect)) {
376 			mtx_unlock(&audit_pipe_mtx);
377 			return (1);
378 		}
379 	}
380 	mtx_unlock(&audit_pipe_mtx);
381 	return (0);
382 }
383 
384 /*
385  * Append individual record to a queue -- allocate queue-local buffer, and
386  * add to the queue.  We try to drop from the head of the queue so that more
387  * recent events take precedence over older ones, but if allocation fails we
388  * do drop the new event.
389  */
390 static void
391 audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
392 {
393 	struct audit_pipe_entry *ape, *ape_remove;
394 
395 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
396 
397 	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
398 	if (ape == NULL) {
399 		ap->ap_drops++;
400 		audit_pipe_drops++;
401 		return;
402 	}
403 
404 	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
405 	if (ape->ape_record == NULL) {
406 		free(ape, M_AUDIT_PIPE_ENTRY);
407 		ap->ap_drops++;
408 		audit_pipe_drops++;
409 		return;
410 	}
411 
412 	bcopy(record, ape->ape_record, record_len);
413 	ape->ape_record_len = record_len;
414 
415 	if (ap->ap_qlen >= ap->ap_qlimit) {
416 		ape_remove = TAILQ_FIRST(&ap->ap_queue);
417 		TAILQ_REMOVE(&ap->ap_queue, ape_remove, ape_queue);
418 		audit_pipe_entry_free(ape_remove);
419 		ap->ap_qlen--;
420 		ap->ap_drops++;
421 		audit_pipe_drops++;
422 	}
423 
424 	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
425 	ap->ap_inserts++;
426 	ap->ap_qlen++;
427 	selwakeuppri(&ap->ap_selinfo, PSOCK);
428 	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
429 		pgsigio(&ap->ap_sigio, SIGIO, 0);
430 }
431 
432 /*
433  * audit_pipe_submit(): audit_worker submits audit records via this
434  * interface, which arranges for them to be delivered to pipe queues.
435  */
436 void
437 audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
438     int trail_select, void *record, u_int record_len)
439 {
440 	struct audit_pipe *ap;
441 
442 	/*
443 	 * Lockless read to avoid mutex overhead if pipes are not in use.
444 	 */
445 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
446 		return;
447 
448 	mtx_lock(&audit_pipe_mtx);
449 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
450 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
451 		    trail_select))
452 			audit_pipe_append(ap, record, record_len);
453 	}
454 	audit_pipe_records++;
455 	mtx_unlock(&audit_pipe_mtx);
456 	cv_signal(&audit_pipe_cv);
457 }
458 
459 /*
460  * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
461  * since we don't currently have selection information available, it is
462  * delivered to the pipe unconditionally.
463  *
464  * XXXRW: This is a bug.  The BSM check routine for submitting a user record
465  * should parse that information and return it.
466  */
467 void
468 audit_pipe_submit_user(void *record, u_int record_len)
469 {
470 	struct audit_pipe *ap;
471 
472 	/*
473 	 * Lockless read to avoid mutex overhead if pipes are not in use.
474 	 */
475 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
476 		return;
477 
478 	mtx_lock(&audit_pipe_mtx);
479 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list)
480 		audit_pipe_append(ap, record, record_len);
481 	audit_pipe_records++;
482 	mtx_unlock(&audit_pipe_mtx);
483 	cv_signal(&audit_pipe_cv);
484 }
485 
486 
487 /*
488  * Pop the next record off of an audit pipe.
489  */
490 static struct audit_pipe_entry *
491 audit_pipe_pop(struct audit_pipe *ap)
492 {
493 	struct audit_pipe_entry *ape;
494 
495 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
496 
497 	ape = TAILQ_FIRST(&ap->ap_queue);
498 	KASSERT((ape == NULL && ap->ap_qlen == 0) ||
499 	    (ape != NULL && ap->ap_qlen != 0), ("audit_pipe_pop: qlen"));
500 	if (ape == NULL)
501 		return (NULL);
502 	TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
503 	ap->ap_qlen--;
504 	return (ape);
505 }
506 
507 /*
508  * Allocate a new audit pipe.  Connects the pipe, on success, to the global
509  * list and updates statistics.
510  */
511 static struct audit_pipe *
512 audit_pipe_alloc(void)
513 {
514 	struct audit_pipe *ap;
515 
516 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
517 
518 	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
519 	if (ap == NULL)
520 		return (NULL);
521 	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
522 	TAILQ_INIT(&ap->ap_queue);
523 
524 	/*
525 	 * Default flags, naflags, and auid-specific preselection settings to
526 	 * 0.  Initialize the mode to the global trail so that if praudit(1)
527 	 * is run on /dev/auditpipe, it sees events associated with the
528 	 * default trail.  Pipe-aware application can clear the flag, set
529 	 * custom masks, and flush the pipe as needed.
530 	 */
531 	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
532 	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
533 	TAILQ_INIT(&ap->ap_preselect_list);
534 	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;
535 
536 	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
537 	audit_pipe_count++;
538 	audit_pipe_ever++;
539 
540 	return (ap);
541 }
542 
543 /*
544  * Flush all records currently present in an audit pipe; assume mutex is held.
545  */
546 static void
547 audit_pipe_flush(struct audit_pipe *ap)
548 {
549 	struct audit_pipe_entry *ape;
550 
551 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
552 
553 	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
554 		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
555 		audit_pipe_entry_free(ape);
556 		ap->ap_qlen--;
557 	}
558 	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qlen"));
559 }
560 
561 /*
562  * Free an audit pipe; this means freeing all preselection state and all
563  * records in the pipe.  Assumes mutex is held to prevent any new records
564  * from being inserted during the free, and that the audit pipe is still on
565  * the global list.
566  */
567 static void
568 audit_pipe_free(struct audit_pipe *ap)
569 {
570 
571 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
572 
573 	audit_pipe_preselect_flush_locked(ap);
574 	audit_pipe_flush(ap);
575 	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
576 	free(ap, M_AUDIT_PIPE);
577 	audit_pipe_count--;
578 }
579 
580 /*
581  * Audit pipe clone routine -- provide specific requested audit pipe, or a
582  * fresh one if a specific one is not requested.
583  */
584 static void
585 audit_pipe_clone(void *arg, struct ucred *cred, char *name, int namelen,
586     struct cdev **dev)
587 {
588 	int i, u;
589 
590 	if (*dev != NULL)
591 		return;
592 
593 	if (strcmp(name, AUDIT_PIPE_NAME) == 0)
594 		u = -1;
595 	else if (dev_stdclone(name, NULL, AUDIT_PIPE_NAME, &u) != 1)
596 		return;
597 
598 	i = clone_create(&audit_pipe_clones, &audit_pipe_cdevsw, &u, dev, 0);
599 	if (i) {
600 		*dev = make_dev(&audit_pipe_cdevsw, unit2minor(u), UID_ROOT,
601 		    GID_WHEEL, 0600, "%s%d", AUDIT_PIPE_NAME, u);
602 		if (*dev != NULL) {
603 			dev_ref(*dev);
604 			(*dev)->si_flags |= SI_CHEAPCLONE;
605 		}
606 	}
607 }
608 
609 /*
610  * Audit pipe open method.  Explicit suser check isn't used as this allows
611  * file permissions on the special device to be used to grant audit review
612  * access.
613  */
614 static int
615 audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
616 {
617 	struct audit_pipe *ap;
618 
619 	mtx_lock(&audit_pipe_mtx);
620 	ap = dev->si_drv1;
621 	if (ap == NULL) {
622 		ap = audit_pipe_alloc();
623 		if (ap == NULL) {
624 			mtx_unlock(&audit_pipe_mtx);
625 			return (ENOMEM);
626 		}
627 		dev->si_drv1 = ap;
628 	} else {
629 		KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
630 		mtx_unlock(&audit_pipe_mtx);
631 		return (EBUSY);
632 	}
633 	ap->ap_open = 1;
634 	mtx_unlock(&audit_pipe_mtx);
635 	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
636 	return (0);
637 }
638 
639 /*
640  * Close audit pipe, tear down all records, etc.
641  */
642 static int
643 audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
644 {
645 	struct audit_pipe *ap;
646 
647 	ap = dev->si_drv1;
648 	KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
649 	KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));
650 	funsetown(&ap->ap_sigio);
651 	mtx_lock(&audit_pipe_mtx);
652 	ap->ap_open = 0;
653 	audit_pipe_free(ap);
654 	dev->si_drv1 = NULL;
655 	mtx_unlock(&audit_pipe_mtx);
656 	return (0);
657 }
658 
659 /*
660  * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
661  * commands.
662  *
663  * Would be desirable to support filtering, although perhaps something simple
664  * like an event mask, as opposed to something complicated like BPF.
665  */
666 static int
667 audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
668     struct thread *td)
669 {
670 	struct auditpipe_ioctl_preselect *aip;
671 	struct audit_pipe *ap;
672 	au_mask_t *maskp;
673 	int error, mode;
674 	au_id_t auid;
675 
676 	ap = dev->si_drv1;
677 	KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
678 
679 	/*
680 	 * Audit pipe ioctls: first come standard device node ioctls, then
681 	 * manipulation of pipe settings, and finally, statistics query
682 	 * ioctls.
683 	 */
684 	switch (cmd) {
685 	case FIONBIO:
686 		mtx_lock(&audit_pipe_mtx);
687 		if (*(int *)data)
688 			ap->ap_flags |= AUDIT_PIPE_NBIO;
689 		else
690 			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
691 		mtx_unlock(&audit_pipe_mtx);
692 		error = 0;
693 		break;
694 
695 	case FIONREAD:
696 		mtx_lock(&audit_pipe_mtx);
697 		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
698 			*(int *)data =
699 			    TAILQ_FIRST(&ap->ap_queue)->ape_record_len;
700 		else
701 			*(int *)data = 0;
702 		mtx_unlock(&audit_pipe_mtx);
703 		error = 0;
704 		break;
705 
706 	case FIOASYNC:
707 		mtx_lock(&audit_pipe_mtx);
708 		if (*(int *)data)
709 			ap->ap_flags |= AUDIT_PIPE_ASYNC;
710 		else
711 			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
712 		mtx_unlock(&audit_pipe_mtx);
713 		error = 0;
714 		break;
715 
716 	case FIOSETOWN:
717 		error = fsetown(*(int *)data, &ap->ap_sigio);
718 		break;
719 
720 	case FIOGETOWN:
721 		*(int *)data = fgetown(&ap->ap_sigio);
722 		error = 0;
723 		break;
724 
725 	case AUDITPIPE_GET_QLEN:
726 		*(u_int *)data = ap->ap_qlen;
727 		error = 0;
728 		break;
729 
730 	case AUDITPIPE_GET_QLIMIT:
731 		*(u_int *)data = ap->ap_qlimit;
732 		error = 0;
733 		break;
734 
735 	case AUDITPIPE_SET_QLIMIT:
736 		/* Lockless integer write. */
737 		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
738 		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
739 			ap->ap_qlimit = *(u_int *)data;
740 			error = 0;
741 		} else
742 			error = EINVAL;
743 		break;
744 
745 	case AUDITPIPE_GET_QLIMIT_MIN:
746 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
747 		error = 0;
748 		break;
749 
750 	case AUDITPIPE_GET_QLIMIT_MAX:
751 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
752 		error = 0;
753 		break;
754 
755 	case AUDITPIPE_GET_PRESELECT_FLAGS:
756 		mtx_lock(&audit_pipe_mtx);
757 		maskp = (au_mask_t *)data;
758 		*maskp = ap->ap_preselect_flags;
759 		mtx_unlock(&audit_pipe_mtx);
760 		error = 0;
761 		break;
762 
763 	case AUDITPIPE_SET_PRESELECT_FLAGS:
764 		mtx_lock(&audit_pipe_mtx);
765 		maskp = (au_mask_t *)data;
766 		ap->ap_preselect_flags = *maskp;
767 		mtx_unlock(&audit_pipe_mtx);
768 		error = 0;
769 		break;
770 
771 	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
772 		mtx_lock(&audit_pipe_mtx);
773 		maskp = (au_mask_t *)data;
774 		*maskp = ap->ap_preselect_naflags;
775 		mtx_unlock(&audit_pipe_mtx);
776 		error = 0;
777 		break;
778 
779 	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
780 		mtx_lock(&audit_pipe_mtx);
781 		maskp = (au_mask_t *)data;
782 		ap->ap_preselect_naflags = *maskp;
783 		mtx_unlock(&audit_pipe_mtx);
784 		error = 0;
785 		break;
786 
787 	case AUDITPIPE_GET_PRESELECT_AUID:
788 		aip = (struct auditpipe_ioctl_preselect *)data;
789 		error = audit_pipe_preselect_get(ap, aip->aip_auid,
790 		    &aip->aip_mask);
791 		break;
792 
793 	case AUDITPIPE_SET_PRESELECT_AUID:
794 		aip = (struct auditpipe_ioctl_preselect *)data;
795 		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
796 		error = 0;
797 		break;
798 
799 	case AUDITPIPE_DELETE_PRESELECT_AUID:
800 		auid = *(au_id_t *)data;
801 		error = audit_pipe_preselect_delete(ap, auid);
802 		break;
803 
804 	case AUDITPIPE_FLUSH_PRESELECT_AUID:
805 		audit_pipe_preselect_flush(ap);
806 		error = 0;
807 		break;
808 
809 	case AUDITPIPE_GET_PRESELECT_MODE:
810 		mtx_lock(&audit_pipe_mtx);
811 		*(int *)data = ap->ap_preselect_mode;
812 		mtx_unlock(&audit_pipe_mtx);
813 		error = 0;
814 		break;
815 
816 	case AUDITPIPE_SET_PRESELECT_MODE:
817 		mode = *(int *)data;
818 		switch (mode) {
819 		case AUDITPIPE_PRESELECT_MODE_TRAIL:
820 		case AUDITPIPE_PRESELECT_MODE_LOCAL:
821 			mtx_lock(&audit_pipe_mtx);
822 			ap->ap_preselect_mode = mode;
823 			mtx_unlock(&audit_pipe_mtx);
824 			error = 0;
825 			break;
826 
827 		default:
828 			error = EINVAL;
829 		}
830 		break;
831 
832 	case AUDITPIPE_FLUSH:
833 		mtx_lock(&audit_pipe_mtx);
834 		audit_pipe_flush(ap);
835 		mtx_unlock(&audit_pipe_mtx);
836 		error = 0;
837 		break;
838 
839 	case AUDITPIPE_GET_INSERTS:
840 		*(u_int *)data = ap->ap_inserts;
841 		error = 0;
842 		break;
843 
844 	case AUDITPIPE_GET_READS:
845 		*(u_int *)data = ap->ap_reads;
846 		error = 0;
847 		break;
848 
849 	case AUDITPIPE_GET_DROPS:
850 		*(u_int *)data = ap->ap_drops;
851 		error = 0;
852 		break;
853 
854 	case AUDITPIPE_GET_TRUNCATES:
855 		*(u_int *)data = ap->ap_truncates;
856 		error = 0;
857 		break;
858 
859 	default:
860 		error = ENOTTY;
861 	}
862 	return (error);
863 }
864 
865 /*
866  * Audit pipe read.  Pull one record off the queue and copy to user space.
867  * On error, the record is dropped.
868  *
869  * Providing more sophisticated behavior, such as partial reads, is tricky
870  * due to the potential for parallel I/O.  If partial read support is
871  * required, it will require a per-pipe "current record being read" along
872  * with an offset into that trecord which has already been read.  Threads
873  * performing partial reads will need to allocate per-thread copies of the
874  * data so that if another thread completes the read of the record, it can be
875  * freed without adding reference count logic.  If this is added, a flag to
876  * indicate that only atomic record reads are desired would be useful, as if
877  * different threads are all waiting for records on the pipe, they will want
878  * independent record reads, which is currently the behavior.
879  */
880 static int
881 audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
882 {
883 	struct audit_pipe_entry *ape;
884 	struct audit_pipe *ap;
885 	int error;
886 
887 	ap = dev->si_drv1;
888 	KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));
889 	mtx_lock(&audit_pipe_mtx);
890 	do {
891 		/*
892 		 * Wait for a record that fits into the read buffer, dropping
893 		 * records that would be truncated if actually passed to the
894 		 * process.  This helps maintain the discreet record read
895 		 * interface.
896 		 */
897 		while ((ape = audit_pipe_pop(ap)) == NULL) {
898 			if (ap->ap_flags & AUDIT_PIPE_NBIO) {
899 				mtx_unlock(&audit_pipe_mtx);
900 				return (EAGAIN);
901 			}
902 			error = cv_wait_sig(&audit_pipe_cv, &audit_pipe_mtx);
903 			if (error) {
904 				mtx_unlock(&audit_pipe_mtx);
905 				return (error);
906 			}
907 		}
908 		if (ape->ape_record_len <= uio->uio_resid)
909 			break;
910 		audit_pipe_entry_free(ape);
911 		ap->ap_truncates++;
912 	} while (1);
913 	mtx_unlock(&audit_pipe_mtx);
914 
915 	/*
916 	 * Now read record to user space memory.  Even if the read is short,
917 	 * we abandon the remainder of the record, supporting only discreet
918 	 * record reads.
919 	 */
920 	error = uiomove(ape->ape_record, ape->ape_record_len, uio);
921 	audit_pipe_entry_free(ape);
922 	return (error);
923 }
924 
925 /*
926  * Audit pipe poll.
927  */
928 static int
929 audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
930 {
931 	struct audit_pipe *ap;
932 	int revents;
933 
934 	revents = 0;
935 	ap = dev->si_drv1;
936 	KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
937 	if (events & (POLLIN | POLLRDNORM)) {
938 		mtx_lock(&audit_pipe_mtx);
939 		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
940 			revents |= events & (POLLIN | POLLRDNORM);
941 		else
942 			selrecord(td, &ap->ap_selinfo);
943 		mtx_unlock(&audit_pipe_mtx);
944 	}
945 	return (revents);
946 }
947 
948 /*
949  * Initialize the audit pipe system.
950  */
951 static void
952 audit_pipe_init(void *unused)
953 {
954 
955 	TAILQ_INIT(&audit_pipe_list);
956 	mtx_init(&audit_pipe_mtx, "audit_pipe_mtx", NULL, MTX_DEF);
957 	cv_init(&audit_pipe_cv, "audit_pipe_cv");
958 
959 	clone_setup(&audit_pipe_clones);
960 	audit_pipe_eh_tag = EVENTHANDLER_REGISTER(dev_clone,
961 	    audit_pipe_clone, 0, 1000);
962 	if (audit_pipe_eh_tag == NULL)
963 		panic("audit_pipe_init: EVENTHANDLER_REGISTER");
964 }
965 
966 SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
967     NULL);
968