xref: /freebsd/sys/security/audit/audit_pipe.c (revision 29d4cb241b5b8d786221402075febdb832fea55a)
1 /*-
2  * Copyright (c) 2006 Robert N. M. Watson
3  * All rights reserved.
4  *
5  * This software was developed by Robert Watson for the TrustedBSD Project.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/condvar.h>
34 #include <sys/conf.h>
35 #include <sys/eventhandler.h>
36 #include <sys/filio.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/poll.h>
42 #include <sys/proc.h>
43 #include <sys/queue.h>
44 #include <sys/selinfo.h>
45 #include <sys/sigio.h>
46 #include <sys/signal.h>
47 #include <sys/signalvar.h>
48 #include <sys/systm.h>
49 #include <sys/uio.h>
50 
51 #include <security/audit/audit.h>
52 #include <security/audit/audit_ioctl.h>
53 #include <security/audit/audit_private.h>
54 
55 /*
56  * Implementation of a clonable special device providing a live stream of BSM
57  * audit data.  This is a "tee" of the data going to the file.  It provides
58  * unreliable but timely access to audit events.  Consumers of this interface
59  * should be very careful to avoid introducing event cycles.  Consumers may
60  * express interest via a set of preselection ioctls.
61  */
62 
/*
 * Memory types.  Each kind of allocation made by this file has its own
 * malloc type: M_AUDIT_PIPE for the per-pipe structure, M_AUDIT_PIPE_ENTRY
 * for queued entries and the record buffers they own, and
 * M_AUDIT_PIPE_PRESELECT for per-auid preselection entries.
 */
static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
    "Audit pipe entries and buffers");
static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel",
    "Audit pipe preselection structure");
71 
/*
 * Audit pipe buffer parameters.  Limits are expressed as a number of queued
 * records (they are compared against ap_qlen, which counts entries).
 * DEFAULT is the limit given to a newly allocated pipe; MIN and MAX are the
 * bounds reported by the AUDITPIPE_GET_QLIMIT_MIN/MAX ioctls and checked by
 * AUDITPIPE_SET_QLIMIT.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	(128)
#define	AUDIT_PIPE_QLIMIT_MIN		(0)
#define	AUDIT_PIPE_QLIMIT_MAX		(1024)
78 
/*
 * Description of an entry in an audit_pipe.  Each entry owns a private copy
 * of one record; both the entry and its buffer are allocated from
 * M_AUDIT_PIPE_ENTRY and released together by audit_pipe_entry_free().
 */
struct audit_pipe_entry {
	void				*ape_record;	/* Copy of record. */
	u_int				 ape_record_len; /* Bytes in copy. */
	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;	/* On ap_queue. */
};
87 
/*
 * Audit pipes allow processes to express "interest" in the set of records
 * that are delivered via the pipe.  They do this in a similar manner to the
 * mechanism for audit trail configuration, by expressing two global masks,
 * and optionally expressing per-auid masks.  The following data structure is
 * the per-auid mask description.  The global state is stored in the audit
 * pipe data structure.
 *
 * Entries are kept on the owning pipe's ap_preselect_list and are looked up
 * by a linear search on app_auid.
 *
 * We may want to consider a more space/time-efficient data structure once
 * usage patterns for per-auid specifications are clear.
 */
struct audit_pipe_preselect {
	au_id_t					 app_auid;	/* Key. */
	au_mask_t				 app_mask;	/* Its mask. */
	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;	/* Linkage. */
};
104 
/*
 * Description of an individual audit_pipe.  Consists largely of a bounded
 * length queue.
 *
 * AUDIT_PIPE_ASYNC and AUDIT_PIPE_NBIO are the bits stored in ap_flags; they
 * are toggled by the FIOASYNC and FIONBIO ioctls respectively.
 */
#define	AUDIT_PIPE_ASYNC	0x00000001
#define	AUDIT_PIPE_NBIO		0x00000002
struct audit_pipe {
	int				 ap_open;	/* Device open? */
	u_int				 ap_flags;	/* AUDIT_PIPE_* bits. */

	struct selinfo			 ap_selinfo;	/* poll/kqueue state. */
	struct sigio			*ap_sigio;	/* SIGIO recipient. */

	u_int				 ap_qlen;	/* Entries on ap_queue. */
	u_int				 ap_qlimit;	/* Max entries queued. */

	u_int64_t			 ap_inserts;	/* Records added. */
	u_int64_t			 ap_reads;	/* Records read. */
	u_int64_t			 ap_drops;	/* Records dropped. */
	u_int64_t			 ap_truncates;	/* Records too long. */

	/*
	 * Fields relating to pipe interest: global masks for unmatched
	 * processes (attributable, non-attributable), and a list of specific
	 * interest specifications by auid.  ap_preselect_mode holds one of
	 * the AUDITPIPE_PRESELECT_MODE_* values and selects between trail
	 * preselection and the pipe-local masks below.
	 */
	int				 ap_preselect_mode;
	au_mask_t			 ap_preselect_flags;
	au_mask_t			 ap_preselect_naflags;
	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;

	/*
	 * Current pending record list.
	 */
	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;

	/*
	 * Global pipe list.
	 */
	TAILQ_ENTRY(audit_pipe)		 ap_list;
};
146 
/*
 * Global list of audit pipes, mutex to protect it and the pipes.  Finer
 * grained locking may be desirable at some point.
 */
static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
static struct mtx		 audit_pipe_mtx;

/*
 * This CV is used to wakeup on an audit record write.  Eventually, it might
 * be per-pipe to avoid unnecessary wakeups when several pipes with different
 * preselection masks are present.
 *
 * It is broadcast by audit_pipe_submit() and audit_pipe_submit_user(), and
 * waited on (with audit_pipe_mtx) by blocking readers in audit_pipe_read().
 */
static struct cv		 audit_pipe_cv;

/*
 * Cloning related variables and constants.  AUDIT_PIPE_NAME is the device
 * name matched by the clone handler; the event handler tag and clone state
 * are set up in audit_pipe_init().
 */
#define	AUDIT_PIPE_NAME		"auditpipe"
static eventhandler_tag		 audit_pipe_eh_tag;
static struct clonedevs		*audit_pipe_clones;
167 
/*
 * Special device methods and definition.  The methods are declared here and
 * implemented later in this file; audit_pipe_cdevsw binds them to the
 * "auditpipe" device name and is the switch passed to clone_create() and
 * make_dev() by the clone handler.
 */
static d_open_t		audit_pipe_open;
static d_close_t	audit_pipe_close;
static d_read_t		audit_pipe_read;
static d_ioctl_t	audit_pipe_ioctl;
static d_poll_t		audit_pipe_poll;
static d_kqfilter_t	audit_pipe_kqfilter;

static struct cdevsw	audit_pipe_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT | D_NEEDMINOR,
	.d_open =	audit_pipe_open,
	.d_close =	audit_pipe_close,
	.d_read =	audit_pipe_read,
	.d_ioctl =	audit_pipe_ioctl,
	.d_poll =	audit_pipe_poll,
	.d_kqfilter =	audit_pipe_kqfilter,
	.d_name =	AUDIT_PIPE_NAME,
};
189 
/*
 * kqueue support.  audit_pipe_read_filterops is installed on a knote by
 * audit_pipe_kqfilter() for EVFILT_READ requests; no attach method is
 * provided, only detach and event evaluation.
 */
static int	audit_pipe_kqread(struct knote *note, long hint);
static void	audit_pipe_kqdetach(struct knote *note);

static struct filterops audit_pipe_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	audit_pipe_kqdetach,
	.f_event =	audit_pipe_kqread,
};
199 
/*
 * Some global statistics on audit pipes.  All of these are modified only
 * with audit_pipe_mtx held (in the alloc/free, append and submit paths).
 */
static int		audit_pipe_count;	/* Current number of pipes. */
static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
static u_int64_t	audit_pipe_records;	/* Records seen. */
static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
207 
208 /*
209  * Free an audit pipe entry.
210  */
211 static void
212 audit_pipe_entry_free(struct audit_pipe_entry *ape)
213 {
214 
215 	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
216 	free(ape, M_AUDIT_PIPE_ENTRY);
217 }
218 
219 /*
220  * Find an audit pipe preselection specification for an auid, if any.
221  */
222 static struct audit_pipe_preselect *
223 audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
224 {
225 	struct audit_pipe_preselect *app;
226 
227 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
228 
229 	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
230 		if (app->app_auid == auid)
231 			return (app);
232 	}
233 	return (NULL);
234 }
235 
236 /*
237  * Query the per-pipe mask for a specific auid.
238  */
239 static int
240 audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
241     au_mask_t *maskp)
242 {
243 	struct audit_pipe_preselect *app;
244 	int error;
245 
246 	mtx_lock(&audit_pipe_mtx);
247 	app = audit_pipe_preselect_find(ap, auid);
248 	if (app != NULL) {
249 		*maskp = app->app_mask;
250 		error = 0;
251 	} else
252 		error = ENOENT;
253 	mtx_unlock(&audit_pipe_mtx);
254 	return (error);
255 }
256 
257 /*
258  * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
259  * otherwise, update the current entry.
260  */
261 static void
262 audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
263 {
264 	struct audit_pipe_preselect *app, *app_new;
265 
266 	/*
267 	 * Pessimistically assume that the auid doesn't already have a mask
268 	 * set, and allocate.  We will free it if it is unneeded.
269 	 */
270 	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
271 	mtx_lock(&audit_pipe_mtx);
272 	app = audit_pipe_preselect_find(ap, auid);
273 	if (app == NULL) {
274 		app = app_new;
275 		app_new = NULL;
276 		app->app_auid = auid;
277 		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
278 	}
279 	app->app_mask = mask;
280 	mtx_unlock(&audit_pipe_mtx);
281 	if (app_new != NULL)
282 		free(app_new, M_AUDIT_PIPE_PRESELECT);
283 }
284 
285 /*
286  * Delete a per-auid mask on an audit pipe.
287  */
288 static int
289 audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
290 {
291 	struct audit_pipe_preselect *app;
292 	int error;
293 
294 	mtx_lock(&audit_pipe_mtx);
295 	app = audit_pipe_preselect_find(ap, auid);
296 	if (app != NULL) {
297 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
298 		error = 0;
299 	} else
300 		error = ENOENT;
301 	mtx_unlock(&audit_pipe_mtx);
302 	if (app != NULL)
303 		free(app, M_AUDIT_PIPE_PRESELECT);
304 	return (error);
305 }
306 
307 /*
308  * Delete all per-auid masks on an audit pipe.
309  */
310 static void
311 audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
312 {
313 	struct audit_pipe_preselect *app;
314 
315 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
316 
317 	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
318 		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
319 		free(app, M_AUDIT_PIPE_PRESELECT);
320 	}
321 }
322 
/*
 * Delete all per-auid masks on an audit pipe.  Unlocked wrapper: acquires
 * audit_pipe_mtx around audit_pipe_preselect_flush_locked().
 */
static void
audit_pipe_preselect_flush(struct audit_pipe *ap)
{

	mtx_lock(&audit_pipe_mtx);
	audit_pipe_preselect_flush_locked(ap);
	mtx_unlock(&audit_pipe_mtx);
}
331 
332 /*-
333  * Determine whether a specific audit pipe matches a record with these
334  * properties.  Algorithm is as follows:
335  *
336  * - If the pipe is configured to track the default trail configuration, then
337  *   use the results of global preselection matching.
338  * - If not, search for a specifically configured auid entry matching the
339  *   event.  If an entry is found, use that.
340  * - Otherwise, use the default flags or naflags configured for the pipe.
341  */
342 static int
343 audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
344     au_event_t event, au_class_t class, int sorf, int trail_preselect)
345 {
346 	struct audit_pipe_preselect *app;
347 
348 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
349 
350 	switch (ap->ap_preselect_mode) {
351 	case AUDITPIPE_PRESELECT_MODE_TRAIL:
352 		return (trail_preselect);
353 
354 	case AUDITPIPE_PRESELECT_MODE_LOCAL:
355 		app = audit_pipe_preselect_find(ap, auid);
356 		if (app == NULL) {
357 			if (auid == AU_DEFAUDITID)
358 				return (au_preselect(event, class,
359 				    &ap->ap_preselect_naflags, sorf));
360 			else
361 				return (au_preselect(event, class,
362 				    &ap->ap_preselect_flags, sorf));
363 		} else
364 			return (au_preselect(event, class, &app->app_mask,
365 			    sorf));
366 
367 	default:
368 		panic("audit_pipe_preselect_check: mode %d",
369 		    ap->ap_preselect_mode);
370 	}
371 
372 	return (0);
373 }
374 
375 /*
376  * Determine whether there exists a pipe interested in a record with specific
377  * properties.
378  */
379 int
380 audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
381     int sorf, int trail_preselect)
382 {
383 	struct audit_pipe *ap;
384 
385 	mtx_lock(&audit_pipe_mtx);
386 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
387 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
388 		    trail_preselect)) {
389 			mtx_unlock(&audit_pipe_mtx);
390 			return (1);
391 		}
392 	}
393 	mtx_unlock(&audit_pipe_mtx);
394 	return (0);
395 }
396 
397 /*
398  * Append individual record to a queue -- allocate queue-local buffer, and
399  * add to the queue.  We try to drop from the head of the queue so that more
400  * recent events take precedence over older ones, but if allocation fails we
401  * do drop the new event.
402  */
403 static void
404 audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
405 {
406 	struct audit_pipe_entry *ape, *ape_remove;
407 
408 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
409 
410 	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
411 	if (ape == NULL) {
412 		ap->ap_drops++;
413 		audit_pipe_drops++;
414 		return;
415 	}
416 
417 	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
418 	if (ape->ape_record == NULL) {
419 		free(ape, M_AUDIT_PIPE_ENTRY);
420 		ap->ap_drops++;
421 		audit_pipe_drops++;
422 		return;
423 	}
424 
425 	bcopy(record, ape->ape_record, record_len);
426 	ape->ape_record_len = record_len;
427 
428 	if (ap->ap_qlen >= ap->ap_qlimit) {
429 		ape_remove = TAILQ_FIRST(&ap->ap_queue);
430 		TAILQ_REMOVE(&ap->ap_queue, ape_remove, ape_queue);
431 		audit_pipe_entry_free(ape_remove);
432 		ap->ap_qlen--;
433 		ap->ap_drops++;
434 		audit_pipe_drops++;
435 	}
436 
437 	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
438 	ap->ap_inserts++;
439 	ap->ap_qlen++;
440 	selwakeuppri(&ap->ap_selinfo, PSOCK);
441 	KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
442 	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
443 		pgsigio(&ap->ap_sigio, SIGIO, 0);
444 }
445 
446 /*
447  * audit_pipe_submit(): audit_worker submits audit records via this
448  * interface, which arranges for them to be delivered to pipe queues.
449  */
450 void
451 audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
452     int trail_select, void *record, u_int record_len)
453 {
454 	struct audit_pipe *ap;
455 
456 	/*
457 	 * Lockless read to avoid mutex overhead if pipes are not in use.
458 	 */
459 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
460 		return;
461 
462 	mtx_lock(&audit_pipe_mtx);
463 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
464 		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
465 		    trail_select))
466 			audit_pipe_append(ap, record, record_len);
467 	}
468 	audit_pipe_records++;
469 	mtx_unlock(&audit_pipe_mtx);
470 	cv_broadcastpri(&audit_pipe_cv, PSOCK);
471 }
472 
473 /*
474  * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
475  * since we don't currently have selection information available, it is
476  * delivered to the pipe unconditionally.
477  *
478  * XXXRW: This is a bug.  The BSM check routine for submitting a user record
479  * should parse that information and return it.
480  */
481 void
482 audit_pipe_submit_user(void *record, u_int record_len)
483 {
484 	struct audit_pipe *ap;
485 
486 	/*
487 	 * Lockless read to avoid mutex overhead if pipes are not in use.
488 	 */
489 	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
490 		return;
491 
492 	mtx_lock(&audit_pipe_mtx);
493 	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list)
494 		audit_pipe_append(ap, record, record_len);
495 	audit_pipe_records++;
496 	mtx_unlock(&audit_pipe_mtx);
497 	cv_broadcastpri(&audit_pipe_cv, PSOCK);
498 }
499 
500 
501 /*
502  * Pop the next record off of an audit pipe.
503  */
504 static struct audit_pipe_entry *
505 audit_pipe_pop(struct audit_pipe *ap)
506 {
507 	struct audit_pipe_entry *ape;
508 
509 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
510 
511 	ape = TAILQ_FIRST(&ap->ap_queue);
512 	KASSERT((ape == NULL && ap->ap_qlen == 0) ||
513 	    (ape != NULL && ap->ap_qlen != 0), ("audit_pipe_pop: qlen"));
514 	if (ape == NULL)
515 		return (NULL);
516 	TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
517 	ap->ap_qlen--;
518 	return (ape);
519 }
520 
521 /*
522  * Allocate a new audit pipe.  Connects the pipe, on success, to the global
523  * list and updates statistics.
524  */
525 static struct audit_pipe *
526 audit_pipe_alloc(void)
527 {
528 	struct audit_pipe *ap;
529 
530 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
531 
532 	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
533 	if (ap == NULL)
534 		return (NULL);
535 	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
536 	TAILQ_INIT(&ap->ap_queue);
537 	knlist_init(&ap->ap_selinfo.si_note, &audit_pipe_mtx, NULL, NULL,
538 	    NULL);
539 
540 	/*
541 	 * Default flags, naflags, and auid-specific preselection settings to
542 	 * 0.  Initialize the mode to the global trail so that if praudit(1)
543 	 * is run on /dev/auditpipe, it sees events associated with the
544 	 * default trail.  Pipe-aware application can clear the flag, set
545 	 * custom masks, and flush the pipe as needed.
546 	 */
547 	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
548 	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
549 	TAILQ_INIT(&ap->ap_preselect_list);
550 	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;
551 
552 	/*
553 	 * Add to global list and update global statistics.
554 	 */
555 	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
556 	audit_pipe_count++;
557 	audit_pipe_ever++;
558 
559 	return (ap);
560 }
561 
562 /*
563  * Flush all records currently present in an audit pipe; assume mutex is held.
564  */
565 static void
566 audit_pipe_flush(struct audit_pipe *ap)
567 {
568 	struct audit_pipe_entry *ape;
569 
570 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
571 
572 	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
573 		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
574 		audit_pipe_entry_free(ape);
575 		ap->ap_qlen--;
576 	}
577 	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qlen"));
578 }
579 
580 /*
581  * Free an audit pipe; this means freeing all preselection state and all
582  * records in the pipe.  Assumes mutex is held to prevent any new records
583  * from being inserted during the free, and that the audit pipe is still on
584  * the global list.
585  */
586 static void
587 audit_pipe_free(struct audit_pipe *ap)
588 {
589 
590 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
591 
592 	audit_pipe_preselect_flush_locked(ap);
593 	audit_pipe_flush(ap);
594 	knlist_destroy(&ap->ap_selinfo.si_note);
595 	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
596 	free(ap, M_AUDIT_PIPE);
597 	audit_pipe_count--;
598 }
599 
600 /*
601  * Audit pipe clone routine -- provide specific requested audit pipe, or a
602  * fresh one if a specific one is not requested.
603  */
604 static void
605 audit_pipe_clone(void *arg, struct ucred *cred, char *name, int namelen,
606     struct cdev **dev)
607 {
608 	int i, u;
609 
610 	if (*dev != NULL)
611 		return;
612 
613 	if (strcmp(name, AUDIT_PIPE_NAME) == 0)
614 		u = -1;
615 	else if (dev_stdclone(name, NULL, AUDIT_PIPE_NAME, &u) != 1)
616 		return;
617 
618 	i = clone_create(&audit_pipe_clones, &audit_pipe_cdevsw, &u, dev, 0);
619 	if (i) {
620 		*dev = make_dev(&audit_pipe_cdevsw, unit2minor(u), UID_ROOT,
621 		    GID_WHEEL, 0600, "%s%d", AUDIT_PIPE_NAME, u);
622 		if (*dev != NULL) {
623 			dev_ref(*dev);
624 			(*dev)->si_flags |= SI_CHEAPCLONE;
625 		}
626 	}
627 }
628 
629 /*
630  * Audit pipe open method.  Explicit privilege check isn't used as this
631  * allows file permissions on the special device to be used to grant audit
632  * review access.  Those file permissions should be managed carefully.
633  */
634 static int
635 audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
636 {
637 	struct audit_pipe *ap;
638 
639 	mtx_lock(&audit_pipe_mtx);
640 	ap = dev->si_drv1;
641 	if (ap == NULL) {
642 		ap = audit_pipe_alloc();
643 		if (ap == NULL) {
644 			mtx_unlock(&audit_pipe_mtx);
645 			return (ENOMEM);
646 		}
647 		dev->si_drv1 = ap;
648 	} else {
649 		KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
650 		mtx_unlock(&audit_pipe_mtx);
651 		return (EBUSY);
652 	}
653 	ap->ap_open = 1;
654 	mtx_unlock(&audit_pipe_mtx);
655 	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
656 	return (0);
657 }
658 
/*
 * Close audit pipe, tear down all records, etc.
 *
 * The SIGIO ownership is dropped before the mutex is taken; then, with the
 * mutex held, the pipe is marked closed, freed (which also removes it from
 * the global list), and detached from the device so that a later open
 * allocates a fresh pipe.  Always returns 0.
 */
static int
audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct audit_pipe *ap;

	ap = dev->si_drv1;
	KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
	KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));
	funsetown(&ap->ap_sigio);
	mtx_lock(&audit_pipe_mtx);
	ap->ap_open = 0;
	audit_pipe_free(ap);
	/* ap is no longer valid past this point; clear the stale pointer. */
	dev->si_drv1 = NULL;
	mtx_unlock(&audit_pipe_mtx);
	return (0);
}
678 
679 /*
680  * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
681  * commands.
682  *
683  * Would be desirable to support filtering, although perhaps something simple
684  * like an event mask, as opposed to something complicated like BPF.
685  */
686 static int
687 audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
688     struct thread *td)
689 {
690 	struct auditpipe_ioctl_preselect *aip;
691 	struct audit_pipe *ap;
692 	au_mask_t *maskp;
693 	int error, mode;
694 	au_id_t auid;
695 
696 	ap = dev->si_drv1;
697 	KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
698 
699 	/*
700 	 * Audit pipe ioctls: first come standard device node ioctls, then
701 	 * manipulation of pipe settings, and finally, statistics query
702 	 * ioctls.
703 	 */
704 	switch (cmd) {
705 	case FIONBIO:
706 		mtx_lock(&audit_pipe_mtx);
707 		if (*(int *)data)
708 			ap->ap_flags |= AUDIT_PIPE_NBIO;
709 		else
710 			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
711 		mtx_unlock(&audit_pipe_mtx);
712 		error = 0;
713 		break;
714 
715 	case FIONREAD:
716 		mtx_lock(&audit_pipe_mtx);
717 		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
718 			*(int *)data =
719 			    TAILQ_FIRST(&ap->ap_queue)->ape_record_len;
720 		else
721 			*(int *)data = 0;
722 		mtx_unlock(&audit_pipe_mtx);
723 		error = 0;
724 		break;
725 
726 	case FIOASYNC:
727 		mtx_lock(&audit_pipe_mtx);
728 		if (*(int *)data)
729 			ap->ap_flags |= AUDIT_PIPE_ASYNC;
730 		else
731 			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
732 		mtx_unlock(&audit_pipe_mtx);
733 		error = 0;
734 		break;
735 
736 	case FIOSETOWN:
737 		error = fsetown(*(int *)data, &ap->ap_sigio);
738 		break;
739 
740 	case FIOGETOWN:
741 		*(int *)data = fgetown(&ap->ap_sigio);
742 		error = 0;
743 		break;
744 
745 	case AUDITPIPE_GET_QLEN:
746 		*(u_int *)data = ap->ap_qlen;
747 		error = 0;
748 		break;
749 
750 	case AUDITPIPE_GET_QLIMIT:
751 		*(u_int *)data = ap->ap_qlimit;
752 		error = 0;
753 		break;
754 
755 	case AUDITPIPE_SET_QLIMIT:
756 		/* Lockless integer write. */
757 		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
758 		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
759 			ap->ap_qlimit = *(u_int *)data;
760 			error = 0;
761 		} else
762 			error = EINVAL;
763 		break;
764 
765 	case AUDITPIPE_GET_QLIMIT_MIN:
766 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
767 		error = 0;
768 		break;
769 
770 	case AUDITPIPE_GET_QLIMIT_MAX:
771 		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
772 		error = 0;
773 		break;
774 
775 	case AUDITPIPE_GET_PRESELECT_FLAGS:
776 		mtx_lock(&audit_pipe_mtx);
777 		maskp = (au_mask_t *)data;
778 		*maskp = ap->ap_preselect_flags;
779 		mtx_unlock(&audit_pipe_mtx);
780 		error = 0;
781 		break;
782 
783 	case AUDITPIPE_SET_PRESELECT_FLAGS:
784 		mtx_lock(&audit_pipe_mtx);
785 		maskp = (au_mask_t *)data;
786 		ap->ap_preselect_flags = *maskp;
787 		mtx_unlock(&audit_pipe_mtx);
788 		error = 0;
789 		break;
790 
791 	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
792 		mtx_lock(&audit_pipe_mtx);
793 		maskp = (au_mask_t *)data;
794 		*maskp = ap->ap_preselect_naflags;
795 		mtx_unlock(&audit_pipe_mtx);
796 		error = 0;
797 		break;
798 
799 	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
800 		mtx_lock(&audit_pipe_mtx);
801 		maskp = (au_mask_t *)data;
802 		ap->ap_preselect_naflags = *maskp;
803 		mtx_unlock(&audit_pipe_mtx);
804 		error = 0;
805 		break;
806 
807 	case AUDITPIPE_GET_PRESELECT_AUID:
808 		aip = (struct auditpipe_ioctl_preselect *)data;
809 		error = audit_pipe_preselect_get(ap, aip->aip_auid,
810 		    &aip->aip_mask);
811 		break;
812 
813 	case AUDITPIPE_SET_PRESELECT_AUID:
814 		aip = (struct auditpipe_ioctl_preselect *)data;
815 		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
816 		error = 0;
817 		break;
818 
819 	case AUDITPIPE_DELETE_PRESELECT_AUID:
820 		auid = *(au_id_t *)data;
821 		error = audit_pipe_preselect_delete(ap, auid);
822 		break;
823 
824 	case AUDITPIPE_FLUSH_PRESELECT_AUID:
825 		audit_pipe_preselect_flush(ap);
826 		error = 0;
827 		break;
828 
829 	case AUDITPIPE_GET_PRESELECT_MODE:
830 		mtx_lock(&audit_pipe_mtx);
831 		*(int *)data = ap->ap_preselect_mode;
832 		mtx_unlock(&audit_pipe_mtx);
833 		error = 0;
834 		break;
835 
836 	case AUDITPIPE_SET_PRESELECT_MODE:
837 		mode = *(int *)data;
838 		switch (mode) {
839 		case AUDITPIPE_PRESELECT_MODE_TRAIL:
840 		case AUDITPIPE_PRESELECT_MODE_LOCAL:
841 			mtx_lock(&audit_pipe_mtx);
842 			ap->ap_preselect_mode = mode;
843 			mtx_unlock(&audit_pipe_mtx);
844 			error = 0;
845 			break;
846 
847 		default:
848 			error = EINVAL;
849 		}
850 		break;
851 
852 	case AUDITPIPE_FLUSH:
853 		mtx_lock(&audit_pipe_mtx);
854 		audit_pipe_flush(ap);
855 		mtx_unlock(&audit_pipe_mtx);
856 		error = 0;
857 		break;
858 
859 	case AUDITPIPE_GET_MAXAUDITDATA:
860 		*(u_int *)data = MAXAUDITDATA;
861 		error = 0;
862 		break;
863 
864 	case AUDITPIPE_GET_INSERTS:
865 		*(u_int *)data = ap->ap_inserts;
866 		error = 0;
867 		break;
868 
869 	case AUDITPIPE_GET_READS:
870 		*(u_int *)data = ap->ap_reads;
871 		error = 0;
872 		break;
873 
874 	case AUDITPIPE_GET_DROPS:
875 		*(u_int *)data = ap->ap_drops;
876 		error = 0;
877 		break;
878 
879 	case AUDITPIPE_GET_TRUNCATES:
880 		*(u_int *)data = ap->ap_truncates;
881 		error = 0;
882 		break;
883 
884 	default:
885 		error = ENOTTY;
886 	}
887 	return (error);
888 }
889 
890 /*
891  * Audit pipe read.  Pull one record off the queue and copy to user space.
892  * On error, the record is dropped.
893  *
894  * Providing more sophisticated behavior, such as partial reads, is tricky
895  * due to the potential for parallel I/O.  If partial read support is
896  * required, it will require a per-pipe "current record being read" along
897  * with an offset into that trecord which has already been read.  Threads
898  * performing partial reads will need to allocate per-thread copies of the
899  * data so that if another thread completes the read of the record, it can be
900  * freed without adding reference count logic.  If this is added, a flag to
901  * indicate that only atomic record reads are desired would be useful, as if
902  * different threads are all waiting for records on the pipe, they will want
903  * independent record reads, which is currently the behavior.
904  */
905 static int
906 audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
907 {
908 	struct audit_pipe_entry *ape;
909 	struct audit_pipe *ap;
910 	int error;
911 
912 	ap = dev->si_drv1;
913 	KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));
914 	mtx_lock(&audit_pipe_mtx);
915 	do {
916 		/*
917 		 * Wait for a record that fits into the read buffer, dropping
918 		 * records that would be truncated if actually passed to the
919 		 * process.  This helps maintain the discreet record read
920 		 * interface.
921 		 */
922 		while ((ape = audit_pipe_pop(ap)) == NULL) {
923 			if (ap->ap_flags & AUDIT_PIPE_NBIO) {
924 				mtx_unlock(&audit_pipe_mtx);
925 				return (EAGAIN);
926 			}
927 			error = cv_wait_sig(&audit_pipe_cv, &audit_pipe_mtx);
928 			if (error) {
929 				mtx_unlock(&audit_pipe_mtx);
930 				return (error);
931 			}
932 		}
933 		if (ape->ape_record_len <= uio->uio_resid)
934 			break;
935 		audit_pipe_entry_free(ape);
936 		ap->ap_truncates++;
937 	} while (1);
938 	ap->ap_reads++;
939 	mtx_unlock(&audit_pipe_mtx);
940 
941 	/*
942 	 * Now read record to user space memory.  Even if the read is short,
943 	 * we abandon the remainder of the record, supporting only discreet
944 	 * record reads.
945 	 */
946 	error = uiomove(ape->ape_record, ape->ape_record_len, uio);
947 	audit_pipe_entry_free(ape);
948 	return (error);
949 }
950 
951 /*
952  * Audit pipe poll.
953  */
954 static int
955 audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
956 {
957 	struct audit_pipe *ap;
958 	int revents;
959 
960 	revents = 0;
961 	ap = dev->si_drv1;
962 	KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
963 	if (events & (POLLIN | POLLRDNORM)) {
964 		mtx_lock(&audit_pipe_mtx);
965 		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
966 			revents |= events & (POLLIN | POLLRDNORM);
967 		else
968 			selrecord(td, &ap->ap_selinfo);
969 		mtx_unlock(&audit_pipe_mtx);
970 	}
971 	return (revents);
972 }
973 
974 /*
975  * Audit pipe kqfilter.
976  */
977 static int
978 audit_pipe_kqfilter(struct cdev *dev, struct knote *kn)
979 {
980 	struct audit_pipe *ap;
981 
982 	ap = dev->si_drv1;
983 	KASSERT(ap != NULL, ("audit_pipe_kqfilter: ap == NULL"));
984 
985 	if (kn->kn_filter != EVFILT_READ)
986 		return (EINVAL);
987 
988 	kn->kn_fop = &audit_pipe_read_filterops;
989 	kn->kn_hook = ap;
990 
991 	mtx_lock(&audit_pipe_mtx);
992 	knlist_add(&ap->ap_selinfo.si_note, kn, 1);
993 	mtx_unlock(&audit_pipe_mtx);
994 	return (0);
995 }
996 
997 /*
998  * Return true if there are records available for reading on the pipe.
999  */
1000 static int
1001 audit_pipe_kqread(struct knote *kn, long hint)
1002 {
1003 	struct audit_pipe_entry *ape;
1004 	struct audit_pipe *ap;
1005 
1006 	mtx_assert(&audit_pipe_mtx, MA_OWNED);
1007 
1008 	ap = (struct audit_pipe *)kn->kn_hook;
1009 	KASSERT(ap != NULL, ("audit_pipe_kqread: ap == NULL"));
1010 
1011 	if (ap->ap_qlen != 0) {
1012 		ape = TAILQ_FIRST(&ap->ap_queue);
1013 		KASSERT(ape != NULL, ("audit_pipe_kqread: ape == NULL"));
1014 
1015 		kn->kn_data = ape->ape_record_len;
1016 		return (1);
1017 	} else {
1018 		kn->kn_data = 0;
1019 		return (0);
1020 	}
1021 }
1022 
/*
 * Detach kqueue state from audit pipe.  The pipe is recovered from the
 * knote's kn_hook (set in audit_pipe_kqfilter()) and the knote is removed
 * from the pipe's knote list with audit_pipe_mtx held.
 */
static void
audit_pipe_kqdetach(struct knote *kn)
{
	struct audit_pipe *ap;

	ap = (struct audit_pipe *)kn->kn_hook;
	KASSERT(ap != NULL, ("audit_pipe_kqdetach: ap == NULL"));

	mtx_lock(&audit_pipe_mtx);
	knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
	mtx_unlock(&audit_pipe_mtx);
}
1038 
/*
 * Initialize the audit pipe system: set up the global pipe list, the mutex
 * and the condition variable, then prepare device cloning and register the
 * dev_clone handler.  Failure to register the handler is fatal.
 */
static void
audit_pipe_init(void *unused)
{

	TAILQ_INIT(&audit_pipe_list);
	mtx_init(&audit_pipe_mtx, "audit_pipe_mtx", NULL, MTX_DEF);
	cv_init(&audit_pipe_cv, "audit_pipe_cv");

	/* Global state must be ready before the clone handler can fire. */
	clone_setup(&audit_pipe_clones);
	audit_pipe_eh_tag = EVENTHANDLER_REGISTER(dev_clone,
	    audit_pipe_clone, 0, 1000);
	if (audit_pipe_eh_tag == NULL)
		panic("audit_pipe_init: EVENTHANDLER_REGISTER");
}

/* Run audit_pipe_init() at boot, in the SI_SUB_DRIVERS stage. */
SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
    NULL);
1059