xref: /titanic_52/usr/src/uts/common/c2/audit_io.c (revision ea1a228c80597366447774aa1988868492330eb5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Routines for writing audit records.
24  *
25  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/door.h>
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/types.h>
35 #include <sys/statvfs.h>	/* for statfs */
36 #include <sys/vnode.h>
37 #include <sys/file.h>
38 #include <sys/vfs.h>
39 #include <sys/user.h>
40 #include <sys/uio.h>
41 #include <sys/reboot.h>
42 #include <sys/kmem.h>		/* for KM_SLEEP */
43 #include <sys/resource.h>	/* for RLIM_INFINITY */
44 #include <sys/cmn_err.h>	/* panic */
45 #include <sys/systm.h>
46 #include <sys/debug.h>
47 #include <sys/sysmacros.h>
48 #include <sys/syscall.h>
49 #include <sys/zone.h>
50 
51 #include <c2/audit.h>
52 #include <c2/audit_kernel.h>
53 #include <c2/audit_record.h>
54 #include <c2/audit_kevents.h>
55 #include <c2/audit_door_infc.h>
56 
57 static void	au_dequeue(au_kcontext_t *, au_buff_t *);
58 static void	audit_async_finish_backend(void *);
59 static int	audit_sync_block(au_kcontext_t *);
60 /*
61  * each of these two tables are indexed by the values AU_DBUF_COMPLETE
62  * through AU_DBUF_LAST; the content is the next state value.  The
63  * first table determines the next state for a buffer which is not the
64  * end of a record and the second table determines the state for a
65  * buffer which is the end of a record.  The initial state is
66  * AU_DBUF_COMPLETE.
67  */
68 static int state_if_part[] = {
69     AU_DBUF_FIRST, AU_DBUF_MIDDLE, AU_DBUF_MIDDLE, AU_DBUF_FIRST};
70 static int state_if_not_part[] = {
71     AU_DBUF_COMPLETE, AU_DBUF_LAST, AU_DBUF_LAST, AU_DBUF_COMPLETE};
72 /*
73  * Write to an audit descriptor.
74  * Add the au_membuf to the descriptor chain and free the chain passed in.
75  */
76 void
77 au_uwrite(m)
78 	token_t *m;
79 {
80 	au_write(&(u_ad), m);
81 }
82 
83 void
84 au_write(caddr_t *d, token_t *m)
85 {
86 	if (d == NULL) {
87 		au_toss_token(m);
88 		return;
89 	}
90 	if (m == (token_t *)0) {
91 		printf("au_write: null token\n");
92 		return;
93 	}
94 
95 	if (*d == NULL)
96 		*d = (caddr_t)m;
97 	else
98 		(void) au_append_rec((au_buff_t *)*d, m, AU_PACK);
99 }
100 #define	AU_INTERVAL 120
101 
102 /*
103  * Write audit information to the disk.
104  * Called from auditsvc(); EOL'd as of Sol 10.
105  * Local zones are not allowed; the caller (auditsvc()) enforces the
106  * restriction.
107  */
108 int
109 au_doio(vp, limit)
110 
111 	struct vnode *vp;
112 	int limit;
113 
114 {	/* AU_DOIO */
115 
116 	off_t		off;	/* space used in buffer */
117 	size_t		used;	/* space used in au_membuf */
118 	token_t		*cAR;	/* current AR being processed */
119 	token_t		*cMB;	/* current au_membuf being processed */
120 	token_t		*sp;	/* last AR processed */
121 	char		*bp;	/* start of free space in staging buffer */
122 	unsigned char	*cp;	/* ptr to data to be moved */
123 	au_kcontext_t	*kctx;
124 	/*
125 	 * size (data left in au_membuf - space in buffer)
126 	 */
127 	ssize_t		sz;
128 	ssize_t		len;	/* len of data to move, size of AR */
129 	int		error;	/* error return */
130 	ssize_t		left;	/* data not xfered by write to disk */
131 	statvfs64_t	sb;	/* buffer for statfs */
132 	size_t		curr_sz = 0;	/* amount of data written during now */
133 	int		part    = 0;	/* partial audit record written */
134 	int		partial = 0;	/* flag to force partial AR to file */
135 					/* 0 - idle, ignore */
136 					/* 1 - force write of audit record */
137 					/* 2 - finished writing AR, commit */
138 
139 	kctx = SET_KCTX_GZ;
140 
141 	ASSERT(kctx != NULL);
142 
143 	/*
144 	 * Check to ensure enough free space on audit device.
145 	 */
146 	bzero(&sb, sizeof (statvfs64_t));
147 	(void) VFS_STATVFS(vp->v_vfsp, &sb);
148 	/*
149 	 * Large Files: We do not convert any of this part of kernel
150 	 * to be large file aware. Original behaviour should be
151 	 * maintained. This function is called from audit_svc and
152 	 * it already checks for negative values of limit.
153 	 */
154 
155 	if (sb.f_blocks && (fsblkcnt64_t)limit > sb.f_bavail)
156 		return (ENOSPC);
157 
158 	if (kctx->auk_file_stat.af_filesz &&
159 		(kctx->auk_file_stat.af_currsz >=
160 		kctx->auk_file_stat.af_filesz))
161 		return (EFBIG);
162 
163 	/*
164 	 * has the write buffer changed length due to a auditctl(2)?
165 	 * (remember that auk_buffer is an element of auk_dbuffer)
166 	 */
167 	if (kctx->auk_queue.bufsz != kctx->auk_queue.buflen) {
168 
169 		kmem_free(kctx->auk_buffer, kctx->auk_queue.buflen);
170 
171 		/* bad, should not sleep here. Testing only */
172 		kctx->auk_buffer = kmem_alloc(kctx->auk_queue.bufsz, KM_SLEEP);
173 
174 		kctx->auk_queue.buflen = kctx->auk_queue.bufsz;
175 	}
176 
177 	if (!kctx->auk_queue.head) {
178 		goto nodata;
179 	}
180 	sp   = (token_t *)0; /* no AR copied */
181 	off  = 0;	/* no space used in buffer */
182 	used = 0;	/* no data processed in au_membuf */
183 	cAR  = kctx->auk_queue.head;	/* start at head of queue */
184 	cMB  = cAR;	/* start with first au_membuf of record */
185 	bp = &(kctx->auk_buffer[0]);	/* start at beginning of buffer */
186 
187 	while (cMB) {
188 		ASSERT(kctx->auk_queue.head != NULL);
189 
190 		/* indicate audit record being processed */
191 		part = 1;
192 
193 		/* pointer to buffer data */
194 		cp  = memtod(cMB, unsigned char *);
195 		/* data left in au_membuf */
196 		sz  = (ssize_t)cMB->len - used;
197 		/* len to move */
198 		len = (ssize_t)MIN(sz, kctx->auk_queue.buflen - off);
199 
200 		/* move the data */
201 		bcopy(cp + used, bp + off, len);
202 		used += len; /* update used au_membuf */
203 		off  += len; /* update offset into buffer */
204 
205 		if (used >= (ssize_t)cMB->len) {
206 			/* advance to next au_membuf */
207 			used = 0;
208 			cMB  = cMB->next_buf;
209 		}
210 		if (cMB == (au_buff_t *)0) {
211 			/* advance to next AR */
212 			sp   = cAR;
213 			cAR  = cAR->next_rec;
214 			cMB  = cAR;
215 			/* reached end of an audit record */
216 			part = 0;
217 			/* force abort at end of audit record? */
218 			if (partial == 1)
219 				partial = 2;
220 		}
221 		/*
222 		 * If we've reached end of buffer, or have run out of
223 		 * audit records on the queue or we've processed a
224 		 * partial audit record to complete the audit file,
225 		 * then its time to flush the holding buffer to the
226 		 * audit trail.
227 		 */
228 		if ((kctx->auk_queue.buflen == off) ||
229 		    (cAR == (au_buff_t *)0) ||
230 		    (partial == 2)) {
231 
232 			left = 0;
233 			/*
234 			 * Largefiles: We purposely pass a value of
235 			 * MAXOFF_T as we do not want any of the
236 			 * auditing files to exceed 2GB. May be we will
237 			 * support this in future.
238 			 */
239 			error = vn_rdwr(UIO_WRITE, vp, kctx->auk_buffer,
240 				off, 0LL, UIO_SYSSPACE, FAPPEND,
241 				(rlim64_t)MAXOFF_T, CRED(), &left);
242 
243 			/* error on write */
244 			if (error != 0) {
245 				if (error == EDQUOT)
246 					error = ENOSPC;
247 				return (error);
248 			}
249 
250 			/* end of file system? */
251 			if (left) {
252 				au_buff_t *b = NULL;
253 
254 				sz = off - left; /* how much written */
255 
256 				/* update space counters */
257 				kctx->auk_file_stat.af_currsz += sz;
258 
259 				/* which AR are done */
260 				cAR = kctx->auk_queue.head;
261 				while (sz) {
262 					cp  = memtod(cAR, unsigned char *);
263 					len = (ssize_t)((cp[1]<<24 | cp[2]<<16 |
264 						cp[3]<<8 | cp[4]) &
265 						0xffffffffU);
266 
267 					if (len > sz)
268 						break;
269 					b = cAR;
270 					cAR = cAR->next_rec;
271 					sz -= len;
272 				}
273 				if (b != NULL)
274 					au_dequeue(kctx, b);
275 
276 				return (ENOSPC);
277 
278 			} else {	/* still space in file system */
279 				/* if we've written an AR */
280 				if (sp) {
281 					/*
282 					 * free records up to last one copied.
283 					 */
284 					au_dequeue(kctx, sp);
285 				}
286 				/* Update sizes */
287 				curr_sz += off;
288 				kctx->auk_file_stat.af_currsz += (uint_t)off;
289 
290 				/* reset auk_buffer pointers */
291 				sp = (token_t *)0;
292 				off  = 0;
293 				bp   = &(kctx->auk_buffer[0]);
294 
295 				/* check exit conditions */
296 				if (sb.f_blocks) {
297 					ulong_t blks_used;
298 					blks_used = (curr_sz / sb.f_bsize);
299 					if ((fsblkcnt64_t)limit >
300 				(sb.f_bavail - (fsblkcnt64_t)blks_used)) {
301 						/*
302 						 * if we haven't put out a
303 						 * complete audit record,
304 						 * continue to process the
305 						 * audit queue until we reach
306 						 * the end of the record.
307 						 */
308 						if (part && (partial == 0)) {
309 							partial = 1;
310 							continue;
311 						}
312 						/*
313 						 * exit if complete record
314 						 */
315 						if (partial != 1)
316 							return (ENOSPC);
317 					}
318 				}
319 				if (kctx->auk_file_stat.af_filesz &&
320 					(kctx->auk_file_stat.af_currsz
321 					>= kctx->auk_file_stat.af_filesz)) {
322 						/*
323 						 * force a complete audit
324 						 * record to the trail.
325 						 */
326 						if (partial == 0)
327 							partial = 1;
328 						/*
329 						 * Written data to AR boundry.
330 						 */
331 						if (partial != 1)
332 							return (EFBIG);
333 				}
334 			}
335 		}
336 	}	/* while(cMB) */
337 
338 nodata:
339 	return (0);
340 }
341 
342 /*
343  * Close an audit descriptor.
344  * Use the second parameter to indicate if it should be written or not.
345  */
346 void
347 au_close(au_kcontext_t *kctx, caddr_t *d, int flag, short e_type, short e_mod)
348 {
349 	token_t *dchain;	/* au_membuf chain which is the tokens */
350 	t_audit_data_t *tad = U2A(u);
351 
352 	ASSERT(tad != NULL);
353 	ASSERT(d != NULL);
354 	ASSERT(kctx != NULL);
355 
356 	if ((dchain = (token_t *)*d) == (token_t *)NULL)
357 		return;
358 
359 	*d = NULL;
360 
361 	/*
362 	 * If async then defer; or if requested, defer the closing/queueing to
363 	 * syscall end, unless no syscall is active or the syscall is _exit.
364 	 */
365 	if ((flag & AU_DONTBLOCK) || ((flag & AU_DEFER) &&
366 	    (tad->tad_scid != 0) && (tad->tad_scid != SYS_exit))) {
367 		au_close_defer(dchain, flag, e_type, e_mod);
368 		return;
369 	}
370 	au_close_time(kctx, dchain, flag, e_type, e_mod, NULL);
371 }
372 
373 /*
374  * Defer closing/queueing of an audit descriptor. For async events, queue
375  * via softcall. Otherwise, defer by queueing the record onto the tad; at
376  * syscall end time it will be pulled off.
377  */
378 void
379 au_close_defer(token_t *dchain, int flag, short e_type, short e_mod)
380 {
381 	au_defer_info_t	*attr;
382 	t_audit_data_t *tad = U2A(u);
383 
384 	ASSERT(tad != NULL);
385 
386 	/* If not to be written, toss the record. */
387 	if ((flag & AU_OK) == 0) {
388 		au_toss_token(dchain);
389 		return;
390 	}
391 
392 	attr = kmem_alloc(sizeof (au_defer_info_t), KM_NOSLEEP);
393 	/* If no mem available, failing silently is the best recourse */
394 	if (attr == NULL) {
395 		au_toss_token(dchain);
396 		return;
397 	}
398 
399 	attr->audi_next = NULL;
400 	attr->audi_ad = dchain;
401 	attr->audi_e_type = e_type;
402 	attr->audi_e_mod = e_mod;
403 	attr->audi_flag = flag;
404 	gethrestime(&attr->audi_atime);
405 
406 	/*
407 	 * All async events must be queued via softcall to avoid possible
408 	 * sleeping in high interrupt context. softcall will ensure it's
409 	 * done on a dedicated software-level interrupt thread.
410 	 */
411 	if (flag & AU_DONTBLOCK) {
412 		softcall(audit_async_finish_backend, attr);
413 		audit_async_done(NULL, 0);
414 		return;
415 	}
416 
417 	/*
418 	 * If not an async event, defer by queuing onto the tad until
419 	 * syscall end. No locking is needed because the tad is per-thread.
420 	 */
421 	if (tad->tad_defer_head)
422 		tad->tad_defer_tail->audi_next = attr;
423 	else
424 		tad->tad_defer_head = attr;
425 	tad->tad_defer_tail = attr;
426 }
427 
428 
429 /*
430  * Save the time in the event header. If time is not specified (i.e., pointer
431  * is NULL), use the current time.  This code is fairly ugly since it needs
432  * to support both 32- and 64-bit environments and can be called indirectly
433  * from both au_close() (for kernel audit) and from audit() (userland audit).
434  */
435 /*ARGSUSED*/
436 static void
437 au_save_time(adr_t *hadrp, timestruc_t *time, int size)
438 {
439 	struct {
440 		uint32_t sec;
441 		uint32_t usec;
442 	} tv;
443 	timestruc_t	now;
444 
445 	if (time == NULL) {
446 		gethrestime(&now);
447 		time = &now;
448 	}
449 
450 #ifdef _LP64
451 	if (size)
452 		adr_int64(hadrp, (int64_t *)time, 2);
453 	else
454 #endif
455 	{
456 		tv.sec = (uint32_t)time->tv_sec;
457 		tv.usec = (uint32_t)time->tv_nsec;
458 		adr_int32(hadrp, (int32_t *)&tv, 2);
459 	}
460 }
461 
462 
463 /*
464  * Close an audit descriptor.
465  * If time of event is specified, use it in the record, otherwise use the
466  * current time.
467  */
468 void
469 au_close_time(au_kcontext_t *kctx, token_t *dchain, int flag, short e_type,
470     short e_mod, timestruc_t *etime)
471 {
472 	token_t 	*record;	/* au_membuf chain == the record */
473 	int		byte_count;
474 	token_t 	*m;		/* for potential sequence token */
475 	adr_t		hadr;		/* handle for header token */
476 	adr_t		sadr;		/* handle for sequence token */
477 	size_t		zone_length;	/* length of zonename token */
478 
479 	ASSERT(dchain != NULL);
480 
481 	/* If not to be written, toss the record */
482 	if ((flag & AU_OK) == 0) {
483 		au_toss_token(dchain);
484 		return;
485 	}
486 	/* if auditing not enabled, then don't generate an audit record */
487 	ASSERT(kctx != NULL);
488 
489 	if ((kctx->auk_auditstate != AUC_AUDITING) &&
490 	    (kctx->auk_auditstate != AUC_INIT_AUDIT)) {
491 		/*
492 		 * at system boot, neither is set yet we want to generate
493 		 * an audit record.
494 		 */
495 		if (e_type != AUE_SYSTEMBOOT) {
496 			au_toss_token(dchain);
497 			return;
498 		}
499 	}
500 
501 	/* Count up the bytes used in the record. */
502 	byte_count = au_token_size(dchain);
503 
504 	/*
505 	 * add in size of header token (always present).
506 	 */
507 	byte_count += sizeof (char) + sizeof (int32_t) +
508 	    sizeof (char) + 2 * sizeof (short) + sizeof (timestruc_t);
509 
510 	if (kctx->auk_hostaddr_valid)
511 	    byte_count += sizeof (int32_t) + kctx->auk_info.ai_termid.at_type;
512 
513 	/*
514 	 * add in size of zonename token (zero if !AUDIT_ZONENAME)
515 	 */
516 	if (kctx->auk_policy & AUDIT_ZONENAME) {
517 		zone_length = au_zonename_length();
518 		byte_count += zone_length;
519 	} else {
520 		zone_length = 0;
521 	}
522 	/* add in size of (optional) trailer token */
523 	if (kctx->auk_policy & AUDIT_TRAIL)
524 		byte_count += 7;
525 
526 	/* add in size of (optional) sequence token */
527 	if (kctx->auk_policy & AUDIT_SEQ)
528 		byte_count += 5;
529 
530 	/* build the header */
531 	if (kctx->auk_hostaddr_valid)
532 		record = au_to_header_ex(byte_count, e_type, e_mod);
533 	else
534 		record = au_to_header(byte_count, e_type, e_mod);
535 
536 	/*
537 	 * If timestamp was specified, save it in header now. Otherwise,
538 	 * save reference to header so we can update time/data later
539 	 * and artificially adjust pointer to the time/date field of header.
540 	 */
541 	adr_start(&hadr, memtod(record, char *));
542 	hadr.adr_now += sizeof (char) + sizeof (int32_t) +
543 	    sizeof (char) + 2 * sizeof (short);
544 	if (kctx->auk_hostaddr_valid)
545 		hadr.adr_now += sizeof (int32_t) +
546 		    kctx->auk_info.ai_termid.at_type;
547 	if (etime != NULL) {
548 		au_save_time(&hadr, etime, 1);
549 		hadr.adr_now = (char *)NULL;
550 	}
551 
552 	/* append body of audit record */
553 	(void) au_append_rec(record, dchain, AU_PACK);
554 
555 	/* add (optional) zonename token */
556 	if (zone_length > 0) {
557 		m = au_to_zonename(zone_length);
558 		(void) au_append_rec(record, m, AU_PACK);
559 	}
560 
561 	/* Add an (optional) sequence token. NULL offset if none */
562 	if (kctx->auk_policy & AUDIT_SEQ) {
563 		/* get the sequence token */
564 		m = au_to_seq();
565 
566 		/* link to audit record (i.e. don't pack the data) */
567 		(void) au_append_rec(record, m, AU_LINK);
568 
569 		/*
570 		 * advance to count field of sequence token by skipping
571 		 * the token type byte.
572 		 */
573 		adr_start(&sadr, memtod(m, char *));
574 		sadr.adr_now += 1;
575 	} else {
576 		sadr.adr_now = NULL;
577 	}
578 	/* add (optional) trailer token */
579 	if (kctx->auk_policy & AUDIT_TRAIL) {
580 		(void) au_append_rec(record, au_to_trailer(byte_count),
581 		    AU_PACK);
582 	}
583 
584 	/*
585 	 * 1 - use 64 bit version of audit tokens for 64 bit kernels.
586 	 * 0 - use 32 bit version of audit tokens for 32 bit kernels.
587 	 */
588 #ifdef _LP64
589 	au_enqueue(kctx, record, &hadr, &sadr, 1, flag & AU_DONTBLOCK);
590 #else
591 	au_enqueue(kctx, record, &hadr, &sadr, 0, flag & AU_DONTBLOCK);
592 #endif
593 	AS_INC(as_totalsize, byte_count, kctx);
594 }
595 
596 /*ARGSUSED*/
597 void
598 au_enqueue(au_kcontext_t *kctx, au_buff_t *m, adr_t *hadrp, adr_t *sadrp,
599     int size, int dontblock)
600 {
601 	if (kctx == NULL)
602 		return;
603 
604 	mutex_enter(&(kctx->auk_queue.lock));
605 
606 	if (!dontblock && (kctx->auk_queue.cnt >= kctx->auk_queue.hiwater) &&
607 	    audit_sync_block(kctx)) {
608 		mutex_exit(&(kctx->auk_queue.lock));
609 		au_free_rec(m);
610 		return;
611 	}
612 
613 	/* Fill in date and time if needed */
614 	if (hadrp->adr_now) {
615 		au_save_time(hadrp, NULL, size);
616 	}
617 
618 	/* address will be non-zero only if AUDIT_SEQ set */
619 	if (sadrp->adr_now) {
620 		kctx->auk_sequence++;
621 		adr_int32(sadrp, (int32_t *)&(kctx->auk_sequence), 1);
622 	}
623 
624 	if (kctx->auk_queue.head)
625 		kctx->auk_queue.tail->next_rec = m;
626 	else
627 		kctx->auk_queue.head = m;
628 
629 	kctx->auk_queue.tail = m;
630 
631 	if (++(kctx->auk_queue.cnt) >
632 	    kctx->auk_queue.lowater && kctx->auk_queue.rd_block)
633 		cv_broadcast(&(kctx->auk_queue.read_cv));
634 
635 	mutex_exit(&(kctx->auk_queue.lock));
636 
637 	/* count # audit records put onto kernel audit queue */
638 	AS_INC(as_enqueue, 1, kctx);
639 }
640 
641 /*
642  * Dequeue and free buffers upto and including "freeto"
643  * Keeps the queue lock long but acquires it only once when doing
644  * bulk dequeueing.
645  */
646 static void
647 au_dequeue(au_kcontext_t *kctx, au_buff_t *freeto)
648 {
649 	au_buff_t *m, *l, *lastl;
650 	int n = 0;
651 
652 	ASSERT(kctx != NULL);
653 
654 	mutex_enter(&(kctx->auk_queue.lock));
655 
656 	ASSERT(kctx->auk_queue.head != NULL);
657 	ASSERT(freeto != NULL);
658 
659 	l = m = kctx->auk_queue.head;
660 
661 	do {
662 		n++;
663 		lastl = l;
664 		l = l->next_rec;
665 	} while (l != NULL && freeto != lastl);
666 
667 	kctx->auk_queue.cnt -= n;
668 	lastl->next_rec = NULL;
669 	kctx->auk_queue.head = l;
670 
671 	/* Freeto must exist in the list */
672 	ASSERT(freeto == lastl);
673 
674 	if (kctx->auk_queue.cnt <= kctx->auk_queue.lowater &&
675 	    kctx->auk_queue.wt_block)
676 		cv_broadcast(&(kctx->auk_queue.write_cv));
677 
678 	mutex_exit(&(kctx->auk_queue.lock));
679 
680 	while (m) {
681 		l = m->next_rec;
682 		au_free_rec(m);
683 		m = l;
684 	}
685 	AS_INC(as_written, n, kctx);
686 }
687 
688 /*
689  * audit_sync_block()
690  * If we've reached the high water mark, we look at the policy to see
691  * if we sleep or we should drop the audit record.
692  * This function is called with the auk_queue.lock held and the check
693  * performed one time already as an optimization.  Caller should unlock.
694  * Returns 1 if the caller needs to free the record.
695  */
696 static int
697 audit_sync_block(au_kcontext_t *kctx)
698 {
699 	ASSERT(MUTEX_HELD(&(kctx->auk_queue.lock)));
700 	/*
701 	 * Loop while we are at the high watermark.
702 	 */
703 	do {
704 		if ((kctx->auk_auditstate != AUC_AUDITING) ||
705 		    (kctx->auk_policy & AUDIT_CNT)) {
706 
707 			/* just count # of dropped audit records */
708 			AS_INC(as_dropped, 1, kctx);
709 
710 			return (1);
711 		}
712 
713 		/* kick reader awake if its asleep */
714 		if (kctx->auk_queue.rd_block &&
715 		    kctx->auk_queue.cnt > kctx->auk_queue.lowater)
716 			cv_broadcast(&(kctx->auk_queue.read_cv));
717 
718 		/* keep count of # times blocked */
719 		AS_INC(as_wblocked, 1, kctx);
720 
721 		/* sleep now, until woken by reader */
722 		kctx->auk_queue.wt_block++;
723 		cv_wait(&(kctx->auk_queue.write_cv), &(kctx->auk_queue.lock));
724 		kctx->auk_queue.wt_block--;
725 	} while (kctx->auk_queue.cnt >= kctx->auk_queue.hiwater);
726 
727 	return (0);
728 }
729 
730 /*
731  * audit_async_block()
732  * if we've reached the high water mark, we look at the ahlt policy to see
733  * if we reboot we should drop the audit record.
734  * Returns 1 if blocked.
735  */
736 static int
737 audit_async_block(au_kcontext_t *kctx, caddr_t *rpp)
738 {
739 	ASSERT(kctx != NULL);
740 
741 	mutex_enter(&(kctx->auk_queue.lock));
742 	/* see if we've reached high water mark */
743 	if (kctx->auk_queue.cnt >= kctx->auk_queue.hiwater) {
744 		mutex_exit(&(kctx->auk_queue.lock));
745 
746 		audit_async_drop(rpp, AU_BACKEND);
747 		return (1);
748 	}
749 	mutex_exit(&(kctx->auk_queue.lock));
750 	return (0);
751 }
752 
753 /*
754  * au_door_upcall.  auditdoor() may change vp without notice, so
755  * some locking seems in order.
756  *
757  */
758 #define	AGAIN_TICKS	10
759 
760 static int
761 au_door_upcall(au_kcontext_t *kctx, au_dbuf_t *aubuf)
762 {
763 	int		rc;
764 	door_arg_t	darg;
765 	int		retry = 1;
766 	int		ticks_to_wait;
767 
768 	darg.data_ptr = (char *)aubuf;
769 	darg.data_size = AU_DBUF_HEADER + aubuf->aub_size;
770 
771 	darg.desc_ptr = NULL;
772 	darg.desc_num = 0;
773 
774 	while (retry == 1) {
775 		/* non-zero means return results expected */
776 		darg.rbuf = (char *)aubuf;
777 		darg.rsize = darg.data_size;
778 
779 		retry = 0;
780 		mutex_enter(&(kctx->auk_svc_lock));
781 		if ((rc = door_upcall(kctx->auk_current_vp, &darg)) != 0) {
782 			mutex_exit(&(kctx->auk_svc_lock));
783 			if (rc == EAGAIN)
784 				ticks_to_wait = AGAIN_TICKS;
785 			else
786 				return (rc);
787 
788 			mutex_enter(&(kctx->auk_eagain_mutex));
789 			(void) cv_timedwait(&(kctx->auk_eagain_cv),
790 			    &(kctx->auk_eagain_mutex),
791 			    lbolt + ticks_to_wait);
792 			mutex_exit(&(kctx->auk_eagain_mutex));
793 
794 			retry = 1;
795 		} else
796 			mutex_exit(&(kctx->auk_svc_lock));	/* no retry */
797 	}	/* end while (retry == 1) */
798 	if (darg.rbuf == NULL)
799 		return (-1);
800 
801 	/* return code from door server */
802 	return (*(int *)darg.rbuf);
803 }
804 
805 /*
806  * Write an audit control message to the door handle.  The message
807  * structure depends on message_code and at present the only control
808  * message defined is for a policy change.  These are infrequent,
809  * so no memory is held for control messages.
810  */
811 int
812 au_doormsg(au_kcontext_t *kctx, uint32_t message_code, void *message)
813 {
814 	int		rc;
815 	au_dbuf_t	*buf;
816 	size_t		alloc_size;
817 
818 	switch (message_code) {
819 	case AU_DBUF_POLICY:
820 		alloc_size = AU_DBUF_HEADER + sizeof (uint32_t);
821 		buf = kmem_alloc(alloc_size, KM_SLEEP);
822 		buf->aub_size = sizeof (uint32_t);
823 		*(uint32_t *)buf->aub_buf = *(uint32_t *)message;
824 		break;
825 	case AU_DBUF_SHUTDOWN:
826 		alloc_size = AU_DBUF_HEADER;
827 		buf = kmem_alloc(alloc_size, KM_SLEEP);
828 		buf->aub_size = 0;
829 		break;
830 	default:
831 		return (1);
832 	}
833 
834 	buf->aub_type = AU_DBUF_NOTIFY | message_code;
835 	rc = au_door_upcall(kctx, buf);
836 	kmem_free(buf, alloc_size);
837 
838 	return (rc);
839 }
840 
841 /*
842  * Write audit information to the door handle.  au_doorio is called with
843  * one or more complete audit records on the queue and outputs those
844  * records in buffers of up to auk_queue.buflen in size.
845  */
846 int
847 au_doorio(au_kcontext_t *kctx) {
848 	off_t		off;	/* space used in buffer */
849 	ssize_t		used;	/* space used in au_membuf */
850 	token_t		*cAR;	/* current AR being processed */
851 	token_t		*cMB;	/* current au_membuf being processed */
852 	token_t		*sp;	/* last AR processed */
853 	char		*bp;	/* start of free space in staging buffer */
854 	unsigned char	*cp;	/* ptr to data to be moved */
855 	int		error;  /* return from door upcall */
856 
857 	/*
858 	 * size (data left in au_membuf - space in buffer)
859 	 */
860 	ssize_t		sz;
861 	ssize_t		len;	/* len of data to move, size of AR */
862 	ssize_t		curr_sz = 0;	/* amount of data written during now */
863 	/*
864 	 * partial_state is AU_DBUF_COMPLETE...LAST; see audit_door_infc.h
865 	 */
866 	int		part    = 0;	/* partial audit record written */
867 	int		partial_state = AU_DBUF_COMPLETE;
868 	/*
869 	 * Has the write buffer changed length due to a auditctl(2)?
870 	 * Initial allocation is from audit_start.c/audit_init()
871 	 */
872 	if (kctx->auk_queue.bufsz != kctx->auk_queue.buflen) {
873 		kmem_free(kctx->auk_dbuffer, AU_DBUF_HEADER +
874 		    kctx->auk_queue.buflen);
875 
876 		kctx->auk_dbuffer = kmem_alloc(AU_DBUF_HEADER +
877 		    kctx->auk_queue.bufsz, KM_SLEEP);
878 
879 		/* omit the 64 bit header */
880 		kctx->auk_queue.buflen = kctx->auk_queue.bufsz;
881 	}
882 	if (!kctx->auk_queue.head)
883 		goto nodata;
884 
885 	sp   = NULL;	/* no record copied */
886 	off  = 0;	/* no space used in buffer */
887 	used = 0;	/* no data processed in au_membuf */
888 	cAR  = kctx->auk_queue.head;	/* start at head of queue */
889 	cMB  = cAR;	/* start with first au_membuf of record */
890 
891 	/* start at beginning of buffer */
892 	bp   = &(kctx->auk_dbuffer->aub_buf[0]);
893 
894 	while (cMB) {
895 		part = 1;	/* indicate audit record being processed */
896 
897 		cp  = memtod(cMB, unsigned char *); /* buffer ptr */
898 
899 		sz  = (ssize_t)cMB->len - used;	/* data left in au_membuf */
900 		/* len to move */
901 		len = (ssize_t)MIN(sz, kctx->auk_queue.buflen - off);
902 
903 		/* move the data */
904 		bcopy(cp + used, bp + off, len);
905 		used += len; /* update used au_membuf */
906 		off  += len; /* update offset into buffer */
907 
908 		if (used >= (ssize_t)cMB->len) {
909 			/* advance to next au_membuf */
910 			used = 0;
911 			cMB  = cMB->next_buf;
912 		}
913 		if (cMB == NULL) {
914 			/* advance to next audit record */
915 			sp   = cAR;
916 			cAR  = cAR->next_rec;
917 			cMB  = cAR;
918 			part = 0;	/* have a complete record */
919 		}
920 		error = 0;
921 		if ((kctx->auk_queue.buflen == off) || (part == 0)) {
922 			if (part)
923 				partial_state = state_if_part[partial_state];
924 			else
925 				partial_state =
926 				    state_if_not_part[partial_state];
927 
928 			kctx->auk_dbuffer->aub_type = partial_state;
929 			kctx->auk_dbuffer->aub_size = off;
930 			error = au_door_upcall(kctx, kctx->auk_dbuffer);
931 			if (error != 0)
932 				goto nodata;
933 			/*
934 			 * if we've successfully written an audit record,
935 			 * free records up to last full record copied
936 			 */
937 			if (sp)
938 				au_dequeue(kctx, sp);
939 
940 				/* Update size */
941 			curr_sz += off;
942 
943 				/* reset auk_dbuffer pointers */
944 			sp = NULL;
945 			off  = 0;
946 		}
947 	}	/* while(cMB) */
948 nodata:
949 	return (error);
950 }
951 
952 /*
953  * Clean up thread audit state to clear out asynchronous audit record
954  * generation error recovery processing. Note that this is done on a
955  * per-thread basis and thus does not need any locking.
956  */
957 void
958 audit_async_done(caddr_t *rpp, int flags)
959 {
960 	t_audit_data_t *tad = U2A(u);
961 
962 	/* clean up the tad unless called from softcall backend */
963 	if (!(flags & AU_BACKEND)) {
964 		ASSERT(tad != NULL);
965 		ASSERT(tad->tad_ctrl & PAD_ERRJMP);
966 
967 		tad->tad_ctrl &= ~PAD_ERRJMP;
968 		tad->tad_errjmp = NULL;
969 	}
970 
971 	/* clean out partial audit record */
972 	if ((rpp != NULL) && (*rpp != NULL)) {
973 		au_toss_token((au_buff_t *)*rpp);
974 		*rpp = NULL;
975 	}
976 }
977 
978 /*
979  * implement the audit policy for asynchronous events generated within
980  * the kernel.
981  * XXX might need locks around audit_policy check.
982  */
983 void
984 audit_async_drop(caddr_t *rpp, int flags)
985 {
986 	au_kcontext_t	*kctx;
987 
988 	/* could not generate audit record, clean up */
989 	audit_async_done((caddr_t *)rpp, flags);
990 
991 	kctx = SET_KCTX_GZ;
992 	ASSERT(kctx != NULL);
993 	/* just drop the record and return */
994 	if (((audit_policy & AUDIT_AHLT) == 0) ||
995 	    (kctx->auk_auditstate == AUC_INIT_AUDIT)) {
996 		/* just count # of dropped audit records */
997 		AS_INC(as_dropped, 1, kctx);
998 		return;
999 	}
1000 
1001 	/*
1002 	 * There can be a lot of data in the audit queue. We
1003 	 * will first sync the file systems then attempt to
1004 	 * shutdown the kernel so that a memory dump is
1005 	 * performed.
1006 	 */
1007 	sync();
1008 	sync();
1009 
1010 	/*
1011 	 * now shut down. What a cruel world it has been
1012 	 */
1013 	panic("non-attributable halt. should dump core");
1014 	/* No return */
1015 }
1016 
1017 int
1018 audit_async_start(label_t *jb, int event, int sorf)
1019 {
1020 	t_audit_data_t *tad = U2A(u);
1021 	au_state_t estate;
1022 	int success = 0, failure = 0;
1023 	au_kcontext_t	*kctx = SET_KCTX_GZ;
1024 
1025 	ASSERT(kctx != NULL);
1026 
1027 	/* if audit state off, then no audit record generation */
1028 	if ((kctx->auk_auditstate != AUC_AUDITING) &&
1029 	    (kctx->auk_auditstate != AUC_INIT_AUDIT))
1030 		return (1);
1031 
1032 	/*
1033 	 * preselect asynchronous event
1034 	 * XXX should we check for out-of-range???
1035 	 */
1036 	estate = kctx->auk_ets[event];
1037 
1038 	if (sorf & AUM_SUCC)
1039 		success = kctx->auk_info.ai_mask.as_success & estate;
1040 	if (sorf & AUM_FAIL)
1041 		failure = kctx->auk_info.ai_mask.as_failure & estate;
1042 
1043 	if ((success | failure) == NULL)
1044 		return (1);
1045 
1046 	ASSERT(tad->tad_errjmp == NULL);
1047 	tad->tad_errjmp = (void *)jb;
1048 	tad->tad_ctrl |= PAD_ERRJMP;
1049 
1050 	return (0);
1051 }
1052 
1053 /*
1054  * Complete auditing of an async event. The AU_DONTBLOCK flag to au_close will
1055  * result in the backend routine being invoked from softcall, so all the real
1056  * work can be done in a safe context.
1057  */
1058 void
1059 audit_async_finish(caddr_t *ad, int aid, int amod)
1060 {
1061 	au_kcontext_t	*kctx;
1062 
1063 	kctx  = SET_KCTX_GZ;
1064 	ASSERT(kctx != NULL);
1065 
1066 	au_close(kctx, ad, AU_DONTBLOCK | AU_OK, aid, PAD_NONATTR|amod);
1067 }
1068 
1069 /*
1070  * Backend routine to complete an async audit. Invoked from softcall.
1071  * (Note: the blocking and the queuing below both involve locking which can't
1072  * be done safely in high interrupt context due to the chance of sleeping on
1073  * the corresponding adaptive mutex. Hence the softcall.)
1074  */
1075 static void
1076 audit_async_finish_backend(void *addr)
1077 {
1078 	au_kcontext_t	*kctx;
1079 	au_defer_info_t	*attr = (au_defer_info_t *)addr;
1080 
1081 	if (attr == NULL)
1082 		return;		/* won't happen unless softcall is broken */
1083 
1084 	kctx  = SET_KCTX_GZ;
1085 	ASSERT(kctx != NULL);
1086 
1087 	if (audit_async_block(kctx, (caddr_t *)&attr->audi_ad)) {
1088 		kmem_free(attr, sizeof (au_defer_info_t));
1089 		return;
1090 	}
1091 
1092 	/*
1093 	 * Call au_close_time to complete the audit with the saved values.
1094 	 *
1095 	 * For the exit-prom event, use the current time instead of the
1096 	 * saved time as a better approximation. (Because the time saved via
1097 	 * gethrestime during prom-exit handling would not yet be caught up
1098 	 * after the system was idled in the debugger for a period of time.)
1099 	 */
1100 	if (attr->audi_e_type == AUE_EXITPROM) {
1101 		au_close_time(kctx, (token_t *)attr->audi_ad, attr->audi_flag,
1102 		    attr->audi_e_type, attr->audi_e_mod, NULL);
1103 	} else {
1104 		au_close_time(kctx, (token_t *)attr->audi_ad, attr->audi_flag,
1105 		    attr->audi_e_type, attr->audi_e_mod, &attr->audi_atime);
1106 	}
1107 
1108 	AS_INC(as_generated, 1, kctx);
1109 	AS_INC(as_nonattrib, 1, kctx);
1110 
1111 	kmem_free(attr, sizeof (au_defer_info_t));
1112 }
1113