xref: /titanic_51/usr/src/uts/common/c2/audit_io.c (revision 6733190958bbcc0bd6d1d601e7ae0a6994dafb45)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Routines for writing audit records.
23  *
24  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/door.h>
31 #include <sys/param.h>
32 #include <sys/time.h>
33 #include <sys/types.h>
34 #include <sys/statvfs.h>	/* for statfs */
35 #include <sys/vnode.h>
36 #include <sys/file.h>
37 #include <sys/vfs.h>
38 #include <sys/user.h>
39 #include <sys/uio.h>
40 #include <sys/reboot.h>
41 #include <sys/kmem.h>		/* for KM_SLEEP */
42 #include <sys/resource.h>	/* for RLIM_INFINITY */
43 #include <sys/cmn_err.h>	/* panic */
44 #include <sys/systm.h>
45 #include <sys/debug.h>
46 #include <sys/sysmacros.h>
47 #include <sys/syscall.h>
48 #include <sys/zone.h>
49 
50 #include <c2/audit.h>
51 #include <c2/audit_kernel.h>
52 #include <c2/audit_record.h>
53 #include <c2/audit_kevents.h>
54 #include <c2/audit_door_infc.h>
55 
56 static void	au_dequeue(au_kcontext_t *, au_buff_t *);
57 static void	audit_async_finish_backend(void *);
58 static int	audit_sync_block(au_kcontext_t *);
59 /*
60  * each of these two tables are indexed by the values AU_DBUF_COMPLETE
61  * through AU_DBUF_LAST; the content is the next state value.  The
62  * first table determines the next state for a buffer which is not the
63  * end of a record and the second table determines the state for a
64  * buffer which is the end of a record.  The initial state is
65  * AU_DBUF_COMPLETE.
66  */
67 static int state_if_part[] = {
68     AU_DBUF_FIRST, AU_DBUF_MIDDLE, AU_DBUF_MIDDLE, AU_DBUF_FIRST};
69 static int state_if_not_part[] = {
70     AU_DBUF_COMPLETE, AU_DBUF_LAST, AU_DBUF_LAST, AU_DBUF_COMPLETE};
71 /*
72  * Write to an audit descriptor.
73  * Add the au_membuf to the descriptor chain and free the chain passed in.
74  */
75 void
76 au_uwrite(m)
77 	token_t *m;
78 {
79 	au_write(&(u_ad), m);
80 }
81 
82 void
83 au_write(caddr_t *d, token_t *m)
84 {
85 	if (d == NULL) {
86 		au_toss_token(m);
87 		return;
88 	}
89 	if (m == (token_t *)0) {
90 		printf("au_write: null token\n");
91 		return;
92 	}
93 
94 	if (*d == NULL)
95 		*d = (caddr_t)m;
96 	else
97 		(void) au_append_rec((au_buff_t *)*d, m, AU_PACK);
98 }
99 #define	AU_INTERVAL 120
100 
101 /*
102  * Write audit information to the disk.
103  * Called from auditsvc(); EOL'd as of Sol 10.
104  * Local zones are not allowed; the caller (auditsvc()) enforces the
105  * restriction.
106  */
107 int
108 au_doio(vp, limit)
109 
110 	struct vnode *vp;
111 	int limit;
112 
113 {	/* AU_DOIO */
114 
115 	off_t		off;	/* space used in buffer */
116 	size_t		used;	/* space used in au_membuf */
117 	token_t		*cAR;	/* current AR being processed */
118 	token_t		*cMB;	/* current au_membuf being processed */
119 	token_t		*sp;	/* last AR processed */
120 	char		*bp;	/* start of free space in staging buffer */
121 	unsigned char	*cp;	/* ptr to data to be moved */
122 	au_kcontext_t	*kctx;
123 	/*
124 	 * size (data left in au_membuf - space in buffer)
125 	 */
126 	ssize_t		sz;
127 	ssize_t		len;	/* len of data to move, size of AR */
128 	int		error;	/* error return */
129 	ssize_t		left;	/* data not xfered by write to disk */
130 	statvfs64_t	sb;	/* buffer for statfs */
131 	size_t		curr_sz = 0;	/* amount of data written during now */
132 	int		part    = 0;	/* partial audit record written */
133 	int		partial = 0;	/* flag to force partial AR to file */
134 					/* 0 - idle, ignore */
135 					/* 1 - force write of audit record */
136 					/* 2 - finished writing AR, commit */
137 
138 	kctx = SET_KCTX_GZ;
139 
140 	ASSERT(kctx != NULL);
141 
142 	/*
143 	 * Check to ensure enough free space on audit device.
144 	 */
145 	bzero(&sb, sizeof (statvfs64_t));
146 	(void) VFS_STATVFS(vp->v_vfsp, &sb);
147 	/*
148 	 * Large Files: We do not convert any of this part of kernel
149 	 * to be large file aware. Original behaviour should be
150 	 * maintained. This function is called from audit_svc and
151 	 * it already checks for negative values of limit.
152 	 */
153 
154 	if (sb.f_blocks && (fsblkcnt64_t)limit > sb.f_bavail)
155 		return (ENOSPC);
156 
157 	if (kctx->auk_file_stat.af_filesz &&
158 		(kctx->auk_file_stat.af_currsz >=
159 		kctx->auk_file_stat.af_filesz))
160 		return (EFBIG);
161 
162 	/*
163 	 * has the write buffer changed length due to a auditctl(2)?
164 	 * (remember that auk_buffer is an element of auk_dbuffer)
165 	 */
166 	if (kctx->auk_queue.bufsz != kctx->auk_queue.buflen) {
167 
168 		kmem_free(kctx->auk_buffer, kctx->auk_queue.buflen);
169 
170 		/* bad, should not sleep here. Testing only */
171 		kctx->auk_buffer = kmem_alloc(kctx->auk_queue.bufsz, KM_SLEEP);
172 
173 		kctx->auk_queue.buflen = kctx->auk_queue.bufsz;
174 	}
175 
176 	if (!kctx->auk_queue.head) {
177 		goto nodata;
178 	}
179 	sp   = (token_t *)0; /* no AR copied */
180 	off  = 0;	/* no space used in buffer */
181 	used = 0;	/* no data processed in au_membuf */
182 	cAR  = kctx->auk_queue.head;	/* start at head of queue */
183 	cMB  = cAR;	/* start with first au_membuf of record */
184 	bp = &(kctx->auk_buffer[0]);	/* start at beginning of buffer */
185 
186 	while (cMB) {
187 		ASSERT(kctx->auk_queue.head != NULL);
188 
189 		/* indicate audit record being processed */
190 		part = 1;
191 
192 		/* pointer to buffer data */
193 		cp  = memtod(cMB, unsigned char *);
194 		/* data left in au_membuf */
195 		sz  = (ssize_t)cMB->len - used;
196 		/* len to move */
197 		len = (ssize_t)MIN(sz, kctx->auk_queue.buflen - off);
198 
199 		/* move the data */
200 		bcopy(cp + used, bp + off, len);
201 		used += len; /* update used au_membuf */
202 		off  += len; /* update offset into buffer */
203 
204 		if (used >= (ssize_t)cMB->len) {
205 			/* advance to next au_membuf */
206 			used = 0;
207 			cMB  = cMB->next_buf;
208 		}
209 		if (cMB == (au_buff_t *)0) {
210 			/* advance to next AR */
211 			sp   = cAR;
212 			cAR  = cAR->next_rec;
213 			cMB  = cAR;
214 			/* reached end of an audit record */
215 			part = 0;
216 			/* force abort at end of audit record? */
217 			if (partial == 1)
218 				partial = 2;
219 		}
220 		/*
221 		 * If we've reached end of buffer, or have run out of
222 		 * audit records on the queue or we've processed a
223 		 * partial audit record to complete the audit file,
224 		 * then its time to flush the holding buffer to the
225 		 * audit trail.
226 		 */
227 		if ((kctx->auk_queue.buflen == off) ||
228 		    (cAR == (au_buff_t *)0) ||
229 		    (partial == 2)) {
230 
231 			left = 0;
232 			/*
233 			 * Largefiles: We purposely pass a value of
234 			 * MAXOFF_T as we do not want any of the
235 			 * auditing files to exceed 2GB. May be we will
236 			 * support this in future.
237 			 */
238 			error = vn_rdwr(UIO_WRITE, vp, kctx->auk_buffer,
239 				off, 0LL, UIO_SYSSPACE, FAPPEND,
240 				(rlim64_t)MAXOFF_T, CRED(), &left);
241 
242 			/* error on write */
243 			if (error != 0) {
244 				if (error == EDQUOT)
245 					error = ENOSPC;
246 				return (error);
247 			}
248 
249 			/* end of file system? */
250 			if (left) {
251 				au_buff_t *b = NULL;
252 
253 				sz = off - left; /* how much written */
254 
255 				/* update space counters */
256 				kctx->auk_file_stat.af_currsz += sz;
257 
258 				/* which AR are done */
259 				cAR = kctx->auk_queue.head;
260 				while (sz) {
261 					cp  = memtod(cAR, unsigned char *);
262 					len = (ssize_t)((cp[1]<<24 | cp[2]<<16 |
263 						cp[3]<<8 | cp[4]) &
264 						0xffffffffU);
265 
266 					if (len > sz)
267 						break;
268 					b = cAR;
269 					cAR = cAR->next_rec;
270 					sz -= len;
271 				}
272 				if (b != NULL)
273 					au_dequeue(kctx, b);
274 
275 				return (ENOSPC);
276 
277 			} else {	/* still space in file system */
278 				/* if we've written an AR */
279 				if (sp) {
280 					/*
281 					 * free records up to last one copied.
282 					 */
283 					au_dequeue(kctx, sp);
284 				}
285 				/* Update sizes */
286 				curr_sz += off;
287 				kctx->auk_file_stat.af_currsz += (uint_t)off;
288 
289 				/* reset auk_buffer pointers */
290 				sp = (token_t *)0;
291 				off  = 0;
292 				bp   = &(kctx->auk_buffer[0]);
293 
294 				/* check exit conditions */
295 				if (sb.f_blocks) {
296 					ulong_t blks_used;
297 					blks_used = (curr_sz / sb.f_bsize);
298 					if ((fsblkcnt64_t)limit >
299 				(sb.f_bavail - (fsblkcnt64_t)blks_used)) {
300 						/*
301 						 * if we haven't put out a
302 						 * complete audit record,
303 						 * continue to process the
304 						 * audit queue until we reach
305 						 * the end of the record.
306 						 */
307 						if (part && (partial == 0)) {
308 							partial = 1;
309 							continue;
310 						}
311 						/*
312 						 * exit if complete record
313 						 */
314 						if (partial != 1)
315 							return (ENOSPC);
316 					}
317 				}
318 				if (kctx->auk_file_stat.af_filesz &&
319 					(kctx->auk_file_stat.af_currsz
320 					>= kctx->auk_file_stat.af_filesz)) {
321 						/*
322 						 * force a complete audit
323 						 * record to the trail.
324 						 */
325 						if (partial == 0)
326 							partial = 1;
327 						/*
328 						 * Written data to AR boundry.
329 						 */
330 						if (partial != 1)
331 							return (EFBIG);
332 				}
333 			}
334 		}
335 	}	/* while(cMB) */
336 
337 nodata:
338 	return (0);
339 }
340 
341 /*
342  * Close an audit descriptor.
343  * Use the second parameter to indicate if it should be written or not.
344  */
345 void
346 au_close(au_kcontext_t *kctx, caddr_t *d, int flag, short e_type, short e_mod)
347 {
348 	token_t *dchain;	/* au_membuf chain which is the tokens */
349 	t_audit_data_t *tad = U2A(u);
350 
351 	ASSERT(tad != NULL);
352 	ASSERT(d != NULL);
353 	ASSERT(kctx != NULL);
354 
355 	if ((dchain = (token_t *)*d) == (token_t *)NULL)
356 		return;
357 
358 	*d = NULL;
359 
360 	/*
361 	 * If async then defer; or if requested, defer the closing/queueing to
362 	 * syscall end, unless no syscall is active or the syscall is _exit.
363 	 */
364 	if ((flag & AU_DONTBLOCK) || ((flag & AU_DEFER) &&
365 	    (tad->tad_scid != 0) && (tad->tad_scid != SYS_exit))) {
366 		au_close_defer(dchain, flag, e_type, e_mod);
367 		return;
368 	}
369 	au_close_time(kctx, dchain, flag, e_type, e_mod, NULL);
370 }
371 
372 /*
373  * Defer closing/queueing of an audit descriptor. For async events, queue
374  * via softcall. Otherwise, defer by queueing the record onto the tad; at
375  * syscall end time it will be pulled off.
376  */
377 void
378 au_close_defer(token_t *dchain, int flag, short e_type, short e_mod)
379 {
380 	au_defer_info_t	*attr;
381 	t_audit_data_t *tad = U2A(u);
382 
383 	ASSERT(tad != NULL);
384 
385 	/* If not to be written, toss the record. */
386 	if ((flag & AU_OK) == 0) {
387 		au_toss_token(dchain);
388 		return;
389 	}
390 
391 	attr = kmem_alloc(sizeof (au_defer_info_t), KM_NOSLEEP);
392 	/* If no mem available, failing silently is the best recourse */
393 	if (attr == NULL) {
394 		au_toss_token(dchain);
395 		return;
396 	}
397 
398 	attr->audi_next = NULL;
399 	attr->audi_ad = dchain;
400 	attr->audi_e_type = e_type;
401 	attr->audi_e_mod = e_mod;
402 	attr->audi_flag = flag;
403 	gethrestime(&attr->audi_atime);
404 
405 	/*
406 	 * All async events must be queued via softcall to avoid possible
407 	 * sleeping in high interrupt context. softcall will ensure it's
408 	 * done on a dedicated software-level interrupt thread.
409 	 */
410 	if (flag & AU_DONTBLOCK) {
411 		softcall(audit_async_finish_backend, attr);
412 		audit_async_done(NULL, 0);
413 		return;
414 	}
415 
416 	/*
417 	 * If not an async event, defer by queuing onto the tad until
418 	 * syscall end. No locking is needed because the tad is per-thread.
419 	 */
420 	if (tad->tad_defer_head)
421 		tad->tad_defer_tail->audi_next = attr;
422 	else
423 		tad->tad_defer_head = attr;
424 	tad->tad_defer_tail = attr;
425 }
426 
427 
428 /*
429  * Save the time in the event header. If time is not specified (i.e., pointer
430  * is NULL), use the current time.  This code is fairly ugly since it needs
431  * to support both 32- and 64-bit environments and can be called indirectly
432  * from both au_close() (for kernel audit) and from audit() (userland audit).
433  */
434 /*ARGSUSED*/
435 static void
436 au_save_time(adr_t *hadrp, timestruc_t *time, int size)
437 {
438 	struct {
439 		uint32_t sec;
440 		uint32_t usec;
441 	} tv;
442 	timestruc_t	now;
443 
444 	if (time == NULL) {
445 		gethrestime(&now);
446 		time = &now;
447 	}
448 
449 #ifdef _LP64
450 	if (size)
451 		adr_int64(hadrp, (int64_t *)time, 2);
452 	else
453 #endif
454 	{
455 		tv.sec = (uint32_t)time->tv_sec;
456 		tv.usec = (uint32_t)time->tv_nsec;
457 		adr_int32(hadrp, (int32_t *)&tv, 2);
458 	}
459 }
460 
461 
462 /*
463  * Close an audit descriptor.
464  * If time of event is specified, use it in the record, otherwise use the
465  * current time.
466  */
467 void
468 au_close_time(au_kcontext_t *kctx, token_t *dchain, int flag, short e_type,
469     short e_mod, timestruc_t *etime)
470 {
471 	token_t 	*record;	/* au_membuf chain == the record */
472 	int		byte_count;
473 	token_t 	*m;		/* for potential sequence token */
474 	adr_t		hadr;		/* handle for header token */
475 	adr_t		sadr;		/* handle for sequence token */
476 	size_t		zone_length;	/* length of zonename token */
477 
478 	ASSERT(dchain != NULL);
479 
480 	/* If not to be written, toss the record */
481 	if ((flag & AU_OK) == 0) {
482 		au_toss_token(dchain);
483 		return;
484 	}
485 	/* if auditing not enabled, then don't generate an audit record */
486 	ASSERT(kctx != NULL);
487 
488 	if ((kctx->auk_auditstate != AUC_AUDITING) &&
489 	    (kctx->auk_auditstate != AUC_INIT_AUDIT)) {
490 		/*
491 		 * at system boot, neither is set yet we want to generate
492 		 * an audit record.
493 		 */
494 		if (e_type != AUE_SYSTEMBOOT) {
495 			au_toss_token(dchain);
496 			return;
497 		}
498 	}
499 
500 	/* Count up the bytes used in the record. */
501 	byte_count = au_token_size(dchain);
502 
503 	/*
504 	 * add in size of header token (always present).
505 	 */
506 	byte_count += sizeof (char) + sizeof (int32_t) +
507 	    sizeof (char) + 2 * sizeof (short) + sizeof (timestruc_t);
508 
509 	if (kctx->auk_hostaddr_valid)
510 	    byte_count += sizeof (int32_t) + kctx->auk_info.ai_termid.at_type;
511 
512 	/*
513 	 * add in size of zonename token (zero if !AUDIT_ZONENAME)
514 	 */
515 	if (kctx->auk_policy & AUDIT_ZONENAME) {
516 		zone_length = au_zonename_length(NULL);
517 		byte_count += zone_length;
518 	} else {
519 		zone_length = 0;
520 	}
521 	/* add in size of (optional) trailer token */
522 	if (kctx->auk_policy & AUDIT_TRAIL)
523 		byte_count += 7;
524 
525 	/* add in size of (optional) sequence token */
526 	if (kctx->auk_policy & AUDIT_SEQ)
527 		byte_count += 5;
528 
529 	/* build the header */
530 	if (kctx->auk_hostaddr_valid)
531 		record = au_to_header_ex(byte_count, e_type, e_mod);
532 	else
533 		record = au_to_header(byte_count, e_type, e_mod);
534 
535 	/*
536 	 * If timestamp was specified, save it in header now. Otherwise,
537 	 * save reference to header so we can update time/data later
538 	 * and artificially adjust pointer to the time/date field of header.
539 	 */
540 	adr_start(&hadr, memtod(record, char *));
541 	hadr.adr_now += sizeof (char) + sizeof (int32_t) +
542 	    sizeof (char) + 2 * sizeof (short);
543 	if (kctx->auk_hostaddr_valid)
544 		hadr.adr_now += sizeof (int32_t) +
545 		    kctx->auk_info.ai_termid.at_type;
546 	if (etime != NULL) {
547 		au_save_time(&hadr, etime, 1);
548 		hadr.adr_now = (char *)NULL;
549 	}
550 
551 	/* append body of audit record */
552 	(void) au_append_rec(record, dchain, AU_PACK);
553 
554 	/* add (optional) zonename token */
555 	if (zone_length > 0) {
556 		m = au_to_zonename(zone_length, NULL);
557 		(void) au_append_rec(record, m, AU_PACK);
558 	}
559 
560 	/* Add an (optional) sequence token. NULL offset if none */
561 	if (kctx->auk_policy & AUDIT_SEQ) {
562 		/* get the sequence token */
563 		m = au_to_seq();
564 
565 		/* link to audit record (i.e. don't pack the data) */
566 		(void) au_append_rec(record, m, AU_LINK);
567 
568 		/*
569 		 * advance to count field of sequence token by skipping
570 		 * the token type byte.
571 		 */
572 		adr_start(&sadr, memtod(m, char *));
573 		sadr.adr_now += 1;
574 	} else {
575 		sadr.adr_now = NULL;
576 	}
577 	/* add (optional) trailer token */
578 	if (kctx->auk_policy & AUDIT_TRAIL) {
579 		(void) au_append_rec(record, au_to_trailer(byte_count),
580 		    AU_PACK);
581 	}
582 
583 	/*
584 	 * 1 - use 64 bit version of audit tokens for 64 bit kernels.
585 	 * 0 - use 32 bit version of audit tokens for 32 bit kernels.
586 	 */
587 #ifdef _LP64
588 	au_enqueue(kctx, record, &hadr, &sadr, 1, flag & AU_DONTBLOCK);
589 #else
590 	au_enqueue(kctx, record, &hadr, &sadr, 0, flag & AU_DONTBLOCK);
591 #endif
592 	AS_INC(as_totalsize, byte_count, kctx);
593 }
594 
595 /*ARGSUSED*/
596 void
597 au_enqueue(au_kcontext_t *kctx, au_buff_t *m, adr_t *hadrp, adr_t *sadrp,
598     int size, int dontblock)
599 {
600 	if (kctx == NULL)
601 		return;
602 
603 	mutex_enter(&(kctx->auk_queue.lock));
604 
605 	if (!dontblock && (kctx->auk_queue.cnt >= kctx->auk_queue.hiwater) &&
606 	    audit_sync_block(kctx)) {
607 		mutex_exit(&(kctx->auk_queue.lock));
608 		au_free_rec(m);
609 		return;
610 	}
611 
612 	/* Fill in date and time if needed */
613 	if (hadrp->adr_now) {
614 		au_save_time(hadrp, NULL, size);
615 	}
616 
617 	/* address will be non-zero only if AUDIT_SEQ set */
618 	if (sadrp->adr_now) {
619 		kctx->auk_sequence++;
620 		adr_int32(sadrp, (int32_t *)&(kctx->auk_sequence), 1);
621 	}
622 
623 	if (kctx->auk_queue.head)
624 		kctx->auk_queue.tail->next_rec = m;
625 	else
626 		kctx->auk_queue.head = m;
627 
628 	kctx->auk_queue.tail = m;
629 
630 	if (++(kctx->auk_queue.cnt) >
631 	    kctx->auk_queue.lowater && kctx->auk_queue.rd_block)
632 		cv_broadcast(&(kctx->auk_queue.read_cv));
633 
634 	mutex_exit(&(kctx->auk_queue.lock));
635 
636 	/* count # audit records put onto kernel audit queue */
637 	AS_INC(as_enqueue, 1, kctx);
638 }
639 
640 /*
641  * Dequeue and free buffers upto and including "freeto"
642  * Keeps the queue lock long but acquires it only once when doing
643  * bulk dequeueing.
644  */
645 static void
646 au_dequeue(au_kcontext_t *kctx, au_buff_t *freeto)
647 {
648 	au_buff_t *m, *l, *lastl;
649 	int n = 0;
650 
651 	ASSERT(kctx != NULL);
652 
653 	mutex_enter(&(kctx->auk_queue.lock));
654 
655 	ASSERT(kctx->auk_queue.head != NULL);
656 	ASSERT(freeto != NULL);
657 
658 	l = m = kctx->auk_queue.head;
659 
660 	do {
661 		n++;
662 		lastl = l;
663 		l = l->next_rec;
664 	} while (l != NULL && freeto != lastl);
665 
666 	kctx->auk_queue.cnt -= n;
667 	lastl->next_rec = NULL;
668 	kctx->auk_queue.head = l;
669 
670 	/* Freeto must exist in the list */
671 	ASSERT(freeto == lastl);
672 
673 	if (kctx->auk_queue.cnt <= kctx->auk_queue.lowater &&
674 	    kctx->auk_queue.wt_block)
675 		cv_broadcast(&(kctx->auk_queue.write_cv));
676 
677 	mutex_exit(&(kctx->auk_queue.lock));
678 
679 	while (m) {
680 		l = m->next_rec;
681 		au_free_rec(m);
682 		m = l;
683 	}
684 	AS_INC(as_written, n, kctx);
685 }
686 
687 /*
688  * audit_sync_block()
689  * If we've reached the high water mark, we look at the policy to see
690  * if we sleep or we should drop the audit record.
691  * This function is called with the auk_queue.lock held and the check
692  * performed one time already as an optimization.  Caller should unlock.
693  * Returns 1 if the caller needs to free the record.
694  */
695 static int
696 audit_sync_block(au_kcontext_t *kctx)
697 {
698 	ASSERT(MUTEX_HELD(&(kctx->auk_queue.lock)));
699 	/*
700 	 * Loop while we are at the high watermark.
701 	 */
702 	do {
703 		if ((kctx->auk_auditstate != AUC_AUDITING) ||
704 		    (kctx->auk_policy & AUDIT_CNT)) {
705 
706 			/* just count # of dropped audit records */
707 			AS_INC(as_dropped, 1, kctx);
708 
709 			return (1);
710 		}
711 
712 		/* kick reader awake if its asleep */
713 		if (kctx->auk_queue.rd_block &&
714 		    kctx->auk_queue.cnt > kctx->auk_queue.lowater)
715 			cv_broadcast(&(kctx->auk_queue.read_cv));
716 
717 		/* keep count of # times blocked */
718 		AS_INC(as_wblocked, 1, kctx);
719 
720 		/* sleep now, until woken by reader */
721 		kctx->auk_queue.wt_block++;
722 		cv_wait(&(kctx->auk_queue.write_cv), &(kctx->auk_queue.lock));
723 		kctx->auk_queue.wt_block--;
724 	} while (kctx->auk_queue.cnt >= kctx->auk_queue.hiwater);
725 
726 	return (0);
727 }
728 
729 /*
730  * audit_async_block()
731  * if we've reached the high water mark, we look at the ahlt policy to see
732  * if we reboot we should drop the audit record.
733  * Returns 1 if blocked.
734  */
735 static int
736 audit_async_block(au_kcontext_t *kctx, caddr_t *rpp)
737 {
738 	ASSERT(kctx != NULL);
739 
740 	mutex_enter(&(kctx->auk_queue.lock));
741 	/* see if we've reached high water mark */
742 	if (kctx->auk_queue.cnt >= kctx->auk_queue.hiwater) {
743 		mutex_exit(&(kctx->auk_queue.lock));
744 
745 		audit_async_drop(rpp, AU_BACKEND);
746 		return (1);
747 	}
748 	mutex_exit(&(kctx->auk_queue.lock));
749 	return (0);
750 }
751 
752 /*
753  * au_door_upcall.  auditdoor() may change vp without notice, so
754  * some locking seems in order.
755  *
756  */
757 #define	AGAIN_TICKS	10
758 
759 static int
760 au_door_upcall(au_kcontext_t *kctx, au_dbuf_t *aubuf)
761 {
762 	int		rc;
763 	door_arg_t	darg;
764 	int		retry = 1;
765 	int		ticks_to_wait;
766 
767 	darg.data_ptr = (char *)aubuf;
768 	darg.data_size = AU_DBUF_HEADER + aubuf->aub_size;
769 
770 	darg.desc_ptr = NULL;
771 	darg.desc_num = 0;
772 
773 	while (retry == 1) {
774 		/* non-zero means return results expected */
775 		darg.rbuf = (char *)aubuf;
776 		darg.rsize = darg.data_size;
777 
778 		retry = 0;
779 		mutex_enter(&(kctx->auk_svc_lock));
780 		if ((rc = door_upcall(kctx->auk_current_vp, &darg)) != 0) {
781 			mutex_exit(&(kctx->auk_svc_lock));
782 			if (rc == EAGAIN)
783 				ticks_to_wait = AGAIN_TICKS;
784 			else
785 				return (rc);
786 
787 			mutex_enter(&(kctx->auk_eagain_mutex));
788 			(void) cv_timedwait(&(kctx->auk_eagain_cv),
789 			    &(kctx->auk_eagain_mutex),
790 			    lbolt + ticks_to_wait);
791 			mutex_exit(&(kctx->auk_eagain_mutex));
792 
793 			retry = 1;
794 		} else
795 			mutex_exit(&(kctx->auk_svc_lock));	/* no retry */
796 	}	/* end while (retry == 1) */
797 	if (darg.rbuf == NULL)
798 		return (-1);
799 
800 	/* return code from door server */
801 	return (*(int *)darg.rbuf);
802 }
803 
804 /*
805  * Write an audit control message to the door handle.  The message
806  * structure depends on message_code and at present the only control
807  * message defined is for a policy change.  These are infrequent,
808  * so no memory is held for control messages.
809  */
810 int
811 au_doormsg(au_kcontext_t *kctx, uint32_t message_code, void *message)
812 {
813 	int		rc;
814 	au_dbuf_t	*buf;
815 	size_t		alloc_size;
816 
817 	switch (message_code) {
818 	case AU_DBUF_POLICY:
819 		alloc_size = AU_DBUF_HEADER + sizeof (uint32_t);
820 		buf = kmem_alloc(alloc_size, KM_SLEEP);
821 		buf->aub_size = sizeof (uint32_t);
822 		*(uint32_t *)buf->aub_buf = *(uint32_t *)message;
823 		break;
824 	case AU_DBUF_SHUTDOWN:
825 		alloc_size = AU_DBUF_HEADER;
826 		buf = kmem_alloc(alloc_size, KM_SLEEP);
827 		buf->aub_size = 0;
828 		break;
829 	default:
830 		return (1);
831 	}
832 
833 	buf->aub_type = AU_DBUF_NOTIFY | message_code;
834 	rc = au_door_upcall(kctx, buf);
835 	kmem_free(buf, alloc_size);
836 
837 	return (rc);
838 }
839 
840 /*
841  * Write audit information to the door handle.  au_doorio is called with
842  * one or more complete audit records on the queue and outputs those
843  * records in buffers of up to auk_queue.buflen in size.
844  */
845 int
846 au_doorio(au_kcontext_t *kctx) {
847 	off_t		off;	/* space used in buffer */
848 	ssize_t		used;	/* space used in au_membuf */
849 	token_t		*cAR;	/* current AR being processed */
850 	token_t		*cMB;	/* current au_membuf being processed */
851 	token_t		*sp;	/* last AR processed */
852 	char		*bp;	/* start of free space in staging buffer */
853 	unsigned char	*cp;	/* ptr to data to be moved */
854 	int		error;  /* return from door upcall */
855 
856 	/*
857 	 * size (data left in au_membuf - space in buffer)
858 	 */
859 	ssize_t		sz;
860 	ssize_t		len;	/* len of data to move, size of AR */
861 	ssize_t		curr_sz = 0;	/* amount of data written during now */
862 	/*
863 	 * partial_state is AU_DBUF_COMPLETE...LAST; see audit_door_infc.h
864 	 */
865 	int		part    = 0;	/* partial audit record written */
866 	int		partial_state = AU_DBUF_COMPLETE;
867 	/*
868 	 * Has the write buffer changed length due to a auditctl(2)?
869 	 * Initial allocation is from audit_start.c/audit_init()
870 	 */
871 	if (kctx->auk_queue.bufsz != kctx->auk_queue.buflen) {
872 		kmem_free(kctx->auk_dbuffer, AU_DBUF_HEADER +
873 		    kctx->auk_queue.buflen);
874 
875 		kctx->auk_dbuffer = kmem_alloc(AU_DBUF_HEADER +
876 		    kctx->auk_queue.bufsz, KM_SLEEP);
877 
878 		/* omit the 64 bit header */
879 		kctx->auk_queue.buflen = kctx->auk_queue.bufsz;
880 	}
881 	if (!kctx->auk_queue.head)
882 		goto nodata;
883 
884 	sp   = NULL;	/* no record copied */
885 	off  = 0;	/* no space used in buffer */
886 	used = 0;	/* no data processed in au_membuf */
887 	cAR  = kctx->auk_queue.head;	/* start at head of queue */
888 	cMB  = cAR;	/* start with first au_membuf of record */
889 
890 	/* start at beginning of buffer */
891 	bp   = &(kctx->auk_dbuffer->aub_buf[0]);
892 
893 	while (cMB) {
894 		part = 1;	/* indicate audit record being processed */
895 
896 		cp  = memtod(cMB, unsigned char *); /* buffer ptr */
897 
898 		sz  = (ssize_t)cMB->len - used;	/* data left in au_membuf */
899 		/* len to move */
900 		len = (ssize_t)MIN(sz, kctx->auk_queue.buflen - off);
901 
902 		/* move the data */
903 		bcopy(cp + used, bp + off, len);
904 		used += len; /* update used au_membuf */
905 		off  += len; /* update offset into buffer */
906 
907 		if (used >= (ssize_t)cMB->len) {
908 			/* advance to next au_membuf */
909 			used = 0;
910 			cMB  = cMB->next_buf;
911 		}
912 		if (cMB == NULL) {
913 			/* advance to next audit record */
914 			sp   = cAR;
915 			cAR  = cAR->next_rec;
916 			cMB  = cAR;
917 			part = 0;	/* have a complete record */
918 		}
919 		error = 0;
920 		if ((kctx->auk_queue.buflen == off) || (part == 0)) {
921 			if (part)
922 				partial_state = state_if_part[partial_state];
923 			else
924 				partial_state =
925 				    state_if_not_part[partial_state];
926 
927 			kctx->auk_dbuffer->aub_type = partial_state;
928 			kctx->auk_dbuffer->aub_size = off;
929 			error = au_door_upcall(kctx, kctx->auk_dbuffer);
930 			if (error != 0)
931 				goto nodata;
932 			/*
933 			 * if we've successfully written an audit record,
934 			 * free records up to last full record copied
935 			 */
936 			if (sp)
937 				au_dequeue(kctx, sp);
938 
939 				/* Update size */
940 			curr_sz += off;
941 
942 				/* reset auk_dbuffer pointers */
943 			sp = NULL;
944 			off  = 0;
945 		}
946 	}	/* while(cMB) */
947 nodata:
948 	return (error);
949 }
950 
951 /*
952  * Clean up thread audit state to clear out asynchronous audit record
953  * generation error recovery processing. Note that this is done on a
954  * per-thread basis and thus does not need any locking.
955  */
956 void
957 audit_async_done(caddr_t *rpp, int flags)
958 {
959 	t_audit_data_t *tad = U2A(u);
960 
961 	/* clean up the tad unless called from softcall backend */
962 	if (!(flags & AU_BACKEND)) {
963 		ASSERT(tad != NULL);
964 		ASSERT(tad->tad_ctrl & PAD_ERRJMP);
965 
966 		tad->tad_ctrl &= ~PAD_ERRJMP;
967 		tad->tad_errjmp = NULL;
968 	}
969 
970 	/* clean out partial audit record */
971 	if ((rpp != NULL) && (*rpp != NULL)) {
972 		au_toss_token((au_buff_t *)*rpp);
973 		*rpp = NULL;
974 	}
975 }
976 
977 /*
978  * implement the audit policy for asynchronous events generated within
979  * the kernel.
980  * XXX might need locks around audit_policy check.
981  */
982 void
983 audit_async_drop(caddr_t *rpp, int flags)
984 {
985 	au_kcontext_t	*kctx;
986 
987 	/* could not generate audit record, clean up */
988 	audit_async_done((caddr_t *)rpp, flags);
989 
990 	kctx = SET_KCTX_GZ;
991 	ASSERT(kctx != NULL);
992 	/* just drop the record and return */
993 	if (((audit_policy & AUDIT_AHLT) == 0) ||
994 	    (kctx->auk_auditstate == AUC_INIT_AUDIT)) {
995 		/* just count # of dropped audit records */
996 		AS_INC(as_dropped, 1, kctx);
997 		return;
998 	}
999 
1000 	/*
1001 	 * There can be a lot of data in the audit queue. We
1002 	 * will first sync the file systems then attempt to
1003 	 * shutdown the kernel so that a memory dump is
1004 	 * performed.
1005 	 */
1006 	sync();
1007 	sync();
1008 
1009 	/*
1010 	 * now shut down. What a cruel world it has been
1011 	 */
1012 	panic("non-attributable halt. should dump core");
1013 	/* No return */
1014 }
1015 
1016 int
1017 audit_async_start(label_t *jb, int event, int sorf)
1018 {
1019 	t_audit_data_t *tad = U2A(u);
1020 	au_state_t estate;
1021 	int success = 0, failure = 0;
1022 	au_kcontext_t	*kctx = SET_KCTX_GZ;
1023 
1024 	ASSERT(kctx != NULL);
1025 
1026 	/* if audit state off, then no audit record generation */
1027 	if ((kctx->auk_auditstate != AUC_AUDITING) &&
1028 	    (kctx->auk_auditstate != AUC_INIT_AUDIT))
1029 		return (1);
1030 
1031 	/*
1032 	 * preselect asynchronous event
1033 	 * XXX should we check for out-of-range???
1034 	 */
1035 	estate = kctx->auk_ets[event];
1036 
1037 	if (sorf & AUM_SUCC)
1038 		success = kctx->auk_info.ai_mask.as_success & estate;
1039 	if (sorf & AUM_FAIL)
1040 		failure = kctx->auk_info.ai_mask.as_failure & estate;
1041 
1042 	if ((success | failure) == NULL)
1043 		return (1);
1044 
1045 	ASSERT(tad->tad_errjmp == NULL);
1046 	tad->tad_errjmp = (void *)jb;
1047 	tad->tad_ctrl |= PAD_ERRJMP;
1048 
1049 	return (0);
1050 }
1051 
1052 /*
1053  * Complete auditing of an async event. The AU_DONTBLOCK flag to au_close will
1054  * result in the backend routine being invoked from softcall, so all the real
1055  * work can be done in a safe context.
1056  */
1057 void
1058 audit_async_finish(caddr_t *ad, int aid, int amod)
1059 {
1060 	au_kcontext_t	*kctx;
1061 
1062 	kctx  = SET_KCTX_GZ;
1063 	ASSERT(kctx != NULL);
1064 
1065 	au_close(kctx, ad, AU_DONTBLOCK | AU_OK, aid, PAD_NONATTR|amod);
1066 }
1067 
1068 /*
1069  * Backend routine to complete an async audit. Invoked from softcall.
1070  * (Note: the blocking and the queuing below both involve locking which can't
1071  * be done safely in high interrupt context due to the chance of sleeping on
1072  * the corresponding adaptive mutex. Hence the softcall.)
1073  */
1074 static void
1075 audit_async_finish_backend(void *addr)
1076 {
1077 	au_kcontext_t	*kctx;
1078 	au_defer_info_t	*attr = (au_defer_info_t *)addr;
1079 
1080 	if (attr == NULL)
1081 		return;		/* won't happen unless softcall is broken */
1082 
1083 	kctx  = SET_KCTX_GZ;
1084 	ASSERT(kctx != NULL);
1085 
1086 	if (audit_async_block(kctx, (caddr_t *)&attr->audi_ad)) {
1087 		kmem_free(attr, sizeof (au_defer_info_t));
1088 		return;
1089 	}
1090 
1091 	/*
1092 	 * Call au_close_time to complete the audit with the saved values.
1093 	 *
1094 	 * For the exit-prom event, use the current time instead of the
1095 	 * saved time as a better approximation. (Because the time saved via
1096 	 * gethrestime during prom-exit handling would not yet be caught up
1097 	 * after the system was idled in the debugger for a period of time.)
1098 	 */
1099 	if (attr->audi_e_type == AUE_EXITPROM) {
1100 		au_close_time(kctx, (token_t *)attr->audi_ad, attr->audi_flag,
1101 		    attr->audi_e_type, attr->audi_e_mod, NULL);
1102 	} else {
1103 		au_close_time(kctx, (token_t *)attr->audi_ad, attr->audi_flag,
1104 		    attr->audi_e_type, attr->audi_e_mod, &attr->audi_atime);
1105 	}
1106 
1107 	AS_INC(as_generated, 1, kctx);
1108 	AS_INC(as_nonattrib, 1, kctx);
1109 
1110 	kmem_free(attr, sizeof (au_defer_info_t));
1111 }
1112