xref: /titanic_51/usr/src/uts/common/os/contract.c (revision 7aec1d6e253b21f9e9b7ef68b4d81ab9859b51fe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Contracts
31  * ---------
32  *
33  * Contracts are a primitive which enrich the relationships between
34  * processes and system resources.  The primary purpose of contracts is
35  * to provide a means for the system to negotiate the departure from a
36  * binding relationship (e.g. pages locked in memory or a thread bound
37  * to processor), but they can also be used as a purely asynchronous
38  * error reporting mechanism as they are with process contracts.
39  *
40  * More information on how one interfaces with contracts and what
41  * contracts can do for you can be found in:
42  *   PSARC 2003/193 Solaris Contracts
43  *   PSARC 2004/460 Contracts addendum
44  *
45  * This file contains the core contracts framework.  By itself it is
46  * useless: it depends on the contracts filesystem (ctfs) to provide an
47  * interface to user processes and individual contract types to
48  * implement the process/resource relationships.
49  *
50  * Data structure overview
51  * -----------------------
52  *
53  * A contract is represented by a contract_t, which itself points to an
54  * encapsulating contract-type specific contract object.  A contract_t
55  * contains the contract's static identity (including its terms), its
56  * linkage to various bookkeeping structures, the contract-specific
57  * event queue, and a reference count.
58  *
59  * A contract template is represented by a ct_template_t, which, like a
60  * contract, points to an encapsulating contract-type specific template
61  * object.  A ct_template_t contains the template's terms.
62  *
63  * An event queue is represented by a ct_equeue_t, and consists of a
64  * list of events, a list of listeners, and a list of listeners who are
65  * waiting for new events (affectionately referred to as "tail
66  * listeners").  There are three queue types, defined by ct_listnum_t
67  * (an enum).  An event may be on one of each type of queue
68  * simultaneously; the list linkage used by a queue is determined by
69  * its type.
70  *
71  * An event is represented by a ct_kevent_t, which contains mostly
72  * static event data (e.g. id, payload).  It also has an array of
73  * ct_member_t structures, each of which contains a list_node_t and
74  * represents the event's linkage in a specific event queue.
75  *
76  * Each open of an event endpoint results in the creation of a new
77  * listener, represented by a ct_listener_t.  In addition to linkage
78  * into the aforementioned lists in the event_queue, a ct_listener_t
79  * contains a pointer to the ct_kevent_t it is currently positioned at
80  * as well as a set of status flags and other administrative data.
81  *
82  * Each process has a list of contracts it owns, p_ct_held; a pointer
83  * to the process contract it is a member of, p_ct_process; the linkage
84  * for that membership, p_ct_member; and an array of event queue
85  * structures representing the process bundle queues.
86  *
87  * Each LWP has an array of its active templates, lwp_ct_active; and
88  * the most recently created contracts, lwp_ct_latest.
89  *
90  * A process contract has a list of member processes and a list of
91  * inherited contracts.
92  *
93  * There is a system-wide list of all contracts, as well as per-type
94  * lists of contracts.
95  *
96  * Lock ordering overview
97  * ----------------------
98  *
99  * Locks at the top are taken first:
100  *
101  *                   ct_evtlock
102  *                   regent ct_lock
103  *                   member ct_lock
104  *                   pidlock
105  *                   p_lock
106  *    contract ctq_lock         contract_lock
107  *    pbundle ctq_lock
108  *    cte_lock
109  *                   ct_reflock
110  *
111  * contract_lock and ctq_lock/cte_lock are not currently taken at the
112  * same time.
113  *
114  * Reference counting and locking
115  * ------------------------------
116  *
117  * A contract has a reference count, protected by ct_reflock.
118  * (ct_reflock is also used in a couple other places where atomic
119  * access to a variable is needed in an innermost context).  A process
120  * maintains a hold on each contract it owns.  A process contract has a
121  * hold on each contract it has inherited.  Each event has a hold on
122  * the contract which generated it.  Process contract templates have
123  * holds on the contracts referred to by their transfer terms.  CTFS
124  * contract directory nodes have holds on contracts.  Lastly, various
125  * code paths may temporarily take holds on contracts to prevent them
126  * from disappearing while other processing is going on.  It is
127  * important to note that the global contract lists do not hold
128  * references on contracts; a contract is removed from these structures
129  * atomically with the release of its last reference.
130  *
131  * At a given point in time, a contract can either be owned by a
132  * process, inherited by a regent process contract, or orphaned.  A
133  * contract_t's  owner and regent pointers, ct_owner and ct_regent, are
134  * protected by its ct_lock.  The linkage in the holder's (holder =
135  * owner or regent) list of contracts, ct_ctlist, is protected by
136  * whatever lock protects the holder's data structure.  In order for
137  * these two directions to remain consistent, changing the holder of a
138  * contract requires that both locks be held.
139  *
140  * Events also have reference counts.  There is one hold on an event
141  * per queue it is present on, in addition to those needed for the
142  * usual sundry reasons.  Individual listeners are associated with
143  * specific queues, and increase a queue-specific reference count
144  * stored in the ct_member_t structure.
145  *
146  * The dynamic contents of an event (reference count and flags) are
147  * protected by its cte_lock, while the contents of the embedded
148  * ct_member_t structures are protected by the locks of the queues they
149  * are linked into.  A ct_listener_t's contents are also protected by
150  * its event queue's ctq_lock.
151  *
152  * Resource controls
153  * -----------------
154  *
155  * Control:      project.max-contracts (rc_project_contract)
156  * Description:  Maximum number of contracts allowed a project.
157  *
158  *   When a contract is created, the project's allocation is tested and
159  *   (assuming success) increased.  When the last reference to a
160  *   contract is released, the creating project's allocation is
161  *   decreased.
162  */
163 
164 #include <sys/mutex.h>
165 #include <sys/debug.h>
166 #include <sys/types.h>
167 #include <sys/param.h>
168 #include <sys/kmem.h>
169 #include <sys/thread.h>
170 #include <sys/id_space.h>
171 #include <sys/avl.h>
172 #include <sys/list.h>
173 #include <sys/sysmacros.h>
174 #include <sys/proc.h>
175 #include <sys/contract_impl.h>
176 #include <sys/contract/process_impl.h>
177 #include <sys/systm.h>
178 #include <sys/atomic.h>
179 #include <sys/cmn_err.h>
180 #include <sys/model.h>
181 #include <sys/policy.h>
182 #include <sys/zone.h>
183 #include <sys/task.h>
184 
/* Handle for the project.max-contracts resource control; defined elsewhere. */
extern rctl_hndl_t rc_project_contract;

/* ID space from which contract IDs are allocated (created in contract_init). */
static id_space_t	*contract_ids;
/* System-wide AVL tree of all contracts, ordered by contract_compar. */
static avl_tree_t	contract_avl;
/* Held around contract_avl updates and per-project contract count changes. */
static kmutex_t		contract_lock;

/*
 * Table of registered contract types.  contract_type_init stores each
 * new type at the slot given by its type ID.
 */
int			ct_ntypes = CTT_MAXTYPE;
static ct_type_t	*ct_types_static[CTT_MAXTYPE];
ct_type_t		**ct_types = ct_types_static;

/* Event queue helpers used by the routines below. */
static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
static void cte_queue_destroy(ct_equeue_t *);
static void cte_queue_drain(ct_equeue_t *, int);
static void cte_trim(ct_equeue_t *, contract_t *);
static void cte_copy(ct_equeue_t *, ct_equeue_t *);
200 
201 /*
202  * contract_compar
203  *
204  * A contract comparator which sorts on contract ID.
205  */
206 int
207 contract_compar(const void *x, const void *y)
208 {
209 	const contract_t *ct1 = x;
210 	const contract_t *ct2 = y;
211 
212 	if (ct1->ct_id < ct2->ct_id)
213 		return (-1);
214 	if (ct1->ct_id > ct2->ct_id)
215 		return (1);
216 	return (0);
217 }
218 
219 /*
220  * contract_init
221  *
222  * Initializes the contract subsystem, the specific contract types, and
223  * process 0.
224  */
225 void
226 contract_init(void)
227 {
228 	/*
229 	 * Initialize contract subsystem.
230 	 */
231 	contract_ids = id_space_create("contracts", 1, INT_MAX);
232 	avl_create(&contract_avl, contract_compar, sizeof (contract_t),
233 	    offsetof(contract_t, ct_ctavl));
234 	mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);
235 
236 	/*
237 	 * Initialize contract types.
238 	 */
239 	contract_process_init();
240 
241 	/*
242 	 * Initialize p0/lwp0 contract state.
243 	 */
244 	avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
245 	    offsetof(contract_t, ct_ctlist));
246 }
247 
248 /*
249  * contract_dtor
250  *
251  * Performs basic destruction of the common portions of a contract.
252  * Called from the failure path of contract_ctor and from
253  * contract_rele.
254  */
255 static void
256 contract_dtor(contract_t *ct)
257 {
258 	cte_queue_destroy(&ct->ct_events);
259 	list_destroy(&ct->ct_vnodes);
260 	mutex_destroy(&ct->ct_reflock);
261 	mutex_destroy(&ct->ct_lock);
262 	mutex_destroy(&ct->ct_evtlock);
263 }
264 
265 /*
266  * contract_ctor
267  *
268  * Called by a contract type to initialize a contract.  Fails if the
269  * max-contract resource control would have been exceeded.  After a
270  * successful call to contract_ctor, the contract is unlocked and
271  * visible in all namespaces; any type-specific initialization should
272  * be completed before calling contract_ctor.  Returns 0 on success.
273  *
274  * Because not all callers can tolerate failure, a 0 value for canfail
275  * instructs contract_ctor to ignore the project.max-contracts resource
276  * control.  Obviously, this "out" should only be employed by callers
277  * who are sufficiently constrained in other ways (e.g. newproc).
278  */
279 int
280 contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
281     ctflags_t flags, proc_t *author, int canfail)
282 {
283 	avl_index_t where;
284 	klwp_t *curlwp = ttolwp(curthread);
285 
286 	ASSERT(author == curproc);
287 
288 	mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
289 	mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
290 	mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
291 	ct->ct_id = id_alloc(contract_ids);
292 
293 	cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
294 	list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
295 	    offsetof(contract_vnode_t, ctv_node));
296 
297 	/*
298 	 * Instance data
299 	 */
300 	ct->ct_ref = 2;		/* one for the holder, one for "latest" */
301 	ct->ct_cuid = crgetuid(CRED());
302 	ct->ct_type = type;
303 	ct->ct_data = data;
304 	gethrestime(&ct->ct_ctime);
305 	ct->ct_state = CTS_OWNED;
306 	ct->ct_flags = flags;
307 	ct->ct_regent = author->p_ct_process ?
308 	    &author->p_ct_process->conp_contract : NULL;
309 	ct->ct_ev_info = tmpl->ctmpl_ev_info;
310 	ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
311 	ct->ct_cookie = tmpl->ctmpl_cookie;
312 	ct->ct_owner = author;
313 
314 	/*
315 	 * Test project.max-contracts.
316 	 */
317 	mutex_enter(&author->p_lock);
318 	mutex_enter(&contract_lock);
319 	if (canfail && rctl_test(rc_project_contract,
320 	    author->p_task->tk_proj->kpj_rctls, author, 1,
321 	    RCA_SAFE) & RCT_DENY) {
322 		id_free(contract_ids, ct->ct_id);
323 		mutex_exit(&contract_lock);
324 		mutex_exit(&author->p_lock);
325 		ct->ct_events.ctq_flags |= CTQ_DEAD;
326 		contract_dtor(ct);
327 		return (1);
328 	}
329 	ct->ct_proj = author->p_task->tk_proj;
330 	ct->ct_proj->kpj_data.kpd_contract++;
331 	(void) project_hold(ct->ct_proj);
332 	mutex_exit(&contract_lock);
333 
334 	/*
335 	 * Insert into holder's avl of contracts.
336 	 * We use an avl not because order is important, but because
337 	 * readdir of /proc/contracts requires we be able to use a
338 	 * scalar as an index into the process's list of contracts
339 	 */
340 	ct->ct_zoneid = author->p_zone->zone_id;
341 	ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
342 	VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
343 	avl_insert(&author->p_ct_held, ct, where);
344 	mutex_exit(&author->p_lock);
345 
346 	/*
347 	 * Insert into global contract AVL
348 	 */
349 	mutex_enter(&contract_lock);
350 	VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
351 	avl_insert(&contract_avl, ct, where);
352 	mutex_exit(&contract_lock);
353 
354 	/*
355 	 * Insert into type AVL
356 	 */
357 	mutex_enter(&type->ct_type_lock);
358 	VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
359 	avl_insert(&type->ct_type_avl, ct, where);
360 	type->ct_type_timestruc = ct->ct_ctime;
361 	mutex_exit(&type->ct_type_lock);
362 
363 	if (curlwp->lwp_ct_latest[type->ct_type_index])
364 		contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
365 	curlwp->lwp_ct_latest[type->ct_type_index] = ct;
366 
367 	return (0);
368 }
369 
370 /*
371  * contract_rele
372  *
373  * Releases a reference to a contract.  If the caller had the last
374  * reference, the contract is removed from all namespaces, its
375  * allocation against the max-contracts resource control is released,
376  * and the contract type's free entry point is invoked for any
377  * type-specific deconstruction and to (presumably) free the object.
378  */
379 void
380 contract_rele(contract_t *ct)
381 {
382 	uint64_t nref;
383 
384 	mutex_enter(&ct->ct_reflock);
385 	ASSERT(ct->ct_ref > 0);
386 	nref = --ct->ct_ref;
387 	mutex_exit(&ct->ct_reflock);
388 	if (nref == 0) {
389 		/*
390 		 * ct_owner is cleared when it drops its reference.
391 		 */
392 		ASSERT(ct->ct_owner == NULL);
393 		ASSERT(ct->ct_evcnt == 0);
394 
395 		/*
396 		 * Remove from global contract AVL
397 		 */
398 		mutex_enter(&contract_lock);
399 		avl_remove(&contract_avl, ct);
400 		mutex_exit(&contract_lock);
401 
402 		/*
403 		 * Remove from type AVL
404 		 */
405 		mutex_enter(&ct->ct_type->ct_type_lock);
406 		avl_remove(&ct->ct_type->ct_type_avl, ct);
407 		mutex_exit(&ct->ct_type->ct_type_lock);
408 
409 		/*
410 		 * Release the contract's ID
411 		 */
412 		id_free(contract_ids, ct->ct_id);
413 
414 		/*
415 		 * Release project hold
416 		 */
417 		mutex_enter(&contract_lock);
418 		ct->ct_proj->kpj_data.kpd_contract--;
419 		project_rele(ct->ct_proj);
420 		mutex_exit(&contract_lock);
421 
422 		/*
423 		 * Free the contract
424 		 */
425 		contract_dtor(ct);
426 		ct->ct_type->ct_type_ops->contop_free(ct);
427 	}
428 }
429 
430 /*
431  * contract_hold
432  *
433  * Adds a reference to a contract
434  */
435 void
436 contract_hold(contract_t *ct)
437 {
438 	mutex_enter(&ct->ct_reflock);
439 	ASSERT(ct->ct_ref < UINT64_MAX);
440 	ct->ct_ref++;
441 	mutex_exit(&ct->ct_reflock);
442 }
443 
444 /*
445  * contract_getzuniqid
446  *
447  * Get a contract's zone unique ID.  Needed because 64-bit reads and
448  * writes aren't atomic on x86.  Since there are contexts where we are
449  * unable to take ct_lock, we instead use ct_reflock; in actuality any
450  * lock would do.
451  */
452 uint64_t
453 contract_getzuniqid(contract_t *ct)
454 {
455 	uint64_t zuniqid;
456 
457 	mutex_enter(&ct->ct_reflock);
458 	zuniqid = ct->ct_mzuniqid;
459 	mutex_exit(&ct->ct_reflock);
460 
461 	return (zuniqid);
462 }
463 
464 /*
465  * contract_setzuniqid
466  *
467  * Sets a contract's zone unique ID.   See contract_getzuniqid.
468  */
469 void
470 contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
471 {
472 	mutex_enter(&ct->ct_reflock);
473 	ct->ct_mzuniqid = zuniqid;
474 	mutex_exit(&ct->ct_reflock);
475 }
476 
477 /*
478  * contract_abandon
479  *
480  * Abandons the specified contract.  If "explicit" is clear, the
481  * contract was implicitly abandoned (by process exit) and should be
482  * inherited if its terms allow it and its owner was a member of a
483  * regent contract.  Otherwise, the contract type's abandon entry point
484  * is invoked to either destroy or orphan the contract.
485  */
486 int
487 contract_abandon(contract_t *ct, proc_t *p, int explicit)
488 {
489 	ct_equeue_t *q = NULL;
490 	contract_t *parent = &p->p_ct_process->conp_contract;
491 	int inherit = 0;
492 
493 	ASSERT(p == curproc);
494 
495 	mutex_enter(&ct->ct_lock);
496 
497 	/*
498 	 * Multiple contract locks are taken contract -> subcontract.
499 	 * Check if the contract will be inherited so we can acquire
500 	 * all the necessary locks before making sensitive changes.
501 	 */
502 	if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
503 	    contract_process_accept(parent)) {
504 		mutex_exit(&ct->ct_lock);
505 		mutex_enter(&parent->ct_lock);
506 		mutex_enter(&ct->ct_lock);
507 		inherit = 1;
508 	}
509 
510 	if (ct->ct_owner != p) {
511 		mutex_exit(&ct->ct_lock);
512 		if (inherit)
513 			mutex_exit(&parent->ct_lock);
514 		return (EINVAL);
515 	}
516 
517 	mutex_enter(&p->p_lock);
518 	if (explicit)
519 		avl_remove(&p->p_ct_held, ct);
520 	ct->ct_owner = NULL;
521 	mutex_exit(&p->p_lock);
522 
523 	/*
524 	 * Since we can't call cte_trim with the contract lock held,
525 	 * we grab the queue pointer here.
526 	 */
527 	if (p->p_ct_equeue)
528 		q = p->p_ct_equeue[ct->ct_type->ct_type_index];
529 
530 	/*
531 	 * contop_abandon may destroy the contract so we rely on it to
532 	 * drop ct_lock.  We retain a reference on the contract so that
533 	 * the cte_trim which follows functions properly.  Even though
534 	 * cte_trim doesn't dereference the contract pointer, it is
535 	 * still necessary to retain a reference to the contract so
536 	 * that we don't trim events which are sent by a subsequently
537 	 * allocated contract infortuitously located at the same address.
538 	 */
539 	contract_hold(ct);
540 
541 	if (inherit) {
542 		ct->ct_state = CTS_INHERITED;
543 		ASSERT(ct->ct_regent == parent);
544 		contract_process_take(parent, ct);
545 
546 		/*
547 		 * We are handing off the process's reference to the
548 		 * parent contract.  For this reason, the order in
549 		 * which we drop the contract locks is also important.
550 		 */
551 		mutex_exit(&ct->ct_lock);
552 		mutex_exit(&parent->ct_lock);
553 	} else {
554 		ct->ct_regent = NULL;
555 		ct->ct_type->ct_type_ops->contop_abandon(ct);
556 	}
557 
558 	/*
559 	 * ct_lock has been dropped; we can safely trim the event
560 	 * queue now.
561 	 */
562 	if (q) {
563 		mutex_enter(&q->ctq_lock);
564 		cte_trim(q, ct);
565 		mutex_exit(&q->ctq_lock);
566 	}
567 
568 	contract_rele(ct);
569 
570 	return (0);
571 }
572 
573 /*
574  * contract_adopt
575  *
576  * Adopts a contract.  After a successful call to this routine, the
577  * previously inherited contract will belong to the calling process,
578  * and its events will have been appended to its new owner's process
579  * bundle queue.
580  */
581 int
582 contract_adopt(contract_t *ct, proc_t *p)
583 {
584 	avl_index_t where;
585 	ct_equeue_t *q;
586 	contract_t *parent;
587 
588 	ASSERT(p == curproc);
589 
590 	/*
591 	 * Ensure the process has an event queue.  Checked by ASSERTs
592 	 * below.
593 	 */
594 	(void) contract_type_pbundle(ct->ct_type, p);
595 
596 	mutex_enter(&ct->ct_lock);
597 	parent = ct->ct_regent;
598 	if (ct->ct_state != CTS_INHERITED ||
599 	    &p->p_ct_process->conp_contract != parent ||
600 	    p->p_zone->zone_uniqid != ct->ct_czuniqid) {
601 		mutex_exit(&ct->ct_lock);
602 		return (EINVAL);
603 	}
604 
605 	/*
606 	 * Multiple contract locks are taken contract -> subcontract.
607 	 */
608 	mutex_exit(&ct->ct_lock);
609 	mutex_enter(&parent->ct_lock);
610 	mutex_enter(&ct->ct_lock);
611 
612 	/*
613 	 * It is possible that the contract was adopted by someone else
614 	 * while its lock was dropped.  It isn't possible for the
615 	 * contract to have been inherited by a different regent
616 	 * contract.
617 	 */
618 	if (ct->ct_state != CTS_INHERITED) {
619 		mutex_exit(&parent->ct_lock);
620 		mutex_exit(&ct->ct_lock);
621 		return (EBUSY);
622 	}
623 	ASSERT(ct->ct_regent == parent);
624 
625 	ct->ct_state = CTS_OWNED;
626 
627 	contract_process_adopt(ct, p);
628 
629 	mutex_enter(&p->p_lock);
630 	ct->ct_owner = p;
631 	VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
632 	avl_insert(&p->p_ct_held, ct, where);
633 	mutex_exit(&p->p_lock);
634 
635 	ASSERT(ct->ct_owner->p_ct_equeue);
636 	ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
637 	q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
638 	cte_copy(&ct->ct_events, q);
639 	mutex_exit(&ct->ct_lock);
640 
641 	return (0);
642 }
643 
644 /*
645  * contract_ack
646  *
647  * Acknowledges receipt of a critical event.
648  */
649 int
650 contract_ack(contract_t *ct, uint64_t evid)
651 {
652 	ct_kevent_t *ev;
653 	list_t *queue = &ct->ct_events.ctq_events;
654 	int error = ESRCH;
655 
656 	mutex_enter(&ct->ct_lock);
657 	mutex_enter(&ct->ct_events.ctq_lock);
658 	/*
659 	 * We are probably ACKing something near the head of the queue.
660 	 */
661 	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
662 		if (ev->cte_id == evid) {
663 			if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
664 				ev->cte_flags |= CTE_ACK;
665 				ct->ct_evcnt--;
666 				error = 0;
667 			}
668 			break;
669 		}
670 	}
671 	mutex_exit(&ct->ct_events.ctq_lock);
672 	mutex_exit(&ct->ct_lock);
673 
674 	return (error);
675 }
676 
677 /*
678  * contract_orphan
679  *
680  * Icky-poo.  This is a process-contract special, used to ACK all
681  * critical messages when a contract is orphaned.
682  */
683 void
684 contract_orphan(contract_t *ct)
685 {
686 	ct_kevent_t *ev;
687 	list_t *queue = &ct->ct_events.ctq_events;
688 
689 	ASSERT(MUTEX_HELD(&ct->ct_lock));
690 	ASSERT(ct->ct_state != CTS_ORPHAN);
691 
692 	mutex_enter(&ct->ct_events.ctq_lock);
693 	ct->ct_state = CTS_ORPHAN;
694 	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
695 		if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
696 			ev->cte_flags |= CTE_ACK;
697 			ct->ct_evcnt--;
698 		}
699 	}
700 	mutex_exit(&ct->ct_events.ctq_lock);
701 
702 	ASSERT(ct->ct_evcnt == 0);
703 }
704 
705 /*
706  * contract_destroy
707  *
708  * Explicit contract destruction.  Called when contract is empty.
709  * The contract will actually stick around until all of its events are
710  * removed from the bundle and and process bundle queues, and all fds
711  * which refer to it are closed.  See contract_dtor if you are looking
712  * for what destroys the contract structure.
713  */
714 void
715 contract_destroy(contract_t *ct)
716 {
717 	ASSERT(MUTEX_HELD(&ct->ct_lock));
718 	ASSERT(ct->ct_state != CTS_DEAD);
719 	ASSERT(ct->ct_owner == NULL);
720 
721 	ct->ct_state = CTS_DEAD;
722 	cte_queue_drain(&ct->ct_events, 1);
723 	mutex_exit(&ct->ct_lock);
724 	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
725 	cte_trim(&ct->ct_type->ct_type_events, ct);
726 	mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
727 	mutex_enter(&ct->ct_lock);
728 	ct->ct_type->ct_type_ops->contop_destroy(ct);
729 	mutex_exit(&ct->ct_lock);
730 	contract_rele(ct);
731 }
732 
733 /*
734  * contract_vnode_get
735  *
736  * Obtains the contract directory vnode for this contract, if there is
737  * one.  The caller must VN_RELE the vnode when they are through using
738  * it.
739  */
740 vnode_t *
741 contract_vnode_get(contract_t *ct, vfs_t *vfsp)
742 {
743 	contract_vnode_t *ctv;
744 	vnode_t *vp = NULL;
745 
746 	mutex_enter(&ct->ct_lock);
747 	for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
748 	    ctv = list_next(&ct->ct_vnodes, ctv))
749 		if (ctv->ctv_vnode->v_vfsp == vfsp) {
750 			vp = ctv->ctv_vnode;
751 			VN_HOLD(vp);
752 			break;
753 		}
754 	mutex_exit(&ct->ct_lock);
755 	return (vp);
756 }
757 
758 /*
759  * contract_vnode_set
760  *
761  * Sets the contract directory vnode for this contract.  We don't hold
762  * a reference on the vnode because we don't want to prevent it from
763  * being freed.  The vnode's inactive entry point will take care of
764  * notifying us when it should be removed.
765  */
766 void
767 contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
768 {
769 	mutex_enter(&ct->ct_lock);
770 	ctv->ctv_vnode = vnode;
771 	list_insert_head(&ct->ct_vnodes, ctv);
772 	mutex_exit(&ct->ct_lock);
773 }
774 
775 /*
776  * contract_vnode_clear
777  *
778  * Removes this vnode as the contract directory vnode for this
779  * contract.  Called from a contract directory's inactive entry point,
780  * this may return 0 indicating that the vnode gained another reference
781  * because of a simultaneous call to contract_vnode_get.
782  */
783 int
784 contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
785 {
786 	vnode_t *vp = ctv->ctv_vnode;
787 	int result;
788 
789 	mutex_enter(&ct->ct_lock);
790 	mutex_enter(&vp->v_lock);
791 	if (vp->v_count == 1) {
792 		list_remove(&ct->ct_vnodes, ctv);
793 		result = 1;
794 	} else {
795 		vp->v_count--;
796 		result = 0;
797 	}
798 	mutex_exit(&vp->v_lock);
799 	mutex_exit(&ct->ct_lock);
800 
801 	return (result);
802 }
803 
804 /*
805  * contract_exit
806  *
807  * Abandons all contracts held by process p, and drains process p's
808  * bundle queues.  Called on process exit.
809  */
810 void
811 contract_exit(proc_t *p)
812 {
813 	contract_t *ct;
814 	void *cookie = NULL;
815 	int i;
816 
817 	ASSERT(p == curproc);
818 
819 	/*
820 	 * Abandon held contracts.  contract_abandon knows enough not
821 	 * to remove the contract from the list a second time.  We are
822 	 * exiting, so no locks are needed here.  But because
823 	 * contract_abandon will take p_lock, we need to make sure we
824 	 * aren't holding it.
825 	 */
826 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
827 	while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
828 		VERIFY(contract_abandon(ct, p, 0) == 0);
829 
830 	/*
831 	 * Drain pbundles.  Because a process bundle queue could have
832 	 * been passed to another process, they may not be freed right
833 	 * away.
834 	 */
835 	if (p->p_ct_equeue) {
836 		for (i = 0; i < CTT_MAXTYPE; i++)
837 			if (p->p_ct_equeue[i])
838 				cte_queue_drain(p->p_ct_equeue[i], 0);
839 		kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
840 	}
841 }
842 
843 /*
844  * contract_status_common
845  *
846  * Populates a ct_status structure.  Used by contract types in their
847  * status entry points and ctfs when only common information is
848  * requested.
849  */
850 void
851 contract_status_common(contract_t *ct, zone_t *zone, void *status,
852     model_t model)
853 {
854 	STRUCT_HANDLE(ct_status, lstatus);
855 
856 	STRUCT_SET_HANDLE(lstatus, model, status);
857 	ASSERT(MUTEX_HELD(&ct->ct_lock));
858 	if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
859 	    zone->zone_uniqid == ct->ct_czuniqid) {
860 		zone_t *czone;
861 		zoneid_t zoneid = -1;
862 
863 		/*
864 		 * Contracts don't have holds on the zones they were
865 		 * created by.  If the contract's zone no longer
866 		 * exists, we say its zoneid is -1.
867 		 */
868 		if (zone->zone_uniqid == ct->ct_czuniqid ||
869 		    ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
870 			zoneid = ct->ct_zoneid;
871 		} else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
872 			if (czone->zone_uniqid == ct->ct_mzuniqid)
873 				zoneid = ct->ct_zoneid;
874 			zone_rele(czone);
875 		}
876 
877 		STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
878 		STRUCT_FSET(lstatus, ctst_holder,
879 		    (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
880 		    (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
881 		STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
882 	} else {
883 		/*
884 		 * We are looking at a contract which was created by a
885 		 * process outside of our zone.  We provide fake zone,
886 		 * holder, and state information.
887 		 */
888 
889 		STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
890 		/*
891 		 * Since "zone" can't disappear until the calling ctfs
892 		 * is unmounted, zone_zsched must be valid.
893 		 */
894 		STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
895 		    zone->zone_zsched->p_pid : 0);
896 		STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
897 		    CTS_OWNED : ct->ct_state);
898 	}
899 	STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
900 	STRUCT_FSET(lstatus, ctst_ntime, -1);
901 	STRUCT_FSET(lstatus, ctst_qtime, -1);
902 	STRUCT_FSET(lstatus, ctst_nevid,
903 	    ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
904 	STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
905 	STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
906 	STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
907 	STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
908 	STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
909 }
910 
911 /*
912  * contract_checkcred
913  *
914  * Determines if the specified contract is owned by a process with the
915  * same effective uid as the specified credential.  The caller must
916  * ensure that the uid spaces are the same.  Returns 1 on success.
917  */
918 static int
919 contract_checkcred(contract_t *ct, const cred_t *cr)
920 {
921 	proc_t *p;
922 	int fail = 1;
923 
924 	mutex_enter(&ct->ct_lock);
925 	if ((p = ct->ct_owner) != NULL) {
926 		mutex_enter(&p->p_crlock);
927 		fail = crgetuid(cr) != crgetuid(p->p_cred);
928 		mutex_exit(&p->p_crlock);
929 	}
930 	mutex_exit(&ct->ct_lock);
931 
932 	return (!fail);
933 }
934 
935 /*
936  * contract_owned
937  *
938  * Determines if the specified credential can view an event generated
939  * by the specified contract.  If locked is set, the contract's ct_lock
940  * is held and the caller will need to do additional work to determine
941  * if they truly can see the event.  Returns 1 on success.
942  */
943 int
944 contract_owned(contract_t *ct, const cred_t *cr, int locked)
945 {
946 	int owner, cmatch, zmatch;
947 	uint64_t zuniqid, mzuniqid;
948 	uid_t euid;
949 
950 	ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
951 
952 	zuniqid = curproc->p_zone->zone_uniqid;
953 	mzuniqid = contract_getzuniqid(ct);
954 	euid = crgetuid(cr);
955 
956 	/*
957 	 * owner: we own the contract
958 	 * cmatch: we are in the creator's (and holder's) zone and our
959 	 *   uid matches the creator's or holder's
960 	 * zmatch: we are in the effective zone of a contract created
961 	 *   in the global zone, and our uid matches that of the
962 	 *   virtualized holder's (zsched/kcred)
963 	 */
964 	owner = (ct->ct_owner == curproc);
965 	cmatch = (zuniqid == ct->ct_czuniqid) &&
966 	    ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
967 	zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
968 	    (crgetuid(kcred) == euid);
969 
970 	return (owner || cmatch || zmatch);
971 }
972 
973 
974 /*
975  * contract_type_init
976  *
977  * Called by contract types to register themselves with the contracts
978  * framework.
979  */
980 ct_type_t *
981 contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
982     ct_f_default_t *dfault)
983 {
984 	ct_type_t *result;
985 
986 	ASSERT(type < CTT_MAXTYPE);
987 
988 	result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
989 
990 	mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
991 	avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
992 	    offsetof(contract_t, ct_cttavl));
993 	cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
994 	result->ct_type_name = name;
995 	result->ct_type_ops = ops;
996 	result->ct_type_default = dfault;
997 	result->ct_type_evid = 0;
998 	gethrestime(&result->ct_type_timestruc);
999 	result->ct_type_index = type;
1000 
1001 	ct_types[type] = result;
1002 
1003 	return (result);
1004 }
1005 
1006 /*
1007  * contract_type_count
1008  *
1009  * Obtains the number of contracts of a particular type.
1010  */
1011 int
1012 contract_type_count(ct_type_t *type)
1013 {
1014 	ulong_t count;
1015 
1016 	mutex_enter(&type->ct_type_lock);
1017 	count = avl_numnodes(&type->ct_type_avl);
1018 	mutex_exit(&type->ct_type_lock);
1019 
1020 	return (count);
1021 }
1022 
1023 /*
1024  * contract_type_max
1025  *
1026  * Obtains the maximum contract id of of a particular type.
1027  */
1028 ctid_t
1029 contract_type_max(ct_type_t *type)
1030 {
1031 	contract_t *ct;
1032 	ctid_t res;
1033 
1034 	mutex_enter(&type->ct_type_lock);
1035 	ct = avl_last(&type->ct_type_avl);
1036 	res = ct ? ct->ct_id : -1;
1037 	mutex_exit(&type->ct_type_lock);
1038 
1039 	return (res);
1040 }
1041 
1042 /*
1043  * contract_max
1044  *
1045  * Obtains the maximum contract id.
1046  */
1047 ctid_t
1048 contract_max(void)
1049 {
1050 	contract_t *ct;
1051 	ctid_t res;
1052 
1053 	mutex_enter(&contract_lock);
1054 	ct = avl_last(&contract_avl);
1055 	res = ct ? ct->ct_id : -1;
1056 	mutex_exit(&contract_lock);
1057 
1058 	return (res);
1059 }
1060 
1061 /*
1062  * contract_lookup_common
1063  *
1064  * Common code for contract_lookup and contract_type_lookup.  Takes a
1065  * pointer to an AVL tree to search in.  Should be called with the
1066  * appropriate tree-protecting lock held (unfortunately unassertable).
1067  */
1068 static ctid_t
1069 contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
1070 {
1071 	contract_t template, *ct;
1072 	avl_index_t where;
1073 	ctid_t res;
1074 
1075 	template.ct_id = current;
1076 	ct = avl_find(tree, &template, &where);
1077 	if (ct == NULL)
1078 		ct = avl_nearest(tree, where, AVL_AFTER);
1079 	if (zuniqid != GLOBAL_ZONEUNIQID)
1080 		while (ct && (contract_getzuniqid(ct) != zuniqid))
1081 			ct = AVL_NEXT(tree, ct);
1082 	res = ct ? ct->ct_id : -1;
1083 
1084 	return (res);
1085 }
1086 
1087 /*
1088  * contract_type_lookup
1089  *
1090  * Returns the next type contract after the specified id, visible from
1091  * the specified zone.
1092  */
1093 ctid_t
1094 contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
1095 {
1096 	ctid_t res;
1097 
1098 	mutex_enter(&type->ct_type_lock);
1099 	res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
1100 	mutex_exit(&type->ct_type_lock);
1101 
1102 	return (res);
1103 }
1104 
1105 /*
1106  * contract_lookup
1107  *
1108  * Returns the next contract after the specified id, visible from the
1109  * specified zone.
1110  */
1111 ctid_t
1112 contract_lookup(uint64_t zuniqid, ctid_t current)
1113 {
1114 	ctid_t res;
1115 
1116 	mutex_enter(&contract_lock);
1117 	res = contract_lookup_common(&contract_avl, zuniqid, current);
1118 	mutex_exit(&contract_lock);
1119 
1120 	return (res);
1121 }
1122 
1123 /*
1124  * contract_plookup
1125  *
1126  * Returns the next contract held by process p after the specified id,
1127  * visible from the specified zone.  Made complicated by the fact that
1128  * contracts visible in a zone but held by processes outside of the
1129  * zone need to appear as being held by zsched to zone members.
1130  */
1131 ctid_t
1132 contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
1133 {
1134 	contract_t template, *ct;
1135 	avl_index_t where;
1136 	ctid_t res;
1137 
1138 	template.ct_id = current;
1139 	if (zuniqid != GLOBAL_ZONEUNIQID &&
1140 	    (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
1141 		/* This is inelegant. */
1142 		mutex_enter(&contract_lock);
1143 		ct = avl_find(&contract_avl, &template, &where);
1144 		if (ct == NULL)
1145 			ct = avl_nearest(&contract_avl, where, AVL_AFTER);
1146 		while (ct && !(ct->ct_state < CTS_ORPHAN &&
1147 		    contract_getzuniqid(ct) == zuniqid &&
1148 		    ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
1149 			ct = AVL_NEXT(&contract_avl, ct);
1150 		res = ct ? ct->ct_id : -1;
1151 		mutex_exit(&contract_lock);
1152 	} else {
1153 		mutex_enter(&p->p_lock);
1154 		ct = avl_find(&p->p_ct_held, &template, &where);
1155 		if (ct == NULL)
1156 			ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
1157 		res = ct ? ct->ct_id : -1;
1158 		mutex_exit(&p->p_lock);
1159 	}
1160 
1161 	return (res);
1162 }
1163 
1164 /*
1165  * contract_ptr_common
1166  *
1167  * Common code for contract_ptr and contract_type_ptr.  Takes a pointer
1168  * to an AVL tree to search in.  Should be called with the appropriate
1169  * tree-protecting lock held (unfortunately unassertable).
1170  */
1171 static contract_t *
1172 contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
1173 {
1174 	contract_t template, *ct;
1175 
1176 	template.ct_id = id;
1177 	ct = avl_find(tree, &template, NULL);
1178 	if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
1179 	    contract_getzuniqid(ct) != zuniqid)) {
1180 		return (NULL);
1181 	}
1182 
1183 	/*
1184 	 * Check to see if a thread is in the window in contract_rele
1185 	 * between dropping the reference count and removing the
1186 	 * contract from the type AVL.
1187 	 */
1188 	mutex_enter(&ct->ct_reflock);
1189 	if (ct->ct_ref) {
1190 		ct->ct_ref++;
1191 		mutex_exit(&ct->ct_reflock);
1192 	} else {
1193 		mutex_exit(&ct->ct_reflock);
1194 		ct = NULL;
1195 	}
1196 
1197 	return (ct);
1198 }
1199 
1200 /*
1201  * contract_type_ptr
1202  *
1203  * Returns a pointer to the contract with the specified id.  The
1204  * contract is held, so the caller needs to release the reference when
1205  * it is through with the contract.
1206  */
1207 contract_t *
1208 contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
1209 {
1210 	contract_t *ct;
1211 
1212 	mutex_enter(&type->ct_type_lock);
1213 	ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
1214 	mutex_exit(&type->ct_type_lock);
1215 
1216 	return (ct);
1217 }
1218 
1219 /*
1220  * contract_ptr
1221  *
1222  * Returns a pointer to the contract with the specified id.  The
1223  * contract is held, so the caller needs to release the reference when
1224  * it is through with the contract.
1225  */
1226 contract_t *
1227 contract_ptr(ctid_t id, uint64_t zuniqid)
1228 {
1229 	contract_t *ct;
1230 
1231 	mutex_enter(&contract_lock);
1232 	ct = contract_ptr_common(&contract_avl, id, zuniqid);
1233 	mutex_exit(&contract_lock);
1234 
1235 	return (ct);
1236 }
1237 
1238 /*
1239  * contract_type_time
1240  *
1241  * Obtains the last time a contract of a particular type was created.
1242  */
1243 void
1244 contract_type_time(ct_type_t *type, timestruc_t *time)
1245 {
1246 	mutex_enter(&type->ct_type_lock);
1247 	*time = type->ct_type_timestruc;
1248 	mutex_exit(&type->ct_type_lock);
1249 }
1250 
1251 /*
1252  * contract_type_bundle
1253  *
1254  * Obtains a type's bundle queue.
1255  */
1256 ct_equeue_t *
1257 contract_type_bundle(ct_type_t *type)
1258 {
1259 	return (&type->ct_type_events);
1260 }
1261 
1262 /*
1263  * contract_type_pbundle
1264  *
1265  * Obtain's a process's bundle queue.  If one doesn't exist, one is
1266  * created.  Often used simply to ensure that a bundle queue is
1267  * allocated.
1268  */
1269 ct_equeue_t *
1270 contract_type_pbundle(ct_type_t *type, proc_t *pp)
1271 {
1272 	/*
1273 	 * If there isn't an array of bundle queues, allocate one.
1274 	 */
1275 	if (pp->p_ct_equeue == NULL) {
1276 		size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
1277 		ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);
1278 
1279 		mutex_enter(&pp->p_lock);
1280 		if (pp->p_ct_equeue)
1281 			kmem_free(qa, size);
1282 		else
1283 			pp->p_ct_equeue = qa;
1284 		mutex_exit(&pp->p_lock);
1285 	}
1286 
1287 	/*
1288 	 * If there isn't a bundle queue of the required type, allocate
1289 	 * one.
1290 	 */
1291 	if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
1292 		ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
1293 		cte_queue_create(q, CTEL_PBUNDLE, 20, 1);
1294 
1295 		mutex_enter(&pp->p_lock);
1296 		if (pp->p_ct_equeue[type->ct_type_index])
1297 			cte_queue_drain(q, 0);
1298 		else
1299 			pp->p_ct_equeue[type->ct_type_index] = q;
1300 		mutex_exit(&pp->p_lock);
1301 	}
1302 
1303 	return (pp->p_ct_equeue[type->ct_type_index]);
1304 }
1305 
1306 /*
1307  * ctmpl_free
1308  *
1309  * Frees a template.
1310  */
1311 void
1312 ctmpl_free(ct_template_t *template)
1313 {
1314 	mutex_destroy(&template->ctmpl_lock);
1315 	template->ctmpl_ops->ctop_free(template);
1316 }
1317 
1318 /*
1319  * ctmpl_dup
1320  *
1321  * Creates a copy of a template.
1322  */
1323 ct_template_t *
1324 ctmpl_dup(ct_template_t *template)
1325 {
1326 	ct_template_t *new;
1327 
1328 	if (template == NULL)
1329 		return (NULL);
1330 
1331 	new = template->ctmpl_ops->ctop_dup(template);
1332 	/*
1333 	 * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
1334 	 * should have remain held until now.
1335 	 */
1336 	mutex_exit(&template->ctmpl_lock);
1337 
1338 	return (new);
1339 }
1340 
1341 /*
1342  * ctmpl_set
1343  *
1344  * Sets the requested terms of a template.
1345  */
1346 int
1347 ctmpl_set(ct_template_t *template, ct_param_t *param, const cred_t *cr)
1348 {
1349 	int result = 0;
1350 
1351 	mutex_enter(&template->ctmpl_lock);
1352 	switch (param->ctpm_id) {
1353 	case CTP_COOKIE:
1354 		template->ctmpl_cookie = param->ctpm_value;
1355 		break;
1356 	case CTP_EV_INFO:
1357 		if (param->ctpm_value &
1358 		    ~(uint64_t)template->ctmpl_ops->allevents)
1359 			result = EINVAL;
1360 		else
1361 			template->ctmpl_ev_info = param->ctpm_value;
1362 		break;
1363 	case CTP_EV_CRITICAL:
1364 		if (param->ctpm_value &
1365 		    ~(uint64_t)template->ctmpl_ops->allevents) {
1366 			result = EINVAL;
1367 			break;
1368 		} else if ((~template->ctmpl_ev_crit &
1369 		    param->ctpm_value) == 0) {
1370 			/*
1371 			 * Assume that a pure reduction of the critical
1372 			 * set is allowed by the contract type.
1373 			 */
1374 			template->ctmpl_ev_crit = param->ctpm_value;
1375 			break;
1376 		}
1377 		/*
1378 		 * There may be restrictions on what we can make
1379 		 * critical, so we defer to the judgement of the
1380 		 * contract type.
1381 		 */
1382 		/* FALLTHROUGH */
1383 	default:
1384 		result = template->ctmpl_ops->ctop_set(template, param, cr);
1385 	}
1386 	mutex_exit(&template->ctmpl_lock);
1387 
1388 	return (result);
1389 }
1390 
1391 /*
1392  * ctmpl_get
1393  *
1394  * Obtains the requested terms from a template.
1395  */
1396 int
1397 ctmpl_get(ct_template_t *template, ct_param_t *param)
1398 {
1399 	int result = 0;
1400 
1401 	mutex_enter(&template->ctmpl_lock);
1402 	switch (param->ctpm_id) {
1403 	case CTP_COOKIE:
1404 		param->ctpm_value = template->ctmpl_cookie;
1405 		break;
1406 	case CTP_EV_INFO:
1407 		param->ctpm_value = template->ctmpl_ev_info;
1408 		break;
1409 	case CTP_EV_CRITICAL:
1410 		param->ctpm_value = template->ctmpl_ev_crit;
1411 		break;
1412 	default:
1413 		result = template->ctmpl_ops->ctop_get(template, param);
1414 	}
1415 	mutex_exit(&template->ctmpl_lock);
1416 
1417 	return (result);
1418 }
1419 
1420 /*
1421  * ctmpl_makecurrent
1422  *
1423  * Used by ctmpl_activate and ctmpl_clear to set the current thread's
1424  * active template.  Frees the old active template, if there was one.
1425  */
1426 static void
1427 ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
1428 {
1429 	klwp_t *curlwp = ttolwp(curthread);
1430 	proc_t *p = curproc;
1431 	ct_template_t *old;
1432 
1433 	mutex_enter(&p->p_lock);
1434 	old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
1435 	curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
1436 	mutex_exit(&p->p_lock);
1437 
1438 	if (old)
1439 		ctmpl_free(old);
1440 }
1441 
1442 /*
1443  * ctmpl_activate
1444  *
1445  * Copy the specified template as the current thread's activate
1446  * template of that type.
1447  */
1448 void
1449 ctmpl_activate(ct_template_t *template)
1450 {
1451 	ctmpl_makecurrent(template, ctmpl_dup(template));
1452 }
1453 
1454 /*
1455  * ctmpl_clear
1456  *
1457  * Clears the current thread's activate template of the same type as
1458  * the specified template.
1459  */
1460 void
1461 ctmpl_clear(ct_template_t *template)
1462 {
1463 	ctmpl_makecurrent(template, NULL);
1464 }
1465 
1466 /*
1467  * ctmpl_create
1468  *
1469  * Creates a new contract using the specified template.
1470  */
1471 int
1472 ctmpl_create(ct_template_t *template)
1473 {
1474 	return (template->ctmpl_ops->ctop_create(template));
1475 }
1476 
1477 /*
1478  * ctmpl_init
1479  *
1480  * Initializes the common portion of a new contract template.
1481  */
1482 void
1483 ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
1484 {
1485 	mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1486 	new->ctmpl_ops = ops;
1487 	new->ctmpl_type = type;
1488 	new->ctmpl_data = data;
1489 	new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
1490 	new->ctmpl_cookie = 0;
1491 }
1492 
1493 /*
1494  * ctmpl_copy
1495  *
1496  * Copies the common portions of a contract template.  Intended for use
1497  * by a contract type's ctop_dup template op.  Returns with the old
1498  * template's lock held, which will should remain held until the
1499  * template op returns (it is dropped by ctmpl_dup).
1500  */
1501 void
1502 ctmpl_copy(ct_template_t *new, ct_template_t *old)
1503 {
1504 	mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1505 	mutex_enter(&old->ctmpl_lock);
1506 	new->ctmpl_ops = old->ctmpl_ops;
1507 	new->ctmpl_type = old->ctmpl_type;
1508 	new->ctmpl_ev_crit = old->ctmpl_ev_crit;
1509 	new->ctmpl_ev_info = old->ctmpl_ev_info;
1510 	new->ctmpl_cookie = old->ctmpl_cookie;
1511 }
1512 
1513 /*
1514  * ctmpl_create_inval
1515  *
1516  * Returns EINVAL.  Provided for the convenience of those contract
1517  * types which don't support ct_tmpl_create(3contract) and would
1518  * otherwise need to create their own stub for the ctop_create template
1519  * op.
1520  */
1521 /*ARGSUSED*/
1522 int
1523 ctmpl_create_inval(ct_template_t *template)
1524 {
1525 	return (EINVAL);
1526 }
1527 
1528 
1529 /*
1530  * cte_queue_create
1531  *
1532  * Initializes a queue of a particular type.  If dynamic is set, the
1533  * queue is to be freed when its last listener is removed after being
1534  * drained.
1535  */
1536 static void
1537 cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
1538 {
1539 	mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
1540 	q->ctq_listno = list;
1541 	list_create(&q->ctq_events, sizeof (ct_kevent_t),
1542 	    offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
1543 	list_create(&q->ctq_listeners, sizeof (ct_listener_t),
1544 	    offsetof(ct_listener_t, ctl_allnode));
1545 	list_create(&q->ctq_tail, sizeof (ct_listener_t),
1546 	    offsetof(ct_listener_t, ctl_tailnode));
1547 	gethrestime(&q->ctq_atime);
1548 	q->ctq_nlisteners = 0;
1549 	q->ctq_nreliable = 0;
1550 	q->ctq_ninf = 0;
1551 	q->ctq_max = maxinf;
1552 
1553 	/*
1554 	 * Bundle queues and contract queues are embedded in other
1555 	 * structures and are implicitly referenced counted by virtue
1556 	 * of their vnodes' indirect hold on their contracts.  Process
1557 	 * bundle queues are dynamically allocated and may persist
1558 	 * after the death of the process, so they must be explicitly
1559 	 * reference counted.
1560 	 */
1561 	q->ctq_flags = dynamic ? CTQ_REFFED : 0;
1562 }
1563 
1564 /*
1565  * cte_queue_destroy
1566  *
1567  * Destroys the specified queue.  The queue is freed if referenced
1568  * counted.
1569  */
1570 static void
1571 cte_queue_destroy(ct_equeue_t *q)
1572 {
1573 	ASSERT(q->ctq_flags & CTQ_DEAD);
1574 	ASSERT(q->ctq_nlisteners == 0);
1575 	ASSERT(q->ctq_nreliable == 0);
1576 	list_destroy(&q->ctq_events);
1577 	list_destroy(&q->ctq_listeners);
1578 	list_destroy(&q->ctq_tail);
1579 	mutex_destroy(&q->ctq_lock);
1580 	if (q->ctq_flags & CTQ_REFFED)
1581 		kmem_free(q, sizeof (ct_equeue_t));
1582 }
1583 
1584 /*
1585  * cte_hold
1586  *
1587  * Takes a hold on the specified event.
1588  */
1589 static void
1590 cte_hold(ct_kevent_t *e)
1591 {
1592 	mutex_enter(&e->cte_lock);
1593 	ASSERT(e->cte_refs > 0);
1594 	e->cte_refs++;
1595 	mutex_exit(&e->cte_lock);
1596 }
1597 
1598 /*
1599  * cte_rele
1600  *
1601  * Releases a hold on the specified event.  If the caller had the last
1602  * reference, frees the event and releases its hold on the contract
1603  * that generated it.
1604  */
1605 static void
1606 cte_rele(ct_kevent_t *e)
1607 {
1608 	mutex_enter(&e->cte_lock);
1609 	ASSERT(e->cte_refs > 0);
1610 	if (--e->cte_refs) {
1611 		mutex_exit(&e->cte_lock);
1612 		return;
1613 	}
1614 
1615 	contract_rele(e->cte_contract);
1616 
1617 	mutex_destroy(&e->cte_lock);
1618 	if (e->cte_data)
1619 		nvlist_free(e->cte_data);
1620 	if (e->cte_gdata)
1621 		nvlist_free(e->cte_gdata);
1622 	kmem_free(e, sizeof (ct_kevent_t));
1623 }
1624 
1625 /*
1626  * cte_qrele
1627  *
1628  * Remove this listener's hold on the specified event, removing and
1629  * releasing the queue's hold on the event if appropriate.
1630  */
1631 static void
1632 cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1633 {
1634 	ct_member_t *member = &e->cte_nodes[q->ctq_listno];
1635 
1636 	ASSERT(MUTEX_HELD(&q->ctq_lock));
1637 
1638 	if (l->ctl_flags & CTLF_RELIABLE)
1639 		member->ctm_nreliable--;
1640 	if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
1641 		member->ctm_trimmed = 0;
1642 		list_remove(&q->ctq_events, e);
1643 		cte_rele(e);
1644 	}
1645 }
1646 
1647 /*
1648  * cte_qmove
1649  *
1650  * Move this listener to the specified event in the queue.
1651  */
1652 static ct_kevent_t *
1653 cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1654 {
1655 	ct_kevent_t *olde;
1656 
1657 	ASSERT(MUTEX_HELD(&q->ctq_lock));
1658 	ASSERT(l->ctl_equeue == q);
1659 
1660 	if ((olde = l->ctl_position) == NULL)
1661 		list_remove(&q->ctq_tail, l);
1662 
1663 	while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
1664 		e = list_next(&q->ctq_events, e);
1665 
1666 	if (e != NULL) {
1667 		e->cte_nodes[q->ctq_listno].ctm_refs++;
1668 		if (l->ctl_flags & CTLF_RELIABLE)
1669 			e->cte_nodes[q->ctq_listno].ctm_nreliable++;
1670 	} else {
1671 		list_insert_tail(&q->ctq_tail, l);
1672 	}
1673 
1674 	l->ctl_position = e;
1675 	if (olde)
1676 		cte_qrele(q, l, olde);
1677 
1678 	return (e);
1679 }
1680 
1681 /*
1682  * cte_checkcred
1683  *
1684  * Determines if the specified event's contract is owned by a process
1685  * with the same effective uid as the specified credential.  Called
1686  * after a failed call to contract_owned with locked set.  Because it
1687  * drops the queue lock, its caller (cte_qreadable) needs to make sure
1688  * we're still in the same place after we return.  Returns 1 on
1689  * success.
1690  */
1691 static int
1692 cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
1693 {
1694 	int result;
1695 	contract_t *ct = e->cte_contract;
1696 
1697 	cte_hold(e);
1698 	mutex_exit(&q->ctq_lock);
1699 	result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
1700 	    contract_checkcred(ct, cr);
1701 	mutex_enter(&q->ctq_lock);
1702 	cte_rele(e);
1703 
1704 	return (result);
1705 }
1706 
1707 /*
1708  * cte_qreadable
1709  *
1710  * Ensures that the listener is pointing to a valid event that the
1711  * caller has the credentials to read.  Returns 0 if we can read the
1712  * event we're pointing to.
1713  */
1714 static int
1715 cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
1716     uint64_t zuniqid, int crit)
1717 {
1718 	ct_kevent_t *e, *next;
1719 	contract_t *ct;
1720 
1721 	ASSERT(MUTEX_HELD(&q->ctq_lock));
1722 	ASSERT(l->ctl_equeue == q);
1723 
1724 	if (l->ctl_flags & CTLF_COPYOUT)
1725 		return (1);
1726 
1727 	next = l->ctl_position;
1728 	while (e = cte_qmove(q, l, next)) {
1729 		ct = e->cte_contract;
1730 		/*
1731 		 * Check obvious things first.  If we are looking for a
1732 		 * critical message, is this one?  If we aren't in the
1733 		 * global zone, is this message meant for us?
1734 		 */
1735 		if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
1736 		    (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
1737 		    zuniqid != contract_getzuniqid(ct))) {
1738 
1739 			next = list_next(&q->ctq_events, e);
1740 
1741 		/*
1742 		 * Next, see if our effective uid equals that of owner
1743 		 * or author of the contract.  Since we are holding the
1744 		 * queue lock, contract_owned can't always check if we
1745 		 * have the same effective uid as the contract's
1746 		 * owner.  If it comes to that, it fails and we take
1747 		 * the slow(er) path.
1748 		 */
1749 		} else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {
1750 
1751 			/*
1752 			 * At this point we either don't have any claim
1753 			 * to this contract or we match the effective
1754 			 * uid of the owner but couldn't tell.  We
1755 			 * first test for a NULL holder so that events
1756 			 * from orphans and inherited contracts avoid
1757 			 * the penalty phase.
1758 			 */
1759 			if (e->cte_contract->ct_owner == NULL &&
1760 			    !secpolicy_contract_observer_choice(cr))
1761 				next = list_next(&q->ctq_events, e);
1762 
1763 			/*
1764 			 * cte_checkcred will juggle locks to see if we
1765 			 * have the same uid as the event's contract's
1766 			 * current owner.  If it succeeds, we have to
1767 			 * make sure we are in the same point in the
1768 			 * queue.
1769 			 */
1770 			else if (cte_checkcred(q, e, cr) &&
1771 			    l->ctl_position == e)
1772 				break;
1773 
1774 			/*
1775 			 * cte_checkcred failed; see if we're in the
1776 			 * same place.
1777 			 */
1778 			else if (l->ctl_position == e)
1779 				if (secpolicy_contract_observer_choice(cr))
1780 					break;
1781 				else
1782 					next = list_next(&q->ctq_events, e);
1783 
1784 			/*
1785 			 * cte_checkcred failed, and our position was
1786 			 * changed.  Start from there.
1787 			 */
1788 			else
1789 				next = l->ctl_position;
1790 		} else {
1791 			break;
1792 		}
1793 	}
1794 
1795 	/*
1796 	 * We check for CTLF_COPYOUT again in case we dropped the queue
1797 	 * lock in cte_checkcred.
1798 	 */
1799 	return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
1800 }
1801 
1802 /*
1803  * cte_qwakeup
1804  *
1805  * Wakes up any waiting listeners and points them at the specified event.
1806  */
1807 static void
1808 cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
1809 {
1810 	ct_listener_t *l;
1811 
1812 	ASSERT(MUTEX_HELD(&q->ctq_lock));
1813 
1814 	while (l = list_head(&q->ctq_tail)) {
1815 		list_remove(&q->ctq_tail, l);
1816 		e->cte_nodes[q->ctq_listno].ctm_refs++;
1817 		if (l->ctl_flags & CTLF_RELIABLE)
1818 			e->cte_nodes[q->ctq_listno].ctm_nreliable++;
1819 		l->ctl_position = e;
1820 		cv_signal(&l->ctl_cv);
1821 		pollwakeup(&l->ctl_pollhead, POLLIN);
1822 	}
1823 }
1824 
1825 /*
1826  * cte_copy
1827  *
1828  * Copies events from the specified contract event queue to the
1829  * end of the specified process bundle queue.  Only called from
1830  * contract_adopt.
1831  *
1832  * We copy to the end of the target queue instead of mixing the events
1833  * in their proper order because otherwise the act of adopting a
1834  * contract would require a process to reset all process bundle
1835  * listeners it needed to see the new events.  This would, in turn,
1836  * require the process to keep track of which preexisting events had
1837  * already been processed.
1838  */
1839 static void
1840 cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
1841 {
1842 	ct_kevent_t *e, *first = NULL;
1843 
1844 	ASSERT(q->ctq_listno == CTEL_CONTRACT);
1845 	ASSERT(newq->ctq_listno == CTEL_PBUNDLE);
1846 
1847 	mutex_enter(&q->ctq_lock);
1848 	mutex_enter(&newq->ctq_lock);
1849 
1850 	/*
1851 	 * For now, only copy critical events.
1852 	 */
1853 	for (e = list_head(&q->ctq_events); e != NULL;
1854 	    e = list_next(&q->ctq_events, e)) {
1855 		if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
1856 			if (first == NULL)
1857 				first = e;
1858 			list_insert_tail(&newq->ctq_events, e);
1859 			cte_hold(e);
1860 		}
1861 	}
1862 
1863 	mutex_exit(&q->ctq_lock);
1864 
1865 	if (first)
1866 		cte_qwakeup(newq, first);
1867 
1868 	mutex_exit(&newq->ctq_lock);
1869 }
1870 
1871 /*
1872  * cte_trim
1873  *
1874  * Trims unneeded events from an event queue.  Algorithm works as
1875  * follows:
1876  *
1877  *   Removes all informative and acknowledged critical events until the
1878  *   first referenced event is found.
1879  *
1880  *   If a contract is specified, removes all events (regardless of
1881  *   acknowledgement) generated by that contract until the first event
1882  *   referenced by a reliable listener is found.  Reference events are
1883  *   removed by marking them "trimmed".  Such events will be removed
1884  *   when the last reference is dropped and will be skipped by future
1885  *   listeners.
1886  *
1887  * This is pretty basic.  Ideally this should remove from the middle of
1888  * the list (i.e. beyond the first referenced event), and even
1889  * referenced events.
1890  */
1891 static void
1892 cte_trim(ct_equeue_t *q, contract_t *ct)
1893 {
1894 	ct_kevent_t *e, *next;
1895 	int flags, stopper;
1896 	int start = 1;
1897 
1898 	ASSERT(MUTEX_HELD(&q->ctq_lock));
1899 
1900 	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
1901 		next = list_next(&q->ctq_events, e);
1902 		flags = e->cte_flags;
1903 		stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
1904 		    (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
1905 		if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
1906 			if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
1907 			    (e->cte_contract == ct)) {
1908 				/*
1909 				 * Toss informative and ACKed critical messages.
1910 				 */
1911 				list_remove(&q->ctq_events, e);
1912 				cte_rele(e);
1913 			}
1914 		} else if ((e->cte_contract == ct) && !stopper) {
1915 			ASSERT(q->ctq_nlisteners != 0);
1916 			e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
1917 		} else if (ct && !stopper) {
1918 			start = 0;
1919 		} else {
1920 			/*
1921 			 * Don't free messages past the first reader.
1922 			 */
1923 			break;
1924 		}
1925 	}
1926 }
1927 
1928 /*
1929  * cte_queue_drain
1930  *
1931  * Drain all events from the specified queue, and mark it dead.  If
1932  * "ack" is set, acknowledge any critical events we find along the
1933  * way.
1934  */
1935 static void
1936 cte_queue_drain(ct_equeue_t *q, int ack)
1937 {
1938 	ct_kevent_t *e, *next;
1939 	ct_listener_t *l;
1940 
1941 	mutex_enter(&q->ctq_lock);
1942 
1943 	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
1944 		next = list_next(&q->ctq_events, e);
1945 		if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
1946 			/*
1947 			 * Make sure critical messages are eventually
1948 			 * removed from the bundle queues.
1949 			 */
1950 			mutex_enter(&e->cte_lock);
1951 			e->cte_flags |= CTE_ACK;
1952 			mutex_exit(&e->cte_lock);
1953 			ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
1954 			e->cte_contract->ct_evcnt--;
1955 		}
1956 		list_remove(&q->ctq_events, e);
1957 		e->cte_nodes[q->ctq_listno].ctm_refs = 0;
1958 		e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
1959 		e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
1960 		cte_rele(e);
1961 	}
1962 
1963 	/*
1964 	 * This is necessary only because of CTEL_PBUNDLE listeners;
1965 	 * the events they point to can move from one pbundle to
1966 	 * another.  Fortunately, this only happens if the contract is
1967 	 * inherited, which (in turn) only happens if the process
1968 	 * exits, which means it's an all-or-nothing deal.  If this
1969 	 * wasn't the case, we would instead need to keep track of
1970 	 * listeners on a per-event basis, not just a per-queue basis.
1971 	 * This would have the side benefit of letting us clean up
1972 	 * trimmed events sooner (i.e. immediately), but would
1973 	 * unfortunately make events even bigger than they already
1974 	 * are.
1975 	 */
1976 	for (l = list_head(&q->ctq_listeners); l;
1977 	    l = list_next(&q->ctq_listeners, l)) {
1978 		l->ctl_flags |= CTLF_DEAD;
1979 		if (l->ctl_position) {
1980 			l->ctl_position = NULL;
1981 			list_insert_tail(&q->ctq_tail, l);
1982 		}
1983 		cv_broadcast(&l->ctl_cv);
1984 	}
1985 
1986 	/*
1987 	 * Disallow events.
1988 	 */
1989 	q->ctq_flags |= CTQ_DEAD;
1990 
1991 	/*
1992 	 * If we represent the last reference to a reference counted
1993 	 * process bundle queue, free it.
1994 	 */
1995 	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
1996 		cte_queue_destroy(q);
1997 	else
1998 		mutex_exit(&q->ctq_lock);
1999 }
2000 
2001 /*
2002  * cte_publish
2003  *
2004  * Publishes an event to a specific queue.  Only called by
2005  * cte_publish_all.
2006  */
2007 static void
2008 cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp)
2009 {
2010 	ASSERT(MUTEX_HELD(&q->ctq_lock));
2011 
2012 	q->ctq_atime = *tsp;
2013 
2014 	/*
2015 	 * Don't publish if the event is informative and there aren't
2016 	 * any listeners, or if the queue has been shut down.
2017 	 */
2018 	if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
2019 	    (q->ctq_flags & CTQ_DEAD)) {
2020 		mutex_exit(&q->ctq_lock);
2021 		cte_rele(e);
2022 		return;
2023 	}
2024 
2025 	/*
2026 	 * Enqueue event
2027 	 */
2028 	list_insert_tail(&q->ctq_events, e);
2029 
2030 	/*
2031 	 * Check for waiting listeners
2032 	 */
2033 	cte_qwakeup(q, e);
2034 
2035 	/*
2036 	 * Trim unnecessary events from the queue.
2037 	 */
2038 	cte_trim(q, NULL);
2039 	mutex_exit(&q->ctq_lock);
2040 }
2041 
2042 /*
2043  * cte_publish_all
2044  *
2045  * Publish an event to all necessary event queues.  The event, e, must
2046  * be zallocated by the caller, and the event's flags and type must be
2047  * set.  The rest of the event's fields are initialized here.
2048  */
2049 void
2050 cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
2051 {
2052 	ct_equeue_t *q;
2053 	timespec_t ts;
2054 
2055 	e->cte_contract = ct;
2056 	e->cte_data = data;
2057 	e->cte_gdata = gdata;
2058 	e->cte_refs = 3;
2059 	e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
2060 	contract_hold(ct);
2061 
2062 	gethrestime(&ts);
2063 
2064 	/*
2065 	 * ct_evtlock simply (and only) ensures that two events sent
2066 	 * from the same contract are delivered to all queues in the
2067 	 * same order.
2068 	 */
2069 	mutex_enter(&ct->ct_evtlock);
2070 
2071 	/*
2072 	 * CTEL_CONTRACT - First deliver to the contract queue, acking
2073 	 * the event if the contract has been orphaned.
2074 	 */
2075 	mutex_enter(&ct->ct_lock);
2076 	mutex_enter(&ct->ct_events.ctq_lock);
2077 	if ((e->cte_flags & CTE_INFO) == 0) {
2078 		if (ct->ct_state >= CTS_ORPHAN)
2079 			e->cte_flags |= CTE_ACK;
2080 		else
2081 			ct->ct_evcnt++;
2082 	}
2083 	mutex_exit(&ct->ct_lock);
2084 	cte_publish(&ct->ct_events, e, &ts);
2085 
2086 	/*
2087 	 * CTEL_BUNDLE - Next deliver to the contract type's bundle
2088 	 * queue.
2089 	 */
2090 	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
2091 	cte_publish(&ct->ct_type->ct_type_events, e, &ts);
2092 
2093 	/*
2094 	 * CTEL_PBUNDLE - Finally, if the contract has an owner,
2095 	 * deliver to the owner's process bundle queue.
2096 	 */
2097 	mutex_enter(&ct->ct_lock);
2098 	if (ct->ct_owner) {
2099 		/*
2100 		 * proc_exit doesn't free event queues until it has
2101 		 * abandoned all contracts.
2102 		 */
2103 		ASSERT(ct->ct_owner->p_ct_equeue);
2104 		ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
2105 		q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
2106 		mutex_enter(&q->ctq_lock);
2107 		mutex_exit(&ct->ct_lock);
2108 		cte_publish(q, e, &ts);
2109 	} else {
2110 		mutex_exit(&ct->ct_lock);
2111 		cte_rele(e);
2112 	}
2113 
2114 	mutex_exit(&ct->ct_evtlock);
2115 }
2116 
2117 /*
2118  * cte_add_listener
2119  *
2120  * Add a new listener to an event queue.
2121  */
2122 void
2123 cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
2124 {
2125 	cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
2126 	l->ctl_equeue = q;
2127 	l->ctl_position = NULL;
2128 	l->ctl_flags = 0;
2129 
2130 	mutex_enter(&q->ctq_lock);
2131 	list_insert_head(&q->ctq_tail, l);
2132 	list_insert_head(&q->ctq_listeners, l);
2133 	q->ctq_nlisteners++;
2134 	mutex_exit(&q->ctq_lock);
2135 }
2136 
2137 /*
2138  * cte_remove_listener
2139  *
2140  * Remove a listener from an event queue.  No other queue activities
2141  * (e.g. cte_get event) may be in progress at this endpoint when this
2142  * is called.
2143  */
2144 void
2145 cte_remove_listener(ct_listener_t *l)
2146 {
2147 	ct_equeue_t *q = l->ctl_equeue;
2148 	ct_kevent_t *e;
2149 
2150 	mutex_enter(&q->ctq_lock);
2151 
2152 	ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);
2153 
2154 	if ((e = l->ctl_position) != NULL)
2155 		cte_qrele(q, l, e);
2156 	else
2157 		list_remove(&q->ctq_tail, l);
2158 	l->ctl_position = NULL;
2159 
2160 	q->ctq_nlisteners--;
2161 	list_remove(&q->ctq_listeners, l);
2162 
2163 	if (l->ctl_flags & CTLF_RELIABLE)
2164 		q->ctq_nreliable--;
2165 
2166 	/*
2167 	 * If we are a the last listener of a dead reference counted
2168 	 * queue (i.e. a process bundle) we free it.  Otherwise we just
2169 	 * trim any events which may have been kept around for our
2170 	 * benefit.
2171 	 */
2172 	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
2173 	    (q->ctq_nlisteners == 0)) {
2174 		cte_queue_destroy(q);
2175 	} else {
2176 		cte_trim(q, NULL);
2177 		mutex_exit(&q->ctq_lock);
2178 	}
2179 }
2180 
2181 /*
2182  * cte_reset_listener
2183  *
2184  * Moves a listener's queue pointer to the beginning of the queue.
2185  */
2186 void
2187 cte_reset_listener(ct_listener_t *l)
2188 {
2189 	ct_equeue_t *q = l->ctl_equeue;
2190 
2191 	mutex_enter(&q->ctq_lock);
2192 
2193 	/*
2194 	 * We allow an asynchronous reset because it doesn't make a
2195 	 * whole lot of sense to make reset block or fail.  We already
2196 	 * have most of the mechanism needed thanks to queue trimming,
2197 	 * so implementing it isn't a big deal.
2198 	 */
2199 	if (l->ctl_flags & CTLF_COPYOUT)
2200 		l->ctl_flags |= CTLF_RESET;
2201 
2202 	(void) cte_qmove(q, l, list_head(&q->ctq_events));
2203 
2204 	/*
2205 	 * Inform blocked readers.
2206 	 */
2207 	cv_broadcast(&l->ctl_cv);
2208 	pollwakeup(&l->ctl_pollhead, POLLIN);
2209 	mutex_exit(&q->ctq_lock);
2210 }
2211 
2212 /*
2213  * cte_next_event
2214  *
2215  * Moves the event pointer for the specified listener to the next event
2216  * on the queue.  To avoid races, this movement only occurs if the
2217  * specified event id matches that of the current event.  This is used
2218  * primarily to skip events that have been read but whose extended data
2219  * haven't been copied out.
2220  */
2221 int
2222 cte_next_event(ct_listener_t *l, uint64_t id)
2223 {
2224 	ct_equeue_t *q = l->ctl_equeue;
2225 	ct_kevent_t *old;
2226 
2227 	mutex_enter(&q->ctq_lock);
2228 
2229 	if (l->ctl_flags & CTLF_COPYOUT)
2230 		l->ctl_flags |= CTLF_RESET;
2231 
2232 	if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
2233 		(void) cte_qmove(q, l, list_next(&q->ctq_events, old));
2234 
2235 	mutex_exit(&q->ctq_lock);
2236 
2237 	return (0);
2238 }
2239 
2240 /*
2241  * cte_get_event
2242  *
2243  * Reads an event from an event endpoint.  If "nonblock" is clear, we
2244  * block until a suitable event is ready.  If "crit" is set, we only
2245  * read critical events.  Note that while "cr" is the caller's cred,
2246  * "zuniqid" is the unique id of the zone the calling contract
2247  * filesystem was mounted in.
2248  */
2249 int
2250 cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
2251     uint64_t zuniqid, int crit)
2252 {
2253 	ct_equeue_t *q = l->ctl_equeue;
2254 	ct_kevent_t *temp;
2255 	int result = 0;
2256 	int partial = 0;
2257 	size_t size, gsize, len;
2258 	model_t mdl = get_udatamodel();
2259 	STRUCT_DECL(ct_event, ev);
2260 	STRUCT_INIT(ev, mdl);
2261 
2262 	/*
2263 	 * cte_qreadable checks for CTLF_COPYOUT as well as ensures
2264 	 * that there exists, and we are pointing to, an appropriate
2265 	 * event.  It may temporarily drop ctq_lock, but that doesn't
2266 	 * really matter to us.
2267 	 */
2268 	mutex_enter(&q->ctq_lock);
2269 	while (cte_qreadable(q, l, cr, zuniqid, crit)) {
2270 		if (nonblock) {
2271 			result = EAGAIN;
2272 			goto error;
2273 		}
2274 		if (q->ctq_flags & CTQ_DEAD) {
2275 			result = EIDRM;
2276 			goto error;
2277 		}
2278 		result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
2279 		if (result == 0) {
2280 			result = EINTR;
2281 			goto error;
2282 		}
2283 	}
2284 	temp = l->ctl_position;
2285 	cte_hold(temp);
2286 	l->ctl_flags |= CTLF_COPYOUT;
2287 	mutex_exit(&q->ctq_lock);
2288 
2289 	/*
2290 	 * We now have an event.  Copy in the user event structure to
2291 	 * see how much space we have to work with.
2292 	 */
2293 	result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
2294 	if (result)
2295 		goto copyerr;
2296 
2297 	/*
2298 	 * Determine what data we have and what the user should be
2299 	 * allowed to see.
2300 	 */
2301 	size = gsize = 0;
2302 	if (temp->cte_data) {
2303 		VERIFY(nvlist_size(temp->cte_data, &size,
2304 		    NV_ENCODE_NATIVE) == 0);
2305 		ASSERT(size != 0);
2306 	}
2307 	if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
2308 		VERIFY(nvlist_size(temp->cte_gdata, &gsize,
2309 		    NV_ENCODE_NATIVE) == 0);
2310 		ASSERT(gsize != 0);
2311 	}
2312 
2313 	/*
2314 	 * If we have enough space, copy out the extended event data.
2315 	 */
2316 	len = size + gsize;
2317 	if (len) {
2318 		if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
2319 			char *buf = kmem_alloc(len, KM_SLEEP);
2320 
2321 			if (size)
2322 				VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
2323 				    NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2324 			if (gsize) {
2325 				char *tmp = buf + size;
2326 
2327 				VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
2328 				    &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2329 			}
2330 
2331 			/* This shouldn't have changed */
2332 			ASSERT(size + gsize == len);
2333 			result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
2334 			    len);
2335 			kmem_free(buf, len);
2336 			if (result)
2337 				goto copyerr;
2338 		} else {
2339 			partial = 1;
2340 		}
2341 	}
2342 
2343 	/*
2344 	 * Copy out the common event data.
2345 	 */
2346 	STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
2347 	STRUCT_FSET(ev, ctev_evid, temp->cte_id);
2348 	STRUCT_FSET(ev, ctev_cttype,
2349 	    temp->cte_contract->ct_type->ct_type_index);
2350 	STRUCT_FSET(ev, ctev_flags, temp->cte_flags & (CTE_ACK|CTE_INFO));
2351 	STRUCT_FSET(ev, ctev_type, temp->cte_type);
2352 	STRUCT_FSET(ev, ctev_nbytes, len);
2353 	STRUCT_FSET(ev, ctev_goffset, size);
2354 	result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));
2355 
2356 copyerr:
2357 	/*
2358 	 * Only move our location in the queue if all copyouts were
2359 	 * successful, the caller provided enough space for the entire
2360 	 * event, and our endpoint wasn't reset or otherwise moved by
2361 	 * another thread.
2362 	 */
2363 	mutex_enter(&q->ctq_lock);
2364 	if (result)
2365 		result = EFAULT;
2366 	else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
2367 	    (l->ctl_position == temp))
2368 		(void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
2369 	l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
2370 	/*
2371 	 * Signal any readers blocked on our CTLF_COPYOUT.
2372 	 */
2373 	cv_signal(&l->ctl_cv);
2374 	cte_rele(temp);
2375 
2376 error:
2377 	mutex_exit(&q->ctq_lock);
2378 	return (result);
2379 }
2380 
2381 /*
2382  * cte_set_reliable
2383  *
2384  * Requests that events be reliably delivered to an event endpoint.
2385  * Unread informative and acknowledged critical events will not be
2386  * removed from the queue until this listener reads or skips them.
2387  * Because a listener could maliciously request reliable delivery and
2388  * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
2389  * caller's effective set.
2390  */
2391 int
2392 cte_set_reliable(ct_listener_t *l, const cred_t *cr)
2393 {
2394 	ct_equeue_t *q = l->ctl_equeue;
2395 	int error;
2396 
2397 	if ((error = secpolicy_contract_event(cr)) != 0)
2398 		return (error);
2399 
2400 	mutex_enter(&q->ctq_lock);
2401 	if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
2402 		l->ctl_flags |= CTLF_RELIABLE;
2403 		q->ctq_nreliable++;
2404 		if (l->ctl_position != NULL)
2405 			l->ctl_position->cte_nodes[q->ctq_listno].
2406 			    ctm_nreliable++;
2407 	}
2408 	mutex_exit(&q->ctq_lock);
2409 
2410 	return (0);
2411 }
2412