xref: /illumos-gate/usr/src/uts/common/os/contract.c (revision cf988fac1debd92859f8068ee3d3e53782043469)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Contracts
28  * ---------
29  *
30  * Contracts are a primitive which enrich the relationships between
31  * processes and system resources.  The primary purpose of contracts is
32  * to provide a means for the system to negotiate the departure from a
33  * binding relationship (e.g. pages locked in memory or a thread bound
34  * to a processor), but they can also be used as a purely asynchronous
35  * error reporting mechanism as they are with process contracts.
36  *
37  * More information on how one interfaces with contracts and what
38  * contracts can do for you can be found in:
39  *   PSARC 2003/193 Solaris Contracts
40  *   PSARC 2004/460 Contracts addendum
41  *
42  * This file contains the core contracts framework.  By itself it is
43  * useless: it depends on the contracts filesystem (ctfs) to provide an
44  * interface to user processes and individual contract types to
45  * implement the process/resource relationships.
46  *
47  * Data structure overview
48  * -----------------------
49  *
50  * A contract is represented by a contract_t, which itself points to an
51  * encapsulating contract-type specific contract object.  A contract_t
52  * contains the contract's static identity (including its terms), its
53  * linkage to various bookkeeping structures, the contract-specific
54  * event queue, and a reference count.
55  *
56  * A contract template is represented by a ct_template_t, which, like a
57  * contract, points to an encapsulating contract-type specific template
58  * object.  A ct_template_t contains the template's terms.
59  *
60  * An event queue is represented by a ct_equeue_t, and consists of a
61  * list of events, a list of listeners, and a list of listeners who are
62  * waiting for new events (affectionately referred to as "tail
63  * listeners").  There are three queue types, defined by ct_listnum_t
64  * (an enum).  An event may be on one of each type of queue
65  * simultaneously; the list linkage used by a queue is determined by
66  * its type.
67  *
68  * An event is represented by a ct_kevent_t, which contains mostly
69  * static event data (e.g. id, payload).  It also has an array of
70  * ct_member_t structures, each of which contains a list_node_t and
71  * represents the event's linkage in a specific event queue.
72  *
73  * Each open of an event endpoint results in the creation of a new
74  * listener, represented by a ct_listener_t.  In addition to linkage
75  * into the aforementioned lists in the event_queue, a ct_listener_t
76  * contains a pointer to the ct_kevent_t it is currently positioned at
77  * as well as a set of status flags and other administrative data.
78  *
79  * Each process has a list of contracts it owns, p_ct_held; a pointer
80  * to the process contract it is a member of, p_ct_process; the linkage
81  * for that membership, p_ct_member; and an array of event queue
82  * structures representing the process bundle queues.
83  *
84  * Each LWP has an array of its active templates, lwp_ct_active; and
85  * the most recently created contracts, lwp_ct_latest.
86  *
87  * A process contract has a list of member processes and a list of
88  * inherited contracts.
89  *
90  * There is a system-wide list of all contracts, as well as per-type
91  * lists of contracts.
92  *
93  * Lock ordering overview
94  * ----------------------
95  *
96  * Locks at the top are taken first:
97  *
98  *                   ct_evtlock
99  *                   regent ct_lock
100  *                   member ct_lock
101  *                   pidlock
102  *                   p_lock
103  *    contract ctq_lock         contract_lock
104  *    pbundle ctq_lock
105  *    cte_lock
106  *                   ct_reflock
107  *
108  * contract_lock and ctq_lock/cte_lock are not currently taken at the
109  * same time.
110  *
111  * Reference counting and locking
112  * ------------------------------
113  *
114  * A contract has a reference count, protected by ct_reflock.
115  * (ct_reflock is also used in a couple other places where atomic
116  * access to a variable is needed in an innermost context).  A process
117  * maintains a hold on each contract it owns.  A process contract has a
118  * hold on each contract it has inherited.  Each event has a hold on
119  * the contract which generated it.  Process contract templates have
120  * holds on the contracts referred to by their transfer terms.  CTFS
121  * contract directory nodes have holds on contracts.  Lastly, various
122  * code paths may temporarily take holds on contracts to prevent them
123  * from disappearing while other processing is going on.  It is
124  * important to note that the global contract lists do not hold
125  * references on contracts; a contract is removed from these structures
126  * atomically with the release of its last reference.
127  *
128  * At a given point in time, a contract can either be owned by a
129  * process, inherited by a regent process contract, or orphaned.  A
130  * contract_t's  owner and regent pointers, ct_owner and ct_regent, are
131  * protected by its ct_lock.  The linkage in the holder's (holder =
132  * owner or regent) list of contracts, ct_ctlist, is protected by
133  * whatever lock protects the holder's data structure.  In order for
134  * these two directions to remain consistent, changing the holder of a
135  * contract requires that both locks be held.
136  *
137  * Events also have reference counts.  There is one hold on an event
138  * per queue it is present on, in addition to those needed for the
139  * usual sundry reasons.  Individual listeners are associated with
140  * specific queues, and increase a queue-specific reference count
141  * stored in the ct_member_t structure.
142  *
143  * The dynamic contents of an event (reference count and flags) are
144  * protected by its cte_lock, while the contents of the embedded
145  * ct_member_t structures are protected by the locks of the queues they
146  * are linked into.  A ct_listener_t's contents are also protected by
147  * its event queue's ctq_lock.
148  *
149  * Resource controls
150  * -----------------
151  *
152  * Control:      project.max-contracts (rc_project_contract)
153  * Description:  Maximum number of contracts allowed a project.
154  *
155  *   When a contract is created, the project's allocation is tested and
156  *   (assuming success) increased.  When the last reference to a
157  *   contract is released, the creating project's allocation is
158  *   decreased.
159  */
160 
161 #include <sys/mutex.h>
162 #include <sys/debug.h>
163 #include <sys/types.h>
164 #include <sys/param.h>
165 #include <sys/kmem.h>
166 #include <sys/thread.h>
167 #include <sys/id_space.h>
168 #include <sys/avl.h>
169 #include <sys/list.h>
170 #include <sys/sysmacros.h>
171 #include <sys/proc.h>
172 #include <sys/ctfs.h>
173 #include <sys/contract_impl.h>
174 #include <sys/contract/process_impl.h>
175 #include <sys/dditypes.h>
176 #include <sys/contract/device_impl.h>
177 #include <sys/systm.h>
178 #include <sys/atomic.h>
179 #include <sys/cmn_err.h>
180 #include <sys/model.h>
181 #include <sys/policy.h>
182 #include <sys/zone.h>
183 #include <sys/task.h>
184 #include <sys/ddi.h>
185 #include <sys/sunddi.h>
186 
/* Handle for the project.max-contracts resource control. */
extern rctl_hndl_t rc_project_contract;

/* ID space from which contract IDs are allocated (created in contract_init). */
static id_space_t	*contract_ids;
/* System-wide AVL tree of all contracts, ordered by contract_compar. */
static avl_tree_t	contract_avl;
/* Protects contract_avl and the per-project contract count (kpd_contract). */
static kmutex_t		contract_lock;

/* Contract type table; ct_types initially refers to the static array. */
int			ct_ntypes = CTT_MAXTYPE;
static ct_type_t	*ct_types_static[CTT_MAXTYPE];
ct_type_t		**ct_types = ct_types_static;
/* NOTE(review): presumably a debugging tunable; not used in the code shown. */
int			ct_debug;

/*
 * Forward declarations of the static event queue helpers that are
 * called before their definitions appear in this file.
 */
static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
static void cte_queue_destroy(ct_equeue_t *);
static void cte_queue_drain(ct_equeue_t *, int);
static void cte_trim(ct_equeue_t *, contract_t *);
static void cte_copy(ct_equeue_t *, ct_equeue_t *);
203 
204 /*
205  * contract_compar
206  *
207  * A contract comparator which sorts on contract ID.
208  */
209 int
210 contract_compar(const void *x, const void *y)
211 {
212 	const contract_t *ct1 = x;
213 	const contract_t *ct2 = y;
214 
215 	if (ct1->ct_id < ct2->ct_id)
216 		return (-1);
217 	if (ct1->ct_id > ct2->ct_id)
218 		return (1);
219 	return (0);
220 }
221 
222 /*
223  * contract_init
224  *
225  * Initializes the contract subsystem, the specific contract types, and
226  * process 0.
227  */
228 void
229 contract_init(void)
230 {
231 	/*
232 	 * Initialize contract subsystem.
233 	 */
234 	contract_ids = id_space_create("contracts", 1, INT_MAX);
235 	avl_create(&contract_avl, contract_compar, sizeof (contract_t),
236 	    offsetof(contract_t, ct_ctavl));
237 	mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);
238 
239 	/*
240 	 * Initialize contract types.
241 	 */
242 	contract_process_init();
243 	contract_device_init();
244 
245 	/*
246 	 * Initialize p0/lwp0 contract state.
247 	 */
248 	avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
249 	    offsetof(contract_t, ct_ctlist));
250 }
251 
252 /*
253  * contract_dtor
254  *
255  * Performs basic destruction of the common portions of a contract.
256  * Called from the failure path of contract_ctor and from
257  * contract_rele.
258  */
259 static void
260 contract_dtor(contract_t *ct)
261 {
262 	cte_queue_destroy(&ct->ct_events);
263 	list_destroy(&ct->ct_vnodes);
264 	mutex_destroy(&ct->ct_reflock);
265 	mutex_destroy(&ct->ct_lock);
266 	mutex_destroy(&ct->ct_evtlock);
267 }
268 
/*
 * contract_ctor
 *
 * Called by a contract type to initialize a contract.  Fails if the
 * max-contract resource control would have been exceeded.  After a
 * successful call to contract_ctor, the contract is unlocked and
 * visible in all namespaces; any type-specific initialization should
 * be completed before calling contract_ctor.  Returns 0 on success.
 *
 * Because not all callers can tolerate failure, a 0 value for canfail
 * instructs contract_ctor to ignore the project.max-contracts resource
 * control.  Obviously, this "out" should only be employed by callers
 * who are sufficiently constrained in other ways (e.g. newproc).
 *
 * Arguments:
 *   ct      - the contract to initialize
 *   type    - the contract's type; the contract is added to its AVL
 *   tmpl    - template supplying the event sets and the cookie
 *   data    - type-specific pointer stored in ct_data
 *   flags   - initial value of ct_flags
 *   author  - creating process (must be curproc); becomes the owner
 *   canfail - nonzero if project.max-contracts should be enforced
 *
 * Returns 0 on success, or 1 if the resource control denied the
 * creation (in which case the common state has been destroyed again).
 */
int
contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
    ctflags_t flags, proc_t *author, int canfail)
{
	avl_index_t where;
	klwp_t *curlwp = ttolwp(curthread);

	ASSERT(author == curproc);

	mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_id = id_alloc(contract_ids);

	cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
	list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
	    offsetof(contract_vnode_t, ctv_node));

	/*
	 * Instance data
	 */
	ct->ct_ref = 2;		/* one for the holder, one for "latest" */
	ct->ct_cuid = crgetuid(CRED());
	ct->ct_type = type;
	ct->ct_data = data;
	gethrestime(&ct->ct_ctime);
	ct->ct_state = CTS_OWNED;
	ct->ct_flags = flags;
	/* The regent is the process contract the author belongs to, if any. */
	ct->ct_regent = author->p_ct_process ?
	    &author->p_ct_process->conp_contract : NULL;
	ct->ct_ev_info = tmpl->ctmpl_ev_info;
	ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
	ct->ct_cookie = tmpl->ctmpl_cookie;
	ct->ct_owner = author;
	/* A total of -1 is reported by get_time_left as -1. */
	ct->ct_ntime.ctm_total = -1;
	ct->ct_qtime.ctm_total = -1;
	ct->ct_nevent = NULL;

	/*
	 * Test project.max-contracts.
	 */
	mutex_enter(&author->p_lock);
	mutex_enter(&contract_lock);
	if (canfail && rctl_test(rc_project_contract,
	    author->p_task->tk_proj->kpj_rctls, author, 1,
	    RCA_SAFE) & RCT_DENY) {
		/*
		 * Denied: give back the ID, drop both locks, and undo
		 * the common initialization performed above.  The queue
		 * is flagged CTQ_DEAD before it is destroyed.
		 * NOTE(review): presumably cte_queue_destroy expects a
		 * dead queue -- confirm against its definition.
		 */
		id_free(contract_ids, ct->ct_id);
		mutex_exit(&contract_lock);
		mutex_exit(&author->p_lock);
		ct->ct_events.ctq_flags |= CTQ_DEAD;
		contract_dtor(ct);
		return (1);
	}
	/*
	 * Charge the contract to the author's project and hold the
	 * project; contract_rele undoes both.
	 */
	ct->ct_proj = author->p_task->tk_proj;
	ct->ct_proj->kpj_data.kpd_contract++;
	(void) project_hold(ct->ct_proj);
	mutex_exit(&contract_lock);

	/*
	 * Insert into holder's avl of contracts.
	 * We use an avl not because order is important, but because
	 * readdir of /proc/contracts requires we be able to use a
	 * scalar as an index into the process's list of contracts
	 *
	 * p_lock, taken above, is still held across this insertion.
	 */
	ct->ct_zoneid = author->p_zone->zone_id;
	ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
	VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
	avl_insert(&author->p_ct_held, ct, where);
	mutex_exit(&author->p_lock);

	/*
	 * Insert into global contract AVL
	 */
	mutex_enter(&contract_lock);
	VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
	avl_insert(&contract_avl, ct, where);
	mutex_exit(&contract_lock);

	/*
	 * Insert into type AVL
	 */
	mutex_enter(&type->ct_type_lock);
	VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
	avl_insert(&type->ct_type_avl, ct, where);
	type->ct_type_timestruc = ct->ct_ctime;
	mutex_exit(&type->ct_type_lock);

	/*
	 * Record this contract as the LWP's latest of its type,
	 * releasing the reference held on the one it replaces.  The
	 * second of the two initial references belongs to this slot.
	 */
	if (curlwp->lwp_ct_latest[type->ct_type_index])
		contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
	curlwp->lwp_ct_latest[type->ct_type_index] = ct;

	return (0);
}
376 
377 /*
378  * contract_rele
379  *
380  * Releases a reference to a contract.  If the caller had the last
381  * reference, the contract is removed from all namespaces, its
382  * allocation against the max-contracts resource control is released,
383  * and the contract type's free entry point is invoked for any
384  * type-specific deconstruction and to (presumably) free the object.
385  */
386 void
387 contract_rele(contract_t *ct)
388 {
389 	uint64_t nref;
390 
391 	mutex_enter(&ct->ct_reflock);
392 	ASSERT(ct->ct_ref > 0);
393 	nref = --ct->ct_ref;
394 	mutex_exit(&ct->ct_reflock);
395 	if (nref == 0) {
396 		/*
397 		 * ct_owner is cleared when it drops its reference.
398 		 */
399 		ASSERT(ct->ct_owner == NULL);
400 		ASSERT(ct->ct_evcnt == 0);
401 
402 		/*
403 		 * Remove from global contract AVL
404 		 */
405 		mutex_enter(&contract_lock);
406 		avl_remove(&contract_avl, ct);
407 		mutex_exit(&contract_lock);
408 
409 		/*
410 		 * Remove from type AVL
411 		 */
412 		mutex_enter(&ct->ct_type->ct_type_lock);
413 		avl_remove(&ct->ct_type->ct_type_avl, ct);
414 		mutex_exit(&ct->ct_type->ct_type_lock);
415 
416 		/*
417 		 * Release the contract's ID
418 		 */
419 		id_free(contract_ids, ct->ct_id);
420 
421 		/*
422 		 * Release project hold
423 		 */
424 		mutex_enter(&contract_lock);
425 		ct->ct_proj->kpj_data.kpd_contract--;
426 		project_rele(ct->ct_proj);
427 		mutex_exit(&contract_lock);
428 
429 		/*
430 		 * Free the contract
431 		 */
432 		contract_dtor(ct);
433 		ct->ct_type->ct_type_ops->contop_free(ct);
434 	}
435 }
436 
437 /*
438  * contract_hold
439  *
440  * Adds a reference to a contract
441  */
442 void
443 contract_hold(contract_t *ct)
444 {
445 	mutex_enter(&ct->ct_reflock);
446 	ASSERT(ct->ct_ref < UINT64_MAX);
447 	ct->ct_ref++;
448 	mutex_exit(&ct->ct_reflock);
449 }
450 
451 /*
452  * contract_getzuniqid
453  *
454  * Get a contract's zone unique ID.  Needed because 64-bit reads and
455  * writes aren't atomic on x86.  Since there are contexts where we are
456  * unable to take ct_lock, we instead use ct_reflock; in actuality any
457  * lock would do.
458  */
459 uint64_t
460 contract_getzuniqid(contract_t *ct)
461 {
462 	uint64_t zuniqid;
463 
464 	mutex_enter(&ct->ct_reflock);
465 	zuniqid = ct->ct_mzuniqid;
466 	mutex_exit(&ct->ct_reflock);
467 
468 	return (zuniqid);
469 }
470 
471 /*
472  * contract_setzuniqid
473  *
474  * Sets a contract's zone unique ID.   See contract_getzuniqid.
475  */
476 void
477 contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
478 {
479 	mutex_enter(&ct->ct_reflock);
480 	ct->ct_mzuniqid = zuniqid;
481 	mutex_exit(&ct->ct_reflock);
482 }
483 
/*
 * contract_abandon
 *
 * Abandons the specified contract.  If "explicit" is clear, the
 * contract was implicitly abandoned (by process exit) and should be
 * inherited if its terms allow it and its owner was a member of a
 * regent contract.  Otherwise, the contract type's abandon entry point
 * is invoked to either destroy or orphan the contract.
 *
 * Arguments:
 *   ct       - the contract being abandoned
 *   p        - the abandoning process; must be curproc
 *   explicit - nonzero for an explicit abandon, zero for process exit
 *
 * Returns 0 on success, or EINVAL if p is not the contract's owner.
 */
int
contract_abandon(contract_t *ct, proc_t *p, int explicit)
{
	ct_equeue_t *q = NULL;
	contract_t *parent = &p->p_ct_process->conp_contract;
	int inherit = 0;

	VERIFY(p == curproc);

	mutex_enter(&ct->ct_lock);

	/*
	 * Multiple contract locks are taken contract -> subcontract.
	 * Check if the contract will be inherited so we can acquire
	 * all the necessary locks before making sensitive changes.
	 */
	if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
	    contract_process_accept(parent)) {
		/* Drop and retake ct_lock so that parent is locked first. */
		mutex_exit(&ct->ct_lock);
		mutex_enter(&parent->ct_lock);
		mutex_enter(&ct->ct_lock);
		inherit = 1;
	}

	if (ct->ct_owner != p) {
		mutex_exit(&ct->ct_lock);
		if (inherit)
			mutex_exit(&parent->ct_lock);
		return (EINVAL);
	}

	/*
	 * On the implicit path the caller (contract_exit) is already
	 * emptying p_ct_held with avl_destroy_nodes, so the contract is
	 * only removed from the tree here for an explicit abandon.
	 */
	mutex_enter(&p->p_lock);
	if (explicit)
		avl_remove(&p->p_ct_held, ct);
	ct->ct_owner = NULL;
	mutex_exit(&p->p_lock);

	/*
	 * Since we can't call cte_trim with the contract lock held,
	 * we grab the queue pointer here.
	 */
	if (p->p_ct_equeue)
		q = p->p_ct_equeue[ct->ct_type->ct_type_index];

	/*
	 * contop_abandon may destroy the contract so we rely on it to
	 * drop ct_lock.  We retain a reference on the contract so that
	 * the cte_trim which follows functions properly.  Even though
	 * cte_trim doesn't dereference the contract pointer, it is
	 * still necessary to retain a reference to the contract so
	 * that we don't trim events which are sent by a subsequently
	 * allocated contract that happens to be located at the same
	 * address.
	 */
	contract_hold(ct);

	if (inherit) {
		ct->ct_state = CTS_INHERITED;
		VERIFY(ct->ct_regent == parent);
		contract_process_take(parent, ct);

		/*
		 * We are handing off the process's reference to the
		 * parent contract.  For this reason, the order in
		 * which we drop the contract locks is also important.
		 */
		mutex_exit(&ct->ct_lock);
		mutex_exit(&parent->ct_lock);
	} else {
		ct->ct_regent = NULL;
		ct->ct_type->ct_type_ops->contop_abandon(ct);
	}

	/*
	 * ct_lock has been dropped; we can safely trim the event
	 * queue now.
	 */
	if (q) {
		mutex_enter(&q->ctq_lock);
		cte_trim(q, ct);
		mutex_exit(&q->ctq_lock);
	}

	/* Drop the temporary reference taken before the hand-off above. */
	contract_rele(ct);

	return (0);
}
579 
580 int
581 contract_newct(contract_t *ct)
582 {
583 	return (ct->ct_type->ct_type_ops->contop_newct(ct));
584 }
585 
/*
 * contract_adopt
 *
 * Adopts a contract.  After a successful call to this routine, the
 * previously inherited contract will belong to the calling process,
 * and its events will have been appended to its new owner's process
 * bundle queue.
 *
 * Arguments:
 *   ct - the inherited contract to adopt
 *   p  - the adopting process; must be curproc
 *
 * Returns 0 on success; EINVAL if the contract is not inherited, is
 * not inherited by p's process contract, or was created in a different
 * zone than p's; EBUSY if the contract stopped being inherited while
 * its lock was dropped.
 */
int
contract_adopt(contract_t *ct, proc_t *p)
{
	avl_index_t where;
	ct_equeue_t *q;
	contract_t *parent;

	ASSERT(p == curproc);

	/*
	 * Ensure the process has an event queue.  Checked by ASSERTs
	 * below.
	 */
	(void) contract_type_pbundle(ct->ct_type, p);

	mutex_enter(&ct->ct_lock);
	parent = ct->ct_regent;
	if (ct->ct_state != CTS_INHERITED ||
	    &p->p_ct_process->conp_contract != parent ||
	    p->p_zone->zone_uniqid != ct->ct_czuniqid) {
		mutex_exit(&ct->ct_lock);
		return (EINVAL);
	}

	/*
	 * Multiple contract locks are taken contract -> subcontract.
	 * ct_lock is therefore dropped and retaken after the regent's.
	 */
	mutex_exit(&ct->ct_lock);
	mutex_enter(&parent->ct_lock);
	mutex_enter(&ct->ct_lock);

	/*
	 * It is possible that the contract was adopted by someone else
	 * while its lock was dropped.  It isn't possible for the
	 * contract to have been inherited by a different regent
	 * contract.
	 */
	if (ct->ct_state != CTS_INHERITED) {
		mutex_exit(&parent->ct_lock);
		mutex_exit(&ct->ct_lock);
		return (EBUSY);
	}
	ASSERT(ct->ct_regent == parent);

	ct->ct_state = CTS_OWNED;

	contract_process_adopt(ct, p);

	/* Changing the holder requires both ct_lock and p_lock. */
	mutex_enter(&p->p_lock);
	ct->ct_owner = p;
	VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
	avl_insert(&p->p_ct_held, ct, where);
	mutex_exit(&p->p_lock);

	/* Copy the contract's events onto the new owner's bundle queue. */
	ASSERT(ct->ct_owner->p_ct_equeue);
	ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
	q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
	cte_copy(&ct->ct_events, q);
	mutex_exit(&ct->ct_lock);

	return (0);
}
656 
657 /*
658  * contract_ack
659  *
660  * Acknowledges receipt of a critical event.
661  */
662 int
663 contract_ack(contract_t *ct, uint64_t evid, int ack)
664 {
665 	ct_kevent_t *ev;
666 	list_t *queue = &ct->ct_events.ctq_events;
667 	int error = ESRCH;
668 	int nego = 0;
669 	uint_t evtype;
670 
671 	ASSERT(ack == CT_ACK || ack == CT_NACK);
672 
673 	mutex_enter(&ct->ct_lock);
674 	mutex_enter(&ct->ct_events.ctq_lock);
675 	/*
676 	 * We are probably ACKing something near the head of the queue.
677 	 */
678 	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
679 		if (ev->cte_id == evid) {
680 			if (ev->cte_flags & CTE_NEG)
681 				nego = 1;
682 			else if (ack == CT_NACK)
683 				break;
684 			if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
685 				ev->cte_flags |= CTE_ACK;
686 				ct->ct_evcnt--;
687 				evtype = ev->cte_type;
688 				error = 0;
689 			}
690 			break;
691 		}
692 	}
693 	mutex_exit(&ct->ct_events.ctq_lock);
694 	mutex_exit(&ct->ct_lock);
695 
696 	/*
697 	 * Not all critical events are negotiation events, however
698 	 * every negotiation event is a critical event. NEGEND events
699 	 * are critical events but are not negotiation events
700 	 */
701 	if (error || !nego)
702 		return (error);
703 
704 	if (ack == CT_ACK)
705 		error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
706 	else
707 		error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);
708 
709 	return (error);
710 }
711 
712 /*ARGSUSED*/
713 int
714 contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
715 {
716 	cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
717 	    ct->ct_id);
718 	return (ENOSYS);
719 }
720 
721 /*ARGSUSED*/
722 int
723 contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
724 {
725 	cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
726 	    ct->ct_id);
727 	return (ENOSYS);
728 }
729 
730 /*ARGSUSED*/
731 int
732 contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
733 {
734 	return (ERANGE);
735 }
736 
737 /*
738  * contract_qack
739  *
740  * Asks that negotiations be extended by another time quantum
741  */
742 int
743 contract_qack(contract_t *ct, uint64_t evid)
744 {
745 	ct_kevent_t *ev;
746 	list_t *queue = &ct->ct_events.ctq_events;
747 	int nego = 0;
748 	uint_t evtype;
749 
750 	mutex_enter(&ct->ct_lock);
751 	mutex_enter(&ct->ct_events.ctq_lock);
752 
753 	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
754 		if (ev->cte_id == evid) {
755 			if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
756 				evtype = ev->cte_type;
757 				nego = 1;
758 			}
759 			break;
760 		}
761 	}
762 	mutex_exit(&ct->ct_events.ctq_lock);
763 	mutex_exit(&ct->ct_lock);
764 
765 	/*
766 	 * Only a negotiated event (which is by definition also a critical
767 	 * event) which has not yet been acknowledged can provide
768 	 * time quanta to a negotiating owner process.
769 	 */
770 	if (!nego)
771 		return (ESRCH);
772 
773 	return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
774 }
775 
776 /*
777  * contract_orphan
778  *
779  * Icky-poo.  This is a process-contract special, used to ACK all
780  * critical messages when a contract is orphaned.
781  */
782 void
783 contract_orphan(contract_t *ct)
784 {
785 	ct_kevent_t *ev;
786 	list_t *queue = &ct->ct_events.ctq_events;
787 
788 	ASSERT(MUTEX_HELD(&ct->ct_lock));
789 	ASSERT(ct->ct_state != CTS_ORPHAN);
790 
791 	mutex_enter(&ct->ct_events.ctq_lock);
792 	ct->ct_state = CTS_ORPHAN;
793 	for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
794 		if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
795 			ev->cte_flags |= CTE_ACK;
796 			ct->ct_evcnt--;
797 		}
798 	}
799 	mutex_exit(&ct->ct_events.ctq_lock);
800 
801 	ASSERT(ct->ct_evcnt == 0);
802 }
803 
/*
 * contract_destroy
 *
 * Explicit contract destruction.  Called when contract is empty.
 * The contract will actually stick around until all of its events are
 * removed from the bundle and process bundle queues, and all fds
 * which refer to it are closed.  See contract_dtor if you are looking
 * for what destroys the contract structure.
 *
 * The caller must hold ct_lock, which is dropped before returning.
 * The contract must have no owner and must not already be dead.  One
 * reference on the contract is released on the way out.
 */
void
contract_destroy(contract_t *ct)
{
	ASSERT(MUTEX_HELD(&ct->ct_lock));
	ASSERT(ct->ct_state != CTS_DEAD);
	ASSERT(ct->ct_owner == NULL);

	ct->ct_state = CTS_DEAD;
	cte_queue_drain(&ct->ct_events, 1);
	/*
	 * ct_lock is dropped around the trim of the type's event queue
	 * and retaken for the type's destroy entry point.
	 */
	mutex_exit(&ct->ct_lock);
	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
	cte_trim(&ct->ct_type->ct_type_events, ct);
	mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
	mutex_enter(&ct->ct_lock);
	ct->ct_type->ct_type_ops->contop_destroy(ct);
	mutex_exit(&ct->ct_lock);
	contract_rele(ct);
}
831 
832 /*
833  * contract_vnode_get
834  *
835  * Obtains the contract directory vnode for this contract, if there is
836  * one.  The caller must VN_RELE the vnode when they are through using
837  * it.
838  */
839 vnode_t *
840 contract_vnode_get(contract_t *ct, vfs_t *vfsp)
841 {
842 	contract_vnode_t *ctv;
843 	vnode_t *vp = NULL;
844 
845 	mutex_enter(&ct->ct_lock);
846 	for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
847 	    ctv = list_next(&ct->ct_vnodes, ctv))
848 		if (ctv->ctv_vnode->v_vfsp == vfsp) {
849 			vp = ctv->ctv_vnode;
850 			VN_HOLD(vp);
851 			break;
852 		}
853 	mutex_exit(&ct->ct_lock);
854 	return (vp);
855 }
856 
857 /*
858  * contract_vnode_set
859  *
860  * Sets the contract directory vnode for this contract.  We don't hold
861  * a reference on the vnode because we don't want to prevent it from
862  * being freed.  The vnode's inactive entry point will take care of
863  * notifying us when it should be removed.
864  */
865 void
866 contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
867 {
868 	mutex_enter(&ct->ct_lock);
869 	ctv->ctv_vnode = vnode;
870 	list_insert_head(&ct->ct_vnodes, ctv);
871 	mutex_exit(&ct->ct_lock);
872 }
873 
874 /*
875  * contract_vnode_clear
876  *
877  * Removes this vnode as the contract directory vnode for this
878  * contract.  Called from a contract directory's inactive entry point,
879  * this may return 0 indicating that the vnode gained another reference
880  * because of a simultaneous call to contract_vnode_get.
881  */
882 int
883 contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
884 {
885 	vnode_t *vp = ctv->ctv_vnode;
886 	int result;
887 
888 	mutex_enter(&ct->ct_lock);
889 	mutex_enter(&vp->v_lock);
890 	if (vp->v_count == 1) {
891 		list_remove(&ct->ct_vnodes, ctv);
892 		result = 1;
893 	} else {
894 		vp->v_count--;
895 		result = 0;
896 	}
897 	mutex_exit(&vp->v_lock);
898 	mutex_exit(&ct->ct_lock);
899 
900 	return (result);
901 }
902 
903 /*
904  * contract_exit
905  *
906  * Abandons all contracts held by process p, and drains process p's
907  * bundle queues.  Called on process exit.
908  */
909 void
910 contract_exit(proc_t *p)
911 {
912 	contract_t *ct;
913 	void *cookie = NULL;
914 	int i;
915 
916 	ASSERT(p == curproc);
917 
918 	/*
919 	 * Abandon held contracts.  contract_abandon knows enough not
920 	 * to remove the contract from the list a second time.  We are
921 	 * exiting, so no locks are needed here.  But because
922 	 * contract_abandon will take p_lock, we need to make sure we
923 	 * aren't holding it.
924 	 */
925 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
926 	while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
927 		VERIFY(contract_abandon(ct, p, 0) == 0);
928 
929 	/*
930 	 * Drain pbundles.  Because a process bundle queue could have
931 	 * been passed to another process, they may not be freed right
932 	 * away.
933 	 */
934 	if (p->p_ct_equeue) {
935 		for (i = 0; i < CTT_MAXTYPE; i++)
936 			if (p->p_ct_equeue[i])
937 				cte_queue_drain(p->p_ct_equeue[i], 0);
938 		kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
939 	}
940 }
941 
942 static int
943 get_time_left(struct ct_time *t)
944 {
945 	clock_t ticks_elapsed;
946 	int secs_elapsed;
947 
948 	if (t->ctm_total == -1)
949 		return (-1);
950 
951 	ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
952 	secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
953 	return (secs_elapsed > 0 ? secs_elapsed : 0);
954 }
955 
/*
 * contract_status_common
 *
 * Populates a ct_status structure.  Used by contract types in their
 * status entry points and ctfs when only common information is
 * requested.
 *
 * Arguments:
 *   ct     - the contract being described; ct_lock must be held
 *   zone   - the zone on whose behalf the status is produced
 *   status - the ct_status buffer to fill in
 *   model  - data model used to lay out the buffer
 *
 * The zone, holder and state fields are only reported truthfully to
 * the global zone and to the zone that created the contract; other
 * zones are given substitute values.
 */
void
contract_status_common(contract_t *ct, zone_t *zone, void *status,
    model_t model)
{
	STRUCT_HANDLE(ct_status, lstatus);

	STRUCT_SET_HANDLE(lstatus, model, status);
	ASSERT(MUTEX_HELD(&ct->ct_lock));
	if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
	    zone->zone_uniqid == ct->ct_czuniqid) {
		zone_t *czone;
		zoneid_t zoneid = -1;

		/*
		 * Contracts don't have holds on the zones they were
		 * created by.  If the contract's zone no longer
		 * exists, we say its zoneid is -1.
		 */
		if (zone->zone_uniqid == ct->ct_czuniqid ||
		    ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
			zoneid = ct->ct_zoneid;
		} else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
			/*
			 * A zone with this zoneid exists; only report
			 * the id if its unique ID matches the one
			 * recorded in the contract.
			 */
			if (czone->zone_uniqid == ct->ct_mzuniqid)
				zoneid = ct->ct_zoneid;
			zone_rele(czone);
		}

		STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
		/*
		 * The holder is the owner's pid when owned, the regent
		 * contract's ID when inherited, and 0 otherwise.
		 */
		STRUCT_FSET(lstatus, ctst_holder,
		    (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
		    (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
		STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
	} else {
		/*
		 * We are looking at a contract which was created by a
		 * process outside of our zone.  We provide fake zone,
		 * holder, and state information.
		 */

		STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
		/*
		 * Since "zone" can't disappear until the calling ctfs
		 * is unmounted, zone_zsched must be valid.
		 */
		STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
		    zone->zone_zsched->p_pid : 0);
		STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
		    CTS_OWNED : ct->ct_state);
	}
	/* The remaining fields are reported the same way to every zone. */
	STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
	STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
	STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
	STRUCT_FSET(lstatus, ctst_nevid,
	    ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
	STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
	STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
	STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
	STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
	STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
}
1023 
1024 /*
1025  * contract_checkcred
1026  *
1027  * Determines if the specified contract is owned by a process with the
1028  * same effective uid as the specified credential.  The caller must
1029  * ensure that the uid spaces are the same.  Returns 1 on success.
1030  */
1031 static int
1032 contract_checkcred(contract_t *ct, const cred_t *cr)
1033 {
1034 	proc_t *p;
1035 	int fail = 1;
1036 
1037 	mutex_enter(&ct->ct_lock);
1038 	if ((p = ct->ct_owner) != NULL) {
1039 		mutex_enter(&p->p_crlock);
1040 		fail = crgetuid(cr) != crgetuid(p->p_cred);
1041 		mutex_exit(&p->p_crlock);
1042 	}
1043 	mutex_exit(&ct->ct_lock);
1044 
1045 	return (!fail);
1046 }
1047 
1048 /*
1049  * contract_owned
1050  *
1051  * Determines if the specified credential can view an event generated
1052  * by the specified contract.  If locked is set, the contract's ct_lock
1053  * is held and the caller will need to do additional work to determine
1054  * if they truly can see the event.  Returns 1 on success.
1055  */
1056 int
1057 contract_owned(contract_t *ct, const cred_t *cr, int locked)
1058 {
1059 	int owner, cmatch, zmatch;
1060 	uint64_t zuniqid, mzuniqid;
1061 	uid_t euid;
1062 
1063 	ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
1064 
1065 	zuniqid = curproc->p_zone->zone_uniqid;
1066 	mzuniqid = contract_getzuniqid(ct);
1067 	euid = crgetuid(cr);
1068 
1069 	/*
1070 	 * owner: we own the contract
1071 	 * cmatch: we are in the creator's (and holder's) zone and our
1072 	 *   uid matches the creator's or holder's
1073 	 * zmatch: we are in the effective zone of a contract created
1074 	 *   in the global zone, and our uid matches that of the
1075 	 *   virtualized holder's (zsched/kcred)
1076 	 */
1077 	owner = (ct->ct_owner == curproc);
1078 	cmatch = (zuniqid == ct->ct_czuniqid) &&
1079 	    ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
1080 	zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
1081 	    (crgetuid(kcred) == euid);
1082 
1083 	return (owner || cmatch || zmatch);
1084 }
1085 
1086 
1087 /*
1088  * contract_type_init
1089  *
1090  * Called by contract types to register themselves with the contracts
1091  * framework.
1092  */
1093 ct_type_t *
1094 contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
1095     ct_f_default_t *dfault)
1096 {
1097 	ct_type_t *result;
1098 
1099 	ASSERT(type < CTT_MAXTYPE);
1100 
1101 	result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
1102 
1103 	mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
1104 	avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
1105 	    offsetof(contract_t, ct_cttavl));
1106 	cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
1107 	result->ct_type_name = name;
1108 	result->ct_type_ops = ops;
1109 	result->ct_type_default = dfault;
1110 	result->ct_type_evid = 0;
1111 	gethrestime(&result->ct_type_timestruc);
1112 	result->ct_type_index = type;
1113 
1114 	ct_types[type] = result;
1115 
1116 	return (result);
1117 }
1118 
1119 /*
1120  * contract_type_count
1121  *
1122  * Obtains the number of contracts of a particular type.
1123  */
1124 int
1125 contract_type_count(ct_type_t *type)
1126 {
1127 	ulong_t count;
1128 
1129 	mutex_enter(&type->ct_type_lock);
1130 	count = avl_numnodes(&type->ct_type_avl);
1131 	mutex_exit(&type->ct_type_lock);
1132 
1133 	return (count);
1134 }
1135 
1136 /*
1137  * contract_type_max
1138  *
1139  * Obtains the maximum contract id of of a particular type.
1140  */
1141 ctid_t
1142 contract_type_max(ct_type_t *type)
1143 {
1144 	contract_t *ct;
1145 	ctid_t res;
1146 
1147 	mutex_enter(&type->ct_type_lock);
1148 	ct = avl_last(&type->ct_type_avl);
1149 	res = ct ? ct->ct_id : -1;
1150 	mutex_exit(&type->ct_type_lock);
1151 
1152 	return (res);
1153 }
1154 
1155 /*
1156  * contract_max
1157  *
1158  * Obtains the maximum contract id.
1159  */
1160 ctid_t
1161 contract_max(void)
1162 {
1163 	contract_t *ct;
1164 	ctid_t res;
1165 
1166 	mutex_enter(&contract_lock);
1167 	ct = avl_last(&contract_avl);
1168 	res = ct ? ct->ct_id : -1;
1169 	mutex_exit(&contract_lock);
1170 
1171 	return (res);
1172 }
1173 
1174 /*
1175  * contract_lookup_common
1176  *
1177  * Common code for contract_lookup and contract_type_lookup.  Takes a
1178  * pointer to an AVL tree to search in.  Should be called with the
1179  * appropriate tree-protecting lock held (unfortunately unassertable).
1180  */
1181 static ctid_t
1182 contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
1183 {
1184 	contract_t template, *ct;
1185 	avl_index_t where;
1186 	ctid_t res;
1187 
1188 	template.ct_id = current;
1189 	ct = avl_find(tree, &template, &where);
1190 	if (ct == NULL)
1191 		ct = avl_nearest(tree, where, AVL_AFTER);
1192 	if (zuniqid != GLOBAL_ZONEUNIQID)
1193 		while (ct && (contract_getzuniqid(ct) != zuniqid))
1194 			ct = AVL_NEXT(tree, ct);
1195 	res = ct ? ct->ct_id : -1;
1196 
1197 	return (res);
1198 }
1199 
1200 /*
1201  * contract_type_lookup
1202  *
1203  * Returns the next type contract after the specified id, visible from
1204  * the specified zone.
1205  */
1206 ctid_t
1207 contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
1208 {
1209 	ctid_t res;
1210 
1211 	mutex_enter(&type->ct_type_lock);
1212 	res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
1213 	mutex_exit(&type->ct_type_lock);
1214 
1215 	return (res);
1216 }
1217 
1218 /*
1219  * contract_lookup
1220  *
1221  * Returns the next contract after the specified id, visible from the
1222  * specified zone.
1223  */
1224 ctid_t
1225 contract_lookup(uint64_t zuniqid, ctid_t current)
1226 {
1227 	ctid_t res;
1228 
1229 	mutex_enter(&contract_lock);
1230 	res = contract_lookup_common(&contract_avl, zuniqid, current);
1231 	mutex_exit(&contract_lock);
1232 
1233 	return (res);
1234 }
1235 
1236 /*
1237  * contract_plookup
1238  *
1239  * Returns the next contract held by process p after the specified id,
1240  * visible from the specified zone.  Made complicated by the fact that
1241  * contracts visible in a zone but held by processes outside of the
1242  * zone need to appear as being held by zsched to zone members.
1243  */
1244 ctid_t
1245 contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
1246 {
1247 	contract_t template, *ct;
1248 	avl_index_t where;
1249 	ctid_t res;
1250 
1251 	template.ct_id = current;
1252 	if (zuniqid != GLOBAL_ZONEUNIQID &&
1253 	    (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
1254 		/* This is inelegant. */
1255 		mutex_enter(&contract_lock);
1256 		ct = avl_find(&contract_avl, &template, &where);
1257 		if (ct == NULL)
1258 			ct = avl_nearest(&contract_avl, where, AVL_AFTER);
1259 		while (ct && !(ct->ct_state < CTS_ORPHAN &&
1260 		    contract_getzuniqid(ct) == zuniqid &&
1261 		    ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
1262 			ct = AVL_NEXT(&contract_avl, ct);
1263 		res = ct ? ct->ct_id : -1;
1264 		mutex_exit(&contract_lock);
1265 	} else {
1266 		mutex_enter(&p->p_lock);
1267 		ct = avl_find(&p->p_ct_held, &template, &where);
1268 		if (ct == NULL)
1269 			ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
1270 		res = ct ? ct->ct_id : -1;
1271 		mutex_exit(&p->p_lock);
1272 	}
1273 
1274 	return (res);
1275 }
1276 
1277 /*
1278  * contract_ptr_common
1279  *
1280  * Common code for contract_ptr and contract_type_ptr.  Takes a pointer
1281  * to an AVL tree to search in.  Should be called with the appropriate
1282  * tree-protecting lock held (unfortunately unassertable).
1283  */
1284 static contract_t *
1285 contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
1286 {
1287 	contract_t template, *ct;
1288 
1289 	template.ct_id = id;
1290 	ct = avl_find(tree, &template, NULL);
1291 	if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
1292 	    contract_getzuniqid(ct) != zuniqid)) {
1293 		return (NULL);
1294 	}
1295 
1296 	/*
1297 	 * Check to see if a thread is in the window in contract_rele
1298 	 * between dropping the reference count and removing the
1299 	 * contract from the type AVL.
1300 	 */
1301 	mutex_enter(&ct->ct_reflock);
1302 	if (ct->ct_ref) {
1303 		ct->ct_ref++;
1304 		mutex_exit(&ct->ct_reflock);
1305 	} else {
1306 		mutex_exit(&ct->ct_reflock);
1307 		ct = NULL;
1308 	}
1309 
1310 	return (ct);
1311 }
1312 
1313 /*
1314  * contract_type_ptr
1315  *
1316  * Returns a pointer to the contract with the specified id.  The
1317  * contract is held, so the caller needs to release the reference when
1318  * it is through with the contract.
1319  */
1320 contract_t *
1321 contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
1322 {
1323 	contract_t *ct;
1324 
1325 	mutex_enter(&type->ct_type_lock);
1326 	ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
1327 	mutex_exit(&type->ct_type_lock);
1328 
1329 	return (ct);
1330 }
1331 
1332 /*
1333  * contract_ptr
1334  *
1335  * Returns a pointer to the contract with the specified id.  The
1336  * contract is held, so the caller needs to release the reference when
1337  * it is through with the contract.
1338  */
1339 contract_t *
1340 contract_ptr(ctid_t id, uint64_t zuniqid)
1341 {
1342 	contract_t *ct;
1343 
1344 	mutex_enter(&contract_lock);
1345 	ct = contract_ptr_common(&contract_avl, id, zuniqid);
1346 	mutex_exit(&contract_lock);
1347 
1348 	return (ct);
1349 }
1350 
1351 /*
1352  * contract_type_time
1353  *
1354  * Obtains the last time a contract of a particular type was created.
1355  */
1356 void
1357 contract_type_time(ct_type_t *type, timestruc_t *time)
1358 {
1359 	mutex_enter(&type->ct_type_lock);
1360 	*time = type->ct_type_timestruc;
1361 	mutex_exit(&type->ct_type_lock);
1362 }
1363 
1364 /*
1365  * contract_type_bundle
1366  *
1367  * Obtains a type's bundle queue.
1368  */
1369 ct_equeue_t *
1370 contract_type_bundle(ct_type_t *type)
1371 {
1372 	return (&type->ct_type_events);
1373 }
1374 
1375 /*
1376  * contract_type_pbundle
1377  *
1378  * Obtain's a process's bundle queue.  If one doesn't exist, one is
1379  * created.  Often used simply to ensure that a bundle queue is
1380  * allocated.
1381  */
1382 ct_equeue_t *
1383 contract_type_pbundle(ct_type_t *type, proc_t *pp)
1384 {
1385 	/*
1386 	 * If there isn't an array of bundle queues, allocate one.
1387 	 */
1388 	if (pp->p_ct_equeue == NULL) {
1389 		size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
1390 		ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);
1391 
1392 		mutex_enter(&pp->p_lock);
1393 		if (pp->p_ct_equeue)
1394 			kmem_free(qa, size);
1395 		else
1396 			pp->p_ct_equeue = qa;
1397 		mutex_exit(&pp->p_lock);
1398 	}
1399 
1400 	/*
1401 	 * If there isn't a bundle queue of the required type, allocate
1402 	 * one.
1403 	 */
1404 	if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
1405 		ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
1406 		cte_queue_create(q, CTEL_PBUNDLE, 20, 1);
1407 
1408 		mutex_enter(&pp->p_lock);
1409 		if (pp->p_ct_equeue[type->ct_type_index])
1410 			cte_queue_drain(q, 0);
1411 		else
1412 			pp->p_ct_equeue[type->ct_type_index] = q;
1413 		mutex_exit(&pp->p_lock);
1414 	}
1415 
1416 	return (pp->p_ct_equeue[type->ct_type_index]);
1417 }
1418 
1419 /*
1420  * ctparam_copyin
1421  *
1422  * copyin a ct_param_t for CT_TSET or CT_TGET commands.
1423  * If ctparam_copyout() is not called after ctparam_copyin(), then
1424  * the caller must kmem_free() the buffer pointed by kparam->ctpm_kbuf.
1425  *
1426  * The copyin/out of ct_param_t is not done in ctmpl_set() and ctmpl_get()
1427  * because prctioctl() calls ctmpl_set() and ctmpl_get() while holding a
1428  * process lock.
1429  */
1430 int
1431 ctparam_copyin(const void *uaddr, ct_kparam_t *kparam, int flag, int cmd)
1432 {
1433 	uint32_t size;
1434 	void *ubuf;
1435 	ct_param_t *param = &kparam->param;
1436 	STRUCT_DECL(ct_param, uarg);
1437 
1438 	STRUCT_INIT(uarg, flag);
1439 	if (copyin(uaddr, STRUCT_BUF(uarg), STRUCT_SIZE(uarg)))
1440 		return (EFAULT);
1441 	size = STRUCT_FGET(uarg, ctpm_size);
1442 	ubuf = STRUCT_FGETP(uarg, ctpm_value);
1443 
1444 	if (size > CT_PARAM_MAX_SIZE || size == 0)
1445 		return (EINVAL);
1446 
1447 	kparam->ctpm_kbuf = kmem_alloc(size, KM_SLEEP);
1448 	if (cmd == CT_TSET) {
1449 		if (copyin(ubuf, kparam->ctpm_kbuf, size)) {
1450 			kmem_free(kparam->ctpm_kbuf, size);
1451 			return (EFAULT);
1452 		}
1453 	}
1454 	param->ctpm_id = STRUCT_FGET(uarg, ctpm_id);
1455 	param->ctpm_size = size;
1456 	param->ctpm_value = ubuf;
1457 	kparam->ret_size = 0;
1458 
1459 	return (0);
1460 }
1461 
1462 /*
1463  * ctparam_copyout
1464  *
1465  * copyout a ct_kparam_t and frees the buffer pointed by the member
1466  * ctpm_kbuf of ct_kparam_t
1467  */
1468 int
1469 ctparam_copyout(ct_kparam_t *kparam, void *uaddr, int flag)
1470 {
1471 	int r = 0;
1472 	ct_param_t *param = &kparam->param;
1473 	STRUCT_DECL(ct_param, uarg);
1474 
1475 	STRUCT_INIT(uarg, flag);
1476 
1477 	STRUCT_FSET(uarg, ctpm_id, param->ctpm_id);
1478 	STRUCT_FSET(uarg, ctpm_size, kparam->ret_size);
1479 	STRUCT_FSETP(uarg, ctpm_value, param->ctpm_value);
1480 	if (copyout(STRUCT_BUF(uarg), uaddr, STRUCT_SIZE(uarg))) {
1481 		r = EFAULT;
1482 		goto error;
1483 	}
1484 	if (copyout(kparam->ctpm_kbuf, param->ctpm_value,
1485 	    MIN(kparam->ret_size, param->ctpm_size))) {
1486 		r = EFAULT;
1487 	}
1488 
1489 error:
1490 	kmem_free(kparam->ctpm_kbuf, param->ctpm_size);
1491 
1492 	return (r);
1493 }
1494 
1495 /*
1496  * ctmpl_free
1497  *
1498  * Frees a template.
1499  */
1500 void
1501 ctmpl_free(ct_template_t *template)
1502 {
1503 	mutex_destroy(&template->ctmpl_lock);
1504 	template->ctmpl_ops->ctop_free(template);
1505 }
1506 
1507 /*
1508  * ctmpl_dup
1509  *
1510  * Creates a copy of a template.
1511  */
1512 ct_template_t *
1513 ctmpl_dup(ct_template_t *template)
1514 {
1515 	ct_template_t *new;
1516 
1517 	if (template == NULL)
1518 		return (NULL);
1519 
1520 	new = template->ctmpl_ops->ctop_dup(template);
1521 	/*
1522 	 * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
1523 	 * should have remain held until now.
1524 	 */
1525 	mutex_exit(&template->ctmpl_lock);
1526 
1527 	return (new);
1528 }
1529 
1530 /*
1531  * ctmpl_set
1532  *
1533  * Sets the requested terms of a template.
1534  */
1535 int
1536 ctmpl_set(ct_template_t *template, ct_kparam_t *kparam, const cred_t *cr)
1537 {
1538 	int result = 0;
1539 	ct_param_t *param = &kparam->param;
1540 	uint64_t param_value;
1541 
1542 	if (param->ctpm_id == CTP_COOKIE ||
1543 	    param->ctpm_id == CTP_EV_INFO ||
1544 	    param->ctpm_id == CTP_EV_CRITICAL) {
1545 		if (param->ctpm_size < sizeof (uint64_t)) {
1546 			return (EINVAL);
1547 		} else {
1548 			param_value = *(uint64_t *)kparam->ctpm_kbuf;
1549 		}
1550 	}
1551 
1552 	mutex_enter(&template->ctmpl_lock);
1553 	switch (param->ctpm_id) {
1554 	case CTP_COOKIE:
1555 		template->ctmpl_cookie = param_value;
1556 		break;
1557 	case CTP_EV_INFO:
1558 		if (param_value & ~(uint64_t)template->ctmpl_ops->allevents)
1559 			result = EINVAL;
1560 		else
1561 			template->ctmpl_ev_info = param_value;
1562 		break;
1563 	case CTP_EV_CRITICAL:
1564 		if (param_value & ~(uint64_t)template->ctmpl_ops->allevents) {
1565 			result = EINVAL;
1566 			break;
1567 		} else if ((~template->ctmpl_ev_crit & param_value) == 0) {
1568 			/*
1569 			 * Assume that a pure reduction of the critical
1570 			 * set is allowed by the contract type.
1571 			 */
1572 			template->ctmpl_ev_crit = param_value;
1573 			break;
1574 		}
1575 		/*
1576 		 * There may be restrictions on what we can make
1577 		 * critical, so we defer to the judgement of the
1578 		 * contract type.
1579 		 */
1580 		/* FALLTHROUGH */
1581 	default:
1582 		result = template->ctmpl_ops->ctop_set(template, kparam, cr);
1583 	}
1584 	mutex_exit(&template->ctmpl_lock);
1585 
1586 	return (result);
1587 }
1588 
1589 /*
1590  * ctmpl_get
1591  *
1592  * Obtains the requested terms from a template.
1593  *
1594  * If the term requested is a variable-sized term and the buffer
1595  * provided is too small for the data, we truncate the data and return
1596  * the buffer size necessary to fit the term in kparam->ret_size. If the
1597  * term requested is fix-sized (uint64_t) and the buffer provided is too
1598  * small, we return EINVAL.  This should never happen if you're using
1599  * libcontract(3LIB), only if you call ioctl with a hand constructed
1600  * ct_param_t argument.
1601  *
1602  * Currently, only contract specific parameters have variable-sized
1603  * parameters.
1604  */
1605 int
1606 ctmpl_get(ct_template_t *template, ct_kparam_t *kparam)
1607 {
1608 	int result = 0;
1609 	ct_param_t *param = &kparam->param;
1610 	uint64_t *param_value;
1611 
1612 	if (param->ctpm_id == CTP_COOKIE ||
1613 	    param->ctpm_id == CTP_EV_INFO ||
1614 	    param->ctpm_id == CTP_EV_CRITICAL) {
1615 		if (param->ctpm_size < sizeof (uint64_t)) {
1616 			return (EINVAL);
1617 		} else {
1618 			param_value = kparam->ctpm_kbuf;
1619 			kparam->ret_size = sizeof (uint64_t);
1620 		}
1621 	}
1622 
1623 	mutex_enter(&template->ctmpl_lock);
1624 	switch (param->ctpm_id) {
1625 	case CTP_COOKIE:
1626 		*param_value = template->ctmpl_cookie;
1627 		break;
1628 	case CTP_EV_INFO:
1629 		*param_value = template->ctmpl_ev_info;
1630 		break;
1631 	case CTP_EV_CRITICAL:
1632 		*param_value = template->ctmpl_ev_crit;
1633 		break;
1634 	default:
1635 		result = template->ctmpl_ops->ctop_get(template, kparam);
1636 	}
1637 	mutex_exit(&template->ctmpl_lock);
1638 
1639 	return (result);
1640 }
1641 
1642 /*
1643  * ctmpl_makecurrent
1644  *
1645  * Used by ctmpl_activate and ctmpl_clear to set the current thread's
1646  * active template.  Frees the old active template, if there was one.
1647  */
1648 static void
1649 ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
1650 {
1651 	klwp_t *curlwp = ttolwp(curthread);
1652 	proc_t *p = curproc;
1653 	ct_template_t *old;
1654 
1655 	mutex_enter(&p->p_lock);
1656 	old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
1657 	curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
1658 	mutex_exit(&p->p_lock);
1659 
1660 	if (old)
1661 		ctmpl_free(old);
1662 }
1663 
1664 /*
1665  * ctmpl_activate
1666  *
1667  * Copy the specified template as the current thread's activate
1668  * template of that type.
1669  */
1670 void
1671 ctmpl_activate(ct_template_t *template)
1672 {
1673 	ctmpl_makecurrent(template, ctmpl_dup(template));
1674 }
1675 
1676 /*
1677  * ctmpl_clear
1678  *
1679  * Clears the current thread's activate template of the same type as
1680  * the specified template.
1681  */
1682 void
1683 ctmpl_clear(ct_template_t *template)
1684 {
1685 	ctmpl_makecurrent(template, NULL);
1686 }
1687 
1688 /*
1689  * ctmpl_create
1690  *
1691  * Creates a new contract using the specified template.
1692  */
1693 int
1694 ctmpl_create(ct_template_t *template, ctid_t *ctidp)
1695 {
1696 	return (template->ctmpl_ops->ctop_create(template, ctidp));
1697 }
1698 
1699 /*
1700  * ctmpl_init
1701  *
1702  * Initializes the common portion of a new contract template.
1703  */
1704 void
1705 ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
1706 {
1707 	mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1708 	new->ctmpl_ops = ops;
1709 	new->ctmpl_type = type;
1710 	new->ctmpl_data = data;
1711 	new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
1712 	new->ctmpl_cookie = 0;
1713 }
1714 
1715 /*
1716  * ctmpl_copy
1717  *
1718  * Copies the common portions of a contract template.  Intended for use
1719  * by a contract type's ctop_dup template op.  Returns with the old
1720  * template's lock held, which will should remain held until the
1721  * template op returns (it is dropped by ctmpl_dup).
1722  */
1723 void
1724 ctmpl_copy(ct_template_t *new, ct_template_t *old)
1725 {
1726 	mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
1727 	mutex_enter(&old->ctmpl_lock);
1728 	new->ctmpl_ops = old->ctmpl_ops;
1729 	new->ctmpl_type = old->ctmpl_type;
1730 	new->ctmpl_ev_crit = old->ctmpl_ev_crit;
1731 	new->ctmpl_ev_info = old->ctmpl_ev_info;
1732 	new->ctmpl_cookie = old->ctmpl_cookie;
1733 }
1734 
1735 /*
1736  * ctmpl_create_inval
1737  *
1738  * Returns EINVAL.  Provided for the convenience of those contract
1739  * types which don't support ct_tmpl_create(3contract) and would
1740  * otherwise need to create their own stub for the ctop_create template
1741  * op.
1742  */
1743 /*ARGSUSED*/
1744 int
1745 ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
1746 {
1747 	return (EINVAL);
1748 }
1749 
1750 
1751 /*
1752  * cte_queue_create
1753  *
1754  * Initializes a queue of a particular type.  If dynamic is set, the
1755  * queue is to be freed when its last listener is removed after being
1756  * drained.
1757  */
1758 static void
1759 cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
1760 {
1761 	mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
1762 	q->ctq_listno = list;
1763 	list_create(&q->ctq_events, sizeof (ct_kevent_t),
1764 	    offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
1765 	list_create(&q->ctq_listeners, sizeof (ct_listener_t),
1766 	    offsetof(ct_listener_t, ctl_allnode));
1767 	list_create(&q->ctq_tail, sizeof (ct_listener_t),
1768 	    offsetof(ct_listener_t, ctl_tailnode));
1769 	gethrestime(&q->ctq_atime);
1770 	q->ctq_nlisteners = 0;
1771 	q->ctq_nreliable = 0;
1772 	q->ctq_ninf = 0;
1773 	q->ctq_max = maxinf;
1774 
1775 	/*
1776 	 * Bundle queues and contract queues are embedded in other
1777 	 * structures and are implicitly referenced counted by virtue
1778 	 * of their vnodes' indirect hold on their contracts.  Process
1779 	 * bundle queues are dynamically allocated and may persist
1780 	 * after the death of the process, so they must be explicitly
1781 	 * reference counted.
1782 	 */
1783 	q->ctq_flags = dynamic ? CTQ_REFFED : 0;
1784 }
1785 
1786 /*
1787  * cte_queue_destroy
1788  *
1789  * Destroys the specified queue.  The queue is freed if referenced
1790  * counted.
1791  */
1792 static void
1793 cte_queue_destroy(ct_equeue_t *q)
1794 {
1795 	ASSERT(q->ctq_flags & CTQ_DEAD);
1796 	ASSERT(q->ctq_nlisteners == 0);
1797 	ASSERT(q->ctq_nreliable == 0);
1798 	list_destroy(&q->ctq_events);
1799 	list_destroy(&q->ctq_listeners);
1800 	list_destroy(&q->ctq_tail);
1801 	mutex_destroy(&q->ctq_lock);
1802 	if (q->ctq_flags & CTQ_REFFED)
1803 		kmem_free(q, sizeof (ct_equeue_t));
1804 }
1805 
1806 /*
1807  * cte_hold
1808  *
1809  * Takes a hold on the specified event.
1810  */
1811 static void
1812 cte_hold(ct_kevent_t *e)
1813 {
1814 	mutex_enter(&e->cte_lock);
1815 	ASSERT(e->cte_refs > 0);
1816 	e->cte_refs++;
1817 	mutex_exit(&e->cte_lock);
1818 }
1819 
1820 /*
1821  * cte_rele
1822  *
1823  * Releases a hold on the specified event.  If the caller had the last
1824  * reference, frees the event and releases its hold on the contract
1825  * that generated it.
1826  */
1827 static void
1828 cte_rele(ct_kevent_t *e)
1829 {
1830 	mutex_enter(&e->cte_lock);
1831 	ASSERT(e->cte_refs > 0);
1832 	if (--e->cte_refs) {
1833 		mutex_exit(&e->cte_lock);
1834 		return;
1835 	}
1836 
1837 	contract_rele(e->cte_contract);
1838 
1839 	mutex_destroy(&e->cte_lock);
1840 	if (e->cte_data)
1841 		nvlist_free(e->cte_data);
1842 	if (e->cte_gdata)
1843 		nvlist_free(e->cte_gdata);
1844 	kmem_free(e, sizeof (ct_kevent_t));
1845 }
1846 
1847 /*
1848  * cte_qrele
1849  *
1850  * Remove this listener's hold on the specified event, removing and
1851  * releasing the queue's hold on the event if appropriate.
1852  */
1853 static void
1854 cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1855 {
1856 	ct_member_t *member = &e->cte_nodes[q->ctq_listno];
1857 
1858 	ASSERT(MUTEX_HELD(&q->ctq_lock));
1859 
1860 	if (l->ctl_flags & CTLF_RELIABLE)
1861 		member->ctm_nreliable--;
1862 	if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
1863 		member->ctm_trimmed = 0;
1864 		list_remove(&q->ctq_events, e);
1865 		cte_rele(e);
1866 	}
1867 }
1868 
1869 /*
1870  * cte_qmove
1871  *
1872  * Move this listener to the specified event in the queue.
1873  */
1874 static ct_kevent_t *
1875 cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
1876 {
1877 	ct_kevent_t *olde;
1878 
1879 	ASSERT(MUTEX_HELD(&q->ctq_lock));
1880 	ASSERT(l->ctl_equeue == q);
1881 
1882 	if ((olde = l->ctl_position) == NULL)
1883 		list_remove(&q->ctq_tail, l);
1884 
1885 	while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
1886 		e = list_next(&q->ctq_events, e);
1887 
1888 	if (e != NULL) {
1889 		e->cte_nodes[q->ctq_listno].ctm_refs++;
1890 		if (l->ctl_flags & CTLF_RELIABLE)
1891 			e->cte_nodes[q->ctq_listno].ctm_nreliable++;
1892 	} else {
1893 		list_insert_tail(&q->ctq_tail, l);
1894 	}
1895 
1896 	l->ctl_position = e;
1897 	if (olde)
1898 		cte_qrele(q, l, olde);
1899 
1900 	return (e);
1901 }
1902 
1903 /*
1904  * cte_checkcred
1905  *
1906  * Determines if the specified event's contract is owned by a process
1907  * with the same effective uid as the specified credential.  Called
1908  * after a failed call to contract_owned with locked set.  Because it
1909  * drops the queue lock, its caller (cte_qreadable) needs to make sure
1910  * we're still in the same place after we return.  Returns 1 on
1911  * success.
1912  */
1913 static int
1914 cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
1915 {
1916 	int result;
1917 	contract_t *ct = e->cte_contract;
1918 
1919 	cte_hold(e);
1920 	mutex_exit(&q->ctq_lock);
1921 	result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
1922 	    contract_checkcred(ct, cr);
1923 	mutex_enter(&q->ctq_lock);
1924 	cte_rele(e);
1925 
1926 	return (result);
1927 }
1928 
1929 /*
1930  * cte_qreadable
1931  *
1932  * Ensures that the listener is pointing to a valid event that the
1933  * caller has the credentials to read.  Returns 0 if we can read the
1934  * event we're pointing to.
1935  */
1936 static int
1937 cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
1938     uint64_t zuniqid, int crit)
1939 {
1940 	ct_kevent_t *e, *next;
1941 	contract_t *ct;
1942 
1943 	ASSERT(MUTEX_HELD(&q->ctq_lock));
1944 	ASSERT(l->ctl_equeue == q);
1945 
1946 	if (l->ctl_flags & CTLF_COPYOUT)
1947 		return (1);
1948 
1949 	next = l->ctl_position;
1950 	while (e = cte_qmove(q, l, next)) {
1951 		ct = e->cte_contract;
1952 		/*
1953 		 * Check obvious things first.  If we are looking for a
1954 		 * critical message, is this one?  If we aren't in the
1955 		 * global zone, is this message meant for us?
1956 		 */
1957 		if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
1958 		    (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
1959 		    zuniqid != contract_getzuniqid(ct))) {
1960 
1961 			next = list_next(&q->ctq_events, e);
1962 
1963 		/*
1964 		 * Next, see if our effective uid equals that of owner
1965 		 * or author of the contract.  Since we are holding the
1966 		 * queue lock, contract_owned can't always check if we
1967 		 * have the same effective uid as the contract's
1968 		 * owner.  If it comes to that, it fails and we take
1969 		 * the slow(er) path.
1970 		 */
1971 		} else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {
1972 
1973 			/*
1974 			 * At this point we either don't have any claim
1975 			 * to this contract or we match the effective
1976 			 * uid of the owner but couldn't tell.  We
1977 			 * first test for a NULL holder so that events
1978 			 * from orphans and inherited contracts avoid
1979 			 * the penalty phase.
1980 			 */
1981 			if (e->cte_contract->ct_owner == NULL &&
1982 			    !secpolicy_contract_observer_choice(cr))
1983 				next = list_next(&q->ctq_events, e);
1984 
1985 			/*
1986 			 * cte_checkcred will juggle locks to see if we
1987 			 * have the same uid as the event's contract's
1988 			 * current owner.  If it succeeds, we have to
1989 			 * make sure we are in the same point in the
1990 			 * queue.
1991 			 */
1992 			else if (cte_checkcred(q, e, cr) &&
1993 			    l->ctl_position == e)
1994 				break;
1995 
1996 			/*
1997 			 * cte_checkcred failed; see if we're in the
1998 			 * same place.
1999 			 */
2000 			else if (l->ctl_position == e)
2001 				if (secpolicy_contract_observer_choice(cr))
2002 					break;
2003 				else
2004 					next = list_next(&q->ctq_events, e);
2005 
2006 			/*
2007 			 * cte_checkcred failed, and our position was
2008 			 * changed.  Start from there.
2009 			 */
2010 			else
2011 				next = l->ctl_position;
2012 		} else {
2013 			break;
2014 		}
2015 	}
2016 
2017 	/*
2018 	 * We check for CTLF_COPYOUT again in case we dropped the queue
2019 	 * lock in cte_checkcred.
2020 	 */
2021 	return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
2022 }
2023 
2024 /*
2025  * cte_qwakeup
2026  *
2027  * Wakes up any waiting listeners and points them at the specified event.
2028  */
2029 static void
2030 cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
2031 {
2032 	ct_listener_t *l;
2033 
2034 	ASSERT(MUTEX_HELD(&q->ctq_lock));
2035 
2036 	while (l = list_head(&q->ctq_tail)) {
2037 		list_remove(&q->ctq_tail, l);
2038 		e->cte_nodes[q->ctq_listno].ctm_refs++;
2039 		if (l->ctl_flags & CTLF_RELIABLE)
2040 			e->cte_nodes[q->ctq_listno].ctm_nreliable++;
2041 		l->ctl_position = e;
2042 		cv_signal(&l->ctl_cv);
2043 		pollwakeup(&l->ctl_pollhead, POLLIN);
2044 	}
2045 }
2046 
2047 /*
2048  * cte_copy
2049  *
2050  * Copies events from the specified contract event queue to the
2051  * end of the specified process bundle queue.  Only called from
2052  * contract_adopt.
2053  *
2054  * We copy to the end of the target queue instead of mixing the events
2055  * in their proper order because otherwise the act of adopting a
2056  * contract would require a process to reset all process bundle
2057  * listeners it needed to see the new events.  This would, in turn,
2058  * require the process to keep track of which preexisting events had
2059  * already been processed.
2060  */
2061 static void
2062 cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
2063 {
2064 	ct_kevent_t *e, *first = NULL;
2065 
2066 	VERIFY(q->ctq_listno == CTEL_CONTRACT);
2067 	VERIFY(newq->ctq_listno == CTEL_PBUNDLE);
2068 
2069 	mutex_enter(&q->ctq_lock);
2070 	mutex_enter(&newq->ctq_lock);
2071 
2072 	/*
2073 	 * For now, only copy critical events.
2074 	 */
2075 	for (e = list_head(&q->ctq_events); e != NULL;
2076 	    e = list_next(&q->ctq_events, e)) {
2077 		if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
2078 			if (first == NULL)
2079 				first = e;
2080 			/*
2081 			 * It is possible for adoption to race with an owner's
2082 			 * cte_publish_all(); we must only enqueue events that
2083 			 * have not already been enqueued.
2084 			 */
2085 			if (!list_link_active((list_node_t *)
2086 			    ((uintptr_t)e + newq->ctq_events.list_offset))) {
2087 				list_insert_tail(&newq->ctq_events, e);
2088 				cte_hold(e);
2089 			}
2090 		}
2091 	}
2092 
2093 	mutex_exit(&q->ctq_lock);
2094 
2095 	if (first)
2096 		cte_qwakeup(newq, first);
2097 
2098 	mutex_exit(&newq->ctq_lock);
2099 }
2100 
2101 /*
2102  * cte_trim
2103  *
2104  * Trims unneeded events from an event queue.  Algorithm works as
2105  * follows:
2106  *
2107  *   Removes all informative and acknowledged critical events until the
2108  *   first referenced event is found.
2109  *
2110  *   If a contract is specified, removes all events (regardless of
2111  *   acknowledgement) generated by that contract until the first event
2112  *   referenced by a reliable listener is found.  Reference events are
2113  *   removed by marking them "trimmed".  Such events will be removed
2114  *   when the last reference is dropped and will be skipped by future
2115  *   listeners.
2116  *
2117  * This is pretty basic.  Ideally this should remove from the middle of
2118  * the list (i.e. beyond the first referenced event), and even
2119  * referenced events.
2120  */
2121 static void
2122 cte_trim(ct_equeue_t *q, contract_t *ct)
2123 {
2124 	ct_kevent_t *e, *next;
2125 	int flags, stopper;
2126 	int start = 1;
2127 
2128 	VERIFY(MUTEX_HELD(&q->ctq_lock));
2129 
2130 	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2131 		next = list_next(&q->ctq_events, e);
2132 		flags = e->cte_flags;
2133 		stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
2134 		    (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
2135 		if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
2136 			if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
2137 			    (e->cte_contract == ct)) {
2138 				/*
2139 				 * Toss informative and ACKed critical messages.
2140 				 */
2141 				list_remove(&q->ctq_events, e);
2142 				cte_rele(e);
2143 			}
2144 		} else if ((e->cte_contract == ct) && !stopper) {
2145 			ASSERT(q->ctq_nlisteners != 0);
2146 			e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
2147 		} else if (ct && !stopper) {
2148 			start = 0;
2149 		} else {
2150 			/*
2151 			 * Don't free messages past the first reader.
2152 			 */
2153 			break;
2154 		}
2155 	}
2156 }
2157 
2158 /*
2159  * cte_queue_drain
2160  *
2161  * Drain all events from the specified queue, and mark it dead.  If
2162  * "ack" is set, acknowledge any critical events we find along the
2163  * way.
2164  */
2165 static void
2166 cte_queue_drain(ct_equeue_t *q, int ack)
2167 {
2168 	ct_kevent_t *e, *next;
2169 	ct_listener_t *l;
2170 
2171 	mutex_enter(&q->ctq_lock);
2172 
2173 	for (e = list_head(&q->ctq_events); e != NULL; e = next) {
2174 		next = list_next(&q->ctq_events, e);
2175 		if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
2176 			/*
2177 			 * Make sure critical messages are eventually
2178 			 * removed from the bundle queues.
2179 			 */
2180 			mutex_enter(&e->cte_lock);
2181 			e->cte_flags |= CTE_ACK;
2182 			mutex_exit(&e->cte_lock);
2183 			ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
2184 			e->cte_contract->ct_evcnt--;
2185 		}
2186 		list_remove(&q->ctq_events, e);
2187 		e->cte_nodes[q->ctq_listno].ctm_refs = 0;
2188 		e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
2189 		e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
2190 		cte_rele(e);
2191 	}
2192 
2193 	/*
2194 	 * This is necessary only because of CTEL_PBUNDLE listeners;
2195 	 * the events they point to can move from one pbundle to
2196 	 * another.  Fortunately, this only happens if the contract is
2197 	 * inherited, which (in turn) only happens if the process
2198 	 * exits, which means it's an all-or-nothing deal.  If this
2199 	 * wasn't the case, we would instead need to keep track of
2200 	 * listeners on a per-event basis, not just a per-queue basis.
2201 	 * This would have the side benefit of letting us clean up
2202 	 * trimmed events sooner (i.e. immediately), but would
2203 	 * unfortunately make events even bigger than they already
2204 	 * are.
2205 	 */
2206 	for (l = list_head(&q->ctq_listeners); l;
2207 	    l = list_next(&q->ctq_listeners, l)) {
2208 		l->ctl_flags |= CTLF_DEAD;
2209 		if (l->ctl_position) {
2210 			l->ctl_position = NULL;
2211 			list_insert_tail(&q->ctq_tail, l);
2212 		}
2213 		cv_broadcast(&l->ctl_cv);
2214 	}
2215 
2216 	/*
2217 	 * Disallow events.
2218 	 */
2219 	q->ctq_flags |= CTQ_DEAD;
2220 
2221 	/*
2222 	 * If we represent the last reference to a reference counted
2223 	 * process bundle queue, free it.
2224 	 */
2225 	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
2226 		cte_queue_destroy(q);
2227 	else
2228 		mutex_exit(&q->ctq_lock);
2229 }
2230 
2231 /*
2232  * cte_publish
2233  *
2234  * Publishes an event to a specific queue.  Only called by
2235  * cte_publish_all.
2236  */
2237 static void
2238 cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist)
2239 {
2240 	ASSERT(MUTEX_HELD(&q->ctq_lock));
2241 
2242 	q->ctq_atime = *tsp;
2243 
2244 	/*
2245 	 * If this event may already exist on this queue, check to see if it
2246 	 * is already there and return if so.
2247 	 */
2248 	if (mayexist && list_link_active((list_node_t *)((uintptr_t)e +
2249 	    q->ctq_events.list_offset))) {
2250 		mutex_exit(&q->ctq_lock);
2251 		cte_rele(e);
2252 		return;
2253 	}
2254 
2255 	/*
2256 	 * Don't publish if the event is informative and there aren't
2257 	 * any listeners, or if the queue has been shut down.
2258 	 */
2259 	if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
2260 	    (q->ctq_flags & CTQ_DEAD)) {
2261 		mutex_exit(&q->ctq_lock);
2262 		cte_rele(e);
2263 		return;
2264 	}
2265 
2266 	/*
2267 	 * Enqueue event
2268 	 */
2269 	VERIFY(!list_link_active((list_node_t *)
2270 	    ((uintptr_t)e + q->ctq_events.list_offset)));
2271 	list_insert_tail(&q->ctq_events, e);
2272 
2273 	/*
2274 	 * Check for waiting listeners
2275 	 */
2276 	cte_qwakeup(q, e);
2277 
2278 	/*
2279 	 * Trim unnecessary events from the queue.
2280 	 */
2281 	cte_trim(q, NULL);
2282 	mutex_exit(&q->ctq_lock);
2283 }
2284 
2285 /*
2286  * cte_publish_all
2287  *
2288  * Publish an event to all necessary event queues.  The event, e, must
2289  * be zallocated by the caller, and the event's flags and type must be
2290  * set.  The rest of the event's fields are initialized here.
2291  */
2292 uint64_t
2293 cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
2294 {
2295 	ct_equeue_t *q;
2296 	timespec_t ts;
2297 	uint64_t evid;
2298 	ct_kevent_t *negev;
2299 	int negend;
2300 
2301 	e->cte_contract = ct;
2302 	e->cte_data = data;
2303 	e->cte_gdata = gdata;
2304 	e->cte_refs = 3;
2305 	evid = e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
2306 	contract_hold(ct);
2307 
2308 	/*
2309 	 * For a negotiation event we set the ct->ct_nevent field of the
2310 	 * contract for the duration of the negotiation
2311 	 */
2312 	negend = 0;
2313 	if (e->cte_flags & CTE_NEG) {
2314 		cte_hold(e);
2315 		ct->ct_nevent = e;
2316 	} else if (e->cte_type == CT_EV_NEGEND) {
2317 		negend = 1;
2318 	}
2319 
2320 	gethrestime(&ts);
2321 
2322 	/*
2323 	 * ct_evtlock simply (and only) ensures that two events sent
2324 	 * from the same contract are delivered to all queues in the
2325 	 * same order.
2326 	 */
2327 	mutex_enter(&ct->ct_evtlock);
2328 
2329 	/*
2330 	 * CTEL_CONTRACT - First deliver to the contract queue, acking
2331 	 * the event if the contract has been orphaned.
2332 	 */
2333 	mutex_enter(&ct->ct_lock);
2334 	mutex_enter(&ct->ct_events.ctq_lock);
2335 	if ((e->cte_flags & CTE_INFO) == 0) {
2336 		if (ct->ct_state >= CTS_ORPHAN)
2337 			e->cte_flags |= CTE_ACK;
2338 		else
2339 			ct->ct_evcnt++;
2340 	}
2341 	mutex_exit(&ct->ct_lock);
2342 	cte_publish(&ct->ct_events, e, &ts, B_FALSE);
2343 
2344 	/*
2345 	 * CTEL_BUNDLE - Next deliver to the contract type's bundle
2346 	 * queue.
2347 	 */
2348 	mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
2349 	cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE);
2350 
2351 	/*
2352 	 * CTEL_PBUNDLE - Finally, if the contract has an owner,
2353 	 * deliver to the owner's process bundle queue.
2354 	 */
2355 	mutex_enter(&ct->ct_lock);
2356 	if (ct->ct_owner) {
2357 		/*
2358 		 * proc_exit doesn't free event queues until it has
2359 		 * abandoned all contracts.
2360 		 */
2361 		ASSERT(ct->ct_owner->p_ct_equeue);
2362 		ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
2363 		q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
2364 		mutex_enter(&q->ctq_lock);
2365 		mutex_exit(&ct->ct_lock);
2366 
2367 		/*
2368 		 * It is possible for this code to race with adoption; we
2369 		 * publish the event indicating that the event may already
2370 		 * be enqueued because adoption beat us to it (in which case
2371 		 * cte_pubish() does nothing).
2372 		 */
2373 		cte_publish(q, e, &ts, B_TRUE);
2374 	} else {
2375 		mutex_exit(&ct->ct_lock);
2376 		cte_rele(e);
2377 	}
2378 
2379 	if (negend) {
2380 		mutex_enter(&ct->ct_lock);
2381 		negev = ct->ct_nevent;
2382 		ct->ct_nevent = NULL;
2383 		cte_rele(negev);
2384 		mutex_exit(&ct->ct_lock);
2385 	}
2386 
2387 	mutex_exit(&ct->ct_evtlock);
2388 
2389 	return (evid);
2390 }
2391 
2392 /*
2393  * cte_add_listener
2394  *
2395  * Add a new listener to an event queue.
2396  */
2397 void
2398 cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
2399 {
2400 	cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
2401 	l->ctl_equeue = q;
2402 	l->ctl_position = NULL;
2403 	l->ctl_flags = 0;
2404 
2405 	mutex_enter(&q->ctq_lock);
2406 	list_insert_head(&q->ctq_tail, l);
2407 	list_insert_head(&q->ctq_listeners, l);
2408 	q->ctq_nlisteners++;
2409 	mutex_exit(&q->ctq_lock);
2410 }
2411 
2412 /*
2413  * cte_remove_listener
2414  *
2415  * Remove a listener from an event queue.  No other queue activities
2416  * (e.g. cte_get event) may be in progress at this endpoint when this
2417  * is called.
2418  */
2419 void
2420 cte_remove_listener(ct_listener_t *l)
2421 {
2422 	ct_equeue_t *q = l->ctl_equeue;
2423 	ct_kevent_t *e;
2424 
2425 	mutex_enter(&q->ctq_lock);
2426 
2427 	ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);
2428 
2429 	if ((e = l->ctl_position) != NULL)
2430 		cte_qrele(q, l, e);
2431 	else
2432 		list_remove(&q->ctq_tail, l);
2433 	l->ctl_position = NULL;
2434 
2435 	q->ctq_nlisteners--;
2436 	list_remove(&q->ctq_listeners, l);
2437 
2438 	if (l->ctl_flags & CTLF_RELIABLE)
2439 		q->ctq_nreliable--;
2440 
2441 	/*
2442 	 * If we are a the last listener of a dead reference counted
2443 	 * queue (i.e. a process bundle) we free it.  Otherwise we just
2444 	 * trim any events which may have been kept around for our
2445 	 * benefit.
2446 	 */
2447 	if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
2448 	    (q->ctq_nlisteners == 0)) {
2449 		cte_queue_destroy(q);
2450 	} else {
2451 		cte_trim(q, NULL);
2452 		mutex_exit(&q->ctq_lock);
2453 	}
2454 }
2455 
2456 /*
2457  * cte_reset_listener
2458  *
2459  * Moves a listener's queue pointer to the beginning of the queue.
2460  */
2461 void
2462 cte_reset_listener(ct_listener_t *l)
2463 {
2464 	ct_equeue_t *q = l->ctl_equeue;
2465 
2466 	mutex_enter(&q->ctq_lock);
2467 
2468 	/*
2469 	 * We allow an asynchronous reset because it doesn't make a
2470 	 * whole lot of sense to make reset block or fail.  We already
2471 	 * have most of the mechanism needed thanks to queue trimming,
2472 	 * so implementing it isn't a big deal.
2473 	 */
2474 	if (l->ctl_flags & CTLF_COPYOUT)
2475 		l->ctl_flags |= CTLF_RESET;
2476 
2477 	(void) cte_qmove(q, l, list_head(&q->ctq_events));
2478 
2479 	/*
2480 	 * Inform blocked readers.
2481 	 */
2482 	cv_broadcast(&l->ctl_cv);
2483 	pollwakeup(&l->ctl_pollhead, POLLIN);
2484 	mutex_exit(&q->ctq_lock);
2485 }
2486 
2487 /*
2488  * cte_next_event
2489  *
2490  * Moves the event pointer for the specified listener to the next event
2491  * on the queue.  To avoid races, this movement only occurs if the
2492  * specified event id matches that of the current event.  This is used
2493  * primarily to skip events that have been read but whose extended data
2494  * haven't been copied out.
2495  */
2496 int
2497 cte_next_event(ct_listener_t *l, uint64_t id)
2498 {
2499 	ct_equeue_t *q = l->ctl_equeue;
2500 	ct_kevent_t *old;
2501 
2502 	mutex_enter(&q->ctq_lock);
2503 
2504 	if (l->ctl_flags & CTLF_COPYOUT)
2505 		l->ctl_flags |= CTLF_RESET;
2506 
2507 	if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
2508 		(void) cte_qmove(q, l, list_next(&q->ctq_events, old));
2509 
2510 	mutex_exit(&q->ctq_lock);
2511 
2512 	return (0);
2513 }
2514 
2515 /*
2516  * cte_get_event
2517  *
2518  * Reads an event from an event endpoint.  If "nonblock" is clear, we
2519  * block until a suitable event is ready.  If "crit" is set, we only
2520  * read critical events.  Note that while "cr" is the caller's cred,
2521  * "zuniqid" is the unique id of the zone the calling contract
2522  * filesystem was mounted in.
2523  */
2524 int
2525 cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
2526     uint64_t zuniqid, int crit)
2527 {
2528 	ct_equeue_t *q = l->ctl_equeue;
2529 	ct_kevent_t *temp;
2530 	int result = 0;
2531 	int partial = 0;
2532 	size_t size, gsize, len;
2533 	model_t mdl = get_udatamodel();
2534 	STRUCT_DECL(ct_event, ev);
2535 	STRUCT_INIT(ev, mdl);
2536 
2537 	/*
2538 	 * cte_qreadable checks for CTLF_COPYOUT as well as ensures
2539 	 * that there exists, and we are pointing to, an appropriate
2540 	 * event.  It may temporarily drop ctq_lock, but that doesn't
2541 	 * really matter to us.
2542 	 */
2543 	mutex_enter(&q->ctq_lock);
2544 	while (cte_qreadable(q, l, cr, zuniqid, crit)) {
2545 		if (nonblock) {
2546 			result = EAGAIN;
2547 			goto error;
2548 		}
2549 		if (q->ctq_flags & CTQ_DEAD) {
2550 			result = EIDRM;
2551 			goto error;
2552 		}
2553 		result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
2554 		if (result == 0) {
2555 			result = EINTR;
2556 			goto error;
2557 		}
2558 	}
2559 	temp = l->ctl_position;
2560 	cte_hold(temp);
2561 	l->ctl_flags |= CTLF_COPYOUT;
2562 	mutex_exit(&q->ctq_lock);
2563 
2564 	/*
2565 	 * We now have an event.  Copy in the user event structure to
2566 	 * see how much space we have to work with.
2567 	 */
2568 	result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
2569 	if (result)
2570 		goto copyerr;
2571 
2572 	/*
2573 	 * Determine what data we have and what the user should be
2574 	 * allowed to see.
2575 	 */
2576 	size = gsize = 0;
2577 	if (temp->cte_data) {
2578 		VERIFY(nvlist_size(temp->cte_data, &size,
2579 		    NV_ENCODE_NATIVE) == 0);
2580 		ASSERT(size != 0);
2581 	}
2582 	if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
2583 		VERIFY(nvlist_size(temp->cte_gdata, &gsize,
2584 		    NV_ENCODE_NATIVE) == 0);
2585 		ASSERT(gsize != 0);
2586 	}
2587 
2588 	/*
2589 	 * If we have enough space, copy out the extended event data.
2590 	 */
2591 	len = size + gsize;
2592 	if (len) {
2593 		if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
2594 			char *buf = kmem_alloc(len, KM_SLEEP);
2595 
2596 			if (size)
2597 				VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
2598 				    NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2599 			if (gsize) {
2600 				char *tmp = buf + size;
2601 
2602 				VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
2603 				    &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
2604 			}
2605 
2606 			/* This shouldn't have changed */
2607 			ASSERT(size + gsize == len);
2608 			result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
2609 			    len);
2610 			kmem_free(buf, len);
2611 			if (result)
2612 				goto copyerr;
2613 		} else {
2614 			partial = 1;
2615 		}
2616 	}
2617 
2618 	/*
2619 	 * Copy out the common event data.
2620 	 */
2621 	STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
2622 	STRUCT_FSET(ev, ctev_evid, temp->cte_id);
2623 	STRUCT_FSET(ev, ctev_cttype,
2624 	    temp->cte_contract->ct_type->ct_type_index);
2625 	STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
2626 	    (CTE_ACK|CTE_INFO|CTE_NEG));
2627 	STRUCT_FSET(ev, ctev_type, temp->cte_type);
2628 	STRUCT_FSET(ev, ctev_nbytes, len);
2629 	STRUCT_FSET(ev, ctev_goffset, size);
2630 	result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));
2631 
2632 copyerr:
2633 	/*
2634 	 * Only move our location in the queue if all copyouts were
2635 	 * successful, the caller provided enough space for the entire
2636 	 * event, and our endpoint wasn't reset or otherwise moved by
2637 	 * another thread.
2638 	 */
2639 	mutex_enter(&q->ctq_lock);
2640 	if (result)
2641 		result = EFAULT;
2642 	else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
2643 	    (l->ctl_position == temp))
2644 		(void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
2645 	l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
2646 	/*
2647 	 * Signal any readers blocked on our CTLF_COPYOUT.
2648 	 */
2649 	cv_signal(&l->ctl_cv);
2650 	cte_rele(temp);
2651 
2652 error:
2653 	mutex_exit(&q->ctq_lock);
2654 	return (result);
2655 }
2656 
2657 /*
2658  * cte_set_reliable
2659  *
2660  * Requests that events be reliably delivered to an event endpoint.
2661  * Unread informative and acknowledged critical events will not be
2662  * removed from the queue until this listener reads or skips them.
2663  * Because a listener could maliciously request reliable delivery and
2664  * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
2665  * caller's effective set.
2666  */
2667 int
2668 cte_set_reliable(ct_listener_t *l, const cred_t *cr)
2669 {
2670 	ct_equeue_t *q = l->ctl_equeue;
2671 	int error;
2672 
2673 	if ((error = secpolicy_contract_event(cr)) != 0)
2674 		return (error);
2675 
2676 	mutex_enter(&q->ctq_lock);
2677 	if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
2678 		l->ctl_flags |= CTLF_RELIABLE;
2679 		q->ctq_nreliable++;
2680 		if (l->ctl_position != NULL)
2681 			l->ctl_position->cte_nodes[q->ctq_listno].
2682 			    ctm_nreliable++;
2683 	}
2684 	mutex_exit(&q->ctq_lock);
2685 
2686 	return (0);
2687 }
2688