xref: /titanic_50/usr/src/uts/common/contract/device.c (revision 25e8c5aa2b496d9026e958ac731a610167574f59)
1*25e8c5aaSvikram /*
2*25e8c5aaSvikram  * CDDL HEADER START
3*25e8c5aaSvikram  *
4*25e8c5aaSvikram  * The contents of this file are subject to the terms of the
5*25e8c5aaSvikram  * Common Development and Distribution License (the "License").
6*25e8c5aaSvikram  * You may not use this file except in compliance with the License.
7*25e8c5aaSvikram  *
8*25e8c5aaSvikram  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25e8c5aaSvikram  * or http://www.opensolaris.org/os/licensing.
10*25e8c5aaSvikram  * See the License for the specific language governing permissions
11*25e8c5aaSvikram  * and limitations under the License.
12*25e8c5aaSvikram  *
13*25e8c5aaSvikram  * When distributing Covered Code, include this CDDL HEADER in each
14*25e8c5aaSvikram  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25e8c5aaSvikram  * If applicable, add the following below this CDDL HEADER, with the
16*25e8c5aaSvikram  * fields enclosed by brackets "[]" replaced with your own identifying
17*25e8c5aaSvikram  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25e8c5aaSvikram  *
19*25e8c5aaSvikram  * CDDL HEADER END
20*25e8c5aaSvikram  */
21*25e8c5aaSvikram /*
22*25e8c5aaSvikram  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23*25e8c5aaSvikram  * Use is subject to license terms.
24*25e8c5aaSvikram  */
25*25e8c5aaSvikram 
26*25e8c5aaSvikram #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*25e8c5aaSvikram 
28*25e8c5aaSvikram #include <sys/mutex.h>
29*25e8c5aaSvikram #include <sys/debug.h>
30*25e8c5aaSvikram #include <sys/types.h>
31*25e8c5aaSvikram #include <sys/param.h>
32*25e8c5aaSvikram #include <sys/kmem.h>
33*25e8c5aaSvikram #include <sys/thread.h>
34*25e8c5aaSvikram #include <sys/id_space.h>
35*25e8c5aaSvikram #include <sys/avl.h>
36*25e8c5aaSvikram #include <sys/list.h>
37*25e8c5aaSvikram #include <sys/sysmacros.h>
38*25e8c5aaSvikram #include <sys/proc.h>
39*25e8c5aaSvikram #include <sys/contract.h>
40*25e8c5aaSvikram #include <sys/contract_impl.h>
41*25e8c5aaSvikram #include <sys/contract/device.h>
42*25e8c5aaSvikram #include <sys/contract/device_impl.h>
43*25e8c5aaSvikram #include <sys/cmn_err.h>
44*25e8c5aaSvikram #include <sys/nvpair.h>
45*25e8c5aaSvikram #include <sys/policy.h>
46*25e8c5aaSvikram #include <sys/ddi_impldefs.h>
47*25e8c5aaSvikram #include <sys/ddi_implfuncs.h>
48*25e8c5aaSvikram #include <sys/systm.h>
49*25e8c5aaSvikram #include <sys/stat.h>
50*25e8c5aaSvikram #include <sys/sunddi.h>
51*25e8c5aaSvikram #include <sys/esunddi.h>
52*25e8c5aaSvikram #include <sys/ddi.h>
53*25e8c5aaSvikram #include <sys/fs/dv_node.h>
54*25e8c5aaSvikram #include <sys/sunndi.h>
55*25e8c5aaSvikram #undef ct_lock	/* needed because clnt.h defines ct_lock as a macro */
56*25e8c5aaSvikram 
57*25e8c5aaSvikram /*
58*25e8c5aaSvikram  * Device Contracts
59*25e8c5aaSvikram  * -----------------
60*25e8c5aaSvikram  * This file contains the core code for the device contracts framework.
61*25e8c5aaSvikram  * A device contract is an agreement or a contract between a process and
62*25e8c5aaSvikram  * the kernel regarding the state of the device. A device contract may be
63*25e8c5aaSvikram  * created when a relationship is formed between a device and a process
64*25e8c5aaSvikram  * i.e. at open(2) time, or it may be created at some point after the device
65*25e8c5aaSvikram  * has been opened. A device contract once formed may be broken by either party.
66*25e8c5aaSvikram  * A device contract can be broken by the process by an explicit abandon of the
67*25e8c5aaSvikram  * contract or by an implicit abandon when the process exits. A device contract
68*25e8c5aaSvikram  * can be broken by the kernel either asynchronously (without negotiation) or
69*25e8c5aaSvikram  * synchronously (with negotiation). Exactly which happens depends on the device
70*25e8c5aaSvikram  * state transition. The following state diagram shows the transitions between
 * device states. Only device state transitions currently supported by device
 * contracts are shown.
73*25e8c5aaSvikram  *
74*25e8c5aaSvikram  *                              <-- A -->
75*25e8c5aaSvikram  *                       /-----------------> DEGRADED
76*25e8c5aaSvikram  *                       |                      |
77*25e8c5aaSvikram  *                       |                      |
78*25e8c5aaSvikram  *                       |                      | S
79*25e8c5aaSvikram  *                       |                      | |
80*25e8c5aaSvikram  *                       |                      | v
81*25e8c5aaSvikram  *                       v       S -->          v
82*25e8c5aaSvikram  *                      ONLINE ------------> OFFLINE
83*25e8c5aaSvikram  *
84*25e8c5aaSvikram  *
85*25e8c5aaSvikram  * In the figure above, the arrows indicate the direction of transition. The
86*25e8c5aaSvikram  * letter S refers to transitions which are inherently synchronous i.e.
87*25e8c5aaSvikram  * require negotiation and the letter A indicates transitions which are
88*25e8c5aaSvikram  * asynchronous i.e. are done without contract negotiations. A good example
89*25e8c5aaSvikram  * of a synchronous transition is the ONLINE -> OFFLINE transition. This
90*25e8c5aaSvikram  * transition cannot happen as long as there are consumers which have the
91*25e8c5aaSvikram  * device open. Thus some form of negotiation needs to happen between the
92*25e8c5aaSvikram  * consumers and the kernel to ensure that consumers either close devices
93*25e8c5aaSvikram  * or disallow the move to OFFLINE. Certain other transitions such as
94*25e8c5aaSvikram  * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
95*25e8c5aaSvikram  * non-negotiable. A device that suffers a fault that degrades its
96*25e8c5aaSvikram  * capabilities will become degraded irrespective of what consumers it has,
97*25e8c5aaSvikram  * so a negotiation in this case is pointless.
98*25e8c5aaSvikram  *
99*25e8c5aaSvikram  * The following device states are currently defined for device contracts:
100*25e8c5aaSvikram  *
101*25e8c5aaSvikram  *      CT_DEV_EV_ONLINE
102*25e8c5aaSvikram  *              The device is online and functioning normally
103*25e8c5aaSvikram  *      CT_DEV_EV_DEGRADED
104*25e8c5aaSvikram  *              The device is online but is functioning in a degraded capacity
105*25e8c5aaSvikram  *      CT_DEV_EV_OFFLINE
106*25e8c5aaSvikram  *              The device is offline and is no longer configured
107*25e8c5aaSvikram  *
108*25e8c5aaSvikram  * A typical consumer of device contracts starts out with a contract
109*25e8c5aaSvikram  * template and adds terms to that template. These include the
110*25e8c5aaSvikram  * "acceptable set" (A-set) term, which is a bitset of device states which
111*25e8c5aaSvikram  * are guaranteed by the contract. If the device moves out of a state in
112*25e8c5aaSvikram  * the A-set, the contract is broken. The breaking of the contract can
113*25e8c5aaSvikram  * be asynchronous in which case a critical contract event is sent to the
114*25e8c5aaSvikram  * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
116*25e8c5aaSvikram  * consumer and the kernel. The kernel does this by sending a critical
117*25e8c5aaSvikram  * event to the consumer with the CTE_NEG flag set indicating that this
118*25e8c5aaSvikram  * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
120*25e8c5aaSvikram  * privileges, it can send a NACK message to the kernel which will block
121*25e8c5aaSvikram  * the device state change. To NACK a negotiable event, a process must
122*25e8c5aaSvikram  * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
123*25e8c5aaSvikram  *
124*25e8c5aaSvikram  * Other terms include the "minor path" term, specified explicitly if the
125*25e8c5aaSvikram  * contract is not being created at open(2) time or specified implicitly
126*25e8c5aaSvikram  * if the contract is being created at open time via an activated template.
127*25e8c5aaSvikram  *
128*25e8c5aaSvikram  * A contract event is sent on any state change to which the contract
129*25e8c5aaSvikram  * owner has subscribed via the informative or critical event sets. Only
130*25e8c5aaSvikram  * critical events are guaranteed to be delivered. Since all device state
131*25e8c5aaSvikram  * changes are controlled by the kernel and cannot be arbitrarily generated
132*25e8c5aaSvikram  * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
133*25e8c5aaSvikram  * need to be asserted in a process's effective set to designate an event as
134*25e8c5aaSvikram  * critical. To ensure privacy, a process must either have the same effective
135*25e8c5aaSvikram  * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
136*25e8c5aaSvikram  * asserted in its effective set in order to observe device contract events
137*25e8c5aaSvikram  * off the device contract type specific endpoint.
138*25e8c5aaSvikram  *
139*25e8c5aaSvikram  * Yet another term available with device contracts is the "non-negotiable"
140*25e8c5aaSvikram  * term. This term is used to pre-specify a NACK to any contract negotiation.
141*25e8c5aaSvikram  * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
143*25e8c5aaSvikram  * non-negotiable. In this case, the device contract framework assumes a
144*25e8c5aaSvikram  * NACK for any transition to OFFLINE and blocks the offline. If the A-set
145*25e8c5aaSvikram  * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
146*25e8c5aaSvikram  * are NACKed but transitions to DEGRADE succeed.
147*25e8c5aaSvikram  *
148*25e8c5aaSvikram  * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
149*25e8c5aaSvikram  * happens just before the I/O framework attempts to offline a device
150*25e8c5aaSvikram  * (i.e. detach a device and set the offline flag so that it cannot be
151*25e8c5aaSvikram  * reattached). A device contract holder is expected to either NACK the offline
152*25e8c5aaSvikram  * (if privileged) or release the device and allow the offline to proceed.
153*25e8c5aaSvikram  *
154*25e8c5aaSvikram  * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract)
155*25e8c5aaSvikram  * is generated just before the I/O framework transitions the device state
156*25e8c5aaSvikram  * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
157*25e8c5aaSvikram  *
158*25e8c5aaSvikram  * The contract holder is expected to ACK or NACK a negotiation event
159*25e8c5aaSvikram  * within a certain period of time. If the ACK/NACK is not received
160*25e8c5aaSvikram  * within the timeout period, the device contract framework will behave
161*25e8c5aaSvikram  * as if the contract does not exist and will proceed with the event.
162*25e8c5aaSvikram  *
163*25e8c5aaSvikram  * Unlike a process contract a device contract does not need to exist
164*25e8c5aaSvikram  * once it is abandoned, since it does not define a fault boundary. It
165*25e8c5aaSvikram  * merely represents an agreement between a process and the kernel
166*25e8c5aaSvikram  * regarding the state of the device. Once the process has abandoned
167*25e8c5aaSvikram  * the contract (either implicitly via a process exit or explicitly)
168*25e8c5aaSvikram  * the kernel has no reason to retain the contract. As a result
169*25e8c5aaSvikram  * device contracts are neither inheritable nor need to exist in an
170*25e8c5aaSvikram  * orphan state.
171*25e8c5aaSvikram  *
172*25e8c5aaSvikram  * A device unlike a process may exist in multiple contracts and has
173*25e8c5aaSvikram  * a "life" outside a device contract. A device unlike a process
174*25e8c5aaSvikram  * may exist without an associated contract. Unlike a process contract
175*25e8c5aaSvikram  * a device contract may be formed after a binding relationship is
176*25e8c5aaSvikram  * formed between a process and a device.
177*25e8c5aaSvikram  *
178*25e8c5aaSvikram  *	IMPLEMENTATION NOTES
179*25e8c5aaSvikram  *	====================
180*25e8c5aaSvikram  * DATA STRUCTURES
181*25e8c5aaSvikram  * ----------------
182*25e8c5aaSvikram  * 	The heart of the device contracts implementation is the device contract
183*25e8c5aaSvikram  * 	private cont_device_t (or ctd for short) data structure. It encapsulates
184*25e8c5aaSvikram  * 	the generic contract_t data structure and has a number of private
185*25e8c5aaSvikram  *	fields.
186*25e8c5aaSvikram  * 	These include:
187*25e8c5aaSvikram  *		cond_minor: The minor device that is the subject of the contract
188*25e8c5aaSvikram  *		cond_aset:  The bitset of states which are guaranteed by the
189*25e8c5aaSvikram  *			   contract
190*25e8c5aaSvikram  *		cond_noneg: If set, indicates that the result of negotiation has
191*25e8c5aaSvikram  *			    been predefined to be a NACK
 * 	In addition, there are other device identifiers such as the devinfo
 * 	node, dev_t and spec_type of the minor node. There are also a few fields
 * 	that are used during negotiation to maintain state. See
195*25e8c5aaSvikram  *		uts/common/sys/contract/device_impl.h
196*25e8c5aaSvikram  * 	for details.
197*25e8c5aaSvikram  * 	The ctd structure represents the device private part of a contract of
198*25e8c5aaSvikram  * 	type "device"
199*25e8c5aaSvikram  *
200*25e8c5aaSvikram  * 	Another data structure used by device contracts is ctmpl_device. It is
201*25e8c5aaSvikram  * 	the device contracts private part of the contract template structure. It
202*25e8c5aaSvikram  *	encapsulates the generic template structure "ct_template_t" and includes
203*25e8c5aaSvikram  *	the following device contract specific fields
204*25e8c5aaSvikram  *		ctd_aset:   The bitset of states that should be guaranteed by a
205*25e8c5aaSvikram  *			    contract
206*25e8c5aaSvikram  *		ctd_noneg:  If set, indicates that contract should NACK a
207*25e8c5aaSvikram  *			    negotiation
208*25e8c5aaSvikram  *		ctd_minor:  The devfs_path (without the /devices prefix) of the
209*25e8c5aaSvikram  *			    minor node that is the subject of the contract.
210*25e8c5aaSvikram  *
211*25e8c5aaSvikram  * ALGORITHMS
212*25e8c5aaSvikram  * ---------
213*25e8c5aaSvikram  * There are three sets of routines in this file
214*25e8c5aaSvikram  * 	Template related routines
215*25e8c5aaSvikram  * 	-------------------------
 *	These routines provide support for template related operations initiated
217*25e8c5aaSvikram  *	via the generic template operations. These include routines that dup
218*25e8c5aaSvikram  *	a template, free it, and set various terms in the template
219*25e8c5aaSvikram  *	(such as the minor node path, the acceptable state set (or A-set)
220*25e8c5aaSvikram  *	and the non-negotiable term) as well as a routine to query the
221*25e8c5aaSvikram  *	device specific portion of the template for the abovementioned terms.
222*25e8c5aaSvikram  *	There is also a routine to create (ctmpl_device_create) that is used to
223*25e8c5aaSvikram  *	create a contract from a template. This routine calls (after initial
224*25e8c5aaSvikram  *	setup) the common function used to create a device contract
225*25e8c5aaSvikram  *	(contract_device_create).
226*25e8c5aaSvikram  *
227*25e8c5aaSvikram  *	core device contract implementation
228*25e8c5aaSvikram  *	----------------------------------
229*25e8c5aaSvikram  *	These routines support the generic contract framework to provide
230*25e8c5aaSvikram  *	functionality that allows contracts to be created, managed and
231*25e8c5aaSvikram  *	destroyed. The contract_device_create() routine is a routine used
232*25e8c5aaSvikram  *	to create a contract from a template (either via an explicit create
233*25e8c5aaSvikram  *	operation on a template or implicitly via an open with an
234*25e8c5aaSvikram  *	activated template.). The contract_device_free() routine assists
235*25e8c5aaSvikram  *	in freeing the device contract specific parts. There are routines
236*25e8c5aaSvikram  *	used to abandon (contract_device_abandon) a device contract as well
237*25e8c5aaSvikram  *	as a routine to destroy (which despite its name does not destroy,
238*25e8c5aaSvikram  *	it only moves a contract to a dead state) a contract.
239*25e8c5aaSvikram  *	There is also a routine to return status information about a
240*25e8c5aaSvikram  *	contract - the level of detail depends on what is requested by the
241*25e8c5aaSvikram  *	user. A value of CTD_FIXED only returns fixed length fields such
242*25e8c5aaSvikram  *	as the A-set, state of device and value of the "noneg" term. If
243*25e8c5aaSvikram  *	CTD_ALL is specified, the minor node path is returned as well.
244*25e8c5aaSvikram  *
245*25e8c5aaSvikram  *	In addition there are interfaces (contract_device_ack/nack) which
246*25e8c5aaSvikram  *	are used to support negotiation between userland processes and
247*25e8c5aaSvikram  *	device contracts. These interfaces record the acknowledgement
248*25e8c5aaSvikram  *	or lack thereof for negotiation events and help determine if the
249*25e8c5aaSvikram  *	negotiated event should occur.
250*25e8c5aaSvikram  *
251*25e8c5aaSvikram  *	"backend routines"
252*25e8c5aaSvikram  *	-----------------
253*25e8c5aaSvikram  *	The backend routines form the interface between the I/O framework
254*25e8c5aaSvikram  *	and the device contract subsystem. These routines, allow the I/O
255*25e8c5aaSvikram  *	framework to call into the device contract subsystem to notify it of
256*25e8c5aaSvikram  *	impending changes to a device state as well as to inform of the
257*25e8c5aaSvikram  *	final disposition of such attempted state changes. Routines in this
258*25e8c5aaSvikram  *	class include contract_device_offline() that indicates an attempt to
259*25e8c5aaSvikram  *	offline a device, contract_device_degrade() that indicates that
260*25e8c5aaSvikram  *	a device is moving to the degraded state and contract_device_negend()
261*25e8c5aaSvikram  *	that is used by the I/O framework to inform the contracts subsystem of
262*25e8c5aaSvikram  *	the final disposition of an attempted operation.
263*25e8c5aaSvikram  *
264*25e8c5aaSvikram  *	SUMMARY
265*25e8c5aaSvikram  *	-------
266*25e8c5aaSvikram  *      A contract starts its life as a template. A process allocates a device
267*25e8c5aaSvikram  *	contract template and sets various terms:
268*25e8c5aaSvikram  *		The A-set
269*25e8c5aaSvikram  *		The device minor node
270*25e8c5aaSvikram  *		Critical and informative events
 *		The noneg i.e. no negotiation term
272*25e8c5aaSvikram  *	Setting of these terms in the template is done via the
273*25e8c5aaSvikram  *	ctmpl_device_set() entry point in this file. A process can query a
274*25e8c5aaSvikram  *	template to determine the terms already set in the template - this is
275*25e8c5aaSvikram  *	facilitated by the ctmpl_device_get() routine.
276*25e8c5aaSvikram  *
277*25e8c5aaSvikram  *	Once all the appropriate terms are set, the contract is instantiated via
278*25e8c5aaSvikram  *	one of two methods
279*25e8c5aaSvikram  *	- via an explicit create operation - this is facilitated by the
280*25e8c5aaSvikram  *	  ctmpl_device_create() entry point
281*25e8c5aaSvikram  *	- synchronously with the open(2) system call - this is achieved via the
282*25e8c5aaSvikram  *	  contract_device_open() routine.
283*25e8c5aaSvikram  *	The core work for both these above functions is done by
284*25e8c5aaSvikram  *	contract_device_create()
285*25e8c5aaSvikram  *
286*25e8c5aaSvikram  *	A contract once created can be queried for its status. Support for
287*25e8c5aaSvikram  *	status info is provided by both the common contracts framework and by
288*25e8c5aaSvikram  *	the "device" contract type. If the level of detail requested is
289*25e8c5aaSvikram  *	CTD_COMMON, only the common contract framework data is used. Higher
290*25e8c5aaSvikram  *	levels of detail result in calls to contract_device_status() to supply
291*25e8c5aaSvikram  *	device contract type specific status information.
292*25e8c5aaSvikram  *
 *	A contract once created may be abandoned either explicitly or
 *	implicitly. In either case, the contract_device_abandon() function is
 *	invoked. This function merely calls contract_destroy() which moves the
 *	contract to the DEAD state. The device contract portion of destroy
 *	processing is provided by contract_device_destroy() which merely
 *	disassociates the contract from its device devinfo node. A contract in
 *	the DEAD state is not freed. It hangs around until all references to
 *	the contract are gone. When that happens, the contract is finally
 *	deallocated. The device contract specific portion of the free is done
 *	by contract_device_free() which finally frees the device contract
 *	specific data structure (cont_device_t).
304*25e8c5aaSvikram  *
305*25e8c5aaSvikram  *	When a device undergoes a state change, the I/O framework calls the
306*25e8c5aaSvikram  *	corresponding device contract entry point. For example, when a device
307*25e8c5aaSvikram  *	is about to go OFFLINE, the routine contract_device_offline() is
308*25e8c5aaSvikram  *	invoked. Similarly if a device moves to DEGRADED state, the routine
309*25e8c5aaSvikram  *	contract_device_degrade() function is called. These functions call the
310*25e8c5aaSvikram  *	core routine contract_device_publish(). This function determines via
311*25e8c5aaSvikram  *	the function is_sync_neg() whether an event is a synchronous (i.e.
312*25e8c5aaSvikram  *	negotiable) event or not. In the former case contract_device_publish()
313*25e8c5aaSvikram  *	publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
314*25e8c5aaSvikram  *	and/or NACKs from contract holders. In the latter case, it simply
315*25e8c5aaSvikram  *	publishes the event and does not wait. In the negotiation case, ACKs or
316*25e8c5aaSvikram  *	NACKs from userland consumers results in contract_device_ack_nack()
317*25e8c5aaSvikram  *	being called where the result of the negotiation is recorded in the
318*25e8c5aaSvikram  *	contract data structure. Once all outstanding contract owners have
319*25e8c5aaSvikram  *	responded, the device contract code in wait_for_acks() determines the
320*25e8c5aaSvikram  *	final result of the negotiation. A single NACK overrides all other ACKs
321*25e8c5aaSvikram  *	If there is no NACK, then a single ACK will result in an overall ACK
322*25e8c5aaSvikram  *	result. If there are no ACKs or NACKs, then the result CT_NONE is
323*25e8c5aaSvikram  *	returned back to the I/O framework. Once the event is permitted or
324*25e8c5aaSvikram  *	blocked, the I/O framework proceeds or aborts the state change. The
325*25e8c5aaSvikram  *	I/O framework then calls contract_device_negend() with a result code
326*25e8c5aaSvikram  *	indicating final disposition of the event. This call releases the
327*25e8c5aaSvikram  *	barrier and other state associated with the previous negotiation,
328*25e8c5aaSvikram  *	which permits the next event (if any) to come into the device contract
329*25e8c5aaSvikram  *	framework.
330*25e8c5aaSvikram  *
331*25e8c5aaSvikram  *	Finally, a device that has outstanding contracts may be removed from
332*25e8c5aaSvikram  *	the system which results in its devinfo node being freed. The devinfo
333*25e8c5aaSvikram  *	free routine in the I/O framework, calls into the device contract
 *	function - contract_device_remove_dip(). This routine disassociates
 *	the dip from all contracts associated with the devinfo node being freed,
336*25e8c5aaSvikram  *	allowing the devinfo node to be freed.
337*25e8c5aaSvikram  *
338*25e8c5aaSvikram  * LOCKING
339*25e8c5aaSvikram  * ---------
340*25e8c5aaSvikram  * 	There are four sets of data that need to be protected by locks
341*25e8c5aaSvikram  *
342*25e8c5aaSvikram  *	i) device contract specific portion of the contract template - This data
343*25e8c5aaSvikram  *	is protected by the template lock ctmpl_lock.
344*25e8c5aaSvikram  *
345*25e8c5aaSvikram  *	ii) device contract specific portion of the contract - This data is
346*25e8c5aaSvikram  *	protected by the contract lock ct_lock
347*25e8c5aaSvikram  *
348*25e8c5aaSvikram  *	iii) The linked list of contracts hanging off a devinfo node - This
349*25e8c5aaSvikram  *	list is protected by the per-devinfo node lock devi_ct_lock
350*25e8c5aaSvikram  *
351*25e8c5aaSvikram  *	iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
352*25e8c5aaSvikram  *	and devi_ct_count that controls state changes to a dip
353*25e8c5aaSvikram  *
354*25e8c5aaSvikram  *	The template lock is independent in that none of the other locks in this
355*25e8c5aaSvikram  *	file may be taken while holding the template lock (and vice versa).
356*25e8c5aaSvikram  *
357*25e8c5aaSvikram  *	The remaining three locks have the following lock order
358*25e8c5aaSvikram  *
359*25e8c5aaSvikram  *	devi_ct_lock  -> ct_count barrier ->  ct_lock
360*25e8c5aaSvikram  *
361*25e8c5aaSvikram  */
362*25e8c5aaSvikram 
363*25e8c5aaSvikram static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
364*25e8c5aaSvikram     int spec_type, proc_t *owner, int *errorp);
365*25e8c5aaSvikram 
366*25e8c5aaSvikram /* barrier routines */
367*25e8c5aaSvikram static void ct_barrier_acquire(dev_info_t *dip);
368*25e8c5aaSvikram static void ct_barrier_release(dev_info_t *dip);
369*25e8c5aaSvikram static int ct_barrier_held(dev_info_t *dip);
370*25e8c5aaSvikram static int ct_barrier_empty(dev_info_t *dip);
371*25e8c5aaSvikram static void ct_barrier_wait_for_release(dev_info_t *dip);
372*25e8c5aaSvikram static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
373*25e8c5aaSvikram static void ct_barrier_decr(dev_info_t *dip);
374*25e8c5aaSvikram static void ct_barrier_incr(dev_info_t *dip);
375*25e8c5aaSvikram 
376*25e8c5aaSvikram ct_type_t *device_type;
377*25e8c5aaSvikram 
/*
 * Macro predicates for determining when events should be sent and how.
 *
 * EVSENDP(ctd, flag)
 *	Non-zero if any event in "flag" is present in either the informative
 *	or the critical event set of the contract embedded in "ctd".
 *
 * EVINFOP(ctd, flag)
 *	Non-zero if no event in "flag" is present in the critical event set
 *	of the contract embedded in "ctd".
 *
 * Both arguments are parenthesized in the expansion so that compound
 * expressions (e.g. "A | B" for flag, or "&obj" for ctd) are evaluated
 * with the intended precedence.
 */
#define	EVSENDP(ctd, flag) \
	(((ctd)->cond_contract.ct_ev_info | \
	(ctd)->cond_contract.ct_ev_crit) & (flag))

#define	EVINFOP(ctd, flag) \
	(((ctd)->cond_contract.ct_ev_crit & (flag)) == 0)
386*25e8c5aaSvikram 
387*25e8c5aaSvikram /*
388*25e8c5aaSvikram  * State transition table showing which transitions are synchronous and which
389*25e8c5aaSvikram  * are not.
390*25e8c5aaSvikram  */
391*25e8c5aaSvikram struct ct_dev_negtable {
392*25e8c5aaSvikram 	uint_t	st_old;
393*25e8c5aaSvikram 	uint_t	st_new;
394*25e8c5aaSvikram 	uint_t	st_neg;
395*25e8c5aaSvikram } ct_dev_negtable[] = {
396*25e8c5aaSvikram 	{CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE,	1},
397*25e8c5aaSvikram 	{CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED,	0},
398*25e8c5aaSvikram 	{CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE,	0},
399*25e8c5aaSvikram 	{CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE,	1},
400*25e8c5aaSvikram 	{0}
401*25e8c5aaSvikram };
402*25e8c5aaSvikram 
403*25e8c5aaSvikram /*
404*25e8c5aaSvikram  * Device contract template implementation
405*25e8c5aaSvikram  */
406*25e8c5aaSvikram 
407*25e8c5aaSvikram /*
408*25e8c5aaSvikram  * ctmpl_device_dup
409*25e8c5aaSvikram  *
410*25e8c5aaSvikram  * The device contract template dup entry point.
411*25e8c5aaSvikram  * This simply copies all the fields (generic as well as device contract
412*25e8c5aaSvikram  * specific) fields of the original.
413*25e8c5aaSvikram  */
414*25e8c5aaSvikram static struct ct_template *
415*25e8c5aaSvikram ctmpl_device_dup(struct ct_template *template)
416*25e8c5aaSvikram {
417*25e8c5aaSvikram 	ctmpl_device_t *new;
418*25e8c5aaSvikram 	ctmpl_device_t *old = template->ctmpl_data;
419*25e8c5aaSvikram 	char *buf;
420*25e8c5aaSvikram 	char *minor;
421*25e8c5aaSvikram 
422*25e8c5aaSvikram 	new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
423*25e8c5aaSvikram 	buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
424*25e8c5aaSvikram 
425*25e8c5aaSvikram 	/*
426*25e8c5aaSvikram 	 * copy generic fields.
427*25e8c5aaSvikram 	 * ctmpl_copy returns with old template lock held
428*25e8c5aaSvikram 	 */
429*25e8c5aaSvikram 	ctmpl_copy(&new->ctd_ctmpl, template);
430*25e8c5aaSvikram 
431*25e8c5aaSvikram 	new->ctd_ctmpl.ctmpl_data = new;
432*25e8c5aaSvikram 	new->ctd_aset = old->ctd_aset;
433*25e8c5aaSvikram 	new->ctd_minor = NULL;
434*25e8c5aaSvikram 	new->ctd_noneg = old->ctd_noneg;
435*25e8c5aaSvikram 
436*25e8c5aaSvikram 	if (old->ctd_minor) {
437*25e8c5aaSvikram 		ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN);
438*25e8c5aaSvikram 		bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1);
439*25e8c5aaSvikram 	} else {
440*25e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
441*25e8c5aaSvikram 		buf = NULL;
442*25e8c5aaSvikram 	}
443*25e8c5aaSvikram 
444*25e8c5aaSvikram 	mutex_exit(&template->ctmpl_lock);
445*25e8c5aaSvikram 	if (buf) {
446*25e8c5aaSvikram 		minor = i_ddi_strdup(buf, KM_SLEEP);
447*25e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
448*25e8c5aaSvikram 		buf = NULL;
449*25e8c5aaSvikram 	} else {
450*25e8c5aaSvikram 		minor = NULL;
451*25e8c5aaSvikram 	}
452*25e8c5aaSvikram 	mutex_enter(&template->ctmpl_lock);
453*25e8c5aaSvikram 
454*25e8c5aaSvikram 	if (minor) {
455*25e8c5aaSvikram 		new->ctd_minor = minor;
456*25e8c5aaSvikram 	}
457*25e8c5aaSvikram 
458*25e8c5aaSvikram 	ASSERT(buf == NULL);
459*25e8c5aaSvikram 	return (&new->ctd_ctmpl);
460*25e8c5aaSvikram }
461*25e8c5aaSvikram 
462*25e8c5aaSvikram /*
463*25e8c5aaSvikram  * ctmpl_device_free
464*25e8c5aaSvikram  *
465*25e8c5aaSvikram  * The device contract template free entry point.  Just
466*25e8c5aaSvikram  * frees the template.
467*25e8c5aaSvikram  */
468*25e8c5aaSvikram static void
469*25e8c5aaSvikram ctmpl_device_free(struct ct_template *template)
470*25e8c5aaSvikram {
471*25e8c5aaSvikram 	ctmpl_device_t *dtmpl = template->ctmpl_data;
472*25e8c5aaSvikram 
473*25e8c5aaSvikram 	if (dtmpl->ctd_minor)
474*25e8c5aaSvikram 		kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
475*25e8c5aaSvikram 
476*25e8c5aaSvikram 	kmem_free(dtmpl, sizeof (ctmpl_device_t));
477*25e8c5aaSvikram }
478*25e8c5aaSvikram 
/*
 * SAFE_EV is the set of events which a non-privileged process may place
 * in its critical event set. Since an unprivileged device contract owner
 * has no control over when a device changes state, every device event
 * is allowed to be critical.
 *
 * EXCESS yields the bits of "value", a critical event set, that would
 * require additional privilege, i.e. those outside SAFE_EV. Because
 * SAFE_EV covers all device events, EXCESS currently evaluates to 0 for
 * any set made up of device events only.
 */
#define	SAFE_EV		(CT_DEV_ALLEVENT)
#define	EXCESS(value)	(~SAFE_EV & (value))
491*25e8c5aaSvikram 
492*25e8c5aaSvikram 
493*25e8c5aaSvikram /*
494*25e8c5aaSvikram  * ctmpl_device_set
495*25e8c5aaSvikram  *
496*25e8c5aaSvikram  * The device contract template set entry point. Sets various terms in the
497*25e8c5aaSvikram  * template. The non-negotiable  term can only be set if the process has
498*25e8c5aaSvikram  * the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
499*25e8c5aaSvikram  */
500*25e8c5aaSvikram static int
501*25e8c5aaSvikram ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr)
502*25e8c5aaSvikram {
503*25e8c5aaSvikram 	ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
504*25e8c5aaSvikram 	char *buf;
505*25e8c5aaSvikram 	int error;
506*25e8c5aaSvikram 	dev_info_t *dip;
507*25e8c5aaSvikram 	int spec_type;
508*25e8c5aaSvikram 
509*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));
510*25e8c5aaSvikram 
511*25e8c5aaSvikram 	switch (param->ctpm_id) {
512*25e8c5aaSvikram 	case CTDP_ACCEPT:
513*25e8c5aaSvikram 		if (param->ctpm_value & ~CT_DEV_ALLEVENT)
514*25e8c5aaSvikram 			return (EINVAL);
515*25e8c5aaSvikram 		if (param->ctpm_value == 0)
516*25e8c5aaSvikram 			return (EINVAL);
517*25e8c5aaSvikram 		if (param->ctpm_value == CT_DEV_ALLEVENT)
518*25e8c5aaSvikram 			return (EINVAL);
519*25e8c5aaSvikram 
520*25e8c5aaSvikram 		dtmpl->ctd_aset = param->ctpm_value;
521*25e8c5aaSvikram 		break;
522*25e8c5aaSvikram 	case CTDP_NONEG:
523*25e8c5aaSvikram 		if (param->ctpm_value != CTDP_NONEG_SET &&
524*25e8c5aaSvikram 		    param->ctpm_value != CTDP_NONEG_CLEAR)
525*25e8c5aaSvikram 			return (EINVAL);
526*25e8c5aaSvikram 
527*25e8c5aaSvikram 		/*
528*25e8c5aaSvikram 		 * only privileged processes can designate a contract
529*25e8c5aaSvikram 		 * non-negotiatble.
530*25e8c5aaSvikram 		 */
531*25e8c5aaSvikram 		if (param->ctpm_value == CTDP_NONEG_SET &&
532*25e8c5aaSvikram 		    (error = secpolicy_sys_devices(cr)) != 0) {
533*25e8c5aaSvikram 			return (error);
534*25e8c5aaSvikram 		}
535*25e8c5aaSvikram 
536*25e8c5aaSvikram 		dtmpl->ctd_noneg = param->ctpm_value;
537*25e8c5aaSvikram 		break;
538*25e8c5aaSvikram 
539*25e8c5aaSvikram 	case CTDP_MINOR:
540*25e8c5aaSvikram 		if (param->ctpm_value == NULL)
541*25e8c5aaSvikram 			return (EINVAL);
542*25e8c5aaSvikram 
543*25e8c5aaSvikram 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
544*25e8c5aaSvikram 
545*25e8c5aaSvikram 		/*
546*25e8c5aaSvikram 		 * Copyin the device path
547*25e8c5aaSvikram 		 */
548*25e8c5aaSvikram 		error = copyinstr((char *)(uintptr_t)param->ctpm_value, buf,
549*25e8c5aaSvikram 		    MAXPATHLEN, NULL);
550*25e8c5aaSvikram 		if (error != 0) {
551*25e8c5aaSvikram 			kmem_free(buf, MAXPATHLEN);
552*25e8c5aaSvikram 			return (error);
553*25e8c5aaSvikram 		}
554*25e8c5aaSvikram 		buf[MAXPATHLEN - 1] = '\0';
555*25e8c5aaSvikram 
556*25e8c5aaSvikram 		if (*buf != '/' ||
557*25e8c5aaSvikram 		    strncmp(buf, "/devices/", strlen("/devices/")) == 0 ||
558*25e8c5aaSvikram 		    strstr(buf, "../devices/") || strchr(buf, ':') == NULL) {
559*25e8c5aaSvikram 			kmem_free(buf, MAXPATHLEN);
560*25e8c5aaSvikram 			return (EINVAL);
561*25e8c5aaSvikram 		}
562*25e8c5aaSvikram 
563*25e8c5aaSvikram 		spec_type = 0;
564*25e8c5aaSvikram 		dip = NULL;
565*25e8c5aaSvikram 		if (resolve_pathname(buf, &dip, NULL, &spec_type) != 0) {
566*25e8c5aaSvikram 			kmem_free(buf, MAXPATHLEN);
567*25e8c5aaSvikram 			return (ERANGE);
568*25e8c5aaSvikram 		}
569*25e8c5aaSvikram 		ddi_release_devi(dip);
570*25e8c5aaSvikram 
571*25e8c5aaSvikram 		if (spec_type != S_IFCHR && spec_type != S_IFBLK) {
572*25e8c5aaSvikram 			kmem_free(buf, MAXPATHLEN);
573*25e8c5aaSvikram 			return (EINVAL);
574*25e8c5aaSvikram 		}
575*25e8c5aaSvikram 
576*25e8c5aaSvikram 		if (dtmpl->ctd_minor != NULL) {
577*25e8c5aaSvikram 			kmem_free(dtmpl->ctd_minor,
578*25e8c5aaSvikram 			    strlen(dtmpl->ctd_minor) + 1);
579*25e8c5aaSvikram 		}
580*25e8c5aaSvikram 		dtmpl->ctd_minor = i_ddi_strdup(buf, KM_SLEEP);
581*25e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
582*25e8c5aaSvikram 		break;
583*25e8c5aaSvikram 	case CTP_EV_CRITICAL:
584*25e8c5aaSvikram 		/*
585*25e8c5aaSvikram 		 * Currently for device contracts, any event
586*25e8c5aaSvikram 		 * may be added to the critical set. We retain the
587*25e8c5aaSvikram 		 * following code however for future enhancements.
588*25e8c5aaSvikram 		 */
589*25e8c5aaSvikram 		if (EXCESS(param->ctpm_value) &&
590*25e8c5aaSvikram 		    (error = secpolicy_contract_event(cr)) != 0)
591*25e8c5aaSvikram 			return (error);
592*25e8c5aaSvikram 		tmpl->ctmpl_ev_crit = param->ctpm_value;
593*25e8c5aaSvikram 		break;
594*25e8c5aaSvikram 	default:
595*25e8c5aaSvikram 		return (EINVAL);
596*25e8c5aaSvikram 	}
597*25e8c5aaSvikram 
598*25e8c5aaSvikram 	return (0);
599*25e8c5aaSvikram }
600*25e8c5aaSvikram 
601*25e8c5aaSvikram /*
602*25e8c5aaSvikram  * ctmpl_device_get
603*25e8c5aaSvikram  *
604*25e8c5aaSvikram  * The device contract template get entry point.  Simply fetches and
605*25e8c5aaSvikram  * returns the value of the requested term.
606*25e8c5aaSvikram  */
607*25e8c5aaSvikram static int
608*25e8c5aaSvikram ctmpl_device_get(struct ct_template *template, ct_param_t *param)
609*25e8c5aaSvikram {
610*25e8c5aaSvikram 	ctmpl_device_t *dtmpl = template->ctmpl_data;
611*25e8c5aaSvikram 	int		error;
612*25e8c5aaSvikram 
613*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&template->ctmpl_lock));
614*25e8c5aaSvikram 
615*25e8c5aaSvikram 	switch (param->ctpm_id) {
616*25e8c5aaSvikram 	case CTDP_ACCEPT:
617*25e8c5aaSvikram 		param->ctpm_value = dtmpl->ctd_aset;
618*25e8c5aaSvikram 		break;
619*25e8c5aaSvikram 	case CTDP_NONEG:
620*25e8c5aaSvikram 		param->ctpm_value = dtmpl->ctd_noneg;
621*25e8c5aaSvikram 		break;
622*25e8c5aaSvikram 	case CTDP_MINOR:
623*25e8c5aaSvikram 		if (dtmpl->ctd_minor) {
624*25e8c5aaSvikram 			error = copyoutstr(dtmpl->ctd_minor,
625*25e8c5aaSvikram 			    (char *)(uintptr_t)param->ctpm_value,
626*25e8c5aaSvikram 			    MAXPATHLEN, NULL);
627*25e8c5aaSvikram 			if (error != 0)
628*25e8c5aaSvikram 				return (error);
629*25e8c5aaSvikram 		} else {
630*25e8c5aaSvikram 			return (ENOENT);
631*25e8c5aaSvikram 		}
632*25e8c5aaSvikram 		break;
633*25e8c5aaSvikram 	default:
634*25e8c5aaSvikram 		return (EINVAL);
635*25e8c5aaSvikram 	}
636*25e8c5aaSvikram 
637*25e8c5aaSvikram 	return (0);
638*25e8c5aaSvikram }
639*25e8c5aaSvikram 
640*25e8c5aaSvikram /*
641*25e8c5aaSvikram  * Device contract type specific portion of creating a contract using
642*25e8c5aaSvikram  * a specified template
643*25e8c5aaSvikram  */
644*25e8c5aaSvikram /*ARGSUSED*/
645*25e8c5aaSvikram int
646*25e8c5aaSvikram ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
647*25e8c5aaSvikram {
648*25e8c5aaSvikram 	ctmpl_device_t *dtmpl;
649*25e8c5aaSvikram 	char *buf;
650*25e8c5aaSvikram 	dev_t dev;
651*25e8c5aaSvikram 	int spec_type;
652*25e8c5aaSvikram 	int error;
653*25e8c5aaSvikram 	cont_device_t *ctd;
654*25e8c5aaSvikram 
655*25e8c5aaSvikram 	if (ctidp == NULL)
656*25e8c5aaSvikram 		return (EINVAL);
657*25e8c5aaSvikram 
658*25e8c5aaSvikram 	buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
659*25e8c5aaSvikram 
660*25e8c5aaSvikram 	dtmpl = template->ctmpl_data;
661*25e8c5aaSvikram 
662*25e8c5aaSvikram 	mutex_enter(&template->ctmpl_lock);
663*25e8c5aaSvikram 	if (dtmpl->ctd_minor == NULL) {
664*25e8c5aaSvikram 		/* incomplete template */
665*25e8c5aaSvikram 		mutex_exit(&template->ctmpl_lock);
666*25e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
667*25e8c5aaSvikram 		return (EINVAL);
668*25e8c5aaSvikram 	} else {
669*25e8c5aaSvikram 		ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
670*25e8c5aaSvikram 		bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1);
671*25e8c5aaSvikram 	}
672*25e8c5aaSvikram 	mutex_exit(&template->ctmpl_lock);
673*25e8c5aaSvikram 
674*25e8c5aaSvikram 	spec_type = 0;
675*25e8c5aaSvikram 	dev = NODEV;
676*25e8c5aaSvikram 	if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 ||
677*25e8c5aaSvikram 	    dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
678*25e8c5aaSvikram 	    (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
679*25e8c5aaSvikram 		CT_DEBUG((CE_WARN,
680*25e8c5aaSvikram 		    "tmpl_create: failed to find device: %s", buf));
681*25e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
682*25e8c5aaSvikram 		return (ERANGE);
683*25e8c5aaSvikram 	}
684*25e8c5aaSvikram 	kmem_free(buf, MAXPATHLEN);
685*25e8c5aaSvikram 
686*25e8c5aaSvikram 	ctd = contract_device_create(template->ctmpl_data,
687*25e8c5aaSvikram 	    dev, spec_type, curproc, &error);
688*25e8c5aaSvikram 
689*25e8c5aaSvikram 	if (ctd == NULL) {
690*25e8c5aaSvikram 		CT_DEBUG((CE_WARN, "Failed to create device contract for "
691*25e8c5aaSvikram 		    "process (%d) with device (devt = %lu, spec_type = %s)",
692*25e8c5aaSvikram 		    curproc->p_pid, dev,
693*25e8c5aaSvikram 		    spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
694*25e8c5aaSvikram 		return (error);
695*25e8c5aaSvikram 	}
696*25e8c5aaSvikram 
697*25e8c5aaSvikram 	mutex_enter(&ctd->cond_contract.ct_lock);
698*25e8c5aaSvikram 	*ctidp = ctd->cond_contract.ct_id;
699*25e8c5aaSvikram 	mutex_exit(&ctd->cond_contract.ct_lock);
700*25e8c5aaSvikram 
701*25e8c5aaSvikram 	return (0);
702*25e8c5aaSvikram }
703*25e8c5aaSvikram 
704*25e8c5aaSvikram /*
705*25e8c5aaSvikram  * Device contract specific template entry points
706*25e8c5aaSvikram  */
707*25e8c5aaSvikram static ctmplops_t ctmpl_device_ops = {
708*25e8c5aaSvikram 	ctmpl_device_dup,		/* ctop_dup */
709*25e8c5aaSvikram 	ctmpl_device_free,		/* ctop_free */
710*25e8c5aaSvikram 	ctmpl_device_set,		/* ctop_set */
711*25e8c5aaSvikram 	ctmpl_device_get,		/* ctop_get */
712*25e8c5aaSvikram 	ctmpl_device_create,		/* ctop_create */
713*25e8c5aaSvikram 	CT_DEV_ALLEVENT			/* all device events bitmask */
714*25e8c5aaSvikram };
715*25e8c5aaSvikram 
716*25e8c5aaSvikram 
717*25e8c5aaSvikram /*
718*25e8c5aaSvikram  * Device contract implementation
719*25e8c5aaSvikram  */
720*25e8c5aaSvikram 
721*25e8c5aaSvikram /*
722*25e8c5aaSvikram  * contract_device_default
723*25e8c5aaSvikram  *
724*25e8c5aaSvikram  * The device contract default template entry point.  Creates a
725*25e8c5aaSvikram  * device contract template with a default A-set and no "noneg" ,
726*25e8c5aaSvikram  * with informative degrade events and critical offline events.
727*25e8c5aaSvikram  * There is no default minor path.
728*25e8c5aaSvikram  */
729*25e8c5aaSvikram static ct_template_t *
730*25e8c5aaSvikram contract_device_default(void)
731*25e8c5aaSvikram {
732*25e8c5aaSvikram 	ctmpl_device_t *new;
733*25e8c5aaSvikram 
734*25e8c5aaSvikram 	new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
735*25e8c5aaSvikram 	ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new);
736*25e8c5aaSvikram 
737*25e8c5aaSvikram 	new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
738*25e8c5aaSvikram 	new->ctd_noneg = 0;
739*25e8c5aaSvikram 	new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED;
740*25e8c5aaSvikram 	new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE;
741*25e8c5aaSvikram 
742*25e8c5aaSvikram 	return (&new->ctd_ctmpl);
743*25e8c5aaSvikram }
744*25e8c5aaSvikram 
745*25e8c5aaSvikram /*
746*25e8c5aaSvikram  * contract_device_free
747*25e8c5aaSvikram  *
748*25e8c5aaSvikram  * Destroys the device contract specific portion of a contract and
749*25e8c5aaSvikram  * frees the contract.
750*25e8c5aaSvikram  */
751*25e8c5aaSvikram static void
752*25e8c5aaSvikram contract_device_free(contract_t *ct)
753*25e8c5aaSvikram {
754*25e8c5aaSvikram 	cont_device_t *ctd = ct->ct_data;
755*25e8c5aaSvikram 
756*25e8c5aaSvikram 	ASSERT(ctd->cond_minor);
757*25e8c5aaSvikram 	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
758*25e8c5aaSvikram 	kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);
759*25e8c5aaSvikram 
760*25e8c5aaSvikram 	ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
761*25e8c5aaSvikram 	    ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);
762*25e8c5aaSvikram 
763*25e8c5aaSvikram 	ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);
764*25e8c5aaSvikram 
765*25e8c5aaSvikram 	ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
766*25e8c5aaSvikram 	ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);
767*25e8c5aaSvikram 
768*25e8c5aaSvikram 	ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
769*25e8c5aaSvikram 	ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));
770*25e8c5aaSvikram 
771*25e8c5aaSvikram 	ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
772*25e8c5aaSvikram 	ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));
773*25e8c5aaSvikram 
774*25e8c5aaSvikram 	ASSERT(!list_link_active(&ctd->cond_next));
775*25e8c5aaSvikram 
776*25e8c5aaSvikram 	kmem_free(ctd, sizeof (cont_device_t));
777*25e8c5aaSvikram }
778*25e8c5aaSvikram 
779*25e8c5aaSvikram /*
780*25e8c5aaSvikram  * contract_device_abandon
781*25e8c5aaSvikram  *
782*25e8c5aaSvikram  * The device contract abandon entry point.
783*25e8c5aaSvikram  */
784*25e8c5aaSvikram static void
785*25e8c5aaSvikram contract_device_abandon(contract_t *ct)
786*25e8c5aaSvikram {
787*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&ct->ct_lock));
788*25e8c5aaSvikram 
789*25e8c5aaSvikram 	/*
790*25e8c5aaSvikram 	 * device contracts cannot be inherited or orphaned.
791*25e8c5aaSvikram 	 * Move the contract to the DEAD_STATE. It will be freed
792*25e8c5aaSvikram 	 * once all references to it are gone.
793*25e8c5aaSvikram 	 */
794*25e8c5aaSvikram 	contract_destroy(ct);
795*25e8c5aaSvikram }
796*25e8c5aaSvikram 
797*25e8c5aaSvikram /*
798*25e8c5aaSvikram  * contract_device_destroy
799*25e8c5aaSvikram  *
800*25e8c5aaSvikram  * The device contract destroy entry point.
801*25e8c5aaSvikram  * Called from contract_destroy() to do any type specific destroy. Note
802*25e8c5aaSvikram  * that destroy is a misnomer - this does not free the contract, it only
803*25e8c5aaSvikram  * moves it to the dead state. A contract is actually freed via
804*25e8c5aaSvikram  * 	contract_rele() -> contract_dtor(), contop_free()
805*25e8c5aaSvikram  */
806*25e8c5aaSvikram static void
807*25e8c5aaSvikram contract_device_destroy(contract_t *ct)
808*25e8c5aaSvikram {
809*25e8c5aaSvikram 	cont_device_t	*ctd = ct->ct_data;
810*25e8c5aaSvikram 	dev_info_t	*dip = ctd->cond_dip;
811*25e8c5aaSvikram 
812*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&ct->ct_lock));
813*25e8c5aaSvikram 
814*25e8c5aaSvikram 	if (dip == NULL) {
815*25e8c5aaSvikram 		/*
816*25e8c5aaSvikram 		 * The dip has been removed, this is a dangling contract
817*25e8c5aaSvikram 		 * Check that dip linkages are NULL
818*25e8c5aaSvikram 		 */
819*25e8c5aaSvikram 		ASSERT(!list_link_active(&ctd->cond_next));
820*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no "
821*25e8c5aaSvikram 		    "devinfo node. contract ctid : %d", ct->ct_id));
822*25e8c5aaSvikram 		return;
823*25e8c5aaSvikram 	}
824*25e8c5aaSvikram 
825*25e8c5aaSvikram 	/*
826*25e8c5aaSvikram 	 * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock
827*25e8c5aaSvikram 	 */
828*25e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
829*25e8c5aaSvikram 
830*25e8c5aaSvikram 	/*
831*25e8c5aaSvikram 	 * Waiting for the barrier to be released is strictly speaking not
832*25e8c5aaSvikram 	 * necessary. But it simplifies the implementation of
833*25e8c5aaSvikram 	 * contract_device_publish() by establishing the invariant that
834*25e8c5aaSvikram 	 * device contracts cannot go away during negotiation.
835*25e8c5aaSvikram 	 */
836*25e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
837*25e8c5aaSvikram 	ct_barrier_wait_for_release(dip);
838*25e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
839*25e8c5aaSvikram 
840*25e8c5aaSvikram 	list_remove(&(DEVI(dip)->devi_ct), ctd);
841*25e8c5aaSvikram 	ctd->cond_dip = NULL; /* no longer linked to dip */
842*25e8c5aaSvikram 	contract_rele(ct);	/* remove hold for dip linkage */
843*25e8c5aaSvikram 
844*25e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
845*25e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
846*25e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
847*25e8c5aaSvikram }
848*25e8c5aaSvikram 
849*25e8c5aaSvikram /*
850*25e8c5aaSvikram  * contract_device_status
851*25e8c5aaSvikram  *
852*25e8c5aaSvikram  * The device contract status entry point. Called when level of "detail"
853*25e8c5aaSvikram  * is either CTD_FIXED or CTD_ALL
854*25e8c5aaSvikram  *
855*25e8c5aaSvikram  */
856*25e8c5aaSvikram static void
857*25e8c5aaSvikram contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
858*25e8c5aaSvikram     void *status, model_t model)
859*25e8c5aaSvikram {
860*25e8c5aaSvikram 	cont_device_t *ctd = ct->ct_data;
861*25e8c5aaSvikram 
862*25e8c5aaSvikram 	ASSERT(detail == CTD_FIXED || detail == CTD_ALL);
863*25e8c5aaSvikram 
864*25e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
865*25e8c5aaSvikram 	contract_status_common(ct, zone, status, model);
866*25e8c5aaSvikram 
867*25e8c5aaSvikram 	/*
868*25e8c5aaSvikram 	 * There's no need to hold the contract lock while accessing static
869*25e8c5aaSvikram 	 * data like aset or noneg. But since we need the lock to access other
870*25e8c5aaSvikram 	 * data like state, we hold it anyway.
871*25e8c5aaSvikram 	 */
872*25e8c5aaSvikram 	VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0);
873*25e8c5aaSvikram 	VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0);
874*25e8c5aaSvikram 	VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0);
875*25e8c5aaSvikram 
876*25e8c5aaSvikram 	if (detail == CTD_FIXED) {
877*25e8c5aaSvikram 		mutex_exit(&ct->ct_lock);
878*25e8c5aaSvikram 		return;
879*25e8c5aaSvikram 	}
880*25e8c5aaSvikram 
881*25e8c5aaSvikram 	ASSERT(ctd->cond_minor);
882*25e8c5aaSvikram 	VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0);
883*25e8c5aaSvikram 
884*25e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
885*25e8c5aaSvikram }
886*25e8c5aaSvikram 
887*25e8c5aaSvikram /*
888*25e8c5aaSvikram  * Converts a result integer into the corresponding string. Used for printing
889*25e8c5aaSvikram  * messages
890*25e8c5aaSvikram  */
891*25e8c5aaSvikram static char *
892*25e8c5aaSvikram result_str(uint_t result)
893*25e8c5aaSvikram {
894*25e8c5aaSvikram 	switch (result) {
895*25e8c5aaSvikram 	case CT_ACK:
896*25e8c5aaSvikram 		return ("CT_ACK");
897*25e8c5aaSvikram 	case CT_NACK:
898*25e8c5aaSvikram 		return ("CT_NACK");
899*25e8c5aaSvikram 	case CT_NONE:
900*25e8c5aaSvikram 		return ("CT_NONE");
901*25e8c5aaSvikram 	default:
902*25e8c5aaSvikram 		return ("UNKNOWN");
903*25e8c5aaSvikram 	}
904*25e8c5aaSvikram }
905*25e8c5aaSvikram 
906*25e8c5aaSvikram /*
907*25e8c5aaSvikram  * Converts a device state integer constant into the corresponding string.
908*25e8c5aaSvikram  * Used to print messages.
909*25e8c5aaSvikram  */
910*25e8c5aaSvikram static char *
911*25e8c5aaSvikram state_str(uint_t state)
912*25e8c5aaSvikram {
913*25e8c5aaSvikram 	switch (state) {
914*25e8c5aaSvikram 	case CT_DEV_EV_ONLINE:
915*25e8c5aaSvikram 		return ("ONLINE");
916*25e8c5aaSvikram 	case CT_DEV_EV_DEGRADED:
917*25e8c5aaSvikram 		return ("DEGRADED");
918*25e8c5aaSvikram 	case CT_DEV_EV_OFFLINE:
919*25e8c5aaSvikram 		return ("OFFLINE");
920*25e8c5aaSvikram 	default:
921*25e8c5aaSvikram 		return ("UNKNOWN");
922*25e8c5aaSvikram 	}
923*25e8c5aaSvikram }
924*25e8c5aaSvikram 
925*25e8c5aaSvikram /*
926*25e8c5aaSvikram  * Routine that determines if a particular CT_DEV_EV_? event corresponds to a
927*25e8c5aaSvikram  * synchronous state change or not.
928*25e8c5aaSvikram  */
929*25e8c5aaSvikram static int
930*25e8c5aaSvikram is_sync_neg(uint_t old, uint_t new)
931*25e8c5aaSvikram {
932*25e8c5aaSvikram 	int	i;
933*25e8c5aaSvikram 
934*25e8c5aaSvikram 	ASSERT(old & CT_DEV_ALLEVENT);
935*25e8c5aaSvikram 	ASSERT(new & CT_DEV_ALLEVENT);
936*25e8c5aaSvikram 
937*25e8c5aaSvikram 	if (old == new) {
938*25e8c5aaSvikram 		CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
939*25e8c5aaSvikram 		    state_str(new)));
940*25e8c5aaSvikram 		return (-2);
941*25e8c5aaSvikram 	}
942*25e8c5aaSvikram 
943*25e8c5aaSvikram 	for (i = 0; ct_dev_negtable[i].st_new != 0; i++) {
944*25e8c5aaSvikram 		if (old == ct_dev_negtable[i].st_old &&
945*25e8c5aaSvikram 		    new == ct_dev_negtable[i].st_new) {
946*25e8c5aaSvikram 			return (ct_dev_negtable[i].st_neg);
947*25e8c5aaSvikram 		}
948*25e8c5aaSvikram 	}
949*25e8c5aaSvikram 
950*25e8c5aaSvikram 	CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
951*25e8c5aaSvikram 	    "old = %s -> new = %s", state_str(old), state_str(new)));
952*25e8c5aaSvikram 
953*25e8c5aaSvikram 	return (-1);
954*25e8c5aaSvikram }
955*25e8c5aaSvikram 
956*25e8c5aaSvikram /*
957*25e8c5aaSvikram  * Used to cleanup cached dv_nodes so that when a device is released by
958*25e8c5aaSvikram  * a contract holder, its devinfo node can be successfully detached.
959*25e8c5aaSvikram  */
960*25e8c5aaSvikram static int
961*25e8c5aaSvikram contract_device_dvclean(dev_info_t *dip)
962*25e8c5aaSvikram {
963*25e8c5aaSvikram 	char		*devnm;
964*25e8c5aaSvikram 	dev_info_t	*pdip;
965*25e8c5aaSvikram 	int		error;
966*25e8c5aaSvikram 
967*25e8c5aaSvikram 	ASSERT(dip);
968*25e8c5aaSvikram 
969*25e8c5aaSvikram 	/* pdip can be NULL if we have contracts against the root dip */
970*25e8c5aaSvikram 	pdip = ddi_get_parent(dip);
971*25e8c5aaSvikram 
972*25e8c5aaSvikram 	if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) {
973*25e8c5aaSvikram 		char		*path;
974*25e8c5aaSvikram 
975*25e8c5aaSvikram 		path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
976*25e8c5aaSvikram 		(void) ddi_pathname(dip, path);
977*25e8c5aaSvikram 		CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
978*25e8c5aaSvikram 		    "device=%s", path));
979*25e8c5aaSvikram 		kmem_free(path, MAXPATHLEN);
980*25e8c5aaSvikram 		return (EDEADLOCK);
981*25e8c5aaSvikram 	}
982*25e8c5aaSvikram 
983*25e8c5aaSvikram 	if (pdip) {
984*25e8c5aaSvikram 		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
985*25e8c5aaSvikram 		(void) ddi_deviname(dip, devnm);
986*25e8c5aaSvikram 		error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
987*25e8c5aaSvikram 		kmem_free(devnm, MAXNAMELEN + 1);
988*25e8c5aaSvikram 	} else {
989*25e8c5aaSvikram 		error = devfs_clean(dip, NULL, DV_CLEAN_FORCE);
990*25e8c5aaSvikram 	}
991*25e8c5aaSvikram 
992*25e8c5aaSvikram 	return (error);
993*25e8c5aaSvikram }
994*25e8c5aaSvikram 
995*25e8c5aaSvikram /*
996*25e8c5aaSvikram  * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
997*25e8c5aaSvikram  * Results in the ACK or NACK being recorded on the dip for one particular
998*25e8c5aaSvikram  * contract. The device contracts framework evaluates the ACK/NACKs for all
999*25e8c5aaSvikram  * contracts against a device to determine if a particular device state change
1000*25e8c5aaSvikram  * should be allowed.
1001*25e8c5aaSvikram  */
1002*25e8c5aaSvikram static int
1003*25e8c5aaSvikram contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
1004*25e8c5aaSvikram     uint_t cmd)
1005*25e8c5aaSvikram {
1006*25e8c5aaSvikram 	cont_device_t *ctd = ct->ct_data;
1007*25e8c5aaSvikram 	dev_info_t *dip;
1008*25e8c5aaSvikram 	ctid_t	ctid;
1009*25e8c5aaSvikram 	int error;
1010*25e8c5aaSvikram 
1011*25e8c5aaSvikram 	ctid = ct->ct_id;
1012*25e8c5aaSvikram 
1013*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));
1014*25e8c5aaSvikram 
1015*25e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
1016*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));
1017*25e8c5aaSvikram 
1018*25e8c5aaSvikram 	dip = ctd->cond_dip;
1019*25e8c5aaSvikram 
1020*25e8c5aaSvikram 	ASSERT(ctd->cond_minor);
1021*25e8c5aaSvikram 	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
1022*25e8c5aaSvikram 
1023*25e8c5aaSvikram 	/*
1024*25e8c5aaSvikram 	 * Negotiation only if new state is not in A-set
1025*25e8c5aaSvikram 	 */
1026*25e8c5aaSvikram 	ASSERT(!(ctd->cond_aset & evtype));
1027*25e8c5aaSvikram 
1028*25e8c5aaSvikram 	/*
1029*25e8c5aaSvikram 	 * Negotiation only if transition is synchronous
1030*25e8c5aaSvikram 	 */
1031*25e8c5aaSvikram 	ASSERT(is_sync_neg(ctd->cond_state, evtype));
1032*25e8c5aaSvikram 
1033*25e8c5aaSvikram 	/*
1034*25e8c5aaSvikram 	 * We shouldn't be negotiating if the "noneg" flag is set
1035*25e8c5aaSvikram 	 */
1036*25e8c5aaSvikram 	ASSERT(!ctd->cond_noneg);
1037*25e8c5aaSvikram 
1038*25e8c5aaSvikram 	if (dip)
1039*25e8c5aaSvikram 		ndi_hold_devi(dip);
1040*25e8c5aaSvikram 
1041*25e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
1042*25e8c5aaSvikram 
1043*25e8c5aaSvikram 	/*
1044*25e8c5aaSvikram 	 * dv_clean only if !NACK and offline state change
1045*25e8c5aaSvikram 	 */
1046*25e8c5aaSvikram 	if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
1047*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
1048*25e8c5aaSvikram 		error = contract_device_dvclean(dip);
1049*25e8c5aaSvikram 		if (error != 0) {
1050*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
1051*25e8c5aaSvikram 			    ctid));
1052*25e8c5aaSvikram 			ddi_release_devi(dip);
1053*25e8c5aaSvikram 		}
1054*25e8c5aaSvikram 	}
1055*25e8c5aaSvikram 
1056*25e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
1057*25e8c5aaSvikram 
1058*25e8c5aaSvikram 	if (dip)
1059*25e8c5aaSvikram 		ddi_release_devi(dip);
1060*25e8c5aaSvikram 
1061*25e8c5aaSvikram 	if (dip == NULL) {
1062*25e8c5aaSvikram 		if (ctd->cond_currev_id != evid) {
1063*25e8c5aaSvikram 			CT_DEBUG((CE_WARN, "%sACK for non-current event "
1064*25e8c5aaSvikram 			    "(type=%s, id=%llu) on removed device",
1065*25e8c5aaSvikram 			    cmd == CT_NACK ? "N" : "",
1066*25e8c5aaSvikram 			    state_str(evtype), (unsigned long long)evid));
1067*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
1068*25e8c5aaSvikram 			    ctid));
1069*25e8c5aaSvikram 		} else {
1070*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == evtype);
1071*25e8c5aaSvikram 			CT_DEBUG((CE_WARN, "contract_ack: no such device: "
1072*25e8c5aaSvikram 			    "ctid: %d", ctid));
1073*25e8c5aaSvikram 		}
1074*25e8c5aaSvikram 		error = (ct->ct_state == CTS_DEAD) ? ESRCH :
1075*25e8c5aaSvikram 		    ((cmd == CT_NACK) ? ETIMEDOUT : 0);
1076*25e8c5aaSvikram 		mutex_exit(&ct->ct_lock);
1077*25e8c5aaSvikram 		return (error);
1078*25e8c5aaSvikram 	}
1079*25e8c5aaSvikram 
1080*25e8c5aaSvikram 	/*
1081*25e8c5aaSvikram 	 * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock
1082*25e8c5aaSvikram 	 */
1083*25e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
1084*25e8c5aaSvikram 
1085*25e8c5aaSvikram 	mutex_enter(&DEVI(dip)->devi_ct_lock);
1086*25e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
1087*25e8c5aaSvikram 	if (ctd->cond_currev_id != evid) {
1088*25e8c5aaSvikram 		char *buf;
1089*25e8c5aaSvikram 		mutex_exit(&ct->ct_lock);
1090*25e8c5aaSvikram 		mutex_exit(&DEVI(dip)->devi_ct_lock);
1091*25e8c5aaSvikram 		ndi_hold_devi(dip);
1092*25e8c5aaSvikram 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1093*25e8c5aaSvikram 		(void) ddi_pathname(dip, buf);
1094*25e8c5aaSvikram 		ddi_release_devi(dip);
1095*25e8c5aaSvikram 		CT_DEBUG((CE_WARN, "%sACK for non-current event"
1096*25e8c5aaSvikram 		    "(type=%s, id=%llu) on device %s",
1097*25e8c5aaSvikram 		    cmd == CT_NACK ? "N" : "",
1098*25e8c5aaSvikram 		    state_str(evtype), (unsigned long long)evid, buf));
1099*25e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
1100*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
1101*25e8c5aaSvikram 		    cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
1102*25e8c5aaSvikram 		return (cmd == CT_ACK ? 0 : ETIMEDOUT);
1103*25e8c5aaSvikram 	}
1104*25e8c5aaSvikram 
1105*25e8c5aaSvikram 	ASSERT(ctd->cond_currev_type == evtype);
1106*25e8c5aaSvikram 	ASSERT(cmd == CT_ACK || cmd == CT_NACK);
1107*25e8c5aaSvikram 
1108*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
1109*25e8c5aaSvikram 	    cmd == CT_NACK ? "N" : "", ctid));
1110*25e8c5aaSvikram 
1111*25e8c5aaSvikram 	ctd->cond_currev_ack = cmd;
1112*25e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
1113*25e8c5aaSvikram 
1114*25e8c5aaSvikram 	ct_barrier_decr(dip);
1115*25e8c5aaSvikram 	mutex_exit(&DEVI(dip)->devi_ct_lock);
1116*25e8c5aaSvikram 
1117*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));
1118*25e8c5aaSvikram 
1119*25e8c5aaSvikram 	return (0);
1120*25e8c5aaSvikram }
1121*25e8c5aaSvikram 
1122*25e8c5aaSvikram /*
1123*25e8c5aaSvikram  * Invoked when a userland contract holder approves (i.e. ACKs) a state change
1124*25e8c5aaSvikram  */
1125*25e8c5aaSvikram static int
1126*25e8c5aaSvikram contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
1127*25e8c5aaSvikram {
1128*25e8c5aaSvikram 	return (contract_device_ack_nack(ct, evtype, evid, CT_ACK));
1129*25e8c5aaSvikram }
1130*25e8c5aaSvikram 
1131*25e8c5aaSvikram /*
1132*25e8c5aaSvikram  * Invoked when a userland contract holder blocks (i.e. NACKs) a state change
1133*25e8c5aaSvikram  */
1134*25e8c5aaSvikram static int
1135*25e8c5aaSvikram contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
1136*25e8c5aaSvikram {
1137*25e8c5aaSvikram 	return (contract_device_ack_nack(ct, evtype, evid, CT_NACK));
1138*25e8c5aaSvikram }
1139*25e8c5aaSvikram 
1140*25e8c5aaSvikram /*
1141*25e8c5aaSvikram  * Creates a new contract synchronously with the breaking of an existing
1142*25e8c5aaSvikram  * contract. Currently not supported.
1143*25e8c5aaSvikram  */
1144*25e8c5aaSvikram /*ARGSUSED*/
1145*25e8c5aaSvikram static int
1146*25e8c5aaSvikram contract_device_newct(contract_t *ct)
1147*25e8c5aaSvikram {
1148*25e8c5aaSvikram 	return (ENOTSUP);
1149*25e8c5aaSvikram }
1150*25e8c5aaSvikram 
1151*25e8c5aaSvikram /*
1152*25e8c5aaSvikram  * Core device contract implementation entry points
1153*25e8c5aaSvikram  */
1154*25e8c5aaSvikram static contops_t contract_device_ops = {
1155*25e8c5aaSvikram 	contract_device_free,		/* contop_free */
1156*25e8c5aaSvikram 	contract_device_abandon,	/* contop_abandon */
1157*25e8c5aaSvikram 	contract_device_destroy,	/* contop_destroy */
1158*25e8c5aaSvikram 	contract_device_status,		/* contop_status */
1159*25e8c5aaSvikram 	contract_device_ack,		/* contop_ack */
1160*25e8c5aaSvikram 	contract_device_nack,		/* contop_nack */
1161*25e8c5aaSvikram 	contract_qack_notsup,		/* contop_qack */
1162*25e8c5aaSvikram 	contract_device_newct		/* contop_newct */
1163*25e8c5aaSvikram };
1164*25e8c5aaSvikram 
1165*25e8c5aaSvikram /*
1166*25e8c5aaSvikram  * contract_device_init
1167*25e8c5aaSvikram  *
1168*25e8c5aaSvikram  * Initializes the device contract type.
1169*25e8c5aaSvikram  */
1170*25e8c5aaSvikram void
1171*25e8c5aaSvikram contract_device_init(void)
1172*25e8c5aaSvikram {
1173*25e8c5aaSvikram 	device_type = contract_type_init(CTT_DEVICE, "device",
1174*25e8c5aaSvikram 	    &contract_device_ops, contract_device_default);
1175*25e8c5aaSvikram }
1176*25e8c5aaSvikram 
1177*25e8c5aaSvikram /*
1178*25e8c5aaSvikram  * contract_device_create
1179*25e8c5aaSvikram  *
1180*25e8c5aaSvikram  * create a device contract given template "tmpl" and the "owner" process.
1181*25e8c5aaSvikram  * May fail and return NULL if project.max-contracts would have been exceeded.
1182*25e8c5aaSvikram  *
1183*25e8c5aaSvikram  * Common device contract creation routine called for both open-time and
1184*25e8c5aaSvikram  * non-open time device contract creation
1185*25e8c5aaSvikram  */
1186*25e8c5aaSvikram static cont_device_t *
1187*25e8c5aaSvikram contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type,
1188*25e8c5aaSvikram     proc_t *owner, int *errorp)
1189*25e8c5aaSvikram {
1190*25e8c5aaSvikram 	cont_device_t *ctd;
1191*25e8c5aaSvikram 	char *minor;
1192*25e8c5aaSvikram 	char *path;
1193*25e8c5aaSvikram 	dev_info_t *dip;
1194*25e8c5aaSvikram 
1195*25e8c5aaSvikram 	ASSERT(dtmpl != NULL);
1196*25e8c5aaSvikram 	ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE);
1197*25e8c5aaSvikram 	ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK);
1198*25e8c5aaSvikram 	ASSERT(errorp);
1199*25e8c5aaSvikram 
1200*25e8c5aaSvikram 	*errorp = 0;
1201*25e8c5aaSvikram 
1202*25e8c5aaSvikram 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1203*25e8c5aaSvikram 
1204*25e8c5aaSvikram 	mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
1205*25e8c5aaSvikram 	ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
1206*25e8c5aaSvikram 	bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
1207*25e8c5aaSvikram 	mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
1208*25e8c5aaSvikram 
1209*25e8c5aaSvikram 	dip = e_ddi_hold_devi_by_path(path, 0);
1210*25e8c5aaSvikram 	if (dip == NULL) {
1211*25e8c5aaSvikram 		cmn_err(CE_WARN, "contract_create: Cannot find devinfo node "
1212*25e8c5aaSvikram 		    "for device path (%s)", path);
1213*25e8c5aaSvikram 		kmem_free(path, MAXPATHLEN);
1214*25e8c5aaSvikram 		*errorp = ERANGE;
1215*25e8c5aaSvikram 		return (NULL);
1216*25e8c5aaSvikram 	}
1217*25e8c5aaSvikram 
1218*25e8c5aaSvikram 	/*
1219*25e8c5aaSvikram 	 * Lock out any parallel contract negotiations
1220*25e8c5aaSvikram 	 */
1221*25e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
1222*25e8c5aaSvikram 	ct_barrier_acquire(dip);
1223*25e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
1224*25e8c5aaSvikram 
1225*25e8c5aaSvikram 	minor = i_ddi_strdup(path, KM_SLEEP);
1226*25e8c5aaSvikram 	kmem_free(path, MAXPATHLEN);
1227*25e8c5aaSvikram 
1228*25e8c5aaSvikram 	(void) contract_type_pbundle(device_type, owner);
1229*25e8c5aaSvikram 
1230*25e8c5aaSvikram 	ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP);
1231*25e8c5aaSvikram 
1232*25e8c5aaSvikram 	/*
1233*25e8c5aaSvikram 	 * Only we hold a refernce to this contract. Safe to access
1234*25e8c5aaSvikram 	 * the fields without a ct_lock
1235*25e8c5aaSvikram 	 */
1236*25e8c5aaSvikram 	ctd->cond_minor = minor;
1237*25e8c5aaSvikram 	/*
1238*25e8c5aaSvikram 	 * It is safe to set the dip pointer in the contract
1239*25e8c5aaSvikram 	 * as the contract will always be destroyed before the dip
1240*25e8c5aaSvikram 	 * is released
1241*25e8c5aaSvikram 	 */
1242*25e8c5aaSvikram 	ctd->cond_dip = dip;
1243*25e8c5aaSvikram 	ctd->cond_devt = dev;
1244*25e8c5aaSvikram 	ctd->cond_spec = spec_type;
1245*25e8c5aaSvikram 
1246*25e8c5aaSvikram 	/*
1247*25e8c5aaSvikram 	 * Since we are able to lookup the device, it is either
1248*25e8c5aaSvikram 	 * online or degraded
1249*25e8c5aaSvikram 	 */
1250*25e8c5aaSvikram 	ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ?
1251*25e8c5aaSvikram 	    CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE;
1252*25e8c5aaSvikram 
1253*25e8c5aaSvikram 	mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
1254*25e8c5aaSvikram 	ctd->cond_aset = dtmpl->ctd_aset;
1255*25e8c5aaSvikram 	ctd->cond_noneg = dtmpl->ctd_noneg;
1256*25e8c5aaSvikram 
1257*25e8c5aaSvikram 	/*
1258*25e8c5aaSvikram 	 * contract_ctor() initailizes the common portion of a contract
1259*25e8c5aaSvikram 	 * contract_dtor() destroys the common portion of a contract
1260*25e8c5aaSvikram 	 */
1261*25e8c5aaSvikram 	if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl,
1262*25e8c5aaSvikram 	    ctd, 0, owner, B_TRUE)) {
1263*25e8c5aaSvikram 		mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
1264*25e8c5aaSvikram 		/*
1265*25e8c5aaSvikram 		 * contract_device_free() destroys the type specific
1266*25e8c5aaSvikram 		 * portion of a contract and frees the contract.
1267*25e8c5aaSvikram 		 * The "minor" path and "cred" is a part of the type specific
1268*25e8c5aaSvikram 		 * portion of the contract and will be freed by
1269*25e8c5aaSvikram 		 * contract_device_free()
1270*25e8c5aaSvikram 		 */
1271*25e8c5aaSvikram 		contract_device_free(&ctd->cond_contract);
1272*25e8c5aaSvikram 
1273*25e8c5aaSvikram 		/* release barrier */
1274*25e8c5aaSvikram 		mutex_enter(&(DEVI(dip)->devi_ct_lock));
1275*25e8c5aaSvikram 		ct_barrier_release(dip);
1276*25e8c5aaSvikram 		mutex_exit(&(DEVI(dip)->devi_ct_lock));
1277*25e8c5aaSvikram 
1278*25e8c5aaSvikram 		ddi_release_devi(dip);
1279*25e8c5aaSvikram 		*errorp = EAGAIN;
1280*25e8c5aaSvikram 		return (NULL);
1281*25e8c5aaSvikram 	}
1282*25e8c5aaSvikram 	mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
1283*25e8c5aaSvikram 
1284*25e8c5aaSvikram 	mutex_enter(&ctd->cond_contract.ct_lock);
1285*25e8c5aaSvikram 	ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME;
1286*25e8c5aaSvikram 	ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME;
1287*25e8c5aaSvikram 	ctd->cond_contract.ct_ntime.ctm_start = -1;
1288*25e8c5aaSvikram 	ctd->cond_contract.ct_qtime.ctm_start = -1;
1289*25e8c5aaSvikram 	mutex_exit(&ctd->cond_contract.ct_lock);
1290*25e8c5aaSvikram 
1291*25e8c5aaSvikram 	/*
1292*25e8c5aaSvikram 	 * Insert device contract into list hanging off the dip
1293*25e8c5aaSvikram 	 * Bump up the ref-count on the contract to reflect this
1294*25e8c5aaSvikram 	 */
1295*25e8c5aaSvikram 	contract_hold(&ctd->cond_contract);
1296*25e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
1297*25e8c5aaSvikram 	list_insert_tail(&(DEVI(dip)->devi_ct), ctd);
1298*25e8c5aaSvikram 
1299*25e8c5aaSvikram 	/* release barrier */
1300*25e8c5aaSvikram 	ct_barrier_release(dip);
1301*25e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
1302*25e8c5aaSvikram 
1303*25e8c5aaSvikram 	ddi_release_devi(dip);
1304*25e8c5aaSvikram 
1305*25e8c5aaSvikram 	return (ctd);
1306*25e8c5aaSvikram }
1307*25e8c5aaSvikram 
1308*25e8c5aaSvikram /*
1309*25e8c5aaSvikram  * Called when a device is successfully opened to create an open-time contract
1310*25e8c5aaSvikram  * i.e. synchronously with a device open.
1311*25e8c5aaSvikram  */
1312*25e8c5aaSvikram int
1313*25e8c5aaSvikram contract_device_open(dev_t dev, int spec_type, contract_t **ctpp)
1314*25e8c5aaSvikram {
1315*25e8c5aaSvikram 	ctmpl_device_t *dtmpl;
1316*25e8c5aaSvikram 	ct_template_t  *tmpl;
1317*25e8c5aaSvikram 	cont_device_t *ctd;
1318*25e8c5aaSvikram 	char *path;
1319*25e8c5aaSvikram 	klwp_t *lwp;
1320*25e8c5aaSvikram 	int error;
1321*25e8c5aaSvikram 
1322*25e8c5aaSvikram 	if (ctpp)
1323*25e8c5aaSvikram 		*ctpp = NULL;
1324*25e8c5aaSvikram 
1325*25e8c5aaSvikram 	/*
1326*25e8c5aaSvikram 	 * Check if we are in user-context i.e. if we have an lwp
1327*25e8c5aaSvikram 	 */
1328*25e8c5aaSvikram 	lwp = ttolwp(curthread);
1329*25e8c5aaSvikram 	if (lwp == NULL) {
1330*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "contract_open: Not user-context"));
1331*25e8c5aaSvikram 		return (0);
1332*25e8c5aaSvikram 	}
1333*25e8c5aaSvikram 
1334*25e8c5aaSvikram 	tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]);
1335*25e8c5aaSvikram 	if (tmpl == NULL) {
1336*25e8c5aaSvikram 		return (0);
1337*25e8c5aaSvikram 	}
1338*25e8c5aaSvikram 	dtmpl = tmpl->ctmpl_data;
1339*25e8c5aaSvikram 
1340*25e8c5aaSvikram 	/*
1341*25e8c5aaSvikram 	 * If the user set a minor path in the template before an open,
1342*25e8c5aaSvikram 	 * ignore it. We use the minor path of the actual minor opened.
1343*25e8c5aaSvikram 	 */
1344*25e8c5aaSvikram 	mutex_enter(&tmpl->ctmpl_lock);
1345*25e8c5aaSvikram 	if (dtmpl->ctd_minor != NULL) {
1346*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: "
1347*25e8c5aaSvikram 		    "ignoring device minor path in active template: %s",
1348*25e8c5aaSvikram 		    curproc->p_pid, dtmpl->ctd_minor));
1349*25e8c5aaSvikram 		/*
1350*25e8c5aaSvikram 		 * This is a copy of the actual activated template.
1351*25e8c5aaSvikram 		 * Safe to make changes such as freeing the minor
1352*25e8c5aaSvikram 		 * path in the template.
1353*25e8c5aaSvikram 		 */
1354*25e8c5aaSvikram 		kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
1355*25e8c5aaSvikram 		dtmpl->ctd_minor = NULL;
1356*25e8c5aaSvikram 	}
1357*25e8c5aaSvikram 	mutex_exit(&tmpl->ctmpl_lock);
1358*25e8c5aaSvikram 
1359*25e8c5aaSvikram 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1360*25e8c5aaSvikram 
1361*25e8c5aaSvikram 	if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) {
1362*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive "
1363*25e8c5aaSvikram 		    "minor path from dev_t,spec {%lu, %d} for process (%d)",
1364*25e8c5aaSvikram 		    dev, spec_type, curproc->p_pid));
1365*25e8c5aaSvikram 		ctmpl_free(tmpl);
1366*25e8c5aaSvikram 		kmem_free(path, MAXPATHLEN);
1367*25e8c5aaSvikram 		return (1);
1368*25e8c5aaSvikram 	}
1369*25e8c5aaSvikram 
1370*25e8c5aaSvikram 	mutex_enter(&tmpl->ctmpl_lock);
1371*25e8c5aaSvikram 	ASSERT(dtmpl->ctd_minor == NULL);
1372*25e8c5aaSvikram 	dtmpl->ctd_minor = path;
1373*25e8c5aaSvikram 	mutex_exit(&tmpl->ctmpl_lock);
1374*25e8c5aaSvikram 
1375*25e8c5aaSvikram 	ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error);
1376*25e8c5aaSvikram 
1377*25e8c5aaSvikram 	mutex_enter(&tmpl->ctmpl_lock);
1378*25e8c5aaSvikram 	ASSERT(dtmpl->ctd_minor);
1379*25e8c5aaSvikram 	dtmpl->ctd_minor = NULL;
1380*25e8c5aaSvikram 	mutex_exit(&tmpl->ctmpl_lock);
1381*25e8c5aaSvikram 	ctmpl_free(tmpl);
1382*25e8c5aaSvikram 	kmem_free(path, MAXPATHLEN);
1383*25e8c5aaSvikram 
1384*25e8c5aaSvikram 	if (ctd == NULL) {
1385*25e8c5aaSvikram 		cmn_err(CE_NOTE, "contract_device_open(): Failed to "
1386*25e8c5aaSvikram 		    "create device contract for process (%d) holding "
1387*25e8c5aaSvikram 		    "device (devt = %lu, spec_type = %d)",
1388*25e8c5aaSvikram 		    curproc->p_pid, dev, spec_type);
1389*25e8c5aaSvikram 		return (1);
1390*25e8c5aaSvikram 	}
1391*25e8c5aaSvikram 
1392*25e8c5aaSvikram 	if (ctpp) {
1393*25e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
1394*25e8c5aaSvikram 		*ctpp = &ctd->cond_contract;
1395*25e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
1396*25e8c5aaSvikram 	}
1397*25e8c5aaSvikram 	return (0);
1398*25e8c5aaSvikram }
1399*25e8c5aaSvikram 
1400*25e8c5aaSvikram /*
1401*25e8c5aaSvikram  * Called during contract negotiation by the device contract framework to wait
1402*25e8c5aaSvikram  * for ACKs or NACKs from contract holders. If all responses are not received
1403*25e8c5aaSvikram  * before a specified timeout, this routine times out.
1404*25e8c5aaSvikram  */
1405*25e8c5aaSvikram static uint_t
1406*25e8c5aaSvikram wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype)
1407*25e8c5aaSvikram {
1408*25e8c5aaSvikram 	cont_device_t *ctd;
1409*25e8c5aaSvikram 	int timed_out = 0;
1410*25e8c5aaSvikram 	int result = CT_NONE;
1411*25e8c5aaSvikram 	int ack;
1412*25e8c5aaSvikram 	char *f = "wait_for_acks";
1413*25e8c5aaSvikram 
1414*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
1415*25e8c5aaSvikram 	ASSERT(dip);
1416*25e8c5aaSvikram 	ASSERT(evtype & CT_DEV_ALLEVENT);
1417*25e8c5aaSvikram 	ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
1418*25e8c5aaSvikram 	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
1419*25e8c5aaSvikram 	    (spec_type == S_IFBLK || spec_type == S_IFCHR));
1420*25e8c5aaSvikram 
1421*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip));
1422*25e8c5aaSvikram 
1423*25e8c5aaSvikram 	if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) {
1424*25e8c5aaSvikram 		/*
1425*25e8c5aaSvikram 		 * some contract owner(s) didn't respond in time
1426*25e8c5aaSvikram 		 */
1427*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip));
1428*25e8c5aaSvikram 		timed_out = 1;
1429*25e8c5aaSvikram 	}
1430*25e8c5aaSvikram 
1431*25e8c5aaSvikram 	ack = 0;
1432*25e8c5aaSvikram 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1433*25e8c5aaSvikram 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1434*25e8c5aaSvikram 
1435*25e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
1436*25e8c5aaSvikram 
1437*25e8c5aaSvikram 		ASSERT(ctd->cond_dip == dip);
1438*25e8c5aaSvikram 
1439*25e8c5aaSvikram 		if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
1440*25e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
1441*25e8c5aaSvikram 			continue;
1442*25e8c5aaSvikram 		}
1443*25e8c5aaSvikram 		if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
1444*25e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
1445*25e8c5aaSvikram 			continue;
1446*25e8c5aaSvikram 		}
1447*25e8c5aaSvikram 
1448*25e8c5aaSvikram 		/* skip if non-negotiable contract */
1449*25e8c5aaSvikram 		if (ctd->cond_noneg) {
1450*25e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
1451*25e8c5aaSvikram 			continue;
1452*25e8c5aaSvikram 		}
1453*25e8c5aaSvikram 
1454*25e8c5aaSvikram 		ASSERT(ctd->cond_currev_type == evtype);
1455*25e8c5aaSvikram 		if (ctd->cond_currev_ack == CT_NACK) {
1456*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p",
1457*25e8c5aaSvikram 			    f, (void *)dip));
1458*25e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
1459*25e8c5aaSvikram 			return (CT_NACK);
1460*25e8c5aaSvikram 		} else if (ctd->cond_currev_ack == CT_ACK) {
1461*25e8c5aaSvikram 			ack = 1;
1462*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "%s: found a ACK: %p",
1463*25e8c5aaSvikram 			    f, (void *)dip));
1464*25e8c5aaSvikram 		}
1465*25e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
1466*25e8c5aaSvikram 	}
1467*25e8c5aaSvikram 
1468*25e8c5aaSvikram 	if (ack) {
1469*25e8c5aaSvikram 		result = CT_ACK;
1470*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip));
1471*25e8c5aaSvikram 	} else if (timed_out) {
1472*25e8c5aaSvikram 		result = CT_NONE;
1473*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p",
1474*25e8c5aaSvikram 		    f, (void *)dip));
1475*25e8c5aaSvikram 	} else {
1476*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p",
1477*25e8c5aaSvikram 		    f, (void *)dip));
1478*25e8c5aaSvikram 	}
1479*25e8c5aaSvikram 
1480*25e8c5aaSvikram 
1481*25e8c5aaSvikram 	return (result);
1482*25e8c5aaSvikram }
1483*25e8c5aaSvikram 
1484*25e8c5aaSvikram /*
1485*25e8c5aaSvikram  * Determines the current state of a device (i.e a devinfo node
1486*25e8c5aaSvikram  */
1487*25e8c5aaSvikram static int
1488*25e8c5aaSvikram get_state(dev_info_t *dip)
1489*25e8c5aaSvikram {
1490*25e8c5aaSvikram 	if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip))
1491*25e8c5aaSvikram 		return (CT_DEV_EV_OFFLINE);
1492*25e8c5aaSvikram 	else if (DEVI_IS_DEVICE_DEGRADED(dip))
1493*25e8c5aaSvikram 		return (CT_DEV_EV_DEGRADED);
1494*25e8c5aaSvikram 	else
1495*25e8c5aaSvikram 		return (CT_DEV_EV_ONLINE);
1496*25e8c5aaSvikram }
1497*25e8c5aaSvikram 
1498*25e8c5aaSvikram /*
1499*25e8c5aaSvikram  * Sets the current state of a device in a device contract
1500*25e8c5aaSvikram  */
1501*25e8c5aaSvikram static void
1502*25e8c5aaSvikram set_cond_state(dev_info_t *dip)
1503*25e8c5aaSvikram {
1504*25e8c5aaSvikram 	uint_t state = get_state(dip);
1505*25e8c5aaSvikram 	cont_device_t *ctd;
1506*25e8c5aaSvikram 
1507*25e8c5aaSvikram 	/* verify that barrier is held */
1508*25e8c5aaSvikram 	ASSERT(ct_barrier_held(dip));
1509*25e8c5aaSvikram 
1510*25e8c5aaSvikram 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1511*25e8c5aaSvikram 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1512*25e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
1513*25e8c5aaSvikram 		ASSERT(ctd->cond_dip == dip);
1514*25e8c5aaSvikram 		ctd->cond_state = state;
1515*25e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
1516*25e8c5aaSvikram 	}
1517*25e8c5aaSvikram }
1518*25e8c5aaSvikram 
1519*25e8c5aaSvikram /*
1520*25e8c5aaSvikram  * Core routine called by event-specific routines when an event occurs.
1521*25e8c5aaSvikram  * Determines if an event should be published, and if it is to be
1522*25e8c5aaSvikram  * published, whether a negotiation should take place. Also implements
1523*25e8c5aaSvikram  * NEGEND events which publish the final disposition of an event after
1524*25e8c5aaSvikram  * negotiations are complete.
1525*25e8c5aaSvikram  *
1526*25e8c5aaSvikram  * When an event occurs on a minor node, this routine walks the list of
1527*25e8c5aaSvikram  * contracts hanging off a devinfo node and for each contract on the affected
1528*25e8c5aaSvikram  * dip, evaluates the following cases
1529*25e8c5aaSvikram  *
1530*25e8c5aaSvikram  *	a. an event that is synchronous, breaks the contract and NONEG not set
1531*25e8c5aaSvikram  *		- bumps up the outstanding negotiation counts on the dip
1532*25e8c5aaSvikram  *		- marks the dip as undergoing negotiation (devi_ct_neg)
1533*25e8c5aaSvikram  *		- event of type CTE_NEG is published
1534*25e8c5aaSvikram  *	b. an event that is synchronous, breaks the contract and NONEG is set
1535*25e8c5aaSvikram  *		- sets the final result to CT_NACK, event is blocked
1536*25e8c5aaSvikram  *		- does not publish an event
1537*25e8c5aaSvikram  *	c. event is asynchronous and breaks the contract
1538*25e8c5aaSvikram  *		- publishes a critical event irrespective of whether the NONEG
1539*25e8c5aaSvikram  *		  flag is set, since the contract will be broken and contract
1540*25e8c5aaSvikram  *		  owner needs to be informed.
1541*25e8c5aaSvikram  *	d. No contract breakage but the owner has subscribed to the event
1542*25e8c5aaSvikram  *		- publishes the event irrespective of the NONEG flag as the
1543*25e8c5aaSvikram  *		  owner has explicitly subscribed to the event.
1544*25e8c5aaSvikram  *	e. NEGEND event
1545*25e8c5aaSvikram  *		- publishes a critical event. Should only be doing this
1546*25e8c5aaSvikram  *		  if NONEG is not set.
1547*25e8c5aaSvikram  *	f. all other events
1548*25e8c5aaSvikram  *		- Since a contract is not broken and this event has not been
1549*25e8c5aaSvikram  *		  subscribed to, this event does not need to be published
1550*25e8c5aaSvikram  *		  for this contract.
1551*25e8c5aaSvikram  *
1552*25e8c5aaSvikram  *	Once an event is published, what happens next depends on the type of
1553*25e8c5aaSvikram  *	event:
1554*25e8c5aaSvikram  *
1555*25e8c5aaSvikram  *	a. NEGEND event
1556*25e8c5aaSvikram  *		- cleanup all state associated with the preceding negotiation
1557*25e8c5aaSvikram  *		  and return CT_ACK to the caller of contract_device_publish()
1558*25e8c5aaSvikram  *	b. NACKed event
1559*25e8c5aaSvikram  *		- One or more contracts had the NONEG term, so the event was
1560*25e8c5aaSvikram  *		  blocked. Return CT_NACK to the caller.
1561*25e8c5aaSvikram  *	c. Negotiated event
1562*25e8c5aaSvikram  *		- Call wait_for_acks() to wait for responses from contract
1563*25e8c5aaSvikram  *		holders. The end result is either CT_ACK (event is permitted),
1564*25e8c5aaSvikram  *		CT_NACK (event is blocked) or CT_NONE (no contract owner
1565*25e8c5aaSvikram  *		responded). This result is returned back to the caller.
1566*25e8c5aaSvikram  *	d. All other events
1567*25e8c5aaSvikram  *		- If the event was asynchronous (i.e. not negotiated) or
1568*25e8c5aaSvikram  *		a contract was not broken return CT_ACK to the caller.
1569*25e8c5aaSvikram  */
1570*25e8c5aaSvikram static uint_t
1571*25e8c5aaSvikram contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
1572*25e8c5aaSvikram     uint_t evtype, nvlist_t *tnvl)
1573*25e8c5aaSvikram {
1574*25e8c5aaSvikram 	cont_device_t *ctd;
1575*25e8c5aaSvikram 	uint_t result = CT_NONE;
1576*25e8c5aaSvikram 	uint64_t evid = 0;
1577*25e8c5aaSvikram 	uint64_t nevid = 0;
1578*25e8c5aaSvikram 	char *path = NULL;
1579*25e8c5aaSvikram 	int negend;
1580*25e8c5aaSvikram 	int match;
1581*25e8c5aaSvikram 	int sync = 0;
1582*25e8c5aaSvikram 	contract_t *ct;
1583*25e8c5aaSvikram 	ct_kevent_t *event;
1584*25e8c5aaSvikram 	nvlist_t *nvl;
1585*25e8c5aaSvikram 	int broken = 0;
1586*25e8c5aaSvikram 
1587*25e8c5aaSvikram 	ASSERT(dip);
1588*25e8c5aaSvikram 	ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
1589*25e8c5aaSvikram 	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
1590*25e8c5aaSvikram 	    (spec_type == S_IFBLK || spec_type == S_IFCHR));
1591*25e8c5aaSvikram 	ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));
1592*25e8c5aaSvikram 
1593*25e8c5aaSvikram 	/* Is this a synchronous state change ? */
1594*25e8c5aaSvikram 	if (evtype != CT_EV_NEGEND) {
1595*25e8c5aaSvikram 		sync = is_sync_neg(get_state(dip), evtype);
1596*25e8c5aaSvikram 		/* NOP if unsupported transition */
1597*25e8c5aaSvikram 		if (sync == -2 || sync == -1) {
1598*25e8c5aaSvikram 			DEVI(dip)->devi_flags |= DEVI_CT_NOP;
1599*25e8c5aaSvikram 			result = (sync == -2) ? CT_ACK : CT_NONE;
1600*25e8c5aaSvikram 			goto out;
1601*25e8c5aaSvikram 		}
1602*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
1603*25e8c5aaSvikram 		    sync ? "" : " not"));
1604*25e8c5aaSvikram 	} else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
1605*25e8c5aaSvikram 		DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
1606*25e8c5aaSvikram 		result = CT_ACK;
1607*25e8c5aaSvikram 		goto out;
1608*25e8c5aaSvikram 	}
1609*25e8c5aaSvikram 
1610*25e8c5aaSvikram 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1611*25e8c5aaSvikram 	(void) ddi_pathname(dip, path);
1612*25e8c5aaSvikram 
1613*25e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
1614*25e8c5aaSvikram 
1615*25e8c5aaSvikram 	/*
1616*25e8c5aaSvikram 	 * Negotiation end - set the state of the device in the contract
1617*25e8c5aaSvikram 	 */
1618*25e8c5aaSvikram 	if (evtype == CT_EV_NEGEND) {
1619*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
1620*25e8c5aaSvikram 		set_cond_state(dip);
1621*25e8c5aaSvikram 	}
1622*25e8c5aaSvikram 
1623*25e8c5aaSvikram 	/*
1624*25e8c5aaSvikram 	 * If this device didn't go through negotiation, don't publish
1625*25e8c5aaSvikram 	 * a NEGEND event - simply release the barrier to allow other
1626*25e8c5aaSvikram 	 * device events in.
1627*25e8c5aaSvikram 	 */
1628*25e8c5aaSvikram 	negend = 0;
1629*25e8c5aaSvikram 	if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
1630*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
1631*25e8c5aaSvikram 		ct_barrier_release(dip);
1632*25e8c5aaSvikram 		mutex_exit(&(DEVI(dip)->devi_ct_lock));
1633*25e8c5aaSvikram 		result = CT_ACK;
1634*25e8c5aaSvikram 		goto out;
1635*25e8c5aaSvikram 	} else if (evtype == CT_EV_NEGEND) {
1636*25e8c5aaSvikram 		/*
1637*25e8c5aaSvikram 		 * There are negotiated contract breakages that
1638*25e8c5aaSvikram 		 * need a NEGEND event
1639*25e8c5aaSvikram 		 */
1640*25e8c5aaSvikram 		ASSERT(ct_barrier_held(dip));
1641*25e8c5aaSvikram 		negend = 1;
1642*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
1643*25e8c5aaSvikram 	} else {
1644*25e8c5aaSvikram 		/*
1645*25e8c5aaSvikram 		 * This is a new event, not a NEGEND event. Wait for previous
1646*25e8c5aaSvikram 		 * contract events to complete.
1647*25e8c5aaSvikram 		 */
1648*25e8c5aaSvikram 		ct_barrier_acquire(dip);
1649*25e8c5aaSvikram 	}
1650*25e8c5aaSvikram 
1651*25e8c5aaSvikram 
1652*25e8c5aaSvikram 	match = 0;
1653*25e8c5aaSvikram 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
1654*25e8c5aaSvikram 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
1655*25e8c5aaSvikram 
1656*25e8c5aaSvikram 		ctid_t ctid;
1657*25e8c5aaSvikram 		size_t len = strlen(path);
1658*25e8c5aaSvikram 
1659*25e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
1660*25e8c5aaSvikram 
1661*25e8c5aaSvikram 		ASSERT(ctd->cond_dip == dip);
1662*25e8c5aaSvikram 		ASSERT(ctd->cond_minor);
1663*25e8c5aaSvikram 		ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
1664*25e8c5aaSvikram 		    ctd->cond_minor[len] == ':');
1665*25e8c5aaSvikram 
1666*25e8c5aaSvikram 		if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
1667*25e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
1668*25e8c5aaSvikram 			continue;
1669*25e8c5aaSvikram 		}
1670*25e8c5aaSvikram 		if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
1671*25e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
1672*25e8c5aaSvikram 			continue;
1673*25e8c5aaSvikram 		}
1674*25e8c5aaSvikram 
1675*25e8c5aaSvikram 		/* We have a matching contract */
1676*25e8c5aaSvikram 		match = 1;
1677*25e8c5aaSvikram 		ctid = ctd->cond_contract.ct_id;
1678*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
1679*25e8c5aaSvikram 		    ctid));
1680*25e8c5aaSvikram 
1681*25e8c5aaSvikram 		/*
1682*25e8c5aaSvikram 		 * There are 4 possible cases
1683*25e8c5aaSvikram 		 * 1. A contract is broken (dev not in acceptable state) and
1684*25e8c5aaSvikram 		 *    the state change is synchronous - start negotiation
1685*25e8c5aaSvikram 		 *    by sending a CTE_NEG critical event.
1686*25e8c5aaSvikram 		 * 2. A contract is broken and the state change is
1687*25e8c5aaSvikram 		 *    asynchronous - just send a critical event and
1688*25e8c5aaSvikram 		 *    break the contract.
1689*25e8c5aaSvikram 		 * 3. Contract is not broken, but consumer has subscribed
1690*25e8c5aaSvikram 		 *    to the event as a critical or informative event
1691*25e8c5aaSvikram 		 *    - just send the appropriate event
1692*25e8c5aaSvikram 		 * 4. contract waiting for negend event - just send the critical
1693*25e8c5aaSvikram 		 *    NEGEND event.
1694*25e8c5aaSvikram 		 */
1695*25e8c5aaSvikram 		broken = 0;
1696*25e8c5aaSvikram 		if (!negend && !(evtype & ctd->cond_aset)) {
1697*25e8c5aaSvikram 			broken = 1;
1698*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
1699*25e8c5aaSvikram 			    ctid));
1700*25e8c5aaSvikram 		}
1701*25e8c5aaSvikram 
1702*25e8c5aaSvikram 		/*
1703*25e8c5aaSvikram 		 * Don't send event if
1704*25e8c5aaSvikram 		 *	- contract is not broken AND
1705*25e8c5aaSvikram 		 *	- contract holder has not subscribed to this event AND
1706*25e8c5aaSvikram 		 *	- contract not waiting for a NEGEND event
1707*25e8c5aaSvikram 		 */
1708*25e8c5aaSvikram 		if (!broken && !EVSENDP(ctd, evtype) &&
1709*25e8c5aaSvikram 		    !ctd->cond_neg) {
1710*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "contract_device_publish(): "
1711*25e8c5aaSvikram 			    "contract (%d): no publish reqd: event %d",
1712*25e8c5aaSvikram 			    ctd->cond_contract.ct_id, evtype));
1713*25e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
1714*25e8c5aaSvikram 			continue;
1715*25e8c5aaSvikram 		}
1716*25e8c5aaSvikram 
1717*25e8c5aaSvikram 		/*
1718*25e8c5aaSvikram 		 * Note: need to kmem_zalloc() the event so mutexes are
1719*25e8c5aaSvikram 		 * initialized automatically
1720*25e8c5aaSvikram 		 */
1721*25e8c5aaSvikram 		ct = &ctd->cond_contract;
1722*25e8c5aaSvikram 		event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
1723*25e8c5aaSvikram 		event->cte_type = evtype;
1724*25e8c5aaSvikram 
1725*25e8c5aaSvikram 		if (broken && sync) {
1726*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: broken + sync: "
1727*25e8c5aaSvikram 			    "ctid: %d", ctid));
1728*25e8c5aaSvikram 			ASSERT(!negend);
1729*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == 0);
1730*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == 0);
1731*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_ack == 0);
1732*25e8c5aaSvikram 			ASSERT(ctd->cond_neg == 0);
1733*25e8c5aaSvikram 			if (ctd->cond_noneg) {
1734*25e8c5aaSvikram 				/* Nothing to publish. Event has been blocked */
1735*25e8c5aaSvikram 				CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
1736*25e8c5aaSvikram 				    "not publishing blocked ev: ctid: %d",
1737*25e8c5aaSvikram 				    ctid));
1738*25e8c5aaSvikram 				result = CT_NACK;
1739*25e8c5aaSvikram 				kmem_free(event, sizeof (ct_kevent_t));
1740*25e8c5aaSvikram 				mutex_exit(&ctd->cond_contract.ct_lock);
1741*25e8c5aaSvikram 				continue;
1742*25e8c5aaSvikram 			}
1743*25e8c5aaSvikram 			event->cte_flags = CTE_NEG; /* critical neg. event */
1744*25e8c5aaSvikram 			ctd->cond_currev_type = event->cte_type;
1745*25e8c5aaSvikram 			ct_barrier_incr(dip);
1746*25e8c5aaSvikram 			DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
1747*25e8c5aaSvikram 			ctd->cond_neg = 1;
1748*25e8c5aaSvikram 		} else if (broken && !sync) {
1749*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
1750*25e8c5aaSvikram 			    ctid));
1751*25e8c5aaSvikram 			ASSERT(!negend);
1752*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == 0);
1753*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == 0);
1754*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_ack == 0);
1755*25e8c5aaSvikram 			ASSERT(ctd->cond_neg == 0);
1756*25e8c5aaSvikram 			event->cte_flags = 0; /* critical event */
1757*25e8c5aaSvikram 		} else if (EVSENDP(ctd, event->cte_type)) {
1758*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
1759*25e8c5aaSvikram 			    ctid));
1760*25e8c5aaSvikram 			ASSERT(!negend);
1761*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == 0);
1762*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == 0);
1763*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_ack == 0);
1764*25e8c5aaSvikram 			ASSERT(ctd->cond_neg == 0);
1765*25e8c5aaSvikram 			event->cte_flags = EVINFOP(ctd, event->cte_type) ?
1766*25e8c5aaSvikram 			    CTE_INFO : 0;
1767*25e8c5aaSvikram 		} else if (ctd->cond_neg) {
1768*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
1769*25e8c5aaSvikram 			ASSERT(negend);
1770*25e8c5aaSvikram 			ASSERT(ctd->cond_noneg == 0);
1771*25e8c5aaSvikram 			nevid = ctd->cond_contract.ct_nevent ?
1772*25e8c5aaSvikram 			    ctd->cond_contract.ct_nevent->cte_id : 0;
1773*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == nevid);
1774*25e8c5aaSvikram 			event->cte_flags = 0;	/* NEGEND is always critical */
1775*25e8c5aaSvikram 			ctd->cond_currev_id = 0;
1776*25e8c5aaSvikram 			ctd->cond_currev_type = 0;
1777*25e8c5aaSvikram 			ctd->cond_currev_ack = 0;
1778*25e8c5aaSvikram 			ctd->cond_neg = 0;
1779*25e8c5aaSvikram 		} else {
1780*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: not publishing event for "
1781*25e8c5aaSvikram 			    "ctid: %d, evtype: %d",
1782*25e8c5aaSvikram 			    ctd->cond_contract.ct_id, event->cte_type));
1783*25e8c5aaSvikram 			ASSERT(!negend);
1784*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == 0);
1785*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == 0);
1786*25e8c5aaSvikram 			ASSERT(ctd->cond_currev_ack == 0);
1787*25e8c5aaSvikram 			ASSERT(ctd->cond_neg == 0);
1788*25e8c5aaSvikram 			kmem_free(event, sizeof (ct_kevent_t));
1789*25e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
1790*25e8c5aaSvikram 			continue;
1791*25e8c5aaSvikram 		}
1792*25e8c5aaSvikram 
1793*25e8c5aaSvikram 		nvl = NULL;
1794*25e8c5aaSvikram 		if (tnvl) {
1795*25e8c5aaSvikram 			VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
1796*25e8c5aaSvikram 			if (negend) {
1797*25e8c5aaSvikram 				int32_t newct = 0;
1798*25e8c5aaSvikram 				ASSERT(ctd->cond_noneg == 0);
1799*25e8c5aaSvikram 				VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
1800*25e8c5aaSvikram 				    == 0);
1801*25e8c5aaSvikram 				VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
1802*25e8c5aaSvikram 				    &newct) == 0);
1803*25e8c5aaSvikram 				VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
1804*25e8c5aaSvikram 				    newct == 1 ? 0 :
1805*25e8c5aaSvikram 				    ctd->cond_contract.ct_id) == 0);
1806*25e8c5aaSvikram 				CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
1807*25e8c5aaSvikram 				    "CTS_NEVID: %llu, CTS_NEWCT: %s",
1808*25e8c5aaSvikram 				    ctid, (unsigned long long)nevid,
1809*25e8c5aaSvikram 				    newct ? "success" : "failure"));
1810*25e8c5aaSvikram 
1811*25e8c5aaSvikram 			}
1812*25e8c5aaSvikram 		}
1813*25e8c5aaSvikram 
1814*25e8c5aaSvikram 		if (ctd->cond_neg) {
1815*25e8c5aaSvikram 			ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
1816*25e8c5aaSvikram 			ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
1817*25e8c5aaSvikram 			ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
1818*25e8c5aaSvikram 			ctd->cond_contract.ct_qtime.ctm_start =
1819*25e8c5aaSvikram 			    ctd->cond_contract.ct_ntime.ctm_start;
1820*25e8c5aaSvikram 		}
1821*25e8c5aaSvikram 
1822*25e8c5aaSvikram 		/*
1823*25e8c5aaSvikram 		 * by holding the dip's devi_ct_lock we ensure that
1824*25e8c5aaSvikram 		 * all ACK/NACKs are held up until we have finished
1825*25e8c5aaSvikram 		 * publishing to all contracts.
1826*25e8c5aaSvikram 		 */
1827*25e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
1828*25e8c5aaSvikram 		evid = cte_publish_all(ct, event, nvl, NULL);
1829*25e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
1830*25e8c5aaSvikram 
1831*25e8c5aaSvikram 		if (ctd->cond_neg) {
1832*25e8c5aaSvikram 			ASSERT(!negend);
1833*25e8c5aaSvikram 			ASSERT(broken);
1834*25e8c5aaSvikram 			ASSERT(sync);
1835*25e8c5aaSvikram 			ASSERT(!ctd->cond_noneg);
1836*25e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
1837*25e8c5aaSvikram 			    ": %d", ctid));
1838*25e8c5aaSvikram 			ctd->cond_currev_id = evid;
1839*25e8c5aaSvikram 		} else if (negend) {
1840*25e8c5aaSvikram 			ctd->cond_contract.ct_ntime.ctm_start = -1;
1841*25e8c5aaSvikram 			ctd->cond_contract.ct_qtime.ctm_start = -1;
1842*25e8c5aaSvikram 		}
1843*25e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
1844*25e8c5aaSvikram 	}
1845*25e8c5aaSvikram 
1846*25e8c5aaSvikram 	/*
1847*25e8c5aaSvikram 	 * If "negend" set counter back to initial state (-1) so that
1848*25e8c5aaSvikram 	 * other events can be published. Also clear the negotiation flag
1849*25e8c5aaSvikram 	 * on dip.
1850*25e8c5aaSvikram 	 *
1851*25e8c5aaSvikram 	 * 0 .. n are used for counting.
1852*25e8c5aaSvikram 	 * -1 indicates counter is available for use.
1853*25e8c5aaSvikram 	 */
1854*25e8c5aaSvikram 	if (negend) {
1855*25e8c5aaSvikram 		/*
1856*25e8c5aaSvikram 		 * devi_ct_count not necessarily 0. We may have
1857*25e8c5aaSvikram 		 * timed out in which case, count will be non-zero.
1858*25e8c5aaSvikram 		 */
1859*25e8c5aaSvikram 		ct_barrier_release(dip);
1860*25e8c5aaSvikram 		DEVI(dip)->devi_ct_neg = 0;
1861*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
1862*25e8c5aaSvikram 		    (void *)dip));
1863*25e8c5aaSvikram 	} else if (DEVI(dip)->devi_ct_neg) {
1864*25e8c5aaSvikram 		ASSERT(match);
1865*25e8c5aaSvikram 		ASSERT(!ct_barrier_empty(dip));
1866*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
1867*25e8c5aaSvikram 		    DEVI(dip)->devi_ct_count, (void *)dip));
1868*25e8c5aaSvikram 	} else {
1869*25e8c5aaSvikram 		/*
1870*25e8c5aaSvikram 		 * for non-negotiated events or subscribed events or no
1871*25e8c5aaSvikram 		 * matching contracts
1872*25e8c5aaSvikram 		 */
1873*25e8c5aaSvikram 		ASSERT(ct_barrier_empty(dip));
1874*25e8c5aaSvikram 		ASSERT(DEVI(dip)->devi_ct_neg == 0);
1875*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
1876*25e8c5aaSvikram 		    "dip=%p", (void *)dip));
1877*25e8c5aaSvikram 
1878*25e8c5aaSvikram 		/*
1879*25e8c5aaSvikram 		 * only this function when called from contract_device_negend()
1880*25e8c5aaSvikram 		 * can reset the counter to READY state i.e. -1. This function
1881*25e8c5aaSvikram 		 * is so called for every event whether a NEGEND event is needed
1882*25e8c5aaSvikram 		 * or not, but the negend event is only published if the event
1883*25e8c5aaSvikram 		 * whose end they signal is a negotiated event for the contract.
1884*25e8c5aaSvikram 		 */
1885*25e8c5aaSvikram 	}
1886*25e8c5aaSvikram 
1887*25e8c5aaSvikram 	if (!match) {
1888*25e8c5aaSvikram 		/* No matching contracts */
1889*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: No matching contract"));
1890*25e8c5aaSvikram 		result = CT_NONE;
1891*25e8c5aaSvikram 	} else if (result == CT_NACK) {
1892*25e8c5aaSvikram 		/* a non-negotiable contract exists and this is a neg. event */
1893*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
1894*25e8c5aaSvikram 		(void) wait_for_acks(dip, dev, spec_type, evtype);
1895*25e8c5aaSvikram 	} else if (DEVI(dip)->devi_ct_neg) {
1896*25e8c5aaSvikram 		/* one or more contracts going through negotiations */
1897*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
1898*25e8c5aaSvikram 		result = wait_for_acks(dip, dev, spec_type, evtype);
1899*25e8c5aaSvikram 	} else {
1900*25e8c5aaSvikram 		/* no negotiated contracts or no broken contracts or NEGEND */
1901*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
1902*25e8c5aaSvikram 		result = CT_ACK;
1903*25e8c5aaSvikram 	}
1904*25e8c5aaSvikram 
1905*25e8c5aaSvikram 	/*
1906*25e8c5aaSvikram 	 * Release the lock only now so that the only point where we
1907*25e8c5aaSvikram 	 * drop the lock is in wait_for_acks(). This is so that we don't
1908*25e8c5aaSvikram 	 * miss cv_signal/cv_broadcast from contract holders
1909*25e8c5aaSvikram 	 */
1910*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
1911*25e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
1912*25e8c5aaSvikram 
1913*25e8c5aaSvikram out:
1914*25e8c5aaSvikram 	if (tnvl)
1915*25e8c5aaSvikram 		nvlist_free(tnvl);
1916*25e8c5aaSvikram 	if (path)
1917*25e8c5aaSvikram 		kmem_free(path, MAXPATHLEN);
1918*25e8c5aaSvikram 
1919*25e8c5aaSvikram 
1920*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
1921*25e8c5aaSvikram 	return (result);
1922*25e8c5aaSvikram }
1923*25e8c5aaSvikram 
1924*25e8c5aaSvikram 
1925*25e8c5aaSvikram /*
1926*25e8c5aaSvikram  * contract_device_offline
1927*25e8c5aaSvikram  *
1928*25e8c5aaSvikram  * Event publishing routine called by I/O framework when a device is offlined.
1929*25e8c5aaSvikram  */
1930*25e8c5aaSvikram ct_ack_t
1931*25e8c5aaSvikram contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
1932*25e8c5aaSvikram {
1933*25e8c5aaSvikram 	nvlist_t *nvl;
1934*25e8c5aaSvikram 	uint_t result;
1935*25e8c5aaSvikram 	uint_t evtype;
1936*25e8c5aaSvikram 
1937*25e8c5aaSvikram 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1938*25e8c5aaSvikram 
1939*25e8c5aaSvikram 	evtype = CT_DEV_EV_OFFLINE;
1940*25e8c5aaSvikram 	result = contract_device_publish(dip, dev, spec_type, evtype, nvl);
1941*25e8c5aaSvikram 
1942*25e8c5aaSvikram 	/*
1943*25e8c5aaSvikram 	 * If a contract offline is NACKED, the framework expects us to call
1944*25e8c5aaSvikram 	 * NEGEND ourselves, since we know the final result
1945*25e8c5aaSvikram 	 */
1946*25e8c5aaSvikram 	if (result == CT_NACK) {
1947*25e8c5aaSvikram 		contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
1948*25e8c5aaSvikram 	}
1949*25e8c5aaSvikram 
1950*25e8c5aaSvikram 	return (result);
1951*25e8c5aaSvikram }
1952*25e8c5aaSvikram 
1953*25e8c5aaSvikram /*
1954*25e8c5aaSvikram  * contract_device_degrade
1955*25e8c5aaSvikram  *
1956*25e8c5aaSvikram  * Event publishing routine called by I/O framework when a device
1957*25e8c5aaSvikram  * moves to degrade state.
1958*25e8c5aaSvikram  */
1959*25e8c5aaSvikram /*ARGSUSED*/
1960*25e8c5aaSvikram void
1961*25e8c5aaSvikram contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
1962*25e8c5aaSvikram {
1963*25e8c5aaSvikram 	nvlist_t *nvl;
1964*25e8c5aaSvikram 	uint_t evtype;
1965*25e8c5aaSvikram 
1966*25e8c5aaSvikram 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1967*25e8c5aaSvikram 
1968*25e8c5aaSvikram 	evtype = CT_DEV_EV_DEGRADED;
1969*25e8c5aaSvikram 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
1970*25e8c5aaSvikram }
1971*25e8c5aaSvikram 
1972*25e8c5aaSvikram /*
1973*25e8c5aaSvikram  * contract_device_undegrade
1974*25e8c5aaSvikram  *
1975*25e8c5aaSvikram  * Event publishing routine called by I/O framework when a device
1976*25e8c5aaSvikram  * moves from degraded state to online state.
1977*25e8c5aaSvikram  */
1978*25e8c5aaSvikram /*ARGSUSED*/
1979*25e8c5aaSvikram void
1980*25e8c5aaSvikram contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
1981*25e8c5aaSvikram {
1982*25e8c5aaSvikram 	nvlist_t *nvl;
1983*25e8c5aaSvikram 	uint_t evtype;
1984*25e8c5aaSvikram 
1985*25e8c5aaSvikram 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1986*25e8c5aaSvikram 
1987*25e8c5aaSvikram 	evtype = CT_DEV_EV_ONLINE;
1988*25e8c5aaSvikram 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
1989*25e8c5aaSvikram }
1990*25e8c5aaSvikram 
1991*25e8c5aaSvikram /*
1992*25e8c5aaSvikram  * For all contracts which have undergone a negotiation (because the device
1993*25e8c5aaSvikram  * moved out of the acceptable state for that contract and the state
1994*25e8c5aaSvikram  * change is synchronous i.e. requires negotiation) this routine publishes
1995*25e8c5aaSvikram  * a CT_EV_NEGEND event with the final disposition of the event.
1996*25e8c5aaSvikram  *
1997*25e8c5aaSvikram  * This event is always a critical event.
1998*25e8c5aaSvikram  */
1999*25e8c5aaSvikram void
2000*25e8c5aaSvikram contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
2001*25e8c5aaSvikram {
2002*25e8c5aaSvikram 	nvlist_t *nvl;
2003*25e8c5aaSvikram 	uint_t evtype;
2004*25e8c5aaSvikram 
2005*25e8c5aaSvikram 	ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);
2006*25e8c5aaSvikram 
2007*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
2008*25e8c5aaSvikram 	    "dip: %p", result, (void *)dip));
2009*25e8c5aaSvikram 
2010*25e8c5aaSvikram 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2011*25e8c5aaSvikram 	VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
2012*25e8c5aaSvikram 	    result == CT_EV_SUCCESS ? 1 : 0) == 0);
2013*25e8c5aaSvikram 
2014*25e8c5aaSvikram 	evtype = CT_EV_NEGEND;
2015*25e8c5aaSvikram 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
2016*25e8c5aaSvikram 
2017*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
2018*25e8c5aaSvikram 	    (void *)dip));
2019*25e8c5aaSvikram }
2020*25e8c5aaSvikram 
2021*25e8c5aaSvikram /*
2022*25e8c5aaSvikram  * Wrapper routine called by other subsystems (such as LDI) to start
2023*25e8c5aaSvikram  * negotiations when a synchronous device state change occurs.
2024*25e8c5aaSvikram  * Returns CT_ACK or CT_NACK.
2025*25e8c5aaSvikram  */
2026*25e8c5aaSvikram ct_ack_t
2027*25e8c5aaSvikram contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
2028*25e8c5aaSvikram     uint_t evtype)
2029*25e8c5aaSvikram {
2030*25e8c5aaSvikram 	int	result;
2031*25e8c5aaSvikram 
2032*25e8c5aaSvikram 	ASSERT(dip);
2033*25e8c5aaSvikram 	ASSERT(dev != NODEV);
2034*25e8c5aaSvikram 	ASSERT(dev != DDI_DEV_T_ANY);
2035*25e8c5aaSvikram 	ASSERT(dev != DDI_DEV_T_NONE);
2036*25e8c5aaSvikram 	ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
2037*25e8c5aaSvikram 
2038*25e8c5aaSvikram 	switch (evtype) {
2039*25e8c5aaSvikram 	case CT_DEV_EV_OFFLINE:
2040*25e8c5aaSvikram 		result = contract_device_offline(dip, dev, spec_type);
2041*25e8c5aaSvikram 		break;
2042*25e8c5aaSvikram 	default:
2043*25e8c5aaSvikram 		cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
2044*25e8c5aaSvikram 		    "not supported: event (%d) for dev_t (%lu) and spec (%d), "
2045*25e8c5aaSvikram 		    "dip (%p)", evtype, dev, spec_type, (void *)dip);
2046*25e8c5aaSvikram 		result = CT_NACK;
2047*25e8c5aaSvikram 		break;
2048*25e8c5aaSvikram 	}
2049*25e8c5aaSvikram 
2050*25e8c5aaSvikram 	return (result);
2051*25e8c5aaSvikram }
2052*25e8c5aaSvikram 
2053*25e8c5aaSvikram /*
2054*25e8c5aaSvikram  * A wrapper routine called by other subsystems (such as the LDI) to
2055*25e8c5aaSvikram  * finalize event processing for a state change event. For synchronous
2056*25e8c5aaSvikram  * state changes, this publishes NEGEND events. For asynchronous i.e.
2057*25e8c5aaSvikram  * non-negotiable events this publishes the event.
2058*25e8c5aaSvikram  */
2059*25e8c5aaSvikram void
2060*25e8c5aaSvikram contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
2061*25e8c5aaSvikram     uint_t evtype, int ct_result)
2062*25e8c5aaSvikram {
2063*25e8c5aaSvikram 	ASSERT(dip);
2064*25e8c5aaSvikram 	ASSERT(dev != NODEV);
2065*25e8c5aaSvikram 	ASSERT(dev != DDI_DEV_T_ANY);
2066*25e8c5aaSvikram 	ASSERT(dev != DDI_DEV_T_NONE);
2067*25e8c5aaSvikram 	ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
2068*25e8c5aaSvikram 
2069*25e8c5aaSvikram 	switch (evtype) {
2070*25e8c5aaSvikram 	case CT_DEV_EV_OFFLINE:
2071*25e8c5aaSvikram 		contract_device_negend(dip, dev, spec_type, ct_result);
2072*25e8c5aaSvikram 		break;
2073*25e8c5aaSvikram 	case CT_DEV_EV_DEGRADED:
2074*25e8c5aaSvikram 		contract_device_degrade(dip, dev, spec_type);
2075*25e8c5aaSvikram 		contract_device_negend(dip, dev, spec_type, ct_result);
2076*25e8c5aaSvikram 		break;
2077*25e8c5aaSvikram 	case CT_DEV_EV_ONLINE:
2078*25e8c5aaSvikram 		contract_device_undegrade(dip, dev, spec_type);
2079*25e8c5aaSvikram 		contract_device_negend(dip, dev, spec_type, ct_result);
2080*25e8c5aaSvikram 		break;
2081*25e8c5aaSvikram 	default:
2082*25e8c5aaSvikram 		cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
2083*25e8c5aaSvikram 		    "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
2084*25e8c5aaSvikram 		    evtype, dev, spec_type, (void *)dip);
2085*25e8c5aaSvikram 		break;
2086*25e8c5aaSvikram 	}
2087*25e8c5aaSvikram }
2088*25e8c5aaSvikram 
2089*25e8c5aaSvikram /*
2090*25e8c5aaSvikram  * Called by I/O framework when a devinfo node is freed to remove the
2091*25e8c5aaSvikram  * association between a devinfo node and its contracts.
2092*25e8c5aaSvikram  */
2093*25e8c5aaSvikram void
2094*25e8c5aaSvikram contract_device_remove_dip(dev_info_t *dip)
2095*25e8c5aaSvikram {
2096*25e8c5aaSvikram 	cont_device_t *ctd;
2097*25e8c5aaSvikram 	cont_device_t *next;
2098*25e8c5aaSvikram 	contract_t *ct;
2099*25e8c5aaSvikram 
2100*25e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
2101*25e8c5aaSvikram 	ct_barrier_wait_for_release(dip);
2102*25e8c5aaSvikram 
2103*25e8c5aaSvikram 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) {
2104*25e8c5aaSvikram 		next = list_next(&(DEVI(dip)->devi_ct), ctd);
2105*25e8c5aaSvikram 		list_remove(&(DEVI(dip)->devi_ct), ctd);
2106*25e8c5aaSvikram 		ct = &ctd->cond_contract;
2107*25e8c5aaSvikram 		/*
2108*25e8c5aaSvikram 		 * Unlink the dip associated with this contract
2109*25e8c5aaSvikram 		 */
2110*25e8c5aaSvikram 		mutex_enter(&ct->ct_lock);
2111*25e8c5aaSvikram 		ASSERT(ctd->cond_dip == dip);
2112*25e8c5aaSvikram 		ctd->cond_dip = NULL; /* no longer linked to dip */
2113*25e8c5aaSvikram 		contract_rele(ct);	/* remove hold for dip linkage */
2114*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
2115*25e8c5aaSvikram 		    "ctid: %d", ct->ct_id));
2116*25e8c5aaSvikram 		mutex_exit(&ct->ct_lock);
2117*25e8c5aaSvikram 	}
2118*25e8c5aaSvikram 	ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
2119*25e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
2120*25e8c5aaSvikram }
2121*25e8c5aaSvikram 
2122*25e8c5aaSvikram /*
2123*25e8c5aaSvikram  * Barrier related routines
2124*25e8c5aaSvikram  */
2125*25e8c5aaSvikram static void
2126*25e8c5aaSvikram ct_barrier_acquire(dev_info_t *dip)
2127*25e8c5aaSvikram {
2128*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2129*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
2130*25e8c5aaSvikram 	while (DEVI(dip)->devi_ct_count != -1)
2131*25e8c5aaSvikram 		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
2132*25e8c5aaSvikram 	DEVI(dip)->devi_ct_count = 0;
2133*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
2134*25e8c5aaSvikram }
2135*25e8c5aaSvikram 
2136*25e8c5aaSvikram static void
2137*25e8c5aaSvikram ct_barrier_release(dev_info_t *dip)
2138*25e8c5aaSvikram {
2139*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2140*25e8c5aaSvikram 	ASSERT(DEVI(dip)->devi_ct_count != -1);
2141*25e8c5aaSvikram 	DEVI(dip)->devi_ct_count = -1;
2142*25e8c5aaSvikram 	cv_broadcast(&(DEVI(dip)->devi_ct_cv));
2143*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
2144*25e8c5aaSvikram }
2145*25e8c5aaSvikram 
2146*25e8c5aaSvikram static int
2147*25e8c5aaSvikram ct_barrier_held(dev_info_t *dip)
2148*25e8c5aaSvikram {
2149*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2150*25e8c5aaSvikram 	return (DEVI(dip)->devi_ct_count != -1);
2151*25e8c5aaSvikram }
2152*25e8c5aaSvikram 
2153*25e8c5aaSvikram static int
2154*25e8c5aaSvikram ct_barrier_empty(dev_info_t *dip)
2155*25e8c5aaSvikram {
2156*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2157*25e8c5aaSvikram 	ASSERT(DEVI(dip)->devi_ct_count != -1);
2158*25e8c5aaSvikram 	return (DEVI(dip)->devi_ct_count == 0);
2159*25e8c5aaSvikram }
2160*25e8c5aaSvikram 
2161*25e8c5aaSvikram static void
2162*25e8c5aaSvikram ct_barrier_wait_for_release(dev_info_t *dip)
2163*25e8c5aaSvikram {
2164*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2165*25e8c5aaSvikram 	while (DEVI(dip)->devi_ct_count != -1)
2166*25e8c5aaSvikram 		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
2167*25e8c5aaSvikram }
2168*25e8c5aaSvikram 
2169*25e8c5aaSvikram static void
2170*25e8c5aaSvikram ct_barrier_decr(dev_info_t *dip)
2171*25e8c5aaSvikram {
2172*25e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "barrier_decr:  ct_count before decr: %d",
2173*25e8c5aaSvikram 	    DEVI(dip)->devi_ct_count));
2174*25e8c5aaSvikram 
2175*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2176*25e8c5aaSvikram 	ASSERT(DEVI(dip)->devi_ct_count > 0);
2177*25e8c5aaSvikram 
2178*25e8c5aaSvikram 	DEVI(dip)->devi_ct_count--;
2179*25e8c5aaSvikram 	if (DEVI(dip)->devi_ct_count == 0) {
2180*25e8c5aaSvikram 		cv_broadcast(&DEVI(dip)->devi_ct_cv);
2181*25e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
2182*25e8c5aaSvikram 	}
2183*25e8c5aaSvikram }
2184*25e8c5aaSvikram 
2185*25e8c5aaSvikram static void
2186*25e8c5aaSvikram ct_barrier_incr(dev_info_t *dip)
2187*25e8c5aaSvikram {
2188*25e8c5aaSvikram 	ASSERT(ct_barrier_held(dip));
2189*25e8c5aaSvikram 	DEVI(dip)->devi_ct_count++;
2190*25e8c5aaSvikram }
2191*25e8c5aaSvikram 
2192*25e8c5aaSvikram static int
2193*25e8c5aaSvikram ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
2194*25e8c5aaSvikram {
2195*25e8c5aaSvikram 	clock_t abstime;
2196*25e8c5aaSvikram 
2197*25e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2198*25e8c5aaSvikram 
2199*25e8c5aaSvikram 	abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000);
2200*25e8c5aaSvikram 	while (DEVI(dip)->devi_ct_count) {
2201*25e8c5aaSvikram 		if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
2202*25e8c5aaSvikram 		    &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
2203*25e8c5aaSvikram 			return (-1);
2204*25e8c5aaSvikram 		}
2205*25e8c5aaSvikram 	}
2206*25e8c5aaSvikram 	return (0);
2207*25e8c5aaSvikram }
2208