xref: /titanic_51/usr/src/uts/common/contract/device.c (revision 320fb3728d7a41ef534ad3c46554215829c86386)
125e8c5aaSvikram /*
225e8c5aaSvikram  * CDDL HEADER START
325e8c5aaSvikram  *
425e8c5aaSvikram  * The contents of this file are subject to the terms of the
525e8c5aaSvikram  * Common Development and Distribution License (the "License").
625e8c5aaSvikram  * You may not use this file except in compliance with the License.
725e8c5aaSvikram  *
825e8c5aaSvikram  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
925e8c5aaSvikram  * or http://www.opensolaris.org/os/licensing.
1025e8c5aaSvikram  * See the License for the specific language governing permissions
1125e8c5aaSvikram  * and limitations under the License.
1225e8c5aaSvikram  *
1325e8c5aaSvikram  * When distributing Covered Code, include this CDDL HEADER in each
1425e8c5aaSvikram  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1525e8c5aaSvikram  * If applicable, add the following below this CDDL HEADER, with the
1625e8c5aaSvikram  * fields enclosed by brackets "[]" replaced with your own identifying
1725e8c5aaSvikram  * information: Portions Copyright [yyyy] [name of copyright owner]
1825e8c5aaSvikram  *
1925e8c5aaSvikram  * CDDL HEADER END
2025e8c5aaSvikram  */
2125e8c5aaSvikram /*
222eb07f5eSStephen Hanson  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
2325e8c5aaSvikram  */
2425e8c5aaSvikram 
2525e8c5aaSvikram #include <sys/mutex.h>
2625e8c5aaSvikram #include <sys/debug.h>
2725e8c5aaSvikram #include <sys/types.h>
2825e8c5aaSvikram #include <sys/param.h>
2925e8c5aaSvikram #include <sys/kmem.h>
3025e8c5aaSvikram #include <sys/thread.h>
3125e8c5aaSvikram #include <sys/id_space.h>
3225e8c5aaSvikram #include <sys/avl.h>
3325e8c5aaSvikram #include <sys/list.h>
3425e8c5aaSvikram #include <sys/sysmacros.h>
3525e8c5aaSvikram #include <sys/proc.h>
3625e8c5aaSvikram #include <sys/contract.h>
3725e8c5aaSvikram #include <sys/contract_impl.h>
3825e8c5aaSvikram #include <sys/contract/device.h>
3925e8c5aaSvikram #include <sys/contract/device_impl.h>
4025e8c5aaSvikram #include <sys/cmn_err.h>
4125e8c5aaSvikram #include <sys/nvpair.h>
4225e8c5aaSvikram #include <sys/policy.h>
4325e8c5aaSvikram #include <sys/ddi_impldefs.h>
4425e8c5aaSvikram #include <sys/ddi_implfuncs.h>
4525e8c5aaSvikram #include <sys/systm.h>
4625e8c5aaSvikram #include <sys/stat.h>
4725e8c5aaSvikram #include <sys/sunddi.h>
4825e8c5aaSvikram #include <sys/esunddi.h>
4925e8c5aaSvikram #include <sys/ddi.h>
5025e8c5aaSvikram #include <sys/fs/dv_node.h>
5125e8c5aaSvikram #include <sys/sunndi.h>
5225e8c5aaSvikram #undef ct_lock	/* needed because clnt.h defines ct_lock as a macro */
5325e8c5aaSvikram 
5425e8c5aaSvikram /*
5525e8c5aaSvikram  * Device Contracts
5625e8c5aaSvikram  * -----------------
5725e8c5aaSvikram  * This file contains the core code for the device contracts framework.
5825e8c5aaSvikram  * A device contract is an agreement or a contract between a process and
5925e8c5aaSvikram  * the kernel regarding the state of the device. A device contract may be
6025e8c5aaSvikram  * created when a relationship is formed between a device and a process
6125e8c5aaSvikram  * i.e. at open(2) time, or it may be created at some point after the device
6225e8c5aaSvikram  * has been opened. A device contract once formed may be broken by either party.
6325e8c5aaSvikram  * A device contract can be broken by the process by an explicit abandon of the
6425e8c5aaSvikram  * contract or by an implicit abandon when the process exits. A device contract
6525e8c5aaSvikram  * can be broken by the kernel either asynchronously (without negotiation) or
6625e8c5aaSvikram  * synchronously (with negotiation). Exactly which happens depends on the device
6725e8c5aaSvikram  * state transition. The following state diagram shows the transitions between
 * device states. Only device state transitions currently supported by device
 * contracts are shown.
7025e8c5aaSvikram  *
7125e8c5aaSvikram  *                              <-- A -->
7225e8c5aaSvikram  *                       /-----------------> DEGRADED
7325e8c5aaSvikram  *                       |                      |
7425e8c5aaSvikram  *                       |                      |
7525e8c5aaSvikram  *                       |                      | S
7625e8c5aaSvikram  *                       |                      | |
7725e8c5aaSvikram  *                       |                      | v
7825e8c5aaSvikram  *                       v       S -->          v
7925e8c5aaSvikram  *                      ONLINE ------------> OFFLINE
8025e8c5aaSvikram  *
8125e8c5aaSvikram  *
8225e8c5aaSvikram  * In the figure above, the arrows indicate the direction of transition. The
8325e8c5aaSvikram  * letter S refers to transitions which are inherently synchronous i.e.
8425e8c5aaSvikram  * require negotiation and the letter A indicates transitions which are
8525e8c5aaSvikram  * asynchronous i.e. are done without contract negotiations. A good example
8625e8c5aaSvikram  * of a synchronous transition is the ONLINE -> OFFLINE transition. This
8725e8c5aaSvikram  * transition cannot happen as long as there are consumers which have the
8825e8c5aaSvikram  * device open. Thus some form of negotiation needs to happen between the
8925e8c5aaSvikram  * consumers and the kernel to ensure that consumers either close devices
9025e8c5aaSvikram  * or disallow the move to OFFLINE. Certain other transitions such as
9125e8c5aaSvikram  * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
9225e8c5aaSvikram  * non-negotiable. A device that suffers a fault that degrades its
9325e8c5aaSvikram  * capabilities will become degraded irrespective of what consumers it has,
9425e8c5aaSvikram  * so a negotiation in this case is pointless.
9525e8c5aaSvikram  *
9625e8c5aaSvikram  * The following device states are currently defined for device contracts:
9725e8c5aaSvikram  *
9825e8c5aaSvikram  *      CT_DEV_EV_ONLINE
9925e8c5aaSvikram  *              The device is online and functioning normally
10025e8c5aaSvikram  *      CT_DEV_EV_DEGRADED
10125e8c5aaSvikram  *              The device is online but is functioning in a degraded capacity
10225e8c5aaSvikram  *      CT_DEV_EV_OFFLINE
10325e8c5aaSvikram  *              The device is offline and is no longer configured
10425e8c5aaSvikram  *
10525e8c5aaSvikram  * A typical consumer of device contracts starts out with a contract
10625e8c5aaSvikram  * template and adds terms to that template. These include the
10725e8c5aaSvikram  * "acceptable set" (A-set) term, which is a bitset of device states which
10825e8c5aaSvikram  * are guaranteed by the contract. If the device moves out of a state in
10925e8c5aaSvikram  * the A-set, the contract is broken. The breaking of the contract can
11025e8c5aaSvikram  * be asynchronous in which case a critical contract event is sent to the
11125e8c5aaSvikram  * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
 * consumer and the kernel. The kernel does this by sending a critical
 * event to the consumer with the CTE_NEG flag set indicating that this
 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
11725e8c5aaSvikram  * privileges, it can send a NACK message to the kernel which will block
11825e8c5aaSvikram  * the device state change. To NACK a negotiable event, a process must
11925e8c5aaSvikram  * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
12025e8c5aaSvikram  *
12125e8c5aaSvikram  * Other terms include the "minor path" term, specified explicitly if the
12225e8c5aaSvikram  * contract is not being created at open(2) time or specified implicitly
12325e8c5aaSvikram  * if the contract is being created at open time via an activated template.
12425e8c5aaSvikram  *
12525e8c5aaSvikram  * A contract event is sent on any state change to which the contract
12625e8c5aaSvikram  * owner has subscribed via the informative or critical event sets. Only
12725e8c5aaSvikram  * critical events are guaranteed to be delivered. Since all device state
12825e8c5aaSvikram  * changes are controlled by the kernel and cannot be arbitrarily generated
12925e8c5aaSvikram  * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
13025e8c5aaSvikram  * need to be asserted in a process's effective set to designate an event as
13125e8c5aaSvikram  * critical. To ensure privacy, a process must either have the same effective
13225e8c5aaSvikram  * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
13325e8c5aaSvikram  * asserted in its effective set in order to observe device contract events
13425e8c5aaSvikram  * off the device contract type specific endpoint.
13525e8c5aaSvikram  *
13625e8c5aaSvikram  * Yet another term available with device contracts is the "non-negotiable"
13725e8c5aaSvikram  * term. This term is used to pre-specify a NACK to any contract negotiation.
13825e8c5aaSvikram  * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
14025e8c5aaSvikram  * non-negotiable. In this case, the device contract framework assumes a
14125e8c5aaSvikram  * NACK for any transition to OFFLINE and blocks the offline. If the A-set
14225e8c5aaSvikram  * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
14325e8c5aaSvikram  * are NACKed but transitions to DEGRADE succeed.
14425e8c5aaSvikram  *
14525e8c5aaSvikram  * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
14625e8c5aaSvikram  * happens just before the I/O framework attempts to offline a device
14725e8c5aaSvikram  * (i.e. detach a device and set the offline flag so that it cannot be
14825e8c5aaSvikram  * reattached). A device contract holder is expected to either NACK the offline
14925e8c5aaSvikram  * (if privileged) or release the device and allow the offline to proceed.
15025e8c5aaSvikram  *
15125e8c5aaSvikram  * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract)
15225e8c5aaSvikram  * is generated just before the I/O framework transitions the device state
15325e8c5aaSvikram  * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
15425e8c5aaSvikram  *
15525e8c5aaSvikram  * The contract holder is expected to ACK or NACK a negotiation event
15625e8c5aaSvikram  * within a certain period of time. If the ACK/NACK is not received
15725e8c5aaSvikram  * within the timeout period, the device contract framework will behave
15825e8c5aaSvikram  * as if the contract does not exist and will proceed with the event.
15925e8c5aaSvikram  *
16025e8c5aaSvikram  * Unlike a process contract a device contract does not need to exist
16125e8c5aaSvikram  * once it is abandoned, since it does not define a fault boundary. It
16225e8c5aaSvikram  * merely represents an agreement between a process and the kernel
16325e8c5aaSvikram  * regarding the state of the device. Once the process has abandoned
16425e8c5aaSvikram  * the contract (either implicitly via a process exit or explicitly)
16525e8c5aaSvikram  * the kernel has no reason to retain the contract. As a result
16625e8c5aaSvikram  * device contracts are neither inheritable nor need to exist in an
16725e8c5aaSvikram  * orphan state.
16825e8c5aaSvikram  *
16925e8c5aaSvikram  * A device unlike a process may exist in multiple contracts and has
17025e8c5aaSvikram  * a "life" outside a device contract. A device unlike a process
17125e8c5aaSvikram  * may exist without an associated contract. Unlike a process contract
17225e8c5aaSvikram  * a device contract may be formed after a binding relationship is
17325e8c5aaSvikram  * formed between a process and a device.
17425e8c5aaSvikram  *
17525e8c5aaSvikram  *	IMPLEMENTATION NOTES
17625e8c5aaSvikram  *	====================
17725e8c5aaSvikram  * DATA STRUCTURES
17825e8c5aaSvikram  * ----------------
17925e8c5aaSvikram  * 	The heart of the device contracts implementation is the device contract
18025e8c5aaSvikram  * 	private cont_device_t (or ctd for short) data structure. It encapsulates
18125e8c5aaSvikram  * 	the generic contract_t data structure and has a number of private
18225e8c5aaSvikram  *	fields.
18325e8c5aaSvikram  * 	These include:
18425e8c5aaSvikram  *		cond_minor: The minor device that is the subject of the contract
18525e8c5aaSvikram  *		cond_aset:  The bitset of states which are guaranteed by the
18625e8c5aaSvikram  *			   contract
18725e8c5aaSvikram  *		cond_noneg: If set, indicates that the result of negotiation has
18825e8c5aaSvikram  *			    been predefined to be a NACK
 * 	In addition, there are other device identifiers such as the devinfo
 * 	node, dev_t and spec_type of the minor node. There are also a few
 * 	fields that are used during negotiation to maintain state. See
19225e8c5aaSvikram  *		uts/common/sys/contract/device_impl.h
19325e8c5aaSvikram  * 	for details.
19425e8c5aaSvikram  * 	The ctd structure represents the device private part of a contract of
19525e8c5aaSvikram  * 	type "device"
19625e8c5aaSvikram  *
19725e8c5aaSvikram  * 	Another data structure used by device contracts is ctmpl_device. It is
19825e8c5aaSvikram  * 	the device contracts private part of the contract template structure. It
19925e8c5aaSvikram  *	encapsulates the generic template structure "ct_template_t" and includes
20025e8c5aaSvikram  *	the following device contract specific fields
20125e8c5aaSvikram  *		ctd_aset:   The bitset of states that should be guaranteed by a
20225e8c5aaSvikram  *			    contract
20325e8c5aaSvikram  *		ctd_noneg:  If set, indicates that contract should NACK a
20425e8c5aaSvikram  *			    negotiation
20525e8c5aaSvikram  *		ctd_minor:  The devfs_path (without the /devices prefix) of the
20625e8c5aaSvikram  *			    minor node that is the subject of the contract.
20725e8c5aaSvikram  *
20825e8c5aaSvikram  * ALGORITHMS
20925e8c5aaSvikram  * ---------
21025e8c5aaSvikram  * There are three sets of routines in this file
21125e8c5aaSvikram  * 	Template related routines
21225e8c5aaSvikram  * 	-------------------------
 *	These routines provide support for template related operations initiated
21425e8c5aaSvikram  *	via the generic template operations. These include routines that dup
21525e8c5aaSvikram  *	a template, free it, and set various terms in the template
21625e8c5aaSvikram  *	(such as the minor node path, the acceptable state set (or A-set)
21725e8c5aaSvikram  *	and the non-negotiable term) as well as a routine to query the
21825e8c5aaSvikram  *	device specific portion of the template for the abovementioned terms.
21925e8c5aaSvikram  *	There is also a routine to create (ctmpl_device_create) that is used to
22025e8c5aaSvikram  *	create a contract from a template. This routine calls (after initial
22125e8c5aaSvikram  *	setup) the common function used to create a device contract
22225e8c5aaSvikram  *	(contract_device_create).
22325e8c5aaSvikram  *
22425e8c5aaSvikram  *	core device contract implementation
22525e8c5aaSvikram  *	----------------------------------
22625e8c5aaSvikram  *	These routines support the generic contract framework to provide
22725e8c5aaSvikram  *	functionality that allows contracts to be created, managed and
22825e8c5aaSvikram  *	destroyed. The contract_device_create() routine is a routine used
22925e8c5aaSvikram  *	to create a contract from a template (either via an explicit create
23025e8c5aaSvikram  *	operation on a template or implicitly via an open with an
23125e8c5aaSvikram  *	activated template.). The contract_device_free() routine assists
23225e8c5aaSvikram  *	in freeing the device contract specific parts. There are routines
23325e8c5aaSvikram  *	used to abandon (contract_device_abandon) a device contract as well
23425e8c5aaSvikram  *	as a routine to destroy (which despite its name does not destroy,
23525e8c5aaSvikram  *	it only moves a contract to a dead state) a contract.
23625e8c5aaSvikram  *	There is also a routine to return status information about a
23725e8c5aaSvikram  *	contract - the level of detail depends on what is requested by the
23825e8c5aaSvikram  *	user. A value of CTD_FIXED only returns fixed length fields such
23925e8c5aaSvikram  *	as the A-set, state of device and value of the "noneg" term. If
24025e8c5aaSvikram  *	CTD_ALL is specified, the minor node path is returned as well.
24125e8c5aaSvikram  *
24225e8c5aaSvikram  *	In addition there are interfaces (contract_device_ack/nack) which
24325e8c5aaSvikram  *	are used to support negotiation between userland processes and
24425e8c5aaSvikram  *	device contracts. These interfaces record the acknowledgement
24525e8c5aaSvikram  *	or lack thereof for negotiation events and help determine if the
24625e8c5aaSvikram  *	negotiated event should occur.
24725e8c5aaSvikram  *
24825e8c5aaSvikram  *	"backend routines"
24925e8c5aaSvikram  *	-----------------
25025e8c5aaSvikram  *	The backend routines form the interface between the I/O framework
25125e8c5aaSvikram  *	and the device contract subsystem. These routines, allow the I/O
25225e8c5aaSvikram  *	framework to call into the device contract subsystem to notify it of
25325e8c5aaSvikram  *	impending changes to a device state as well as to inform of the
25425e8c5aaSvikram  *	final disposition of such attempted state changes. Routines in this
25525e8c5aaSvikram  *	class include contract_device_offline() that indicates an attempt to
25625e8c5aaSvikram  *	offline a device, contract_device_degrade() that indicates that
25725e8c5aaSvikram  *	a device is moving to the degraded state and contract_device_negend()
25825e8c5aaSvikram  *	that is used by the I/O framework to inform the contracts subsystem of
25925e8c5aaSvikram  *	the final disposition of an attempted operation.
26025e8c5aaSvikram  *
26125e8c5aaSvikram  *	SUMMARY
26225e8c5aaSvikram  *	-------
26325e8c5aaSvikram  *      A contract starts its life as a template. A process allocates a device
26425e8c5aaSvikram  *	contract template and sets various terms:
26525e8c5aaSvikram  *		The A-set
26625e8c5aaSvikram  *		The device minor node
26725e8c5aaSvikram  *		Critical and informative events
 *		The noneg i.e. no negotiation term
26925e8c5aaSvikram  *	Setting of these terms in the template is done via the
27025e8c5aaSvikram  *	ctmpl_device_set() entry point in this file. A process can query a
27125e8c5aaSvikram  *	template to determine the terms already set in the template - this is
27225e8c5aaSvikram  *	facilitated by the ctmpl_device_get() routine.
27325e8c5aaSvikram  *
27425e8c5aaSvikram  *	Once all the appropriate terms are set, the contract is instantiated via
27525e8c5aaSvikram  *	one of two methods
27625e8c5aaSvikram  *	- via an explicit create operation - this is facilitated by the
27725e8c5aaSvikram  *	  ctmpl_device_create() entry point
27825e8c5aaSvikram  *	- synchronously with the open(2) system call - this is achieved via the
27925e8c5aaSvikram  *	  contract_device_open() routine.
28025e8c5aaSvikram  *	The core work for both these above functions is done by
28125e8c5aaSvikram  *	contract_device_create()
28225e8c5aaSvikram  *
28325e8c5aaSvikram  *	A contract once created can be queried for its status. Support for
28425e8c5aaSvikram  *	status info is provided by both the common contracts framework and by
28525e8c5aaSvikram  *	the "device" contract type. If the level of detail requested is
28625e8c5aaSvikram  *	CTD_COMMON, only the common contract framework data is used. Higher
28725e8c5aaSvikram  *	levels of detail result in calls to contract_device_status() to supply
28825e8c5aaSvikram  *	device contract type specific status information.
28925e8c5aaSvikram  *
 *	A contract once created may be abandoned either explicitly or
 *	implicitly. In either case, the contract_device_abandon() function is
 *	invoked. This function merely calls contract_destroy() which moves the
 *	contract to the DEAD state. The device contract portion of destroy
 *	processing is provided by contract_device_destroy() which merely
 *	disassociates the contract from its device devinfo node. A contract in
 *	the DEAD state is not freed. It hangs around until all references to
 *	the contract are gone. When that happens, the contract is finally
 *	deallocated. The device contract specific portion of the free is done
 *	by contract_device_free() which finally frees the device contract
 *	specific data structure (cont_device_t).
30125e8c5aaSvikram  *
30225e8c5aaSvikram  *	When a device undergoes a state change, the I/O framework calls the
30325e8c5aaSvikram  *	corresponding device contract entry point. For example, when a device
30425e8c5aaSvikram  *	is about to go OFFLINE, the routine contract_device_offline() is
30525e8c5aaSvikram  *	invoked. Similarly if a device moves to DEGRADED state, the routine
30625e8c5aaSvikram  *	contract_device_degrade() function is called. These functions call the
30725e8c5aaSvikram  *	core routine contract_device_publish(). This function determines via
30825e8c5aaSvikram  *	the function is_sync_neg() whether an event is a synchronous (i.e.
30925e8c5aaSvikram  *	negotiable) event or not. In the former case contract_device_publish()
31025e8c5aaSvikram  *	publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
31125e8c5aaSvikram  *	and/or NACKs from contract holders. In the latter case, it simply
31225e8c5aaSvikram  *	publishes the event and does not wait. In the negotiation case, ACKs or
31325e8c5aaSvikram  *	NACKs from userland consumers results in contract_device_ack_nack()
31425e8c5aaSvikram  *	being called where the result of the negotiation is recorded in the
31525e8c5aaSvikram  *	contract data structure. Once all outstanding contract owners have
31625e8c5aaSvikram  *	responded, the device contract code in wait_for_acks() determines the
31725e8c5aaSvikram  *	final result of the negotiation. A single NACK overrides all other ACKs
31825e8c5aaSvikram  *	If there is no NACK, then a single ACK will result in an overall ACK
31925e8c5aaSvikram  *	result. If there are no ACKs or NACKs, then the result CT_NONE is
32025e8c5aaSvikram  *	returned back to the I/O framework. Once the event is permitted or
32125e8c5aaSvikram  *	blocked, the I/O framework proceeds or aborts the state change. The
32225e8c5aaSvikram  *	I/O framework then calls contract_device_negend() with a result code
32325e8c5aaSvikram  *	indicating final disposition of the event. This call releases the
32425e8c5aaSvikram  *	barrier and other state associated with the previous negotiation,
32525e8c5aaSvikram  *	which permits the next event (if any) to come into the device contract
32625e8c5aaSvikram  *	framework.
32725e8c5aaSvikram  *
32825e8c5aaSvikram  *	Finally, a device that has outstanding contracts may be removed from
32925e8c5aaSvikram  *	the system which results in its devinfo node being freed. The devinfo
33025e8c5aaSvikram  *	free routine in the I/O framework, calls into the device contract
 *	function - contract_device_remove_dip(). This routine disassociates
 *	the dip from all contracts associated with the devinfo node being
 *	freed, allowing the devinfo node to be freed.
33425e8c5aaSvikram  *
33525e8c5aaSvikram  * LOCKING
33625e8c5aaSvikram  * ---------
33725e8c5aaSvikram  * 	There are four sets of data that need to be protected by locks
33825e8c5aaSvikram  *
33925e8c5aaSvikram  *	i) device contract specific portion of the contract template - This data
34025e8c5aaSvikram  *	is protected by the template lock ctmpl_lock.
34125e8c5aaSvikram  *
34225e8c5aaSvikram  *	ii) device contract specific portion of the contract - This data is
34325e8c5aaSvikram  *	protected by the contract lock ct_lock
34425e8c5aaSvikram  *
34525e8c5aaSvikram  *	iii) The linked list of contracts hanging off a devinfo node - This
34625e8c5aaSvikram  *	list is protected by the per-devinfo node lock devi_ct_lock
34725e8c5aaSvikram  *
34825e8c5aaSvikram  *	iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
34925e8c5aaSvikram  *	and devi_ct_count that controls state changes to a dip
35025e8c5aaSvikram  *
35125e8c5aaSvikram  *	The template lock is independent in that none of the other locks in this
35225e8c5aaSvikram  *	file may be taken while holding the template lock (and vice versa).
35325e8c5aaSvikram  *
35425e8c5aaSvikram  *	The remaining three locks have the following lock order
35525e8c5aaSvikram  *
35625e8c5aaSvikram  *	devi_ct_lock  -> ct_count barrier ->  ct_lock
35725e8c5aaSvikram  *
35825e8c5aaSvikram  */
35925e8c5aaSvikram 
36025e8c5aaSvikram static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
36125e8c5aaSvikram     int spec_type, proc_t *owner, int *errorp);
36225e8c5aaSvikram 
36325e8c5aaSvikram /* barrier routines */
36425e8c5aaSvikram static void ct_barrier_acquire(dev_info_t *dip);
36525e8c5aaSvikram static void ct_barrier_release(dev_info_t *dip);
36625e8c5aaSvikram static int ct_barrier_held(dev_info_t *dip);
36725e8c5aaSvikram static int ct_barrier_empty(dev_info_t *dip);
36825e8c5aaSvikram static void ct_barrier_wait_for_release(dev_info_t *dip);
36925e8c5aaSvikram static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
37025e8c5aaSvikram static void ct_barrier_decr(dev_info_t *dip);
37125e8c5aaSvikram static void ct_barrier_incr(dev_info_t *dip);
37225e8c5aaSvikram 
37325e8c5aaSvikram ct_type_t *device_type;
37425e8c5aaSvikram 
/*
 * Macro predicates for determining when events should be sent and how.
 *
 * EVSENDP(ctd, flag)
 *	Non-zero if any bit of "flag" is set in either the informative or the
 *	critical event set of the contract embedded in "ctd".
 *
 * EVINFOP(ctd, flag)
 *	Non-zero if no bit of "flag" is set in the critical event set of the
 *	contract embedded in "ctd".
 *
 * Both arguments are parenthesized in the expansion so that compound
 * expressions (e.g. "p + 1" or "A | B") are evaluated as a unit rather than
 * being re-associated by operator precedence. Note that EVSENDP evaluates
 * "ctd" twice, so it must not be passed an expression with side effects.
 */
#define	EVSENDP(ctd, flag) \
	(((ctd)->cond_contract.ct_ev_info | (ctd)->cond_contract.ct_ev_crit) & \
	    (flag))

#define	EVINFOP(ctd, flag) \
	(((ctd)->cond_contract.ct_ev_crit & (flag)) == 0)
38325e8c5aaSvikram 
38425e8c5aaSvikram /*
38525e8c5aaSvikram  * State transition table showing which transitions are synchronous and which
38625e8c5aaSvikram  * are not.
38725e8c5aaSvikram  */
38825e8c5aaSvikram struct ct_dev_negtable {
38925e8c5aaSvikram 	uint_t	st_old;
39025e8c5aaSvikram 	uint_t	st_new;
39125e8c5aaSvikram 	uint_t	st_neg;
39225e8c5aaSvikram } ct_dev_negtable[] = {
39325e8c5aaSvikram 	{CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE,	1},
39425e8c5aaSvikram 	{CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED,	0},
39525e8c5aaSvikram 	{CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE,	0},
39625e8c5aaSvikram 	{CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE,	1},
39725e8c5aaSvikram 	{0}
39825e8c5aaSvikram };
39925e8c5aaSvikram 
40025e8c5aaSvikram /*
40125e8c5aaSvikram  * Device contract template implementation
40225e8c5aaSvikram  */
40325e8c5aaSvikram 
40425e8c5aaSvikram /*
40525e8c5aaSvikram  * ctmpl_device_dup
40625e8c5aaSvikram  *
40725e8c5aaSvikram  * The device contract template dup entry point.
40825e8c5aaSvikram  * This simply copies all the fields (generic as well as device contract
40925e8c5aaSvikram  * specific) fields of the original.
41025e8c5aaSvikram  */
41125e8c5aaSvikram static struct ct_template *
41225e8c5aaSvikram ctmpl_device_dup(struct ct_template *template)
41325e8c5aaSvikram {
41425e8c5aaSvikram 	ctmpl_device_t *new;
41525e8c5aaSvikram 	ctmpl_device_t *old = template->ctmpl_data;
41625e8c5aaSvikram 	char *buf;
41725e8c5aaSvikram 	char *minor;
41825e8c5aaSvikram 
41925e8c5aaSvikram 	new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
42025e8c5aaSvikram 	buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
42125e8c5aaSvikram 
42225e8c5aaSvikram 	/*
42325e8c5aaSvikram 	 * copy generic fields.
42425e8c5aaSvikram 	 * ctmpl_copy returns with old template lock held
42525e8c5aaSvikram 	 */
42625e8c5aaSvikram 	ctmpl_copy(&new->ctd_ctmpl, template);
42725e8c5aaSvikram 
42825e8c5aaSvikram 	new->ctd_ctmpl.ctmpl_data = new;
42925e8c5aaSvikram 	new->ctd_aset = old->ctd_aset;
43025e8c5aaSvikram 	new->ctd_minor = NULL;
43125e8c5aaSvikram 	new->ctd_noneg = old->ctd_noneg;
43225e8c5aaSvikram 
43325e8c5aaSvikram 	if (old->ctd_minor) {
43425e8c5aaSvikram 		ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN);
43525e8c5aaSvikram 		bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1);
43625e8c5aaSvikram 	} else {
43725e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
43825e8c5aaSvikram 		buf = NULL;
43925e8c5aaSvikram 	}
44025e8c5aaSvikram 
44125e8c5aaSvikram 	mutex_exit(&template->ctmpl_lock);
44225e8c5aaSvikram 	if (buf) {
44325e8c5aaSvikram 		minor = i_ddi_strdup(buf, KM_SLEEP);
44425e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
44525e8c5aaSvikram 		buf = NULL;
44625e8c5aaSvikram 	} else {
44725e8c5aaSvikram 		minor = NULL;
44825e8c5aaSvikram 	}
44925e8c5aaSvikram 	mutex_enter(&template->ctmpl_lock);
45025e8c5aaSvikram 
45125e8c5aaSvikram 	if (minor) {
45225e8c5aaSvikram 		new->ctd_minor = minor;
45325e8c5aaSvikram 	}
45425e8c5aaSvikram 
45525e8c5aaSvikram 	ASSERT(buf == NULL);
45625e8c5aaSvikram 	return (&new->ctd_ctmpl);
45725e8c5aaSvikram }
45825e8c5aaSvikram 
45925e8c5aaSvikram /*
46025e8c5aaSvikram  * ctmpl_device_free
46125e8c5aaSvikram  *
46225e8c5aaSvikram  * The device contract template free entry point.  Just
46325e8c5aaSvikram  * frees the template.
46425e8c5aaSvikram  */
46525e8c5aaSvikram static void
46625e8c5aaSvikram ctmpl_device_free(struct ct_template *template)
46725e8c5aaSvikram {
46825e8c5aaSvikram 	ctmpl_device_t *dtmpl = template->ctmpl_data;
46925e8c5aaSvikram 
47025e8c5aaSvikram 	if (dtmpl->ctd_minor)
47125e8c5aaSvikram 		kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
47225e8c5aaSvikram 
47325e8c5aaSvikram 	kmem_free(dtmpl, sizeof (ctmpl_device_t));
47425e8c5aaSvikram }
47525e8c5aaSvikram 
/*
 * SAFE_EV is the set of events that a non-privileged process may place in
 * its critical event set. An unprivileged device contract owner cannot
 * influence when a device changes state, so every device event is
 * considered safe.
 *
 * EXCESS(value) yields the bits of "value", a critical event set, that lie
 * outside SAFE_EV and would therefore require additional privilege. Since
 * SAFE_EV covers all device events, the result is 0 for any set made up
 * solely of device events.
 */
#define	SAFE_EV		(CT_DEV_ALLEVENT)
#define	EXCESS(value)	((value) & ~SAFE_EV)
48825e8c5aaSvikram 
48925e8c5aaSvikram 
49025e8c5aaSvikram /*
49125e8c5aaSvikram  * ctmpl_device_set
49225e8c5aaSvikram  *
49325e8c5aaSvikram  * The device contract template set entry point. Sets various terms in the
49425e8c5aaSvikram  * template. The non-negotiable  term can only be set if the process has
49525e8c5aaSvikram  * the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
49625e8c5aaSvikram  */
49725e8c5aaSvikram static int
498c5a9a4fcSAntonello Cruz ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam,
499c5a9a4fcSAntonello Cruz     const cred_t *cr)
50025e8c5aaSvikram {
50125e8c5aaSvikram 	ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
502c5a9a4fcSAntonello Cruz 	ct_param_t *param = &kparam->param;
50325e8c5aaSvikram 	int error;
50425e8c5aaSvikram 	dev_info_t *dip;
50525e8c5aaSvikram 	int spec_type;
5067b209c2cSacruz 	uint64_t param_value;
5077b209c2cSacruz 	char *str_value;
50825e8c5aaSvikram 
50925e8c5aaSvikram 	ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));
51025e8c5aaSvikram 
5117b209c2cSacruz 	if (param->ctpm_id == CTDP_MINOR) {
512c5a9a4fcSAntonello Cruz 		str_value = (char *)kparam->ctpm_kbuf;
5137b209c2cSacruz 		str_value[param->ctpm_size - 1] = '\0';
5147b209c2cSacruz 	} else {
515d170b13aSacruz 		if (param->ctpm_size < sizeof (uint64_t))
516d170b13aSacruz 			return (EINVAL);
517c5a9a4fcSAntonello Cruz 		param_value = *(uint64_t *)kparam->ctpm_kbuf;
5187b209c2cSacruz 	}
5197b209c2cSacruz 
52025e8c5aaSvikram 	switch (param->ctpm_id) {
52125e8c5aaSvikram 	case CTDP_ACCEPT:
5227b209c2cSacruz 		if (param_value & ~CT_DEV_ALLEVENT)
52325e8c5aaSvikram 			return (EINVAL);
5247b209c2cSacruz 		if (param_value == 0)
52525e8c5aaSvikram 			return (EINVAL);
5267b209c2cSacruz 		if (param_value == CT_DEV_ALLEVENT)
52725e8c5aaSvikram 			return (EINVAL);
52825e8c5aaSvikram 
5297b209c2cSacruz 		dtmpl->ctd_aset = param_value;
53025e8c5aaSvikram 		break;
53125e8c5aaSvikram 	case CTDP_NONEG:
5327b209c2cSacruz 		if (param_value != CTDP_NONEG_SET &&
5337b209c2cSacruz 		    param_value != CTDP_NONEG_CLEAR)
53425e8c5aaSvikram 			return (EINVAL);
53525e8c5aaSvikram 
53625e8c5aaSvikram 		/*
53725e8c5aaSvikram 		 * only privileged processes can designate a contract
53825e8c5aaSvikram 		 * non-negotiatble.
53925e8c5aaSvikram 		 */
5407b209c2cSacruz 		if (param_value == CTDP_NONEG_SET &&
54125e8c5aaSvikram 		    (error = secpolicy_sys_devices(cr)) != 0) {
54225e8c5aaSvikram 			return (error);
54325e8c5aaSvikram 		}
54425e8c5aaSvikram 
5457b209c2cSacruz 		dtmpl->ctd_noneg = param_value;
54625e8c5aaSvikram 		break;
54725e8c5aaSvikram 
54825e8c5aaSvikram 	case CTDP_MINOR:
5497b209c2cSacruz 		if (*str_value != '/' ||
5507b209c2cSacruz 		    strncmp(str_value, "/devices/",
5517b209c2cSacruz 		    strlen("/devices/")) == 0 ||
5527b209c2cSacruz 		    strstr(str_value, "../devices/") != NULL ||
5537b209c2cSacruz 		    strchr(str_value, ':') == NULL) {
55425e8c5aaSvikram 			return (EINVAL);
55525e8c5aaSvikram 		}
55625e8c5aaSvikram 
55725e8c5aaSvikram 		spec_type = 0;
55825e8c5aaSvikram 		dip = NULL;
5597b209c2cSacruz 		if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) {
56025e8c5aaSvikram 			return (ERANGE);
56125e8c5aaSvikram 		}
56225e8c5aaSvikram 		ddi_release_devi(dip);
56325e8c5aaSvikram 
56425e8c5aaSvikram 		if (spec_type != S_IFCHR && spec_type != S_IFBLK) {
56525e8c5aaSvikram 			return (EINVAL);
56625e8c5aaSvikram 		}
56725e8c5aaSvikram 
56825e8c5aaSvikram 		if (dtmpl->ctd_minor != NULL) {
56925e8c5aaSvikram 			kmem_free(dtmpl->ctd_minor,
57025e8c5aaSvikram 			    strlen(dtmpl->ctd_minor) + 1);
57125e8c5aaSvikram 		}
5727b209c2cSacruz 		dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP);
57325e8c5aaSvikram 		break;
57425e8c5aaSvikram 	case CTP_EV_CRITICAL:
57525e8c5aaSvikram 		/*
57625e8c5aaSvikram 		 * Currently for device contracts, any event
57725e8c5aaSvikram 		 * may be added to the critical set. We retain the
57825e8c5aaSvikram 		 * following code however for future enhancements.
57925e8c5aaSvikram 		 */
5807b209c2cSacruz 		if (EXCESS(param_value) &&
58125e8c5aaSvikram 		    (error = secpolicy_contract_event(cr)) != 0)
58225e8c5aaSvikram 			return (error);
5837b209c2cSacruz 		tmpl->ctmpl_ev_crit = param_value;
58425e8c5aaSvikram 		break;
58525e8c5aaSvikram 	default:
58625e8c5aaSvikram 		return (EINVAL);
58725e8c5aaSvikram 	}
58825e8c5aaSvikram 
58925e8c5aaSvikram 	return (0);
59025e8c5aaSvikram }
59125e8c5aaSvikram 
59225e8c5aaSvikram /*
59325e8c5aaSvikram  * ctmpl_device_get
59425e8c5aaSvikram  *
59525e8c5aaSvikram  * The device contract template get entry point.  Simply fetches and
59625e8c5aaSvikram  * returns the value of the requested term.
59725e8c5aaSvikram  */
59825e8c5aaSvikram static int
599c5a9a4fcSAntonello Cruz ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam)
60025e8c5aaSvikram {
60125e8c5aaSvikram 	ctmpl_device_t *dtmpl = template->ctmpl_data;
602c5a9a4fcSAntonello Cruz 	ct_param_t *param = &kparam->param;
603c5a9a4fcSAntonello Cruz 	uint64_t *param_value = kparam->ctpm_kbuf;
60425e8c5aaSvikram 
60525e8c5aaSvikram 	ASSERT(MUTEX_HELD(&template->ctmpl_lock));
60625e8c5aaSvikram 
607d170b13aSacruz 	if (param->ctpm_id == CTDP_ACCEPT ||
608d170b13aSacruz 	    param->ctpm_id == CTDP_NONEG) {
609d170b13aSacruz 		if (param->ctpm_size < sizeof (uint64_t))
610d170b13aSacruz 			return (EINVAL);
611c5a9a4fcSAntonello Cruz 		kparam->ret_size = sizeof (uint64_t);
612d170b13aSacruz 	}
613d170b13aSacruz 
61425e8c5aaSvikram 	switch (param->ctpm_id) {
61525e8c5aaSvikram 	case CTDP_ACCEPT:
6167b209c2cSacruz 		*param_value = dtmpl->ctd_aset;
61725e8c5aaSvikram 		break;
61825e8c5aaSvikram 	case CTDP_NONEG:
6197b209c2cSacruz 		*param_value = dtmpl->ctd_noneg;
62025e8c5aaSvikram 		break;
62125e8c5aaSvikram 	case CTDP_MINOR:
62225e8c5aaSvikram 		if (dtmpl->ctd_minor) {
623c5a9a4fcSAntonello Cruz 			kparam->ret_size = strlcpy((char *)kparam->ctpm_kbuf,
6247b209c2cSacruz 			    dtmpl->ctd_minor, param->ctpm_size);
625c5a9a4fcSAntonello Cruz 			kparam->ret_size++;
62625e8c5aaSvikram 		} else {
62725e8c5aaSvikram 			return (ENOENT);
62825e8c5aaSvikram 		}
62925e8c5aaSvikram 		break;
63025e8c5aaSvikram 	default:
63125e8c5aaSvikram 		return (EINVAL);
63225e8c5aaSvikram 	}
63325e8c5aaSvikram 
63425e8c5aaSvikram 	return (0);
63525e8c5aaSvikram }
63625e8c5aaSvikram 
63725e8c5aaSvikram /*
63825e8c5aaSvikram  * Device contract type specific portion of creating a contract using
63925e8c5aaSvikram  * a specified template
64025e8c5aaSvikram  */
64125e8c5aaSvikram /*ARGSUSED*/
64225e8c5aaSvikram int
64325e8c5aaSvikram ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
64425e8c5aaSvikram {
64525e8c5aaSvikram 	ctmpl_device_t *dtmpl;
64625e8c5aaSvikram 	char *buf;
64725e8c5aaSvikram 	dev_t dev;
64825e8c5aaSvikram 	int spec_type;
64925e8c5aaSvikram 	int error;
65025e8c5aaSvikram 	cont_device_t *ctd;
65125e8c5aaSvikram 
65225e8c5aaSvikram 	if (ctidp == NULL)
65325e8c5aaSvikram 		return (EINVAL);
65425e8c5aaSvikram 
65525e8c5aaSvikram 	buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
65625e8c5aaSvikram 
65725e8c5aaSvikram 	dtmpl = template->ctmpl_data;
65825e8c5aaSvikram 
65925e8c5aaSvikram 	mutex_enter(&template->ctmpl_lock);
66025e8c5aaSvikram 	if (dtmpl->ctd_minor == NULL) {
66125e8c5aaSvikram 		/* incomplete template */
66225e8c5aaSvikram 		mutex_exit(&template->ctmpl_lock);
66325e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
66425e8c5aaSvikram 		return (EINVAL);
66525e8c5aaSvikram 	} else {
66625e8c5aaSvikram 		ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
66725e8c5aaSvikram 		bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1);
66825e8c5aaSvikram 	}
66925e8c5aaSvikram 	mutex_exit(&template->ctmpl_lock);
67025e8c5aaSvikram 
67125e8c5aaSvikram 	spec_type = 0;
67225e8c5aaSvikram 	dev = NODEV;
67325e8c5aaSvikram 	if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 ||
67425e8c5aaSvikram 	    dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
67525e8c5aaSvikram 	    (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
67625e8c5aaSvikram 		CT_DEBUG((CE_WARN,
67725e8c5aaSvikram 		    "tmpl_create: failed to find device: %s", buf));
67825e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
67925e8c5aaSvikram 		return (ERANGE);
68025e8c5aaSvikram 	}
68125e8c5aaSvikram 	kmem_free(buf, MAXPATHLEN);
68225e8c5aaSvikram 
68325e8c5aaSvikram 	ctd = contract_device_create(template->ctmpl_data,
68425e8c5aaSvikram 	    dev, spec_type, curproc, &error);
68525e8c5aaSvikram 
68625e8c5aaSvikram 	if (ctd == NULL) {
68725e8c5aaSvikram 		CT_DEBUG((CE_WARN, "Failed to create device contract for "
68825e8c5aaSvikram 		    "process (%d) with device (devt = %lu, spec_type = %s)",
68925e8c5aaSvikram 		    curproc->p_pid, dev,
69025e8c5aaSvikram 		    spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
69125e8c5aaSvikram 		return (error);
69225e8c5aaSvikram 	}
69325e8c5aaSvikram 
69425e8c5aaSvikram 	mutex_enter(&ctd->cond_contract.ct_lock);
69525e8c5aaSvikram 	*ctidp = ctd->cond_contract.ct_id;
69625e8c5aaSvikram 	mutex_exit(&ctd->cond_contract.ct_lock);
69725e8c5aaSvikram 
69825e8c5aaSvikram 	return (0);
69925e8c5aaSvikram }
70025e8c5aaSvikram 
70125e8c5aaSvikram /*
70225e8c5aaSvikram  * Device contract specific template entry points
70325e8c5aaSvikram  */
70425e8c5aaSvikram static ctmplops_t ctmpl_device_ops = {
70525e8c5aaSvikram 	ctmpl_device_dup,		/* ctop_dup */
70625e8c5aaSvikram 	ctmpl_device_free,		/* ctop_free */
70725e8c5aaSvikram 	ctmpl_device_set,		/* ctop_set */
70825e8c5aaSvikram 	ctmpl_device_get,		/* ctop_get */
70925e8c5aaSvikram 	ctmpl_device_create,		/* ctop_create */
71025e8c5aaSvikram 	CT_DEV_ALLEVENT			/* all device events bitmask */
71125e8c5aaSvikram };
71225e8c5aaSvikram 
71325e8c5aaSvikram 
71425e8c5aaSvikram /*
71525e8c5aaSvikram  * Device contract implementation
71625e8c5aaSvikram  */
71725e8c5aaSvikram 
71825e8c5aaSvikram /*
71925e8c5aaSvikram  * contract_device_default
72025e8c5aaSvikram  *
72125e8c5aaSvikram  * The device contract default template entry point.  Creates a
72225e8c5aaSvikram  * device contract template with a default A-set and no "noneg" ,
72325e8c5aaSvikram  * with informative degrade events and critical offline events.
72425e8c5aaSvikram  * There is no default minor path.
72525e8c5aaSvikram  */
72625e8c5aaSvikram static ct_template_t *
72725e8c5aaSvikram contract_device_default(void)
72825e8c5aaSvikram {
72925e8c5aaSvikram 	ctmpl_device_t *new;
73025e8c5aaSvikram 
73125e8c5aaSvikram 	new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
73225e8c5aaSvikram 	ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new);
73325e8c5aaSvikram 
73425e8c5aaSvikram 	new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
73525e8c5aaSvikram 	new->ctd_noneg = 0;
73625e8c5aaSvikram 	new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED;
73725e8c5aaSvikram 	new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE;
73825e8c5aaSvikram 
73925e8c5aaSvikram 	return (&new->ctd_ctmpl);
74025e8c5aaSvikram }
74125e8c5aaSvikram 
74225e8c5aaSvikram /*
74325e8c5aaSvikram  * contract_device_free
74425e8c5aaSvikram  *
74525e8c5aaSvikram  * Destroys the device contract specific portion of a contract and
74625e8c5aaSvikram  * frees the contract.
74725e8c5aaSvikram  */
74825e8c5aaSvikram static void
74925e8c5aaSvikram contract_device_free(contract_t *ct)
75025e8c5aaSvikram {
75125e8c5aaSvikram 	cont_device_t *ctd = ct->ct_data;
75225e8c5aaSvikram 
75325e8c5aaSvikram 	ASSERT(ctd->cond_minor);
75425e8c5aaSvikram 	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
75525e8c5aaSvikram 	kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);
75625e8c5aaSvikram 
75725e8c5aaSvikram 	ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
75825e8c5aaSvikram 	    ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);
75925e8c5aaSvikram 
76025e8c5aaSvikram 	ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);
76125e8c5aaSvikram 
76225e8c5aaSvikram 	ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
76325e8c5aaSvikram 	ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);
76425e8c5aaSvikram 
76525e8c5aaSvikram 	ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
76625e8c5aaSvikram 	ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));
76725e8c5aaSvikram 
76825e8c5aaSvikram 	ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
76925e8c5aaSvikram 	ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));
77025e8c5aaSvikram 
77125e8c5aaSvikram 	ASSERT(!list_link_active(&ctd->cond_next));
77225e8c5aaSvikram 
77325e8c5aaSvikram 	kmem_free(ctd, sizeof (cont_device_t));
77425e8c5aaSvikram }
77525e8c5aaSvikram 
77625e8c5aaSvikram /*
77725e8c5aaSvikram  * contract_device_abandon
77825e8c5aaSvikram  *
77925e8c5aaSvikram  * The device contract abandon entry point.
78025e8c5aaSvikram  */
78125e8c5aaSvikram static void
78225e8c5aaSvikram contract_device_abandon(contract_t *ct)
78325e8c5aaSvikram {
78425e8c5aaSvikram 	ASSERT(MUTEX_HELD(&ct->ct_lock));
78525e8c5aaSvikram 
78625e8c5aaSvikram 	/*
78725e8c5aaSvikram 	 * device contracts cannot be inherited or orphaned.
78825e8c5aaSvikram 	 * Move the contract to the DEAD_STATE. It will be freed
78925e8c5aaSvikram 	 * once all references to it are gone.
79025e8c5aaSvikram 	 */
79125e8c5aaSvikram 	contract_destroy(ct);
79225e8c5aaSvikram }
79325e8c5aaSvikram 
79425e8c5aaSvikram /*
79525e8c5aaSvikram  * contract_device_destroy
79625e8c5aaSvikram  *
79725e8c5aaSvikram  * The device contract destroy entry point.
79825e8c5aaSvikram  * Called from contract_destroy() to do any type specific destroy. Note
79925e8c5aaSvikram  * that destroy is a misnomer - this does not free the contract, it only
80025e8c5aaSvikram  * moves it to the dead state. A contract is actually freed via
80125e8c5aaSvikram  * 	contract_rele() -> contract_dtor(), contop_free()
80225e8c5aaSvikram  */
80325e8c5aaSvikram static void
80425e8c5aaSvikram contract_device_destroy(contract_t *ct)
80525e8c5aaSvikram {
8062eb07f5eSStephen Hanson 	cont_device_t	*ctd;
8072eb07f5eSStephen Hanson 	dev_info_t	*dip;
80825e8c5aaSvikram 
80925e8c5aaSvikram 	ASSERT(MUTEX_HELD(&ct->ct_lock));
81025e8c5aaSvikram 
8112eb07f5eSStephen Hanson 	for (;;) {
8122eb07f5eSStephen Hanson 		ctd = ct->ct_data;
8132eb07f5eSStephen Hanson 		dip = ctd->cond_dip;
81425e8c5aaSvikram 		if (dip == NULL) {
81525e8c5aaSvikram 			/*
81625e8c5aaSvikram 			 * The dip has been removed, this is a dangling contract
81725e8c5aaSvikram 			 * Check that dip linkages are NULL
81825e8c5aaSvikram 			 */
81925e8c5aaSvikram 			ASSERT(!list_link_active(&ctd->cond_next));
8202eb07f5eSStephen Hanson 			CT_DEBUG((CE_NOTE, "contract_device_destroy:"
8212eb07f5eSStephen Hanson 			    " contract has no devinfo node. contract ctid : %d",
8222eb07f5eSStephen Hanson 			    ct->ct_id));
82325e8c5aaSvikram 			return;
82425e8c5aaSvikram 		}
82525e8c5aaSvikram 
82625e8c5aaSvikram 		/*
8272eb07f5eSStephen Hanson 		 * The intended lock order is : devi_ct_lock -> ct_count
8282eb07f5eSStephen Hanson 		 * barrier -> ct_lock.
8292eb07f5eSStephen Hanson 		 * However we can't do this here as dropping the ct_lock allows
8302eb07f5eSStephen Hanson 		 * a race condition with i_ddi_free_node()/
8312eb07f5eSStephen Hanson 		 * contract_device_remove_dip() which may free off dip before
8322eb07f5eSStephen Hanson 		 * we can take devi_ct_lock. So use mutex_tryenter to avoid
8332eb07f5eSStephen Hanson 		 * dropping ct_lock until we have acquired devi_ct_lock.
83425e8c5aaSvikram 		 */
8352eb07f5eSStephen Hanson 		if (mutex_tryenter(&(DEVI(dip)->devi_ct_lock)) != 0)
8362eb07f5eSStephen Hanson 			break;
8372eb07f5eSStephen Hanson 		mutex_exit(&ct->ct_lock);
8382eb07f5eSStephen Hanson 		delay(drv_usectohz(1000));
8392eb07f5eSStephen Hanson 		mutex_enter(&ct->ct_lock);
8402eb07f5eSStephen Hanson 	}
84125e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
84225e8c5aaSvikram 
84325e8c5aaSvikram 	/*
84425e8c5aaSvikram 	 * Waiting for the barrier to be released is strictly speaking not
84525e8c5aaSvikram 	 * necessary. But it simplifies the implementation of
84625e8c5aaSvikram 	 * contract_device_publish() by establishing the invariant that
84725e8c5aaSvikram 	 * device contracts cannot go away during negotiation.
84825e8c5aaSvikram 	 */
84925e8c5aaSvikram 	ct_barrier_wait_for_release(dip);
85025e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
85125e8c5aaSvikram 
85225e8c5aaSvikram 	list_remove(&(DEVI(dip)->devi_ct), ctd);
85325e8c5aaSvikram 	ctd->cond_dip = NULL; /* no longer linked to dip */
85425e8c5aaSvikram 	contract_rele(ct);	/* remove hold for dip linkage */
85525e8c5aaSvikram 
85625e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
85725e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
85825e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
85925e8c5aaSvikram }
86025e8c5aaSvikram 
86125e8c5aaSvikram /*
86225e8c5aaSvikram  * contract_device_status
86325e8c5aaSvikram  *
86425e8c5aaSvikram  * The device contract status entry point. Called when level of "detail"
86525e8c5aaSvikram  * is either CTD_FIXED or CTD_ALL
86625e8c5aaSvikram  *
86725e8c5aaSvikram  */
86825e8c5aaSvikram static void
86925e8c5aaSvikram contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
87025e8c5aaSvikram     void *status, model_t model)
87125e8c5aaSvikram {
87225e8c5aaSvikram 	cont_device_t *ctd = ct->ct_data;
87325e8c5aaSvikram 
87425e8c5aaSvikram 	ASSERT(detail == CTD_FIXED || detail == CTD_ALL);
87525e8c5aaSvikram 
87625e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
87725e8c5aaSvikram 	contract_status_common(ct, zone, status, model);
87825e8c5aaSvikram 
87925e8c5aaSvikram 	/*
88025e8c5aaSvikram 	 * There's no need to hold the contract lock while accessing static
88125e8c5aaSvikram 	 * data like aset or noneg. But since we need the lock to access other
88225e8c5aaSvikram 	 * data like state, we hold it anyway.
88325e8c5aaSvikram 	 */
88425e8c5aaSvikram 	VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0);
88525e8c5aaSvikram 	VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0);
88625e8c5aaSvikram 	VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0);
88725e8c5aaSvikram 
88825e8c5aaSvikram 	if (detail == CTD_FIXED) {
88925e8c5aaSvikram 		mutex_exit(&ct->ct_lock);
89025e8c5aaSvikram 		return;
89125e8c5aaSvikram 	}
89225e8c5aaSvikram 
89325e8c5aaSvikram 	ASSERT(ctd->cond_minor);
89425e8c5aaSvikram 	VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0);
89525e8c5aaSvikram 
89625e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
89725e8c5aaSvikram }
89825e8c5aaSvikram 
89925e8c5aaSvikram /*
90025e8c5aaSvikram  * Converts a result integer into the corresponding string. Used for printing
90125e8c5aaSvikram  * messages
90225e8c5aaSvikram  */
90325e8c5aaSvikram static char *
90425e8c5aaSvikram result_str(uint_t result)
90525e8c5aaSvikram {
90625e8c5aaSvikram 	switch (result) {
90725e8c5aaSvikram 	case CT_ACK:
90825e8c5aaSvikram 		return ("CT_ACK");
90925e8c5aaSvikram 	case CT_NACK:
91025e8c5aaSvikram 		return ("CT_NACK");
91125e8c5aaSvikram 	case CT_NONE:
91225e8c5aaSvikram 		return ("CT_NONE");
91325e8c5aaSvikram 	default:
91425e8c5aaSvikram 		return ("UNKNOWN");
91525e8c5aaSvikram 	}
91625e8c5aaSvikram }
91725e8c5aaSvikram 
91825e8c5aaSvikram /*
91925e8c5aaSvikram  * Converts a device state integer constant into the corresponding string.
92025e8c5aaSvikram  * Used to print messages.
92125e8c5aaSvikram  */
92225e8c5aaSvikram static char *
92325e8c5aaSvikram state_str(uint_t state)
92425e8c5aaSvikram {
92525e8c5aaSvikram 	switch (state) {
92625e8c5aaSvikram 	case CT_DEV_EV_ONLINE:
92725e8c5aaSvikram 		return ("ONLINE");
92825e8c5aaSvikram 	case CT_DEV_EV_DEGRADED:
92925e8c5aaSvikram 		return ("DEGRADED");
93025e8c5aaSvikram 	case CT_DEV_EV_OFFLINE:
93125e8c5aaSvikram 		return ("OFFLINE");
93225e8c5aaSvikram 	default:
93325e8c5aaSvikram 		return ("UNKNOWN");
93425e8c5aaSvikram 	}
93525e8c5aaSvikram }
93625e8c5aaSvikram 
93725e8c5aaSvikram /*
93825e8c5aaSvikram  * Routine that determines if a particular CT_DEV_EV_? event corresponds to a
93925e8c5aaSvikram  * synchronous state change or not.
94025e8c5aaSvikram  */
94125e8c5aaSvikram static int
94225e8c5aaSvikram is_sync_neg(uint_t old, uint_t new)
94325e8c5aaSvikram {
94425e8c5aaSvikram 	int	i;
94525e8c5aaSvikram 
94625e8c5aaSvikram 	ASSERT(old & CT_DEV_ALLEVENT);
94725e8c5aaSvikram 	ASSERT(new & CT_DEV_ALLEVENT);
94825e8c5aaSvikram 
94925e8c5aaSvikram 	if (old == new) {
95025e8c5aaSvikram 		CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
95125e8c5aaSvikram 		    state_str(new)));
95225e8c5aaSvikram 		return (-2);
95325e8c5aaSvikram 	}
95425e8c5aaSvikram 
95525e8c5aaSvikram 	for (i = 0; ct_dev_negtable[i].st_new != 0; i++) {
95625e8c5aaSvikram 		if (old == ct_dev_negtable[i].st_old &&
95725e8c5aaSvikram 		    new == ct_dev_negtable[i].st_new) {
95825e8c5aaSvikram 			return (ct_dev_negtable[i].st_neg);
95925e8c5aaSvikram 		}
96025e8c5aaSvikram 	}
96125e8c5aaSvikram 
96225e8c5aaSvikram 	CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
96325e8c5aaSvikram 	    "old = %s -> new = %s", state_str(old), state_str(new)));
96425e8c5aaSvikram 
96525e8c5aaSvikram 	return (-1);
96625e8c5aaSvikram }
96725e8c5aaSvikram 
96825e8c5aaSvikram /*
96925e8c5aaSvikram  * Used to cleanup cached dv_nodes so that when a device is released by
97025e8c5aaSvikram  * a contract holder, its devinfo node can be successfully detached.
97125e8c5aaSvikram  */
97225e8c5aaSvikram static int
97325e8c5aaSvikram contract_device_dvclean(dev_info_t *dip)
97425e8c5aaSvikram {
97525e8c5aaSvikram 	char		*devnm;
97625e8c5aaSvikram 	dev_info_t	*pdip;
97725e8c5aaSvikram 
97825e8c5aaSvikram 	ASSERT(dip);
97925e8c5aaSvikram 
98025e8c5aaSvikram 	/* pdip can be NULL if we have contracts against the root dip */
98125e8c5aaSvikram 	pdip = ddi_get_parent(dip);
98225e8c5aaSvikram 
98325e8c5aaSvikram 	if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) {
98425e8c5aaSvikram 		char		*path;
98525e8c5aaSvikram 
98625e8c5aaSvikram 		path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
98725e8c5aaSvikram 		(void) ddi_pathname(dip, path);
98825e8c5aaSvikram 		CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
98925e8c5aaSvikram 		    "device=%s", path));
99025e8c5aaSvikram 		kmem_free(path, MAXPATHLEN);
99125e8c5aaSvikram 		return (EDEADLOCK);
99225e8c5aaSvikram 	}
99325e8c5aaSvikram 
99425e8c5aaSvikram 	if (pdip) {
99525e8c5aaSvikram 		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
99625e8c5aaSvikram 		(void) ddi_deviname(dip, devnm);
997*320fb372SYuri Pankov 		(void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
99825e8c5aaSvikram 		kmem_free(devnm, MAXNAMELEN + 1);
99925e8c5aaSvikram 	} else {
1000*320fb372SYuri Pankov 		(void) devfs_clean(dip, NULL, DV_CLEAN_FORCE);
100125e8c5aaSvikram 	}
100225e8c5aaSvikram 
1003*320fb372SYuri Pankov 	return (0);
100425e8c5aaSvikram }
100525e8c5aaSvikram 
100625e8c5aaSvikram /*
100725e8c5aaSvikram  * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
100825e8c5aaSvikram  * Results in the ACK or NACK being recorded on the dip for one particular
100925e8c5aaSvikram  * contract. The device contracts framework evaluates the ACK/NACKs for all
101025e8c5aaSvikram  * contracts against a device to determine if a particular device state change
101125e8c5aaSvikram  * should be allowed.
101225e8c5aaSvikram  */
101325e8c5aaSvikram static int
101425e8c5aaSvikram contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
101525e8c5aaSvikram     uint_t cmd)
101625e8c5aaSvikram {
101725e8c5aaSvikram 	cont_device_t *ctd = ct->ct_data;
101825e8c5aaSvikram 	dev_info_t *dip;
101925e8c5aaSvikram 	ctid_t	ctid;
102025e8c5aaSvikram 	int error;
102125e8c5aaSvikram 
102225e8c5aaSvikram 	ctid = ct->ct_id;
102325e8c5aaSvikram 
102425e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));
102525e8c5aaSvikram 
102625e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
102725e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));
102825e8c5aaSvikram 
102925e8c5aaSvikram 	dip = ctd->cond_dip;
103025e8c5aaSvikram 
103125e8c5aaSvikram 	ASSERT(ctd->cond_minor);
103225e8c5aaSvikram 	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
103325e8c5aaSvikram 
103425e8c5aaSvikram 	/*
103525e8c5aaSvikram 	 * Negotiation only if new state is not in A-set
103625e8c5aaSvikram 	 */
103725e8c5aaSvikram 	ASSERT(!(ctd->cond_aset & evtype));
103825e8c5aaSvikram 
103925e8c5aaSvikram 	/*
104025e8c5aaSvikram 	 * Negotiation only if transition is synchronous
104125e8c5aaSvikram 	 */
104225e8c5aaSvikram 	ASSERT(is_sync_neg(ctd->cond_state, evtype));
104325e8c5aaSvikram 
104425e8c5aaSvikram 	/*
104525e8c5aaSvikram 	 * We shouldn't be negotiating if the "noneg" flag is set
104625e8c5aaSvikram 	 */
104725e8c5aaSvikram 	ASSERT(!ctd->cond_noneg);
104825e8c5aaSvikram 
104925e8c5aaSvikram 	if (dip)
105025e8c5aaSvikram 		ndi_hold_devi(dip);
105125e8c5aaSvikram 
105225e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
105325e8c5aaSvikram 
105425e8c5aaSvikram 	/*
105525e8c5aaSvikram 	 * dv_clean only if !NACK and offline state change
105625e8c5aaSvikram 	 */
105725e8c5aaSvikram 	if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
105825e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
105925e8c5aaSvikram 		error = contract_device_dvclean(dip);
106025e8c5aaSvikram 		if (error != 0) {
106125e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
106225e8c5aaSvikram 			    ctid));
106325e8c5aaSvikram 			ddi_release_devi(dip);
106425e8c5aaSvikram 		}
106525e8c5aaSvikram 	}
106625e8c5aaSvikram 
106725e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
106825e8c5aaSvikram 
106925e8c5aaSvikram 	if (dip)
107025e8c5aaSvikram 		ddi_release_devi(dip);
107125e8c5aaSvikram 
107225e8c5aaSvikram 	if (dip == NULL) {
107325e8c5aaSvikram 		if (ctd->cond_currev_id != evid) {
107425e8c5aaSvikram 			CT_DEBUG((CE_WARN, "%sACK for non-current event "
107525e8c5aaSvikram 			    "(type=%s, id=%llu) on removed device",
107625e8c5aaSvikram 			    cmd == CT_NACK ? "N" : "",
107725e8c5aaSvikram 			    state_str(evtype), (unsigned long long)evid));
107825e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
107925e8c5aaSvikram 			    ctid));
108025e8c5aaSvikram 		} else {
108125e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == evtype);
108225e8c5aaSvikram 			CT_DEBUG((CE_WARN, "contract_ack: no such device: "
108325e8c5aaSvikram 			    "ctid: %d", ctid));
108425e8c5aaSvikram 		}
108525e8c5aaSvikram 		error = (ct->ct_state == CTS_DEAD) ? ESRCH :
108625e8c5aaSvikram 		    ((cmd == CT_NACK) ? ETIMEDOUT : 0);
108725e8c5aaSvikram 		mutex_exit(&ct->ct_lock);
108825e8c5aaSvikram 		return (error);
108925e8c5aaSvikram 	}
109025e8c5aaSvikram 
109125e8c5aaSvikram 	/*
109225e8c5aaSvikram 	 * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock
109325e8c5aaSvikram 	 */
109425e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
109525e8c5aaSvikram 
109625e8c5aaSvikram 	mutex_enter(&DEVI(dip)->devi_ct_lock);
109725e8c5aaSvikram 	mutex_enter(&ct->ct_lock);
109825e8c5aaSvikram 	if (ctd->cond_currev_id != evid) {
109925e8c5aaSvikram 		char *buf;
110025e8c5aaSvikram 		mutex_exit(&ct->ct_lock);
110125e8c5aaSvikram 		mutex_exit(&DEVI(dip)->devi_ct_lock);
110225e8c5aaSvikram 		ndi_hold_devi(dip);
110325e8c5aaSvikram 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
110425e8c5aaSvikram 		(void) ddi_pathname(dip, buf);
110525e8c5aaSvikram 		ddi_release_devi(dip);
110625e8c5aaSvikram 		CT_DEBUG((CE_WARN, "%sACK for non-current event"
110725e8c5aaSvikram 		    "(type=%s, id=%llu) on device %s",
110825e8c5aaSvikram 		    cmd == CT_NACK ? "N" : "",
110925e8c5aaSvikram 		    state_str(evtype), (unsigned long long)evid, buf));
111025e8c5aaSvikram 		kmem_free(buf, MAXPATHLEN);
111125e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
111225e8c5aaSvikram 		    cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
111325e8c5aaSvikram 		return (cmd == CT_ACK ? 0 : ETIMEDOUT);
111425e8c5aaSvikram 	}
111525e8c5aaSvikram 
111625e8c5aaSvikram 	ASSERT(ctd->cond_currev_type == evtype);
111725e8c5aaSvikram 	ASSERT(cmd == CT_ACK || cmd == CT_NACK);
111825e8c5aaSvikram 
111925e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
112025e8c5aaSvikram 	    cmd == CT_NACK ? "N" : "", ctid));
112125e8c5aaSvikram 
112225e8c5aaSvikram 	ctd->cond_currev_ack = cmd;
112325e8c5aaSvikram 	mutex_exit(&ct->ct_lock);
112425e8c5aaSvikram 
112525e8c5aaSvikram 	ct_barrier_decr(dip);
112625e8c5aaSvikram 	mutex_exit(&DEVI(dip)->devi_ct_lock);
112725e8c5aaSvikram 
112825e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));
112925e8c5aaSvikram 
113025e8c5aaSvikram 	return (0);
113125e8c5aaSvikram }
113225e8c5aaSvikram 
113325e8c5aaSvikram /*
113425e8c5aaSvikram  * Invoked when a userland contract holder approves (i.e. ACKs) a state change
113525e8c5aaSvikram  */
113625e8c5aaSvikram static int
113725e8c5aaSvikram contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
113825e8c5aaSvikram {
113925e8c5aaSvikram 	return (contract_device_ack_nack(ct, evtype, evid, CT_ACK));
114025e8c5aaSvikram }
114125e8c5aaSvikram 
114225e8c5aaSvikram /*
114325e8c5aaSvikram  * Invoked when a userland contract holder blocks (i.e. NACKs) a state change
114425e8c5aaSvikram  */
114525e8c5aaSvikram static int
114625e8c5aaSvikram contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
114725e8c5aaSvikram {
114825e8c5aaSvikram 	return (contract_device_ack_nack(ct, evtype, evid, CT_NACK));
114925e8c5aaSvikram }
115025e8c5aaSvikram 
115125e8c5aaSvikram /*
115225e8c5aaSvikram  * Creates a new contract synchronously with the breaking of an existing
115325e8c5aaSvikram  * contract. Currently not supported.
115425e8c5aaSvikram  */
115525e8c5aaSvikram /*ARGSUSED*/
115625e8c5aaSvikram static int
115725e8c5aaSvikram contract_device_newct(contract_t *ct)
115825e8c5aaSvikram {
115925e8c5aaSvikram 	return (ENOTSUP);
116025e8c5aaSvikram }
116125e8c5aaSvikram 
116225e8c5aaSvikram /*
116325e8c5aaSvikram  * Core device contract implementation entry points
116425e8c5aaSvikram  */
116525e8c5aaSvikram static contops_t contract_device_ops = {
116625e8c5aaSvikram 	contract_device_free,		/* contop_free */
116725e8c5aaSvikram 	contract_device_abandon,	/* contop_abandon */
116825e8c5aaSvikram 	contract_device_destroy,	/* contop_destroy */
116925e8c5aaSvikram 	contract_device_status,		/* contop_status */
117025e8c5aaSvikram 	contract_device_ack,		/* contop_ack */
117125e8c5aaSvikram 	contract_device_nack,		/* contop_nack */
117225e8c5aaSvikram 	contract_qack_notsup,		/* contop_qack */
117325e8c5aaSvikram 	contract_device_newct		/* contop_newct */
117425e8c5aaSvikram };
117525e8c5aaSvikram 
117625e8c5aaSvikram /*
117725e8c5aaSvikram  * contract_device_init
117825e8c5aaSvikram  *
117925e8c5aaSvikram  * Initializes the device contract type.
118025e8c5aaSvikram  */
118125e8c5aaSvikram void
118225e8c5aaSvikram contract_device_init(void)
118325e8c5aaSvikram {
118425e8c5aaSvikram 	device_type = contract_type_init(CTT_DEVICE, "device",
118525e8c5aaSvikram 	    &contract_device_ops, contract_device_default);
118625e8c5aaSvikram }
118725e8c5aaSvikram 
118825e8c5aaSvikram /*
118925e8c5aaSvikram  * contract_device_create
119025e8c5aaSvikram  *
119125e8c5aaSvikram  * create a device contract given template "tmpl" and the "owner" process.
119225e8c5aaSvikram  * May fail and return NULL if project.max-contracts would have been exceeded.
119325e8c5aaSvikram  *
119425e8c5aaSvikram  * Common device contract creation routine called for both open-time and
119525e8c5aaSvikram  * non-open time device contract creation
119625e8c5aaSvikram  */
119725e8c5aaSvikram static cont_device_t *
119825e8c5aaSvikram contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type,
119925e8c5aaSvikram     proc_t *owner, int *errorp)
120025e8c5aaSvikram {
120125e8c5aaSvikram 	cont_device_t *ctd;
120225e8c5aaSvikram 	char *minor;
120325e8c5aaSvikram 	char *path;
120425e8c5aaSvikram 	dev_info_t *dip;
120525e8c5aaSvikram 
120625e8c5aaSvikram 	ASSERT(dtmpl != NULL);
120725e8c5aaSvikram 	ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE);
120825e8c5aaSvikram 	ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK);
120925e8c5aaSvikram 	ASSERT(errorp);
121025e8c5aaSvikram 
121125e8c5aaSvikram 	*errorp = 0;
121225e8c5aaSvikram 
121325e8c5aaSvikram 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
121425e8c5aaSvikram 
121525e8c5aaSvikram 	mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
121625e8c5aaSvikram 	ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
121725e8c5aaSvikram 	bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
121825e8c5aaSvikram 	mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
121925e8c5aaSvikram 
122025e8c5aaSvikram 	dip = e_ddi_hold_devi_by_path(path, 0);
122125e8c5aaSvikram 	if (dip == NULL) {
122225e8c5aaSvikram 		cmn_err(CE_WARN, "contract_create: Cannot find devinfo node "
122325e8c5aaSvikram 		    "for device path (%s)", path);
122425e8c5aaSvikram 		kmem_free(path, MAXPATHLEN);
122525e8c5aaSvikram 		*errorp = ERANGE;
122625e8c5aaSvikram 		return (NULL);
122725e8c5aaSvikram 	}
122825e8c5aaSvikram 
122925e8c5aaSvikram 	/*
123025e8c5aaSvikram 	 * Lock out any parallel contract negotiations
123125e8c5aaSvikram 	 */
123225e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
123325e8c5aaSvikram 	ct_barrier_acquire(dip);
123425e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
123525e8c5aaSvikram 
123625e8c5aaSvikram 	minor = i_ddi_strdup(path, KM_SLEEP);
123725e8c5aaSvikram 	kmem_free(path, MAXPATHLEN);
123825e8c5aaSvikram 
123925e8c5aaSvikram 	(void) contract_type_pbundle(device_type, owner);
124025e8c5aaSvikram 
124125e8c5aaSvikram 	ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP);
124225e8c5aaSvikram 
124325e8c5aaSvikram 	/*
124425e8c5aaSvikram 	 * Only we hold a refernce to this contract. Safe to access
124525e8c5aaSvikram 	 * the fields without a ct_lock
124625e8c5aaSvikram 	 */
124725e8c5aaSvikram 	ctd->cond_minor = minor;
124825e8c5aaSvikram 	/*
124925e8c5aaSvikram 	 * It is safe to set the dip pointer in the contract
125025e8c5aaSvikram 	 * as the contract will always be destroyed before the dip
125125e8c5aaSvikram 	 * is released
125225e8c5aaSvikram 	 */
125325e8c5aaSvikram 	ctd->cond_dip = dip;
125425e8c5aaSvikram 	ctd->cond_devt = dev;
125525e8c5aaSvikram 	ctd->cond_spec = spec_type;
125625e8c5aaSvikram 
125725e8c5aaSvikram 	/*
125825e8c5aaSvikram 	 * Since we are able to lookup the device, it is either
125925e8c5aaSvikram 	 * online or degraded
126025e8c5aaSvikram 	 */
126125e8c5aaSvikram 	ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ?
126225e8c5aaSvikram 	    CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE;
126325e8c5aaSvikram 
126425e8c5aaSvikram 	mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
126525e8c5aaSvikram 	ctd->cond_aset = dtmpl->ctd_aset;
126625e8c5aaSvikram 	ctd->cond_noneg = dtmpl->ctd_noneg;
126725e8c5aaSvikram 
126825e8c5aaSvikram 	/*
126925e8c5aaSvikram 	 * contract_ctor() initailizes the common portion of a contract
127025e8c5aaSvikram 	 * contract_dtor() destroys the common portion of a contract
127125e8c5aaSvikram 	 */
127225e8c5aaSvikram 	if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl,
127325e8c5aaSvikram 	    ctd, 0, owner, B_TRUE)) {
127425e8c5aaSvikram 		mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
127525e8c5aaSvikram 		/*
127625e8c5aaSvikram 		 * contract_device_free() destroys the type specific
127725e8c5aaSvikram 		 * portion of a contract and frees the contract.
127825e8c5aaSvikram 		 * The "minor" path and "cred" is a part of the type specific
127925e8c5aaSvikram 		 * portion of the contract and will be freed by
128025e8c5aaSvikram 		 * contract_device_free()
128125e8c5aaSvikram 		 */
128225e8c5aaSvikram 		contract_device_free(&ctd->cond_contract);
128325e8c5aaSvikram 
128425e8c5aaSvikram 		/* release barrier */
128525e8c5aaSvikram 		mutex_enter(&(DEVI(dip)->devi_ct_lock));
128625e8c5aaSvikram 		ct_barrier_release(dip);
128725e8c5aaSvikram 		mutex_exit(&(DEVI(dip)->devi_ct_lock));
128825e8c5aaSvikram 
128925e8c5aaSvikram 		ddi_release_devi(dip);
129025e8c5aaSvikram 		*errorp = EAGAIN;
129125e8c5aaSvikram 		return (NULL);
129225e8c5aaSvikram 	}
129325e8c5aaSvikram 	mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
129425e8c5aaSvikram 
129525e8c5aaSvikram 	mutex_enter(&ctd->cond_contract.ct_lock);
129625e8c5aaSvikram 	ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME;
129725e8c5aaSvikram 	ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME;
129825e8c5aaSvikram 	ctd->cond_contract.ct_ntime.ctm_start = -1;
129925e8c5aaSvikram 	ctd->cond_contract.ct_qtime.ctm_start = -1;
130025e8c5aaSvikram 	mutex_exit(&ctd->cond_contract.ct_lock);
130125e8c5aaSvikram 
130225e8c5aaSvikram 	/*
130325e8c5aaSvikram 	 * Insert device contract into list hanging off the dip
130425e8c5aaSvikram 	 * Bump up the ref-count on the contract to reflect this
130525e8c5aaSvikram 	 */
130625e8c5aaSvikram 	contract_hold(&ctd->cond_contract);
130725e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
130825e8c5aaSvikram 	list_insert_tail(&(DEVI(dip)->devi_ct), ctd);
130925e8c5aaSvikram 
131025e8c5aaSvikram 	/* release barrier */
131125e8c5aaSvikram 	ct_barrier_release(dip);
131225e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
131325e8c5aaSvikram 
131425e8c5aaSvikram 	ddi_release_devi(dip);
131525e8c5aaSvikram 
131625e8c5aaSvikram 	return (ctd);
131725e8c5aaSvikram }
131825e8c5aaSvikram 
131925e8c5aaSvikram /*
132025e8c5aaSvikram  * Called when a device is successfully opened to create an open-time contract
132125e8c5aaSvikram  * i.e. synchronously with a device open.
132225e8c5aaSvikram  */
132325e8c5aaSvikram int
132425e8c5aaSvikram contract_device_open(dev_t dev, int spec_type, contract_t **ctpp)
132525e8c5aaSvikram {
132625e8c5aaSvikram 	ctmpl_device_t *dtmpl;
132725e8c5aaSvikram 	ct_template_t  *tmpl;
132825e8c5aaSvikram 	cont_device_t *ctd;
132925e8c5aaSvikram 	char *path;
133025e8c5aaSvikram 	klwp_t *lwp;
133125e8c5aaSvikram 	int error;
133225e8c5aaSvikram 
133325e8c5aaSvikram 	if (ctpp)
133425e8c5aaSvikram 		*ctpp = NULL;
133525e8c5aaSvikram 
133625e8c5aaSvikram 	/*
133725e8c5aaSvikram 	 * Check if we are in user-context i.e. if we have an lwp
133825e8c5aaSvikram 	 */
133925e8c5aaSvikram 	lwp = ttolwp(curthread);
134025e8c5aaSvikram 	if (lwp == NULL) {
134125e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "contract_open: Not user-context"));
134225e8c5aaSvikram 		return (0);
134325e8c5aaSvikram 	}
134425e8c5aaSvikram 
134525e8c5aaSvikram 	tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]);
134625e8c5aaSvikram 	if (tmpl == NULL) {
134725e8c5aaSvikram 		return (0);
134825e8c5aaSvikram 	}
134925e8c5aaSvikram 	dtmpl = tmpl->ctmpl_data;
135025e8c5aaSvikram 
135125e8c5aaSvikram 	/*
135225e8c5aaSvikram 	 * If the user set a minor path in the template before an open,
135325e8c5aaSvikram 	 * ignore it. We use the minor path of the actual minor opened.
135425e8c5aaSvikram 	 */
135525e8c5aaSvikram 	mutex_enter(&tmpl->ctmpl_lock);
135625e8c5aaSvikram 	if (dtmpl->ctd_minor != NULL) {
135725e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: "
135825e8c5aaSvikram 		    "ignoring device minor path in active template: %s",
135925e8c5aaSvikram 		    curproc->p_pid, dtmpl->ctd_minor));
136025e8c5aaSvikram 		/*
136125e8c5aaSvikram 		 * This is a copy of the actual activated template.
136225e8c5aaSvikram 		 * Safe to make changes such as freeing the minor
136325e8c5aaSvikram 		 * path in the template.
136425e8c5aaSvikram 		 */
136525e8c5aaSvikram 		kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
136625e8c5aaSvikram 		dtmpl->ctd_minor = NULL;
136725e8c5aaSvikram 	}
136825e8c5aaSvikram 	mutex_exit(&tmpl->ctmpl_lock);
136925e8c5aaSvikram 
137025e8c5aaSvikram 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
137125e8c5aaSvikram 
137225e8c5aaSvikram 	if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) {
137325e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive "
137425e8c5aaSvikram 		    "minor path from dev_t,spec {%lu, %d} for process (%d)",
137525e8c5aaSvikram 		    dev, spec_type, curproc->p_pid));
137625e8c5aaSvikram 		ctmpl_free(tmpl);
137725e8c5aaSvikram 		kmem_free(path, MAXPATHLEN);
137825e8c5aaSvikram 		return (1);
137925e8c5aaSvikram 	}
138025e8c5aaSvikram 
138125e8c5aaSvikram 	mutex_enter(&tmpl->ctmpl_lock);
138225e8c5aaSvikram 	ASSERT(dtmpl->ctd_minor == NULL);
138325e8c5aaSvikram 	dtmpl->ctd_minor = path;
138425e8c5aaSvikram 	mutex_exit(&tmpl->ctmpl_lock);
138525e8c5aaSvikram 
138625e8c5aaSvikram 	ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error);
138725e8c5aaSvikram 
138825e8c5aaSvikram 	mutex_enter(&tmpl->ctmpl_lock);
138925e8c5aaSvikram 	ASSERT(dtmpl->ctd_minor);
139025e8c5aaSvikram 	dtmpl->ctd_minor = NULL;
139125e8c5aaSvikram 	mutex_exit(&tmpl->ctmpl_lock);
139225e8c5aaSvikram 	ctmpl_free(tmpl);
139325e8c5aaSvikram 	kmem_free(path, MAXPATHLEN);
139425e8c5aaSvikram 
139525e8c5aaSvikram 	if (ctd == NULL) {
139625e8c5aaSvikram 		cmn_err(CE_NOTE, "contract_device_open(): Failed to "
139725e8c5aaSvikram 		    "create device contract for process (%d) holding "
139825e8c5aaSvikram 		    "device (devt = %lu, spec_type = %d)",
139925e8c5aaSvikram 		    curproc->p_pid, dev, spec_type);
140025e8c5aaSvikram 		return (1);
140125e8c5aaSvikram 	}
140225e8c5aaSvikram 
140325e8c5aaSvikram 	if (ctpp) {
140425e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
140525e8c5aaSvikram 		*ctpp = &ctd->cond_contract;
140625e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
140725e8c5aaSvikram 	}
140825e8c5aaSvikram 	return (0);
140925e8c5aaSvikram }
141025e8c5aaSvikram 
141125e8c5aaSvikram /*
141225e8c5aaSvikram  * Called during contract negotiation by the device contract framework to wait
141325e8c5aaSvikram  * for ACKs or NACKs from contract holders. If all responses are not received
141425e8c5aaSvikram  * before a specified timeout, this routine times out.
141525e8c5aaSvikram  */
141625e8c5aaSvikram static uint_t
141725e8c5aaSvikram wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype)
141825e8c5aaSvikram {
141925e8c5aaSvikram 	cont_device_t *ctd;
142025e8c5aaSvikram 	int timed_out = 0;
142125e8c5aaSvikram 	int result = CT_NONE;
142225e8c5aaSvikram 	int ack;
142325e8c5aaSvikram 	char *f = "wait_for_acks";
142425e8c5aaSvikram 
142525e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
142625e8c5aaSvikram 	ASSERT(dip);
142725e8c5aaSvikram 	ASSERT(evtype & CT_DEV_ALLEVENT);
142825e8c5aaSvikram 	ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
142925e8c5aaSvikram 	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
143025e8c5aaSvikram 	    (spec_type == S_IFBLK || spec_type == S_IFCHR));
143125e8c5aaSvikram 
143225e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip));
143325e8c5aaSvikram 
143425e8c5aaSvikram 	if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) {
143525e8c5aaSvikram 		/*
143625e8c5aaSvikram 		 * some contract owner(s) didn't respond in time
143725e8c5aaSvikram 		 */
143825e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip));
143925e8c5aaSvikram 		timed_out = 1;
144025e8c5aaSvikram 	}
144125e8c5aaSvikram 
144225e8c5aaSvikram 	ack = 0;
144325e8c5aaSvikram 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
144425e8c5aaSvikram 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
144525e8c5aaSvikram 
144625e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
144725e8c5aaSvikram 
144825e8c5aaSvikram 		ASSERT(ctd->cond_dip == dip);
144925e8c5aaSvikram 
145025e8c5aaSvikram 		if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
145125e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
145225e8c5aaSvikram 			continue;
145325e8c5aaSvikram 		}
145425e8c5aaSvikram 		if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
145525e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
145625e8c5aaSvikram 			continue;
145725e8c5aaSvikram 		}
145825e8c5aaSvikram 
145925e8c5aaSvikram 		/* skip if non-negotiable contract */
146025e8c5aaSvikram 		if (ctd->cond_noneg) {
146125e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
146225e8c5aaSvikram 			continue;
146325e8c5aaSvikram 		}
146425e8c5aaSvikram 
146525e8c5aaSvikram 		ASSERT(ctd->cond_currev_type == evtype);
146625e8c5aaSvikram 		if (ctd->cond_currev_ack == CT_NACK) {
146725e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p",
146825e8c5aaSvikram 			    f, (void *)dip));
146925e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
147025e8c5aaSvikram 			return (CT_NACK);
147125e8c5aaSvikram 		} else if (ctd->cond_currev_ack == CT_ACK) {
147225e8c5aaSvikram 			ack = 1;
147325e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "%s: found a ACK: %p",
147425e8c5aaSvikram 			    f, (void *)dip));
147525e8c5aaSvikram 		}
147625e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
147725e8c5aaSvikram 	}
147825e8c5aaSvikram 
147925e8c5aaSvikram 	if (ack) {
148025e8c5aaSvikram 		result = CT_ACK;
148125e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip));
148225e8c5aaSvikram 	} else if (timed_out) {
148325e8c5aaSvikram 		result = CT_NONE;
148425e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p",
148525e8c5aaSvikram 		    f, (void *)dip));
148625e8c5aaSvikram 	} else {
148725e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p",
148825e8c5aaSvikram 		    f, (void *)dip));
148925e8c5aaSvikram 	}
149025e8c5aaSvikram 
149125e8c5aaSvikram 
149225e8c5aaSvikram 	return (result);
149325e8c5aaSvikram }
149425e8c5aaSvikram 
149525e8c5aaSvikram /*
149625e8c5aaSvikram  * Determines the current state of a device (i.e a devinfo node
149725e8c5aaSvikram  */
149825e8c5aaSvikram static int
149925e8c5aaSvikram get_state(dev_info_t *dip)
150025e8c5aaSvikram {
150125e8c5aaSvikram 	if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip))
150225e8c5aaSvikram 		return (CT_DEV_EV_OFFLINE);
150325e8c5aaSvikram 	else if (DEVI_IS_DEVICE_DEGRADED(dip))
150425e8c5aaSvikram 		return (CT_DEV_EV_DEGRADED);
150525e8c5aaSvikram 	else
150625e8c5aaSvikram 		return (CT_DEV_EV_ONLINE);
150725e8c5aaSvikram }
150825e8c5aaSvikram 
150925e8c5aaSvikram /*
151025e8c5aaSvikram  * Sets the current state of a device in a device contract
151125e8c5aaSvikram  */
151225e8c5aaSvikram static void
151325e8c5aaSvikram set_cond_state(dev_info_t *dip)
151425e8c5aaSvikram {
151525e8c5aaSvikram 	uint_t state = get_state(dip);
151625e8c5aaSvikram 	cont_device_t *ctd;
151725e8c5aaSvikram 
151825e8c5aaSvikram 	/* verify that barrier is held */
151925e8c5aaSvikram 	ASSERT(ct_barrier_held(dip));
152025e8c5aaSvikram 
152125e8c5aaSvikram 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
152225e8c5aaSvikram 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
152325e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
152425e8c5aaSvikram 		ASSERT(ctd->cond_dip == dip);
152525e8c5aaSvikram 		ctd->cond_state = state;
152625e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
152725e8c5aaSvikram 	}
152825e8c5aaSvikram }
152925e8c5aaSvikram 
153025e8c5aaSvikram /*
153125e8c5aaSvikram  * Core routine called by event-specific routines when an event occurs.
153225e8c5aaSvikram  * Determines if an event should be published, and if it is to be
153325e8c5aaSvikram  * published, whether a negotiation should take place. Also implements
153425e8c5aaSvikram  * NEGEND events which publish the final disposition of an event after
153525e8c5aaSvikram  * negotiations are complete.
153625e8c5aaSvikram  *
153725e8c5aaSvikram  * When an event occurs on a minor node, this routine walks the list of
153825e8c5aaSvikram  * contracts hanging off a devinfo node and for each contract on the affected
153925e8c5aaSvikram  * dip, evaluates the following cases
154025e8c5aaSvikram  *
154125e8c5aaSvikram  *	a. an event that is synchronous, breaks the contract and NONEG not set
154225e8c5aaSvikram  *		- bumps up the outstanding negotiation counts on the dip
154325e8c5aaSvikram  *		- marks the dip as undergoing negotiation (devi_ct_neg)
154425e8c5aaSvikram  *		- event of type CTE_NEG is published
154525e8c5aaSvikram  *	b. an event that is synchronous, breaks the contract and NONEG is set
154625e8c5aaSvikram  *		- sets the final result to CT_NACK, event is blocked
154725e8c5aaSvikram  *		- does not publish an event
154825e8c5aaSvikram  *	c. event is asynchronous and breaks the contract
154925e8c5aaSvikram  *		- publishes a critical event irrespective of whether the NONEG
155025e8c5aaSvikram  *		  flag is set, since the contract will be broken and contract
155125e8c5aaSvikram  *		  owner needs to be informed.
155225e8c5aaSvikram  *	d. No contract breakage but the owner has subscribed to the event
155325e8c5aaSvikram  *		- publishes the event irrespective of the NONEG flag as the
155425e8c5aaSvikram  *		  owner has explicitly subscribed to the event.
155525e8c5aaSvikram  *	e. NEGEND event
155625e8c5aaSvikram  *		- publishes a critical event. Should only be doing this if
155725e8c5aaSvikram  *		  NONEG is not set.
155825e8c5aaSvikram  *	f. all other events
155925e8c5aaSvikram  *		- Since a contract is not broken and this event has not been
156025e8c5aaSvikram  *		  subscribed to, this event does not need to be published for
156125e8c5aaSvikram  *		  this contract.
156225e8c5aaSvikram  *
156325e8c5aaSvikram  *	Once an event is published, what happens next depends on the type of
156425e8c5aaSvikram  *	event:
156525e8c5aaSvikram  *
156625e8c5aaSvikram  *	a. NEGEND event
156725e8c5aaSvikram  *		- cleanup all state associated with the preceding negotiation
156825e8c5aaSvikram  *		  and return CT_ACK to the caller of contract_device_publish()
156925e8c5aaSvikram  *	b. NACKed event
157025e8c5aaSvikram  *		- One or more contracts had the NONEG term, so the event was
157125e8c5aaSvikram  *		  blocked. Return CT_NACK to the caller.
157225e8c5aaSvikram  *	c. Negotiated event
157325e8c5aaSvikram  *		- Call wait_for_acks() to wait for responses from contract
157425e8c5aaSvikram  *		holders. The end result is either CT_ACK (event is permitted),
157525e8c5aaSvikram  *		CT_NACK (event is blocked) or CT_NONE (no contract owner
157625e8c5aaSvikram  *		responded). This result is returned back to the caller.
157725e8c5aaSvikram  *	d. All other events
157825e8c5aaSvikram  *		- If the event was asynchronous (i.e. not negotiated) or
157925e8c5aaSvikram  *		a contract was not broken return CT_ACK to the caller.
158025e8c5aaSvikram  */
158125e8c5aaSvikram static uint_t
158225e8c5aaSvikram contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
158325e8c5aaSvikram     uint_t evtype, nvlist_t *tnvl)
158425e8c5aaSvikram {
158525e8c5aaSvikram 	cont_device_t *ctd;
158625e8c5aaSvikram 	uint_t result = CT_NONE;
158725e8c5aaSvikram 	uint64_t evid = 0;
158825e8c5aaSvikram 	uint64_t nevid = 0;
158925e8c5aaSvikram 	char *path = NULL;
159025e8c5aaSvikram 	int negend;
159125e8c5aaSvikram 	int match;
159225e8c5aaSvikram 	int sync = 0;
159325e8c5aaSvikram 	contract_t *ct;
159425e8c5aaSvikram 	ct_kevent_t *event;
159525e8c5aaSvikram 	nvlist_t *nvl;
159625e8c5aaSvikram 	int broken = 0;
159725e8c5aaSvikram 
159825e8c5aaSvikram 	ASSERT(dip);
159925e8c5aaSvikram 	ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
160025e8c5aaSvikram 	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
160125e8c5aaSvikram 	    (spec_type == S_IFBLK || spec_type == S_IFCHR));
160225e8c5aaSvikram 	ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));
160325e8c5aaSvikram 
160425e8c5aaSvikram 	/* Is this a synchronous state change ? */
160525e8c5aaSvikram 	if (evtype != CT_EV_NEGEND) {
160625e8c5aaSvikram 		sync = is_sync_neg(get_state(dip), evtype);
160725e8c5aaSvikram 		/* NOP if unsupported transition */
160825e8c5aaSvikram 		if (sync == -2 || sync == -1) {
160925e8c5aaSvikram 			DEVI(dip)->devi_flags |= DEVI_CT_NOP;
161025e8c5aaSvikram 			result = (sync == -2) ? CT_ACK : CT_NONE;
161125e8c5aaSvikram 			goto out;
161225e8c5aaSvikram 		}
161325e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
161425e8c5aaSvikram 		    sync ? "" : " not"));
161525e8c5aaSvikram 	} else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
161625e8c5aaSvikram 		DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
161725e8c5aaSvikram 		result = CT_ACK;
161825e8c5aaSvikram 		goto out;
161925e8c5aaSvikram 	}
162025e8c5aaSvikram 
162125e8c5aaSvikram 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
162225e8c5aaSvikram 	(void) ddi_pathname(dip, path);
162325e8c5aaSvikram 
162425e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
162525e8c5aaSvikram 
162625e8c5aaSvikram 	/*
162725e8c5aaSvikram 	 * Negotiation end - set the state of the device in the contract
162825e8c5aaSvikram 	 */
162925e8c5aaSvikram 	if (evtype == CT_EV_NEGEND) {
163025e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
163125e8c5aaSvikram 		set_cond_state(dip);
163225e8c5aaSvikram 	}
163325e8c5aaSvikram 
163425e8c5aaSvikram 	/*
163525e8c5aaSvikram 	 * If this device didn't go through negotiation, don't publish
163625e8c5aaSvikram 	 * a NEGEND event - simply release the barrier to allow other
163725e8c5aaSvikram 	 * device events in.
163825e8c5aaSvikram 	 */
163925e8c5aaSvikram 	negend = 0;
164025e8c5aaSvikram 	if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
164125e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
164225e8c5aaSvikram 		ct_barrier_release(dip);
164325e8c5aaSvikram 		mutex_exit(&(DEVI(dip)->devi_ct_lock));
164425e8c5aaSvikram 		result = CT_ACK;
164525e8c5aaSvikram 		goto out;
164625e8c5aaSvikram 	} else if (evtype == CT_EV_NEGEND) {
164725e8c5aaSvikram 		/*
164825e8c5aaSvikram 		 * There are negotiated contract breakages that
164925e8c5aaSvikram 		 * need a NEGEND event
165025e8c5aaSvikram 		 */
165125e8c5aaSvikram 		ASSERT(ct_barrier_held(dip));
165225e8c5aaSvikram 		negend = 1;
165325e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
165425e8c5aaSvikram 	} else {
165525e8c5aaSvikram 		/*
165625e8c5aaSvikram 		 * This is a new event, not a NEGEND event. Wait for previous
165725e8c5aaSvikram 		 * contract events to complete.
165825e8c5aaSvikram 		 */
165925e8c5aaSvikram 		ct_barrier_acquire(dip);
166025e8c5aaSvikram 	}
166125e8c5aaSvikram 
166225e8c5aaSvikram 
166325e8c5aaSvikram 	match = 0;
166425e8c5aaSvikram 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
166525e8c5aaSvikram 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
166625e8c5aaSvikram 
166725e8c5aaSvikram 		ctid_t ctid;
166825e8c5aaSvikram 		size_t len = strlen(path);
166925e8c5aaSvikram 
167025e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
167125e8c5aaSvikram 
167225e8c5aaSvikram 		ASSERT(ctd->cond_dip == dip);
167325e8c5aaSvikram 		ASSERT(ctd->cond_minor);
167425e8c5aaSvikram 		ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
167525e8c5aaSvikram 		    ctd->cond_minor[len] == ':');
167625e8c5aaSvikram 
167725e8c5aaSvikram 		if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
167825e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
167925e8c5aaSvikram 			continue;
168025e8c5aaSvikram 		}
168125e8c5aaSvikram 		if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
168225e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
168325e8c5aaSvikram 			continue;
168425e8c5aaSvikram 		}
168525e8c5aaSvikram 
168625e8c5aaSvikram 		/* We have a matching contract */
168725e8c5aaSvikram 		match = 1;
168825e8c5aaSvikram 		ctid = ctd->cond_contract.ct_id;
168925e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
169025e8c5aaSvikram 		    ctid));
169125e8c5aaSvikram 
169225e8c5aaSvikram 		/*
169325e8c5aaSvikram 		 * There are 4 possible cases
169425e8c5aaSvikram 		 * 1. A contract is broken (dev not in acceptable state) and
169525e8c5aaSvikram 		 *    the state change is synchronous - start negotiation
169625e8c5aaSvikram 		 *    by sending a CTE_NEG critical event.
169725e8c5aaSvikram 		 * 2. A contract is broken and the state change is
169825e8c5aaSvikram 		 *    asynchronous - just send a critical event and
169925e8c5aaSvikram 		 *    break the contract.
170025e8c5aaSvikram 		 * 3. Contract is not broken, but consumer has subscribed
170125e8c5aaSvikram 		 *    to the event as a critical or informative event
170225e8c5aaSvikram 		 *    - just send the appropriate event
170325e8c5aaSvikram 		 * 4. contract waiting for negend event - just send the critical
170425e8c5aaSvikram 		 *    NEGEND event.
170525e8c5aaSvikram 		 */
170625e8c5aaSvikram 		broken = 0;
170725e8c5aaSvikram 		if (!negend && !(evtype & ctd->cond_aset)) {
170825e8c5aaSvikram 			broken = 1;
170925e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
171025e8c5aaSvikram 			    ctid));
171125e8c5aaSvikram 		}
171225e8c5aaSvikram 
171325e8c5aaSvikram 		/*
171425e8c5aaSvikram 		 * Don't send event if
171525e8c5aaSvikram 		 *	- contract is not broken AND
171625e8c5aaSvikram 		 *	- contract holder has not subscribed to this event AND
171725e8c5aaSvikram 		 *	- contract not waiting for a NEGEND event
171825e8c5aaSvikram 		 */
171925e8c5aaSvikram 		if (!broken && !EVSENDP(ctd, evtype) &&
172025e8c5aaSvikram 		    !ctd->cond_neg) {
172125e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "contract_device_publish(): "
172225e8c5aaSvikram 			    "contract (%d): no publish reqd: event %d",
172325e8c5aaSvikram 			    ctd->cond_contract.ct_id, evtype));
172425e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
172525e8c5aaSvikram 			continue;
172625e8c5aaSvikram 		}
172725e8c5aaSvikram 
172825e8c5aaSvikram 		/*
172925e8c5aaSvikram 		 * Note: need to kmem_zalloc() the event so mutexes are
173025e8c5aaSvikram 		 * initialized automatically
173125e8c5aaSvikram 		 */
173225e8c5aaSvikram 		ct = &ctd->cond_contract;
173325e8c5aaSvikram 		event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
173425e8c5aaSvikram 		event->cte_type = evtype;
173525e8c5aaSvikram 
173625e8c5aaSvikram 		if (broken && sync) {
173725e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: broken + sync: "
173825e8c5aaSvikram 			    "ctid: %d", ctid));
173925e8c5aaSvikram 			ASSERT(!negend);
174025e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == 0);
174125e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == 0);
174225e8c5aaSvikram 			ASSERT(ctd->cond_currev_ack == 0);
174325e8c5aaSvikram 			ASSERT(ctd->cond_neg == 0);
174425e8c5aaSvikram 			if (ctd->cond_noneg) {
174525e8c5aaSvikram 				/* Nothing to publish. Event has been blocked */
174625e8c5aaSvikram 				CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
174725e8c5aaSvikram 				    "not publishing blocked ev: ctid: %d",
174825e8c5aaSvikram 				    ctid));
174925e8c5aaSvikram 				result = CT_NACK;
175025e8c5aaSvikram 				kmem_free(event, sizeof (ct_kevent_t));
175125e8c5aaSvikram 				mutex_exit(&ctd->cond_contract.ct_lock);
175225e8c5aaSvikram 				continue;
175325e8c5aaSvikram 			}
175425e8c5aaSvikram 			event->cte_flags = CTE_NEG; /* critical neg. event */
175525e8c5aaSvikram 			ctd->cond_currev_type = event->cte_type;
175625e8c5aaSvikram 			ct_barrier_incr(dip);
175725e8c5aaSvikram 			DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
175825e8c5aaSvikram 			ctd->cond_neg = 1;
175925e8c5aaSvikram 		} else if (broken && !sync) {
176025e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
176125e8c5aaSvikram 			    ctid));
176225e8c5aaSvikram 			ASSERT(!negend);
176325e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == 0);
176425e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == 0);
176525e8c5aaSvikram 			ASSERT(ctd->cond_currev_ack == 0);
176625e8c5aaSvikram 			ASSERT(ctd->cond_neg == 0);
176725e8c5aaSvikram 			event->cte_flags = 0; /* critical event */
176825e8c5aaSvikram 		} else if (EVSENDP(ctd, event->cte_type)) {
176925e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
177025e8c5aaSvikram 			    ctid));
177125e8c5aaSvikram 			ASSERT(!negend);
177225e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == 0);
177325e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == 0);
177425e8c5aaSvikram 			ASSERT(ctd->cond_currev_ack == 0);
177525e8c5aaSvikram 			ASSERT(ctd->cond_neg == 0);
177625e8c5aaSvikram 			event->cte_flags = EVINFOP(ctd, event->cte_type) ?
177725e8c5aaSvikram 			    CTE_INFO : 0;
177825e8c5aaSvikram 		} else if (ctd->cond_neg) {
177925e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
178025e8c5aaSvikram 			ASSERT(negend);
178125e8c5aaSvikram 			ASSERT(ctd->cond_noneg == 0);
178225e8c5aaSvikram 			nevid = ctd->cond_contract.ct_nevent ?
178325e8c5aaSvikram 			    ctd->cond_contract.ct_nevent->cte_id : 0;
178425e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == nevid);
178525e8c5aaSvikram 			event->cte_flags = 0;	/* NEGEND is always critical */
178625e8c5aaSvikram 			ctd->cond_currev_id = 0;
178725e8c5aaSvikram 			ctd->cond_currev_type = 0;
178825e8c5aaSvikram 			ctd->cond_currev_ack = 0;
178925e8c5aaSvikram 			ctd->cond_neg = 0;
179025e8c5aaSvikram 		} else {
179125e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: not publishing event for "
179225e8c5aaSvikram 			    "ctid: %d, evtype: %d",
179325e8c5aaSvikram 			    ctd->cond_contract.ct_id, event->cte_type));
179425e8c5aaSvikram 			ASSERT(!negend);
179525e8c5aaSvikram 			ASSERT(ctd->cond_currev_id == 0);
179625e8c5aaSvikram 			ASSERT(ctd->cond_currev_type == 0);
179725e8c5aaSvikram 			ASSERT(ctd->cond_currev_ack == 0);
179825e8c5aaSvikram 			ASSERT(ctd->cond_neg == 0);
179925e8c5aaSvikram 			kmem_free(event, sizeof (ct_kevent_t));
180025e8c5aaSvikram 			mutex_exit(&ctd->cond_contract.ct_lock);
180125e8c5aaSvikram 			continue;
180225e8c5aaSvikram 		}
180325e8c5aaSvikram 
180425e8c5aaSvikram 		nvl = NULL;
180525e8c5aaSvikram 		if (tnvl) {
180625e8c5aaSvikram 			VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
180725e8c5aaSvikram 			if (negend) {
180825e8c5aaSvikram 				int32_t newct = 0;
180925e8c5aaSvikram 				ASSERT(ctd->cond_noneg == 0);
181025e8c5aaSvikram 				VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
181125e8c5aaSvikram 				    == 0);
181225e8c5aaSvikram 				VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
181325e8c5aaSvikram 				    &newct) == 0);
181425e8c5aaSvikram 				VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
181525e8c5aaSvikram 				    newct == 1 ? 0 :
181625e8c5aaSvikram 				    ctd->cond_contract.ct_id) == 0);
181725e8c5aaSvikram 				CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
181825e8c5aaSvikram 				    "CTS_NEVID: %llu, CTS_NEWCT: %s",
181925e8c5aaSvikram 				    ctid, (unsigned long long)nevid,
182025e8c5aaSvikram 				    newct ? "success" : "failure"));
182125e8c5aaSvikram 
182225e8c5aaSvikram 			}
182325e8c5aaSvikram 		}
182425e8c5aaSvikram 
182525e8c5aaSvikram 		if (ctd->cond_neg) {
182625e8c5aaSvikram 			ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
182725e8c5aaSvikram 			ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
182825e8c5aaSvikram 			ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
182925e8c5aaSvikram 			ctd->cond_contract.ct_qtime.ctm_start =
183025e8c5aaSvikram 			    ctd->cond_contract.ct_ntime.ctm_start;
183125e8c5aaSvikram 		}
183225e8c5aaSvikram 
183325e8c5aaSvikram 		/*
183425e8c5aaSvikram 		 * by holding the dip's devi_ct_lock we ensure that
183525e8c5aaSvikram 		 * all ACK/NACKs are held up until we have finished
183625e8c5aaSvikram 		 * publishing to all contracts.
183725e8c5aaSvikram 		 */
183825e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
183925e8c5aaSvikram 		evid = cte_publish_all(ct, event, nvl, NULL);
184025e8c5aaSvikram 		mutex_enter(&ctd->cond_contract.ct_lock);
184125e8c5aaSvikram 
184225e8c5aaSvikram 		if (ctd->cond_neg) {
184325e8c5aaSvikram 			ASSERT(!negend);
184425e8c5aaSvikram 			ASSERT(broken);
184525e8c5aaSvikram 			ASSERT(sync);
184625e8c5aaSvikram 			ASSERT(!ctd->cond_noneg);
184725e8c5aaSvikram 			CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
184825e8c5aaSvikram 			    ": %d", ctid));
184925e8c5aaSvikram 			ctd->cond_currev_id = evid;
185025e8c5aaSvikram 		} else if (negend) {
185125e8c5aaSvikram 			ctd->cond_contract.ct_ntime.ctm_start = -1;
185225e8c5aaSvikram 			ctd->cond_contract.ct_qtime.ctm_start = -1;
185325e8c5aaSvikram 		}
185425e8c5aaSvikram 		mutex_exit(&ctd->cond_contract.ct_lock);
185525e8c5aaSvikram 	}
185625e8c5aaSvikram 
185725e8c5aaSvikram 	/*
185825e8c5aaSvikram 	 * If "negend" set counter back to initial state (-1) so that
185925e8c5aaSvikram 	 * other events can be published. Also clear the negotiation flag
186025e8c5aaSvikram 	 * on dip.
186125e8c5aaSvikram 	 *
186225e8c5aaSvikram 	 * 0 .. n are used for counting.
186325e8c5aaSvikram 	 * -1 indicates counter is available for use.
186425e8c5aaSvikram 	 */
186525e8c5aaSvikram 	if (negend) {
186625e8c5aaSvikram 		/*
186725e8c5aaSvikram 		 * devi_ct_count not necessarily 0. We may have
186825e8c5aaSvikram 		 * timed out in which case, count will be non-zero.
186925e8c5aaSvikram 		 */
187025e8c5aaSvikram 		ct_barrier_release(dip);
187125e8c5aaSvikram 		DEVI(dip)->devi_ct_neg = 0;
187225e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
187325e8c5aaSvikram 		    (void *)dip));
187425e8c5aaSvikram 	} else if (DEVI(dip)->devi_ct_neg) {
187525e8c5aaSvikram 		ASSERT(match);
187625e8c5aaSvikram 		ASSERT(!ct_barrier_empty(dip));
187725e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
187825e8c5aaSvikram 		    DEVI(dip)->devi_ct_count, (void *)dip));
187925e8c5aaSvikram 	} else {
188025e8c5aaSvikram 		/*
188125e8c5aaSvikram 		 * for non-negotiated events or subscribed events or no
188225e8c5aaSvikram 		 * matching contracts
188325e8c5aaSvikram 		 */
188425e8c5aaSvikram 		ASSERT(ct_barrier_empty(dip));
188525e8c5aaSvikram 		ASSERT(DEVI(dip)->devi_ct_neg == 0);
188625e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
188725e8c5aaSvikram 		    "dip=%p", (void *)dip));
188825e8c5aaSvikram 
188925e8c5aaSvikram 		/*
189025e8c5aaSvikram 		 * only this function when called from contract_device_negend()
189125e8c5aaSvikram 		 * can reset the counter to READY state i.e. -1. This function
189225e8c5aaSvikram 		 * is so called for every event whether a NEGEND event is needed
189325e8c5aaSvikram 		 * or not, but the negend event is only published if the event
189425e8c5aaSvikram 		 * whose end they signal is a negotiated event for the contract.
189525e8c5aaSvikram 		 */
189625e8c5aaSvikram 	}
189725e8c5aaSvikram 
189825e8c5aaSvikram 	if (!match) {
189925e8c5aaSvikram 		/* No matching contracts */
190025e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: No matching contract"));
190125e8c5aaSvikram 		result = CT_NONE;
190225e8c5aaSvikram 	} else if (result == CT_NACK) {
190325e8c5aaSvikram 		/* a non-negotiable contract exists and this is a neg. event */
190425e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
190525e8c5aaSvikram 		(void) wait_for_acks(dip, dev, spec_type, evtype);
190625e8c5aaSvikram 	} else if (DEVI(dip)->devi_ct_neg) {
190725e8c5aaSvikram 		/* one or more contracts going through negotiations */
190825e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
190925e8c5aaSvikram 		result = wait_for_acks(dip, dev, spec_type, evtype);
191025e8c5aaSvikram 	} else {
191125e8c5aaSvikram 		/* no negotiated contracts or no broken contracts or NEGEND */
191225e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
191325e8c5aaSvikram 		result = CT_ACK;
191425e8c5aaSvikram 	}
191525e8c5aaSvikram 
191625e8c5aaSvikram 	/*
191725e8c5aaSvikram 	 * Release the lock only now so that the only point where we
191825e8c5aaSvikram 	 * drop the lock is in wait_for_acks(). This is so that we don't
191925e8c5aaSvikram 	 * miss cv_signal/cv_broadcast from contract holders
192025e8c5aaSvikram 	 */
192125e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
192225e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
192325e8c5aaSvikram 
192425e8c5aaSvikram out:
192525e8c5aaSvikram 	nvlist_free(tnvl);
192625e8c5aaSvikram 	if (path)
192725e8c5aaSvikram 		kmem_free(path, MAXPATHLEN);
192825e8c5aaSvikram 
192925e8c5aaSvikram 
193025e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
193125e8c5aaSvikram 	return (result);
193225e8c5aaSvikram }
193325e8c5aaSvikram 
193425e8c5aaSvikram 
193525e8c5aaSvikram /*
193625e8c5aaSvikram  * contract_device_offline
193725e8c5aaSvikram  *
193825e8c5aaSvikram  * Event publishing routine called by I/O framework when a device is offlined.
193925e8c5aaSvikram  */
194025e8c5aaSvikram ct_ack_t
194125e8c5aaSvikram contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
194225e8c5aaSvikram {
194325e8c5aaSvikram 	nvlist_t *nvl;
194425e8c5aaSvikram 	uint_t result;
194525e8c5aaSvikram 	uint_t evtype;
194625e8c5aaSvikram 
194725e8c5aaSvikram 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
194825e8c5aaSvikram 
194925e8c5aaSvikram 	evtype = CT_DEV_EV_OFFLINE;
195025e8c5aaSvikram 	result = contract_device_publish(dip, dev, spec_type, evtype, nvl);
195125e8c5aaSvikram 
195225e8c5aaSvikram 	/*
195325e8c5aaSvikram 	 * If a contract offline is NACKED, the framework expects us to call
195425e8c5aaSvikram 	 * NEGEND ourselves, since we know the final result
195525e8c5aaSvikram 	 */
195625e8c5aaSvikram 	if (result == CT_NACK) {
195725e8c5aaSvikram 		contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
195825e8c5aaSvikram 	}
195925e8c5aaSvikram 
196025e8c5aaSvikram 	return (result);
196125e8c5aaSvikram }
196225e8c5aaSvikram 
196325e8c5aaSvikram /*
196425e8c5aaSvikram  * contract_device_degrade
196525e8c5aaSvikram  *
196625e8c5aaSvikram  * Event publishing routine called by I/O framework when a device
196725e8c5aaSvikram  * moves to degrade state.
196825e8c5aaSvikram  */
196925e8c5aaSvikram /*ARGSUSED*/
197025e8c5aaSvikram void
197125e8c5aaSvikram contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
197225e8c5aaSvikram {
197325e8c5aaSvikram 	nvlist_t *nvl;
197425e8c5aaSvikram 	uint_t evtype;
197525e8c5aaSvikram 
197625e8c5aaSvikram 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
197725e8c5aaSvikram 
197825e8c5aaSvikram 	evtype = CT_DEV_EV_DEGRADED;
197925e8c5aaSvikram 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
198025e8c5aaSvikram }
198125e8c5aaSvikram 
198225e8c5aaSvikram /*
198325e8c5aaSvikram  * contract_device_undegrade
198425e8c5aaSvikram  *
198525e8c5aaSvikram  * Event publishing routine called by I/O framework when a device
198625e8c5aaSvikram  * moves from degraded state to online state.
198725e8c5aaSvikram  */
198825e8c5aaSvikram /*ARGSUSED*/
198925e8c5aaSvikram void
199025e8c5aaSvikram contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
199125e8c5aaSvikram {
199225e8c5aaSvikram 	nvlist_t *nvl;
199325e8c5aaSvikram 	uint_t evtype;
199425e8c5aaSvikram 
199525e8c5aaSvikram 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
199625e8c5aaSvikram 
199725e8c5aaSvikram 	evtype = CT_DEV_EV_ONLINE;
199825e8c5aaSvikram 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
199925e8c5aaSvikram }
200025e8c5aaSvikram 
200125e8c5aaSvikram /*
200225e8c5aaSvikram  * For all contracts which have undergone a negotiation (because the device
200325e8c5aaSvikram  * moved out of the acceptable state for that contract and the state
200425e8c5aaSvikram  * change is synchronous i.e. requires negotiation) this routine publishes
200525e8c5aaSvikram  * a CT_EV_NEGEND event with the final disposition of the event.
200625e8c5aaSvikram  *
200725e8c5aaSvikram  * This event is always a critical event.
200825e8c5aaSvikram  */
200925e8c5aaSvikram void
201025e8c5aaSvikram contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
201125e8c5aaSvikram {
201225e8c5aaSvikram 	nvlist_t *nvl;
201325e8c5aaSvikram 	uint_t evtype;
201425e8c5aaSvikram 
201525e8c5aaSvikram 	ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);
201625e8c5aaSvikram 
201725e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
201825e8c5aaSvikram 	    "dip: %p", result, (void *)dip));
201925e8c5aaSvikram 
202025e8c5aaSvikram 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
202125e8c5aaSvikram 	VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
202225e8c5aaSvikram 	    result == CT_EV_SUCCESS ? 1 : 0) == 0);
202325e8c5aaSvikram 
202425e8c5aaSvikram 	evtype = CT_EV_NEGEND;
202525e8c5aaSvikram 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
202625e8c5aaSvikram 
202725e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
202825e8c5aaSvikram 	    (void *)dip));
202925e8c5aaSvikram }
203025e8c5aaSvikram 
203125e8c5aaSvikram /*
203225e8c5aaSvikram  * Wrapper routine called by other subsystems (such as LDI) to start
203325e8c5aaSvikram  * negotiations when a synchronous device state change occurs.
203425e8c5aaSvikram  * Returns CT_ACK or CT_NACK.
203525e8c5aaSvikram  */
203625e8c5aaSvikram ct_ack_t
203725e8c5aaSvikram contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
203825e8c5aaSvikram     uint_t evtype)
203925e8c5aaSvikram {
204025e8c5aaSvikram 	int	result;
204125e8c5aaSvikram 
204225e8c5aaSvikram 	ASSERT(dip);
204325e8c5aaSvikram 	ASSERT(dev != NODEV);
204425e8c5aaSvikram 	ASSERT(dev != DDI_DEV_T_ANY);
204525e8c5aaSvikram 	ASSERT(dev != DDI_DEV_T_NONE);
204625e8c5aaSvikram 	ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
204725e8c5aaSvikram 
204825e8c5aaSvikram 	switch (evtype) {
204925e8c5aaSvikram 	case CT_DEV_EV_OFFLINE:
205025e8c5aaSvikram 		result = contract_device_offline(dip, dev, spec_type);
205125e8c5aaSvikram 		break;
205225e8c5aaSvikram 	default:
205325e8c5aaSvikram 		cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
205425e8c5aaSvikram 		    "not supported: event (%d) for dev_t (%lu) and spec (%d), "
205525e8c5aaSvikram 		    "dip (%p)", evtype, dev, spec_type, (void *)dip);
205625e8c5aaSvikram 		result = CT_NACK;
205725e8c5aaSvikram 		break;
205825e8c5aaSvikram 	}
205925e8c5aaSvikram 
206025e8c5aaSvikram 	return (result);
206125e8c5aaSvikram }
206225e8c5aaSvikram 
206325e8c5aaSvikram /*
206425e8c5aaSvikram  * A wrapper routine called by other subsystems (such as the LDI) to
206525e8c5aaSvikram  * finalize event processing for a state change event. For synchronous
206625e8c5aaSvikram  * state changes, this publishes NEGEND events. For asynchronous i.e.
206725e8c5aaSvikram  * non-negotiable events this publishes the event.
206825e8c5aaSvikram  */
206925e8c5aaSvikram void
207025e8c5aaSvikram contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
207125e8c5aaSvikram     uint_t evtype, int ct_result)
207225e8c5aaSvikram {
207325e8c5aaSvikram 	ASSERT(dip);
207425e8c5aaSvikram 	ASSERT(dev != NODEV);
207525e8c5aaSvikram 	ASSERT(dev != DDI_DEV_T_ANY);
207625e8c5aaSvikram 	ASSERT(dev != DDI_DEV_T_NONE);
207725e8c5aaSvikram 	ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
207825e8c5aaSvikram 
207925e8c5aaSvikram 	switch (evtype) {
208025e8c5aaSvikram 	case CT_DEV_EV_OFFLINE:
208125e8c5aaSvikram 		contract_device_negend(dip, dev, spec_type, ct_result);
208225e8c5aaSvikram 		break;
208325e8c5aaSvikram 	case CT_DEV_EV_DEGRADED:
208425e8c5aaSvikram 		contract_device_degrade(dip, dev, spec_type);
208525e8c5aaSvikram 		contract_device_negend(dip, dev, spec_type, ct_result);
208625e8c5aaSvikram 		break;
208725e8c5aaSvikram 	case CT_DEV_EV_ONLINE:
208825e8c5aaSvikram 		contract_device_undegrade(dip, dev, spec_type);
208925e8c5aaSvikram 		contract_device_negend(dip, dev, spec_type, ct_result);
209025e8c5aaSvikram 		break;
209125e8c5aaSvikram 	default:
209225e8c5aaSvikram 		cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
209325e8c5aaSvikram 		    "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
209425e8c5aaSvikram 		    evtype, dev, spec_type, (void *)dip);
209525e8c5aaSvikram 		break;
209625e8c5aaSvikram 	}
209725e8c5aaSvikram }
209825e8c5aaSvikram 
209925e8c5aaSvikram /*
210025e8c5aaSvikram  * Called by I/O framework when a devinfo node is freed to remove the
210125e8c5aaSvikram  * association between a devinfo node and its contracts.
210225e8c5aaSvikram  */
210325e8c5aaSvikram void
210425e8c5aaSvikram contract_device_remove_dip(dev_info_t *dip)
210525e8c5aaSvikram {
210625e8c5aaSvikram 	cont_device_t *ctd;
210725e8c5aaSvikram 	cont_device_t *next;
210825e8c5aaSvikram 	contract_t *ct;
210925e8c5aaSvikram 
211025e8c5aaSvikram 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
211125e8c5aaSvikram 	ct_barrier_wait_for_release(dip);
211225e8c5aaSvikram 
211325e8c5aaSvikram 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) {
211425e8c5aaSvikram 		next = list_next(&(DEVI(dip)->devi_ct), ctd);
211525e8c5aaSvikram 		list_remove(&(DEVI(dip)->devi_ct), ctd);
211625e8c5aaSvikram 		ct = &ctd->cond_contract;
211725e8c5aaSvikram 		/*
211825e8c5aaSvikram 		 * Unlink the dip associated with this contract
211925e8c5aaSvikram 		 */
212025e8c5aaSvikram 		mutex_enter(&ct->ct_lock);
212125e8c5aaSvikram 		ASSERT(ctd->cond_dip == dip);
212225e8c5aaSvikram 		ctd->cond_dip = NULL; /* no longer linked to dip */
212325e8c5aaSvikram 		contract_rele(ct);	/* remove hold for dip linkage */
212425e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
212525e8c5aaSvikram 		    "ctid: %d", ct->ct_id));
212625e8c5aaSvikram 		mutex_exit(&ct->ct_lock);
212725e8c5aaSvikram 	}
212825e8c5aaSvikram 	ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
212925e8c5aaSvikram 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
213025e8c5aaSvikram }
213125e8c5aaSvikram 
213225e8c5aaSvikram /*
213325e8c5aaSvikram  * Barrier related routines
213425e8c5aaSvikram  */
213525e8c5aaSvikram static void
213625e8c5aaSvikram ct_barrier_acquire(dev_info_t *dip)
213725e8c5aaSvikram {
213825e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
213925e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
214025e8c5aaSvikram 	while (DEVI(dip)->devi_ct_count != -1)
214125e8c5aaSvikram 		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
214225e8c5aaSvikram 	DEVI(dip)->devi_ct_count = 0;
214325e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
214425e8c5aaSvikram }
214525e8c5aaSvikram 
214625e8c5aaSvikram static void
214725e8c5aaSvikram ct_barrier_release(dev_info_t *dip)
214825e8c5aaSvikram {
214925e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
215025e8c5aaSvikram 	ASSERT(DEVI(dip)->devi_ct_count != -1);
215125e8c5aaSvikram 	DEVI(dip)->devi_ct_count = -1;
215225e8c5aaSvikram 	cv_broadcast(&(DEVI(dip)->devi_ct_cv));
215325e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
215425e8c5aaSvikram }
215525e8c5aaSvikram 
215625e8c5aaSvikram static int
215725e8c5aaSvikram ct_barrier_held(dev_info_t *dip)
215825e8c5aaSvikram {
215925e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
216025e8c5aaSvikram 	return (DEVI(dip)->devi_ct_count != -1);
216125e8c5aaSvikram }
216225e8c5aaSvikram 
216325e8c5aaSvikram static int
216425e8c5aaSvikram ct_barrier_empty(dev_info_t *dip)
216525e8c5aaSvikram {
216625e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
216725e8c5aaSvikram 	ASSERT(DEVI(dip)->devi_ct_count != -1);
216825e8c5aaSvikram 	return (DEVI(dip)->devi_ct_count == 0);
216925e8c5aaSvikram }
217025e8c5aaSvikram 
217125e8c5aaSvikram static void
217225e8c5aaSvikram ct_barrier_wait_for_release(dev_info_t *dip)
217325e8c5aaSvikram {
217425e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
217525e8c5aaSvikram 	while (DEVI(dip)->devi_ct_count != -1)
217625e8c5aaSvikram 		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
217725e8c5aaSvikram }
217825e8c5aaSvikram 
217925e8c5aaSvikram static void
218025e8c5aaSvikram ct_barrier_decr(dev_info_t *dip)
218125e8c5aaSvikram {
218225e8c5aaSvikram 	CT_DEBUG((CE_NOTE, "barrier_decr:  ct_count before decr: %d",
218325e8c5aaSvikram 	    DEVI(dip)->devi_ct_count));
218425e8c5aaSvikram 
218525e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
218625e8c5aaSvikram 	ASSERT(DEVI(dip)->devi_ct_count > 0);
218725e8c5aaSvikram 
218825e8c5aaSvikram 	DEVI(dip)->devi_ct_count--;
218925e8c5aaSvikram 	if (DEVI(dip)->devi_ct_count == 0) {
219025e8c5aaSvikram 		cv_broadcast(&DEVI(dip)->devi_ct_cv);
219125e8c5aaSvikram 		CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
219225e8c5aaSvikram 	}
219325e8c5aaSvikram }
219425e8c5aaSvikram 
219525e8c5aaSvikram static void
219625e8c5aaSvikram ct_barrier_incr(dev_info_t *dip)
219725e8c5aaSvikram {
219825e8c5aaSvikram 	ASSERT(ct_barrier_held(dip));
219925e8c5aaSvikram 	DEVI(dip)->devi_ct_count++;
220025e8c5aaSvikram }
220125e8c5aaSvikram 
220225e8c5aaSvikram static int
220325e8c5aaSvikram ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
220425e8c5aaSvikram {
220525e8c5aaSvikram 	clock_t abstime;
220625e8c5aaSvikram 
220725e8c5aaSvikram 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
220825e8c5aaSvikram 
220925e8c5aaSvikram 	abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000);
221025e8c5aaSvikram 	while (DEVI(dip)->devi_ct_count) {
221125e8c5aaSvikram 		if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
221225e8c5aaSvikram 		    &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
221325e8c5aaSvikram 			return (-1);
221425e8c5aaSvikram 		}
221525e8c5aaSvikram 	}
221625e8c5aaSvikram 	return (0);
221725e8c5aaSvikram }
2218