125e8c5aaSvikram /* 225e8c5aaSvikram * CDDL HEADER START 325e8c5aaSvikram * 425e8c5aaSvikram * The contents of this file are subject to the terms of the 525e8c5aaSvikram * Common Development and Distribution License (the "License"). 625e8c5aaSvikram * You may not use this file except in compliance with the License. 725e8c5aaSvikram * 825e8c5aaSvikram * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 925e8c5aaSvikram * or http://www.opensolaris.org/os/licensing. 1025e8c5aaSvikram * See the License for the specific language governing permissions 1125e8c5aaSvikram * and limitations under the License. 1225e8c5aaSvikram * 1325e8c5aaSvikram * When distributing Covered Code, include this CDDL HEADER in each 1425e8c5aaSvikram * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1525e8c5aaSvikram * If applicable, add the following below this CDDL HEADER, with the 1625e8c5aaSvikram * fields enclosed by brackets "[]" replaced with your own identifying 1725e8c5aaSvikram * information: Portions Copyright [yyyy] [name of copyright owner] 1825e8c5aaSvikram * 1925e8c5aaSvikram * CDDL HEADER END 2025e8c5aaSvikram */ 2125e8c5aaSvikram /* 227b209c2cSacruz * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 2325e8c5aaSvikram * Use is subject to license terms. 
2425e8c5aaSvikram */ 2525e8c5aaSvikram 2625e8c5aaSvikram #pragma ident "%Z%%M% %I% %E% SMI" 2725e8c5aaSvikram 2825e8c5aaSvikram #include <sys/mutex.h> 2925e8c5aaSvikram #include <sys/debug.h> 3025e8c5aaSvikram #include <sys/types.h> 3125e8c5aaSvikram #include <sys/param.h> 3225e8c5aaSvikram #include <sys/kmem.h> 3325e8c5aaSvikram #include <sys/thread.h> 3425e8c5aaSvikram #include <sys/id_space.h> 3525e8c5aaSvikram #include <sys/avl.h> 3625e8c5aaSvikram #include <sys/list.h> 3725e8c5aaSvikram #include <sys/sysmacros.h> 3825e8c5aaSvikram #include <sys/proc.h> 3925e8c5aaSvikram #include <sys/contract.h> 4025e8c5aaSvikram #include <sys/contract_impl.h> 4125e8c5aaSvikram #include <sys/contract/device.h> 4225e8c5aaSvikram #include <sys/contract/device_impl.h> 4325e8c5aaSvikram #include <sys/cmn_err.h> 4425e8c5aaSvikram #include <sys/nvpair.h> 4525e8c5aaSvikram #include <sys/policy.h> 4625e8c5aaSvikram #include <sys/ddi_impldefs.h> 4725e8c5aaSvikram #include <sys/ddi_implfuncs.h> 4825e8c5aaSvikram #include <sys/systm.h> 4925e8c5aaSvikram #include <sys/stat.h> 5025e8c5aaSvikram #include <sys/sunddi.h> 5125e8c5aaSvikram #include <sys/esunddi.h> 5225e8c5aaSvikram #include <sys/ddi.h> 5325e8c5aaSvikram #include <sys/fs/dv_node.h> 5425e8c5aaSvikram #include <sys/sunndi.h> 5525e8c5aaSvikram #undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ 5625e8c5aaSvikram 5725e8c5aaSvikram /* 5825e8c5aaSvikram * Device Contracts 5925e8c5aaSvikram * ----------------- 6025e8c5aaSvikram * This file contains the core code for the device contracts framework. 6125e8c5aaSvikram * A device contract is an agreement or a contract between a process and 6225e8c5aaSvikram * the kernel regarding the state of the device. A device contract may be 6325e8c5aaSvikram * created when a relationship is formed between a device and a process 6425e8c5aaSvikram * i.e. at open(2) time, or it may be created at some point after the device 6525e8c5aaSvikram * has been opened. 
A device contract once formed may be broken by either party. 6625e8c5aaSvikram * A device contract can be broken by the process by an explicit abandon of the 6725e8c5aaSvikram * contract or by an implicit abandon when the process exits. A device contract 6825e8c5aaSvikram * can be broken by the kernel either asynchronously (without negotiation) or 6925e8c5aaSvikram * synchronously (with negotiation). Exactly which happens depends on the device 7025e8c5aaSvikram * state transition. The following state diagram shows the transitions between 7125e8c5aaSvikram * device states. Only device state transitions currently supported by device 7225e8c5aaSvikram * contracts is shown. 7325e8c5aaSvikram * 7425e8c5aaSvikram * <-- A --> 7525e8c5aaSvikram * /-----------------> DEGRADED 7625e8c5aaSvikram * | | 7725e8c5aaSvikram * | | 7825e8c5aaSvikram * | | S 7925e8c5aaSvikram * | | | 8025e8c5aaSvikram * | | v 8125e8c5aaSvikram * v S --> v 8225e8c5aaSvikram * ONLINE ------------> OFFLINE 8325e8c5aaSvikram * 8425e8c5aaSvikram * 8525e8c5aaSvikram * In the figure above, the arrows indicate the direction of transition. The 8625e8c5aaSvikram * letter S refers to transitions which are inherently synchronous i.e. 8725e8c5aaSvikram * require negotiation and the letter A indicates transitions which are 8825e8c5aaSvikram * asynchronous i.e. are done without contract negotiations. A good example 8925e8c5aaSvikram * of a synchronous transition is the ONLINE -> OFFLINE transition. This 9025e8c5aaSvikram * transition cannot happen as long as there are consumers which have the 9125e8c5aaSvikram * device open. Thus some form of negotiation needs to happen between the 9225e8c5aaSvikram * consumers and the kernel to ensure that consumers either close devices 9325e8c5aaSvikram * or disallow the move to OFFLINE. Certain other transitions such as 9425e8c5aaSvikram * ONLINE --> DEGRADED for example, are inherently asynchronous i.e. 9525e8c5aaSvikram * non-negotiable. 
 * A device that suffers a fault that degrades its
 * capabilities will become degraded irrespective of what consumers it has,
 * so a negotiation in this case is pointless.
 *
 * The following device states are currently defined for device contracts:
 *
 *	CT_DEV_EV_ONLINE
 *		The device is online and functioning normally
 *	CT_DEV_EV_DEGRADED
 *		The device is online but is functioning in a degraded capacity
 *	CT_DEV_EV_OFFLINE
 *		The device is offline and is no longer configured
 *
 * A typical consumer of device contracts starts out with a contract
 * template and adds terms to that template. These include the
 * "acceptable set" (A-set) term, which is a bitset of device states which
 * are guaranteed by the contract. If the device moves out of a state in
 * the A-set, the contract is broken. The breaking of the contract can
 * be asynchronous in which case a critical contract event is sent to the
 * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
 * consumer and the kernel. The kernel does this by sending a critical
 * event to the consumer with the CTE_NEG flag set indicating that this
 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
 * privileges, it can send a NACK message to the kernel which will block
 * the device state change.
 * To NACK a negotiable event, a process must
 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 *
 * Other terms include the "minor path" term, specified explicitly if the
 * contract is not being created at open(2) time or specified implicitly
 * if the contract is being created at open time via an activated template.
 *
 * A contract event is sent on any state change to which the contract
 * owner has subscribed via the informative or critical event sets. Only
 * critical events are guaranteed to be delivered. Since all device state
 * changes are controlled by the kernel and cannot be arbitrarily generated
 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
 * need to be asserted in a process's effective set to designate an event as
 * critical. To ensure privacy, a process must either have the same effective
 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
 * asserted in its effective set in order to observe device contract events
 * off the device contract type specific endpoint.
 *
 * Yet another term available with device contracts is the "non-negotiable"
 * term. This term is used to pre-specify a NACK to any contract negotiation.
 * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
 * non-negotiable. In this case, the device contract framework assumes a
 * NACK for any transition to OFFLINE and blocks the offline.
If the A-set 14525e8c5aaSvikram * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE 14625e8c5aaSvikram * are NACKed but transitions to DEGRADE succeed. 14725e8c5aaSvikram * 14825e8c5aaSvikram * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract) 14925e8c5aaSvikram * happens just before the I/O framework attempts to offline a device 15025e8c5aaSvikram * (i.e. detach a device and set the offline flag so that it cannot be 15125e8c5aaSvikram * reattached). A device contract holder is expected to either NACK the offline 15225e8c5aaSvikram * (if privileged) or release the device and allow the offline to proceed. 15325e8c5aaSvikram * 15425e8c5aaSvikram * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract) 15525e8c5aaSvikram * is generated just before the I/O framework transitions the device state 15625e8c5aaSvikram * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology). 15725e8c5aaSvikram * 15825e8c5aaSvikram * The contract holder is expected to ACK or NACK a negotiation event 15925e8c5aaSvikram * within a certain period of time. If the ACK/NACK is not received 16025e8c5aaSvikram * within the timeout period, the device contract framework will behave 16125e8c5aaSvikram * as if the contract does not exist and will proceed with the event. 16225e8c5aaSvikram * 16325e8c5aaSvikram * Unlike a process contract a device contract does not need to exist 16425e8c5aaSvikram * once it is abandoned, since it does not define a fault boundary. It 16525e8c5aaSvikram * merely represents an agreement between a process and the kernel 16625e8c5aaSvikram * regarding the state of the device. Once the process has abandoned 16725e8c5aaSvikram * the contract (either implicitly via a process exit or explicitly) 16825e8c5aaSvikram * the kernel has no reason to retain the contract. As a result 16925e8c5aaSvikram * device contracts are neither inheritable nor need to exist in an 17025e8c5aaSvikram * orphan state. 
17125e8c5aaSvikram * 17225e8c5aaSvikram * A device unlike a process may exist in multiple contracts and has 17325e8c5aaSvikram * a "life" outside a device contract. A device unlike a process 17425e8c5aaSvikram * may exist without an associated contract. Unlike a process contract 17525e8c5aaSvikram * a device contract may be formed after a binding relationship is 17625e8c5aaSvikram * formed between a process and a device. 17725e8c5aaSvikram * 17825e8c5aaSvikram * IMPLEMENTATION NOTES 17925e8c5aaSvikram * ==================== 18025e8c5aaSvikram * DATA STRUCTURES 18125e8c5aaSvikram * ---------------- 18225e8c5aaSvikram * The heart of the device contracts implementation is the device contract 18325e8c5aaSvikram * private cont_device_t (or ctd for short) data structure. It encapsulates 18425e8c5aaSvikram * the generic contract_t data structure and has a number of private 18525e8c5aaSvikram * fields. 18625e8c5aaSvikram * These include: 18725e8c5aaSvikram * cond_minor: The minor device that is the subject of the contract 18825e8c5aaSvikram * cond_aset: The bitset of states which are guaranteed by the 18925e8c5aaSvikram * contract 19025e8c5aaSvikram * cond_noneg: If set, indicates that the result of negotiation has 19125e8c5aaSvikram * been predefined to be a NACK 19225e8c5aaSvikram * In addition, there are other device identifiers such the devinfo node, 19325e8c5aaSvikram * dev_t and spec_type of the minor node. There are also a few fields that 19425e8c5aaSvikram * are used during negotiation to maintain state. See 19525e8c5aaSvikram * uts/common/sys/contract/device_impl.h 19625e8c5aaSvikram * for details. 19725e8c5aaSvikram * The ctd structure represents the device private part of a contract of 19825e8c5aaSvikram * type "device" 19925e8c5aaSvikram * 20025e8c5aaSvikram * Another data structure used by device contracts is ctmpl_device. It is 20125e8c5aaSvikram * the device contracts private part of the contract template structure. 
 * It
 * encapsulates the generic template structure "ct_template_t" and includes
 * the following device contract specific fields
 *	ctd_aset:	The bitset of states that should be guaranteed by a
 *			contract
 *	ctd_noneg:	If set, indicates that contract should NACK a
 *			negotiation
 *	ctd_minor:	The devfs_path (without the /devices prefix) of the
 *			minor node that is the subject of the contract.
 *
 * ALGORITHMS
 * ---------
 * There are three sets of routines in this file
 *	Template related routines
 *	-------------------------
 *	These routines provide support for template related operations initiated
 *	via the generic template operations. These include routines that dup
 *	a template, free it, and set various terms in the template
 *	(such as the minor node path, the acceptable state set (or A-set)
 *	and the non-negotiable term) as well as a routine to query the
 *	device specific portion of the template for the abovementioned terms.
 *	There is also a routine to create (ctmpl_device_create) that is used to
 *	create a contract from a template. This routine calls (after initial
 *	setup) the common function used to create a device contract
 *	(contract_device_create).
 *
 *	core device contract implementation
 *	----------------------------------
 *	These routines support the generic contract framework to provide
 *	functionality that allows contracts to be created, managed and
 *	destroyed.
The contract_device_create() routine is a routine used 23225e8c5aaSvikram * to create a contract from a template (either via an explicit create 23325e8c5aaSvikram * operation on a template or implicitly via an open with an 23425e8c5aaSvikram * activated template.). The contract_device_free() routine assists 23525e8c5aaSvikram * in freeing the device contract specific parts. There are routines 23625e8c5aaSvikram * used to abandon (contract_device_abandon) a device contract as well 23725e8c5aaSvikram * as a routine to destroy (which despite its name does not destroy, 23825e8c5aaSvikram * it only moves a contract to a dead state) a contract. 23925e8c5aaSvikram * There is also a routine to return status information about a 24025e8c5aaSvikram * contract - the level of detail depends on what is requested by the 24125e8c5aaSvikram * user. A value of CTD_FIXED only returns fixed length fields such 24225e8c5aaSvikram * as the A-set, state of device and value of the "noneg" term. If 24325e8c5aaSvikram * CTD_ALL is specified, the minor node path is returned as well. 24425e8c5aaSvikram * 24525e8c5aaSvikram * In addition there are interfaces (contract_device_ack/nack) which 24625e8c5aaSvikram * are used to support negotiation between userland processes and 24725e8c5aaSvikram * device contracts. These interfaces record the acknowledgement 24825e8c5aaSvikram * or lack thereof for negotiation events and help determine if the 24925e8c5aaSvikram * negotiated event should occur. 25025e8c5aaSvikram * 25125e8c5aaSvikram * "backend routines" 25225e8c5aaSvikram * ----------------- 25325e8c5aaSvikram * The backend routines form the interface between the I/O framework 25425e8c5aaSvikram * and the device contract subsystem. 
 *	These routines allow the I/O
 *	framework to call into the device contract subsystem to notify it of
 *	impending changes to a device state as well as to inform of the
 *	final disposition of such attempted state changes. Routines in this
 *	class include contract_device_offline() that indicates an attempt to
 *	offline a device, contract_device_degrade() that indicates that
 *	a device is moving to the degraded state and contract_device_negend()
 *	that is used by the I/O framework to inform the contracts subsystem of
 *	the final disposition of an attempted operation.
 *
 * SUMMARY
 * -------
 * A contract starts its life as a template. A process allocates a device
 * contract template and sets various terms:
 *	The A-set
 *	The device minor node
 *	Critical and informative events
 *	The noneg i.e. no negotiation term
 * Setting of these terms in the template is done via the
 * ctmpl_device_set() entry point in this file. A process can query a
 * template to determine the terms already set in the template - this is
 * facilitated by the ctmpl_device_get() routine.
 *
 * Once all the appropriate terms are set, the contract is instantiated via
 * one of two methods
 * - via an explicit create operation - this is facilitated by the
 *   ctmpl_device_create() entry point
 * - synchronously with the open(2) system call - this is achieved via the
 *   contract_device_open() routine.
 * The core work for both these above functions is done by
 * contract_device_create()
 *
 * A contract once created can be queried for its status. Support for
 * status info is provided by both the common contracts framework and by
 * the "device" contract type. If the level of detail requested is
 * CTD_COMMON, only the common contract framework data is used. Higher
 * levels of detail result in calls to contract_device_status() to supply
 * device contract type specific status information.
 *
 * A contract once created may be abandoned either explicitly or implicitly.
 * In either case, the contract_device_abandon() function is invoked. This
 * function merely calls contract_destroy() which moves the contract to
 * the DEAD state. The device contract portion of destroy processing is
 * provided by contract_device_destroy() which merely disassociates the
 * contract from its device devinfo node. A contract in the DEAD state is
 * not freed. It hangs around until all references to the contract are
 * gone. When that happens, the contract is finally deallocated. The
 * device contract specific portion of the free is done by
 * contract_device_free() which finally frees the device contract specific
 * data structure (cont_device_t).
 *
 * When a device undergoes a state change, the I/O framework calls the
 * corresponding device contract entry point. For example, when a device
 * is about to go OFFLINE, the routine contract_device_offline() is
 * invoked.
Similarly if a device moves to DEGRADED state, the routine 30925e8c5aaSvikram * contract_device_degrade() function is called. These functions call the 31025e8c5aaSvikram * core routine contract_device_publish(). This function determines via 31125e8c5aaSvikram * the function is_sync_neg() whether an event is a synchronous (i.e. 31225e8c5aaSvikram * negotiable) event or not. In the former case contract_device_publish() 31325e8c5aaSvikram * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs 31425e8c5aaSvikram * and/or NACKs from contract holders. In the latter case, it simply 31525e8c5aaSvikram * publishes the event and does not wait. In the negotiation case, ACKs or 31625e8c5aaSvikram * NACKs from userland consumers results in contract_device_ack_nack() 31725e8c5aaSvikram * being called where the result of the negotiation is recorded in the 31825e8c5aaSvikram * contract data structure. Once all outstanding contract owners have 31925e8c5aaSvikram * responded, the device contract code in wait_for_acks() determines the 32025e8c5aaSvikram * final result of the negotiation. A single NACK overrides all other ACKs 32125e8c5aaSvikram * If there is no NACK, then a single ACK will result in an overall ACK 32225e8c5aaSvikram * result. If there are no ACKs or NACKs, then the result CT_NONE is 32325e8c5aaSvikram * returned back to the I/O framework. Once the event is permitted or 32425e8c5aaSvikram * blocked, the I/O framework proceeds or aborts the state change. The 32525e8c5aaSvikram * I/O framework then calls contract_device_negend() with a result code 32625e8c5aaSvikram * indicating final disposition of the event. This call releases the 32725e8c5aaSvikram * barrier and other state associated with the previous negotiation, 32825e8c5aaSvikram * which permits the next event (if any) to come into the device contract 32925e8c5aaSvikram * framework. 
33025e8c5aaSvikram * 33125e8c5aaSvikram * Finally, a device that has outstanding contracts may be removed from 33225e8c5aaSvikram * the system which results in its devinfo node being freed. The devinfo 33325e8c5aaSvikram * free routine in the I/O framework, calls into the device contract 33425e8c5aaSvikram * function - contract_device_remove_dip(). This routine, disassociates 33525e8c5aaSvikram * the dip from all contracts associated with the contract being freed, 33625e8c5aaSvikram * allowing the devinfo node to be freed. 33725e8c5aaSvikram * 33825e8c5aaSvikram * LOCKING 33925e8c5aaSvikram * --------- 34025e8c5aaSvikram * There are four sets of data that need to be protected by locks 34125e8c5aaSvikram * 34225e8c5aaSvikram * i) device contract specific portion of the contract template - This data 34325e8c5aaSvikram * is protected by the template lock ctmpl_lock. 34425e8c5aaSvikram * 34525e8c5aaSvikram * ii) device contract specific portion of the contract - This data is 34625e8c5aaSvikram * protected by the contract lock ct_lock 34725e8c5aaSvikram * 34825e8c5aaSvikram * iii) The linked list of contracts hanging off a devinfo node - This 34925e8c5aaSvikram * list is protected by the per-devinfo node lock devi_ct_lock 35025e8c5aaSvikram * 35125e8c5aaSvikram * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv 35225e8c5aaSvikram * and devi_ct_count that controls state changes to a dip 35325e8c5aaSvikram * 35425e8c5aaSvikram * The template lock is independent in that none of the other locks in this 35525e8c5aaSvikram * file may be taken while holding the template lock (and vice versa). 
35625e8c5aaSvikram * 35725e8c5aaSvikram * The remaining three locks have the following lock order 35825e8c5aaSvikram * 35925e8c5aaSvikram * devi_ct_lock -> ct_count barrier -> ct_lock 36025e8c5aaSvikram * 36125e8c5aaSvikram */ 36225e8c5aaSvikram 36325e8c5aaSvikram static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, 36425e8c5aaSvikram int spec_type, proc_t *owner, int *errorp); 36525e8c5aaSvikram 36625e8c5aaSvikram /* barrier routines */ 36725e8c5aaSvikram static void ct_barrier_acquire(dev_info_t *dip); 36825e8c5aaSvikram static void ct_barrier_release(dev_info_t *dip); 36925e8c5aaSvikram static int ct_barrier_held(dev_info_t *dip); 37025e8c5aaSvikram static int ct_barrier_empty(dev_info_t *dip); 37125e8c5aaSvikram static void ct_barrier_wait_for_release(dev_info_t *dip); 37225e8c5aaSvikram static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); 37325e8c5aaSvikram static void ct_barrier_decr(dev_info_t *dip); 37425e8c5aaSvikram static void ct_barrier_incr(dev_info_t *dip); 37525e8c5aaSvikram 37625e8c5aaSvikram ct_type_t *device_type; 37725e8c5aaSvikram 37825e8c5aaSvikram /* 37925e8c5aaSvikram * Macro predicates for determining when events should be sent and how. 38025e8c5aaSvikram */ 38125e8c5aaSvikram #define EVSENDP(ctd, flag) \ 38225e8c5aaSvikram ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) 38325e8c5aaSvikram 38425e8c5aaSvikram #define EVINFOP(ctd, flag) \ 38525e8c5aaSvikram ((ctd->cond_contract.ct_ev_crit & flag) == 0) 38625e8c5aaSvikram 38725e8c5aaSvikram /* 38825e8c5aaSvikram * State transition table showing which transitions are synchronous and which 38925e8c5aaSvikram * are not. 
39025e8c5aaSvikram */ 39125e8c5aaSvikram struct ct_dev_negtable { 39225e8c5aaSvikram uint_t st_old; 39325e8c5aaSvikram uint_t st_new; 39425e8c5aaSvikram uint_t st_neg; 39525e8c5aaSvikram } ct_dev_negtable[] = { 39625e8c5aaSvikram {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, 39725e8c5aaSvikram {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, 39825e8c5aaSvikram {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, 39925e8c5aaSvikram {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, 40025e8c5aaSvikram {0} 40125e8c5aaSvikram }; 40225e8c5aaSvikram 40325e8c5aaSvikram /* 40425e8c5aaSvikram * Device contract template implementation 40525e8c5aaSvikram */ 40625e8c5aaSvikram 40725e8c5aaSvikram /* 40825e8c5aaSvikram * ctmpl_device_dup 40925e8c5aaSvikram * 41025e8c5aaSvikram * The device contract template dup entry point. 41125e8c5aaSvikram * This simply copies all the fields (generic as well as device contract 41225e8c5aaSvikram * specific) fields of the original. 41325e8c5aaSvikram */ 41425e8c5aaSvikram static struct ct_template * 41525e8c5aaSvikram ctmpl_device_dup(struct ct_template *template) 41625e8c5aaSvikram { 41725e8c5aaSvikram ctmpl_device_t *new; 41825e8c5aaSvikram ctmpl_device_t *old = template->ctmpl_data; 41925e8c5aaSvikram char *buf; 42025e8c5aaSvikram char *minor; 42125e8c5aaSvikram 42225e8c5aaSvikram new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 42325e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 42425e8c5aaSvikram 42525e8c5aaSvikram /* 42625e8c5aaSvikram * copy generic fields. 
42725e8c5aaSvikram * ctmpl_copy returns with old template lock held 42825e8c5aaSvikram */ 42925e8c5aaSvikram ctmpl_copy(&new->ctd_ctmpl, template); 43025e8c5aaSvikram 43125e8c5aaSvikram new->ctd_ctmpl.ctmpl_data = new; 43225e8c5aaSvikram new->ctd_aset = old->ctd_aset; 43325e8c5aaSvikram new->ctd_minor = NULL; 43425e8c5aaSvikram new->ctd_noneg = old->ctd_noneg; 43525e8c5aaSvikram 43625e8c5aaSvikram if (old->ctd_minor) { 43725e8c5aaSvikram ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); 43825e8c5aaSvikram bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); 43925e8c5aaSvikram } else { 44025e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 44125e8c5aaSvikram buf = NULL; 44225e8c5aaSvikram } 44325e8c5aaSvikram 44425e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 44525e8c5aaSvikram if (buf) { 44625e8c5aaSvikram minor = i_ddi_strdup(buf, KM_SLEEP); 44725e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 44825e8c5aaSvikram buf = NULL; 44925e8c5aaSvikram } else { 45025e8c5aaSvikram minor = NULL; 45125e8c5aaSvikram } 45225e8c5aaSvikram mutex_enter(&template->ctmpl_lock); 45325e8c5aaSvikram 45425e8c5aaSvikram if (minor) { 45525e8c5aaSvikram new->ctd_minor = minor; 45625e8c5aaSvikram } 45725e8c5aaSvikram 45825e8c5aaSvikram ASSERT(buf == NULL); 45925e8c5aaSvikram return (&new->ctd_ctmpl); 46025e8c5aaSvikram } 46125e8c5aaSvikram 46225e8c5aaSvikram /* 46325e8c5aaSvikram * ctmpl_device_free 46425e8c5aaSvikram * 46525e8c5aaSvikram * The device contract template free entry point. Just 46625e8c5aaSvikram * frees the template. 
46725e8c5aaSvikram */ 46825e8c5aaSvikram static void 46925e8c5aaSvikram ctmpl_device_free(struct ct_template *template) 47025e8c5aaSvikram { 47125e8c5aaSvikram ctmpl_device_t *dtmpl = template->ctmpl_data; 47225e8c5aaSvikram 47325e8c5aaSvikram if (dtmpl->ctd_minor) 47425e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 47525e8c5aaSvikram 47625e8c5aaSvikram kmem_free(dtmpl, sizeof (ctmpl_device_t)); 47725e8c5aaSvikram } 47825e8c5aaSvikram 47925e8c5aaSvikram /* 48025e8c5aaSvikram * SAFE_EV is the set of events which a non-privileged process is 48125e8c5aaSvikram * allowed to make critical. An unprivileged device contract owner has 48225e8c5aaSvikram * no control over when a device changes state, so all device events 48325e8c5aaSvikram * can be in the critical set. 48425e8c5aaSvikram * 48525e8c5aaSvikram * EXCESS tells us if "value", a critical event set, requires 48625e8c5aaSvikram * additional privilege. For device contracts EXCESS currently 48725e8c5aaSvikram * evaluates to 0. 48825e8c5aaSvikram */ 48925e8c5aaSvikram #define SAFE_EV (CT_DEV_ALLEVENT) 49025e8c5aaSvikram #define EXCESS(value) ((value) & ~SAFE_EV) 49125e8c5aaSvikram 49225e8c5aaSvikram 49325e8c5aaSvikram /* 49425e8c5aaSvikram * ctmpl_device_set 49525e8c5aaSvikram * 49625e8c5aaSvikram * The device contract template set entry point. Sets various terms in the 49725e8c5aaSvikram * template. The non-negotiable term can only be set if the process has 49825e8c5aaSvikram * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 
49925e8c5aaSvikram */ 50025e8c5aaSvikram static int 50125e8c5aaSvikram ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr) 50225e8c5aaSvikram { 50325e8c5aaSvikram ctmpl_device_t *dtmpl = tmpl->ctmpl_data; 50425e8c5aaSvikram int error; 50525e8c5aaSvikram dev_info_t *dip; 50625e8c5aaSvikram int spec_type; 5077b209c2cSacruz uint64_t param_value; 5087b209c2cSacruz char *str_value; 50925e8c5aaSvikram 51025e8c5aaSvikram ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); 51125e8c5aaSvikram 5127b209c2cSacruz if (param->ctpm_id == CTDP_MINOR) { 5137b209c2cSacruz str_value = (char *)param->ctpm_value; 5147b209c2cSacruz str_value[param->ctpm_size - 1] = '\0'; 5157b209c2cSacruz } else { 516*d170b13aSacruz if (param->ctpm_size < sizeof (uint64_t)) 517*d170b13aSacruz return (EINVAL); 5187b209c2cSacruz param_value = *(uint64_t *)param->ctpm_value; 5197b209c2cSacruz } 5207b209c2cSacruz 52125e8c5aaSvikram switch (param->ctpm_id) { 52225e8c5aaSvikram case CTDP_ACCEPT: 5237b209c2cSacruz if (param_value & ~CT_DEV_ALLEVENT) 52425e8c5aaSvikram return (EINVAL); 5257b209c2cSacruz if (param_value == 0) 52625e8c5aaSvikram return (EINVAL); 5277b209c2cSacruz if (param_value == CT_DEV_ALLEVENT) 52825e8c5aaSvikram return (EINVAL); 52925e8c5aaSvikram 5307b209c2cSacruz dtmpl->ctd_aset = param_value; 53125e8c5aaSvikram break; 53225e8c5aaSvikram case CTDP_NONEG: 5337b209c2cSacruz if (param_value != CTDP_NONEG_SET && 5347b209c2cSacruz param_value != CTDP_NONEG_CLEAR) 53525e8c5aaSvikram return (EINVAL); 53625e8c5aaSvikram 53725e8c5aaSvikram /* 53825e8c5aaSvikram * only privileged processes can designate a contract 53925e8c5aaSvikram * non-negotiatble. 
54025e8c5aaSvikram */ 5417b209c2cSacruz if (param_value == CTDP_NONEG_SET && 54225e8c5aaSvikram (error = secpolicy_sys_devices(cr)) != 0) { 54325e8c5aaSvikram return (error); 54425e8c5aaSvikram } 54525e8c5aaSvikram 5467b209c2cSacruz dtmpl->ctd_noneg = param_value; 54725e8c5aaSvikram break; 54825e8c5aaSvikram 54925e8c5aaSvikram case CTDP_MINOR: 5507b209c2cSacruz if (*str_value != '/' || 5517b209c2cSacruz strncmp(str_value, "/devices/", 5527b209c2cSacruz strlen("/devices/")) == 0 || 5537b209c2cSacruz strstr(str_value, "../devices/") != NULL || 5547b209c2cSacruz strchr(str_value, ':') == NULL) { 55525e8c5aaSvikram return (EINVAL); 55625e8c5aaSvikram } 55725e8c5aaSvikram 55825e8c5aaSvikram spec_type = 0; 55925e8c5aaSvikram dip = NULL; 5607b209c2cSacruz if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) { 56125e8c5aaSvikram return (ERANGE); 56225e8c5aaSvikram } 56325e8c5aaSvikram ddi_release_devi(dip); 56425e8c5aaSvikram 56525e8c5aaSvikram if (spec_type != S_IFCHR && spec_type != S_IFBLK) { 56625e8c5aaSvikram return (EINVAL); 56725e8c5aaSvikram } 56825e8c5aaSvikram 56925e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 57025e8c5aaSvikram kmem_free(dtmpl->ctd_minor, 57125e8c5aaSvikram strlen(dtmpl->ctd_minor) + 1); 57225e8c5aaSvikram } 5737b209c2cSacruz dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP); 57425e8c5aaSvikram break; 57525e8c5aaSvikram case CTP_EV_CRITICAL: 57625e8c5aaSvikram /* 57725e8c5aaSvikram * Currently for device contracts, any event 57825e8c5aaSvikram * may be added to the critical set. We retain the 57925e8c5aaSvikram * following code however for future enhancements. 
58025e8c5aaSvikram */ 5817b209c2cSacruz if (EXCESS(param_value) && 58225e8c5aaSvikram (error = secpolicy_contract_event(cr)) != 0) 58325e8c5aaSvikram return (error); 5847b209c2cSacruz tmpl->ctmpl_ev_crit = param_value; 58525e8c5aaSvikram break; 58625e8c5aaSvikram default: 58725e8c5aaSvikram return (EINVAL); 58825e8c5aaSvikram } 58925e8c5aaSvikram 59025e8c5aaSvikram return (0); 59125e8c5aaSvikram } 59225e8c5aaSvikram 59325e8c5aaSvikram /* 59425e8c5aaSvikram * ctmpl_device_get 59525e8c5aaSvikram * 59625e8c5aaSvikram * The device contract template get entry point. Simply fetches and 59725e8c5aaSvikram * returns the value of the requested term. 59825e8c5aaSvikram */ 59925e8c5aaSvikram static int 60025e8c5aaSvikram ctmpl_device_get(struct ct_template *template, ct_param_t *param) 60125e8c5aaSvikram { 60225e8c5aaSvikram ctmpl_device_t *dtmpl = template->ctmpl_data; 6037b209c2cSacruz uint64_t *param_value = param->ctpm_value; 60425e8c5aaSvikram 60525e8c5aaSvikram ASSERT(MUTEX_HELD(&template->ctmpl_lock)); 60625e8c5aaSvikram 607*d170b13aSacruz if (param->ctpm_id == CTDP_ACCEPT || 608*d170b13aSacruz param->ctpm_id == CTDP_NONEG) { 609*d170b13aSacruz if (param->ctpm_size < sizeof (uint64_t)) 610*d170b13aSacruz return (EINVAL); 611*d170b13aSacruz param->ctpm_size = sizeof (uint64_t); 612*d170b13aSacruz } 613*d170b13aSacruz 61425e8c5aaSvikram switch (param->ctpm_id) { 61525e8c5aaSvikram case CTDP_ACCEPT: 6167b209c2cSacruz *param_value = dtmpl->ctd_aset; 61725e8c5aaSvikram break; 61825e8c5aaSvikram case CTDP_NONEG: 6197b209c2cSacruz *param_value = dtmpl->ctd_noneg; 62025e8c5aaSvikram break; 62125e8c5aaSvikram case CTDP_MINOR: 62225e8c5aaSvikram if (dtmpl->ctd_minor) { 6237b209c2cSacruz param->ctpm_size = strlcpy((char *)param->ctpm_value, 6247b209c2cSacruz dtmpl->ctd_minor, param->ctpm_size); 6257b209c2cSacruz param->ctpm_size++; 62625e8c5aaSvikram } else { 62725e8c5aaSvikram return (ENOENT); 62825e8c5aaSvikram } 62925e8c5aaSvikram break; 63025e8c5aaSvikram default: 
63125e8c5aaSvikram return (EINVAL); 63225e8c5aaSvikram } 63325e8c5aaSvikram 63425e8c5aaSvikram return (0); 63525e8c5aaSvikram } 63625e8c5aaSvikram 63725e8c5aaSvikram /* 63825e8c5aaSvikram * Device contract type specific portion of creating a contract using 63925e8c5aaSvikram * a specified template 64025e8c5aaSvikram */ 64125e8c5aaSvikram /*ARGSUSED*/ 64225e8c5aaSvikram int 64325e8c5aaSvikram ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) 64425e8c5aaSvikram { 64525e8c5aaSvikram ctmpl_device_t *dtmpl; 64625e8c5aaSvikram char *buf; 64725e8c5aaSvikram dev_t dev; 64825e8c5aaSvikram int spec_type; 64925e8c5aaSvikram int error; 65025e8c5aaSvikram cont_device_t *ctd; 65125e8c5aaSvikram 65225e8c5aaSvikram if (ctidp == NULL) 65325e8c5aaSvikram return (EINVAL); 65425e8c5aaSvikram 65525e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 65625e8c5aaSvikram 65725e8c5aaSvikram dtmpl = template->ctmpl_data; 65825e8c5aaSvikram 65925e8c5aaSvikram mutex_enter(&template->ctmpl_lock); 66025e8c5aaSvikram if (dtmpl->ctd_minor == NULL) { 66125e8c5aaSvikram /* incomplete template */ 66225e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 66325e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 66425e8c5aaSvikram return (EINVAL); 66525e8c5aaSvikram } else { 66625e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 66725e8c5aaSvikram bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); 66825e8c5aaSvikram } 66925e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 67025e8c5aaSvikram 67125e8c5aaSvikram spec_type = 0; 67225e8c5aaSvikram dev = NODEV; 67325e8c5aaSvikram if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || 67425e8c5aaSvikram dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || 67525e8c5aaSvikram (spec_type != S_IFCHR && spec_type != S_IFBLK)) { 67625e8c5aaSvikram CT_DEBUG((CE_WARN, 67725e8c5aaSvikram "tmpl_create: failed to find device: %s", buf)); 67825e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 67925e8c5aaSvikram return (ERANGE); 68025e8c5aaSvikram } 
68125e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 68225e8c5aaSvikram 68325e8c5aaSvikram ctd = contract_device_create(template->ctmpl_data, 68425e8c5aaSvikram dev, spec_type, curproc, &error); 68525e8c5aaSvikram 68625e8c5aaSvikram if (ctd == NULL) { 68725e8c5aaSvikram CT_DEBUG((CE_WARN, "Failed to create device contract for " 68825e8c5aaSvikram "process (%d) with device (devt = %lu, spec_type = %s)", 68925e8c5aaSvikram curproc->p_pid, dev, 69025e8c5aaSvikram spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); 69125e8c5aaSvikram return (error); 69225e8c5aaSvikram } 69325e8c5aaSvikram 69425e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 69525e8c5aaSvikram *ctidp = ctd->cond_contract.ct_id; 69625e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 69725e8c5aaSvikram 69825e8c5aaSvikram return (0); 69925e8c5aaSvikram } 70025e8c5aaSvikram 70125e8c5aaSvikram /* 70225e8c5aaSvikram * Device contract specific template entry points 70325e8c5aaSvikram */ 70425e8c5aaSvikram static ctmplops_t ctmpl_device_ops = { 70525e8c5aaSvikram ctmpl_device_dup, /* ctop_dup */ 70625e8c5aaSvikram ctmpl_device_free, /* ctop_free */ 70725e8c5aaSvikram ctmpl_device_set, /* ctop_set */ 70825e8c5aaSvikram ctmpl_device_get, /* ctop_get */ 70925e8c5aaSvikram ctmpl_device_create, /* ctop_create */ 71025e8c5aaSvikram CT_DEV_ALLEVENT /* all device events bitmask */ 71125e8c5aaSvikram }; 71225e8c5aaSvikram 71325e8c5aaSvikram 71425e8c5aaSvikram /* 71525e8c5aaSvikram * Device contract implementation 71625e8c5aaSvikram */ 71725e8c5aaSvikram 71825e8c5aaSvikram /* 71925e8c5aaSvikram * contract_device_default 72025e8c5aaSvikram * 72125e8c5aaSvikram * The device contract default template entry point. Creates a 72225e8c5aaSvikram * device contract template with a default A-set and no "noneg" , 72325e8c5aaSvikram * with informative degrade events and critical offline events. 72425e8c5aaSvikram * There is no default minor path. 
72525e8c5aaSvikram */ 72625e8c5aaSvikram static ct_template_t * 72725e8c5aaSvikram contract_device_default(void) 72825e8c5aaSvikram { 72925e8c5aaSvikram ctmpl_device_t *new; 73025e8c5aaSvikram 73125e8c5aaSvikram new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 73225e8c5aaSvikram ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); 73325e8c5aaSvikram 73425e8c5aaSvikram new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; 73525e8c5aaSvikram new->ctd_noneg = 0; 73625e8c5aaSvikram new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; 73725e8c5aaSvikram new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; 73825e8c5aaSvikram 73925e8c5aaSvikram return (&new->ctd_ctmpl); 74025e8c5aaSvikram } 74125e8c5aaSvikram 74225e8c5aaSvikram /* 74325e8c5aaSvikram * contract_device_free 74425e8c5aaSvikram * 74525e8c5aaSvikram * Destroys the device contract specific portion of a contract and 74625e8c5aaSvikram * frees the contract. 74725e8c5aaSvikram */ 74825e8c5aaSvikram static void 74925e8c5aaSvikram contract_device_free(contract_t *ct) 75025e8c5aaSvikram { 75125e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 75225e8c5aaSvikram 75325e8c5aaSvikram ASSERT(ctd->cond_minor); 75425e8c5aaSvikram ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 75525e8c5aaSvikram kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); 75625e8c5aaSvikram 75725e8c5aaSvikram ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && 75825e8c5aaSvikram ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); 75925e8c5aaSvikram 76025e8c5aaSvikram ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); 76125e8c5aaSvikram 76225e8c5aaSvikram ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); 76325e8c5aaSvikram ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); 76425e8c5aaSvikram 76525e8c5aaSvikram ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); 76625e8c5aaSvikram ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); 76725e8c5aaSvikram 76825e8c5aaSvikram ASSERT((ctd->cond_currev_id > 0) ^ 
(ctd->cond_currev_type == 0)); 76925e8c5aaSvikram ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); 77025e8c5aaSvikram 77125e8c5aaSvikram ASSERT(!list_link_active(&ctd->cond_next)); 77225e8c5aaSvikram 77325e8c5aaSvikram kmem_free(ctd, sizeof (cont_device_t)); 77425e8c5aaSvikram } 77525e8c5aaSvikram 77625e8c5aaSvikram /* 77725e8c5aaSvikram * contract_device_abandon 77825e8c5aaSvikram * 77925e8c5aaSvikram * The device contract abandon entry point. 78025e8c5aaSvikram */ 78125e8c5aaSvikram static void 78225e8c5aaSvikram contract_device_abandon(contract_t *ct) 78325e8c5aaSvikram { 78425e8c5aaSvikram ASSERT(MUTEX_HELD(&ct->ct_lock)); 78525e8c5aaSvikram 78625e8c5aaSvikram /* 78725e8c5aaSvikram * device contracts cannot be inherited or orphaned. 78825e8c5aaSvikram * Move the contract to the DEAD_STATE. It will be freed 78925e8c5aaSvikram * once all references to it are gone. 79025e8c5aaSvikram */ 79125e8c5aaSvikram contract_destroy(ct); 79225e8c5aaSvikram } 79325e8c5aaSvikram 79425e8c5aaSvikram /* 79525e8c5aaSvikram * contract_device_destroy 79625e8c5aaSvikram * 79725e8c5aaSvikram * The device contract destroy entry point. 79825e8c5aaSvikram * Called from contract_destroy() to do any type specific destroy. Note 79925e8c5aaSvikram * that destroy is a misnomer - this does not free the contract, it only 80025e8c5aaSvikram * moves it to the dead state. 
A contract is actually freed via 80125e8c5aaSvikram * contract_rele() -> contract_dtor(), contop_free() 80225e8c5aaSvikram */ 80325e8c5aaSvikram static void 80425e8c5aaSvikram contract_device_destroy(contract_t *ct) 80525e8c5aaSvikram { 80625e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 80725e8c5aaSvikram dev_info_t *dip = ctd->cond_dip; 80825e8c5aaSvikram 80925e8c5aaSvikram ASSERT(MUTEX_HELD(&ct->ct_lock)); 81025e8c5aaSvikram 81125e8c5aaSvikram if (dip == NULL) { 81225e8c5aaSvikram /* 81325e8c5aaSvikram * The dip has been removed, this is a dangling contract 81425e8c5aaSvikram * Check that dip linkages are NULL 81525e8c5aaSvikram */ 81625e8c5aaSvikram ASSERT(!list_link_active(&ctd->cond_next)); 81725e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no " 81825e8c5aaSvikram "devinfo node. contract ctid : %d", ct->ct_id)); 81925e8c5aaSvikram return; 82025e8c5aaSvikram } 82125e8c5aaSvikram 82225e8c5aaSvikram /* 82325e8c5aaSvikram * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock 82425e8c5aaSvikram */ 82525e8c5aaSvikram mutex_exit(&ct->ct_lock); 82625e8c5aaSvikram 82725e8c5aaSvikram /* 82825e8c5aaSvikram * Waiting for the barrier to be released is strictly speaking not 82925e8c5aaSvikram * necessary. But it simplifies the implementation of 83025e8c5aaSvikram * contract_device_publish() by establishing the invariant that 83125e8c5aaSvikram * device contracts cannot go away during negotiation. 
83225e8c5aaSvikram */ 83325e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 83425e8c5aaSvikram ct_barrier_wait_for_release(dip); 83525e8c5aaSvikram mutex_enter(&ct->ct_lock); 83625e8c5aaSvikram 83725e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 83825e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 83925e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 84025e8c5aaSvikram 84125e8c5aaSvikram mutex_exit(&ct->ct_lock); 84225e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 84325e8c5aaSvikram mutex_enter(&ct->ct_lock); 84425e8c5aaSvikram } 84525e8c5aaSvikram 84625e8c5aaSvikram /* 84725e8c5aaSvikram * contract_device_status 84825e8c5aaSvikram * 84925e8c5aaSvikram * The device contract status entry point. Called when level of "detail" 85025e8c5aaSvikram * is either CTD_FIXED or CTD_ALL 85125e8c5aaSvikram * 85225e8c5aaSvikram */ 85325e8c5aaSvikram static void 85425e8c5aaSvikram contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, 85525e8c5aaSvikram void *status, model_t model) 85625e8c5aaSvikram { 85725e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 85825e8c5aaSvikram 85925e8c5aaSvikram ASSERT(detail == CTD_FIXED || detail == CTD_ALL); 86025e8c5aaSvikram 86125e8c5aaSvikram mutex_enter(&ct->ct_lock); 86225e8c5aaSvikram contract_status_common(ct, zone, status, model); 86325e8c5aaSvikram 86425e8c5aaSvikram /* 86525e8c5aaSvikram * There's no need to hold the contract lock while accessing static 86625e8c5aaSvikram * data like aset or noneg. But since we need the lock to access other 86725e8c5aaSvikram * data like state, we hold it anyway. 
86825e8c5aaSvikram */ 86925e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); 87025e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); 87125e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); 87225e8c5aaSvikram 87325e8c5aaSvikram if (detail == CTD_FIXED) { 87425e8c5aaSvikram mutex_exit(&ct->ct_lock); 87525e8c5aaSvikram return; 87625e8c5aaSvikram } 87725e8c5aaSvikram 87825e8c5aaSvikram ASSERT(ctd->cond_minor); 87925e8c5aaSvikram VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); 88025e8c5aaSvikram 88125e8c5aaSvikram mutex_exit(&ct->ct_lock); 88225e8c5aaSvikram } 88325e8c5aaSvikram 88425e8c5aaSvikram /* 88525e8c5aaSvikram * Converts a result integer into the corresponding string. Used for printing 88625e8c5aaSvikram * messages 88725e8c5aaSvikram */ 88825e8c5aaSvikram static char * 88925e8c5aaSvikram result_str(uint_t result) 89025e8c5aaSvikram { 89125e8c5aaSvikram switch (result) { 89225e8c5aaSvikram case CT_ACK: 89325e8c5aaSvikram return ("CT_ACK"); 89425e8c5aaSvikram case CT_NACK: 89525e8c5aaSvikram return ("CT_NACK"); 89625e8c5aaSvikram case CT_NONE: 89725e8c5aaSvikram return ("CT_NONE"); 89825e8c5aaSvikram default: 89925e8c5aaSvikram return ("UNKNOWN"); 90025e8c5aaSvikram } 90125e8c5aaSvikram } 90225e8c5aaSvikram 90325e8c5aaSvikram /* 90425e8c5aaSvikram * Converts a device state integer constant into the corresponding string. 90525e8c5aaSvikram * Used to print messages. 
90625e8c5aaSvikram */ 90725e8c5aaSvikram static char * 90825e8c5aaSvikram state_str(uint_t state) 90925e8c5aaSvikram { 91025e8c5aaSvikram switch (state) { 91125e8c5aaSvikram case CT_DEV_EV_ONLINE: 91225e8c5aaSvikram return ("ONLINE"); 91325e8c5aaSvikram case CT_DEV_EV_DEGRADED: 91425e8c5aaSvikram return ("DEGRADED"); 91525e8c5aaSvikram case CT_DEV_EV_OFFLINE: 91625e8c5aaSvikram return ("OFFLINE"); 91725e8c5aaSvikram default: 91825e8c5aaSvikram return ("UNKNOWN"); 91925e8c5aaSvikram } 92025e8c5aaSvikram } 92125e8c5aaSvikram 92225e8c5aaSvikram /* 92325e8c5aaSvikram * Routine that determines if a particular CT_DEV_EV_? event corresponds to a 92425e8c5aaSvikram * synchronous state change or not. 92525e8c5aaSvikram */ 92625e8c5aaSvikram static int 92725e8c5aaSvikram is_sync_neg(uint_t old, uint_t new) 92825e8c5aaSvikram { 92925e8c5aaSvikram int i; 93025e8c5aaSvikram 93125e8c5aaSvikram ASSERT(old & CT_DEV_ALLEVENT); 93225e8c5aaSvikram ASSERT(new & CT_DEV_ALLEVENT); 93325e8c5aaSvikram 93425e8c5aaSvikram if (old == new) { 93525e8c5aaSvikram CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", 93625e8c5aaSvikram state_str(new))); 93725e8c5aaSvikram return (-2); 93825e8c5aaSvikram } 93925e8c5aaSvikram 94025e8c5aaSvikram for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { 94125e8c5aaSvikram if (old == ct_dev_negtable[i].st_old && 94225e8c5aaSvikram new == ct_dev_negtable[i].st_new) { 94325e8c5aaSvikram return (ct_dev_negtable[i].st_neg); 94425e8c5aaSvikram } 94525e8c5aaSvikram } 94625e8c5aaSvikram 94725e8c5aaSvikram CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " 94825e8c5aaSvikram "old = %s -> new = %s", state_str(old), state_str(new))); 94925e8c5aaSvikram 95025e8c5aaSvikram return (-1); 95125e8c5aaSvikram } 95225e8c5aaSvikram 95325e8c5aaSvikram /* 95425e8c5aaSvikram * Used to cleanup cached dv_nodes so that when a device is released by 95525e8c5aaSvikram * a contract holder, its devinfo node can be successfully detached. 
95625e8c5aaSvikram */ 95725e8c5aaSvikram static int 95825e8c5aaSvikram contract_device_dvclean(dev_info_t *dip) 95925e8c5aaSvikram { 96025e8c5aaSvikram char *devnm; 96125e8c5aaSvikram dev_info_t *pdip; 96225e8c5aaSvikram int error; 96325e8c5aaSvikram 96425e8c5aaSvikram ASSERT(dip); 96525e8c5aaSvikram 96625e8c5aaSvikram /* pdip can be NULL if we have contracts against the root dip */ 96725e8c5aaSvikram pdip = ddi_get_parent(dip); 96825e8c5aaSvikram 96925e8c5aaSvikram if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { 97025e8c5aaSvikram char *path; 97125e8c5aaSvikram 97225e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 97325e8c5aaSvikram (void) ddi_pathname(dip, path); 97425e8c5aaSvikram CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " 97525e8c5aaSvikram "device=%s", path)); 97625e8c5aaSvikram kmem_free(path, MAXPATHLEN); 97725e8c5aaSvikram return (EDEADLOCK); 97825e8c5aaSvikram } 97925e8c5aaSvikram 98025e8c5aaSvikram if (pdip) { 98125e8c5aaSvikram devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 98225e8c5aaSvikram (void) ddi_deviname(dip, devnm); 98325e8c5aaSvikram error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); 98425e8c5aaSvikram kmem_free(devnm, MAXNAMELEN + 1); 98525e8c5aaSvikram } else { 98625e8c5aaSvikram error = devfs_clean(dip, NULL, DV_CLEAN_FORCE); 98725e8c5aaSvikram } 98825e8c5aaSvikram 98925e8c5aaSvikram return (error); 99025e8c5aaSvikram } 99125e8c5aaSvikram 99225e8c5aaSvikram /* 99325e8c5aaSvikram * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. 99425e8c5aaSvikram * Results in the ACK or NACK being recorded on the dip for one particular 99525e8c5aaSvikram * contract. The device contracts framework evaluates the ACK/NACKs for all 99625e8c5aaSvikram * contracts against a device to determine if a particular device state change 99725e8c5aaSvikram * should be allowed. 
99825e8c5aaSvikram */ 99925e8c5aaSvikram static int 100025e8c5aaSvikram contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, 100125e8c5aaSvikram uint_t cmd) 100225e8c5aaSvikram { 100325e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 100425e8c5aaSvikram dev_info_t *dip; 100525e8c5aaSvikram ctid_t ctid; 100625e8c5aaSvikram int error; 100725e8c5aaSvikram 100825e8c5aaSvikram ctid = ct->ct_id; 100925e8c5aaSvikram 101025e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); 101125e8c5aaSvikram 101225e8c5aaSvikram mutex_enter(&ct->ct_lock); 101325e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); 101425e8c5aaSvikram 101525e8c5aaSvikram dip = ctd->cond_dip; 101625e8c5aaSvikram 101725e8c5aaSvikram ASSERT(ctd->cond_minor); 101825e8c5aaSvikram ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 101925e8c5aaSvikram 102025e8c5aaSvikram /* 102125e8c5aaSvikram * Negotiation only if new state is not in A-set 102225e8c5aaSvikram */ 102325e8c5aaSvikram ASSERT(!(ctd->cond_aset & evtype)); 102425e8c5aaSvikram 102525e8c5aaSvikram /* 102625e8c5aaSvikram * Negotiation only if transition is synchronous 102725e8c5aaSvikram */ 102825e8c5aaSvikram ASSERT(is_sync_neg(ctd->cond_state, evtype)); 102925e8c5aaSvikram 103025e8c5aaSvikram /* 103125e8c5aaSvikram * We shouldn't be negotiating if the "noneg" flag is set 103225e8c5aaSvikram */ 103325e8c5aaSvikram ASSERT(!ctd->cond_noneg); 103425e8c5aaSvikram 103525e8c5aaSvikram if (dip) 103625e8c5aaSvikram ndi_hold_devi(dip); 103725e8c5aaSvikram 103825e8c5aaSvikram mutex_exit(&ct->ct_lock); 103925e8c5aaSvikram 104025e8c5aaSvikram /* 104125e8c5aaSvikram * dv_clean only if !NACK and offline state change 104225e8c5aaSvikram */ 104325e8c5aaSvikram if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { 104425e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); 104525e8c5aaSvikram error = contract_device_dvclean(dip); 104625e8c5aaSvikram if (error != 0) { 104725e8c5aaSvikram 
CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", 104825e8c5aaSvikram ctid)); 104925e8c5aaSvikram ddi_release_devi(dip); 105025e8c5aaSvikram } 105125e8c5aaSvikram } 105225e8c5aaSvikram 105325e8c5aaSvikram mutex_enter(&ct->ct_lock); 105425e8c5aaSvikram 105525e8c5aaSvikram if (dip) 105625e8c5aaSvikram ddi_release_devi(dip); 105725e8c5aaSvikram 105825e8c5aaSvikram if (dip == NULL) { 105925e8c5aaSvikram if (ctd->cond_currev_id != evid) { 106025e8c5aaSvikram CT_DEBUG((CE_WARN, "%sACK for non-current event " 106125e8c5aaSvikram "(type=%s, id=%llu) on removed device", 106225e8c5aaSvikram cmd == CT_NACK ? "N" : "", 106325e8c5aaSvikram state_str(evtype), (unsigned long long)evid)); 106425e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", 106525e8c5aaSvikram ctid)); 106625e8c5aaSvikram } else { 106725e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 106825e8c5aaSvikram CT_DEBUG((CE_WARN, "contract_ack: no such device: " 106925e8c5aaSvikram "ctid: %d", ctid)); 107025e8c5aaSvikram } 107125e8c5aaSvikram error = (ct->ct_state == CTS_DEAD) ? ESRCH : 107225e8c5aaSvikram ((cmd == CT_NACK) ? 
ETIMEDOUT : 0); 107325e8c5aaSvikram mutex_exit(&ct->ct_lock); 107425e8c5aaSvikram return (error); 107525e8c5aaSvikram } 107625e8c5aaSvikram 107725e8c5aaSvikram /* 107825e8c5aaSvikram * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock 107925e8c5aaSvikram */ 108025e8c5aaSvikram mutex_exit(&ct->ct_lock); 108125e8c5aaSvikram 108225e8c5aaSvikram mutex_enter(&DEVI(dip)->devi_ct_lock); 108325e8c5aaSvikram mutex_enter(&ct->ct_lock); 108425e8c5aaSvikram if (ctd->cond_currev_id != evid) { 108525e8c5aaSvikram char *buf; 108625e8c5aaSvikram mutex_exit(&ct->ct_lock); 108725e8c5aaSvikram mutex_exit(&DEVI(dip)->devi_ct_lock); 108825e8c5aaSvikram ndi_hold_devi(dip); 108925e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 109025e8c5aaSvikram (void) ddi_pathname(dip, buf); 109125e8c5aaSvikram ddi_release_devi(dip); 109225e8c5aaSvikram CT_DEBUG((CE_WARN, "%sACK for non-current event" 109325e8c5aaSvikram "(type=%s, id=%llu) on device %s", 109425e8c5aaSvikram cmd == CT_NACK ? "N" : "", 109525e8c5aaSvikram state_str(evtype), (unsigned long long)evid, buf)); 109625e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 109725e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", 109825e8c5aaSvikram cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); 109925e8c5aaSvikram return (cmd == CT_ACK ? 0 : ETIMEDOUT); 110025e8c5aaSvikram } 110125e8c5aaSvikram 110225e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 110325e8c5aaSvikram ASSERT(cmd == CT_ACK || cmd == CT_NACK); 110425e8c5aaSvikram 110525e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", 110625e8c5aaSvikram cmd == CT_NACK ? 
"N" : "", ctid)); 110725e8c5aaSvikram 110825e8c5aaSvikram ctd->cond_currev_ack = cmd; 110925e8c5aaSvikram mutex_exit(&ct->ct_lock); 111025e8c5aaSvikram 111125e8c5aaSvikram ct_barrier_decr(dip); 111225e8c5aaSvikram mutex_exit(&DEVI(dip)->devi_ct_lock); 111325e8c5aaSvikram 111425e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); 111525e8c5aaSvikram 111625e8c5aaSvikram return (0); 111725e8c5aaSvikram } 111825e8c5aaSvikram 111925e8c5aaSvikram /* 112025e8c5aaSvikram * Invoked when a userland contract holder approves (i.e. ACKs) a state change 112125e8c5aaSvikram */ 112225e8c5aaSvikram static int 112325e8c5aaSvikram contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) 112425e8c5aaSvikram { 112525e8c5aaSvikram return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); 112625e8c5aaSvikram } 112725e8c5aaSvikram 112825e8c5aaSvikram /* 112925e8c5aaSvikram * Invoked when a userland contract holder blocks (i.e. NACKs) a state change 113025e8c5aaSvikram */ 113125e8c5aaSvikram static int 113225e8c5aaSvikram contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) 113325e8c5aaSvikram { 113425e8c5aaSvikram return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); 113525e8c5aaSvikram } 113625e8c5aaSvikram 113725e8c5aaSvikram /* 113825e8c5aaSvikram * Creates a new contract synchronously with the breaking of an existing 113925e8c5aaSvikram * contract. Currently not supported. 
114025e8c5aaSvikram */ 114125e8c5aaSvikram /*ARGSUSED*/ 114225e8c5aaSvikram static int 114325e8c5aaSvikram contract_device_newct(contract_t *ct) 114425e8c5aaSvikram { 114525e8c5aaSvikram return (ENOTSUP); 114625e8c5aaSvikram } 114725e8c5aaSvikram 114825e8c5aaSvikram /* 114925e8c5aaSvikram * Core device contract implementation entry points 115025e8c5aaSvikram */ 115125e8c5aaSvikram static contops_t contract_device_ops = { 115225e8c5aaSvikram contract_device_free, /* contop_free */ 115325e8c5aaSvikram contract_device_abandon, /* contop_abandon */ 115425e8c5aaSvikram contract_device_destroy, /* contop_destroy */ 115525e8c5aaSvikram contract_device_status, /* contop_status */ 115625e8c5aaSvikram contract_device_ack, /* contop_ack */ 115725e8c5aaSvikram contract_device_nack, /* contop_nack */ 115825e8c5aaSvikram contract_qack_notsup, /* contop_qack */ 115925e8c5aaSvikram contract_device_newct /* contop_newct */ 116025e8c5aaSvikram }; 116125e8c5aaSvikram 116225e8c5aaSvikram /* 116325e8c5aaSvikram * contract_device_init 116425e8c5aaSvikram * 116525e8c5aaSvikram * Initializes the device contract type. 116625e8c5aaSvikram */ 116725e8c5aaSvikram void 116825e8c5aaSvikram contract_device_init(void) 116925e8c5aaSvikram { 117025e8c5aaSvikram device_type = contract_type_init(CTT_DEVICE, "device", 117125e8c5aaSvikram &contract_device_ops, contract_device_default); 117225e8c5aaSvikram } 117325e8c5aaSvikram 117425e8c5aaSvikram /* 117525e8c5aaSvikram * contract_device_create 117625e8c5aaSvikram * 117725e8c5aaSvikram * create a device contract given template "tmpl" and the "owner" process. 117825e8c5aaSvikram * May fail and return NULL if project.max-contracts would have been exceeded. 
117925e8c5aaSvikram * 118025e8c5aaSvikram * Common device contract creation routine called for both open-time and 118125e8c5aaSvikram * non-open time device contract creation 118225e8c5aaSvikram */ 118325e8c5aaSvikram static cont_device_t * 118425e8c5aaSvikram contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, 118525e8c5aaSvikram proc_t *owner, int *errorp) 118625e8c5aaSvikram { 118725e8c5aaSvikram cont_device_t *ctd; 118825e8c5aaSvikram char *minor; 118925e8c5aaSvikram char *path; 119025e8c5aaSvikram dev_info_t *dip; 119125e8c5aaSvikram 119225e8c5aaSvikram ASSERT(dtmpl != NULL); 119325e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); 119425e8c5aaSvikram ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); 119525e8c5aaSvikram ASSERT(errorp); 119625e8c5aaSvikram 119725e8c5aaSvikram *errorp = 0; 119825e8c5aaSvikram 119925e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 120025e8c5aaSvikram 120125e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 120225e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 120325e8c5aaSvikram bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); 120425e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 120525e8c5aaSvikram 120625e8c5aaSvikram dip = e_ddi_hold_devi_by_path(path, 0); 120725e8c5aaSvikram if (dip == NULL) { 120825e8c5aaSvikram cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " 120925e8c5aaSvikram "for device path (%s)", path); 121025e8c5aaSvikram kmem_free(path, MAXPATHLEN); 121125e8c5aaSvikram *errorp = ERANGE; 121225e8c5aaSvikram return (NULL); 121325e8c5aaSvikram } 121425e8c5aaSvikram 121525e8c5aaSvikram /* 121625e8c5aaSvikram * Lock out any parallel contract negotiations 121725e8c5aaSvikram */ 121825e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 121925e8c5aaSvikram ct_barrier_acquire(dip); 122025e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 122125e8c5aaSvikram 122225e8c5aaSvikram minor = i_ddi_strdup(path, 
KM_SLEEP); 122325e8c5aaSvikram kmem_free(path, MAXPATHLEN); 122425e8c5aaSvikram 122525e8c5aaSvikram (void) contract_type_pbundle(device_type, owner); 122625e8c5aaSvikram 122725e8c5aaSvikram ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); 122825e8c5aaSvikram 122925e8c5aaSvikram /* 123025e8c5aaSvikram * Only we hold a refernce to this contract. Safe to access 123125e8c5aaSvikram * the fields without a ct_lock 123225e8c5aaSvikram */ 123325e8c5aaSvikram ctd->cond_minor = minor; 123425e8c5aaSvikram /* 123525e8c5aaSvikram * It is safe to set the dip pointer in the contract 123625e8c5aaSvikram * as the contract will always be destroyed before the dip 123725e8c5aaSvikram * is released 123825e8c5aaSvikram */ 123925e8c5aaSvikram ctd->cond_dip = dip; 124025e8c5aaSvikram ctd->cond_devt = dev; 124125e8c5aaSvikram ctd->cond_spec = spec_type; 124225e8c5aaSvikram 124325e8c5aaSvikram /* 124425e8c5aaSvikram * Since we are able to lookup the device, it is either 124525e8c5aaSvikram * online or degraded 124625e8c5aaSvikram */ 124725e8c5aaSvikram ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 124825e8c5aaSvikram CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; 124925e8c5aaSvikram 125025e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 125125e8c5aaSvikram ctd->cond_aset = dtmpl->ctd_aset; 125225e8c5aaSvikram ctd->cond_noneg = dtmpl->ctd_noneg; 125325e8c5aaSvikram 125425e8c5aaSvikram /* 125525e8c5aaSvikram * contract_ctor() initailizes the common portion of a contract 125625e8c5aaSvikram * contract_dtor() destroys the common portion of a contract 125725e8c5aaSvikram */ 125825e8c5aaSvikram if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, 125925e8c5aaSvikram ctd, 0, owner, B_TRUE)) { 126025e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 126125e8c5aaSvikram /* 126225e8c5aaSvikram * contract_device_free() destroys the type specific 126325e8c5aaSvikram * portion of a contract and frees the contract. 
126425e8c5aaSvikram * The "minor" path and "cred" is a part of the type specific 126525e8c5aaSvikram * portion of the contract and will be freed by 126625e8c5aaSvikram * contract_device_free() 126725e8c5aaSvikram */ 126825e8c5aaSvikram contract_device_free(&ctd->cond_contract); 126925e8c5aaSvikram 127025e8c5aaSvikram /* release barrier */ 127125e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 127225e8c5aaSvikram ct_barrier_release(dip); 127325e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 127425e8c5aaSvikram 127525e8c5aaSvikram ddi_release_devi(dip); 127625e8c5aaSvikram *errorp = EAGAIN; 127725e8c5aaSvikram return (NULL); 127825e8c5aaSvikram } 127925e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 128025e8c5aaSvikram 128125e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 128225e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; 128325e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; 128425e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 128525e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 128625e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 128725e8c5aaSvikram 128825e8c5aaSvikram /* 128925e8c5aaSvikram * Insert device contract into list hanging off the dip 129025e8c5aaSvikram * Bump up the ref-count on the contract to reflect this 129125e8c5aaSvikram */ 129225e8c5aaSvikram contract_hold(&ctd->cond_contract); 129325e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 129425e8c5aaSvikram list_insert_tail(&(DEVI(dip)->devi_ct), ctd); 129525e8c5aaSvikram 129625e8c5aaSvikram /* release barrier */ 129725e8c5aaSvikram ct_barrier_release(dip); 129825e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 129925e8c5aaSvikram 130025e8c5aaSvikram ddi_release_devi(dip); 130125e8c5aaSvikram 130225e8c5aaSvikram return (ctd); 130325e8c5aaSvikram } 130425e8c5aaSvikram 130525e8c5aaSvikram /* 130625e8c5aaSvikram * Called when a device is successfully opened to create an open-time contract 
130725e8c5aaSvikram * i.e. synchronously with a device open. 130825e8c5aaSvikram */ 130925e8c5aaSvikram int 131025e8c5aaSvikram contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) 131125e8c5aaSvikram { 131225e8c5aaSvikram ctmpl_device_t *dtmpl; 131325e8c5aaSvikram ct_template_t *tmpl; 131425e8c5aaSvikram cont_device_t *ctd; 131525e8c5aaSvikram char *path; 131625e8c5aaSvikram klwp_t *lwp; 131725e8c5aaSvikram int error; 131825e8c5aaSvikram 131925e8c5aaSvikram if (ctpp) 132025e8c5aaSvikram *ctpp = NULL; 132125e8c5aaSvikram 132225e8c5aaSvikram /* 132325e8c5aaSvikram * Check if we are in user-context i.e. if we have an lwp 132425e8c5aaSvikram */ 132525e8c5aaSvikram lwp = ttolwp(curthread); 132625e8c5aaSvikram if (lwp == NULL) { 132725e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); 132825e8c5aaSvikram return (0); 132925e8c5aaSvikram } 133025e8c5aaSvikram 133125e8c5aaSvikram tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); 133225e8c5aaSvikram if (tmpl == NULL) { 133325e8c5aaSvikram return (0); 133425e8c5aaSvikram } 133525e8c5aaSvikram dtmpl = tmpl->ctmpl_data; 133625e8c5aaSvikram 133725e8c5aaSvikram /* 133825e8c5aaSvikram * If the user set a minor path in the template before an open, 133925e8c5aaSvikram * ignore it. We use the minor path of the actual minor opened. 134025e8c5aaSvikram */ 134125e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 134225e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 134325e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " 134425e8c5aaSvikram "ignoring device minor path in active template: %s", 134525e8c5aaSvikram curproc->p_pid, dtmpl->ctd_minor)); 134625e8c5aaSvikram /* 134725e8c5aaSvikram * This is a copy of the actual activated template. 134825e8c5aaSvikram * Safe to make changes such as freeing the minor 134925e8c5aaSvikram * path in the template. 
135025e8c5aaSvikram */ 135125e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 135225e8c5aaSvikram dtmpl->ctd_minor = NULL; 135325e8c5aaSvikram } 135425e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 135525e8c5aaSvikram 135625e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 135725e8c5aaSvikram 135825e8c5aaSvikram if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { 135925e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " 136025e8c5aaSvikram "minor path from dev_t,spec {%lu, %d} for process (%d)", 136125e8c5aaSvikram dev, spec_type, curproc->p_pid)); 136225e8c5aaSvikram ctmpl_free(tmpl); 136325e8c5aaSvikram kmem_free(path, MAXPATHLEN); 136425e8c5aaSvikram return (1); 136525e8c5aaSvikram } 136625e8c5aaSvikram 136725e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 136825e8c5aaSvikram ASSERT(dtmpl->ctd_minor == NULL); 136925e8c5aaSvikram dtmpl->ctd_minor = path; 137025e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 137125e8c5aaSvikram 137225e8c5aaSvikram ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); 137325e8c5aaSvikram 137425e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 137525e8c5aaSvikram ASSERT(dtmpl->ctd_minor); 137625e8c5aaSvikram dtmpl->ctd_minor = NULL; 137725e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 137825e8c5aaSvikram ctmpl_free(tmpl); 137925e8c5aaSvikram kmem_free(path, MAXPATHLEN); 138025e8c5aaSvikram 138125e8c5aaSvikram if (ctd == NULL) { 138225e8c5aaSvikram cmn_err(CE_NOTE, "contract_device_open(): Failed to " 138325e8c5aaSvikram "create device contract for process (%d) holding " 138425e8c5aaSvikram "device (devt = %lu, spec_type = %d)", 138525e8c5aaSvikram curproc->p_pid, dev, spec_type); 138625e8c5aaSvikram return (1); 138725e8c5aaSvikram } 138825e8c5aaSvikram 138925e8c5aaSvikram if (ctpp) { 139025e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 139125e8c5aaSvikram *ctpp = &ctd->cond_contract; 139225e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 
139325e8c5aaSvikram } 139425e8c5aaSvikram return (0); 139525e8c5aaSvikram } 139625e8c5aaSvikram 139725e8c5aaSvikram /* 139825e8c5aaSvikram * Called during contract negotiation by the device contract framework to wait 139925e8c5aaSvikram * for ACKs or NACKs from contract holders. If all responses are not received 140025e8c5aaSvikram * before a specified timeout, this routine times out. 140125e8c5aaSvikram */ 140225e8c5aaSvikram static uint_t 140325e8c5aaSvikram wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) 140425e8c5aaSvikram { 140525e8c5aaSvikram cont_device_t *ctd; 140625e8c5aaSvikram int timed_out = 0; 140725e8c5aaSvikram int result = CT_NONE; 140825e8c5aaSvikram int ack; 140925e8c5aaSvikram char *f = "wait_for_acks"; 141025e8c5aaSvikram 141125e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 141225e8c5aaSvikram ASSERT(dip); 141325e8c5aaSvikram ASSERT(evtype & CT_DEV_ALLEVENT); 141425e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 141525e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 141625e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 141725e8c5aaSvikram 141825e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); 141925e8c5aaSvikram 142025e8c5aaSvikram if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { 142125e8c5aaSvikram /* 142225e8c5aaSvikram * some contract owner(s) didn't respond in time 142325e8c5aaSvikram */ 142425e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); 142525e8c5aaSvikram timed_out = 1; 142625e8c5aaSvikram } 142725e8c5aaSvikram 142825e8c5aaSvikram ack = 0; 142925e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 143025e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 143125e8c5aaSvikram 143225e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 143325e8c5aaSvikram 143425e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 143525e8c5aaSvikram 143625e8c5aaSvikram if (dev != DDI_DEV_T_ANY 
&& dev != ctd->cond_devt) { 143725e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 143825e8c5aaSvikram continue; 143925e8c5aaSvikram } 144025e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 144125e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 144225e8c5aaSvikram continue; 144325e8c5aaSvikram } 144425e8c5aaSvikram 144525e8c5aaSvikram /* skip if non-negotiable contract */ 144625e8c5aaSvikram if (ctd->cond_noneg) { 144725e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 144825e8c5aaSvikram continue; 144925e8c5aaSvikram } 145025e8c5aaSvikram 145125e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 145225e8c5aaSvikram if (ctd->cond_currev_ack == CT_NACK) { 145325e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", 145425e8c5aaSvikram f, (void *)dip)); 145525e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 145625e8c5aaSvikram return (CT_NACK); 145725e8c5aaSvikram } else if (ctd->cond_currev_ack == CT_ACK) { 145825e8c5aaSvikram ack = 1; 145925e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", 146025e8c5aaSvikram f, (void *)dip)); 146125e8c5aaSvikram } 146225e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 146325e8c5aaSvikram } 146425e8c5aaSvikram 146525e8c5aaSvikram if (ack) { 146625e8c5aaSvikram result = CT_ACK; 146725e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); 146825e8c5aaSvikram } else if (timed_out) { 146925e8c5aaSvikram result = CT_NONE; 147025e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", 147125e8c5aaSvikram f, (void *)dip)); 147225e8c5aaSvikram } else { 147325e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", 147425e8c5aaSvikram f, (void *)dip)); 147525e8c5aaSvikram } 147625e8c5aaSvikram 147725e8c5aaSvikram 147825e8c5aaSvikram return (result); 147925e8c5aaSvikram } 148025e8c5aaSvikram 148125e8c5aaSvikram /* 148225e8c5aaSvikram * Determines the current state of a device (i.e a devinfo node 148325e8c5aaSvikram */ 
148425e8c5aaSvikram static int 148525e8c5aaSvikram get_state(dev_info_t *dip) 148625e8c5aaSvikram { 148725e8c5aaSvikram if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) 148825e8c5aaSvikram return (CT_DEV_EV_OFFLINE); 148925e8c5aaSvikram else if (DEVI_IS_DEVICE_DEGRADED(dip)) 149025e8c5aaSvikram return (CT_DEV_EV_DEGRADED); 149125e8c5aaSvikram else 149225e8c5aaSvikram return (CT_DEV_EV_ONLINE); 149325e8c5aaSvikram } 149425e8c5aaSvikram 149525e8c5aaSvikram /* 149625e8c5aaSvikram * Sets the current state of a device in a device contract 149725e8c5aaSvikram */ 149825e8c5aaSvikram static void 149925e8c5aaSvikram set_cond_state(dev_info_t *dip) 150025e8c5aaSvikram { 150125e8c5aaSvikram uint_t state = get_state(dip); 150225e8c5aaSvikram cont_device_t *ctd; 150325e8c5aaSvikram 150425e8c5aaSvikram /* verify that barrier is held */ 150525e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 150625e8c5aaSvikram 150725e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 150825e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 150925e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 151025e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 151125e8c5aaSvikram ctd->cond_state = state; 151225e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 151325e8c5aaSvikram } 151425e8c5aaSvikram } 151525e8c5aaSvikram 151625e8c5aaSvikram /* 151725e8c5aaSvikram * Core routine called by event-specific routines when an event occurs. 151825e8c5aaSvikram * Determines if an event should be be published, and if it is to be 151925e8c5aaSvikram * published, whether a negotiation should take place. Also implements 152025e8c5aaSvikram * NEGEND events which publish the final disposition of an event after 152125e8c5aaSvikram * negotiations are complete. 
*
 * When an event occurs on a minor node, this routine walks the list of
 * contracts hanging off a devinfo node and for each contract on the affected
 * dip, evaluates the following cases
 *
 * a. an event that is synchronous, breaks the contract and NONEG not set
 *	- bumps up the outstanding negotiation counts on the dip
 *	- marks the dip as undergoing negotiation (devi_ct_neg)
 *	- event of type CTE_NEG is published
 * b. an event that is synchronous, breaks the contract and NONEG is set
 *	- sets the final result to CT_NACK, event is blocked
 *	- does not publish an event
 * c. event is asynchronous and breaks the contract
 *	- publishes a critical event irrespective of whether the NONEG
 *	  flag is set, since the contract will be broken and contract
 *	  owner needs to be informed.
 * d. No contract breakage but the owner has subscribed to the event
 *	- publishes the event irrespective of the NONEG event as the
 *	  owner has explicitly subscribed to the event.
 * e. NEGEND event
 *	- publishes a critical event. Should only be doing this
 *	  if NONEG is not set.
 * f. all other events
 *	- Since a contract is not broken and this event has not been
 *	  subscribed to, this event does not need to be published
 *	  for this contract.
 *
 * Once an event is published, what happens next depends on the type of
 * event:
 *
 * a. NEGEND event
 *	- cleanup all state associated with the preceding negotiation
 *	  and return CT_ACK to the caller of contract_device_publish()
 * b. NACKed event
 *	- One or more contracts had the NONEG term, so the event was
 *	  blocked. Return CT_NACK to the caller.
 * c. Negotiated event
 *	- Call wait_for_acks() to wait for responses from contract
 *	  holders. The end result is either CT_ACK (event is permitted),
 *	  CT_NACK (event is blocked) or CT_NONE (no contract owner
 *	  responded). This result is returned back to the caller.
 * d. All other events
 *	- If the event was asynchronous (i.e. not negotiated) or
 *	  a contract was not broken return CT_ACK to the caller.
 *
 * The "tnvl" argument is consumed by this routine: each published event
 * receives its own duplicate, and the list passed in is freed at the "out"
 * label, which every return path goes through.
 */
static uint_t
contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
    uint_t evtype, nvlist_t *tnvl)
{
	cont_device_t *ctd;
	uint_t result = CT_NONE;
	uint64_t evid = 0;
	uint64_t nevid = 0;
	char *path = NULL;
	int negend;
	int match;
	int sync = 0;
	contract_t *ct;
	ct_kevent_t *event;
	nvlist_t *nvl;
	int broken = 0;

	ASSERT(dip);
	ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
	    (spec_type == S_IFBLK || spec_type == S_IFCHR));
	ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));

	/* Is this a synchronous state change ? */
	if (evtype != CT_EV_NEGEND) {
		sync = is_sync_neg(get_state(dip), evtype);
		/* NOP if unsupported transition */
		if (sync == -2 || sync == -1) {
			/*
			 * Remember the NOP on the dip so that the NEGEND
			 * call that follows is short-circuited below.
			 */
			DEVI(dip)->devi_flags |= DEVI_CT_NOP;
			result = (sync == -2) ? CT_ACK : CT_NONE;
			goto out;
		}
		CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
		    sync ? "" : " not"));
	} else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
		/* NEGEND for an event that was treated as a NOP above */
		DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
		result = CT_ACK;
		goto out;
	}

	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	mutex_enter(&(DEVI(dip)->devi_ct_lock));

	/*
	 * Negotiation end - set the state of the device in the contract
	 */
	if (evtype == CT_EV_NEGEND) {
		CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
		set_cond_state(dip);
	}

	/*
	 * If this device didn't go through negotiation, don't publish
	 * a NEGEND event - simply release the barrier to allow other
	 * device events in.
	 */
	negend = 0;
	if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
		CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
		ct_barrier_release(dip);
		mutex_exit(&(DEVI(dip)->devi_ct_lock));
		result = CT_ACK;
		goto out;
	} else if (evtype == CT_EV_NEGEND) {
		/*
		 * There are negotiated contract breakages that
		 * need a NEGEND event
		 */
		ASSERT(ct_barrier_held(dip));
		negend = 1;
		CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
	} else {
		/*
		 * This is a new event, not a NEGEND event. Wait for previous
		 * contract events to complete.
		 */
		ct_barrier_acquire(dip);
	}


	match = 0;
	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {

		ctid_t ctid;
		/* len is only consumed by the path-prefix ASSERT below */
		size_t len = strlen(path);

		mutex_enter(&ctd->cond_contract.ct_lock);

		ASSERT(ctd->cond_dip == dip);
		ASSERT(ctd->cond_minor);
		ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
		    ctd->cond_minor[len] == ':');

		/* skip contracts on other minors of this dip */
		if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
			mutex_exit(&ctd->cond_contract.ct_lock);
			continue;
		}
		if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
			mutex_exit(&ctd->cond_contract.ct_lock);
			continue;
		}

		/* We have a matching contract */
		match = 1;
		ctid = ctd->cond_contract.ct_id;
		CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
		    ctid));

		/*
		 * There are 4 possible cases
		 * 1. A contract is broken (dev not in acceptable state) and
		 *    the state change is synchronous - start negotiation
		 *    by sending a CTE_NEG critical event.
		 * 2. A contract is broken and the state change is
		 *    asynchronous - just send a critical event and
		 *    break the contract.
		 * 3. Contract is not broken, but consumer has subscribed
		 *    to the event as a critical or informative event
		 *    - just send the appropriate event
		 * 4. contract waiting for negend event - just send the critical
		 *    NEGEND event.
		 */
		broken = 0;
		if (!negend && !(evtype & ctd->cond_aset)) {
			broken = 1;
			CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
			    ctid));
		}

		/*
		 * Don't send event if
		 * - contract is not broken AND
		 * - contract holder has not subscribed to this event AND
		 * - contract not waiting for a NEGEND event
		 */
		if (!broken && !EVSENDP(ctd, evtype) &&
		    !ctd->cond_neg) {
			CT_DEBUG((CE_NOTE, "contract_device_publish(): "
			    "contract (%d): no publish reqd: event %d",
			    ctd->cond_contract.ct_id, evtype));
			mutex_exit(&ctd->cond_contract.ct_lock);
			continue;
		}

		/*
		 * Note: need to kmem_zalloc() the event so mutexes are
		 * initialized automatically
		 */
		ct = &ctd->cond_contract;
		event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
		event->cte_type = evtype;

		if (broken && sync) {
			CT_DEBUG((CE_NOTE, "publish: broken + sync: "
			    "ctid: %d", ctid));
			ASSERT(!negend);
			ASSERT(ctd->cond_currev_id == 0);
			ASSERT(ctd->cond_currev_type == 0);
			ASSERT(ctd->cond_currev_ack == 0);
			ASSERT(ctd->cond_neg == 0);
			if (ctd->cond_noneg) {
				/* Nothing to publish. Event has been blocked */
				CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
				    "not publishing blocked ev: ctid: %d",
				    ctid));
				result = CT_NACK;
				kmem_free(event, sizeof (ct_kevent_t));
				mutex_exit(&ctd->cond_contract.ct_lock);
				continue;
			}
			event->cte_flags = CTE_NEG; /* critical neg. event */
			ctd->cond_currev_type = event->cte_type;
			ct_barrier_incr(dip);
			DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
			ctd->cond_neg = 1;
		} else if (broken && !sync) {
			CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
			    ctid));
			ASSERT(!negend);
			ASSERT(ctd->cond_currev_id == 0);
			ASSERT(ctd->cond_currev_type == 0);
			ASSERT(ctd->cond_currev_ack == 0);
			ASSERT(ctd->cond_neg == 0);
			event->cte_flags = 0; /* critical event */
		} else if (EVSENDP(ctd, event->cte_type)) {
			CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
			    ctid));
			ASSERT(!negend);
			ASSERT(ctd->cond_currev_id == 0);
			ASSERT(ctd->cond_currev_type == 0);
			ASSERT(ctd->cond_currev_ack == 0);
			ASSERT(ctd->cond_neg == 0);
			event->cte_flags = EVINFOP(ctd, event->cte_type) ?
			    CTE_INFO : 0;
		} else if (ctd->cond_neg) {
			CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
			ASSERT(negend);
			ASSERT(ctd->cond_noneg == 0);
			nevid = ctd->cond_contract.ct_nevent ?
			    ctd->cond_contract.ct_nevent->cte_id : 0;
			ASSERT(ctd->cond_currev_id == nevid);
			event->cte_flags = 0;	/* NEGEND is always critical */
			/* negotiation is over: reset per-contract state */
			ctd->cond_currev_id = 0;
			ctd->cond_currev_type = 0;
			ctd->cond_currev_ack = 0;
			ctd->cond_neg = 0;
		} else {
			CT_DEBUG((CE_NOTE, "publish: not publishing event for "
			    "ctid: %d, evtype: %d",
			    ctd->cond_contract.ct_id, event->cte_type));
			ASSERT(!negend);
			ASSERT(ctd->cond_currev_id == 0);
			ASSERT(ctd->cond_currev_type == 0);
			ASSERT(ctd->cond_currev_ack == 0);
			ASSERT(ctd->cond_neg == 0);
			kmem_free(event, sizeof (ct_kevent_t));
			mutex_exit(&ctd->cond_contract.ct_lock);
			continue;
		}

		/* each published event gets a private copy of the nvlist */
		nvl = NULL;
		if (tnvl) {
			VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
			if (negend) {
				int32_t newct = 0;
				ASSERT(ctd->cond_noneg == 0);
				VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
				    == 0);
				VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
				    &newct) == 0);
				/*
				 * The caller's CTS_NEWCT value of 1 is
				 * replaced by 0, any other value by the id
				 * of this contract.
				 */
				VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
				    newct == 1 ? 0 :
				    ctd->cond_contract.ct_id) == 0);
				CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
				    "CTS_NEVID: %llu, CTS_NEWCT: %s",
				    ctid, (unsigned long long)nevid,
				    newct ? "success" : "failure"));

			}
		}

		/* a negotiation is starting: stamp the ACK timers */
		if (ctd->cond_neg) {
			ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
			ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
			ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
			ctd->cond_contract.ct_qtime.ctm_start =
			    ctd->cond_contract.ct_ntime.ctm_start;
		}

		/*
		 * by holding the dip's devi_ct_lock we ensure that
		 * all ACK/NACKs are held up until we have finished
		 * publishing to all contracts.
		 */
		mutex_exit(&ctd->cond_contract.ct_lock);
		evid = cte_publish_all(ct, event, nvl, NULL);
		mutex_enter(&ctd->cond_contract.ct_lock);

		if (ctd->cond_neg) {
			ASSERT(!negend);
			ASSERT(broken);
			ASSERT(sync);
			ASSERT(!ctd->cond_noneg);
			CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
			    ": %d", ctid));
			ctd->cond_currev_id = evid;
		} else if (negend) {
			/* negotiation finished: put the timers back to idle */
			ctd->cond_contract.ct_ntime.ctm_start = -1;
			ctd->cond_contract.ct_qtime.ctm_start = -1;
		}
		mutex_exit(&ctd->cond_contract.ct_lock);
	}

	/*
	 * If "negend" set counter back to initial state (-1) so that
	 * other events can be published. Also clear the negotiation flag
	 * on dip.
	 *
	 * 0 .. n are used for counting.
	 * -1 indicates counter is available for use.
	 */
	if (negend) {
		/*
		 * devi_ct_count not necessarily 0. We may have
		 * timed out in which case, count will be non-zero.
		 */
		ct_barrier_release(dip);
		DEVI(dip)->devi_ct_neg = 0;
		CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
		    (void *)dip));
	} else if (DEVI(dip)->devi_ct_neg) {
		ASSERT(match);
		ASSERT(!ct_barrier_empty(dip));
		CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
		    DEVI(dip)->devi_ct_count, (void *)dip));
	} else {
		/*
		 * for non-negotiated events or subscribed events or no
		 * matching contracts
		 */
		ASSERT(ct_barrier_empty(dip));
		ASSERT(DEVI(dip)->devi_ct_neg == 0);
		CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
		    "dip=%p", (void *)dip));

		/*
		 * only this function when called from contract_device_negend()
		 * can reset the counter to READY state i.e. -1. This function
		 * is so called for every event whether a NEGEND event is needed
		 * or not, but the negend event is only published if the event
		 * whose end they signal is a negotiated event for the contract.
		 */
	}

	if (!match) {
		/* No matching contracts */
		CT_DEBUG((CE_NOTE, "publish: No matching contract"));
		result = CT_NONE;
	} else if (result == CT_NACK) {
		/*
		 * a non-negotiable contract exists and this is a neg. event.
		 * The responses are still waited for, but the outcome stays
		 * CT_NACK regardless of what wait_for_acks() returns.
		 */
		CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
		(void) wait_for_acks(dip, dev, spec_type, evtype);
	} else if (DEVI(dip)->devi_ct_neg) {
		/* one or more contracts going through negotiations */
		CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
		result = wait_for_acks(dip, dev, spec_type, evtype);
	} else {
		/* no negotiated contracts or no broken contracts or NEGEND */
		CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
		result = CT_ACK;
	}

	/*
	 * Release the lock only now so that the only point where we
	 * drop the lock is in wait_for_acks(). This is so that we don't
	 * miss cv_signal/cv_broadcast from contract holders
	 */
	CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
	mutex_exit(&(DEVI(dip)->devi_ct_lock));

out:
	/* common exit: the caller's nvlist and our path buffer die here */
	if (tnvl)
		nvlist_free(tnvl);
	if (path)
		kmem_free(path, MAXPATHLEN);


	CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
	return (result);
}


/*
 * contract_device_offline
 *
 * Event publishing routine called by I/O framework when a device is offlined.
 *
 * Publishes CT_DEV_EV_OFFLINE with a freshly allocated nvlist and returns
 * the result of contract_device_publish() to the caller.
 */
ct_ack_t
contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
{
	nvlist_t *nvl;
	uint_t result;
	uint_t evtype;

	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	evtype = CT_DEV_EV_OFFLINE;
	result = contract_device_publish(dip, dev, spec_type, evtype, nvl);

	/*
	 * If a contract offline is NACKED, the framework expects us to call
	 * NEGEND ourselves, since we know the final result
	 */
	if (result == CT_NACK) {
		contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
	}

	return (result);
}

/*
 * contract_device_degrade
 *
 * Event publishing routine called
by I/O framework when a device 195425e8c5aaSvikram * moves to degrade state. 195525e8c5aaSvikram */ 195625e8c5aaSvikram /*ARGSUSED*/ 195725e8c5aaSvikram void 195825e8c5aaSvikram contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) 195925e8c5aaSvikram { 196025e8c5aaSvikram nvlist_t *nvl; 196125e8c5aaSvikram uint_t evtype; 196225e8c5aaSvikram 196325e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 196425e8c5aaSvikram 196525e8c5aaSvikram evtype = CT_DEV_EV_DEGRADED; 196625e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 196725e8c5aaSvikram } 196825e8c5aaSvikram 196925e8c5aaSvikram /* 197025e8c5aaSvikram * contract_device_undegrade 197125e8c5aaSvikram * 197225e8c5aaSvikram * Event publishing routine called by I/O framework when a device 197325e8c5aaSvikram * moves from degraded state to online state. 197425e8c5aaSvikram */ 197525e8c5aaSvikram /*ARGSUSED*/ 197625e8c5aaSvikram void 197725e8c5aaSvikram contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) 197825e8c5aaSvikram { 197925e8c5aaSvikram nvlist_t *nvl; 198025e8c5aaSvikram uint_t evtype; 198125e8c5aaSvikram 198225e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 198325e8c5aaSvikram 198425e8c5aaSvikram evtype = CT_DEV_EV_ONLINE; 198525e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 198625e8c5aaSvikram } 198725e8c5aaSvikram 198825e8c5aaSvikram /* 198925e8c5aaSvikram * For all contracts which have undergone a negotiation (because the device 199025e8c5aaSvikram * moved out of the acceptable state for that contract and the state 199125e8c5aaSvikram * change is synchronous i.e. requires negotiation) this routine publishes 199225e8c5aaSvikram * a CT_EV_NEGEND event with the final disposition of the event. 199325e8c5aaSvikram * 199425e8c5aaSvikram * This event is always a critical event. 
199525e8c5aaSvikram */ 199625e8c5aaSvikram void 199725e8c5aaSvikram contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) 199825e8c5aaSvikram { 199925e8c5aaSvikram nvlist_t *nvl; 200025e8c5aaSvikram uint_t evtype; 200125e8c5aaSvikram 200225e8c5aaSvikram ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); 200325e8c5aaSvikram 200425e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " 200525e8c5aaSvikram "dip: %p", result, (void *)dip)); 200625e8c5aaSvikram 200725e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 200825e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 200925e8c5aaSvikram result == CT_EV_SUCCESS ? 1 : 0) == 0); 201025e8c5aaSvikram 201125e8c5aaSvikram evtype = CT_EV_NEGEND; 201225e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 201325e8c5aaSvikram 201425e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", 201525e8c5aaSvikram (void *)dip)); 201625e8c5aaSvikram } 201725e8c5aaSvikram 201825e8c5aaSvikram /* 201925e8c5aaSvikram * Wrapper routine called by other subsystems (such as LDI) to start 202025e8c5aaSvikram * negotiations when a synchronous device state change occurs. 202125e8c5aaSvikram * Returns CT_ACK or CT_NACK. 
202225e8c5aaSvikram */ 202325e8c5aaSvikram ct_ack_t 202425e8c5aaSvikram contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, 202525e8c5aaSvikram uint_t evtype) 202625e8c5aaSvikram { 202725e8c5aaSvikram int result; 202825e8c5aaSvikram 202925e8c5aaSvikram ASSERT(dip); 203025e8c5aaSvikram ASSERT(dev != NODEV); 203125e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 203225e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 203325e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 203425e8c5aaSvikram 203525e8c5aaSvikram switch (evtype) { 203625e8c5aaSvikram case CT_DEV_EV_OFFLINE: 203725e8c5aaSvikram result = contract_device_offline(dip, dev, spec_type); 203825e8c5aaSvikram break; 203925e8c5aaSvikram default: 204025e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " 204125e8c5aaSvikram "not supported: event (%d) for dev_t (%lu) and spec (%d), " 204225e8c5aaSvikram "dip (%p)", evtype, dev, spec_type, (void *)dip); 204325e8c5aaSvikram result = CT_NACK; 204425e8c5aaSvikram break; 204525e8c5aaSvikram } 204625e8c5aaSvikram 204725e8c5aaSvikram return (result); 204825e8c5aaSvikram } 204925e8c5aaSvikram 205025e8c5aaSvikram /* 205125e8c5aaSvikram * A wrapper routine called by other subsystems (such as the LDI) to 205225e8c5aaSvikram * finalize event processing for a state change event. For synchronous 205325e8c5aaSvikram * state changes, this publishes NEGEND events. For asynchronous i.e. 205425e8c5aaSvikram * non-negotiable events this publishes the event. 
205525e8c5aaSvikram */ 205625e8c5aaSvikram void 205725e8c5aaSvikram contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, 205825e8c5aaSvikram uint_t evtype, int ct_result) 205925e8c5aaSvikram { 206025e8c5aaSvikram ASSERT(dip); 206125e8c5aaSvikram ASSERT(dev != NODEV); 206225e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 206325e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 206425e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 206525e8c5aaSvikram 206625e8c5aaSvikram switch (evtype) { 206725e8c5aaSvikram case CT_DEV_EV_OFFLINE: 206825e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 206925e8c5aaSvikram break; 207025e8c5aaSvikram case CT_DEV_EV_DEGRADED: 207125e8c5aaSvikram contract_device_degrade(dip, dev, spec_type); 207225e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 207325e8c5aaSvikram break; 207425e8c5aaSvikram case CT_DEV_EV_ONLINE: 207525e8c5aaSvikram contract_device_undegrade(dip, dev, spec_type); 207625e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 207725e8c5aaSvikram break; 207825e8c5aaSvikram default: 207925e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " 208025e8c5aaSvikram "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", 208125e8c5aaSvikram evtype, dev, spec_type, (void *)dip); 208225e8c5aaSvikram break; 208325e8c5aaSvikram } 208425e8c5aaSvikram } 208525e8c5aaSvikram 208625e8c5aaSvikram /* 208725e8c5aaSvikram * Called by I/O framework when a devinfo node is freed to remove the 208825e8c5aaSvikram * association between a devinfo node and its contracts. 
208925e8c5aaSvikram */ 209025e8c5aaSvikram void 209125e8c5aaSvikram contract_device_remove_dip(dev_info_t *dip) 209225e8c5aaSvikram { 209325e8c5aaSvikram cont_device_t *ctd; 209425e8c5aaSvikram cont_device_t *next; 209525e8c5aaSvikram contract_t *ct; 209625e8c5aaSvikram 209725e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 209825e8c5aaSvikram ct_barrier_wait_for_release(dip); 209925e8c5aaSvikram 210025e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) { 210125e8c5aaSvikram next = list_next(&(DEVI(dip)->devi_ct), ctd); 210225e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 210325e8c5aaSvikram ct = &ctd->cond_contract; 210425e8c5aaSvikram /* 210525e8c5aaSvikram * Unlink the dip associated with this contract 210625e8c5aaSvikram */ 210725e8c5aaSvikram mutex_enter(&ct->ct_lock); 210825e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 210925e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 211025e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 211125e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: " 211225e8c5aaSvikram "ctid: %d", ct->ct_id)); 211325e8c5aaSvikram mutex_exit(&ct->ct_lock); 211425e8c5aaSvikram } 211525e8c5aaSvikram ASSERT(list_is_empty(&(DEVI(dip)->devi_ct))); 211625e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 211725e8c5aaSvikram } 211825e8c5aaSvikram 211925e8c5aaSvikram /* 212025e8c5aaSvikram * Barrier related routines 212125e8c5aaSvikram */ 212225e8c5aaSvikram static void 212325e8c5aaSvikram ct_barrier_acquire(dev_info_t *dip) 212425e8c5aaSvikram { 212525e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 212625e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier")); 212725e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 212825e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 212925e8c5aaSvikram DEVI(dip)->devi_ct_count = 0; 213025e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: 
thread owns barrier")); 213125e8c5aaSvikram } 213225e8c5aaSvikram 213325e8c5aaSvikram static void 213425e8c5aaSvikram ct_barrier_release(dev_info_t *dip) 213525e8c5aaSvikram { 213625e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 213725e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 213825e8c5aaSvikram DEVI(dip)->devi_ct_count = -1; 213925e8c5aaSvikram cv_broadcast(&(DEVI(dip)->devi_ct_cv)); 214025e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier")); 214125e8c5aaSvikram } 214225e8c5aaSvikram 214325e8c5aaSvikram static int 214425e8c5aaSvikram ct_barrier_held(dev_info_t *dip) 214525e8c5aaSvikram { 214625e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 214725e8c5aaSvikram return (DEVI(dip)->devi_ct_count != -1); 214825e8c5aaSvikram } 214925e8c5aaSvikram 215025e8c5aaSvikram static int 215125e8c5aaSvikram ct_barrier_empty(dev_info_t *dip) 215225e8c5aaSvikram { 215325e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 215425e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 215525e8c5aaSvikram return (DEVI(dip)->devi_ct_count == 0); 215625e8c5aaSvikram } 215725e8c5aaSvikram 215825e8c5aaSvikram static void 215925e8c5aaSvikram ct_barrier_wait_for_release(dev_info_t *dip) 216025e8c5aaSvikram { 216125e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 216225e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 216325e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 216425e8c5aaSvikram } 216525e8c5aaSvikram 216625e8c5aaSvikram static void 216725e8c5aaSvikram ct_barrier_decr(dev_info_t *dip) 216825e8c5aaSvikram { 216925e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d", 217025e8c5aaSvikram DEVI(dip)->devi_ct_count)); 217125e8c5aaSvikram 217225e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 217325e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count > 0); 217425e8c5aaSvikram 217525e8c5aaSvikram DEVI(dip)->devi_ct_count--; 217625e8c5aaSvikram if 
(DEVI(dip)->devi_ct_count == 0) { 217725e8c5aaSvikram cv_broadcast(&DEVI(dip)->devi_ct_cv); 217825e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast")); 217925e8c5aaSvikram } 218025e8c5aaSvikram } 218125e8c5aaSvikram 218225e8c5aaSvikram static void 218325e8c5aaSvikram ct_barrier_incr(dev_info_t *dip) 218425e8c5aaSvikram { 218525e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 218625e8c5aaSvikram DEVI(dip)->devi_ct_count++; 218725e8c5aaSvikram } 218825e8c5aaSvikram 218925e8c5aaSvikram static int 219025e8c5aaSvikram ct_barrier_wait_for_empty(dev_info_t *dip, int secs) 219125e8c5aaSvikram { 219225e8c5aaSvikram clock_t abstime; 219325e8c5aaSvikram 219425e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 219525e8c5aaSvikram 219625e8c5aaSvikram abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000); 219725e8c5aaSvikram while (DEVI(dip)->devi_ct_count) { 219825e8c5aaSvikram if (cv_timedwait(&(DEVI(dip)->devi_ct_cv), 219925e8c5aaSvikram &(DEVI(dip)->devi_ct_lock), abstime) == -1) { 220025e8c5aaSvikram return (-1); 220125e8c5aaSvikram } 220225e8c5aaSvikram } 220325e8c5aaSvikram return (0); 220425e8c5aaSvikram } 2205