125e8c5aaSvikram /* 225e8c5aaSvikram * CDDL HEADER START 325e8c5aaSvikram * 425e8c5aaSvikram * The contents of this file are subject to the terms of the 525e8c5aaSvikram * Common Development and Distribution License (the "License"). 625e8c5aaSvikram * You may not use this file except in compliance with the License. 725e8c5aaSvikram * 825e8c5aaSvikram * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 925e8c5aaSvikram * or http://www.opensolaris.org/os/licensing. 1025e8c5aaSvikram * See the License for the specific language governing permissions 1125e8c5aaSvikram * and limitations under the License. 1225e8c5aaSvikram * 1325e8c5aaSvikram * When distributing Covered Code, include this CDDL HEADER in each 1425e8c5aaSvikram * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1525e8c5aaSvikram * If applicable, add the following below this CDDL HEADER, with the 1625e8c5aaSvikram * fields enclosed by brackets "[]" replaced with your own identifying 1725e8c5aaSvikram * information: Portions Copyright [yyyy] [name of copyright owner] 1825e8c5aaSvikram * 1925e8c5aaSvikram * CDDL HEADER END 2025e8c5aaSvikram */ 2125e8c5aaSvikram /* 227b209c2cSacruz * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 2325e8c5aaSvikram * Use is subject to license terms. 
2425e8c5aaSvikram */ 2525e8c5aaSvikram 2625e8c5aaSvikram #include <sys/mutex.h> 2725e8c5aaSvikram #include <sys/debug.h> 2825e8c5aaSvikram #include <sys/types.h> 2925e8c5aaSvikram #include <sys/param.h> 3025e8c5aaSvikram #include <sys/kmem.h> 3125e8c5aaSvikram #include <sys/thread.h> 3225e8c5aaSvikram #include <sys/id_space.h> 3325e8c5aaSvikram #include <sys/avl.h> 3425e8c5aaSvikram #include <sys/list.h> 3525e8c5aaSvikram #include <sys/sysmacros.h> 3625e8c5aaSvikram #include <sys/proc.h> 3725e8c5aaSvikram #include <sys/contract.h> 3825e8c5aaSvikram #include <sys/contract_impl.h> 3925e8c5aaSvikram #include <sys/contract/device.h> 4025e8c5aaSvikram #include <sys/contract/device_impl.h> 4125e8c5aaSvikram #include <sys/cmn_err.h> 4225e8c5aaSvikram #include <sys/nvpair.h> 4325e8c5aaSvikram #include <sys/policy.h> 4425e8c5aaSvikram #include <sys/ddi_impldefs.h> 4525e8c5aaSvikram #include <sys/ddi_implfuncs.h> 4625e8c5aaSvikram #include <sys/systm.h> 4725e8c5aaSvikram #include <sys/stat.h> 4825e8c5aaSvikram #include <sys/sunddi.h> 4925e8c5aaSvikram #include <sys/esunddi.h> 5025e8c5aaSvikram #include <sys/ddi.h> 5125e8c5aaSvikram #include <sys/fs/dv_node.h> 5225e8c5aaSvikram #include <sys/sunndi.h> 5325e8c5aaSvikram #undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ 5425e8c5aaSvikram 5525e8c5aaSvikram /* 5625e8c5aaSvikram * Device Contracts 5725e8c5aaSvikram * ----------------- 5825e8c5aaSvikram * This file contains the core code for the device contracts framework. 5925e8c5aaSvikram * A device contract is an agreement or a contract between a process and 6025e8c5aaSvikram * the kernel regarding the state of the device. A device contract may be 6125e8c5aaSvikram * created when a relationship is formed between a device and a process 6225e8c5aaSvikram * i.e. at open(2) time, or it may be created at some point after the device 6325e8c5aaSvikram * has been opened. A device contract once formed may be broken by either party. 
 * A device contract can be broken by the process by an explicit abandon of the
 * contract or by an implicit abandon when the process exits. A device contract
 * can be broken by the kernel either asynchronously (without negotiation) or
 * synchronously (with negotiation). Exactly which happens depends on the device
 * state transition. The following state diagram shows the transitions between
 * device states. Only device state transitions currently supported by device
 * contracts are shown.
 *
 *                              <-- A -->
 *                       /-----------------> DEGRADED
 *                       |                      |
 *                       |                      |
 *                       |                      | S
 *                       |                      | |
 *                       |                      | v
 *                       v       S -->          v
 *                      ONLINE ------------> OFFLINE
 *
 *
 * In the figure above, the arrows indicate the direction of transition. The
 * letter S refers to transitions which are inherently synchronous i.e.
 * require negotiation and the letter A indicates transitions which are
 * asynchronous i.e. are done without contract negotiations. A good example
 * of a synchronous transition is the ONLINE -> OFFLINE transition. This
 * transition cannot happen as long as there are consumers which have the
 * device open. Thus some form of negotiation needs to happen between the
 * consumers and the kernel to ensure that consumers either close devices
 * or disallow the move to OFFLINE. Certain other transitions such as
 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
 * non-negotiable.
 * A device that suffers a fault that degrades its
 * capabilities will become degraded irrespective of what consumers it has,
 * so a negotiation in this case is pointless.
 *
 * The following device states are currently defined for device contracts:
 *
 *	CT_DEV_EV_ONLINE
 *		The device is online and functioning normally
 *	CT_DEV_EV_DEGRADED
 *		The device is online but is functioning in a degraded capacity
 *	CT_DEV_EV_OFFLINE
 *		The device is offline and is no longer configured
 *
 * A typical consumer of device contracts starts out with a contract
 * template and adds terms to that template. These include the
 * "acceptable set" (A-set) term, which is a bitset of device states which
 * are guaranteed by the contract. If the device moves out of a state in
 * the A-set, the contract is broken. The breaking of the contract can
 * be asynchronous in which case a critical contract event is sent to the
 * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
 * consumer and the kernel. The kernel does this by sending a critical
 * event to the consumer with the CTE_NEG flag set indicating that this
 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
 * privileges, it can send a NACK message to the kernel which will block
 * the device state change.
 * To NACK a negotiable event, a process must
 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 *
 * Other terms include the "minor path" term, specified explicitly if the
 * contract is not being created at open(2) time or specified implicitly
 * if the contract is being created at open time via an activated template.
 *
 * A contract event is sent on any state change to which the contract
 * owner has subscribed via the informative or critical event sets. Only
 * critical events are guaranteed to be delivered. Since all device state
 * changes are controlled by the kernel and cannot be arbitrarily generated
 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
 * need to be asserted in a process's effective set to designate an event as
 * critical. To ensure privacy, a process must either have the same effective
 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
 * asserted in its effective set in order to observe device contract events
 * off the device contract type specific endpoint.
 *
 * Yet another term available with device contracts is the "non-negotiable"
 * term. This term is used to pre-specify a NACK to any contract negotiation.
 * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
 * non-negotiable. In this case, the device contract framework assumes a
 * NACK for any transition to OFFLINE and blocks the offline.
If the A-set 14325e8c5aaSvikram * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE 14425e8c5aaSvikram * are NACKed but transitions to DEGRADE succeed. 14525e8c5aaSvikram * 14625e8c5aaSvikram * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract) 14725e8c5aaSvikram * happens just before the I/O framework attempts to offline a device 14825e8c5aaSvikram * (i.e. detach a device and set the offline flag so that it cannot be 14925e8c5aaSvikram * reattached). A device contract holder is expected to either NACK the offline 15025e8c5aaSvikram * (if privileged) or release the device and allow the offline to proceed. 15125e8c5aaSvikram * 15225e8c5aaSvikram * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract) 15325e8c5aaSvikram * is generated just before the I/O framework transitions the device state 15425e8c5aaSvikram * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology). 15525e8c5aaSvikram * 15625e8c5aaSvikram * The contract holder is expected to ACK or NACK a negotiation event 15725e8c5aaSvikram * within a certain period of time. If the ACK/NACK is not received 15825e8c5aaSvikram * within the timeout period, the device contract framework will behave 15925e8c5aaSvikram * as if the contract does not exist and will proceed with the event. 16025e8c5aaSvikram * 16125e8c5aaSvikram * Unlike a process contract a device contract does not need to exist 16225e8c5aaSvikram * once it is abandoned, since it does not define a fault boundary. It 16325e8c5aaSvikram * merely represents an agreement between a process and the kernel 16425e8c5aaSvikram * regarding the state of the device. Once the process has abandoned 16525e8c5aaSvikram * the contract (either implicitly via a process exit or explicitly) 16625e8c5aaSvikram * the kernel has no reason to retain the contract. As a result 16725e8c5aaSvikram * device contracts are neither inheritable nor need to exist in an 16825e8c5aaSvikram * orphan state. 
 *
 * A device unlike a process may exist in multiple contracts and has
 * a "life" outside a device contract. A device unlike a process
 * may exist without an associated contract. Unlike a process contract
 * a device contract may be formed after a binding relationship is
 * formed between a process and a device.
 *
 *	IMPLEMENTATION NOTES
 *	====================
 * DATA STRUCTURES
 * ----------------
 *	The heart of the device contracts implementation is the device contract
 *	private cont_device_t (or ctd for short) data structure. It encapsulates
 *	the generic contract_t data structure and has a number of private
 *	fields.
 *	These include:
 *		cond_minor: The minor device that is the subject of the contract
 *		cond_aset:  The bitset of states which are guaranteed by the
 *			   contract
 *		cond_noneg: If set, indicates that the result of negotiation has
 *			    been predefined to be a NACK
 *	In addition, there are other device identifiers such as the devinfo
 *	node, dev_t and spec_type of the minor node. There are also a few
 *	fields that are used during negotiation to maintain state. See
 *		uts/common/sys/contract/device_impl.h
 *	for details.
 *	The ctd structure represents the device private part of a contract of
 *	type "device"
 *
 *	Another data structure used by device contracts is ctmpl_device. It is
 *	the device contracts private part of the contract template structure.
 *	It encapsulates the generic template structure "ct_template_t" and
 *	includes the following device contract specific fields
 *		ctd_aset:   The bitset of states that should be guaranteed by a
 *			    contract
 *		ctd_noneg:  If set, indicates that contract should NACK a
 *			    negotiation
 *		ctd_minor:  The devfs_path (without the /devices prefix) of the
 *			    minor node that is the subject of the contract.
 *
 * ALGORITHMS
 * ---------
 * There are three sets of routines in this file
 *	Template related routines
 *	-------------------------
 *	These routines provide support for template related operations initiated
 *	via the generic template operations. These include routines that dup
 *	a template, free it, and set various terms in the template
 *	(such as the minor node path, the acceptable state set (or A-set)
 *	and the non-negotiable term) as well as a routine to query the
 *	device specific portion of the template for the abovementioned terms.
 *	There is also a routine to create (ctmpl_device_create) that is used to
 *	create a contract from a template. This routine calls (after initial
 *	setup) the common function used to create a device contract
 *	(contract_device_create).
 *
 *	core device contract implementation
 *	----------------------------------
 *	These routines support the generic contract framework to provide
 *	functionality that allows contracts to be created, managed and
 *	destroyed.
The contract_device_create() routine is a routine used 23025e8c5aaSvikram * to create a contract from a template (either via an explicit create 23125e8c5aaSvikram * operation on a template or implicitly via an open with an 23225e8c5aaSvikram * activated template.). The contract_device_free() routine assists 23325e8c5aaSvikram * in freeing the device contract specific parts. There are routines 23425e8c5aaSvikram * used to abandon (contract_device_abandon) a device contract as well 23525e8c5aaSvikram * as a routine to destroy (which despite its name does not destroy, 23625e8c5aaSvikram * it only moves a contract to a dead state) a contract. 23725e8c5aaSvikram * There is also a routine to return status information about a 23825e8c5aaSvikram * contract - the level of detail depends on what is requested by the 23925e8c5aaSvikram * user. A value of CTD_FIXED only returns fixed length fields such 24025e8c5aaSvikram * as the A-set, state of device and value of the "noneg" term. If 24125e8c5aaSvikram * CTD_ALL is specified, the minor node path is returned as well. 24225e8c5aaSvikram * 24325e8c5aaSvikram * In addition there are interfaces (contract_device_ack/nack) which 24425e8c5aaSvikram * are used to support negotiation between userland processes and 24525e8c5aaSvikram * device contracts. These interfaces record the acknowledgement 24625e8c5aaSvikram * or lack thereof for negotiation events and help determine if the 24725e8c5aaSvikram * negotiated event should occur. 24825e8c5aaSvikram * 24925e8c5aaSvikram * "backend routines" 25025e8c5aaSvikram * ----------------- 25125e8c5aaSvikram * The backend routines form the interface between the I/O framework 25225e8c5aaSvikram * and the device contract subsystem. 
 *	These routines allow the I/O
 *	framework to call into the device contract subsystem to notify it of
 *	impending changes to a device state as well as to inform of the
 *	final disposition of such attempted state changes. Routines in this
 *	class include contract_device_offline() that indicates an attempt to
 *	offline a device, contract_device_degrade() that indicates that
 *	a device is moving to the degraded state and contract_device_negend()
 *	that is used by the I/O framework to inform the contracts subsystem of
 *	the final disposition of an attempted operation.
 *
 *	SUMMARY
 *	-------
 *	A contract starts its life as a template. A process allocates a device
 *	contract template and sets various terms:
 *		The A-set
 *		The device minor node
 *		Critical and informative events
 *		The noneg i.e. no negotiation term
 *	Setting of these terms in the template is done via the
 *	ctmpl_device_set() entry point in this file. A process can query a
 *	template to determine the terms already set in the template - this is
 *	facilitated by the ctmpl_device_get() routine.
 *
 *	Once all the appropriate terms are set, the contract is instantiated via
 *	one of two methods
 *	- via an explicit create operation - this is facilitated by the
 *	  ctmpl_device_create() entry point
 *	- synchronously with the open(2) system call - this is achieved via the
 *	  contract_device_open() routine.
 *	The core work for both these above functions is done by
 *	contract_device_create()
 *
 *	A contract once created can be queried for its status. Support for
 *	status info is provided by both the common contracts framework and by
 *	the "device" contract type. If the level of detail requested is
 *	CTD_COMMON, only the common contract framework data is used. Higher
 *	levels of detail result in calls to contract_device_status() to supply
 *	device contract type specific status information.
 *
 *	A contract once created may be abandoned either explicitly or implicitly.
 *	In either case, the contract_device_abandon() function is invoked. This
 *	function merely calls contract_destroy() which moves the contract to
 *	the DEAD state. The device contract portion of destroy processing is
 *	provided by contract_device_destroy() which merely disassociates the
 *	contract from its device devinfo node. A contract in the DEAD state is
 *	not freed. It hangs around until all references to the contract are
 *	gone. When that happens, the contract is finally deallocated. The
 *	device contract specific portion of the free is done by
 *	contract_device_free() which finally frees the device contract specific
 *	data structure (cont_device_t).
 *
 *	When a device undergoes a state change, the I/O framework calls the
 *	corresponding device contract entry point. For example, when a device
 *	is about to go OFFLINE, the routine contract_device_offline() is
 *	invoked.
 *	Similarly if a device moves to DEGRADED state, the
 *	contract_device_degrade() function is called. These functions call the
 *	core routine contract_device_publish(). This function determines via
 *	the function is_sync_neg() whether an event is a synchronous (i.e.
 *	negotiable) event or not. In the former case contract_device_publish()
 *	publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
 *	and/or NACKs from contract holders. In the latter case, it simply
 *	publishes the event and does not wait. In the negotiation case, ACKs or
 *	NACKs from userland consumers result in contract_device_ack_nack()
 *	being called where the result of the negotiation is recorded in the
 *	contract data structure. Once all outstanding contract owners have
 *	responded, the device contract code in wait_for_acks() determines the
 *	final result of the negotiation. A single NACK overrides all other ACKs.
 *	If there is no NACK, then a single ACK will result in an overall ACK
 *	result. If there are no ACKs or NACKs, then the result CT_NONE is
 *	returned back to the I/O framework. Once the event is permitted or
 *	blocked, the I/O framework proceeds or aborts the state change. The
 *	I/O framework then calls contract_device_negend() with a result code
 *	indicating final disposition of the event. This call releases the
 *	barrier and other state associated with the previous negotiation,
 *	which permits the next event (if any) to come into the device contract
 *	framework.
 *
 *	Finally, a device that has outstanding contracts may be removed from
 *	the system which results in its devinfo node being freed. The devinfo
 *	free routine in the I/O framework calls into the device contract
 *	function - contract_device_remove_dip(). This routine disassociates
 *	the dip from all contracts associated with the devinfo node being freed,
 *	allowing the devinfo node to be freed.
 *
 * LOCKING
 * ---------
 *	There are four sets of data that need to be protected by locks
 *
 *	i) device contract specific portion of the contract template - This data
 *	is protected by the template lock ctmpl_lock.
 *
 *	ii) device contract specific portion of the contract - This data is
 *	protected by the contract lock ct_lock
 *
 *	iii) The linked list of contracts hanging off a devinfo node - This
 *	list is protected by the per-devinfo node lock devi_ct_lock
 *
 *	iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
 *	and devi_ct_count that controls state changes to a dip
 *
 *	The template lock is independent in that none of the other locks in this
 *	file may be taken while holding the template lock (and vice versa).
35425e8c5aaSvikram * 35525e8c5aaSvikram * The remaining three locks have the following lock order 35625e8c5aaSvikram * 35725e8c5aaSvikram * devi_ct_lock -> ct_count barrier -> ct_lock 35825e8c5aaSvikram * 35925e8c5aaSvikram */ 36025e8c5aaSvikram 36125e8c5aaSvikram static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, 36225e8c5aaSvikram int spec_type, proc_t *owner, int *errorp); 36325e8c5aaSvikram 36425e8c5aaSvikram /* barrier routines */ 36525e8c5aaSvikram static void ct_barrier_acquire(dev_info_t *dip); 36625e8c5aaSvikram static void ct_barrier_release(dev_info_t *dip); 36725e8c5aaSvikram static int ct_barrier_held(dev_info_t *dip); 36825e8c5aaSvikram static int ct_barrier_empty(dev_info_t *dip); 36925e8c5aaSvikram static void ct_barrier_wait_for_release(dev_info_t *dip); 37025e8c5aaSvikram static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); 37125e8c5aaSvikram static void ct_barrier_decr(dev_info_t *dip); 37225e8c5aaSvikram static void ct_barrier_incr(dev_info_t *dip); 37325e8c5aaSvikram 37425e8c5aaSvikram ct_type_t *device_type; 37525e8c5aaSvikram 37625e8c5aaSvikram /* 37725e8c5aaSvikram * Macro predicates for determining when events should be sent and how. 37825e8c5aaSvikram */ 37925e8c5aaSvikram #define EVSENDP(ctd, flag) \ 38025e8c5aaSvikram ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) 38125e8c5aaSvikram 38225e8c5aaSvikram #define EVINFOP(ctd, flag) \ 38325e8c5aaSvikram ((ctd->cond_contract.ct_ev_crit & flag) == 0) 38425e8c5aaSvikram 38525e8c5aaSvikram /* 38625e8c5aaSvikram * State transition table showing which transitions are synchronous and which 38725e8c5aaSvikram * are not. 
38825e8c5aaSvikram */ 38925e8c5aaSvikram struct ct_dev_negtable { 39025e8c5aaSvikram uint_t st_old; 39125e8c5aaSvikram uint_t st_new; 39225e8c5aaSvikram uint_t st_neg; 39325e8c5aaSvikram } ct_dev_negtable[] = { 39425e8c5aaSvikram {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, 39525e8c5aaSvikram {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, 39625e8c5aaSvikram {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, 39725e8c5aaSvikram {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, 39825e8c5aaSvikram {0} 39925e8c5aaSvikram }; 40025e8c5aaSvikram 40125e8c5aaSvikram /* 40225e8c5aaSvikram * Device contract template implementation 40325e8c5aaSvikram */ 40425e8c5aaSvikram 40525e8c5aaSvikram /* 40625e8c5aaSvikram * ctmpl_device_dup 40725e8c5aaSvikram * 40825e8c5aaSvikram * The device contract template dup entry point. 40925e8c5aaSvikram * This simply copies all the fields (generic as well as device contract 41025e8c5aaSvikram * specific) fields of the original. 41125e8c5aaSvikram */ 41225e8c5aaSvikram static struct ct_template * 41325e8c5aaSvikram ctmpl_device_dup(struct ct_template *template) 41425e8c5aaSvikram { 41525e8c5aaSvikram ctmpl_device_t *new; 41625e8c5aaSvikram ctmpl_device_t *old = template->ctmpl_data; 41725e8c5aaSvikram char *buf; 41825e8c5aaSvikram char *minor; 41925e8c5aaSvikram 42025e8c5aaSvikram new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 42125e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 42225e8c5aaSvikram 42325e8c5aaSvikram /* 42425e8c5aaSvikram * copy generic fields. 
42525e8c5aaSvikram * ctmpl_copy returns with old template lock held 42625e8c5aaSvikram */ 42725e8c5aaSvikram ctmpl_copy(&new->ctd_ctmpl, template); 42825e8c5aaSvikram 42925e8c5aaSvikram new->ctd_ctmpl.ctmpl_data = new; 43025e8c5aaSvikram new->ctd_aset = old->ctd_aset; 43125e8c5aaSvikram new->ctd_minor = NULL; 43225e8c5aaSvikram new->ctd_noneg = old->ctd_noneg; 43325e8c5aaSvikram 43425e8c5aaSvikram if (old->ctd_minor) { 43525e8c5aaSvikram ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); 43625e8c5aaSvikram bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); 43725e8c5aaSvikram } else { 43825e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 43925e8c5aaSvikram buf = NULL; 44025e8c5aaSvikram } 44125e8c5aaSvikram 44225e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 44325e8c5aaSvikram if (buf) { 44425e8c5aaSvikram minor = i_ddi_strdup(buf, KM_SLEEP); 44525e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 44625e8c5aaSvikram buf = NULL; 44725e8c5aaSvikram } else { 44825e8c5aaSvikram minor = NULL; 44925e8c5aaSvikram } 45025e8c5aaSvikram mutex_enter(&template->ctmpl_lock); 45125e8c5aaSvikram 45225e8c5aaSvikram if (minor) { 45325e8c5aaSvikram new->ctd_minor = minor; 45425e8c5aaSvikram } 45525e8c5aaSvikram 45625e8c5aaSvikram ASSERT(buf == NULL); 45725e8c5aaSvikram return (&new->ctd_ctmpl); 45825e8c5aaSvikram } 45925e8c5aaSvikram 46025e8c5aaSvikram /* 46125e8c5aaSvikram * ctmpl_device_free 46225e8c5aaSvikram * 46325e8c5aaSvikram * The device contract template free entry point. Just 46425e8c5aaSvikram * frees the template. 
46525e8c5aaSvikram */ 46625e8c5aaSvikram static void 46725e8c5aaSvikram ctmpl_device_free(struct ct_template *template) 46825e8c5aaSvikram { 46925e8c5aaSvikram ctmpl_device_t *dtmpl = template->ctmpl_data; 47025e8c5aaSvikram 47125e8c5aaSvikram if (dtmpl->ctd_minor) 47225e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 47325e8c5aaSvikram 47425e8c5aaSvikram kmem_free(dtmpl, sizeof (ctmpl_device_t)); 47525e8c5aaSvikram } 47625e8c5aaSvikram 47725e8c5aaSvikram /* 47825e8c5aaSvikram * SAFE_EV is the set of events which a non-privileged process is 47925e8c5aaSvikram * allowed to make critical. An unprivileged device contract owner has 48025e8c5aaSvikram * no control over when a device changes state, so all device events 48125e8c5aaSvikram * can be in the critical set. 48225e8c5aaSvikram * 48325e8c5aaSvikram * EXCESS tells us if "value", a critical event set, requires 48425e8c5aaSvikram * additional privilege. For device contracts EXCESS currently 48525e8c5aaSvikram * evaluates to 0. 48625e8c5aaSvikram */ 48725e8c5aaSvikram #define SAFE_EV (CT_DEV_ALLEVENT) 48825e8c5aaSvikram #define EXCESS(value) ((value) & ~SAFE_EV) 48925e8c5aaSvikram 49025e8c5aaSvikram 49125e8c5aaSvikram /* 49225e8c5aaSvikram * ctmpl_device_set 49325e8c5aaSvikram * 49425e8c5aaSvikram * The device contract template set entry point. Sets various terms in the 49525e8c5aaSvikram * template. The non-negotiable term can only be set if the process has 49625e8c5aaSvikram * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 
49725e8c5aaSvikram */ 49825e8c5aaSvikram static int 499*c5a9a4fcSAntonello Cruz ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam, 500*c5a9a4fcSAntonello Cruz const cred_t *cr) 50125e8c5aaSvikram { 50225e8c5aaSvikram ctmpl_device_t *dtmpl = tmpl->ctmpl_data; 503*c5a9a4fcSAntonello Cruz ct_param_t *param = &kparam->param; 50425e8c5aaSvikram int error; 50525e8c5aaSvikram dev_info_t *dip; 50625e8c5aaSvikram int spec_type; 5077b209c2cSacruz uint64_t param_value; 5087b209c2cSacruz char *str_value; 50925e8c5aaSvikram 51025e8c5aaSvikram ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); 51125e8c5aaSvikram 5127b209c2cSacruz if (param->ctpm_id == CTDP_MINOR) { 513*c5a9a4fcSAntonello Cruz str_value = (char *)kparam->ctpm_kbuf; 5147b209c2cSacruz str_value[param->ctpm_size - 1] = '\0'; 5157b209c2cSacruz } else { 516d170b13aSacruz if (param->ctpm_size < sizeof (uint64_t)) 517d170b13aSacruz return (EINVAL); 518*c5a9a4fcSAntonello Cruz param_value = *(uint64_t *)kparam->ctpm_kbuf; 5197b209c2cSacruz } 5207b209c2cSacruz 52125e8c5aaSvikram switch (param->ctpm_id) { 52225e8c5aaSvikram case CTDP_ACCEPT: 5237b209c2cSacruz if (param_value & ~CT_DEV_ALLEVENT) 52425e8c5aaSvikram return (EINVAL); 5257b209c2cSacruz if (param_value == 0) 52625e8c5aaSvikram return (EINVAL); 5277b209c2cSacruz if (param_value == CT_DEV_ALLEVENT) 52825e8c5aaSvikram return (EINVAL); 52925e8c5aaSvikram 5307b209c2cSacruz dtmpl->ctd_aset = param_value; 53125e8c5aaSvikram break; 53225e8c5aaSvikram case CTDP_NONEG: 5337b209c2cSacruz if (param_value != CTDP_NONEG_SET && 5347b209c2cSacruz param_value != CTDP_NONEG_CLEAR) 53525e8c5aaSvikram return (EINVAL); 53625e8c5aaSvikram 53725e8c5aaSvikram /* 53825e8c5aaSvikram * only privileged processes can designate a contract 53925e8c5aaSvikram * non-negotiatble. 
54025e8c5aaSvikram */ 5417b209c2cSacruz if (param_value == CTDP_NONEG_SET && 54225e8c5aaSvikram (error = secpolicy_sys_devices(cr)) != 0) { 54325e8c5aaSvikram return (error); 54425e8c5aaSvikram } 54525e8c5aaSvikram 5467b209c2cSacruz dtmpl->ctd_noneg = param_value; 54725e8c5aaSvikram break; 54825e8c5aaSvikram 54925e8c5aaSvikram case CTDP_MINOR: 5507b209c2cSacruz if (*str_value != '/' || 5517b209c2cSacruz strncmp(str_value, "/devices/", 5527b209c2cSacruz strlen("/devices/")) == 0 || 5537b209c2cSacruz strstr(str_value, "../devices/") != NULL || 5547b209c2cSacruz strchr(str_value, ':') == NULL) { 55525e8c5aaSvikram return (EINVAL); 55625e8c5aaSvikram } 55725e8c5aaSvikram 55825e8c5aaSvikram spec_type = 0; 55925e8c5aaSvikram dip = NULL; 5607b209c2cSacruz if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) { 56125e8c5aaSvikram return (ERANGE); 56225e8c5aaSvikram } 56325e8c5aaSvikram ddi_release_devi(dip); 56425e8c5aaSvikram 56525e8c5aaSvikram if (spec_type != S_IFCHR && spec_type != S_IFBLK) { 56625e8c5aaSvikram return (EINVAL); 56725e8c5aaSvikram } 56825e8c5aaSvikram 56925e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 57025e8c5aaSvikram kmem_free(dtmpl->ctd_minor, 57125e8c5aaSvikram strlen(dtmpl->ctd_minor) + 1); 57225e8c5aaSvikram } 5737b209c2cSacruz dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP); 57425e8c5aaSvikram break; 57525e8c5aaSvikram case CTP_EV_CRITICAL: 57625e8c5aaSvikram /* 57725e8c5aaSvikram * Currently for device contracts, any event 57825e8c5aaSvikram * may be added to the critical set. We retain the 57925e8c5aaSvikram * following code however for future enhancements. 
58025e8c5aaSvikram */ 5817b209c2cSacruz if (EXCESS(param_value) && 58225e8c5aaSvikram (error = secpolicy_contract_event(cr)) != 0) 58325e8c5aaSvikram return (error); 5847b209c2cSacruz tmpl->ctmpl_ev_crit = param_value; 58525e8c5aaSvikram break; 58625e8c5aaSvikram default: 58725e8c5aaSvikram return (EINVAL); 58825e8c5aaSvikram } 58925e8c5aaSvikram 59025e8c5aaSvikram return (0); 59125e8c5aaSvikram } 59225e8c5aaSvikram 59325e8c5aaSvikram /* 59425e8c5aaSvikram * ctmpl_device_get 59525e8c5aaSvikram * 59625e8c5aaSvikram * The device contract template get entry point. Simply fetches and 59725e8c5aaSvikram * returns the value of the requested term. 59825e8c5aaSvikram */ 59925e8c5aaSvikram static int 600*c5a9a4fcSAntonello Cruz ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam) 60125e8c5aaSvikram { 60225e8c5aaSvikram ctmpl_device_t *dtmpl = template->ctmpl_data; 603*c5a9a4fcSAntonello Cruz ct_param_t *param = &kparam->param; 604*c5a9a4fcSAntonello Cruz uint64_t *param_value = kparam->ctpm_kbuf; 60525e8c5aaSvikram 60625e8c5aaSvikram ASSERT(MUTEX_HELD(&template->ctmpl_lock)); 60725e8c5aaSvikram 608d170b13aSacruz if (param->ctpm_id == CTDP_ACCEPT || 609d170b13aSacruz param->ctpm_id == CTDP_NONEG) { 610d170b13aSacruz if (param->ctpm_size < sizeof (uint64_t)) 611d170b13aSacruz return (EINVAL); 612*c5a9a4fcSAntonello Cruz kparam->ret_size = sizeof (uint64_t); 613d170b13aSacruz } 614d170b13aSacruz 61525e8c5aaSvikram switch (param->ctpm_id) { 61625e8c5aaSvikram case CTDP_ACCEPT: 6177b209c2cSacruz *param_value = dtmpl->ctd_aset; 61825e8c5aaSvikram break; 61925e8c5aaSvikram case CTDP_NONEG: 6207b209c2cSacruz *param_value = dtmpl->ctd_noneg; 62125e8c5aaSvikram break; 62225e8c5aaSvikram case CTDP_MINOR: 62325e8c5aaSvikram if (dtmpl->ctd_minor) { 624*c5a9a4fcSAntonello Cruz kparam->ret_size = strlcpy((char *)kparam->ctpm_kbuf, 6257b209c2cSacruz dtmpl->ctd_minor, param->ctpm_size); 626*c5a9a4fcSAntonello Cruz kparam->ret_size++; 62725e8c5aaSvikram } else { 
62825e8c5aaSvikram return (ENOENT); 62925e8c5aaSvikram } 63025e8c5aaSvikram break; 63125e8c5aaSvikram default: 63225e8c5aaSvikram return (EINVAL); 63325e8c5aaSvikram } 63425e8c5aaSvikram 63525e8c5aaSvikram return (0); 63625e8c5aaSvikram } 63725e8c5aaSvikram 63825e8c5aaSvikram /* 63925e8c5aaSvikram * Device contract type specific portion of creating a contract using 64025e8c5aaSvikram * a specified template 64125e8c5aaSvikram */ 64225e8c5aaSvikram /*ARGSUSED*/ 64325e8c5aaSvikram int 64425e8c5aaSvikram ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) 64525e8c5aaSvikram { 64625e8c5aaSvikram ctmpl_device_t *dtmpl; 64725e8c5aaSvikram char *buf; 64825e8c5aaSvikram dev_t dev; 64925e8c5aaSvikram int spec_type; 65025e8c5aaSvikram int error; 65125e8c5aaSvikram cont_device_t *ctd; 65225e8c5aaSvikram 65325e8c5aaSvikram if (ctidp == NULL) 65425e8c5aaSvikram return (EINVAL); 65525e8c5aaSvikram 65625e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 65725e8c5aaSvikram 65825e8c5aaSvikram dtmpl = template->ctmpl_data; 65925e8c5aaSvikram 66025e8c5aaSvikram mutex_enter(&template->ctmpl_lock); 66125e8c5aaSvikram if (dtmpl->ctd_minor == NULL) { 66225e8c5aaSvikram /* incomplete template */ 66325e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 66425e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 66525e8c5aaSvikram return (EINVAL); 66625e8c5aaSvikram } else { 66725e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 66825e8c5aaSvikram bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); 66925e8c5aaSvikram } 67025e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 67125e8c5aaSvikram 67225e8c5aaSvikram spec_type = 0; 67325e8c5aaSvikram dev = NODEV; 67425e8c5aaSvikram if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || 67525e8c5aaSvikram dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || 67625e8c5aaSvikram (spec_type != S_IFCHR && spec_type != S_IFBLK)) { 67725e8c5aaSvikram CT_DEBUG((CE_WARN, 67825e8c5aaSvikram "tmpl_create: failed to find device: %s", 
buf)); 67925e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 68025e8c5aaSvikram return (ERANGE); 68125e8c5aaSvikram } 68225e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 68325e8c5aaSvikram 68425e8c5aaSvikram ctd = contract_device_create(template->ctmpl_data, 68525e8c5aaSvikram dev, spec_type, curproc, &error); 68625e8c5aaSvikram 68725e8c5aaSvikram if (ctd == NULL) { 68825e8c5aaSvikram CT_DEBUG((CE_WARN, "Failed to create device contract for " 68925e8c5aaSvikram "process (%d) with device (devt = %lu, spec_type = %s)", 69025e8c5aaSvikram curproc->p_pid, dev, 69125e8c5aaSvikram spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); 69225e8c5aaSvikram return (error); 69325e8c5aaSvikram } 69425e8c5aaSvikram 69525e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 69625e8c5aaSvikram *ctidp = ctd->cond_contract.ct_id; 69725e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 69825e8c5aaSvikram 69925e8c5aaSvikram return (0); 70025e8c5aaSvikram } 70125e8c5aaSvikram 70225e8c5aaSvikram /* 70325e8c5aaSvikram * Device contract specific template entry points 70425e8c5aaSvikram */ 70525e8c5aaSvikram static ctmplops_t ctmpl_device_ops = { 70625e8c5aaSvikram ctmpl_device_dup, /* ctop_dup */ 70725e8c5aaSvikram ctmpl_device_free, /* ctop_free */ 70825e8c5aaSvikram ctmpl_device_set, /* ctop_set */ 70925e8c5aaSvikram ctmpl_device_get, /* ctop_get */ 71025e8c5aaSvikram ctmpl_device_create, /* ctop_create */ 71125e8c5aaSvikram CT_DEV_ALLEVENT /* all device events bitmask */ 71225e8c5aaSvikram }; 71325e8c5aaSvikram 71425e8c5aaSvikram 71525e8c5aaSvikram /* 71625e8c5aaSvikram * Device contract implementation 71725e8c5aaSvikram */ 71825e8c5aaSvikram 71925e8c5aaSvikram /* 72025e8c5aaSvikram * contract_device_default 72125e8c5aaSvikram * 72225e8c5aaSvikram * The device contract default template entry point. Creates a 72325e8c5aaSvikram * device contract template with a default A-set and no "noneg" , 72425e8c5aaSvikram * with informative degrade events and critical offline events. 
72525e8c5aaSvikram * There is no default minor path. 72625e8c5aaSvikram */ 72725e8c5aaSvikram static ct_template_t * 72825e8c5aaSvikram contract_device_default(void) 72925e8c5aaSvikram { 73025e8c5aaSvikram ctmpl_device_t *new; 73125e8c5aaSvikram 73225e8c5aaSvikram new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 73325e8c5aaSvikram ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); 73425e8c5aaSvikram 73525e8c5aaSvikram new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; 73625e8c5aaSvikram new->ctd_noneg = 0; 73725e8c5aaSvikram new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; 73825e8c5aaSvikram new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; 73925e8c5aaSvikram 74025e8c5aaSvikram return (&new->ctd_ctmpl); 74125e8c5aaSvikram } 74225e8c5aaSvikram 74325e8c5aaSvikram /* 74425e8c5aaSvikram * contract_device_free 74525e8c5aaSvikram * 74625e8c5aaSvikram * Destroys the device contract specific portion of a contract and 74725e8c5aaSvikram * frees the contract. 74825e8c5aaSvikram */ 74925e8c5aaSvikram static void 75025e8c5aaSvikram contract_device_free(contract_t *ct) 75125e8c5aaSvikram { 75225e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 75325e8c5aaSvikram 75425e8c5aaSvikram ASSERT(ctd->cond_minor); 75525e8c5aaSvikram ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 75625e8c5aaSvikram kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); 75725e8c5aaSvikram 75825e8c5aaSvikram ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && 75925e8c5aaSvikram ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); 76025e8c5aaSvikram 76125e8c5aaSvikram ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); 76225e8c5aaSvikram 76325e8c5aaSvikram ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); 76425e8c5aaSvikram ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); 76525e8c5aaSvikram 76625e8c5aaSvikram ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); 76725e8c5aaSvikram ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); 76825e8c5aaSvikram 76925e8c5aaSvikram 
ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0)); 77025e8c5aaSvikram ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); 77125e8c5aaSvikram 77225e8c5aaSvikram ASSERT(!list_link_active(&ctd->cond_next)); 77325e8c5aaSvikram 77425e8c5aaSvikram kmem_free(ctd, sizeof (cont_device_t)); 77525e8c5aaSvikram } 77625e8c5aaSvikram 77725e8c5aaSvikram /* 77825e8c5aaSvikram * contract_device_abandon 77925e8c5aaSvikram * 78025e8c5aaSvikram * The device contract abandon entry point. 78125e8c5aaSvikram */ 78225e8c5aaSvikram static void 78325e8c5aaSvikram contract_device_abandon(contract_t *ct) 78425e8c5aaSvikram { 78525e8c5aaSvikram ASSERT(MUTEX_HELD(&ct->ct_lock)); 78625e8c5aaSvikram 78725e8c5aaSvikram /* 78825e8c5aaSvikram * device contracts cannot be inherited or orphaned. 78925e8c5aaSvikram * Move the contract to the DEAD_STATE. It will be freed 79025e8c5aaSvikram * once all references to it are gone. 79125e8c5aaSvikram */ 79225e8c5aaSvikram contract_destroy(ct); 79325e8c5aaSvikram } 79425e8c5aaSvikram 79525e8c5aaSvikram /* 79625e8c5aaSvikram * contract_device_destroy 79725e8c5aaSvikram * 79825e8c5aaSvikram * The device contract destroy entry point. 79925e8c5aaSvikram * Called from contract_destroy() to do any type specific destroy. Note 80025e8c5aaSvikram * that destroy is a misnomer - this does not free the contract, it only 80125e8c5aaSvikram * moves it to the dead state. 
A contract is actually freed via 80225e8c5aaSvikram * contract_rele() -> contract_dtor(), contop_free() 80325e8c5aaSvikram */ 80425e8c5aaSvikram static void 80525e8c5aaSvikram contract_device_destroy(contract_t *ct) 80625e8c5aaSvikram { 80725e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 80825e8c5aaSvikram dev_info_t *dip = ctd->cond_dip; 80925e8c5aaSvikram 81025e8c5aaSvikram ASSERT(MUTEX_HELD(&ct->ct_lock)); 81125e8c5aaSvikram 81225e8c5aaSvikram if (dip == NULL) { 81325e8c5aaSvikram /* 81425e8c5aaSvikram * The dip has been removed, this is a dangling contract 81525e8c5aaSvikram * Check that dip linkages are NULL 81625e8c5aaSvikram */ 81725e8c5aaSvikram ASSERT(!list_link_active(&ctd->cond_next)); 81825e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no " 81925e8c5aaSvikram "devinfo node. contract ctid : %d", ct->ct_id)); 82025e8c5aaSvikram return; 82125e8c5aaSvikram } 82225e8c5aaSvikram 82325e8c5aaSvikram /* 82425e8c5aaSvikram * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock 82525e8c5aaSvikram */ 82625e8c5aaSvikram mutex_exit(&ct->ct_lock); 82725e8c5aaSvikram 82825e8c5aaSvikram /* 82925e8c5aaSvikram * Waiting for the barrier to be released is strictly speaking not 83025e8c5aaSvikram * necessary. But it simplifies the implementation of 83125e8c5aaSvikram * contract_device_publish() by establishing the invariant that 83225e8c5aaSvikram * device contracts cannot go away during negotiation. 
83325e8c5aaSvikram */ 83425e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 83525e8c5aaSvikram ct_barrier_wait_for_release(dip); 83625e8c5aaSvikram mutex_enter(&ct->ct_lock); 83725e8c5aaSvikram 83825e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 83925e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 84025e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 84125e8c5aaSvikram 84225e8c5aaSvikram mutex_exit(&ct->ct_lock); 84325e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 84425e8c5aaSvikram mutex_enter(&ct->ct_lock); 84525e8c5aaSvikram } 84625e8c5aaSvikram 84725e8c5aaSvikram /* 84825e8c5aaSvikram * contract_device_status 84925e8c5aaSvikram * 85025e8c5aaSvikram * The device contract status entry point. Called when level of "detail" 85125e8c5aaSvikram * is either CTD_FIXED or CTD_ALL 85225e8c5aaSvikram * 85325e8c5aaSvikram */ 85425e8c5aaSvikram static void 85525e8c5aaSvikram contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, 85625e8c5aaSvikram void *status, model_t model) 85725e8c5aaSvikram { 85825e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 85925e8c5aaSvikram 86025e8c5aaSvikram ASSERT(detail == CTD_FIXED || detail == CTD_ALL); 86125e8c5aaSvikram 86225e8c5aaSvikram mutex_enter(&ct->ct_lock); 86325e8c5aaSvikram contract_status_common(ct, zone, status, model); 86425e8c5aaSvikram 86525e8c5aaSvikram /* 86625e8c5aaSvikram * There's no need to hold the contract lock while accessing static 86725e8c5aaSvikram * data like aset or noneg. But since we need the lock to access other 86825e8c5aaSvikram * data like state, we hold it anyway. 
86925e8c5aaSvikram */ 87025e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); 87125e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); 87225e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); 87325e8c5aaSvikram 87425e8c5aaSvikram if (detail == CTD_FIXED) { 87525e8c5aaSvikram mutex_exit(&ct->ct_lock); 87625e8c5aaSvikram return; 87725e8c5aaSvikram } 87825e8c5aaSvikram 87925e8c5aaSvikram ASSERT(ctd->cond_minor); 88025e8c5aaSvikram VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); 88125e8c5aaSvikram 88225e8c5aaSvikram mutex_exit(&ct->ct_lock); 88325e8c5aaSvikram } 88425e8c5aaSvikram 88525e8c5aaSvikram /* 88625e8c5aaSvikram * Converts a result integer into the corresponding string. Used for printing 88725e8c5aaSvikram * messages 88825e8c5aaSvikram */ 88925e8c5aaSvikram static char * 89025e8c5aaSvikram result_str(uint_t result) 89125e8c5aaSvikram { 89225e8c5aaSvikram switch (result) { 89325e8c5aaSvikram case CT_ACK: 89425e8c5aaSvikram return ("CT_ACK"); 89525e8c5aaSvikram case CT_NACK: 89625e8c5aaSvikram return ("CT_NACK"); 89725e8c5aaSvikram case CT_NONE: 89825e8c5aaSvikram return ("CT_NONE"); 89925e8c5aaSvikram default: 90025e8c5aaSvikram return ("UNKNOWN"); 90125e8c5aaSvikram } 90225e8c5aaSvikram } 90325e8c5aaSvikram 90425e8c5aaSvikram /* 90525e8c5aaSvikram * Converts a device state integer constant into the corresponding string. 90625e8c5aaSvikram * Used to print messages. 
90725e8c5aaSvikram */ 90825e8c5aaSvikram static char * 90925e8c5aaSvikram state_str(uint_t state) 91025e8c5aaSvikram { 91125e8c5aaSvikram switch (state) { 91225e8c5aaSvikram case CT_DEV_EV_ONLINE: 91325e8c5aaSvikram return ("ONLINE"); 91425e8c5aaSvikram case CT_DEV_EV_DEGRADED: 91525e8c5aaSvikram return ("DEGRADED"); 91625e8c5aaSvikram case CT_DEV_EV_OFFLINE: 91725e8c5aaSvikram return ("OFFLINE"); 91825e8c5aaSvikram default: 91925e8c5aaSvikram return ("UNKNOWN"); 92025e8c5aaSvikram } 92125e8c5aaSvikram } 92225e8c5aaSvikram 92325e8c5aaSvikram /* 92425e8c5aaSvikram * Routine that determines if a particular CT_DEV_EV_? event corresponds to a 92525e8c5aaSvikram * synchronous state change or not. 92625e8c5aaSvikram */ 92725e8c5aaSvikram static int 92825e8c5aaSvikram is_sync_neg(uint_t old, uint_t new) 92925e8c5aaSvikram { 93025e8c5aaSvikram int i; 93125e8c5aaSvikram 93225e8c5aaSvikram ASSERT(old & CT_DEV_ALLEVENT); 93325e8c5aaSvikram ASSERT(new & CT_DEV_ALLEVENT); 93425e8c5aaSvikram 93525e8c5aaSvikram if (old == new) { 93625e8c5aaSvikram CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", 93725e8c5aaSvikram state_str(new))); 93825e8c5aaSvikram return (-2); 93925e8c5aaSvikram } 94025e8c5aaSvikram 94125e8c5aaSvikram for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { 94225e8c5aaSvikram if (old == ct_dev_negtable[i].st_old && 94325e8c5aaSvikram new == ct_dev_negtable[i].st_new) { 94425e8c5aaSvikram return (ct_dev_negtable[i].st_neg); 94525e8c5aaSvikram } 94625e8c5aaSvikram } 94725e8c5aaSvikram 94825e8c5aaSvikram CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " 94925e8c5aaSvikram "old = %s -> new = %s", state_str(old), state_str(new))); 95025e8c5aaSvikram 95125e8c5aaSvikram return (-1); 95225e8c5aaSvikram } 95325e8c5aaSvikram 95425e8c5aaSvikram /* 95525e8c5aaSvikram * Used to cleanup cached dv_nodes so that when a device is released by 95625e8c5aaSvikram * a contract holder, its devinfo node can be successfully detached. 
95725e8c5aaSvikram */ 95825e8c5aaSvikram static int 95925e8c5aaSvikram contract_device_dvclean(dev_info_t *dip) 96025e8c5aaSvikram { 96125e8c5aaSvikram char *devnm; 96225e8c5aaSvikram dev_info_t *pdip; 96325e8c5aaSvikram int error; 96425e8c5aaSvikram 96525e8c5aaSvikram ASSERT(dip); 96625e8c5aaSvikram 96725e8c5aaSvikram /* pdip can be NULL if we have contracts against the root dip */ 96825e8c5aaSvikram pdip = ddi_get_parent(dip); 96925e8c5aaSvikram 97025e8c5aaSvikram if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { 97125e8c5aaSvikram char *path; 97225e8c5aaSvikram 97325e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 97425e8c5aaSvikram (void) ddi_pathname(dip, path); 97525e8c5aaSvikram CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " 97625e8c5aaSvikram "device=%s", path)); 97725e8c5aaSvikram kmem_free(path, MAXPATHLEN); 97825e8c5aaSvikram return (EDEADLOCK); 97925e8c5aaSvikram } 98025e8c5aaSvikram 98125e8c5aaSvikram if (pdip) { 98225e8c5aaSvikram devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 98325e8c5aaSvikram (void) ddi_deviname(dip, devnm); 98425e8c5aaSvikram error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); 98525e8c5aaSvikram kmem_free(devnm, MAXNAMELEN + 1); 98625e8c5aaSvikram } else { 98725e8c5aaSvikram error = devfs_clean(dip, NULL, DV_CLEAN_FORCE); 98825e8c5aaSvikram } 98925e8c5aaSvikram 99025e8c5aaSvikram return (error); 99125e8c5aaSvikram } 99225e8c5aaSvikram 99325e8c5aaSvikram /* 99425e8c5aaSvikram * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. 99525e8c5aaSvikram * Results in the ACK or NACK being recorded on the dip for one particular 99625e8c5aaSvikram * contract. The device contracts framework evaluates the ACK/NACKs for all 99725e8c5aaSvikram * contracts against a device to determine if a particular device state change 99825e8c5aaSvikram * should be allowed. 
99925e8c5aaSvikram */ 100025e8c5aaSvikram static int 100125e8c5aaSvikram contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, 100225e8c5aaSvikram uint_t cmd) 100325e8c5aaSvikram { 100425e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 100525e8c5aaSvikram dev_info_t *dip; 100625e8c5aaSvikram ctid_t ctid; 100725e8c5aaSvikram int error; 100825e8c5aaSvikram 100925e8c5aaSvikram ctid = ct->ct_id; 101025e8c5aaSvikram 101125e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); 101225e8c5aaSvikram 101325e8c5aaSvikram mutex_enter(&ct->ct_lock); 101425e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); 101525e8c5aaSvikram 101625e8c5aaSvikram dip = ctd->cond_dip; 101725e8c5aaSvikram 101825e8c5aaSvikram ASSERT(ctd->cond_minor); 101925e8c5aaSvikram ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 102025e8c5aaSvikram 102125e8c5aaSvikram /* 102225e8c5aaSvikram * Negotiation only if new state is not in A-set 102325e8c5aaSvikram */ 102425e8c5aaSvikram ASSERT(!(ctd->cond_aset & evtype)); 102525e8c5aaSvikram 102625e8c5aaSvikram /* 102725e8c5aaSvikram * Negotiation only if transition is synchronous 102825e8c5aaSvikram */ 102925e8c5aaSvikram ASSERT(is_sync_neg(ctd->cond_state, evtype)); 103025e8c5aaSvikram 103125e8c5aaSvikram /* 103225e8c5aaSvikram * We shouldn't be negotiating if the "noneg" flag is set 103325e8c5aaSvikram */ 103425e8c5aaSvikram ASSERT(!ctd->cond_noneg); 103525e8c5aaSvikram 103625e8c5aaSvikram if (dip) 103725e8c5aaSvikram ndi_hold_devi(dip); 103825e8c5aaSvikram 103925e8c5aaSvikram mutex_exit(&ct->ct_lock); 104025e8c5aaSvikram 104125e8c5aaSvikram /* 104225e8c5aaSvikram * dv_clean only if !NACK and offline state change 104325e8c5aaSvikram */ 104425e8c5aaSvikram if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { 104525e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); 104625e8c5aaSvikram error = contract_device_dvclean(dip); 104725e8c5aaSvikram if (error != 0) { 104825e8c5aaSvikram 
CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", 104925e8c5aaSvikram ctid)); 105025e8c5aaSvikram ddi_release_devi(dip); 105125e8c5aaSvikram } 105225e8c5aaSvikram } 105325e8c5aaSvikram 105425e8c5aaSvikram mutex_enter(&ct->ct_lock); 105525e8c5aaSvikram 105625e8c5aaSvikram if (dip) 105725e8c5aaSvikram ddi_release_devi(dip); 105825e8c5aaSvikram 105925e8c5aaSvikram if (dip == NULL) { 106025e8c5aaSvikram if (ctd->cond_currev_id != evid) { 106125e8c5aaSvikram CT_DEBUG((CE_WARN, "%sACK for non-current event " 106225e8c5aaSvikram "(type=%s, id=%llu) on removed device", 106325e8c5aaSvikram cmd == CT_NACK ? "N" : "", 106425e8c5aaSvikram state_str(evtype), (unsigned long long)evid)); 106525e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", 106625e8c5aaSvikram ctid)); 106725e8c5aaSvikram } else { 106825e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 106925e8c5aaSvikram CT_DEBUG((CE_WARN, "contract_ack: no such device: " 107025e8c5aaSvikram "ctid: %d", ctid)); 107125e8c5aaSvikram } 107225e8c5aaSvikram error = (ct->ct_state == CTS_DEAD) ? ESRCH : 107325e8c5aaSvikram ((cmd == CT_NACK) ? 
ETIMEDOUT : 0); 107425e8c5aaSvikram mutex_exit(&ct->ct_lock); 107525e8c5aaSvikram return (error); 107625e8c5aaSvikram } 107725e8c5aaSvikram 107825e8c5aaSvikram /* 107925e8c5aaSvikram * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock 108025e8c5aaSvikram */ 108125e8c5aaSvikram mutex_exit(&ct->ct_lock); 108225e8c5aaSvikram 108325e8c5aaSvikram mutex_enter(&DEVI(dip)->devi_ct_lock); 108425e8c5aaSvikram mutex_enter(&ct->ct_lock); 108525e8c5aaSvikram if (ctd->cond_currev_id != evid) { 108625e8c5aaSvikram char *buf; 108725e8c5aaSvikram mutex_exit(&ct->ct_lock); 108825e8c5aaSvikram mutex_exit(&DEVI(dip)->devi_ct_lock); 108925e8c5aaSvikram ndi_hold_devi(dip); 109025e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 109125e8c5aaSvikram (void) ddi_pathname(dip, buf); 109225e8c5aaSvikram ddi_release_devi(dip); 109325e8c5aaSvikram CT_DEBUG((CE_WARN, "%sACK for non-current event" 109425e8c5aaSvikram "(type=%s, id=%llu) on device %s", 109525e8c5aaSvikram cmd == CT_NACK ? "N" : "", 109625e8c5aaSvikram state_str(evtype), (unsigned long long)evid, buf)); 109725e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 109825e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", 109925e8c5aaSvikram cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); 110025e8c5aaSvikram return (cmd == CT_ACK ? 0 : ETIMEDOUT); 110125e8c5aaSvikram } 110225e8c5aaSvikram 110325e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 110425e8c5aaSvikram ASSERT(cmd == CT_ACK || cmd == CT_NACK); 110525e8c5aaSvikram 110625e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", 110725e8c5aaSvikram cmd == CT_NACK ? 
"N" : "", ctid)); 110825e8c5aaSvikram 110925e8c5aaSvikram ctd->cond_currev_ack = cmd; 111025e8c5aaSvikram mutex_exit(&ct->ct_lock); 111125e8c5aaSvikram 111225e8c5aaSvikram ct_barrier_decr(dip); 111325e8c5aaSvikram mutex_exit(&DEVI(dip)->devi_ct_lock); 111425e8c5aaSvikram 111525e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); 111625e8c5aaSvikram 111725e8c5aaSvikram return (0); 111825e8c5aaSvikram } 111925e8c5aaSvikram 112025e8c5aaSvikram /* 112125e8c5aaSvikram * Invoked when a userland contract holder approves (i.e. ACKs) a state change 112225e8c5aaSvikram */ 112325e8c5aaSvikram static int 112425e8c5aaSvikram contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) 112525e8c5aaSvikram { 112625e8c5aaSvikram return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); 112725e8c5aaSvikram } 112825e8c5aaSvikram 112925e8c5aaSvikram /* 113025e8c5aaSvikram * Invoked when a userland contract holder blocks (i.e. NACKs) a state change 113125e8c5aaSvikram */ 113225e8c5aaSvikram static int 113325e8c5aaSvikram contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) 113425e8c5aaSvikram { 113525e8c5aaSvikram return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); 113625e8c5aaSvikram } 113725e8c5aaSvikram 113825e8c5aaSvikram /* 113925e8c5aaSvikram * Creates a new contract synchronously with the breaking of an existing 114025e8c5aaSvikram * contract. Currently not supported. 
114125e8c5aaSvikram */ 114225e8c5aaSvikram /*ARGSUSED*/ 114325e8c5aaSvikram static int 114425e8c5aaSvikram contract_device_newct(contract_t *ct) 114525e8c5aaSvikram { 114625e8c5aaSvikram return (ENOTSUP); 114725e8c5aaSvikram } 114825e8c5aaSvikram 114925e8c5aaSvikram /* 115025e8c5aaSvikram * Core device contract implementation entry points 115125e8c5aaSvikram */ 115225e8c5aaSvikram static contops_t contract_device_ops = { 115325e8c5aaSvikram contract_device_free, /* contop_free */ 115425e8c5aaSvikram contract_device_abandon, /* contop_abandon */ 115525e8c5aaSvikram contract_device_destroy, /* contop_destroy */ 115625e8c5aaSvikram contract_device_status, /* contop_status */ 115725e8c5aaSvikram contract_device_ack, /* contop_ack */ 115825e8c5aaSvikram contract_device_nack, /* contop_nack */ 115925e8c5aaSvikram contract_qack_notsup, /* contop_qack */ 116025e8c5aaSvikram contract_device_newct /* contop_newct */ 116125e8c5aaSvikram }; 116225e8c5aaSvikram 116325e8c5aaSvikram /* 116425e8c5aaSvikram * contract_device_init 116525e8c5aaSvikram * 116625e8c5aaSvikram * Initializes the device contract type. 116725e8c5aaSvikram */ 116825e8c5aaSvikram void 116925e8c5aaSvikram contract_device_init(void) 117025e8c5aaSvikram { 117125e8c5aaSvikram device_type = contract_type_init(CTT_DEVICE, "device", 117225e8c5aaSvikram &contract_device_ops, contract_device_default); 117325e8c5aaSvikram } 117425e8c5aaSvikram 117525e8c5aaSvikram /* 117625e8c5aaSvikram * contract_device_create 117725e8c5aaSvikram * 117825e8c5aaSvikram * create a device contract given template "tmpl" and the "owner" process. 117925e8c5aaSvikram * May fail and return NULL if project.max-contracts would have been exceeded. 
118025e8c5aaSvikram * 118125e8c5aaSvikram * Common device contract creation routine called for both open-time and 118225e8c5aaSvikram * non-open time device contract creation 118325e8c5aaSvikram */ 118425e8c5aaSvikram static cont_device_t * 118525e8c5aaSvikram contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, 118625e8c5aaSvikram proc_t *owner, int *errorp) 118725e8c5aaSvikram { 118825e8c5aaSvikram cont_device_t *ctd; 118925e8c5aaSvikram char *minor; 119025e8c5aaSvikram char *path; 119125e8c5aaSvikram dev_info_t *dip; 119225e8c5aaSvikram 119325e8c5aaSvikram ASSERT(dtmpl != NULL); 119425e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); 119525e8c5aaSvikram ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); 119625e8c5aaSvikram ASSERT(errorp); 119725e8c5aaSvikram 119825e8c5aaSvikram *errorp = 0; 119925e8c5aaSvikram 120025e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 120125e8c5aaSvikram 120225e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 120325e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 120425e8c5aaSvikram bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); 120525e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 120625e8c5aaSvikram 120725e8c5aaSvikram dip = e_ddi_hold_devi_by_path(path, 0); 120825e8c5aaSvikram if (dip == NULL) { 120925e8c5aaSvikram cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " 121025e8c5aaSvikram "for device path (%s)", path); 121125e8c5aaSvikram kmem_free(path, MAXPATHLEN); 121225e8c5aaSvikram *errorp = ERANGE; 121325e8c5aaSvikram return (NULL); 121425e8c5aaSvikram } 121525e8c5aaSvikram 121625e8c5aaSvikram /* 121725e8c5aaSvikram * Lock out any parallel contract negotiations 121825e8c5aaSvikram */ 121925e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 122025e8c5aaSvikram ct_barrier_acquire(dip); 122125e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 122225e8c5aaSvikram 122325e8c5aaSvikram minor = i_ddi_strdup(path, 
KM_SLEEP); 122425e8c5aaSvikram kmem_free(path, MAXPATHLEN); 122525e8c5aaSvikram 122625e8c5aaSvikram (void) contract_type_pbundle(device_type, owner); 122725e8c5aaSvikram 122825e8c5aaSvikram ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); 122925e8c5aaSvikram 123025e8c5aaSvikram /* 123125e8c5aaSvikram * Only we hold a refernce to this contract. Safe to access 123225e8c5aaSvikram * the fields without a ct_lock 123325e8c5aaSvikram */ 123425e8c5aaSvikram ctd->cond_minor = minor; 123525e8c5aaSvikram /* 123625e8c5aaSvikram * It is safe to set the dip pointer in the contract 123725e8c5aaSvikram * as the contract will always be destroyed before the dip 123825e8c5aaSvikram * is released 123925e8c5aaSvikram */ 124025e8c5aaSvikram ctd->cond_dip = dip; 124125e8c5aaSvikram ctd->cond_devt = dev; 124225e8c5aaSvikram ctd->cond_spec = spec_type; 124325e8c5aaSvikram 124425e8c5aaSvikram /* 124525e8c5aaSvikram * Since we are able to lookup the device, it is either 124625e8c5aaSvikram * online or degraded 124725e8c5aaSvikram */ 124825e8c5aaSvikram ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 124925e8c5aaSvikram CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; 125025e8c5aaSvikram 125125e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 125225e8c5aaSvikram ctd->cond_aset = dtmpl->ctd_aset; 125325e8c5aaSvikram ctd->cond_noneg = dtmpl->ctd_noneg; 125425e8c5aaSvikram 125525e8c5aaSvikram /* 125625e8c5aaSvikram * contract_ctor() initailizes the common portion of a contract 125725e8c5aaSvikram * contract_dtor() destroys the common portion of a contract 125825e8c5aaSvikram */ 125925e8c5aaSvikram if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, 126025e8c5aaSvikram ctd, 0, owner, B_TRUE)) { 126125e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 126225e8c5aaSvikram /* 126325e8c5aaSvikram * contract_device_free() destroys the type specific 126425e8c5aaSvikram * portion of a contract and frees the contract. 
126525e8c5aaSvikram * The "minor" path and "cred" is a part of the type specific 126625e8c5aaSvikram * portion of the contract and will be freed by 126725e8c5aaSvikram * contract_device_free() 126825e8c5aaSvikram */ 126925e8c5aaSvikram contract_device_free(&ctd->cond_contract); 127025e8c5aaSvikram 127125e8c5aaSvikram /* release barrier */ 127225e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 127325e8c5aaSvikram ct_barrier_release(dip); 127425e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 127525e8c5aaSvikram 127625e8c5aaSvikram ddi_release_devi(dip); 127725e8c5aaSvikram *errorp = EAGAIN; 127825e8c5aaSvikram return (NULL); 127925e8c5aaSvikram } 128025e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 128125e8c5aaSvikram 128225e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 128325e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; 128425e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; 128525e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 128625e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 128725e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 128825e8c5aaSvikram 128925e8c5aaSvikram /* 129025e8c5aaSvikram * Insert device contract into list hanging off the dip 129125e8c5aaSvikram * Bump up the ref-count on the contract to reflect this 129225e8c5aaSvikram */ 129325e8c5aaSvikram contract_hold(&ctd->cond_contract); 129425e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 129525e8c5aaSvikram list_insert_tail(&(DEVI(dip)->devi_ct), ctd); 129625e8c5aaSvikram 129725e8c5aaSvikram /* release barrier */ 129825e8c5aaSvikram ct_barrier_release(dip); 129925e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 130025e8c5aaSvikram 130125e8c5aaSvikram ddi_release_devi(dip); 130225e8c5aaSvikram 130325e8c5aaSvikram return (ctd); 130425e8c5aaSvikram } 130525e8c5aaSvikram 130625e8c5aaSvikram /* 130725e8c5aaSvikram * Called when a device is successfully opened to create an open-time contract 
130825e8c5aaSvikram * i.e. synchronously with a device open. 130925e8c5aaSvikram */ 131025e8c5aaSvikram int 131125e8c5aaSvikram contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) 131225e8c5aaSvikram { 131325e8c5aaSvikram ctmpl_device_t *dtmpl; 131425e8c5aaSvikram ct_template_t *tmpl; 131525e8c5aaSvikram cont_device_t *ctd; 131625e8c5aaSvikram char *path; 131725e8c5aaSvikram klwp_t *lwp; 131825e8c5aaSvikram int error; 131925e8c5aaSvikram 132025e8c5aaSvikram if (ctpp) 132125e8c5aaSvikram *ctpp = NULL; 132225e8c5aaSvikram 132325e8c5aaSvikram /* 132425e8c5aaSvikram * Check if we are in user-context i.e. if we have an lwp 132525e8c5aaSvikram */ 132625e8c5aaSvikram lwp = ttolwp(curthread); 132725e8c5aaSvikram if (lwp == NULL) { 132825e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); 132925e8c5aaSvikram return (0); 133025e8c5aaSvikram } 133125e8c5aaSvikram 133225e8c5aaSvikram tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); 133325e8c5aaSvikram if (tmpl == NULL) { 133425e8c5aaSvikram return (0); 133525e8c5aaSvikram } 133625e8c5aaSvikram dtmpl = tmpl->ctmpl_data; 133725e8c5aaSvikram 133825e8c5aaSvikram /* 133925e8c5aaSvikram * If the user set a minor path in the template before an open, 134025e8c5aaSvikram * ignore it. We use the minor path of the actual minor opened. 134125e8c5aaSvikram */ 134225e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 134325e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 134425e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " 134525e8c5aaSvikram "ignoring device minor path in active template: %s", 134625e8c5aaSvikram curproc->p_pid, dtmpl->ctd_minor)); 134725e8c5aaSvikram /* 134825e8c5aaSvikram * This is a copy of the actual activated template. 134925e8c5aaSvikram * Safe to make changes such as freeing the minor 135025e8c5aaSvikram * path in the template. 
135125e8c5aaSvikram */ 135225e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 135325e8c5aaSvikram dtmpl->ctd_minor = NULL; 135425e8c5aaSvikram } 135525e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 135625e8c5aaSvikram 135725e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 135825e8c5aaSvikram 135925e8c5aaSvikram if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { 136025e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " 136125e8c5aaSvikram "minor path from dev_t,spec {%lu, %d} for process (%d)", 136225e8c5aaSvikram dev, spec_type, curproc->p_pid)); 136325e8c5aaSvikram ctmpl_free(tmpl); 136425e8c5aaSvikram kmem_free(path, MAXPATHLEN); 136525e8c5aaSvikram return (1); 136625e8c5aaSvikram } 136725e8c5aaSvikram 136825e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 136925e8c5aaSvikram ASSERT(dtmpl->ctd_minor == NULL); 137025e8c5aaSvikram dtmpl->ctd_minor = path; 137125e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 137225e8c5aaSvikram 137325e8c5aaSvikram ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); 137425e8c5aaSvikram 137525e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 137625e8c5aaSvikram ASSERT(dtmpl->ctd_minor); 137725e8c5aaSvikram dtmpl->ctd_minor = NULL; 137825e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 137925e8c5aaSvikram ctmpl_free(tmpl); 138025e8c5aaSvikram kmem_free(path, MAXPATHLEN); 138125e8c5aaSvikram 138225e8c5aaSvikram if (ctd == NULL) { 138325e8c5aaSvikram cmn_err(CE_NOTE, "contract_device_open(): Failed to " 138425e8c5aaSvikram "create device contract for process (%d) holding " 138525e8c5aaSvikram "device (devt = %lu, spec_type = %d)", 138625e8c5aaSvikram curproc->p_pid, dev, spec_type); 138725e8c5aaSvikram return (1); 138825e8c5aaSvikram } 138925e8c5aaSvikram 139025e8c5aaSvikram if (ctpp) { 139125e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 139225e8c5aaSvikram *ctpp = &ctd->cond_contract; 139325e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 
139425e8c5aaSvikram } 139525e8c5aaSvikram return (0); 139625e8c5aaSvikram } 139725e8c5aaSvikram 139825e8c5aaSvikram /* 139925e8c5aaSvikram * Called during contract negotiation by the device contract framework to wait 140025e8c5aaSvikram * for ACKs or NACKs from contract holders. If all responses are not received 140125e8c5aaSvikram * before a specified timeout, this routine times out. 140225e8c5aaSvikram */ 140325e8c5aaSvikram static uint_t 140425e8c5aaSvikram wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) 140525e8c5aaSvikram { 140625e8c5aaSvikram cont_device_t *ctd; 140725e8c5aaSvikram int timed_out = 0; 140825e8c5aaSvikram int result = CT_NONE; 140925e8c5aaSvikram int ack; 141025e8c5aaSvikram char *f = "wait_for_acks"; 141125e8c5aaSvikram 141225e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 141325e8c5aaSvikram ASSERT(dip); 141425e8c5aaSvikram ASSERT(evtype & CT_DEV_ALLEVENT); 141525e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 141625e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 141725e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 141825e8c5aaSvikram 141925e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); 142025e8c5aaSvikram 142125e8c5aaSvikram if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { 142225e8c5aaSvikram /* 142325e8c5aaSvikram * some contract owner(s) didn't respond in time 142425e8c5aaSvikram */ 142525e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); 142625e8c5aaSvikram timed_out = 1; 142725e8c5aaSvikram } 142825e8c5aaSvikram 142925e8c5aaSvikram ack = 0; 143025e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 143125e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 143225e8c5aaSvikram 143325e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 143425e8c5aaSvikram 143525e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 143625e8c5aaSvikram 143725e8c5aaSvikram if (dev != DDI_DEV_T_ANY 
&& dev != ctd->cond_devt) { 143825e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 143925e8c5aaSvikram continue; 144025e8c5aaSvikram } 144125e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 144225e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 144325e8c5aaSvikram continue; 144425e8c5aaSvikram } 144525e8c5aaSvikram 144625e8c5aaSvikram /* skip if non-negotiable contract */ 144725e8c5aaSvikram if (ctd->cond_noneg) { 144825e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 144925e8c5aaSvikram continue; 145025e8c5aaSvikram } 145125e8c5aaSvikram 145225e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 145325e8c5aaSvikram if (ctd->cond_currev_ack == CT_NACK) { 145425e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", 145525e8c5aaSvikram f, (void *)dip)); 145625e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 145725e8c5aaSvikram return (CT_NACK); 145825e8c5aaSvikram } else if (ctd->cond_currev_ack == CT_ACK) { 145925e8c5aaSvikram ack = 1; 146025e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", 146125e8c5aaSvikram f, (void *)dip)); 146225e8c5aaSvikram } 146325e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 146425e8c5aaSvikram } 146525e8c5aaSvikram 146625e8c5aaSvikram if (ack) { 146725e8c5aaSvikram result = CT_ACK; 146825e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); 146925e8c5aaSvikram } else if (timed_out) { 147025e8c5aaSvikram result = CT_NONE; 147125e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", 147225e8c5aaSvikram f, (void *)dip)); 147325e8c5aaSvikram } else { 147425e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", 147525e8c5aaSvikram f, (void *)dip)); 147625e8c5aaSvikram } 147725e8c5aaSvikram 147825e8c5aaSvikram 147925e8c5aaSvikram return (result); 148025e8c5aaSvikram } 148125e8c5aaSvikram 148225e8c5aaSvikram /* 148325e8c5aaSvikram * Determines the current state of a device (i.e a devinfo node 148425e8c5aaSvikram */ 
148525e8c5aaSvikram static int 148625e8c5aaSvikram get_state(dev_info_t *dip) 148725e8c5aaSvikram { 148825e8c5aaSvikram if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) 148925e8c5aaSvikram return (CT_DEV_EV_OFFLINE); 149025e8c5aaSvikram else if (DEVI_IS_DEVICE_DEGRADED(dip)) 149125e8c5aaSvikram return (CT_DEV_EV_DEGRADED); 149225e8c5aaSvikram else 149325e8c5aaSvikram return (CT_DEV_EV_ONLINE); 149425e8c5aaSvikram } 149525e8c5aaSvikram 149625e8c5aaSvikram /* 149725e8c5aaSvikram * Sets the current state of a device in a device contract 149825e8c5aaSvikram */ 149925e8c5aaSvikram static void 150025e8c5aaSvikram set_cond_state(dev_info_t *dip) 150125e8c5aaSvikram { 150225e8c5aaSvikram uint_t state = get_state(dip); 150325e8c5aaSvikram cont_device_t *ctd; 150425e8c5aaSvikram 150525e8c5aaSvikram /* verify that barrier is held */ 150625e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 150725e8c5aaSvikram 150825e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 150925e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 151025e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 151125e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 151225e8c5aaSvikram ctd->cond_state = state; 151325e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 151425e8c5aaSvikram } 151525e8c5aaSvikram } 151625e8c5aaSvikram 151725e8c5aaSvikram /* 151825e8c5aaSvikram * Core routine called by event-specific routines when an event occurs. 151925e8c5aaSvikram * Determines if an event should be be published, and if it is to be 152025e8c5aaSvikram * published, whether a negotiation should take place. Also implements 152125e8c5aaSvikram * NEGEND events which publish the final disposition of an event after 152225e8c5aaSvikram * negotiations are complete. 
152325e8c5aaSvikram * 152425e8c5aaSvikram * When an event occurs on a minor node, this routine walks the list of 152525e8c5aaSvikram * contracts hanging off a devinfo node and for each contract on the affected 152625e8c5aaSvikram * dip, evaluates the following cases 152725e8c5aaSvikram * 152825e8c5aaSvikram * a. an event that is synchronous, breaks the contract and NONEG not set 152925e8c5aaSvikram * - bumps up the outstanding negotiation counts on the dip 153025e8c5aaSvikram * - marks the dip as undergoing negotiation (devi_ct_neg) 153125e8c5aaSvikram * - event of type CTE_NEG is published 153225e8c5aaSvikram * b. an event that is synchronous, breaks the contract and NONEG is set 153325e8c5aaSvikram * - sets the final result to CT_NACK, event is blocked 153425e8c5aaSvikram * - does not publish an event 153525e8c5aaSvikram * c. event is asynchronous and breaks the contract 153625e8c5aaSvikram * - publishes a critical event irrespect of whether the NONEG 153725e8c5aaSvikram * flag is set, since the contract will be broken and contract 153825e8c5aaSvikram * owner needs to be informed. 153925e8c5aaSvikram * d. No contract breakage but the owner has subscribed to the event 154025e8c5aaSvikram * - publishes the event irrespective of the NONEG event as the 154125e8c5aaSvikram * owner has explicitly subscribed to the event. 154225e8c5aaSvikram * e. NEGEND event 154325e8c5aaSvikram * - publishes a critical event. Should only be doing this if 154425e8c5aaSvikram * if NONEG is not set. 154525e8c5aaSvikram * f. all other events 154625e8c5aaSvikram * - Since a contract is not broken and this event has not been 154725e8c5aaSvikram * subscribed to, this event does not need to be published for 154825e8c5aaSvikram * for this contract. 154925e8c5aaSvikram * 155025e8c5aaSvikram * Once an event is published, what happens next depends on the type of 155125e8c5aaSvikram * event: 155225e8c5aaSvikram * 155325e8c5aaSvikram * a. 
NEGEND event 155425e8c5aaSvikram * - cleanup all state associated with the preceding negotiation 155525e8c5aaSvikram * and return CT_ACK to the caller of contract_device_publish() 155625e8c5aaSvikram * b. NACKed event 155725e8c5aaSvikram * - One or more contracts had the NONEG term, so the event was 155825e8c5aaSvikram * blocked. Return CT_NACK to the caller. 155925e8c5aaSvikram * c. Negotiated event 156025e8c5aaSvikram * - Call wait_for_acks() to wait for responses from contract 156125e8c5aaSvikram * holders. The end result is either CT_ACK (event is permitted), 156225e8c5aaSvikram * CT_NACK (event is blocked) or CT_NONE (no contract owner) 156325e8c5aaSvikram * responded. This result is returned back to the caller. 156425e8c5aaSvikram * d. All other events 156525e8c5aaSvikram * - If the event was asynchronous (i.e. not negotiated) or 156625e8c5aaSvikram * a contract was not broken return CT_ACK to the caller. 156725e8c5aaSvikram */ 156825e8c5aaSvikram static uint_t 156925e8c5aaSvikram contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, 157025e8c5aaSvikram uint_t evtype, nvlist_t *tnvl) 157125e8c5aaSvikram { 157225e8c5aaSvikram cont_device_t *ctd; 157325e8c5aaSvikram uint_t result = CT_NONE; 157425e8c5aaSvikram uint64_t evid = 0; 157525e8c5aaSvikram uint64_t nevid = 0; 157625e8c5aaSvikram char *path = NULL; 157725e8c5aaSvikram int negend; 157825e8c5aaSvikram int match; 157925e8c5aaSvikram int sync = 0; 158025e8c5aaSvikram contract_t *ct; 158125e8c5aaSvikram ct_kevent_t *event; 158225e8c5aaSvikram nvlist_t *nvl; 158325e8c5aaSvikram int broken = 0; 158425e8c5aaSvikram 158525e8c5aaSvikram ASSERT(dip); 158625e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 158725e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 158825e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 158925e8c5aaSvikram ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); 159025e8c5aaSvikram 159125e8c5aaSvikram /* Is this a synchronous state change ? 
*/ 159225e8c5aaSvikram if (evtype != CT_EV_NEGEND) { 159325e8c5aaSvikram sync = is_sync_neg(get_state(dip), evtype); 159425e8c5aaSvikram /* NOP if unsupported transition */ 159525e8c5aaSvikram if (sync == -2 || sync == -1) { 159625e8c5aaSvikram DEVI(dip)->devi_flags |= DEVI_CT_NOP; 159725e8c5aaSvikram result = (sync == -2) ? CT_ACK : CT_NONE; 159825e8c5aaSvikram goto out; 159925e8c5aaSvikram } 160025e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: is%s sync state change", 160125e8c5aaSvikram sync ? "" : " not")); 160225e8c5aaSvikram } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { 160325e8c5aaSvikram DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; 160425e8c5aaSvikram result = CT_ACK; 160525e8c5aaSvikram goto out; 160625e8c5aaSvikram } 160725e8c5aaSvikram 160825e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 160925e8c5aaSvikram (void) ddi_pathname(dip, path); 161025e8c5aaSvikram 161125e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 161225e8c5aaSvikram 161325e8c5aaSvikram /* 161425e8c5aaSvikram * Negotiation end - set the state of the device in the contract 161525e8c5aaSvikram */ 161625e8c5aaSvikram if (evtype == CT_EV_NEGEND) { 161725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); 161825e8c5aaSvikram set_cond_state(dip); 161925e8c5aaSvikram } 162025e8c5aaSvikram 162125e8c5aaSvikram /* 162225e8c5aaSvikram * If this device didn't go through negotiation, don't publish 162325e8c5aaSvikram * a NEGEND event - simply release the barrier to allow other 162425e8c5aaSvikram * device events in. 162525e8c5aaSvikram */ 162625e8c5aaSvikram negend = 0; 162725e8c5aaSvikram if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { 162825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: no negend reqd. 
release barrier")); 162925e8c5aaSvikram ct_barrier_release(dip); 163025e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 163125e8c5aaSvikram result = CT_ACK; 163225e8c5aaSvikram goto out; 163325e8c5aaSvikram } else if (evtype == CT_EV_NEGEND) { 163425e8c5aaSvikram /* 163525e8c5aaSvikram * There are negotiated contract breakages that 163625e8c5aaSvikram * need a NEGEND event 163725e8c5aaSvikram */ 163825e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 163925e8c5aaSvikram negend = 1; 164025e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: setting negend flag")); 164125e8c5aaSvikram } else { 164225e8c5aaSvikram /* 164325e8c5aaSvikram * This is a new event, not a NEGEND event. Wait for previous 164425e8c5aaSvikram * contract events to complete. 164525e8c5aaSvikram */ 164625e8c5aaSvikram ct_barrier_acquire(dip); 164725e8c5aaSvikram } 164825e8c5aaSvikram 164925e8c5aaSvikram 165025e8c5aaSvikram match = 0; 165125e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 165225e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 165325e8c5aaSvikram 165425e8c5aaSvikram ctid_t ctid; 165525e8c5aaSvikram size_t len = strlen(path); 165625e8c5aaSvikram 165725e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 165825e8c5aaSvikram 165925e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 166025e8c5aaSvikram ASSERT(ctd->cond_minor); 166125e8c5aaSvikram ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && 166225e8c5aaSvikram ctd->cond_minor[len] == ':'); 166325e8c5aaSvikram 166425e8c5aaSvikram if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 166525e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 166625e8c5aaSvikram continue; 166725e8c5aaSvikram } 166825e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 166925e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 167025e8c5aaSvikram continue; 167125e8c5aaSvikram } 167225e8c5aaSvikram 167325e8c5aaSvikram /* We have a matching contract */ 167425e8c5aaSvikram match = 1; 167525e8c5aaSvikram ctid = 
ctd->cond_contract.ct_id; 167625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", 167725e8c5aaSvikram ctid)); 167825e8c5aaSvikram 167925e8c5aaSvikram /* 168025e8c5aaSvikram * There are 4 possible cases 168125e8c5aaSvikram * 1. A contract is broken (dev not in acceptable state) and 168225e8c5aaSvikram * the state change is synchronous - start negotiation 168325e8c5aaSvikram * by sending a CTE_NEG critical event. 168425e8c5aaSvikram * 2. A contract is broken and the state change is 168525e8c5aaSvikram * asynchronous - just send a critical event and 168625e8c5aaSvikram * break the contract. 168725e8c5aaSvikram * 3. Contract is not broken, but consumer has subscribed 168825e8c5aaSvikram * to the event as a critical or informative event 168925e8c5aaSvikram * - just send the appropriate event 169025e8c5aaSvikram * 4. contract waiting for negend event - just send the critical 169125e8c5aaSvikram * NEGEND event. 169225e8c5aaSvikram */ 169325e8c5aaSvikram broken = 0; 169425e8c5aaSvikram if (!negend && !(evtype & ctd->cond_aset)) { 169525e8c5aaSvikram broken = 1; 169625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: Contract broken: %d", 169725e8c5aaSvikram ctid)); 169825e8c5aaSvikram } 169925e8c5aaSvikram 170025e8c5aaSvikram /* 170125e8c5aaSvikram * Don't send event if 170225e8c5aaSvikram * - contract is not broken AND 170325e8c5aaSvikram * - contract holder has not subscribed to this event AND 170425e8c5aaSvikram * - contract not waiting for a NEGEND event 170525e8c5aaSvikram */ 170625e8c5aaSvikram if (!broken && !EVSENDP(ctd, evtype) && 170725e8c5aaSvikram !ctd->cond_neg) { 170825e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_publish(): " 170925e8c5aaSvikram "contract (%d): no publish reqd: event %d", 171025e8c5aaSvikram ctd->cond_contract.ct_id, evtype)); 171125e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 171225e8c5aaSvikram continue; 171325e8c5aaSvikram } 171425e8c5aaSvikram 171525e8c5aaSvikram /* 171625e8c5aaSvikram * Note: need to 
kmem_zalloc() the event so mutexes are 171725e8c5aaSvikram * initialized automatically 171825e8c5aaSvikram */ 171925e8c5aaSvikram ct = &ctd->cond_contract; 172025e8c5aaSvikram event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); 172125e8c5aaSvikram event->cte_type = evtype; 172225e8c5aaSvikram 172325e8c5aaSvikram if (broken && sync) { 172425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: broken + sync: " 172525e8c5aaSvikram "ctid: %d", ctid)); 172625e8c5aaSvikram ASSERT(!negend); 172725e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 172825e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 172925e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 173025e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 173125e8c5aaSvikram if (ctd->cond_noneg) { 173225e8c5aaSvikram /* Nothing to publish. Event has been blocked */ 173325e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync and noneg:" 173425e8c5aaSvikram "not publishing blocked ev: ctid: %d", 173525e8c5aaSvikram ctid)); 173625e8c5aaSvikram result = CT_NACK; 173725e8c5aaSvikram kmem_free(event, sizeof (ct_kevent_t)); 173825e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 173925e8c5aaSvikram continue; 174025e8c5aaSvikram } 174125e8c5aaSvikram event->cte_flags = CTE_NEG; /* critical neg. 
event */ 174225e8c5aaSvikram ctd->cond_currev_type = event->cte_type; 174325e8c5aaSvikram ct_barrier_incr(dip); 174425e8c5aaSvikram DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */ 174525e8c5aaSvikram ctd->cond_neg = 1; 174625e8c5aaSvikram } else if (broken && !sync) { 174725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d", 174825e8c5aaSvikram ctid)); 174925e8c5aaSvikram ASSERT(!negend); 175025e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 175125e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 175225e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 175325e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 175425e8c5aaSvikram event->cte_flags = 0; /* critical event */ 175525e8c5aaSvikram } else if (EVSENDP(ctd, event->cte_type)) { 175625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d", 175725e8c5aaSvikram ctid)); 175825e8c5aaSvikram ASSERT(!negend); 175925e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 176025e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 176125e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 176225e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 176325e8c5aaSvikram event->cte_flags = EVINFOP(ctd, event->cte_type) ? 176425e8c5aaSvikram CTE_INFO : 0; 176525e8c5aaSvikram } else if (ctd->cond_neg) { 176625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid)); 176725e8c5aaSvikram ASSERT(negend); 176825e8c5aaSvikram ASSERT(ctd->cond_noneg == 0); 176925e8c5aaSvikram nevid = ctd->cond_contract.ct_nevent ? 
177025e8c5aaSvikram ctd->cond_contract.ct_nevent->cte_id : 0; 177125e8c5aaSvikram ASSERT(ctd->cond_currev_id == nevid); 177225e8c5aaSvikram event->cte_flags = 0; /* NEGEND is always critical */ 177325e8c5aaSvikram ctd->cond_currev_id = 0; 177425e8c5aaSvikram ctd->cond_currev_type = 0; 177525e8c5aaSvikram ctd->cond_currev_ack = 0; 177625e8c5aaSvikram ctd->cond_neg = 0; 177725e8c5aaSvikram } else { 177825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: not publishing event for " 177925e8c5aaSvikram "ctid: %d, evtype: %d", 178025e8c5aaSvikram ctd->cond_contract.ct_id, event->cte_type)); 178125e8c5aaSvikram ASSERT(!negend); 178225e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 178325e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 178425e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 178525e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 178625e8c5aaSvikram kmem_free(event, sizeof (ct_kevent_t)); 178725e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 178825e8c5aaSvikram continue; 178925e8c5aaSvikram } 179025e8c5aaSvikram 179125e8c5aaSvikram nvl = NULL; 179225e8c5aaSvikram if (tnvl) { 179325e8c5aaSvikram VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0); 179425e8c5aaSvikram if (negend) { 179525e8c5aaSvikram int32_t newct = 0; 179625e8c5aaSvikram ASSERT(ctd->cond_noneg == 0); 179725e8c5aaSvikram VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid) 179825e8c5aaSvikram == 0); 179925e8c5aaSvikram VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT, 180025e8c5aaSvikram &newct) == 0); 180125e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 180225e8c5aaSvikram newct == 1 ? 0 : 180325e8c5aaSvikram ctd->cond_contract.ct_id) == 0); 180425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d " 180525e8c5aaSvikram "CTS_NEVID: %llu, CTS_NEWCT: %s", 180625e8c5aaSvikram ctid, (unsigned long long)nevid, 180725e8c5aaSvikram newct ? 
"success" : "failure")); 180825e8c5aaSvikram 180925e8c5aaSvikram } 181025e8c5aaSvikram } 181125e8c5aaSvikram 181225e8c5aaSvikram if (ctd->cond_neg) { 181325e8c5aaSvikram ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1); 181425e8c5aaSvikram ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1); 181525e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt(); 181625e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = 181725e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start; 181825e8c5aaSvikram } 181925e8c5aaSvikram 182025e8c5aaSvikram /* 182125e8c5aaSvikram * by holding the dip's devi_ct_lock we ensure that 182225e8c5aaSvikram * all ACK/NACKs are held up until we have finished 182325e8c5aaSvikram * publishing to all contracts. 182425e8c5aaSvikram */ 182525e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 182625e8c5aaSvikram evid = cte_publish_all(ct, event, nvl, NULL); 182725e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 182825e8c5aaSvikram 182925e8c5aaSvikram if (ctd->cond_neg) { 183025e8c5aaSvikram ASSERT(!negend); 183125e8c5aaSvikram ASSERT(broken); 183225e8c5aaSvikram ASSERT(sync); 183325e8c5aaSvikram ASSERT(!ctd->cond_noneg); 183425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync break, setting evid" 183525e8c5aaSvikram ": %d", ctid)); 183625e8c5aaSvikram ctd->cond_currev_id = evid; 183725e8c5aaSvikram } else if (negend) { 183825e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 183925e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 184025e8c5aaSvikram } 184125e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 184225e8c5aaSvikram } 184325e8c5aaSvikram 184425e8c5aaSvikram /* 184525e8c5aaSvikram * If "negend" set counter back to initial state (-1) so that 184625e8c5aaSvikram * other events can be published. Also clear the negotiation flag 184725e8c5aaSvikram * on dip. 184825e8c5aaSvikram * 184925e8c5aaSvikram * 0 .. n are used for counting. 185025e8c5aaSvikram * -1 indicates counter is available for use. 
185125e8c5aaSvikram */ 185225e8c5aaSvikram if (negend) { 185325e8c5aaSvikram /* 185425e8c5aaSvikram * devi_ct_count not necessarily 0. We may have 185525e8c5aaSvikram * timed out in which case, count will be non-zero. 185625e8c5aaSvikram */ 185725e8c5aaSvikram ct_barrier_release(dip); 185825e8c5aaSvikram DEVI(dip)->devi_ct_neg = 0; 185925e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p", 186025e8c5aaSvikram (void *)dip)); 186125e8c5aaSvikram } else if (DEVI(dip)->devi_ct_neg) { 186225e8c5aaSvikram ASSERT(match); 186325e8c5aaSvikram ASSERT(!ct_barrier_empty(dip)); 186425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p", 186525e8c5aaSvikram DEVI(dip)->devi_ct_count, (void *)dip)); 186625e8c5aaSvikram } else { 186725e8c5aaSvikram /* 186825e8c5aaSvikram * for non-negotiated events or subscribed events or no 186925e8c5aaSvikram * matching contracts 187025e8c5aaSvikram */ 187125e8c5aaSvikram ASSERT(ct_barrier_empty(dip)); 187225e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_neg == 0); 187325e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: " 187425e8c5aaSvikram "dip=%p", (void *)dip)); 187525e8c5aaSvikram 187625e8c5aaSvikram /* 187725e8c5aaSvikram * only this function when called from contract_device_negend() 187825e8c5aaSvikram * can reset the counter to READY state i.e. -1. This function 187925e8c5aaSvikram * is so called for every event whether a NEGEND event is needed 188025e8c5aaSvikram * or not, but the negend event is only published if the event 188125e8c5aaSvikram * whose end they signal is a negotiated event for the contract. 
188225e8c5aaSvikram */ 188325e8c5aaSvikram } 188425e8c5aaSvikram 188525e8c5aaSvikram if (!match) { 188625e8c5aaSvikram /* No matching contracts */ 188725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: No matching contract")); 188825e8c5aaSvikram result = CT_NONE; 188925e8c5aaSvikram } else if (result == CT_NACK) { 189025e8c5aaSvikram /* a non-negotiable contract exists and this is a neg. event */ 189125e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract")); 189225e8c5aaSvikram (void) wait_for_acks(dip, dev, spec_type, evtype); 189325e8c5aaSvikram } else if (DEVI(dip)->devi_ct_neg) { 189425e8c5aaSvikram /* one or more contracts going through negotations */ 189525e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync contract: waiting")); 189625e8c5aaSvikram result = wait_for_acks(dip, dev, spec_type, evtype); 189725e8c5aaSvikram } else { 189825e8c5aaSvikram /* no negotiated contracts or no broken contracts or NEGEND */ 189925e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: async/no-break/negend")); 190025e8c5aaSvikram result = CT_ACK; 190125e8c5aaSvikram } 190225e8c5aaSvikram 190325e8c5aaSvikram /* 190425e8c5aaSvikram * Release the lock only now so that the only point where we 190525e8c5aaSvikram * drop the lock is in wait_for_acks(). 
This is so that we don't 190625e8c5aaSvikram * miss cv_signal/cv_broadcast from contract holders 190725e8c5aaSvikram */ 190825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock")); 190925e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 191025e8c5aaSvikram 191125e8c5aaSvikram out: 191225e8c5aaSvikram if (tnvl) 191325e8c5aaSvikram nvlist_free(tnvl); 191425e8c5aaSvikram if (path) 191525e8c5aaSvikram kmem_free(path, MAXPATHLEN); 191625e8c5aaSvikram 191725e8c5aaSvikram 191825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result))); 191925e8c5aaSvikram return (result); 192025e8c5aaSvikram } 192125e8c5aaSvikram 192225e8c5aaSvikram 192325e8c5aaSvikram /* 192425e8c5aaSvikram * contract_device_offline 192525e8c5aaSvikram * 192625e8c5aaSvikram * Event publishing routine called by I/O framework when a device is offlined. 192725e8c5aaSvikram */ 192825e8c5aaSvikram ct_ack_t 192925e8c5aaSvikram contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type) 193025e8c5aaSvikram { 193125e8c5aaSvikram nvlist_t *nvl; 193225e8c5aaSvikram uint_t result; 193325e8c5aaSvikram uint_t evtype; 193425e8c5aaSvikram 193525e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 193625e8c5aaSvikram 193725e8c5aaSvikram evtype = CT_DEV_EV_OFFLINE; 193825e8c5aaSvikram result = contract_device_publish(dip, dev, spec_type, evtype, nvl); 193925e8c5aaSvikram 194025e8c5aaSvikram /* 194125e8c5aaSvikram * If a contract offline is NACKED, the framework expects us to call 194225e8c5aaSvikram * NEGEND ourselves, since we know the final result 194325e8c5aaSvikram */ 194425e8c5aaSvikram if (result == CT_NACK) { 194525e8c5aaSvikram contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); 194625e8c5aaSvikram } 194725e8c5aaSvikram 194825e8c5aaSvikram return (result); 194925e8c5aaSvikram } 195025e8c5aaSvikram 195125e8c5aaSvikram /* 195225e8c5aaSvikram * contract_device_degrade 195325e8c5aaSvikram * 195425e8c5aaSvikram * Event publishing routine called 
by I/O framework when a device 195525e8c5aaSvikram * moves to degrade state. 195625e8c5aaSvikram */ 195725e8c5aaSvikram /*ARGSUSED*/ 195825e8c5aaSvikram void 195925e8c5aaSvikram contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) 196025e8c5aaSvikram { 196125e8c5aaSvikram nvlist_t *nvl; 196225e8c5aaSvikram uint_t evtype; 196325e8c5aaSvikram 196425e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 196525e8c5aaSvikram 196625e8c5aaSvikram evtype = CT_DEV_EV_DEGRADED; 196725e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 196825e8c5aaSvikram } 196925e8c5aaSvikram 197025e8c5aaSvikram /* 197125e8c5aaSvikram * contract_device_undegrade 197225e8c5aaSvikram * 197325e8c5aaSvikram * Event publishing routine called by I/O framework when a device 197425e8c5aaSvikram * moves from degraded state to online state. 197525e8c5aaSvikram */ 197625e8c5aaSvikram /*ARGSUSED*/ 197725e8c5aaSvikram void 197825e8c5aaSvikram contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) 197925e8c5aaSvikram { 198025e8c5aaSvikram nvlist_t *nvl; 198125e8c5aaSvikram uint_t evtype; 198225e8c5aaSvikram 198325e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 198425e8c5aaSvikram 198525e8c5aaSvikram evtype = CT_DEV_EV_ONLINE; 198625e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 198725e8c5aaSvikram } 198825e8c5aaSvikram 198925e8c5aaSvikram /* 199025e8c5aaSvikram * For all contracts which have undergone a negotiation (because the device 199125e8c5aaSvikram * moved out of the acceptable state for that contract and the state 199225e8c5aaSvikram * change is synchronous i.e. requires negotiation) this routine publishes 199325e8c5aaSvikram * a CT_EV_NEGEND event with the final disposition of the event. 199425e8c5aaSvikram * 199525e8c5aaSvikram * This event is always a critical event. 
199625e8c5aaSvikram */ 199725e8c5aaSvikram void 199825e8c5aaSvikram contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) 199925e8c5aaSvikram { 200025e8c5aaSvikram nvlist_t *nvl; 200125e8c5aaSvikram uint_t evtype; 200225e8c5aaSvikram 200325e8c5aaSvikram ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); 200425e8c5aaSvikram 200525e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " 200625e8c5aaSvikram "dip: %p", result, (void *)dip)); 200725e8c5aaSvikram 200825e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 200925e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 201025e8c5aaSvikram result == CT_EV_SUCCESS ? 1 : 0) == 0); 201125e8c5aaSvikram 201225e8c5aaSvikram evtype = CT_EV_NEGEND; 201325e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 201425e8c5aaSvikram 201525e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", 201625e8c5aaSvikram (void *)dip)); 201725e8c5aaSvikram } 201825e8c5aaSvikram 201925e8c5aaSvikram /* 202025e8c5aaSvikram * Wrapper routine called by other subsystems (such as LDI) to start 202125e8c5aaSvikram * negotiations when a synchronous device state change occurs. 202225e8c5aaSvikram * Returns CT_ACK or CT_NACK. 
202325e8c5aaSvikram */ 202425e8c5aaSvikram ct_ack_t 202525e8c5aaSvikram contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, 202625e8c5aaSvikram uint_t evtype) 202725e8c5aaSvikram { 202825e8c5aaSvikram int result; 202925e8c5aaSvikram 203025e8c5aaSvikram ASSERT(dip); 203125e8c5aaSvikram ASSERT(dev != NODEV); 203225e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 203325e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 203425e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 203525e8c5aaSvikram 203625e8c5aaSvikram switch (evtype) { 203725e8c5aaSvikram case CT_DEV_EV_OFFLINE: 203825e8c5aaSvikram result = contract_device_offline(dip, dev, spec_type); 203925e8c5aaSvikram break; 204025e8c5aaSvikram default: 204125e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " 204225e8c5aaSvikram "not supported: event (%d) for dev_t (%lu) and spec (%d), " 204325e8c5aaSvikram "dip (%p)", evtype, dev, spec_type, (void *)dip); 204425e8c5aaSvikram result = CT_NACK; 204525e8c5aaSvikram break; 204625e8c5aaSvikram } 204725e8c5aaSvikram 204825e8c5aaSvikram return (result); 204925e8c5aaSvikram } 205025e8c5aaSvikram 205125e8c5aaSvikram /* 205225e8c5aaSvikram * A wrapper routine called by other subsystems (such as the LDI) to 205325e8c5aaSvikram * finalize event processing for a state change event. For synchronous 205425e8c5aaSvikram * state changes, this publishes NEGEND events. For asynchronous i.e. 205525e8c5aaSvikram * non-negotiable events this publishes the event. 
205625e8c5aaSvikram */ 205725e8c5aaSvikram void 205825e8c5aaSvikram contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, 205925e8c5aaSvikram uint_t evtype, int ct_result) 206025e8c5aaSvikram { 206125e8c5aaSvikram ASSERT(dip); 206225e8c5aaSvikram ASSERT(dev != NODEV); 206325e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 206425e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 206525e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 206625e8c5aaSvikram 206725e8c5aaSvikram switch (evtype) { 206825e8c5aaSvikram case CT_DEV_EV_OFFLINE: 206925e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 207025e8c5aaSvikram break; 207125e8c5aaSvikram case CT_DEV_EV_DEGRADED: 207225e8c5aaSvikram contract_device_degrade(dip, dev, spec_type); 207325e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 207425e8c5aaSvikram break; 207525e8c5aaSvikram case CT_DEV_EV_ONLINE: 207625e8c5aaSvikram contract_device_undegrade(dip, dev, spec_type); 207725e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 207825e8c5aaSvikram break; 207925e8c5aaSvikram default: 208025e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " 208125e8c5aaSvikram "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", 208225e8c5aaSvikram evtype, dev, spec_type, (void *)dip); 208325e8c5aaSvikram break; 208425e8c5aaSvikram } 208525e8c5aaSvikram } 208625e8c5aaSvikram 208725e8c5aaSvikram /* 208825e8c5aaSvikram * Called by I/O framework when a devinfo node is freed to remove the 208925e8c5aaSvikram * association between a devinfo node and its contracts. 
209025e8c5aaSvikram */ 209125e8c5aaSvikram void 209225e8c5aaSvikram contract_device_remove_dip(dev_info_t *dip) 209325e8c5aaSvikram { 209425e8c5aaSvikram cont_device_t *ctd; 209525e8c5aaSvikram cont_device_t *next; 209625e8c5aaSvikram contract_t *ct; 209725e8c5aaSvikram 209825e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 209925e8c5aaSvikram ct_barrier_wait_for_release(dip); 210025e8c5aaSvikram 210125e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) { 210225e8c5aaSvikram next = list_next(&(DEVI(dip)->devi_ct), ctd); 210325e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 210425e8c5aaSvikram ct = &ctd->cond_contract; 210525e8c5aaSvikram /* 210625e8c5aaSvikram * Unlink the dip associated with this contract 210725e8c5aaSvikram */ 210825e8c5aaSvikram mutex_enter(&ct->ct_lock); 210925e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 211025e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 211125e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 211225e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: " 211325e8c5aaSvikram "ctid: %d", ct->ct_id)); 211425e8c5aaSvikram mutex_exit(&ct->ct_lock); 211525e8c5aaSvikram } 211625e8c5aaSvikram ASSERT(list_is_empty(&(DEVI(dip)->devi_ct))); 211725e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 211825e8c5aaSvikram } 211925e8c5aaSvikram 212025e8c5aaSvikram /* 212125e8c5aaSvikram * Barrier related routines 212225e8c5aaSvikram */ 212325e8c5aaSvikram static void 212425e8c5aaSvikram ct_barrier_acquire(dev_info_t *dip) 212525e8c5aaSvikram { 212625e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 212725e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier")); 212825e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 212925e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 213025e8c5aaSvikram DEVI(dip)->devi_ct_count = 0; 213125e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: 
thread owns barrier")); 213225e8c5aaSvikram } 213325e8c5aaSvikram 213425e8c5aaSvikram static void 213525e8c5aaSvikram ct_barrier_release(dev_info_t *dip) 213625e8c5aaSvikram { 213725e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 213825e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 213925e8c5aaSvikram DEVI(dip)->devi_ct_count = -1; 214025e8c5aaSvikram cv_broadcast(&(DEVI(dip)->devi_ct_cv)); 214125e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier")); 214225e8c5aaSvikram } 214325e8c5aaSvikram 214425e8c5aaSvikram static int 214525e8c5aaSvikram ct_barrier_held(dev_info_t *dip) 214625e8c5aaSvikram { 214725e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 214825e8c5aaSvikram return (DEVI(dip)->devi_ct_count != -1); 214925e8c5aaSvikram } 215025e8c5aaSvikram 215125e8c5aaSvikram static int 215225e8c5aaSvikram ct_barrier_empty(dev_info_t *dip) 215325e8c5aaSvikram { 215425e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 215525e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 215625e8c5aaSvikram return (DEVI(dip)->devi_ct_count == 0); 215725e8c5aaSvikram } 215825e8c5aaSvikram 215925e8c5aaSvikram static void 216025e8c5aaSvikram ct_barrier_wait_for_release(dev_info_t *dip) 216125e8c5aaSvikram { 216225e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 216325e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 216425e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 216525e8c5aaSvikram } 216625e8c5aaSvikram 216725e8c5aaSvikram static void 216825e8c5aaSvikram ct_barrier_decr(dev_info_t *dip) 216925e8c5aaSvikram { 217025e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d", 217125e8c5aaSvikram DEVI(dip)->devi_ct_count)); 217225e8c5aaSvikram 217325e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 217425e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count > 0); 217525e8c5aaSvikram 217625e8c5aaSvikram DEVI(dip)->devi_ct_count--; 217725e8c5aaSvikram if 
(DEVI(dip)->devi_ct_count == 0) { 217825e8c5aaSvikram cv_broadcast(&DEVI(dip)->devi_ct_cv); 217925e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast")); 218025e8c5aaSvikram } 218125e8c5aaSvikram } 218225e8c5aaSvikram 218325e8c5aaSvikram static void 218425e8c5aaSvikram ct_barrier_incr(dev_info_t *dip) 218525e8c5aaSvikram { 218625e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 218725e8c5aaSvikram DEVI(dip)->devi_ct_count++; 218825e8c5aaSvikram } 218925e8c5aaSvikram 219025e8c5aaSvikram static int 219125e8c5aaSvikram ct_barrier_wait_for_empty(dev_info_t *dip, int secs) 219225e8c5aaSvikram { 219325e8c5aaSvikram clock_t abstime; 219425e8c5aaSvikram 219525e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 219625e8c5aaSvikram 219725e8c5aaSvikram abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000); 219825e8c5aaSvikram while (DEVI(dip)->devi_ct_count) { 219925e8c5aaSvikram if (cv_timedwait(&(DEVI(dip)->devi_ct_cv), 220025e8c5aaSvikram &(DEVI(dip)->devi_ct_lock), abstime) == -1) { 220125e8c5aaSvikram return (-1); 220225e8c5aaSvikram } 220325e8c5aaSvikram } 220425e8c5aaSvikram return (0); 220525e8c5aaSvikram } 2206