125e8c5aaSvikram /* 225e8c5aaSvikram * CDDL HEADER START 325e8c5aaSvikram * 425e8c5aaSvikram * The contents of this file are subject to the terms of the 525e8c5aaSvikram * Common Development and Distribution License (the "License"). 625e8c5aaSvikram * You may not use this file except in compliance with the License. 725e8c5aaSvikram * 825e8c5aaSvikram * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 925e8c5aaSvikram * or http://www.opensolaris.org/os/licensing. 1025e8c5aaSvikram * See the License for the specific language governing permissions 1125e8c5aaSvikram * and limitations under the License. 1225e8c5aaSvikram * 1325e8c5aaSvikram * When distributing Covered Code, include this CDDL HEADER in each 1425e8c5aaSvikram * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1525e8c5aaSvikram * If applicable, add the following below this CDDL HEADER, with the 1625e8c5aaSvikram * fields enclosed by brackets "[]" replaced with your own identifying 1725e8c5aaSvikram * information: Portions Copyright [yyyy] [name of copyright owner] 1825e8c5aaSvikram * 1925e8c5aaSvikram * CDDL HEADER END 2025e8c5aaSvikram */ 2125e8c5aaSvikram /* 22*7b209c2cSacruz * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 2325e8c5aaSvikram * Use is subject to license terms. 
2425e8c5aaSvikram */ 2525e8c5aaSvikram 2625e8c5aaSvikram #pragma ident "%Z%%M% %I% %E% SMI" 2725e8c5aaSvikram 2825e8c5aaSvikram #include <sys/mutex.h> 2925e8c5aaSvikram #include <sys/debug.h> 3025e8c5aaSvikram #include <sys/types.h> 3125e8c5aaSvikram #include <sys/param.h> 3225e8c5aaSvikram #include <sys/kmem.h> 3325e8c5aaSvikram #include <sys/thread.h> 3425e8c5aaSvikram #include <sys/id_space.h> 3525e8c5aaSvikram #include <sys/avl.h> 3625e8c5aaSvikram #include <sys/list.h> 3725e8c5aaSvikram #include <sys/sysmacros.h> 3825e8c5aaSvikram #include <sys/proc.h> 3925e8c5aaSvikram #include <sys/contract.h> 4025e8c5aaSvikram #include <sys/contract_impl.h> 4125e8c5aaSvikram #include <sys/contract/device.h> 4225e8c5aaSvikram #include <sys/contract/device_impl.h> 4325e8c5aaSvikram #include <sys/cmn_err.h> 4425e8c5aaSvikram #include <sys/nvpair.h> 4525e8c5aaSvikram #include <sys/policy.h> 4625e8c5aaSvikram #include <sys/ddi_impldefs.h> 4725e8c5aaSvikram #include <sys/ddi_implfuncs.h> 4825e8c5aaSvikram #include <sys/systm.h> 4925e8c5aaSvikram #include <sys/stat.h> 5025e8c5aaSvikram #include <sys/sunddi.h> 5125e8c5aaSvikram #include <sys/esunddi.h> 5225e8c5aaSvikram #include <sys/ddi.h> 5325e8c5aaSvikram #include <sys/fs/dv_node.h> 5425e8c5aaSvikram #include <sys/sunndi.h> 5525e8c5aaSvikram #undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ 5625e8c5aaSvikram 5725e8c5aaSvikram /* 5825e8c5aaSvikram * Device Contracts 5925e8c5aaSvikram * ----------------- 6025e8c5aaSvikram * This file contains the core code for the device contracts framework. 6125e8c5aaSvikram * A device contract is an agreement or a contract between a process and 6225e8c5aaSvikram * the kernel regarding the state of the device. A device contract may be 6325e8c5aaSvikram * created when a relationship is formed between a device and a process 6425e8c5aaSvikram * i.e. at open(2) time, or it may be created at some point after the device 6525e8c5aaSvikram * has been opened. 
 * A device contract once formed may be broken by either party.
 * A device contract can be broken by the process by an explicit abandon of the
 * contract or by an implicit abandon when the process exits. A device contract
 * can be broken by the kernel either asynchronously (without negotiation) or
 * synchronously (with negotiation). Exactly which happens depends on the device
 * state transition. The following state diagram shows the transitions between
 * device states. Only device state transitions currently supported by device
 * contracts are shown.
 *
 *                              <-- A -->
 *                       /-----------------> DEGRADED
 *                       |                      |
 *                       |                      |
 *                       |                      | S
 *                       |                      | |
 *                       |                      | v
 *                       v       S -->          v
 *                      ONLINE ------------> OFFLINE
 *
 *
 * In the figure above, the arrows indicate the direction of transition. The
 * letter S refers to transitions which are inherently synchronous i.e.
 * require negotiation and the letter A indicates transitions which are
 * asynchronous i.e. are done without contract negotiations. A good example
 * of a synchronous transition is the ONLINE -> OFFLINE transition. This
 * transition cannot happen as long as there are consumers which have the
 * device open. Thus some form of negotiation needs to happen between the
 * consumers and the kernel to ensure that consumers either close devices
 * or disallow the move to OFFLINE. Certain other transitions such as
 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
 * non-negotiable.
 * A device that suffers a fault that degrades its
 * capabilities will become degraded irrespective of what consumers it has,
 * so a negotiation in this case is pointless.
 *
 * The following device states are currently defined for device contracts:
 *
 *	CT_DEV_EV_ONLINE
 *		The device is online and functioning normally
 *	CT_DEV_EV_DEGRADED
 *		The device is online but is functioning in a degraded capacity
 *	CT_DEV_EV_OFFLINE
 *		The device is offline and is no longer configured
 *
 * A typical consumer of device contracts starts out with a contract
 * template and adds terms to that template. These include the
 * "acceptable set" (A-set) term, which is a bitset of device states which
 * are guaranteed by the contract. If the device moves out of a state in
 * the A-set, the contract is broken. The breaking of the contract can
 * be asynchronous in which case a critical contract event is sent to the
 * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
 * consumer and the kernel. The kernel does this by sending a critical
 * event to the consumer with the CTE_NEG flag set indicating that this
 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
 * privileges, it can send a NACK message to the kernel which will block
 * the device state change.
 * To NACK a negotiable event, a process must
 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 *
 * Other terms include the "minor path" term, specified explicitly if the
 * contract is not being created at open(2) time or specified implicitly
 * if the contract is being created at open time via an activated template.
 *
 * A contract event is sent on any state change to which the contract
 * owner has subscribed via the informative or critical event sets. Only
 * critical events are guaranteed to be delivered. Since all device state
 * changes are controlled by the kernel and cannot be arbitrarily generated
 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
 * need to be asserted in a process's effective set to designate an event as
 * critical. To ensure privacy, a process must either have the same effective
 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
 * asserted in its effective set in order to observe device contract events
 * off the device contract type specific endpoint.
 *
 * Yet another term available with device contracts is the "non-negotiable"
 * term. This term is used to pre-specify a NACK to any contract negotiation.
 * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
 * non-negotiable. In this case, the device contract framework assumes a
 * NACK for any transition to OFFLINE and blocks the offline.
If the A-set 14525e8c5aaSvikram * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE 14625e8c5aaSvikram * are NACKed but transitions to DEGRADE succeed. 14725e8c5aaSvikram * 14825e8c5aaSvikram * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract) 14925e8c5aaSvikram * happens just before the I/O framework attempts to offline a device 15025e8c5aaSvikram * (i.e. detach a device and set the offline flag so that it cannot be 15125e8c5aaSvikram * reattached). A device contract holder is expected to either NACK the offline 15225e8c5aaSvikram * (if privileged) or release the device and allow the offline to proceed. 15325e8c5aaSvikram * 15425e8c5aaSvikram * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract) 15525e8c5aaSvikram * is generated just before the I/O framework transitions the device state 15625e8c5aaSvikram * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology). 15725e8c5aaSvikram * 15825e8c5aaSvikram * The contract holder is expected to ACK or NACK a negotiation event 15925e8c5aaSvikram * within a certain period of time. If the ACK/NACK is not received 16025e8c5aaSvikram * within the timeout period, the device contract framework will behave 16125e8c5aaSvikram * as if the contract does not exist and will proceed with the event. 16225e8c5aaSvikram * 16325e8c5aaSvikram * Unlike a process contract a device contract does not need to exist 16425e8c5aaSvikram * once it is abandoned, since it does not define a fault boundary. It 16525e8c5aaSvikram * merely represents an agreement between a process and the kernel 16625e8c5aaSvikram * regarding the state of the device. Once the process has abandoned 16725e8c5aaSvikram * the contract (either implicitly via a process exit or explicitly) 16825e8c5aaSvikram * the kernel has no reason to retain the contract. As a result 16925e8c5aaSvikram * device contracts are neither inheritable nor need to exist in an 17025e8c5aaSvikram * orphan state. 
 *
 * A device unlike a process may exist in multiple contracts and has
 * a "life" outside a device contract. A device unlike a process
 * may exist without an associated contract. Unlike a process contract
 * a device contract may be formed after a binding relationship is
 * formed between a process and a device.
 *
 *	IMPLEMENTATION NOTES
 *	====================
 * DATA STRUCTURES
 * ----------------
 *    The heart of the device contracts implementation is the device contract
 *    private cont_device_t (or ctd for short) data structure. It encapsulates
 *    the generic contract_t data structure and has a number of private
 *    fields.
 *    These include:
 *	cond_minor: The minor device that is the subject of the contract
 *	cond_aset:  The bitset of states which are guaranteed by the
 *		    contract
 *	cond_noneg: If set, indicates that the result of negotiation has
 *		    been predefined to be a NACK
 *    In addition, there are other device identifiers such as the devinfo
 *    node, dev_t and spec_type of the minor node. There are also a few fields
 *    that are used during negotiation to maintain state. See
 *	uts/common/sys/contract/device_impl.h
 *    for details.
 *    The ctd structure represents the device private part of a contract of
 *    type "device"
 *
 *    Another data structure used by device contracts is ctmpl_device. It is
 *    the device contracts private part of the contract template structure.
 *    It encapsulates the generic template structure "ct_template_t" and
 *    includes the following device contract specific fields
 *	ctd_aset:   The bitset of states that should be guaranteed by a
 *		    contract
 *	ctd_noneg:  If set, indicates that contract should NACK a
 *		    negotiation
 *	ctd_minor:  The devfs_path (without the /devices prefix) of the
 *		    minor node that is the subject of the contract.
 *
 * ALGORITHMS
 * ---------
 * There are three sets of routines in this file
 *    Template related routines
 *    -------------------------
 *    These routines provide support for template related operations initiated
 *    via the generic template operations. These include routines that dup
 *    a template, free it, and set various terms in the template
 *    (such as the minor node path, the acceptable state set (or A-set)
 *    and the non-negotiable term) as well as a routine to query the
 *    device specific portion of the template for the abovementioned terms.
 *    There is also a routine to create (ctmpl_device_create) that is used to
 *    create a contract from a template. This routine calls (after initial
 *    setup) the common function used to create a device contract
 *    (contract_device_create).
 *
 *    core device contract implementation
 *    ----------------------------------
 *    These routines support the generic contract framework to provide
 *    functionality that allows contracts to be created, managed and
 *    destroyed.
The contract_device_create() routine is a routine used 23225e8c5aaSvikram * to create a contract from a template (either via an explicit create 23325e8c5aaSvikram * operation on a template or implicitly via an open with an 23425e8c5aaSvikram * activated template.). The contract_device_free() routine assists 23525e8c5aaSvikram * in freeing the device contract specific parts. There are routines 23625e8c5aaSvikram * used to abandon (contract_device_abandon) a device contract as well 23725e8c5aaSvikram * as a routine to destroy (which despite its name does not destroy, 23825e8c5aaSvikram * it only moves a contract to a dead state) a contract. 23925e8c5aaSvikram * There is also a routine to return status information about a 24025e8c5aaSvikram * contract - the level of detail depends on what is requested by the 24125e8c5aaSvikram * user. A value of CTD_FIXED only returns fixed length fields such 24225e8c5aaSvikram * as the A-set, state of device and value of the "noneg" term. If 24325e8c5aaSvikram * CTD_ALL is specified, the minor node path is returned as well. 24425e8c5aaSvikram * 24525e8c5aaSvikram * In addition there are interfaces (contract_device_ack/nack) which 24625e8c5aaSvikram * are used to support negotiation between userland processes and 24725e8c5aaSvikram * device contracts. These interfaces record the acknowledgement 24825e8c5aaSvikram * or lack thereof for negotiation events and help determine if the 24925e8c5aaSvikram * negotiated event should occur. 25025e8c5aaSvikram * 25125e8c5aaSvikram * "backend routines" 25225e8c5aaSvikram * ----------------- 25325e8c5aaSvikram * The backend routines form the interface between the I/O framework 25425e8c5aaSvikram * and the device contract subsystem. 
 *    These routines allow the I/O
 *    framework to call into the device contract subsystem to notify it of
 *    impending changes to a device state as well as to inform of the
 *    final disposition of such attempted state changes. Routines in this
 *    class include contract_device_offline() that indicates an attempt to
 *    offline a device, contract_device_degrade() that indicates that
 *    a device is moving to the degraded state and contract_device_negend()
 *    that is used by the I/O framework to inform the contracts subsystem of
 *    the final disposition of an attempted operation.
 *
 * SUMMARY
 * -------
 * A contract starts its life as a template. A process allocates a device
 * contract template and sets various terms:
 *	The A-set
 *	The device minor node
 *	Critical and informative events
 *	The noneg i.e. no negotiation term
 * Setting of these terms in the template is done via the
 * ctmpl_device_set() entry point in this file. A process can query a
 * template to determine the terms already set in the template - this is
 * facilitated by the ctmpl_device_get() routine.
 *
 * Once all the appropriate terms are set, the contract is instantiated via
 * one of two methods
 * - via an explicit create operation - this is facilitated by the
 *   ctmpl_device_create() entry point
 * - synchronously with the open(2) system call - this is achieved via the
 *   contract_device_open() routine.
 * The core work for both these above functions is done by
 * contract_device_create()
 *
 * A contract once created can be queried for its status. Support for
 * status info is provided by both the common contracts framework and by
 * the "device" contract type. If the level of detail requested is
 * CTD_COMMON, only the common contract framework data is used. Higher
 * levels of detail result in calls to contract_device_status() to supply
 * device contract type specific status information.
 *
 * A contract once created may be abandoned either explicitly or implicitly.
 * In either case, the contract_device_abandon() function is invoked. This
 * function merely calls contract_destroy() which moves the contract to
 * the DEAD state. The device contract portion of destroy processing is
 * provided by contract_device_destroy() which merely disassociates the
 * contract from its device devinfo node. A contract in the DEAD state is
 * not freed. It hangs around until all references to the contract are
 * gone. When that happens, the contract is finally deallocated. The
 * device contract specific portion of the free is done by
 * contract_device_free() which finally frees the device contract specific
 * data structure (cont_device_t).
 *
 * When a device undergoes a state change, the I/O framework calls the
 * corresponding device contract entry point. For example, when a device
 * is about to go OFFLINE, the routine contract_device_offline() is
 * invoked.
 * Similarly if a device moves to DEGRADED state, the
 * contract_device_degrade() function is called. These functions call the
 * core routine contract_device_publish(). This function determines via
 * the function is_sync_neg() whether an event is a synchronous (i.e.
 * negotiable) event or not. In the former case contract_device_publish()
 * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
 * and/or NACKs from contract holders. In the latter case, it simply
 * publishes the event and does not wait. In the negotiation case, ACKs or
 * NACKs from userland consumers result in contract_device_ack_nack()
 * being called where the result of the negotiation is recorded in the
 * contract data structure. Once all outstanding contract owners have
 * responded, the device contract code in wait_for_acks() determines the
 * final result of the negotiation. A single NACK overrides all other ACKs.
 * If there is no NACK, then a single ACK will result in an overall ACK
 * result. If there are no ACKs or NACKs, then the result CT_NONE is
 * returned back to the I/O framework. Once the event is permitted or
 * blocked, the I/O framework proceeds or aborts the state change. The
 * I/O framework then calls contract_device_negend() with a result code
 * indicating final disposition of the event. This call releases the
 * barrier and other state associated with the previous negotiation,
 * which permits the next event (if any) to come into the device contract
 * framework.
 *
 * Finally, a device that has outstanding contracts may be removed from
 * the system which results in its devinfo node being freed. The devinfo
 * free routine in the I/O framework calls into the device contract
 * function - contract_device_remove_dip(). This routine disassociates
 * the dip from all contracts associated with the devinfo node being freed,
 * allowing the devinfo node to be freed.
 *
 * LOCKING
 * ---------
 *	There are four sets of data that need to be protected by locks
 *
 *	i) device contract specific portion of the contract template - This data
 *	is protected by the template lock ctmpl_lock.
 *
 *	ii) device contract specific portion of the contract - This data is
 *	protected by the contract lock ct_lock
 *
 *	iii) The linked list of contracts hanging off a devinfo node - This
 *	list is protected by the per-devinfo node lock devi_ct_lock
 *
 *	iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
 *	and devi_ct_count that controls state changes to a dip
 *
 * The template lock is independent in that none of the other locks in this
 * file may be taken while holding the template lock (and vice versa).
35625e8c5aaSvikram * 35725e8c5aaSvikram * The remaining three locks have the following lock order 35825e8c5aaSvikram * 35925e8c5aaSvikram * devi_ct_lock -> ct_count barrier -> ct_lock 36025e8c5aaSvikram * 36125e8c5aaSvikram */ 36225e8c5aaSvikram 36325e8c5aaSvikram static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, 36425e8c5aaSvikram int spec_type, proc_t *owner, int *errorp); 36525e8c5aaSvikram 36625e8c5aaSvikram /* barrier routines */ 36725e8c5aaSvikram static void ct_barrier_acquire(dev_info_t *dip); 36825e8c5aaSvikram static void ct_barrier_release(dev_info_t *dip); 36925e8c5aaSvikram static int ct_barrier_held(dev_info_t *dip); 37025e8c5aaSvikram static int ct_barrier_empty(dev_info_t *dip); 37125e8c5aaSvikram static void ct_barrier_wait_for_release(dev_info_t *dip); 37225e8c5aaSvikram static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); 37325e8c5aaSvikram static void ct_barrier_decr(dev_info_t *dip); 37425e8c5aaSvikram static void ct_barrier_incr(dev_info_t *dip); 37525e8c5aaSvikram 37625e8c5aaSvikram ct_type_t *device_type; 37725e8c5aaSvikram 37825e8c5aaSvikram /* 37925e8c5aaSvikram * Macro predicates for determining when events should be sent and how. 38025e8c5aaSvikram */ 38125e8c5aaSvikram #define EVSENDP(ctd, flag) \ 38225e8c5aaSvikram ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) 38325e8c5aaSvikram 38425e8c5aaSvikram #define EVINFOP(ctd, flag) \ 38525e8c5aaSvikram ((ctd->cond_contract.ct_ev_crit & flag) == 0) 38625e8c5aaSvikram 38725e8c5aaSvikram /* 38825e8c5aaSvikram * State transition table showing which transitions are synchronous and which 38925e8c5aaSvikram * are not. 
39025e8c5aaSvikram */ 39125e8c5aaSvikram struct ct_dev_negtable { 39225e8c5aaSvikram uint_t st_old; 39325e8c5aaSvikram uint_t st_new; 39425e8c5aaSvikram uint_t st_neg; 39525e8c5aaSvikram } ct_dev_negtable[] = { 39625e8c5aaSvikram {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, 39725e8c5aaSvikram {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, 39825e8c5aaSvikram {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, 39925e8c5aaSvikram {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, 40025e8c5aaSvikram {0} 40125e8c5aaSvikram }; 40225e8c5aaSvikram 40325e8c5aaSvikram /* 40425e8c5aaSvikram * Device contract template implementation 40525e8c5aaSvikram */ 40625e8c5aaSvikram 40725e8c5aaSvikram /* 40825e8c5aaSvikram * ctmpl_device_dup 40925e8c5aaSvikram * 41025e8c5aaSvikram * The device contract template dup entry point. 41125e8c5aaSvikram * This simply copies all the fields (generic as well as device contract 41225e8c5aaSvikram * specific) fields of the original. 41325e8c5aaSvikram */ 41425e8c5aaSvikram static struct ct_template * 41525e8c5aaSvikram ctmpl_device_dup(struct ct_template *template) 41625e8c5aaSvikram { 41725e8c5aaSvikram ctmpl_device_t *new; 41825e8c5aaSvikram ctmpl_device_t *old = template->ctmpl_data; 41925e8c5aaSvikram char *buf; 42025e8c5aaSvikram char *minor; 42125e8c5aaSvikram 42225e8c5aaSvikram new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 42325e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 42425e8c5aaSvikram 42525e8c5aaSvikram /* 42625e8c5aaSvikram * copy generic fields. 
42725e8c5aaSvikram * ctmpl_copy returns with old template lock held 42825e8c5aaSvikram */ 42925e8c5aaSvikram ctmpl_copy(&new->ctd_ctmpl, template); 43025e8c5aaSvikram 43125e8c5aaSvikram new->ctd_ctmpl.ctmpl_data = new; 43225e8c5aaSvikram new->ctd_aset = old->ctd_aset; 43325e8c5aaSvikram new->ctd_minor = NULL; 43425e8c5aaSvikram new->ctd_noneg = old->ctd_noneg; 43525e8c5aaSvikram 43625e8c5aaSvikram if (old->ctd_minor) { 43725e8c5aaSvikram ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); 43825e8c5aaSvikram bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); 43925e8c5aaSvikram } else { 44025e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 44125e8c5aaSvikram buf = NULL; 44225e8c5aaSvikram } 44325e8c5aaSvikram 44425e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 44525e8c5aaSvikram if (buf) { 44625e8c5aaSvikram minor = i_ddi_strdup(buf, KM_SLEEP); 44725e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 44825e8c5aaSvikram buf = NULL; 44925e8c5aaSvikram } else { 45025e8c5aaSvikram minor = NULL; 45125e8c5aaSvikram } 45225e8c5aaSvikram mutex_enter(&template->ctmpl_lock); 45325e8c5aaSvikram 45425e8c5aaSvikram if (minor) { 45525e8c5aaSvikram new->ctd_minor = minor; 45625e8c5aaSvikram } 45725e8c5aaSvikram 45825e8c5aaSvikram ASSERT(buf == NULL); 45925e8c5aaSvikram return (&new->ctd_ctmpl); 46025e8c5aaSvikram } 46125e8c5aaSvikram 46225e8c5aaSvikram /* 46325e8c5aaSvikram * ctmpl_device_free 46425e8c5aaSvikram * 46525e8c5aaSvikram * The device contract template free entry point. Just 46625e8c5aaSvikram * frees the template. 
46725e8c5aaSvikram */ 46825e8c5aaSvikram static void 46925e8c5aaSvikram ctmpl_device_free(struct ct_template *template) 47025e8c5aaSvikram { 47125e8c5aaSvikram ctmpl_device_t *dtmpl = template->ctmpl_data; 47225e8c5aaSvikram 47325e8c5aaSvikram if (dtmpl->ctd_minor) 47425e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 47525e8c5aaSvikram 47625e8c5aaSvikram kmem_free(dtmpl, sizeof (ctmpl_device_t)); 47725e8c5aaSvikram } 47825e8c5aaSvikram 47925e8c5aaSvikram /* 48025e8c5aaSvikram * SAFE_EV is the set of events which a non-privileged process is 48125e8c5aaSvikram * allowed to make critical. An unprivileged device contract owner has 48225e8c5aaSvikram * no control over when a device changes state, so all device events 48325e8c5aaSvikram * can be in the critical set. 48425e8c5aaSvikram * 48525e8c5aaSvikram * EXCESS tells us if "value", a critical event set, requires 48625e8c5aaSvikram * additional privilege. For device contracts EXCESS currently 48725e8c5aaSvikram * evaluates to 0. 48825e8c5aaSvikram */ 48925e8c5aaSvikram #define SAFE_EV (CT_DEV_ALLEVENT) 49025e8c5aaSvikram #define EXCESS(value) ((value) & ~SAFE_EV) 49125e8c5aaSvikram 49225e8c5aaSvikram 49325e8c5aaSvikram /* 49425e8c5aaSvikram * ctmpl_device_set 49525e8c5aaSvikram * 49625e8c5aaSvikram * The device contract template set entry point. Sets various terms in the 49725e8c5aaSvikram * template. The non-negotiable term can only be set if the process has 49825e8c5aaSvikram * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 
49925e8c5aaSvikram */ 50025e8c5aaSvikram static int 50125e8c5aaSvikram ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr) 50225e8c5aaSvikram { 50325e8c5aaSvikram ctmpl_device_t *dtmpl = tmpl->ctmpl_data; 50425e8c5aaSvikram int error; 50525e8c5aaSvikram dev_info_t *dip; 50625e8c5aaSvikram int spec_type; 507*7b209c2cSacruz uint64_t param_value; 508*7b209c2cSacruz char *str_value; 50925e8c5aaSvikram 51025e8c5aaSvikram ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); 51125e8c5aaSvikram 512*7b209c2cSacruz if (param->ctpm_id == CTDP_MINOR) { 513*7b209c2cSacruz str_value = (char *)param->ctpm_value; 514*7b209c2cSacruz str_value[param->ctpm_size - 1] = '\0'; 515*7b209c2cSacruz } else { 516*7b209c2cSacruz param_value = *(uint64_t *)param->ctpm_value; 517*7b209c2cSacruz } 518*7b209c2cSacruz 51925e8c5aaSvikram switch (param->ctpm_id) { 52025e8c5aaSvikram case CTDP_ACCEPT: 521*7b209c2cSacruz if (param_value & ~CT_DEV_ALLEVENT) 52225e8c5aaSvikram return (EINVAL); 523*7b209c2cSacruz if (param_value == 0) 52425e8c5aaSvikram return (EINVAL); 525*7b209c2cSacruz if (param_value == CT_DEV_ALLEVENT) 52625e8c5aaSvikram return (EINVAL); 52725e8c5aaSvikram 528*7b209c2cSacruz dtmpl->ctd_aset = param_value; 52925e8c5aaSvikram break; 53025e8c5aaSvikram case CTDP_NONEG: 531*7b209c2cSacruz if (param_value != CTDP_NONEG_SET && 532*7b209c2cSacruz param_value != CTDP_NONEG_CLEAR) 53325e8c5aaSvikram return (EINVAL); 53425e8c5aaSvikram 53525e8c5aaSvikram /* 53625e8c5aaSvikram * only privileged processes can designate a contract 53725e8c5aaSvikram * non-negotiatble. 
53825e8c5aaSvikram */ 539*7b209c2cSacruz if (param_value == CTDP_NONEG_SET && 54025e8c5aaSvikram (error = secpolicy_sys_devices(cr)) != 0) { 54125e8c5aaSvikram return (error); 54225e8c5aaSvikram } 54325e8c5aaSvikram 544*7b209c2cSacruz dtmpl->ctd_noneg = param_value; 54525e8c5aaSvikram break; 54625e8c5aaSvikram 54725e8c5aaSvikram case CTDP_MINOR: 548*7b209c2cSacruz if (*str_value != '/' || 549*7b209c2cSacruz strncmp(str_value, "/devices/", 550*7b209c2cSacruz strlen("/devices/")) == 0 || 551*7b209c2cSacruz strstr(str_value, "../devices/") != NULL || 552*7b209c2cSacruz strchr(str_value, ':') == NULL) { 55325e8c5aaSvikram return (EINVAL); 55425e8c5aaSvikram } 55525e8c5aaSvikram 55625e8c5aaSvikram spec_type = 0; 55725e8c5aaSvikram dip = NULL; 558*7b209c2cSacruz if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) { 55925e8c5aaSvikram return (ERANGE); 56025e8c5aaSvikram } 56125e8c5aaSvikram ddi_release_devi(dip); 56225e8c5aaSvikram 56325e8c5aaSvikram if (spec_type != S_IFCHR && spec_type != S_IFBLK) { 56425e8c5aaSvikram return (EINVAL); 56525e8c5aaSvikram } 56625e8c5aaSvikram 56725e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 56825e8c5aaSvikram kmem_free(dtmpl->ctd_minor, 56925e8c5aaSvikram strlen(dtmpl->ctd_minor) + 1); 57025e8c5aaSvikram } 571*7b209c2cSacruz dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP); 57225e8c5aaSvikram break; 57325e8c5aaSvikram case CTP_EV_CRITICAL: 57425e8c5aaSvikram /* 57525e8c5aaSvikram * Currently for device contracts, any event 57625e8c5aaSvikram * may be added to the critical set. We retain the 57725e8c5aaSvikram * following code however for future enhancements. 
57825e8c5aaSvikram */ 579*7b209c2cSacruz if (EXCESS(param_value) && 58025e8c5aaSvikram (error = secpolicy_contract_event(cr)) != 0) 58125e8c5aaSvikram return (error); 582*7b209c2cSacruz tmpl->ctmpl_ev_crit = param_value; 58325e8c5aaSvikram break; 58425e8c5aaSvikram default: 58525e8c5aaSvikram return (EINVAL); 58625e8c5aaSvikram } 58725e8c5aaSvikram 58825e8c5aaSvikram return (0); 58925e8c5aaSvikram } 59025e8c5aaSvikram 59125e8c5aaSvikram /* 59225e8c5aaSvikram * ctmpl_device_get 59325e8c5aaSvikram * 59425e8c5aaSvikram * The device contract template get entry point. Simply fetches and 59525e8c5aaSvikram * returns the value of the requested term. 59625e8c5aaSvikram */ 59725e8c5aaSvikram static int 59825e8c5aaSvikram ctmpl_device_get(struct ct_template *template, ct_param_t *param) 59925e8c5aaSvikram { 60025e8c5aaSvikram ctmpl_device_t *dtmpl = template->ctmpl_data; 601*7b209c2cSacruz uint64_t *param_value = param->ctpm_value; 60225e8c5aaSvikram 60325e8c5aaSvikram ASSERT(MUTEX_HELD(&template->ctmpl_lock)); 60425e8c5aaSvikram 60525e8c5aaSvikram switch (param->ctpm_id) { 60625e8c5aaSvikram case CTDP_ACCEPT: 607*7b209c2cSacruz *param_value = dtmpl->ctd_aset; 60825e8c5aaSvikram break; 60925e8c5aaSvikram case CTDP_NONEG: 610*7b209c2cSacruz *param_value = dtmpl->ctd_noneg; 61125e8c5aaSvikram break; 61225e8c5aaSvikram case CTDP_MINOR: 61325e8c5aaSvikram if (dtmpl->ctd_minor) { 614*7b209c2cSacruz param->ctpm_size = strlcpy((char *)param->ctpm_value, 615*7b209c2cSacruz dtmpl->ctd_minor, param->ctpm_size); 616*7b209c2cSacruz param->ctpm_size++; 61725e8c5aaSvikram } else { 61825e8c5aaSvikram return (ENOENT); 61925e8c5aaSvikram } 62025e8c5aaSvikram break; 62125e8c5aaSvikram default: 62225e8c5aaSvikram return (EINVAL); 62325e8c5aaSvikram } 62425e8c5aaSvikram 62525e8c5aaSvikram return (0); 62625e8c5aaSvikram } 62725e8c5aaSvikram 62825e8c5aaSvikram /* 62925e8c5aaSvikram * Device contract type specific portion of creating a contract using 63025e8c5aaSvikram * a specified template 
63125e8c5aaSvikram */ 63225e8c5aaSvikram /*ARGSUSED*/ 63325e8c5aaSvikram int 63425e8c5aaSvikram ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) 63525e8c5aaSvikram { 63625e8c5aaSvikram ctmpl_device_t *dtmpl; 63725e8c5aaSvikram char *buf; 63825e8c5aaSvikram dev_t dev; 63925e8c5aaSvikram int spec_type; 64025e8c5aaSvikram int error; 64125e8c5aaSvikram cont_device_t *ctd; 64225e8c5aaSvikram 64325e8c5aaSvikram if (ctidp == NULL) 64425e8c5aaSvikram return (EINVAL); 64525e8c5aaSvikram 64625e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 64725e8c5aaSvikram 64825e8c5aaSvikram dtmpl = template->ctmpl_data; 64925e8c5aaSvikram 65025e8c5aaSvikram mutex_enter(&template->ctmpl_lock); 65125e8c5aaSvikram if (dtmpl->ctd_minor == NULL) { 65225e8c5aaSvikram /* incomplete template */ 65325e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 65425e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 65525e8c5aaSvikram return (EINVAL); 65625e8c5aaSvikram } else { 65725e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 65825e8c5aaSvikram bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); 65925e8c5aaSvikram } 66025e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 66125e8c5aaSvikram 66225e8c5aaSvikram spec_type = 0; 66325e8c5aaSvikram dev = NODEV; 66425e8c5aaSvikram if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || 66525e8c5aaSvikram dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || 66625e8c5aaSvikram (spec_type != S_IFCHR && spec_type != S_IFBLK)) { 66725e8c5aaSvikram CT_DEBUG((CE_WARN, 66825e8c5aaSvikram "tmpl_create: failed to find device: %s", buf)); 66925e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 67025e8c5aaSvikram return (ERANGE); 67125e8c5aaSvikram } 67225e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 67325e8c5aaSvikram 67425e8c5aaSvikram ctd = contract_device_create(template->ctmpl_data, 67525e8c5aaSvikram dev, spec_type, curproc, &error); 67625e8c5aaSvikram 67725e8c5aaSvikram if (ctd == NULL) { 67825e8c5aaSvikram CT_DEBUG((CE_WARN, "Failed to 
create device contract for " 67925e8c5aaSvikram "process (%d) with device (devt = %lu, spec_type = %s)", 68025e8c5aaSvikram curproc->p_pid, dev, 68125e8c5aaSvikram spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); 68225e8c5aaSvikram return (error); 68325e8c5aaSvikram } 68425e8c5aaSvikram 68525e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 68625e8c5aaSvikram *ctidp = ctd->cond_contract.ct_id; 68725e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 68825e8c5aaSvikram 68925e8c5aaSvikram return (0); 69025e8c5aaSvikram } 69125e8c5aaSvikram 69225e8c5aaSvikram /* 69325e8c5aaSvikram * Device contract specific template entry points 69425e8c5aaSvikram */ 69525e8c5aaSvikram static ctmplops_t ctmpl_device_ops = { 69625e8c5aaSvikram ctmpl_device_dup, /* ctop_dup */ 69725e8c5aaSvikram ctmpl_device_free, /* ctop_free */ 69825e8c5aaSvikram ctmpl_device_set, /* ctop_set */ 69925e8c5aaSvikram ctmpl_device_get, /* ctop_get */ 70025e8c5aaSvikram ctmpl_device_create, /* ctop_create */ 70125e8c5aaSvikram CT_DEV_ALLEVENT /* all device events bitmask */ 70225e8c5aaSvikram }; 70325e8c5aaSvikram 70425e8c5aaSvikram 70525e8c5aaSvikram /* 70625e8c5aaSvikram * Device contract implementation 70725e8c5aaSvikram */ 70825e8c5aaSvikram 70925e8c5aaSvikram /* 71025e8c5aaSvikram * contract_device_default 71125e8c5aaSvikram * 71225e8c5aaSvikram * The device contract default template entry point. Creates a 71325e8c5aaSvikram * device contract template with a default A-set and no "noneg" , 71425e8c5aaSvikram * with informative degrade events and critical offline events. 71525e8c5aaSvikram * There is no default minor path. 
71625e8c5aaSvikram */ 71725e8c5aaSvikram static ct_template_t * 71825e8c5aaSvikram contract_device_default(void) 71925e8c5aaSvikram { 72025e8c5aaSvikram ctmpl_device_t *new; 72125e8c5aaSvikram 72225e8c5aaSvikram new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 72325e8c5aaSvikram ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); 72425e8c5aaSvikram 72525e8c5aaSvikram new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; 72625e8c5aaSvikram new->ctd_noneg = 0; 72725e8c5aaSvikram new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; 72825e8c5aaSvikram new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; 72925e8c5aaSvikram 73025e8c5aaSvikram return (&new->ctd_ctmpl); 73125e8c5aaSvikram } 73225e8c5aaSvikram 73325e8c5aaSvikram /* 73425e8c5aaSvikram * contract_device_free 73525e8c5aaSvikram * 73625e8c5aaSvikram * Destroys the device contract specific portion of a contract and 73725e8c5aaSvikram * frees the contract. 73825e8c5aaSvikram */ 73925e8c5aaSvikram static void 74025e8c5aaSvikram contract_device_free(contract_t *ct) 74125e8c5aaSvikram { 74225e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 74325e8c5aaSvikram 74425e8c5aaSvikram ASSERT(ctd->cond_minor); 74525e8c5aaSvikram ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 74625e8c5aaSvikram kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); 74725e8c5aaSvikram 74825e8c5aaSvikram ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && 74925e8c5aaSvikram ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); 75025e8c5aaSvikram 75125e8c5aaSvikram ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); 75225e8c5aaSvikram 75325e8c5aaSvikram ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); 75425e8c5aaSvikram ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); 75525e8c5aaSvikram 75625e8c5aaSvikram ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); 75725e8c5aaSvikram ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); 75825e8c5aaSvikram 75925e8c5aaSvikram ASSERT((ctd->cond_currev_id > 0) ^ 
(ctd->cond_currev_type == 0)); 76025e8c5aaSvikram ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); 76125e8c5aaSvikram 76225e8c5aaSvikram ASSERT(!list_link_active(&ctd->cond_next)); 76325e8c5aaSvikram 76425e8c5aaSvikram kmem_free(ctd, sizeof (cont_device_t)); 76525e8c5aaSvikram } 76625e8c5aaSvikram 76725e8c5aaSvikram /* 76825e8c5aaSvikram * contract_device_abandon 76925e8c5aaSvikram * 77025e8c5aaSvikram * The device contract abandon entry point. 77125e8c5aaSvikram */ 77225e8c5aaSvikram static void 77325e8c5aaSvikram contract_device_abandon(contract_t *ct) 77425e8c5aaSvikram { 77525e8c5aaSvikram ASSERT(MUTEX_HELD(&ct->ct_lock)); 77625e8c5aaSvikram 77725e8c5aaSvikram /* 77825e8c5aaSvikram * device contracts cannot be inherited or orphaned. 77925e8c5aaSvikram * Move the contract to the DEAD_STATE. It will be freed 78025e8c5aaSvikram * once all references to it are gone. 78125e8c5aaSvikram */ 78225e8c5aaSvikram contract_destroy(ct); 78325e8c5aaSvikram } 78425e8c5aaSvikram 78525e8c5aaSvikram /* 78625e8c5aaSvikram * contract_device_destroy 78725e8c5aaSvikram * 78825e8c5aaSvikram * The device contract destroy entry point. 78925e8c5aaSvikram * Called from contract_destroy() to do any type specific destroy. Note 79025e8c5aaSvikram * that destroy is a misnomer - this does not free the contract, it only 79125e8c5aaSvikram * moves it to the dead state. 
A contract is actually freed via 79225e8c5aaSvikram * contract_rele() -> contract_dtor(), contop_free() 79325e8c5aaSvikram */ 79425e8c5aaSvikram static void 79525e8c5aaSvikram contract_device_destroy(contract_t *ct) 79625e8c5aaSvikram { 79725e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 79825e8c5aaSvikram dev_info_t *dip = ctd->cond_dip; 79925e8c5aaSvikram 80025e8c5aaSvikram ASSERT(MUTEX_HELD(&ct->ct_lock)); 80125e8c5aaSvikram 80225e8c5aaSvikram if (dip == NULL) { 80325e8c5aaSvikram /* 80425e8c5aaSvikram * The dip has been removed, this is a dangling contract 80525e8c5aaSvikram * Check that dip linkages are NULL 80625e8c5aaSvikram */ 80725e8c5aaSvikram ASSERT(!list_link_active(&ctd->cond_next)); 80825e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no " 80925e8c5aaSvikram "devinfo node. contract ctid : %d", ct->ct_id)); 81025e8c5aaSvikram return; 81125e8c5aaSvikram } 81225e8c5aaSvikram 81325e8c5aaSvikram /* 81425e8c5aaSvikram * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock 81525e8c5aaSvikram */ 81625e8c5aaSvikram mutex_exit(&ct->ct_lock); 81725e8c5aaSvikram 81825e8c5aaSvikram /* 81925e8c5aaSvikram * Waiting for the barrier to be released is strictly speaking not 82025e8c5aaSvikram * necessary. But it simplifies the implementation of 82125e8c5aaSvikram * contract_device_publish() by establishing the invariant that 82225e8c5aaSvikram * device contracts cannot go away during negotiation. 
82325e8c5aaSvikram */ 82425e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 82525e8c5aaSvikram ct_barrier_wait_for_release(dip); 82625e8c5aaSvikram mutex_enter(&ct->ct_lock); 82725e8c5aaSvikram 82825e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 82925e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 83025e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 83125e8c5aaSvikram 83225e8c5aaSvikram mutex_exit(&ct->ct_lock); 83325e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 83425e8c5aaSvikram mutex_enter(&ct->ct_lock); 83525e8c5aaSvikram } 83625e8c5aaSvikram 83725e8c5aaSvikram /* 83825e8c5aaSvikram * contract_device_status 83925e8c5aaSvikram * 84025e8c5aaSvikram * The device contract status entry point. Called when level of "detail" 84125e8c5aaSvikram * is either CTD_FIXED or CTD_ALL 84225e8c5aaSvikram * 84325e8c5aaSvikram */ 84425e8c5aaSvikram static void 84525e8c5aaSvikram contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, 84625e8c5aaSvikram void *status, model_t model) 84725e8c5aaSvikram { 84825e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 84925e8c5aaSvikram 85025e8c5aaSvikram ASSERT(detail == CTD_FIXED || detail == CTD_ALL); 85125e8c5aaSvikram 85225e8c5aaSvikram mutex_enter(&ct->ct_lock); 85325e8c5aaSvikram contract_status_common(ct, zone, status, model); 85425e8c5aaSvikram 85525e8c5aaSvikram /* 85625e8c5aaSvikram * There's no need to hold the contract lock while accessing static 85725e8c5aaSvikram * data like aset or noneg. But since we need the lock to access other 85825e8c5aaSvikram * data like state, we hold it anyway. 
85925e8c5aaSvikram */ 86025e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); 86125e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); 86225e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); 86325e8c5aaSvikram 86425e8c5aaSvikram if (detail == CTD_FIXED) { 86525e8c5aaSvikram mutex_exit(&ct->ct_lock); 86625e8c5aaSvikram return; 86725e8c5aaSvikram } 86825e8c5aaSvikram 86925e8c5aaSvikram ASSERT(ctd->cond_minor); 87025e8c5aaSvikram VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); 87125e8c5aaSvikram 87225e8c5aaSvikram mutex_exit(&ct->ct_lock); 87325e8c5aaSvikram } 87425e8c5aaSvikram 87525e8c5aaSvikram /* 87625e8c5aaSvikram * Converts a result integer into the corresponding string. Used for printing 87725e8c5aaSvikram * messages 87825e8c5aaSvikram */ 87925e8c5aaSvikram static char * 88025e8c5aaSvikram result_str(uint_t result) 88125e8c5aaSvikram { 88225e8c5aaSvikram switch (result) { 88325e8c5aaSvikram case CT_ACK: 88425e8c5aaSvikram return ("CT_ACK"); 88525e8c5aaSvikram case CT_NACK: 88625e8c5aaSvikram return ("CT_NACK"); 88725e8c5aaSvikram case CT_NONE: 88825e8c5aaSvikram return ("CT_NONE"); 88925e8c5aaSvikram default: 89025e8c5aaSvikram return ("UNKNOWN"); 89125e8c5aaSvikram } 89225e8c5aaSvikram } 89325e8c5aaSvikram 89425e8c5aaSvikram /* 89525e8c5aaSvikram * Converts a device state integer constant into the corresponding string. 89625e8c5aaSvikram * Used to print messages. 
89725e8c5aaSvikram */ 89825e8c5aaSvikram static char * 89925e8c5aaSvikram state_str(uint_t state) 90025e8c5aaSvikram { 90125e8c5aaSvikram switch (state) { 90225e8c5aaSvikram case CT_DEV_EV_ONLINE: 90325e8c5aaSvikram return ("ONLINE"); 90425e8c5aaSvikram case CT_DEV_EV_DEGRADED: 90525e8c5aaSvikram return ("DEGRADED"); 90625e8c5aaSvikram case CT_DEV_EV_OFFLINE: 90725e8c5aaSvikram return ("OFFLINE"); 90825e8c5aaSvikram default: 90925e8c5aaSvikram return ("UNKNOWN"); 91025e8c5aaSvikram } 91125e8c5aaSvikram } 91225e8c5aaSvikram 91325e8c5aaSvikram /* 91425e8c5aaSvikram * Routine that determines if a particular CT_DEV_EV_? event corresponds to a 91525e8c5aaSvikram * synchronous state change or not. 91625e8c5aaSvikram */ 91725e8c5aaSvikram static int 91825e8c5aaSvikram is_sync_neg(uint_t old, uint_t new) 91925e8c5aaSvikram { 92025e8c5aaSvikram int i; 92125e8c5aaSvikram 92225e8c5aaSvikram ASSERT(old & CT_DEV_ALLEVENT); 92325e8c5aaSvikram ASSERT(new & CT_DEV_ALLEVENT); 92425e8c5aaSvikram 92525e8c5aaSvikram if (old == new) { 92625e8c5aaSvikram CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", 92725e8c5aaSvikram state_str(new))); 92825e8c5aaSvikram return (-2); 92925e8c5aaSvikram } 93025e8c5aaSvikram 93125e8c5aaSvikram for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { 93225e8c5aaSvikram if (old == ct_dev_negtable[i].st_old && 93325e8c5aaSvikram new == ct_dev_negtable[i].st_new) { 93425e8c5aaSvikram return (ct_dev_negtable[i].st_neg); 93525e8c5aaSvikram } 93625e8c5aaSvikram } 93725e8c5aaSvikram 93825e8c5aaSvikram CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " 93925e8c5aaSvikram "old = %s -> new = %s", state_str(old), state_str(new))); 94025e8c5aaSvikram 94125e8c5aaSvikram return (-1); 94225e8c5aaSvikram } 94325e8c5aaSvikram 94425e8c5aaSvikram /* 94525e8c5aaSvikram * Used to cleanup cached dv_nodes so that when a device is released by 94625e8c5aaSvikram * a contract holder, its devinfo node can be successfully detached. 
94725e8c5aaSvikram */ 94825e8c5aaSvikram static int 94925e8c5aaSvikram contract_device_dvclean(dev_info_t *dip) 95025e8c5aaSvikram { 95125e8c5aaSvikram char *devnm; 95225e8c5aaSvikram dev_info_t *pdip; 95325e8c5aaSvikram int error; 95425e8c5aaSvikram 95525e8c5aaSvikram ASSERT(dip); 95625e8c5aaSvikram 95725e8c5aaSvikram /* pdip can be NULL if we have contracts against the root dip */ 95825e8c5aaSvikram pdip = ddi_get_parent(dip); 95925e8c5aaSvikram 96025e8c5aaSvikram if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { 96125e8c5aaSvikram char *path; 96225e8c5aaSvikram 96325e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 96425e8c5aaSvikram (void) ddi_pathname(dip, path); 96525e8c5aaSvikram CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " 96625e8c5aaSvikram "device=%s", path)); 96725e8c5aaSvikram kmem_free(path, MAXPATHLEN); 96825e8c5aaSvikram return (EDEADLOCK); 96925e8c5aaSvikram } 97025e8c5aaSvikram 97125e8c5aaSvikram if (pdip) { 97225e8c5aaSvikram devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 97325e8c5aaSvikram (void) ddi_deviname(dip, devnm); 97425e8c5aaSvikram error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); 97525e8c5aaSvikram kmem_free(devnm, MAXNAMELEN + 1); 97625e8c5aaSvikram } else { 97725e8c5aaSvikram error = devfs_clean(dip, NULL, DV_CLEAN_FORCE); 97825e8c5aaSvikram } 97925e8c5aaSvikram 98025e8c5aaSvikram return (error); 98125e8c5aaSvikram } 98225e8c5aaSvikram 98325e8c5aaSvikram /* 98425e8c5aaSvikram * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. 98525e8c5aaSvikram * Results in the ACK or NACK being recorded on the dip for one particular 98625e8c5aaSvikram * contract. The device contracts framework evaluates the ACK/NACKs for all 98725e8c5aaSvikram * contracts against a device to determine if a particular device state change 98825e8c5aaSvikram * should be allowed. 
98925e8c5aaSvikram */ 99025e8c5aaSvikram static int 99125e8c5aaSvikram contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, 99225e8c5aaSvikram uint_t cmd) 99325e8c5aaSvikram { 99425e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 99525e8c5aaSvikram dev_info_t *dip; 99625e8c5aaSvikram ctid_t ctid; 99725e8c5aaSvikram int error; 99825e8c5aaSvikram 99925e8c5aaSvikram ctid = ct->ct_id; 100025e8c5aaSvikram 100125e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); 100225e8c5aaSvikram 100325e8c5aaSvikram mutex_enter(&ct->ct_lock); 100425e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); 100525e8c5aaSvikram 100625e8c5aaSvikram dip = ctd->cond_dip; 100725e8c5aaSvikram 100825e8c5aaSvikram ASSERT(ctd->cond_minor); 100925e8c5aaSvikram ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 101025e8c5aaSvikram 101125e8c5aaSvikram /* 101225e8c5aaSvikram * Negotiation only if new state is not in A-set 101325e8c5aaSvikram */ 101425e8c5aaSvikram ASSERT(!(ctd->cond_aset & evtype)); 101525e8c5aaSvikram 101625e8c5aaSvikram /* 101725e8c5aaSvikram * Negotiation only if transition is synchronous 101825e8c5aaSvikram */ 101925e8c5aaSvikram ASSERT(is_sync_neg(ctd->cond_state, evtype)); 102025e8c5aaSvikram 102125e8c5aaSvikram /* 102225e8c5aaSvikram * We shouldn't be negotiating if the "noneg" flag is set 102325e8c5aaSvikram */ 102425e8c5aaSvikram ASSERT(!ctd->cond_noneg); 102525e8c5aaSvikram 102625e8c5aaSvikram if (dip) 102725e8c5aaSvikram ndi_hold_devi(dip); 102825e8c5aaSvikram 102925e8c5aaSvikram mutex_exit(&ct->ct_lock); 103025e8c5aaSvikram 103125e8c5aaSvikram /* 103225e8c5aaSvikram * dv_clean only if !NACK and offline state change 103325e8c5aaSvikram */ 103425e8c5aaSvikram if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { 103525e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); 103625e8c5aaSvikram error = contract_device_dvclean(dip); 103725e8c5aaSvikram if (error != 0) { 103825e8c5aaSvikram 
CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", 103925e8c5aaSvikram ctid)); 104025e8c5aaSvikram ddi_release_devi(dip); 104125e8c5aaSvikram } 104225e8c5aaSvikram } 104325e8c5aaSvikram 104425e8c5aaSvikram mutex_enter(&ct->ct_lock); 104525e8c5aaSvikram 104625e8c5aaSvikram if (dip) 104725e8c5aaSvikram ddi_release_devi(dip); 104825e8c5aaSvikram 104925e8c5aaSvikram if (dip == NULL) { 105025e8c5aaSvikram if (ctd->cond_currev_id != evid) { 105125e8c5aaSvikram CT_DEBUG((CE_WARN, "%sACK for non-current event " 105225e8c5aaSvikram "(type=%s, id=%llu) on removed device", 105325e8c5aaSvikram cmd == CT_NACK ? "N" : "", 105425e8c5aaSvikram state_str(evtype), (unsigned long long)evid)); 105525e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", 105625e8c5aaSvikram ctid)); 105725e8c5aaSvikram } else { 105825e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 105925e8c5aaSvikram CT_DEBUG((CE_WARN, "contract_ack: no such device: " 106025e8c5aaSvikram "ctid: %d", ctid)); 106125e8c5aaSvikram } 106225e8c5aaSvikram error = (ct->ct_state == CTS_DEAD) ? ESRCH : 106325e8c5aaSvikram ((cmd == CT_NACK) ? 
ETIMEDOUT : 0); 106425e8c5aaSvikram mutex_exit(&ct->ct_lock); 106525e8c5aaSvikram return (error); 106625e8c5aaSvikram } 106725e8c5aaSvikram 106825e8c5aaSvikram /* 106925e8c5aaSvikram * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock 107025e8c5aaSvikram */ 107125e8c5aaSvikram mutex_exit(&ct->ct_lock); 107225e8c5aaSvikram 107325e8c5aaSvikram mutex_enter(&DEVI(dip)->devi_ct_lock); 107425e8c5aaSvikram mutex_enter(&ct->ct_lock); 107525e8c5aaSvikram if (ctd->cond_currev_id != evid) { 107625e8c5aaSvikram char *buf; 107725e8c5aaSvikram mutex_exit(&ct->ct_lock); 107825e8c5aaSvikram mutex_exit(&DEVI(dip)->devi_ct_lock); 107925e8c5aaSvikram ndi_hold_devi(dip); 108025e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 108125e8c5aaSvikram (void) ddi_pathname(dip, buf); 108225e8c5aaSvikram ddi_release_devi(dip); 108325e8c5aaSvikram CT_DEBUG((CE_WARN, "%sACK for non-current event" 108425e8c5aaSvikram "(type=%s, id=%llu) on device %s", 108525e8c5aaSvikram cmd == CT_NACK ? "N" : "", 108625e8c5aaSvikram state_str(evtype), (unsigned long long)evid, buf)); 108725e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 108825e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", 108925e8c5aaSvikram cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); 109025e8c5aaSvikram return (cmd == CT_ACK ? 0 : ETIMEDOUT); 109125e8c5aaSvikram } 109225e8c5aaSvikram 109325e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 109425e8c5aaSvikram ASSERT(cmd == CT_ACK || cmd == CT_NACK); 109525e8c5aaSvikram 109625e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", 109725e8c5aaSvikram cmd == CT_NACK ? 
"N" : "", ctid)); 109825e8c5aaSvikram 109925e8c5aaSvikram ctd->cond_currev_ack = cmd; 110025e8c5aaSvikram mutex_exit(&ct->ct_lock); 110125e8c5aaSvikram 110225e8c5aaSvikram ct_barrier_decr(dip); 110325e8c5aaSvikram mutex_exit(&DEVI(dip)->devi_ct_lock); 110425e8c5aaSvikram 110525e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); 110625e8c5aaSvikram 110725e8c5aaSvikram return (0); 110825e8c5aaSvikram } 110925e8c5aaSvikram 111025e8c5aaSvikram /* 111125e8c5aaSvikram * Invoked when a userland contract holder approves (i.e. ACKs) a state change 111225e8c5aaSvikram */ 111325e8c5aaSvikram static int 111425e8c5aaSvikram contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) 111525e8c5aaSvikram { 111625e8c5aaSvikram return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); 111725e8c5aaSvikram } 111825e8c5aaSvikram 111925e8c5aaSvikram /* 112025e8c5aaSvikram * Invoked when a userland contract holder blocks (i.e. NACKs) a state change 112125e8c5aaSvikram */ 112225e8c5aaSvikram static int 112325e8c5aaSvikram contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) 112425e8c5aaSvikram { 112525e8c5aaSvikram return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); 112625e8c5aaSvikram } 112725e8c5aaSvikram 112825e8c5aaSvikram /* 112925e8c5aaSvikram * Creates a new contract synchronously with the breaking of an existing 113025e8c5aaSvikram * contract. Currently not supported. 
113125e8c5aaSvikram */ 113225e8c5aaSvikram /*ARGSUSED*/ 113325e8c5aaSvikram static int 113425e8c5aaSvikram contract_device_newct(contract_t *ct) 113525e8c5aaSvikram { 113625e8c5aaSvikram return (ENOTSUP); 113725e8c5aaSvikram } 113825e8c5aaSvikram 113925e8c5aaSvikram /* 114025e8c5aaSvikram * Core device contract implementation entry points 114125e8c5aaSvikram */ 114225e8c5aaSvikram static contops_t contract_device_ops = { 114325e8c5aaSvikram contract_device_free, /* contop_free */ 114425e8c5aaSvikram contract_device_abandon, /* contop_abandon */ 114525e8c5aaSvikram contract_device_destroy, /* contop_destroy */ 114625e8c5aaSvikram contract_device_status, /* contop_status */ 114725e8c5aaSvikram contract_device_ack, /* contop_ack */ 114825e8c5aaSvikram contract_device_nack, /* contop_nack */ 114925e8c5aaSvikram contract_qack_notsup, /* contop_qack */ 115025e8c5aaSvikram contract_device_newct /* contop_newct */ 115125e8c5aaSvikram }; 115225e8c5aaSvikram 115325e8c5aaSvikram /* 115425e8c5aaSvikram * contract_device_init 115525e8c5aaSvikram * 115625e8c5aaSvikram * Initializes the device contract type. 115725e8c5aaSvikram */ 115825e8c5aaSvikram void 115925e8c5aaSvikram contract_device_init(void) 116025e8c5aaSvikram { 116125e8c5aaSvikram device_type = contract_type_init(CTT_DEVICE, "device", 116225e8c5aaSvikram &contract_device_ops, contract_device_default); 116325e8c5aaSvikram } 116425e8c5aaSvikram 116525e8c5aaSvikram /* 116625e8c5aaSvikram * contract_device_create 116725e8c5aaSvikram * 116825e8c5aaSvikram * create a device contract given template "tmpl" and the "owner" process. 116925e8c5aaSvikram * May fail and return NULL if project.max-contracts would have been exceeded. 
117025e8c5aaSvikram * 117125e8c5aaSvikram * Common device contract creation routine called for both open-time and 117225e8c5aaSvikram * non-open time device contract creation 117325e8c5aaSvikram */ 117425e8c5aaSvikram static cont_device_t * 117525e8c5aaSvikram contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, 117625e8c5aaSvikram proc_t *owner, int *errorp) 117725e8c5aaSvikram { 117825e8c5aaSvikram cont_device_t *ctd; 117925e8c5aaSvikram char *minor; 118025e8c5aaSvikram char *path; 118125e8c5aaSvikram dev_info_t *dip; 118225e8c5aaSvikram 118325e8c5aaSvikram ASSERT(dtmpl != NULL); 118425e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); 118525e8c5aaSvikram ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); 118625e8c5aaSvikram ASSERT(errorp); 118725e8c5aaSvikram 118825e8c5aaSvikram *errorp = 0; 118925e8c5aaSvikram 119025e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 119125e8c5aaSvikram 119225e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 119325e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 119425e8c5aaSvikram bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); 119525e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 119625e8c5aaSvikram 119725e8c5aaSvikram dip = e_ddi_hold_devi_by_path(path, 0); 119825e8c5aaSvikram if (dip == NULL) { 119925e8c5aaSvikram cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " 120025e8c5aaSvikram "for device path (%s)", path); 120125e8c5aaSvikram kmem_free(path, MAXPATHLEN); 120225e8c5aaSvikram *errorp = ERANGE; 120325e8c5aaSvikram return (NULL); 120425e8c5aaSvikram } 120525e8c5aaSvikram 120625e8c5aaSvikram /* 120725e8c5aaSvikram * Lock out any parallel contract negotiations 120825e8c5aaSvikram */ 120925e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 121025e8c5aaSvikram ct_barrier_acquire(dip); 121125e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 121225e8c5aaSvikram 121325e8c5aaSvikram minor = i_ddi_strdup(path, 
KM_SLEEP); 121425e8c5aaSvikram kmem_free(path, MAXPATHLEN); 121525e8c5aaSvikram 121625e8c5aaSvikram (void) contract_type_pbundle(device_type, owner); 121725e8c5aaSvikram 121825e8c5aaSvikram ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); 121925e8c5aaSvikram 122025e8c5aaSvikram /* 122125e8c5aaSvikram * Only we hold a refernce to this contract. Safe to access 122225e8c5aaSvikram * the fields without a ct_lock 122325e8c5aaSvikram */ 122425e8c5aaSvikram ctd->cond_minor = minor; 122525e8c5aaSvikram /* 122625e8c5aaSvikram * It is safe to set the dip pointer in the contract 122725e8c5aaSvikram * as the contract will always be destroyed before the dip 122825e8c5aaSvikram * is released 122925e8c5aaSvikram */ 123025e8c5aaSvikram ctd->cond_dip = dip; 123125e8c5aaSvikram ctd->cond_devt = dev; 123225e8c5aaSvikram ctd->cond_spec = spec_type; 123325e8c5aaSvikram 123425e8c5aaSvikram /* 123525e8c5aaSvikram * Since we are able to lookup the device, it is either 123625e8c5aaSvikram * online or degraded 123725e8c5aaSvikram */ 123825e8c5aaSvikram ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 123925e8c5aaSvikram CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; 124025e8c5aaSvikram 124125e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 124225e8c5aaSvikram ctd->cond_aset = dtmpl->ctd_aset; 124325e8c5aaSvikram ctd->cond_noneg = dtmpl->ctd_noneg; 124425e8c5aaSvikram 124525e8c5aaSvikram /* 124625e8c5aaSvikram * contract_ctor() initailizes the common portion of a contract 124725e8c5aaSvikram * contract_dtor() destroys the common portion of a contract 124825e8c5aaSvikram */ 124925e8c5aaSvikram if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, 125025e8c5aaSvikram ctd, 0, owner, B_TRUE)) { 125125e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 125225e8c5aaSvikram /* 125325e8c5aaSvikram * contract_device_free() destroys the type specific 125425e8c5aaSvikram * portion of a contract and frees the contract. 
125525e8c5aaSvikram * The "minor" path and "cred" is a part of the type specific 125625e8c5aaSvikram * portion of the contract and will be freed by 125725e8c5aaSvikram * contract_device_free() 125825e8c5aaSvikram */ 125925e8c5aaSvikram contract_device_free(&ctd->cond_contract); 126025e8c5aaSvikram 126125e8c5aaSvikram /* release barrier */ 126225e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 126325e8c5aaSvikram ct_barrier_release(dip); 126425e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 126525e8c5aaSvikram 126625e8c5aaSvikram ddi_release_devi(dip); 126725e8c5aaSvikram *errorp = EAGAIN; 126825e8c5aaSvikram return (NULL); 126925e8c5aaSvikram } 127025e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 127125e8c5aaSvikram 127225e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 127325e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; 127425e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; 127525e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 127625e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 127725e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 127825e8c5aaSvikram 127925e8c5aaSvikram /* 128025e8c5aaSvikram * Insert device contract into list hanging off the dip 128125e8c5aaSvikram * Bump up the ref-count on the contract to reflect this 128225e8c5aaSvikram */ 128325e8c5aaSvikram contract_hold(&ctd->cond_contract); 128425e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 128525e8c5aaSvikram list_insert_tail(&(DEVI(dip)->devi_ct), ctd); 128625e8c5aaSvikram 128725e8c5aaSvikram /* release barrier */ 128825e8c5aaSvikram ct_barrier_release(dip); 128925e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 129025e8c5aaSvikram 129125e8c5aaSvikram ddi_release_devi(dip); 129225e8c5aaSvikram 129325e8c5aaSvikram return (ctd); 129425e8c5aaSvikram } 129525e8c5aaSvikram 129625e8c5aaSvikram /* 129725e8c5aaSvikram * Called when a device is successfully opened to create an open-time contract 
129825e8c5aaSvikram * i.e. synchronously with a device open. 129925e8c5aaSvikram */ 130025e8c5aaSvikram int 130125e8c5aaSvikram contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) 130225e8c5aaSvikram { 130325e8c5aaSvikram ctmpl_device_t *dtmpl; 130425e8c5aaSvikram ct_template_t *tmpl; 130525e8c5aaSvikram cont_device_t *ctd; 130625e8c5aaSvikram char *path; 130725e8c5aaSvikram klwp_t *lwp; 130825e8c5aaSvikram int error; 130925e8c5aaSvikram 131025e8c5aaSvikram if (ctpp) 131125e8c5aaSvikram *ctpp = NULL; 131225e8c5aaSvikram 131325e8c5aaSvikram /* 131425e8c5aaSvikram * Check if we are in user-context i.e. if we have an lwp 131525e8c5aaSvikram */ 131625e8c5aaSvikram lwp = ttolwp(curthread); 131725e8c5aaSvikram if (lwp == NULL) { 131825e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); 131925e8c5aaSvikram return (0); 132025e8c5aaSvikram } 132125e8c5aaSvikram 132225e8c5aaSvikram tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); 132325e8c5aaSvikram if (tmpl == NULL) { 132425e8c5aaSvikram return (0); 132525e8c5aaSvikram } 132625e8c5aaSvikram dtmpl = tmpl->ctmpl_data; 132725e8c5aaSvikram 132825e8c5aaSvikram /* 132925e8c5aaSvikram * If the user set a minor path in the template before an open, 133025e8c5aaSvikram * ignore it. We use the minor path of the actual minor opened. 133125e8c5aaSvikram */ 133225e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 133325e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 133425e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " 133525e8c5aaSvikram "ignoring device minor path in active template: %s", 133625e8c5aaSvikram curproc->p_pid, dtmpl->ctd_minor)); 133725e8c5aaSvikram /* 133825e8c5aaSvikram * This is a copy of the actual activated template. 133925e8c5aaSvikram * Safe to make changes such as freeing the minor 134025e8c5aaSvikram * path in the template. 
134125e8c5aaSvikram */ 134225e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 134325e8c5aaSvikram dtmpl->ctd_minor = NULL; 134425e8c5aaSvikram } 134525e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 134625e8c5aaSvikram 134725e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 134825e8c5aaSvikram 134925e8c5aaSvikram if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { 135025e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " 135125e8c5aaSvikram "minor path from dev_t,spec {%lu, %d} for process (%d)", 135225e8c5aaSvikram dev, spec_type, curproc->p_pid)); 135325e8c5aaSvikram ctmpl_free(tmpl); 135425e8c5aaSvikram kmem_free(path, MAXPATHLEN); 135525e8c5aaSvikram return (1); 135625e8c5aaSvikram } 135725e8c5aaSvikram 135825e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 135925e8c5aaSvikram ASSERT(dtmpl->ctd_minor == NULL); 136025e8c5aaSvikram dtmpl->ctd_minor = path; 136125e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 136225e8c5aaSvikram 136325e8c5aaSvikram ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); 136425e8c5aaSvikram 136525e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 136625e8c5aaSvikram ASSERT(dtmpl->ctd_minor); 136725e8c5aaSvikram dtmpl->ctd_minor = NULL; 136825e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 136925e8c5aaSvikram ctmpl_free(tmpl); 137025e8c5aaSvikram kmem_free(path, MAXPATHLEN); 137125e8c5aaSvikram 137225e8c5aaSvikram if (ctd == NULL) { 137325e8c5aaSvikram cmn_err(CE_NOTE, "contract_device_open(): Failed to " 137425e8c5aaSvikram "create device contract for process (%d) holding " 137525e8c5aaSvikram "device (devt = %lu, spec_type = %d)", 137625e8c5aaSvikram curproc->p_pid, dev, spec_type); 137725e8c5aaSvikram return (1); 137825e8c5aaSvikram } 137925e8c5aaSvikram 138025e8c5aaSvikram if (ctpp) { 138125e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 138225e8c5aaSvikram *ctpp = &ctd->cond_contract; 138325e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 
138425e8c5aaSvikram } 138525e8c5aaSvikram return (0); 138625e8c5aaSvikram } 138725e8c5aaSvikram 138825e8c5aaSvikram /* 138925e8c5aaSvikram * Called during contract negotiation by the device contract framework to wait 139025e8c5aaSvikram * for ACKs or NACKs from contract holders. If all responses are not received 139125e8c5aaSvikram * before a specified timeout, this routine times out. 139225e8c5aaSvikram */ 139325e8c5aaSvikram static uint_t 139425e8c5aaSvikram wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) 139525e8c5aaSvikram { 139625e8c5aaSvikram cont_device_t *ctd; 139725e8c5aaSvikram int timed_out = 0; 139825e8c5aaSvikram int result = CT_NONE; 139925e8c5aaSvikram int ack; 140025e8c5aaSvikram char *f = "wait_for_acks"; 140125e8c5aaSvikram 140225e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 140325e8c5aaSvikram ASSERT(dip); 140425e8c5aaSvikram ASSERT(evtype & CT_DEV_ALLEVENT); 140525e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 140625e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 140725e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 140825e8c5aaSvikram 140925e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); 141025e8c5aaSvikram 141125e8c5aaSvikram if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { 141225e8c5aaSvikram /* 141325e8c5aaSvikram * some contract owner(s) didn't respond in time 141425e8c5aaSvikram */ 141525e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); 141625e8c5aaSvikram timed_out = 1; 141725e8c5aaSvikram } 141825e8c5aaSvikram 141925e8c5aaSvikram ack = 0; 142025e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 142125e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 142225e8c5aaSvikram 142325e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 142425e8c5aaSvikram 142525e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 142625e8c5aaSvikram 142725e8c5aaSvikram if (dev != DDI_DEV_T_ANY 
&& dev != ctd->cond_devt) { 142825e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 142925e8c5aaSvikram continue; 143025e8c5aaSvikram } 143125e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 143225e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 143325e8c5aaSvikram continue; 143425e8c5aaSvikram } 143525e8c5aaSvikram 143625e8c5aaSvikram /* skip if non-negotiable contract */ 143725e8c5aaSvikram if (ctd->cond_noneg) { 143825e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 143925e8c5aaSvikram continue; 144025e8c5aaSvikram } 144125e8c5aaSvikram 144225e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 144325e8c5aaSvikram if (ctd->cond_currev_ack == CT_NACK) { 144425e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", 144525e8c5aaSvikram f, (void *)dip)); 144625e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 144725e8c5aaSvikram return (CT_NACK); 144825e8c5aaSvikram } else if (ctd->cond_currev_ack == CT_ACK) { 144925e8c5aaSvikram ack = 1; 145025e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", 145125e8c5aaSvikram f, (void *)dip)); 145225e8c5aaSvikram } 145325e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 145425e8c5aaSvikram } 145525e8c5aaSvikram 145625e8c5aaSvikram if (ack) { 145725e8c5aaSvikram result = CT_ACK; 145825e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); 145925e8c5aaSvikram } else if (timed_out) { 146025e8c5aaSvikram result = CT_NONE; 146125e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", 146225e8c5aaSvikram f, (void *)dip)); 146325e8c5aaSvikram } else { 146425e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", 146525e8c5aaSvikram f, (void *)dip)); 146625e8c5aaSvikram } 146725e8c5aaSvikram 146825e8c5aaSvikram 146925e8c5aaSvikram return (result); 147025e8c5aaSvikram } 147125e8c5aaSvikram 147225e8c5aaSvikram /* 147325e8c5aaSvikram * Determines the current state of a device (i.e a devinfo node 147425e8c5aaSvikram */ 
147525e8c5aaSvikram static int 147625e8c5aaSvikram get_state(dev_info_t *dip) 147725e8c5aaSvikram { 147825e8c5aaSvikram if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) 147925e8c5aaSvikram return (CT_DEV_EV_OFFLINE); 148025e8c5aaSvikram else if (DEVI_IS_DEVICE_DEGRADED(dip)) 148125e8c5aaSvikram return (CT_DEV_EV_DEGRADED); 148225e8c5aaSvikram else 148325e8c5aaSvikram return (CT_DEV_EV_ONLINE); 148425e8c5aaSvikram } 148525e8c5aaSvikram 148625e8c5aaSvikram /* 148725e8c5aaSvikram * Sets the current state of a device in a device contract 148825e8c5aaSvikram */ 148925e8c5aaSvikram static void 149025e8c5aaSvikram set_cond_state(dev_info_t *dip) 149125e8c5aaSvikram { 149225e8c5aaSvikram uint_t state = get_state(dip); 149325e8c5aaSvikram cont_device_t *ctd; 149425e8c5aaSvikram 149525e8c5aaSvikram /* verify that barrier is held */ 149625e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 149725e8c5aaSvikram 149825e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 149925e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 150025e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 150125e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 150225e8c5aaSvikram ctd->cond_state = state; 150325e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 150425e8c5aaSvikram } 150525e8c5aaSvikram } 150625e8c5aaSvikram 150725e8c5aaSvikram /* 150825e8c5aaSvikram * Core routine called by event-specific routines when an event occurs. 150925e8c5aaSvikram * Determines if an event should be be published, and if it is to be 151025e8c5aaSvikram * published, whether a negotiation should take place. Also implements 151125e8c5aaSvikram * NEGEND events which publish the final disposition of an event after 151225e8c5aaSvikram * negotiations are complete. 
151325e8c5aaSvikram * 151425e8c5aaSvikram * When an event occurs on a minor node, this routine walks the list of 151525e8c5aaSvikram * contracts hanging off a devinfo node and for each contract on the affected 151625e8c5aaSvikram * dip, evaluates the following cases 151725e8c5aaSvikram * 151825e8c5aaSvikram * a. an event that is synchronous, breaks the contract and NONEG not set 151925e8c5aaSvikram * - bumps up the outstanding negotiation counts on the dip 152025e8c5aaSvikram * - marks the dip as undergoing negotiation (devi_ct_neg) 152125e8c5aaSvikram * - event of type CTE_NEG is published 152225e8c5aaSvikram * b. an event that is synchronous, breaks the contract and NONEG is set 152325e8c5aaSvikram * - sets the final result to CT_NACK, event is blocked 152425e8c5aaSvikram * - does not publish an event 152525e8c5aaSvikram * c. event is asynchronous and breaks the contract 152625e8c5aaSvikram * - publishes a critical event irrespect of whether the NONEG 152725e8c5aaSvikram * flag is set, since the contract will be broken and contract 152825e8c5aaSvikram * owner needs to be informed. 152925e8c5aaSvikram * d. No contract breakage but the owner has subscribed to the event 153025e8c5aaSvikram * - publishes the event irrespective of the NONEG event as the 153125e8c5aaSvikram * owner has explicitly subscribed to the event. 153225e8c5aaSvikram * e. NEGEND event 153325e8c5aaSvikram * - publishes a critical event. Should only be doing this if 153425e8c5aaSvikram * if NONEG is not set. 153525e8c5aaSvikram * f. all other events 153625e8c5aaSvikram * - Since a contract is not broken and this event has not been 153725e8c5aaSvikram * subscribed to, this event does not need to be published for 153825e8c5aaSvikram * for this contract. 153925e8c5aaSvikram * 154025e8c5aaSvikram * Once an event is published, what happens next depends on the type of 154125e8c5aaSvikram * event: 154225e8c5aaSvikram * 154325e8c5aaSvikram * a. 
NEGEND event 154425e8c5aaSvikram * - cleanup all state associated with the preceding negotiation 154525e8c5aaSvikram * and return CT_ACK to the caller of contract_device_publish() 154625e8c5aaSvikram * b. NACKed event 154725e8c5aaSvikram * - One or more contracts had the NONEG term, so the event was 154825e8c5aaSvikram * blocked. Return CT_NACK to the caller. 154925e8c5aaSvikram * c. Negotiated event 155025e8c5aaSvikram * - Call wait_for_acks() to wait for responses from contract 155125e8c5aaSvikram * holders. The end result is either CT_ACK (event is permitted), 155225e8c5aaSvikram * CT_NACK (event is blocked) or CT_NONE (no contract owner) 155325e8c5aaSvikram * responded. This result is returned back to the caller. 155425e8c5aaSvikram * d. All other events 155525e8c5aaSvikram * - If the event was asynchronous (i.e. not negotiated) or 155625e8c5aaSvikram * a contract was not broken return CT_ACK to the caller. 155725e8c5aaSvikram */ 155825e8c5aaSvikram static uint_t 155925e8c5aaSvikram contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, 156025e8c5aaSvikram uint_t evtype, nvlist_t *tnvl) 156125e8c5aaSvikram { 156225e8c5aaSvikram cont_device_t *ctd; 156325e8c5aaSvikram uint_t result = CT_NONE; 156425e8c5aaSvikram uint64_t evid = 0; 156525e8c5aaSvikram uint64_t nevid = 0; 156625e8c5aaSvikram char *path = NULL; 156725e8c5aaSvikram int negend; 156825e8c5aaSvikram int match; 156925e8c5aaSvikram int sync = 0; 157025e8c5aaSvikram contract_t *ct; 157125e8c5aaSvikram ct_kevent_t *event; 157225e8c5aaSvikram nvlist_t *nvl; 157325e8c5aaSvikram int broken = 0; 157425e8c5aaSvikram 157525e8c5aaSvikram ASSERT(dip); 157625e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 157725e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 157825e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 157925e8c5aaSvikram ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); 158025e8c5aaSvikram 158125e8c5aaSvikram /* Is this a synchronous state change ? 
*/ 158225e8c5aaSvikram if (evtype != CT_EV_NEGEND) { 158325e8c5aaSvikram sync = is_sync_neg(get_state(dip), evtype); 158425e8c5aaSvikram /* NOP if unsupported transition */ 158525e8c5aaSvikram if (sync == -2 || sync == -1) { 158625e8c5aaSvikram DEVI(dip)->devi_flags |= DEVI_CT_NOP; 158725e8c5aaSvikram result = (sync == -2) ? CT_ACK : CT_NONE; 158825e8c5aaSvikram goto out; 158925e8c5aaSvikram } 159025e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: is%s sync state change", 159125e8c5aaSvikram sync ? "" : " not")); 159225e8c5aaSvikram } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { 159325e8c5aaSvikram DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; 159425e8c5aaSvikram result = CT_ACK; 159525e8c5aaSvikram goto out; 159625e8c5aaSvikram } 159725e8c5aaSvikram 159825e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 159925e8c5aaSvikram (void) ddi_pathname(dip, path); 160025e8c5aaSvikram 160125e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 160225e8c5aaSvikram 160325e8c5aaSvikram /* 160425e8c5aaSvikram * Negotiation end - set the state of the device in the contract 160525e8c5aaSvikram */ 160625e8c5aaSvikram if (evtype == CT_EV_NEGEND) { 160725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); 160825e8c5aaSvikram set_cond_state(dip); 160925e8c5aaSvikram } 161025e8c5aaSvikram 161125e8c5aaSvikram /* 161225e8c5aaSvikram * If this device didn't go through negotiation, don't publish 161325e8c5aaSvikram * a NEGEND event - simply release the barrier to allow other 161425e8c5aaSvikram * device events in. 161525e8c5aaSvikram */ 161625e8c5aaSvikram negend = 0; 161725e8c5aaSvikram if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { 161825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: no negend reqd. 
release barrier")); 161925e8c5aaSvikram ct_barrier_release(dip); 162025e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 162125e8c5aaSvikram result = CT_ACK; 162225e8c5aaSvikram goto out; 162325e8c5aaSvikram } else if (evtype == CT_EV_NEGEND) { 162425e8c5aaSvikram /* 162525e8c5aaSvikram * There are negotiated contract breakages that 162625e8c5aaSvikram * need a NEGEND event 162725e8c5aaSvikram */ 162825e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 162925e8c5aaSvikram negend = 1; 163025e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: setting negend flag")); 163125e8c5aaSvikram } else { 163225e8c5aaSvikram /* 163325e8c5aaSvikram * This is a new event, not a NEGEND event. Wait for previous 163425e8c5aaSvikram * contract events to complete. 163525e8c5aaSvikram */ 163625e8c5aaSvikram ct_barrier_acquire(dip); 163725e8c5aaSvikram } 163825e8c5aaSvikram 163925e8c5aaSvikram 164025e8c5aaSvikram match = 0; 164125e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 164225e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 164325e8c5aaSvikram 164425e8c5aaSvikram ctid_t ctid; 164525e8c5aaSvikram size_t len = strlen(path); 164625e8c5aaSvikram 164725e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 164825e8c5aaSvikram 164925e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 165025e8c5aaSvikram ASSERT(ctd->cond_minor); 165125e8c5aaSvikram ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && 165225e8c5aaSvikram ctd->cond_minor[len] == ':'); 165325e8c5aaSvikram 165425e8c5aaSvikram if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 165525e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 165625e8c5aaSvikram continue; 165725e8c5aaSvikram } 165825e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 165925e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 166025e8c5aaSvikram continue; 166125e8c5aaSvikram } 166225e8c5aaSvikram 166325e8c5aaSvikram /* We have a matching contract */ 166425e8c5aaSvikram match = 1; 166525e8c5aaSvikram ctid = 
ctd->cond_contract.ct_id; 166625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", 166725e8c5aaSvikram ctid)); 166825e8c5aaSvikram 166925e8c5aaSvikram /* 167025e8c5aaSvikram * There are 4 possible cases 167125e8c5aaSvikram * 1. A contract is broken (dev not in acceptable state) and 167225e8c5aaSvikram * the state change is synchronous - start negotiation 167325e8c5aaSvikram * by sending a CTE_NEG critical event. 167425e8c5aaSvikram * 2. A contract is broken and the state change is 167525e8c5aaSvikram * asynchronous - just send a critical event and 167625e8c5aaSvikram * break the contract. 167725e8c5aaSvikram * 3. Contract is not broken, but consumer has subscribed 167825e8c5aaSvikram * to the event as a critical or informative event 167925e8c5aaSvikram * - just send the appropriate event 168025e8c5aaSvikram * 4. contract waiting for negend event - just send the critical 168125e8c5aaSvikram * NEGEND event. 168225e8c5aaSvikram */ 168325e8c5aaSvikram broken = 0; 168425e8c5aaSvikram if (!negend && !(evtype & ctd->cond_aset)) { 168525e8c5aaSvikram broken = 1; 168625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: Contract broken: %d", 168725e8c5aaSvikram ctid)); 168825e8c5aaSvikram } 168925e8c5aaSvikram 169025e8c5aaSvikram /* 169125e8c5aaSvikram * Don't send event if 169225e8c5aaSvikram * - contract is not broken AND 169325e8c5aaSvikram * - contract holder has not subscribed to this event AND 169425e8c5aaSvikram * - contract not waiting for a NEGEND event 169525e8c5aaSvikram */ 169625e8c5aaSvikram if (!broken && !EVSENDP(ctd, evtype) && 169725e8c5aaSvikram !ctd->cond_neg) { 169825e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_publish(): " 169925e8c5aaSvikram "contract (%d): no publish reqd: event %d", 170025e8c5aaSvikram ctd->cond_contract.ct_id, evtype)); 170125e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 170225e8c5aaSvikram continue; 170325e8c5aaSvikram } 170425e8c5aaSvikram 170525e8c5aaSvikram /* 170625e8c5aaSvikram * Note: need to 
kmem_zalloc() the event so mutexes are 170725e8c5aaSvikram * initialized automatically 170825e8c5aaSvikram */ 170925e8c5aaSvikram ct = &ctd->cond_contract; 171025e8c5aaSvikram event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); 171125e8c5aaSvikram event->cte_type = evtype; 171225e8c5aaSvikram 171325e8c5aaSvikram if (broken && sync) { 171425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: broken + sync: " 171525e8c5aaSvikram "ctid: %d", ctid)); 171625e8c5aaSvikram ASSERT(!negend); 171725e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 171825e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 171925e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 172025e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 172125e8c5aaSvikram if (ctd->cond_noneg) { 172225e8c5aaSvikram /* Nothing to publish. Event has been blocked */ 172325e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync and noneg:" 172425e8c5aaSvikram "not publishing blocked ev: ctid: %d", 172525e8c5aaSvikram ctid)); 172625e8c5aaSvikram result = CT_NACK; 172725e8c5aaSvikram kmem_free(event, sizeof (ct_kevent_t)); 172825e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 172925e8c5aaSvikram continue; 173025e8c5aaSvikram } 173125e8c5aaSvikram event->cte_flags = CTE_NEG; /* critical neg. 
event */ 173225e8c5aaSvikram ctd->cond_currev_type = event->cte_type; 173325e8c5aaSvikram ct_barrier_incr(dip); 173425e8c5aaSvikram DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */ 173525e8c5aaSvikram ctd->cond_neg = 1; 173625e8c5aaSvikram } else if (broken && !sync) { 173725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d", 173825e8c5aaSvikram ctid)); 173925e8c5aaSvikram ASSERT(!negend); 174025e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 174125e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 174225e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 174325e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 174425e8c5aaSvikram event->cte_flags = 0; /* critical event */ 174525e8c5aaSvikram } else if (EVSENDP(ctd, event->cte_type)) { 174625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d", 174725e8c5aaSvikram ctid)); 174825e8c5aaSvikram ASSERT(!negend); 174925e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 175025e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 175125e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 175225e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 175325e8c5aaSvikram event->cte_flags = EVINFOP(ctd, event->cte_type) ? 175425e8c5aaSvikram CTE_INFO : 0; 175525e8c5aaSvikram } else if (ctd->cond_neg) { 175625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid)); 175725e8c5aaSvikram ASSERT(negend); 175825e8c5aaSvikram ASSERT(ctd->cond_noneg == 0); 175925e8c5aaSvikram nevid = ctd->cond_contract.ct_nevent ? 
176025e8c5aaSvikram ctd->cond_contract.ct_nevent->cte_id : 0; 176125e8c5aaSvikram ASSERT(ctd->cond_currev_id == nevid); 176225e8c5aaSvikram event->cte_flags = 0; /* NEGEND is always critical */ 176325e8c5aaSvikram ctd->cond_currev_id = 0; 176425e8c5aaSvikram ctd->cond_currev_type = 0; 176525e8c5aaSvikram ctd->cond_currev_ack = 0; 176625e8c5aaSvikram ctd->cond_neg = 0; 176725e8c5aaSvikram } else { 176825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: not publishing event for " 176925e8c5aaSvikram "ctid: %d, evtype: %d", 177025e8c5aaSvikram ctd->cond_contract.ct_id, event->cte_type)); 177125e8c5aaSvikram ASSERT(!negend); 177225e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 177325e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 177425e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 177525e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 177625e8c5aaSvikram kmem_free(event, sizeof (ct_kevent_t)); 177725e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 177825e8c5aaSvikram continue; 177925e8c5aaSvikram } 178025e8c5aaSvikram 178125e8c5aaSvikram nvl = NULL; 178225e8c5aaSvikram if (tnvl) { 178325e8c5aaSvikram VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0); 178425e8c5aaSvikram if (negend) { 178525e8c5aaSvikram int32_t newct = 0; 178625e8c5aaSvikram ASSERT(ctd->cond_noneg == 0); 178725e8c5aaSvikram VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid) 178825e8c5aaSvikram == 0); 178925e8c5aaSvikram VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT, 179025e8c5aaSvikram &newct) == 0); 179125e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 179225e8c5aaSvikram newct == 1 ? 0 : 179325e8c5aaSvikram ctd->cond_contract.ct_id) == 0); 179425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d " 179525e8c5aaSvikram "CTS_NEVID: %llu, CTS_NEWCT: %s", 179625e8c5aaSvikram ctid, (unsigned long long)nevid, 179725e8c5aaSvikram newct ? 
"success" : "failure")); 179825e8c5aaSvikram 179925e8c5aaSvikram } 180025e8c5aaSvikram } 180125e8c5aaSvikram 180225e8c5aaSvikram if (ctd->cond_neg) { 180325e8c5aaSvikram ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1); 180425e8c5aaSvikram ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1); 180525e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt(); 180625e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = 180725e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start; 180825e8c5aaSvikram } 180925e8c5aaSvikram 181025e8c5aaSvikram /* 181125e8c5aaSvikram * by holding the dip's devi_ct_lock we ensure that 181225e8c5aaSvikram * all ACK/NACKs are held up until we have finished 181325e8c5aaSvikram * publishing to all contracts. 181425e8c5aaSvikram */ 181525e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 181625e8c5aaSvikram evid = cte_publish_all(ct, event, nvl, NULL); 181725e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 181825e8c5aaSvikram 181925e8c5aaSvikram if (ctd->cond_neg) { 182025e8c5aaSvikram ASSERT(!negend); 182125e8c5aaSvikram ASSERT(broken); 182225e8c5aaSvikram ASSERT(sync); 182325e8c5aaSvikram ASSERT(!ctd->cond_noneg); 182425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync break, setting evid" 182525e8c5aaSvikram ": %d", ctid)); 182625e8c5aaSvikram ctd->cond_currev_id = evid; 182725e8c5aaSvikram } else if (negend) { 182825e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 182925e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 183025e8c5aaSvikram } 183125e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 183225e8c5aaSvikram } 183325e8c5aaSvikram 183425e8c5aaSvikram /* 183525e8c5aaSvikram * If "negend" set counter back to initial state (-1) so that 183625e8c5aaSvikram * other events can be published. Also clear the negotiation flag 183725e8c5aaSvikram * on dip. 183825e8c5aaSvikram * 183925e8c5aaSvikram * 0 .. n are used for counting. 184025e8c5aaSvikram * -1 indicates counter is available for use. 
184125e8c5aaSvikram */ 184225e8c5aaSvikram if (negend) { 184325e8c5aaSvikram /* 184425e8c5aaSvikram * devi_ct_count not necessarily 0. We may have 184525e8c5aaSvikram * timed out in which case, count will be non-zero. 184625e8c5aaSvikram */ 184725e8c5aaSvikram ct_barrier_release(dip); 184825e8c5aaSvikram DEVI(dip)->devi_ct_neg = 0; 184925e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p", 185025e8c5aaSvikram (void *)dip)); 185125e8c5aaSvikram } else if (DEVI(dip)->devi_ct_neg) { 185225e8c5aaSvikram ASSERT(match); 185325e8c5aaSvikram ASSERT(!ct_barrier_empty(dip)); 185425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p", 185525e8c5aaSvikram DEVI(dip)->devi_ct_count, (void *)dip)); 185625e8c5aaSvikram } else { 185725e8c5aaSvikram /* 185825e8c5aaSvikram * for non-negotiated events or subscribed events or no 185925e8c5aaSvikram * matching contracts 186025e8c5aaSvikram */ 186125e8c5aaSvikram ASSERT(ct_barrier_empty(dip)); 186225e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_neg == 0); 186325e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: " 186425e8c5aaSvikram "dip=%p", (void *)dip)); 186525e8c5aaSvikram 186625e8c5aaSvikram /* 186725e8c5aaSvikram * only this function when called from contract_device_negend() 186825e8c5aaSvikram * can reset the counter to READY state i.e. -1. This function 186925e8c5aaSvikram * is so called for every event whether a NEGEND event is needed 187025e8c5aaSvikram * or not, but the negend event is only published if the event 187125e8c5aaSvikram * whose end they signal is a negotiated event for the contract. 
187225e8c5aaSvikram */ 187325e8c5aaSvikram } 187425e8c5aaSvikram 187525e8c5aaSvikram if (!match) { 187625e8c5aaSvikram /* No matching contracts */ 187725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: No matching contract")); 187825e8c5aaSvikram result = CT_NONE; 187925e8c5aaSvikram } else if (result == CT_NACK) { 188025e8c5aaSvikram /* a non-negotiable contract exists and this is a neg. event */ 188125e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract")); 188225e8c5aaSvikram (void) wait_for_acks(dip, dev, spec_type, evtype); 188325e8c5aaSvikram } else if (DEVI(dip)->devi_ct_neg) { 188425e8c5aaSvikram /* one or more contracts going through negotations */ 188525e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync contract: waiting")); 188625e8c5aaSvikram result = wait_for_acks(dip, dev, spec_type, evtype); 188725e8c5aaSvikram } else { 188825e8c5aaSvikram /* no negotiated contracts or no broken contracts or NEGEND */ 188925e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: async/no-break/negend")); 189025e8c5aaSvikram result = CT_ACK; 189125e8c5aaSvikram } 189225e8c5aaSvikram 189325e8c5aaSvikram /* 189425e8c5aaSvikram * Release the lock only now so that the only point where we 189525e8c5aaSvikram * drop the lock is in wait_for_acks(). 
This is so that we don't 189625e8c5aaSvikram * miss cv_signal/cv_broadcast from contract holders 189725e8c5aaSvikram */ 189825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock")); 189925e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 190025e8c5aaSvikram 190125e8c5aaSvikram out: 190225e8c5aaSvikram if (tnvl) 190325e8c5aaSvikram nvlist_free(tnvl); 190425e8c5aaSvikram if (path) 190525e8c5aaSvikram kmem_free(path, MAXPATHLEN); 190625e8c5aaSvikram 190725e8c5aaSvikram 190825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result))); 190925e8c5aaSvikram return (result); 191025e8c5aaSvikram } 191125e8c5aaSvikram 191225e8c5aaSvikram 191325e8c5aaSvikram /* 191425e8c5aaSvikram * contract_device_offline 191525e8c5aaSvikram * 191625e8c5aaSvikram * Event publishing routine called by I/O framework when a device is offlined. 191725e8c5aaSvikram */ 191825e8c5aaSvikram ct_ack_t 191925e8c5aaSvikram contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type) 192025e8c5aaSvikram { 192125e8c5aaSvikram nvlist_t *nvl; 192225e8c5aaSvikram uint_t result; 192325e8c5aaSvikram uint_t evtype; 192425e8c5aaSvikram 192525e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 192625e8c5aaSvikram 192725e8c5aaSvikram evtype = CT_DEV_EV_OFFLINE; 192825e8c5aaSvikram result = contract_device_publish(dip, dev, spec_type, evtype, nvl); 192925e8c5aaSvikram 193025e8c5aaSvikram /* 193125e8c5aaSvikram * If a contract offline is NACKED, the framework expects us to call 193225e8c5aaSvikram * NEGEND ourselves, since we know the final result 193325e8c5aaSvikram */ 193425e8c5aaSvikram if (result == CT_NACK) { 193525e8c5aaSvikram contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); 193625e8c5aaSvikram } 193725e8c5aaSvikram 193825e8c5aaSvikram return (result); 193925e8c5aaSvikram } 194025e8c5aaSvikram 194125e8c5aaSvikram /* 194225e8c5aaSvikram * contract_device_degrade 194325e8c5aaSvikram * 194425e8c5aaSvikram * Event publishing routine called 
by I/O framework when a device 194525e8c5aaSvikram * moves to degrade state. 194625e8c5aaSvikram */ 194725e8c5aaSvikram /*ARGSUSED*/ 194825e8c5aaSvikram void 194925e8c5aaSvikram contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) 195025e8c5aaSvikram { 195125e8c5aaSvikram nvlist_t *nvl; 195225e8c5aaSvikram uint_t evtype; 195325e8c5aaSvikram 195425e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 195525e8c5aaSvikram 195625e8c5aaSvikram evtype = CT_DEV_EV_DEGRADED; 195725e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 195825e8c5aaSvikram } 195925e8c5aaSvikram 196025e8c5aaSvikram /* 196125e8c5aaSvikram * contract_device_undegrade 196225e8c5aaSvikram * 196325e8c5aaSvikram * Event publishing routine called by I/O framework when a device 196425e8c5aaSvikram * moves from degraded state to online state. 196525e8c5aaSvikram */ 196625e8c5aaSvikram /*ARGSUSED*/ 196725e8c5aaSvikram void 196825e8c5aaSvikram contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) 196925e8c5aaSvikram { 197025e8c5aaSvikram nvlist_t *nvl; 197125e8c5aaSvikram uint_t evtype; 197225e8c5aaSvikram 197325e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 197425e8c5aaSvikram 197525e8c5aaSvikram evtype = CT_DEV_EV_ONLINE; 197625e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 197725e8c5aaSvikram } 197825e8c5aaSvikram 197925e8c5aaSvikram /* 198025e8c5aaSvikram * For all contracts which have undergone a negotiation (because the device 198125e8c5aaSvikram * moved out of the acceptable state for that contract and the state 198225e8c5aaSvikram * change is synchronous i.e. requires negotiation) this routine publishes 198325e8c5aaSvikram * a CT_EV_NEGEND event with the final disposition of the event. 198425e8c5aaSvikram * 198525e8c5aaSvikram * This event is always a critical event. 
198625e8c5aaSvikram */ 198725e8c5aaSvikram void 198825e8c5aaSvikram contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) 198925e8c5aaSvikram { 199025e8c5aaSvikram nvlist_t *nvl; 199125e8c5aaSvikram uint_t evtype; 199225e8c5aaSvikram 199325e8c5aaSvikram ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); 199425e8c5aaSvikram 199525e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " 199625e8c5aaSvikram "dip: %p", result, (void *)dip)); 199725e8c5aaSvikram 199825e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 199925e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 200025e8c5aaSvikram result == CT_EV_SUCCESS ? 1 : 0) == 0); 200125e8c5aaSvikram 200225e8c5aaSvikram evtype = CT_EV_NEGEND; 200325e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 200425e8c5aaSvikram 200525e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", 200625e8c5aaSvikram (void *)dip)); 200725e8c5aaSvikram } 200825e8c5aaSvikram 200925e8c5aaSvikram /* 201025e8c5aaSvikram * Wrapper routine called by other subsystems (such as LDI) to start 201125e8c5aaSvikram * negotiations when a synchronous device state change occurs. 201225e8c5aaSvikram * Returns CT_ACK or CT_NACK. 
201325e8c5aaSvikram */ 201425e8c5aaSvikram ct_ack_t 201525e8c5aaSvikram contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, 201625e8c5aaSvikram uint_t evtype) 201725e8c5aaSvikram { 201825e8c5aaSvikram int result; 201925e8c5aaSvikram 202025e8c5aaSvikram ASSERT(dip); 202125e8c5aaSvikram ASSERT(dev != NODEV); 202225e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 202325e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 202425e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 202525e8c5aaSvikram 202625e8c5aaSvikram switch (evtype) { 202725e8c5aaSvikram case CT_DEV_EV_OFFLINE: 202825e8c5aaSvikram result = contract_device_offline(dip, dev, spec_type); 202925e8c5aaSvikram break; 203025e8c5aaSvikram default: 203125e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " 203225e8c5aaSvikram "not supported: event (%d) for dev_t (%lu) and spec (%d), " 203325e8c5aaSvikram "dip (%p)", evtype, dev, spec_type, (void *)dip); 203425e8c5aaSvikram result = CT_NACK; 203525e8c5aaSvikram break; 203625e8c5aaSvikram } 203725e8c5aaSvikram 203825e8c5aaSvikram return (result); 203925e8c5aaSvikram } 204025e8c5aaSvikram 204125e8c5aaSvikram /* 204225e8c5aaSvikram * A wrapper routine called by other subsystems (such as the LDI) to 204325e8c5aaSvikram * finalize event processing for a state change event. For synchronous 204425e8c5aaSvikram * state changes, this publishes NEGEND events. For asynchronous i.e. 204525e8c5aaSvikram * non-negotiable events this publishes the event. 
204625e8c5aaSvikram */ 204725e8c5aaSvikram void 204825e8c5aaSvikram contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, 204925e8c5aaSvikram uint_t evtype, int ct_result) 205025e8c5aaSvikram { 205125e8c5aaSvikram ASSERT(dip); 205225e8c5aaSvikram ASSERT(dev != NODEV); 205325e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 205425e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 205525e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 205625e8c5aaSvikram 205725e8c5aaSvikram switch (evtype) { 205825e8c5aaSvikram case CT_DEV_EV_OFFLINE: 205925e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 206025e8c5aaSvikram break; 206125e8c5aaSvikram case CT_DEV_EV_DEGRADED: 206225e8c5aaSvikram contract_device_degrade(dip, dev, spec_type); 206325e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 206425e8c5aaSvikram break; 206525e8c5aaSvikram case CT_DEV_EV_ONLINE: 206625e8c5aaSvikram contract_device_undegrade(dip, dev, spec_type); 206725e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 206825e8c5aaSvikram break; 206925e8c5aaSvikram default: 207025e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " 207125e8c5aaSvikram "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", 207225e8c5aaSvikram evtype, dev, spec_type, (void *)dip); 207325e8c5aaSvikram break; 207425e8c5aaSvikram } 207525e8c5aaSvikram } 207625e8c5aaSvikram 207725e8c5aaSvikram /* 207825e8c5aaSvikram * Called by I/O framework when a devinfo node is freed to remove the 207925e8c5aaSvikram * association between a devinfo node and its contracts. 
208025e8c5aaSvikram */ 208125e8c5aaSvikram void 208225e8c5aaSvikram contract_device_remove_dip(dev_info_t *dip) 208325e8c5aaSvikram { 208425e8c5aaSvikram cont_device_t *ctd; 208525e8c5aaSvikram cont_device_t *next; 208625e8c5aaSvikram contract_t *ct; 208725e8c5aaSvikram 208825e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 208925e8c5aaSvikram ct_barrier_wait_for_release(dip); 209025e8c5aaSvikram 209125e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) { 209225e8c5aaSvikram next = list_next(&(DEVI(dip)->devi_ct), ctd); 209325e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 209425e8c5aaSvikram ct = &ctd->cond_contract; 209525e8c5aaSvikram /* 209625e8c5aaSvikram * Unlink the dip associated with this contract 209725e8c5aaSvikram */ 209825e8c5aaSvikram mutex_enter(&ct->ct_lock); 209925e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 210025e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 210125e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 210225e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: " 210325e8c5aaSvikram "ctid: %d", ct->ct_id)); 210425e8c5aaSvikram mutex_exit(&ct->ct_lock); 210525e8c5aaSvikram } 210625e8c5aaSvikram ASSERT(list_is_empty(&(DEVI(dip)->devi_ct))); 210725e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 210825e8c5aaSvikram } 210925e8c5aaSvikram 211025e8c5aaSvikram /* 211125e8c5aaSvikram * Barrier related routines 211225e8c5aaSvikram */ 211325e8c5aaSvikram static void 211425e8c5aaSvikram ct_barrier_acquire(dev_info_t *dip) 211525e8c5aaSvikram { 211625e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 211725e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier")); 211825e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 211925e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 212025e8c5aaSvikram DEVI(dip)->devi_ct_count = 0; 212125e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: 
thread owns barrier")); 212225e8c5aaSvikram } 212325e8c5aaSvikram 212425e8c5aaSvikram static void 212525e8c5aaSvikram ct_barrier_release(dev_info_t *dip) 212625e8c5aaSvikram { 212725e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 212825e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 212925e8c5aaSvikram DEVI(dip)->devi_ct_count = -1; 213025e8c5aaSvikram cv_broadcast(&(DEVI(dip)->devi_ct_cv)); 213125e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier")); 213225e8c5aaSvikram } 213325e8c5aaSvikram 213425e8c5aaSvikram static int 213525e8c5aaSvikram ct_barrier_held(dev_info_t *dip) 213625e8c5aaSvikram { 213725e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 213825e8c5aaSvikram return (DEVI(dip)->devi_ct_count != -1); 213925e8c5aaSvikram } 214025e8c5aaSvikram 214125e8c5aaSvikram static int 214225e8c5aaSvikram ct_barrier_empty(dev_info_t *dip) 214325e8c5aaSvikram { 214425e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 214525e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 214625e8c5aaSvikram return (DEVI(dip)->devi_ct_count == 0); 214725e8c5aaSvikram } 214825e8c5aaSvikram 214925e8c5aaSvikram static void 215025e8c5aaSvikram ct_barrier_wait_for_release(dev_info_t *dip) 215125e8c5aaSvikram { 215225e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 215325e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 215425e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 215525e8c5aaSvikram } 215625e8c5aaSvikram 215725e8c5aaSvikram static void 215825e8c5aaSvikram ct_barrier_decr(dev_info_t *dip) 215925e8c5aaSvikram { 216025e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d", 216125e8c5aaSvikram DEVI(dip)->devi_ct_count)); 216225e8c5aaSvikram 216325e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 216425e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count > 0); 216525e8c5aaSvikram 216625e8c5aaSvikram DEVI(dip)->devi_ct_count--; 216725e8c5aaSvikram if 
(DEVI(dip)->devi_ct_count == 0) { 216825e8c5aaSvikram cv_broadcast(&DEVI(dip)->devi_ct_cv); 216925e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast")); 217025e8c5aaSvikram } 217125e8c5aaSvikram } 217225e8c5aaSvikram 217325e8c5aaSvikram static void 217425e8c5aaSvikram ct_barrier_incr(dev_info_t *dip) 217525e8c5aaSvikram { 217625e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 217725e8c5aaSvikram DEVI(dip)->devi_ct_count++; 217825e8c5aaSvikram } 217925e8c5aaSvikram 218025e8c5aaSvikram static int 218125e8c5aaSvikram ct_barrier_wait_for_empty(dev_info_t *dip, int secs) 218225e8c5aaSvikram { 218325e8c5aaSvikram clock_t abstime; 218425e8c5aaSvikram 218525e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 218625e8c5aaSvikram 218725e8c5aaSvikram abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000); 218825e8c5aaSvikram while (DEVI(dip)->devi_ct_count) { 218925e8c5aaSvikram if (cv_timedwait(&(DEVI(dip)->devi_ct_cv), 219025e8c5aaSvikram &(DEVI(dip)->devi_ct_lock), abstime) == -1) { 219125e8c5aaSvikram return (-1); 219225e8c5aaSvikram } 219325e8c5aaSvikram } 219425e8c5aaSvikram return (0); 219525e8c5aaSvikram } 2196