125e8c5aaSvikram /* 225e8c5aaSvikram * CDDL HEADER START 325e8c5aaSvikram * 425e8c5aaSvikram * The contents of this file are subject to the terms of the 525e8c5aaSvikram * Common Development and Distribution License (the "License"). 625e8c5aaSvikram * You may not use this file except in compliance with the License. 725e8c5aaSvikram * 825e8c5aaSvikram * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 925e8c5aaSvikram * or http://www.opensolaris.org/os/licensing. 1025e8c5aaSvikram * See the License for the specific language governing permissions 1125e8c5aaSvikram * and limitations under the License. 1225e8c5aaSvikram * 1325e8c5aaSvikram * When distributing Covered Code, include this CDDL HEADER in each 1425e8c5aaSvikram * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1525e8c5aaSvikram * If applicable, add the following below this CDDL HEADER, with the 1625e8c5aaSvikram * fields enclosed by brackets "[]" replaced with your own identifying 1725e8c5aaSvikram * information: Portions Copyright [yyyy] [name of copyright owner] 1825e8c5aaSvikram * 1925e8c5aaSvikram * CDDL HEADER END 2025e8c5aaSvikram */ 2125e8c5aaSvikram /* 222eb07f5eSStephen Hanson * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
2325e8c5aaSvikram */ 2425e8c5aaSvikram 2525e8c5aaSvikram #include <sys/mutex.h> 2625e8c5aaSvikram #include <sys/debug.h> 2725e8c5aaSvikram #include <sys/types.h> 2825e8c5aaSvikram #include <sys/param.h> 2925e8c5aaSvikram #include <sys/kmem.h> 3025e8c5aaSvikram #include <sys/thread.h> 3125e8c5aaSvikram #include <sys/id_space.h> 3225e8c5aaSvikram #include <sys/avl.h> 3325e8c5aaSvikram #include <sys/list.h> 3425e8c5aaSvikram #include <sys/sysmacros.h> 3525e8c5aaSvikram #include <sys/proc.h> 3625e8c5aaSvikram #include <sys/contract.h> 3725e8c5aaSvikram #include <sys/contract_impl.h> 3825e8c5aaSvikram #include <sys/contract/device.h> 3925e8c5aaSvikram #include <sys/contract/device_impl.h> 4025e8c5aaSvikram #include <sys/cmn_err.h> 4125e8c5aaSvikram #include <sys/nvpair.h> 4225e8c5aaSvikram #include <sys/policy.h> 4325e8c5aaSvikram #include <sys/ddi_impldefs.h> 4425e8c5aaSvikram #include <sys/ddi_implfuncs.h> 4525e8c5aaSvikram #include <sys/systm.h> 4625e8c5aaSvikram #include <sys/stat.h> 4725e8c5aaSvikram #include <sys/sunddi.h> 4825e8c5aaSvikram #include <sys/esunddi.h> 4925e8c5aaSvikram #include <sys/ddi.h> 5025e8c5aaSvikram #include <sys/fs/dv_node.h> 5125e8c5aaSvikram #include <sys/sunndi.h> 5225e8c5aaSvikram #undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ 5325e8c5aaSvikram 5425e8c5aaSvikram /* 5525e8c5aaSvikram * Device Contracts 5625e8c5aaSvikram * ----------------- 5725e8c5aaSvikram * This file contains the core code for the device contracts framework. 5825e8c5aaSvikram * A device contract is an agreement or a contract between a process and 5925e8c5aaSvikram * the kernel regarding the state of the device. A device contract may be 6025e8c5aaSvikram * created when a relationship is formed between a device and a process 6125e8c5aaSvikram * i.e. at open(2) time, or it may be created at some point after the device 6225e8c5aaSvikram * has been opened. A device contract once formed may be broken by either party. 
 * A device contract can be broken by the process by an explicit abandon of the
 * contract or by an implicit abandon when the process exits. A device contract
 * can be broken by the kernel either asynchronously (without negotiation) or
 * synchronously (with negotiation). Exactly which happens depends on the device
 * state transition. The following state diagram shows the transitions between
 * device states. Only device state transitions currently supported by device
 * contracts are shown.
 *
 *                              <-- A -->
 *                       /-----------------> DEGRADED
 *                       |                      |
 *                       |                      |
 *                       |                      | S
 *                       |                      | |
 *                       |                      | v
 *                       v       S -->          v
 *                      ONLINE ------------> OFFLINE
 *
 *
 * In the figure above, the arrows indicate the direction of transition. The
 * letter S refers to transitions which are inherently synchronous i.e.
 * require negotiation and the letter A indicates transitions which are
 * asynchronous i.e. are done without contract negotiations. A good example
 * of a synchronous transition is the ONLINE -> OFFLINE transition. This
 * transition cannot happen as long as there are consumers which have the
 * device open. Thus some form of negotiation needs to happen between the
 * consumers and the kernel to ensure that consumers either close devices
 * or disallow the move to OFFLINE. Certain other transitions such as
 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
 * non-negotiable.
 * A device that suffers a fault that degrades its
 * capabilities will become degraded irrespective of what consumers it has,
 * so a negotiation in this case is pointless.
 *
 * The following device states are currently defined for device contracts:
 *
 *	CT_DEV_EV_ONLINE
 *		The device is online and functioning normally
 *	CT_DEV_EV_DEGRADED
 *		The device is online but is functioning in a degraded capacity
 *	CT_DEV_EV_OFFLINE
 *		The device is offline and is no longer configured
 *
 * A typical consumer of device contracts starts out with a contract
 * template and adds terms to that template. These include the
 * "acceptable set" (A-set) term, which is a bitset of device states which
 * are guaranteed by the contract. If the device moves out of a state in
 * the A-set, the contract is broken. The breaking of the contract can
 * be asynchronous in which case a critical contract event is sent to the
 * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
 * consumer and the kernel. The kernel does this by sending a critical
 * event to the consumer with the CTE_NEG flag set indicating that this
 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
 * privileges, it can send a NACK message to the kernel which will block
 * the device state change.
 * To NACK a negotiable event, a process must
 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 *
 * Other terms include the "minor path" term, specified explicitly if the
 * contract is not being created at open(2) time or specified implicitly
 * if the contract is being created at open time via an activated template.
 *
 * A contract event is sent on any state change to which the contract
 * owner has subscribed via the informative or critical event sets. Only
 * critical events are guaranteed to be delivered. Since all device state
 * changes are controlled by the kernel and cannot be arbitrarily generated
 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
 * need to be asserted in a process's effective set to designate an event as
 * critical. To ensure privacy, a process must either have the same effective
 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
 * asserted in its effective set in order to observe device contract events
 * off the device contract type specific endpoint.
 *
 * Yet another term available with device contracts is the "non-negotiable"
 * term. This term is used to pre-specify a NACK to any contract negotiation.
 * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
 * non-negotiable. In this case, the device contract framework assumes a
 * NACK for any transition to OFFLINE and blocks the offline.
 * If the A-set
 * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
 * are NACKed but transitions to DEGRADED succeed.
 *
 * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
 * happens just before the I/O framework attempts to offline a device
 * (i.e. detach a device and set the offline flag so that it cannot be
 * reattached). A device contract holder is expected to either NACK the offline
 * (if privileged) or release the device and allow the offline to proceed.
 *
 * The DEGRADED contract event (if DEGRADED is not in the A-set for a contract)
 * is generated just before the I/O framework transitions the device state
 * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
 *
 * The contract holder is expected to ACK or NACK a negotiation event
 * within a certain period of time. If the ACK/NACK is not received
 * within the timeout period, the device contract framework will behave
 * as if the contract does not exist and will proceed with the event.
 *
 * Unlike a process contract, a device contract does not need to exist
 * once it is abandoned, since it does not define a fault boundary. It
 * merely represents an agreement between a process and the kernel
 * regarding the state of the device. Once the process has abandoned
 * the contract (either implicitly via a process exit or explicitly)
 * the kernel has no reason to retain the contract. As a result
 * device contracts are neither inheritable nor need to exist in an
 * orphan state.
 *
 * A device, unlike a process, may exist in multiple contracts and has
 * a "life" outside a device contract. A device, unlike a process,
 * may exist without an associated contract. Unlike a process contract,
 * a device contract may be formed after a binding relationship is
 * formed between a process and a device.
 *
 *	IMPLEMENTATION NOTES
 *	====================
 * DATA STRUCTURES
 * ----------------
 * The heart of the device contracts implementation is the device contract
 * private cont_device_t (or ctd for short) data structure. It encapsulates
 * the generic contract_t data structure and has a number of private
 * fields.
 * These include:
 *	cond_minor: The minor device that is the subject of the contract
 *	cond_aset:  The bitset of states which are guaranteed by the
 *		    contract
 *	cond_noneg: If set, indicates that the result of negotiation has
 *		    been predefined to be a NACK
 * In addition, there are other device identifiers such as the devinfo node,
 * dev_t and spec_type of the minor node. There are also a few fields that
 * are used during negotiation to maintain state. See
 *	uts/common/sys/contract/device_impl.h
 * for details.
 * The ctd structure represents the device private part of a contract of
 * type "device"
 *
 * Another data structure used by device contracts is ctmpl_device. It is
 * the device contracts private part of the contract template structure.
 * It
 * encapsulates the generic template structure "ct_template_t" and includes
 * the following device contract specific fields
 *	ctd_aset:  The bitset of states that should be guaranteed by a
 *		   contract
 *	ctd_noneg: If set, indicates that contract should NACK a
 *		   negotiation
 *	ctd_minor: The devfs_path (without the /devices prefix) of the
 *		   minor node that is the subject of the contract.
 *
 * ALGORITHMS
 * ---------
 * There are three sets of routines in this file
 *	Template related routines
 *	-------------------------
 *	These routines provide support for template related operations
 *	initiated via the generic template operations. These include routines
 *	that dup a template, free it, and set various terms in the template
 *	(such as the minor node path, the acceptable state set (or A-set)
 *	and the non-negotiable term) as well as a routine to query the
 *	device specific portion of the template for the abovementioned terms.
 *	There is also a routine to create (ctmpl_device_create) that is used to
 *	create a contract from a template. This routine calls (after initial
 *	setup) the common function used to create a device contract
 *	(contract_device_create).
 *
 *	core device contract implementation
 *	----------------------------------
 *	These routines support the generic contract framework to provide
 *	functionality that allows contracts to be created, managed and
 *	destroyed.
 *	The contract_device_create() routine is a routine used
 *	to create a contract from a template (either via an explicit create
 *	operation on a template or implicitly via an open with an
 *	activated template). The contract_device_free() routine assists
 *	in freeing the device contract specific parts. There are routines
 *	used to abandon (contract_device_abandon) a device contract as well
 *	as a routine to destroy (which despite its name does not destroy,
 *	it only moves a contract to a dead state) a contract.
 *	There is also a routine to return status information about a
 *	contract - the level of detail depends on what is requested by the
 *	user. A value of CTD_FIXED only returns fixed length fields such
 *	as the A-set, state of device and value of the "noneg" term. If
 *	CTD_ALL is specified, the minor node path is returned as well.
 *
 *	In addition there are interfaces (contract_device_ack/nack) which
 *	are used to support negotiation between userland processes and
 *	device contracts. These interfaces record the acknowledgement
 *	or lack thereof for negotiation events and help determine if the
 *	negotiated event should occur.
 *
 *	"backend routines"
 *	-----------------
 *	The backend routines form the interface between the I/O framework
 *	and the device contract subsystem.
 *	These routines allow the I/O
 *	framework to call into the device contract subsystem to notify it of
 *	impending changes to a device state as well as to inform it of the
 *	final disposition of such attempted state changes. Routines in this
 *	class include contract_device_offline() that indicates an attempt to
 *	offline a device, contract_device_degrade() that indicates that
 *	a device is moving to the degraded state and contract_device_negend()
 *	that is used by the I/O framework to inform the contracts subsystem of
 *	the final disposition of an attempted operation.
 *
 * SUMMARY
 * -------
 * A contract starts its life as a template. A process allocates a device
 * contract template and sets various terms:
 *	The A-set
 *	The device minor node
 *	Critical and informative events
 *	The noneg i.e. no negotiation term
 * Setting of these terms in the template is done via the
 * ctmpl_device_set() entry point in this file. A process can query a
 * template to determine the terms already set in the template - this is
 * facilitated by the ctmpl_device_get() routine.
 *
 * Once all the appropriate terms are set, the contract is instantiated via
 * one of two methods
 * - via an explicit create operation - this is facilitated by the
 *   ctmpl_device_create() entry point
 * - synchronously with the open(2) system call - this is achieved via the
 *   contract_device_open() routine.
 * The core work for both these above functions is done by
 * contract_device_create()
 *
 * A contract once created can be queried for its status. Support for
 * status info is provided by both the common contracts framework and by
 * the "device" contract type. If the level of detail requested is
 * CTD_COMMON, only the common contract framework data is used. Higher
 * levels of detail result in calls to contract_device_status() to supply
 * device contract type specific status information.
 *
 * A contract once created may be abandoned either explicitly or implicitly.
 * In either case, the contract_device_abandon() function is invoked. This
 * function merely calls contract_destroy() which moves the contract to
 * the DEAD state. The device contract portion of destroy processing is
 * provided by contract_device_destroy() which merely disassociates the
 * contract from its device devinfo node. A contract in the DEAD state is
 * not freed. It hangs around until all references to the contract are
 * gone. When that happens, the contract is finally deallocated. The
 * device contract specific portion of the free is done by
 * contract_device_free() which finally frees the device contract specific
 * data structure (cont_device_t).
 *
 * When a device undergoes a state change, the I/O framework calls the
 * corresponding device contract entry point. For example, when a device
 * is about to go OFFLINE, the routine contract_device_offline() is
 * invoked.
 * Similarly, if a device moves to DEGRADED state, the
 * contract_device_degrade() function is called. These functions call the
 * core routine contract_device_publish(). This function determines via
 * the function is_sync_neg() whether an event is a synchronous (i.e.
 * negotiable) event or not. In the former case contract_device_publish()
 * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
 * and/or NACKs from contract holders. In the latter case, it simply
 * publishes the event and does not wait. In the negotiation case, ACKs or
 * NACKs from userland consumers result in contract_device_ack_nack()
 * being called where the result of the negotiation is recorded in the
 * contract data structure. Once all outstanding contract owners have
 * responded, the device contract code in wait_for_acks() determines the
 * final result of the negotiation. A single NACK overrides all other ACKs.
 * If there is no NACK, then a single ACK will result in an overall ACK
 * result. If there are no ACKs or NACKs, then the result CT_NONE is
 * returned back to the I/O framework. Once the event is permitted or
 * blocked, the I/O framework proceeds or aborts the state change. The
 * I/O framework then calls contract_device_negend() with a result code
 * indicating final disposition of the event. This call releases the
 * barrier and other state associated with the previous negotiation,
 * which permits the next event (if any) to come into the device contract
 * framework.
 *
 * Finally, a device that has outstanding contracts may be removed from
 * the system which results in its devinfo node being freed. The devinfo
 * free routine in the I/O framework calls into the device contract
 * function - contract_device_remove_dip(). This routine disassociates
 * the dip from all contracts associated with the devinfo node being freed,
 * allowing the devinfo node to be freed.
 *
 * LOCKING
 * ---------
 *	There are four sets of data that need to be protected by locks
 *
 *	i) device contract specific portion of the contract template - This data
 *	is protected by the template lock ctmpl_lock.
 *
 *	ii) device contract specific portion of the contract - This data is
 *	protected by the contract lock ct_lock
 *
 *	iii) The linked list of contracts hanging off a devinfo node - This
 *	list is protected by the per-devinfo node lock devi_ct_lock
 *
 *	iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
 *	and devi_ct_count that controls state changes to a dip
 *
 * The template lock is independent in that none of the other locks in this
 * file may be taken while holding the template lock (and vice versa).
35325e8c5aaSvikram * 35425e8c5aaSvikram * The remaining three locks have the following lock order 35525e8c5aaSvikram * 35625e8c5aaSvikram * devi_ct_lock -> ct_count barrier -> ct_lock 35725e8c5aaSvikram * 35825e8c5aaSvikram */ 35925e8c5aaSvikram 36025e8c5aaSvikram static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, 36125e8c5aaSvikram int spec_type, proc_t *owner, int *errorp); 36225e8c5aaSvikram 36325e8c5aaSvikram /* barrier routines */ 36425e8c5aaSvikram static void ct_barrier_acquire(dev_info_t *dip); 36525e8c5aaSvikram static void ct_barrier_release(dev_info_t *dip); 36625e8c5aaSvikram static int ct_barrier_held(dev_info_t *dip); 36725e8c5aaSvikram static int ct_barrier_empty(dev_info_t *dip); 36825e8c5aaSvikram static void ct_barrier_wait_for_release(dev_info_t *dip); 36925e8c5aaSvikram static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); 37025e8c5aaSvikram static void ct_barrier_decr(dev_info_t *dip); 37125e8c5aaSvikram static void ct_barrier_incr(dev_info_t *dip); 37225e8c5aaSvikram 37325e8c5aaSvikram ct_type_t *device_type; 37425e8c5aaSvikram 37525e8c5aaSvikram /* 37625e8c5aaSvikram * Macro predicates for determining when events should be sent and how. 37725e8c5aaSvikram */ 37825e8c5aaSvikram #define EVSENDP(ctd, flag) \ 37925e8c5aaSvikram ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) 38025e8c5aaSvikram 38125e8c5aaSvikram #define EVINFOP(ctd, flag) \ 38225e8c5aaSvikram ((ctd->cond_contract.ct_ev_crit & flag) == 0) 38325e8c5aaSvikram 38425e8c5aaSvikram /* 38525e8c5aaSvikram * State transition table showing which transitions are synchronous and which 38625e8c5aaSvikram * are not. 
38725e8c5aaSvikram */ 38825e8c5aaSvikram struct ct_dev_negtable { 38925e8c5aaSvikram uint_t st_old; 39025e8c5aaSvikram uint_t st_new; 39125e8c5aaSvikram uint_t st_neg; 39225e8c5aaSvikram } ct_dev_negtable[] = { 39325e8c5aaSvikram {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, 39425e8c5aaSvikram {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, 39525e8c5aaSvikram {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, 39625e8c5aaSvikram {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, 39725e8c5aaSvikram {0} 39825e8c5aaSvikram }; 39925e8c5aaSvikram 40025e8c5aaSvikram /* 40125e8c5aaSvikram * Device contract template implementation 40225e8c5aaSvikram */ 40325e8c5aaSvikram 40425e8c5aaSvikram /* 40525e8c5aaSvikram * ctmpl_device_dup 40625e8c5aaSvikram * 40725e8c5aaSvikram * The device contract template dup entry point. 40825e8c5aaSvikram * This simply copies all the fields (generic as well as device contract 40925e8c5aaSvikram * specific) fields of the original. 41025e8c5aaSvikram */ 41125e8c5aaSvikram static struct ct_template * 41225e8c5aaSvikram ctmpl_device_dup(struct ct_template *template) 41325e8c5aaSvikram { 41425e8c5aaSvikram ctmpl_device_t *new; 41525e8c5aaSvikram ctmpl_device_t *old = template->ctmpl_data; 41625e8c5aaSvikram char *buf; 41725e8c5aaSvikram char *minor; 41825e8c5aaSvikram 41925e8c5aaSvikram new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 42025e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 42125e8c5aaSvikram 42225e8c5aaSvikram /* 42325e8c5aaSvikram * copy generic fields. 
42425e8c5aaSvikram * ctmpl_copy returns with old template lock held 42525e8c5aaSvikram */ 42625e8c5aaSvikram ctmpl_copy(&new->ctd_ctmpl, template); 42725e8c5aaSvikram 42825e8c5aaSvikram new->ctd_ctmpl.ctmpl_data = new; 42925e8c5aaSvikram new->ctd_aset = old->ctd_aset; 43025e8c5aaSvikram new->ctd_minor = NULL; 43125e8c5aaSvikram new->ctd_noneg = old->ctd_noneg; 43225e8c5aaSvikram 43325e8c5aaSvikram if (old->ctd_minor) { 43425e8c5aaSvikram ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); 43525e8c5aaSvikram bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); 43625e8c5aaSvikram } else { 43725e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 43825e8c5aaSvikram buf = NULL; 43925e8c5aaSvikram } 44025e8c5aaSvikram 44125e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 44225e8c5aaSvikram if (buf) { 44325e8c5aaSvikram minor = i_ddi_strdup(buf, KM_SLEEP); 44425e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 44525e8c5aaSvikram buf = NULL; 44625e8c5aaSvikram } else { 44725e8c5aaSvikram minor = NULL; 44825e8c5aaSvikram } 44925e8c5aaSvikram mutex_enter(&template->ctmpl_lock); 45025e8c5aaSvikram 45125e8c5aaSvikram if (minor) { 45225e8c5aaSvikram new->ctd_minor = minor; 45325e8c5aaSvikram } 45425e8c5aaSvikram 45525e8c5aaSvikram ASSERT(buf == NULL); 45625e8c5aaSvikram return (&new->ctd_ctmpl); 45725e8c5aaSvikram } 45825e8c5aaSvikram 45925e8c5aaSvikram /* 46025e8c5aaSvikram * ctmpl_device_free 46125e8c5aaSvikram * 46225e8c5aaSvikram * The device contract template free entry point. Just 46325e8c5aaSvikram * frees the template. 
46425e8c5aaSvikram */ 46525e8c5aaSvikram static void 46625e8c5aaSvikram ctmpl_device_free(struct ct_template *template) 46725e8c5aaSvikram { 46825e8c5aaSvikram ctmpl_device_t *dtmpl = template->ctmpl_data; 46925e8c5aaSvikram 47025e8c5aaSvikram if (dtmpl->ctd_minor) 47125e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 47225e8c5aaSvikram 47325e8c5aaSvikram kmem_free(dtmpl, sizeof (ctmpl_device_t)); 47425e8c5aaSvikram } 47525e8c5aaSvikram 47625e8c5aaSvikram /* 47725e8c5aaSvikram * SAFE_EV is the set of events which a non-privileged process is 47825e8c5aaSvikram * allowed to make critical. An unprivileged device contract owner has 47925e8c5aaSvikram * no control over when a device changes state, so all device events 48025e8c5aaSvikram * can be in the critical set. 48125e8c5aaSvikram * 48225e8c5aaSvikram * EXCESS tells us if "value", a critical event set, requires 48325e8c5aaSvikram * additional privilege. For device contracts EXCESS currently 48425e8c5aaSvikram * evaluates to 0. 48525e8c5aaSvikram */ 48625e8c5aaSvikram #define SAFE_EV (CT_DEV_ALLEVENT) 48725e8c5aaSvikram #define EXCESS(value) ((value) & ~SAFE_EV) 48825e8c5aaSvikram 48925e8c5aaSvikram 49025e8c5aaSvikram /* 49125e8c5aaSvikram * ctmpl_device_set 49225e8c5aaSvikram * 49325e8c5aaSvikram * The device contract template set entry point. Sets various terms in the 49425e8c5aaSvikram * template. The non-negotiable term can only be set if the process has 49525e8c5aaSvikram * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 
49625e8c5aaSvikram */ 49725e8c5aaSvikram static int 498c5a9a4fcSAntonello Cruz ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam, 499c5a9a4fcSAntonello Cruz const cred_t *cr) 50025e8c5aaSvikram { 50125e8c5aaSvikram ctmpl_device_t *dtmpl = tmpl->ctmpl_data; 502c5a9a4fcSAntonello Cruz ct_param_t *param = &kparam->param; 50325e8c5aaSvikram int error; 50425e8c5aaSvikram dev_info_t *dip; 50525e8c5aaSvikram int spec_type; 5067b209c2cSacruz uint64_t param_value; 5077b209c2cSacruz char *str_value; 50825e8c5aaSvikram 50925e8c5aaSvikram ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); 51025e8c5aaSvikram 5117b209c2cSacruz if (param->ctpm_id == CTDP_MINOR) { 512c5a9a4fcSAntonello Cruz str_value = (char *)kparam->ctpm_kbuf; 5137b209c2cSacruz str_value[param->ctpm_size - 1] = '\0'; 5147b209c2cSacruz } else { 515d170b13aSacruz if (param->ctpm_size < sizeof (uint64_t)) 516d170b13aSacruz return (EINVAL); 517c5a9a4fcSAntonello Cruz param_value = *(uint64_t *)kparam->ctpm_kbuf; 5187b209c2cSacruz } 5197b209c2cSacruz 52025e8c5aaSvikram switch (param->ctpm_id) { 52125e8c5aaSvikram case CTDP_ACCEPT: 5227b209c2cSacruz if (param_value & ~CT_DEV_ALLEVENT) 52325e8c5aaSvikram return (EINVAL); 5247b209c2cSacruz if (param_value == 0) 52525e8c5aaSvikram return (EINVAL); 5267b209c2cSacruz if (param_value == CT_DEV_ALLEVENT) 52725e8c5aaSvikram return (EINVAL); 52825e8c5aaSvikram 5297b209c2cSacruz dtmpl->ctd_aset = param_value; 53025e8c5aaSvikram break; 53125e8c5aaSvikram case CTDP_NONEG: 5327b209c2cSacruz if (param_value != CTDP_NONEG_SET && 5337b209c2cSacruz param_value != CTDP_NONEG_CLEAR) 53425e8c5aaSvikram return (EINVAL); 53525e8c5aaSvikram 53625e8c5aaSvikram /* 53725e8c5aaSvikram * only privileged processes can designate a contract 53825e8c5aaSvikram * non-negotiatble. 
53925e8c5aaSvikram */ 5407b209c2cSacruz if (param_value == CTDP_NONEG_SET && 54125e8c5aaSvikram (error = secpolicy_sys_devices(cr)) != 0) { 54225e8c5aaSvikram return (error); 54325e8c5aaSvikram } 54425e8c5aaSvikram 5457b209c2cSacruz dtmpl->ctd_noneg = param_value; 54625e8c5aaSvikram break; 54725e8c5aaSvikram 54825e8c5aaSvikram case CTDP_MINOR: 5497b209c2cSacruz if (*str_value != '/' || 5507b209c2cSacruz strncmp(str_value, "/devices/", 5517b209c2cSacruz strlen("/devices/")) == 0 || 5527b209c2cSacruz strstr(str_value, "../devices/") != NULL || 5537b209c2cSacruz strchr(str_value, ':') == NULL) { 55425e8c5aaSvikram return (EINVAL); 55525e8c5aaSvikram } 55625e8c5aaSvikram 55725e8c5aaSvikram spec_type = 0; 55825e8c5aaSvikram dip = NULL; 5597b209c2cSacruz if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) { 56025e8c5aaSvikram return (ERANGE); 56125e8c5aaSvikram } 56225e8c5aaSvikram ddi_release_devi(dip); 56325e8c5aaSvikram 56425e8c5aaSvikram if (spec_type != S_IFCHR && spec_type != S_IFBLK) { 56525e8c5aaSvikram return (EINVAL); 56625e8c5aaSvikram } 56725e8c5aaSvikram 56825e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 56925e8c5aaSvikram kmem_free(dtmpl->ctd_minor, 57025e8c5aaSvikram strlen(dtmpl->ctd_minor) + 1); 57125e8c5aaSvikram } 5727b209c2cSacruz dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP); 57325e8c5aaSvikram break; 57425e8c5aaSvikram case CTP_EV_CRITICAL: 57525e8c5aaSvikram /* 57625e8c5aaSvikram * Currently for device contracts, any event 57725e8c5aaSvikram * may be added to the critical set. We retain the 57825e8c5aaSvikram * following code however for future enhancements. 
57925e8c5aaSvikram */ 5807b209c2cSacruz if (EXCESS(param_value) && 58125e8c5aaSvikram (error = secpolicy_contract_event(cr)) != 0) 58225e8c5aaSvikram return (error); 5837b209c2cSacruz tmpl->ctmpl_ev_crit = param_value; 58425e8c5aaSvikram break; 58525e8c5aaSvikram default: 58625e8c5aaSvikram return (EINVAL); 58725e8c5aaSvikram } 58825e8c5aaSvikram 58925e8c5aaSvikram return (0); 59025e8c5aaSvikram } 59125e8c5aaSvikram 59225e8c5aaSvikram /* 59325e8c5aaSvikram * ctmpl_device_get 59425e8c5aaSvikram * 59525e8c5aaSvikram * The device contract template get entry point. Simply fetches and 59625e8c5aaSvikram * returns the value of the requested term. 59725e8c5aaSvikram */ 59825e8c5aaSvikram static int 599c5a9a4fcSAntonello Cruz ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam) 60025e8c5aaSvikram { 60125e8c5aaSvikram ctmpl_device_t *dtmpl = template->ctmpl_data; 602c5a9a4fcSAntonello Cruz ct_param_t *param = &kparam->param; 603c5a9a4fcSAntonello Cruz uint64_t *param_value = kparam->ctpm_kbuf; 60425e8c5aaSvikram 60525e8c5aaSvikram ASSERT(MUTEX_HELD(&template->ctmpl_lock)); 60625e8c5aaSvikram 607d170b13aSacruz if (param->ctpm_id == CTDP_ACCEPT || 608d170b13aSacruz param->ctpm_id == CTDP_NONEG) { 609d170b13aSacruz if (param->ctpm_size < sizeof (uint64_t)) 610d170b13aSacruz return (EINVAL); 611c5a9a4fcSAntonello Cruz kparam->ret_size = sizeof (uint64_t); 612d170b13aSacruz } 613d170b13aSacruz 61425e8c5aaSvikram switch (param->ctpm_id) { 61525e8c5aaSvikram case CTDP_ACCEPT: 6167b209c2cSacruz *param_value = dtmpl->ctd_aset; 61725e8c5aaSvikram break; 61825e8c5aaSvikram case CTDP_NONEG: 6197b209c2cSacruz *param_value = dtmpl->ctd_noneg; 62025e8c5aaSvikram break; 62125e8c5aaSvikram case CTDP_MINOR: 62225e8c5aaSvikram if (dtmpl->ctd_minor) { 623c5a9a4fcSAntonello Cruz kparam->ret_size = strlcpy((char *)kparam->ctpm_kbuf, 6247b209c2cSacruz dtmpl->ctd_minor, param->ctpm_size); 625c5a9a4fcSAntonello Cruz kparam->ret_size++; 62625e8c5aaSvikram } else { 62725e8c5aaSvikram 
return (ENOENT); 62825e8c5aaSvikram } 62925e8c5aaSvikram break; 63025e8c5aaSvikram default: 63125e8c5aaSvikram return (EINVAL); 63225e8c5aaSvikram } 63325e8c5aaSvikram 63425e8c5aaSvikram return (0); 63525e8c5aaSvikram } 63625e8c5aaSvikram 63725e8c5aaSvikram /* 63825e8c5aaSvikram * Device contract type specific portion of creating a contract using 63925e8c5aaSvikram * a specified template 64025e8c5aaSvikram */ 64125e8c5aaSvikram /*ARGSUSED*/ 64225e8c5aaSvikram int 64325e8c5aaSvikram ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) 64425e8c5aaSvikram { 64525e8c5aaSvikram ctmpl_device_t *dtmpl; 64625e8c5aaSvikram char *buf; 64725e8c5aaSvikram dev_t dev; 64825e8c5aaSvikram int spec_type; 64925e8c5aaSvikram int error; 65025e8c5aaSvikram cont_device_t *ctd; 65125e8c5aaSvikram 65225e8c5aaSvikram if (ctidp == NULL) 65325e8c5aaSvikram return (EINVAL); 65425e8c5aaSvikram 65525e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 65625e8c5aaSvikram 65725e8c5aaSvikram dtmpl = template->ctmpl_data; 65825e8c5aaSvikram 65925e8c5aaSvikram mutex_enter(&template->ctmpl_lock); 66025e8c5aaSvikram if (dtmpl->ctd_minor == NULL) { 66125e8c5aaSvikram /* incomplete template */ 66225e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 66325e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 66425e8c5aaSvikram return (EINVAL); 66525e8c5aaSvikram } else { 66625e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 66725e8c5aaSvikram bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); 66825e8c5aaSvikram } 66925e8c5aaSvikram mutex_exit(&template->ctmpl_lock); 67025e8c5aaSvikram 67125e8c5aaSvikram spec_type = 0; 67225e8c5aaSvikram dev = NODEV; 67325e8c5aaSvikram if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || 67425e8c5aaSvikram dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || 67525e8c5aaSvikram (spec_type != S_IFCHR && spec_type != S_IFBLK)) { 67625e8c5aaSvikram CT_DEBUG((CE_WARN, 67725e8c5aaSvikram "tmpl_create: failed to find device: %s", buf)); 
67825e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 67925e8c5aaSvikram return (ERANGE); 68025e8c5aaSvikram } 68125e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 68225e8c5aaSvikram 68325e8c5aaSvikram ctd = contract_device_create(template->ctmpl_data, 68425e8c5aaSvikram dev, spec_type, curproc, &error); 68525e8c5aaSvikram 68625e8c5aaSvikram if (ctd == NULL) { 68725e8c5aaSvikram CT_DEBUG((CE_WARN, "Failed to create device contract for " 68825e8c5aaSvikram "process (%d) with device (devt = %lu, spec_type = %s)", 68925e8c5aaSvikram curproc->p_pid, dev, 69025e8c5aaSvikram spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); 69125e8c5aaSvikram return (error); 69225e8c5aaSvikram } 69325e8c5aaSvikram 69425e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 69525e8c5aaSvikram *ctidp = ctd->cond_contract.ct_id; 69625e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 69725e8c5aaSvikram 69825e8c5aaSvikram return (0); 69925e8c5aaSvikram } 70025e8c5aaSvikram 70125e8c5aaSvikram /* 70225e8c5aaSvikram * Device contract specific template entry points 70325e8c5aaSvikram */ 70425e8c5aaSvikram static ctmplops_t ctmpl_device_ops = { 70525e8c5aaSvikram ctmpl_device_dup, /* ctop_dup */ 70625e8c5aaSvikram ctmpl_device_free, /* ctop_free */ 70725e8c5aaSvikram ctmpl_device_set, /* ctop_set */ 70825e8c5aaSvikram ctmpl_device_get, /* ctop_get */ 70925e8c5aaSvikram ctmpl_device_create, /* ctop_create */ 71025e8c5aaSvikram CT_DEV_ALLEVENT /* all device events bitmask */ 71125e8c5aaSvikram }; 71225e8c5aaSvikram 71325e8c5aaSvikram 71425e8c5aaSvikram /* 71525e8c5aaSvikram * Device contract implementation 71625e8c5aaSvikram */ 71725e8c5aaSvikram 71825e8c5aaSvikram /* 71925e8c5aaSvikram * contract_device_default 72025e8c5aaSvikram * 72125e8c5aaSvikram * The device contract default template entry point. Creates a 72225e8c5aaSvikram * device contract template with a default A-set and no "noneg" , 72325e8c5aaSvikram * with informative degrade events and critical offline events. 
72425e8c5aaSvikram * There is no default minor path. 72525e8c5aaSvikram */ 72625e8c5aaSvikram static ct_template_t * 72725e8c5aaSvikram contract_device_default(void) 72825e8c5aaSvikram { 72925e8c5aaSvikram ctmpl_device_t *new; 73025e8c5aaSvikram 73125e8c5aaSvikram new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 73225e8c5aaSvikram ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); 73325e8c5aaSvikram 73425e8c5aaSvikram new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; 73525e8c5aaSvikram new->ctd_noneg = 0; 73625e8c5aaSvikram new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; 73725e8c5aaSvikram new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; 73825e8c5aaSvikram 73925e8c5aaSvikram return (&new->ctd_ctmpl); 74025e8c5aaSvikram } 74125e8c5aaSvikram 74225e8c5aaSvikram /* 74325e8c5aaSvikram * contract_device_free 74425e8c5aaSvikram * 74525e8c5aaSvikram * Destroys the device contract specific portion of a contract and 74625e8c5aaSvikram * frees the contract. 74725e8c5aaSvikram */ 74825e8c5aaSvikram static void 74925e8c5aaSvikram contract_device_free(contract_t *ct) 75025e8c5aaSvikram { 75125e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 75225e8c5aaSvikram 75325e8c5aaSvikram ASSERT(ctd->cond_minor); 75425e8c5aaSvikram ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 75525e8c5aaSvikram kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); 75625e8c5aaSvikram 75725e8c5aaSvikram ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && 75825e8c5aaSvikram ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); 75925e8c5aaSvikram 76025e8c5aaSvikram ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); 76125e8c5aaSvikram 76225e8c5aaSvikram ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); 76325e8c5aaSvikram ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); 76425e8c5aaSvikram 76525e8c5aaSvikram ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); 76625e8c5aaSvikram ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); 76725e8c5aaSvikram 76825e8c5aaSvikram 
ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0)); 76925e8c5aaSvikram ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); 77025e8c5aaSvikram 77125e8c5aaSvikram ASSERT(!list_link_active(&ctd->cond_next)); 77225e8c5aaSvikram 77325e8c5aaSvikram kmem_free(ctd, sizeof (cont_device_t)); 77425e8c5aaSvikram } 77525e8c5aaSvikram 77625e8c5aaSvikram /* 77725e8c5aaSvikram * contract_device_abandon 77825e8c5aaSvikram * 77925e8c5aaSvikram * The device contract abandon entry point. 78025e8c5aaSvikram */ 78125e8c5aaSvikram static void 78225e8c5aaSvikram contract_device_abandon(contract_t *ct) 78325e8c5aaSvikram { 78425e8c5aaSvikram ASSERT(MUTEX_HELD(&ct->ct_lock)); 78525e8c5aaSvikram 78625e8c5aaSvikram /* 78725e8c5aaSvikram * device contracts cannot be inherited or orphaned. 78825e8c5aaSvikram * Move the contract to the DEAD_STATE. It will be freed 78925e8c5aaSvikram * once all references to it are gone. 79025e8c5aaSvikram */ 79125e8c5aaSvikram contract_destroy(ct); 79225e8c5aaSvikram } 79325e8c5aaSvikram 79425e8c5aaSvikram /* 79525e8c5aaSvikram * contract_device_destroy 79625e8c5aaSvikram * 79725e8c5aaSvikram * The device contract destroy entry point. 79825e8c5aaSvikram * Called from contract_destroy() to do any type specific destroy. Note 79925e8c5aaSvikram * that destroy is a misnomer - this does not free the contract, it only 80025e8c5aaSvikram * moves it to the dead state. 
A contract is actually freed via 80125e8c5aaSvikram * contract_rele() -> contract_dtor(), contop_free() 80225e8c5aaSvikram */ 80325e8c5aaSvikram static void 80425e8c5aaSvikram contract_device_destroy(contract_t *ct) 80525e8c5aaSvikram { 8062eb07f5eSStephen Hanson cont_device_t *ctd; 8072eb07f5eSStephen Hanson dev_info_t *dip; 80825e8c5aaSvikram 80925e8c5aaSvikram ASSERT(MUTEX_HELD(&ct->ct_lock)); 81025e8c5aaSvikram 8112eb07f5eSStephen Hanson for (;;) { 8122eb07f5eSStephen Hanson ctd = ct->ct_data; 8132eb07f5eSStephen Hanson dip = ctd->cond_dip; 81425e8c5aaSvikram if (dip == NULL) { 81525e8c5aaSvikram /* 81625e8c5aaSvikram * The dip has been removed, this is a dangling contract 81725e8c5aaSvikram * Check that dip linkages are NULL 81825e8c5aaSvikram */ 81925e8c5aaSvikram ASSERT(!list_link_active(&ctd->cond_next)); 8202eb07f5eSStephen Hanson CT_DEBUG((CE_NOTE, "contract_device_destroy:" 8212eb07f5eSStephen Hanson " contract has no devinfo node. contract ctid : %d", 8222eb07f5eSStephen Hanson ct->ct_id)); 82325e8c5aaSvikram return; 82425e8c5aaSvikram } 82525e8c5aaSvikram 82625e8c5aaSvikram /* 8272eb07f5eSStephen Hanson * The intended lock order is : devi_ct_lock -> ct_count 8282eb07f5eSStephen Hanson * barrier -> ct_lock. 8292eb07f5eSStephen Hanson * However we can't do this here as dropping the ct_lock allows 8302eb07f5eSStephen Hanson * a race condition with i_ddi_free_node()/ 8312eb07f5eSStephen Hanson * contract_device_remove_dip() which may free off dip before 8322eb07f5eSStephen Hanson * we can take devi_ct_lock. So use mutex_tryenter to avoid 8332eb07f5eSStephen Hanson * dropping ct_lock until we have acquired devi_ct_lock. 
83425e8c5aaSvikram */ 8352eb07f5eSStephen Hanson if (mutex_tryenter(&(DEVI(dip)->devi_ct_lock)) != 0) 8362eb07f5eSStephen Hanson break; 8372eb07f5eSStephen Hanson mutex_exit(&ct->ct_lock); 8382eb07f5eSStephen Hanson delay(drv_usectohz(1000)); 8392eb07f5eSStephen Hanson mutex_enter(&ct->ct_lock); 8402eb07f5eSStephen Hanson } 84125e8c5aaSvikram mutex_exit(&ct->ct_lock); 84225e8c5aaSvikram 84325e8c5aaSvikram /* 84425e8c5aaSvikram * Waiting for the barrier to be released is strictly speaking not 84525e8c5aaSvikram * necessary. But it simplifies the implementation of 84625e8c5aaSvikram * contract_device_publish() by establishing the invariant that 84725e8c5aaSvikram * device contracts cannot go away during negotiation. 84825e8c5aaSvikram */ 84925e8c5aaSvikram ct_barrier_wait_for_release(dip); 85025e8c5aaSvikram mutex_enter(&ct->ct_lock); 85125e8c5aaSvikram 85225e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 85325e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 85425e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 85525e8c5aaSvikram 85625e8c5aaSvikram mutex_exit(&ct->ct_lock); 85725e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 85825e8c5aaSvikram mutex_enter(&ct->ct_lock); 85925e8c5aaSvikram } 86025e8c5aaSvikram 86125e8c5aaSvikram /* 86225e8c5aaSvikram * contract_device_status 86325e8c5aaSvikram * 86425e8c5aaSvikram * The device contract status entry point. 
Called when level of "detail" 86525e8c5aaSvikram * is either CTD_FIXED or CTD_ALL 86625e8c5aaSvikram * 86725e8c5aaSvikram */ 86825e8c5aaSvikram static void 86925e8c5aaSvikram contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, 87025e8c5aaSvikram void *status, model_t model) 87125e8c5aaSvikram { 87225e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 87325e8c5aaSvikram 87425e8c5aaSvikram ASSERT(detail == CTD_FIXED || detail == CTD_ALL); 87525e8c5aaSvikram 87625e8c5aaSvikram mutex_enter(&ct->ct_lock); 87725e8c5aaSvikram contract_status_common(ct, zone, status, model); 87825e8c5aaSvikram 87925e8c5aaSvikram /* 88025e8c5aaSvikram * There's no need to hold the contract lock while accessing static 88125e8c5aaSvikram * data like aset or noneg. But since we need the lock to access other 88225e8c5aaSvikram * data like state, we hold it anyway. 88325e8c5aaSvikram */ 88425e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); 88525e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); 88625e8c5aaSvikram VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); 88725e8c5aaSvikram 88825e8c5aaSvikram if (detail == CTD_FIXED) { 88925e8c5aaSvikram mutex_exit(&ct->ct_lock); 89025e8c5aaSvikram return; 89125e8c5aaSvikram } 89225e8c5aaSvikram 89325e8c5aaSvikram ASSERT(ctd->cond_minor); 89425e8c5aaSvikram VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); 89525e8c5aaSvikram 89625e8c5aaSvikram mutex_exit(&ct->ct_lock); 89725e8c5aaSvikram } 89825e8c5aaSvikram 89925e8c5aaSvikram /* 90025e8c5aaSvikram * Converts a result integer into the corresponding string. 
Used for printing 90125e8c5aaSvikram * messages 90225e8c5aaSvikram */ 90325e8c5aaSvikram static char * 90425e8c5aaSvikram result_str(uint_t result) 90525e8c5aaSvikram { 90625e8c5aaSvikram switch (result) { 90725e8c5aaSvikram case CT_ACK: 90825e8c5aaSvikram return ("CT_ACK"); 90925e8c5aaSvikram case CT_NACK: 91025e8c5aaSvikram return ("CT_NACK"); 91125e8c5aaSvikram case CT_NONE: 91225e8c5aaSvikram return ("CT_NONE"); 91325e8c5aaSvikram default: 91425e8c5aaSvikram return ("UNKNOWN"); 91525e8c5aaSvikram } 91625e8c5aaSvikram } 91725e8c5aaSvikram 91825e8c5aaSvikram /* 91925e8c5aaSvikram * Converts a device state integer constant into the corresponding string. 92025e8c5aaSvikram * Used to print messages. 92125e8c5aaSvikram */ 92225e8c5aaSvikram static char * 92325e8c5aaSvikram state_str(uint_t state) 92425e8c5aaSvikram { 92525e8c5aaSvikram switch (state) { 92625e8c5aaSvikram case CT_DEV_EV_ONLINE: 92725e8c5aaSvikram return ("ONLINE"); 92825e8c5aaSvikram case CT_DEV_EV_DEGRADED: 92925e8c5aaSvikram return ("DEGRADED"); 93025e8c5aaSvikram case CT_DEV_EV_OFFLINE: 93125e8c5aaSvikram return ("OFFLINE"); 93225e8c5aaSvikram default: 93325e8c5aaSvikram return ("UNKNOWN"); 93425e8c5aaSvikram } 93525e8c5aaSvikram } 93625e8c5aaSvikram 93725e8c5aaSvikram /* 93825e8c5aaSvikram * Routine that determines if a particular CT_DEV_EV_? event corresponds to a 93925e8c5aaSvikram * synchronous state change or not. 
94025e8c5aaSvikram */ 94125e8c5aaSvikram static int 94225e8c5aaSvikram is_sync_neg(uint_t old, uint_t new) 94325e8c5aaSvikram { 94425e8c5aaSvikram int i; 94525e8c5aaSvikram 94625e8c5aaSvikram ASSERT(old & CT_DEV_ALLEVENT); 94725e8c5aaSvikram ASSERT(new & CT_DEV_ALLEVENT); 94825e8c5aaSvikram 94925e8c5aaSvikram if (old == new) { 95025e8c5aaSvikram CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", 95125e8c5aaSvikram state_str(new))); 95225e8c5aaSvikram return (-2); 95325e8c5aaSvikram } 95425e8c5aaSvikram 95525e8c5aaSvikram for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { 95625e8c5aaSvikram if (old == ct_dev_negtable[i].st_old && 95725e8c5aaSvikram new == ct_dev_negtable[i].st_new) { 95825e8c5aaSvikram return (ct_dev_negtable[i].st_neg); 95925e8c5aaSvikram } 96025e8c5aaSvikram } 96125e8c5aaSvikram 96225e8c5aaSvikram CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " 96325e8c5aaSvikram "old = %s -> new = %s", state_str(old), state_str(new))); 96425e8c5aaSvikram 96525e8c5aaSvikram return (-1); 96625e8c5aaSvikram } 96725e8c5aaSvikram 96825e8c5aaSvikram /* 96925e8c5aaSvikram * Used to cleanup cached dv_nodes so that when a device is released by 97025e8c5aaSvikram * a contract holder, its devinfo node can be successfully detached. 
97125e8c5aaSvikram */ 97225e8c5aaSvikram static int 97325e8c5aaSvikram contract_device_dvclean(dev_info_t *dip) 97425e8c5aaSvikram { 97525e8c5aaSvikram char *devnm; 97625e8c5aaSvikram dev_info_t *pdip; 97725e8c5aaSvikram 97825e8c5aaSvikram ASSERT(dip); 97925e8c5aaSvikram 98025e8c5aaSvikram /* pdip can be NULL if we have contracts against the root dip */ 98125e8c5aaSvikram pdip = ddi_get_parent(dip); 98225e8c5aaSvikram 98325e8c5aaSvikram if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { 98425e8c5aaSvikram char *path; 98525e8c5aaSvikram 98625e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 98725e8c5aaSvikram (void) ddi_pathname(dip, path); 98825e8c5aaSvikram CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " 98925e8c5aaSvikram "device=%s", path)); 99025e8c5aaSvikram kmem_free(path, MAXPATHLEN); 99125e8c5aaSvikram return (EDEADLOCK); 99225e8c5aaSvikram } 99325e8c5aaSvikram 99425e8c5aaSvikram if (pdip) { 99525e8c5aaSvikram devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 99625e8c5aaSvikram (void) ddi_deviname(dip, devnm); 997*320fb372SYuri Pankov (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); 99825e8c5aaSvikram kmem_free(devnm, MAXNAMELEN + 1); 99925e8c5aaSvikram } else { 1000*320fb372SYuri Pankov (void) devfs_clean(dip, NULL, DV_CLEAN_FORCE); 100125e8c5aaSvikram } 100225e8c5aaSvikram 1003*320fb372SYuri Pankov return (0); 100425e8c5aaSvikram } 100525e8c5aaSvikram 100625e8c5aaSvikram /* 100725e8c5aaSvikram * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. 100825e8c5aaSvikram * Results in the ACK or NACK being recorded on the dip for one particular 100925e8c5aaSvikram * contract. The device contracts framework evaluates the ACK/NACKs for all 101025e8c5aaSvikram * contracts against a device to determine if a particular device state change 101125e8c5aaSvikram * should be allowed. 
101225e8c5aaSvikram */ 101325e8c5aaSvikram static int 101425e8c5aaSvikram contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, 101525e8c5aaSvikram uint_t cmd) 101625e8c5aaSvikram { 101725e8c5aaSvikram cont_device_t *ctd = ct->ct_data; 101825e8c5aaSvikram dev_info_t *dip; 101925e8c5aaSvikram ctid_t ctid; 102025e8c5aaSvikram int error; 102125e8c5aaSvikram 102225e8c5aaSvikram ctid = ct->ct_id; 102325e8c5aaSvikram 102425e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); 102525e8c5aaSvikram 102625e8c5aaSvikram mutex_enter(&ct->ct_lock); 102725e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); 102825e8c5aaSvikram 102925e8c5aaSvikram dip = ctd->cond_dip; 103025e8c5aaSvikram 103125e8c5aaSvikram ASSERT(ctd->cond_minor); 103225e8c5aaSvikram ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 103325e8c5aaSvikram 103425e8c5aaSvikram /* 103525e8c5aaSvikram * Negotiation only if new state is not in A-set 103625e8c5aaSvikram */ 103725e8c5aaSvikram ASSERT(!(ctd->cond_aset & evtype)); 103825e8c5aaSvikram 103925e8c5aaSvikram /* 104025e8c5aaSvikram * Negotiation only if transition is synchronous 104125e8c5aaSvikram */ 104225e8c5aaSvikram ASSERT(is_sync_neg(ctd->cond_state, evtype)); 104325e8c5aaSvikram 104425e8c5aaSvikram /* 104525e8c5aaSvikram * We shouldn't be negotiating if the "noneg" flag is set 104625e8c5aaSvikram */ 104725e8c5aaSvikram ASSERT(!ctd->cond_noneg); 104825e8c5aaSvikram 104925e8c5aaSvikram if (dip) 105025e8c5aaSvikram ndi_hold_devi(dip); 105125e8c5aaSvikram 105225e8c5aaSvikram mutex_exit(&ct->ct_lock); 105325e8c5aaSvikram 105425e8c5aaSvikram /* 105525e8c5aaSvikram * dv_clean only if !NACK and offline state change 105625e8c5aaSvikram */ 105725e8c5aaSvikram if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { 105825e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); 105925e8c5aaSvikram error = contract_device_dvclean(dip); 106025e8c5aaSvikram if (error != 0) { 106125e8c5aaSvikram 
CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", 106225e8c5aaSvikram ctid)); 106325e8c5aaSvikram ddi_release_devi(dip); 106425e8c5aaSvikram } 106525e8c5aaSvikram } 106625e8c5aaSvikram 106725e8c5aaSvikram mutex_enter(&ct->ct_lock); 106825e8c5aaSvikram 106925e8c5aaSvikram if (dip) 107025e8c5aaSvikram ddi_release_devi(dip); 107125e8c5aaSvikram 107225e8c5aaSvikram if (dip == NULL) { 107325e8c5aaSvikram if (ctd->cond_currev_id != evid) { 107425e8c5aaSvikram CT_DEBUG((CE_WARN, "%sACK for non-current event " 107525e8c5aaSvikram "(type=%s, id=%llu) on removed device", 107625e8c5aaSvikram cmd == CT_NACK ? "N" : "", 107725e8c5aaSvikram state_str(evtype), (unsigned long long)evid)); 107825e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", 107925e8c5aaSvikram ctid)); 108025e8c5aaSvikram } else { 108125e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 108225e8c5aaSvikram CT_DEBUG((CE_WARN, "contract_ack: no such device: " 108325e8c5aaSvikram "ctid: %d", ctid)); 108425e8c5aaSvikram } 108525e8c5aaSvikram error = (ct->ct_state == CTS_DEAD) ? ESRCH : 108625e8c5aaSvikram ((cmd == CT_NACK) ? 
ETIMEDOUT : 0); 108725e8c5aaSvikram mutex_exit(&ct->ct_lock); 108825e8c5aaSvikram return (error); 108925e8c5aaSvikram } 109025e8c5aaSvikram 109125e8c5aaSvikram /* 109225e8c5aaSvikram * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock 109325e8c5aaSvikram */ 109425e8c5aaSvikram mutex_exit(&ct->ct_lock); 109525e8c5aaSvikram 109625e8c5aaSvikram mutex_enter(&DEVI(dip)->devi_ct_lock); 109725e8c5aaSvikram mutex_enter(&ct->ct_lock); 109825e8c5aaSvikram if (ctd->cond_currev_id != evid) { 109925e8c5aaSvikram char *buf; 110025e8c5aaSvikram mutex_exit(&ct->ct_lock); 110125e8c5aaSvikram mutex_exit(&DEVI(dip)->devi_ct_lock); 110225e8c5aaSvikram ndi_hold_devi(dip); 110325e8c5aaSvikram buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 110425e8c5aaSvikram (void) ddi_pathname(dip, buf); 110525e8c5aaSvikram ddi_release_devi(dip); 110625e8c5aaSvikram CT_DEBUG((CE_WARN, "%sACK for non-current event" 110725e8c5aaSvikram "(type=%s, id=%llu) on device %s", 110825e8c5aaSvikram cmd == CT_NACK ? "N" : "", 110925e8c5aaSvikram state_str(evtype), (unsigned long long)evid, buf)); 111025e8c5aaSvikram kmem_free(buf, MAXPATHLEN); 111125e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", 111225e8c5aaSvikram cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); 111325e8c5aaSvikram return (cmd == CT_ACK ? 0 : ETIMEDOUT); 111425e8c5aaSvikram } 111525e8c5aaSvikram 111625e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 111725e8c5aaSvikram ASSERT(cmd == CT_ACK || cmd == CT_NACK); 111825e8c5aaSvikram 111925e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", 112025e8c5aaSvikram cmd == CT_NACK ? 
"N" : "", ctid)); 112125e8c5aaSvikram 112225e8c5aaSvikram ctd->cond_currev_ack = cmd; 112325e8c5aaSvikram mutex_exit(&ct->ct_lock); 112425e8c5aaSvikram 112525e8c5aaSvikram ct_barrier_decr(dip); 112625e8c5aaSvikram mutex_exit(&DEVI(dip)->devi_ct_lock); 112725e8c5aaSvikram 112825e8c5aaSvikram CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); 112925e8c5aaSvikram 113025e8c5aaSvikram return (0); 113125e8c5aaSvikram } 113225e8c5aaSvikram 113325e8c5aaSvikram /* 113425e8c5aaSvikram * Invoked when a userland contract holder approves (i.e. ACKs) a state change 113525e8c5aaSvikram */ 113625e8c5aaSvikram static int 113725e8c5aaSvikram contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) 113825e8c5aaSvikram { 113925e8c5aaSvikram return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); 114025e8c5aaSvikram } 114125e8c5aaSvikram 114225e8c5aaSvikram /* 114325e8c5aaSvikram * Invoked when a userland contract holder blocks (i.e. NACKs) a state change 114425e8c5aaSvikram */ 114525e8c5aaSvikram static int 114625e8c5aaSvikram contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) 114725e8c5aaSvikram { 114825e8c5aaSvikram return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); 114925e8c5aaSvikram } 115025e8c5aaSvikram 115125e8c5aaSvikram /* 115225e8c5aaSvikram * Creates a new contract synchronously with the breaking of an existing 115325e8c5aaSvikram * contract. Currently not supported. 
115425e8c5aaSvikram */ 115525e8c5aaSvikram /*ARGSUSED*/ 115625e8c5aaSvikram static int 115725e8c5aaSvikram contract_device_newct(contract_t *ct) 115825e8c5aaSvikram { 115925e8c5aaSvikram return (ENOTSUP); 116025e8c5aaSvikram } 116125e8c5aaSvikram 116225e8c5aaSvikram /* 116325e8c5aaSvikram * Core device contract implementation entry points 116425e8c5aaSvikram */ 116525e8c5aaSvikram static contops_t contract_device_ops = { 116625e8c5aaSvikram contract_device_free, /* contop_free */ 116725e8c5aaSvikram contract_device_abandon, /* contop_abandon */ 116825e8c5aaSvikram contract_device_destroy, /* contop_destroy */ 116925e8c5aaSvikram contract_device_status, /* contop_status */ 117025e8c5aaSvikram contract_device_ack, /* contop_ack */ 117125e8c5aaSvikram contract_device_nack, /* contop_nack */ 117225e8c5aaSvikram contract_qack_notsup, /* contop_qack */ 117325e8c5aaSvikram contract_device_newct /* contop_newct */ 117425e8c5aaSvikram }; 117525e8c5aaSvikram 117625e8c5aaSvikram /* 117725e8c5aaSvikram * contract_device_init 117825e8c5aaSvikram * 117925e8c5aaSvikram * Initializes the device contract type. 118025e8c5aaSvikram */ 118125e8c5aaSvikram void 118225e8c5aaSvikram contract_device_init(void) 118325e8c5aaSvikram { 118425e8c5aaSvikram device_type = contract_type_init(CTT_DEVICE, "device", 118525e8c5aaSvikram &contract_device_ops, contract_device_default); 118625e8c5aaSvikram } 118725e8c5aaSvikram 118825e8c5aaSvikram /* 118925e8c5aaSvikram * contract_device_create 119025e8c5aaSvikram * 119125e8c5aaSvikram * create a device contract given template "tmpl" and the "owner" process. 119225e8c5aaSvikram * May fail and return NULL if project.max-contracts would have been exceeded. 
119325e8c5aaSvikram * 119425e8c5aaSvikram * Common device contract creation routine called for both open-time and 119525e8c5aaSvikram * non-open time device contract creation 119625e8c5aaSvikram */ 119725e8c5aaSvikram static cont_device_t * 119825e8c5aaSvikram contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, 119925e8c5aaSvikram proc_t *owner, int *errorp) 120025e8c5aaSvikram { 120125e8c5aaSvikram cont_device_t *ctd; 120225e8c5aaSvikram char *minor; 120325e8c5aaSvikram char *path; 120425e8c5aaSvikram dev_info_t *dip; 120525e8c5aaSvikram 120625e8c5aaSvikram ASSERT(dtmpl != NULL); 120725e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); 120825e8c5aaSvikram ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); 120925e8c5aaSvikram ASSERT(errorp); 121025e8c5aaSvikram 121125e8c5aaSvikram *errorp = 0; 121225e8c5aaSvikram 121325e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 121425e8c5aaSvikram 121525e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 121625e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 121725e8c5aaSvikram bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); 121825e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 121925e8c5aaSvikram 122025e8c5aaSvikram dip = e_ddi_hold_devi_by_path(path, 0); 122125e8c5aaSvikram if (dip == NULL) { 122225e8c5aaSvikram cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " 122325e8c5aaSvikram "for device path (%s)", path); 122425e8c5aaSvikram kmem_free(path, MAXPATHLEN); 122525e8c5aaSvikram *errorp = ERANGE; 122625e8c5aaSvikram return (NULL); 122725e8c5aaSvikram } 122825e8c5aaSvikram 122925e8c5aaSvikram /* 123025e8c5aaSvikram * Lock out any parallel contract negotiations 123125e8c5aaSvikram */ 123225e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 123325e8c5aaSvikram ct_barrier_acquire(dip); 123425e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 123525e8c5aaSvikram 123625e8c5aaSvikram minor = i_ddi_strdup(path, 
KM_SLEEP); 123725e8c5aaSvikram kmem_free(path, MAXPATHLEN); 123825e8c5aaSvikram 123925e8c5aaSvikram (void) contract_type_pbundle(device_type, owner); 124025e8c5aaSvikram 124125e8c5aaSvikram ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); 124225e8c5aaSvikram 124325e8c5aaSvikram /* 124425e8c5aaSvikram * Only we hold a refernce to this contract. Safe to access 124525e8c5aaSvikram * the fields without a ct_lock 124625e8c5aaSvikram */ 124725e8c5aaSvikram ctd->cond_minor = minor; 124825e8c5aaSvikram /* 124925e8c5aaSvikram * It is safe to set the dip pointer in the contract 125025e8c5aaSvikram * as the contract will always be destroyed before the dip 125125e8c5aaSvikram * is released 125225e8c5aaSvikram */ 125325e8c5aaSvikram ctd->cond_dip = dip; 125425e8c5aaSvikram ctd->cond_devt = dev; 125525e8c5aaSvikram ctd->cond_spec = spec_type; 125625e8c5aaSvikram 125725e8c5aaSvikram /* 125825e8c5aaSvikram * Since we are able to lookup the device, it is either 125925e8c5aaSvikram * online or degraded 126025e8c5aaSvikram */ 126125e8c5aaSvikram ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 126225e8c5aaSvikram CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; 126325e8c5aaSvikram 126425e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 126525e8c5aaSvikram ctd->cond_aset = dtmpl->ctd_aset; 126625e8c5aaSvikram ctd->cond_noneg = dtmpl->ctd_noneg; 126725e8c5aaSvikram 126825e8c5aaSvikram /* 126925e8c5aaSvikram * contract_ctor() initailizes the common portion of a contract 127025e8c5aaSvikram * contract_dtor() destroys the common portion of a contract 127125e8c5aaSvikram */ 127225e8c5aaSvikram if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, 127325e8c5aaSvikram ctd, 0, owner, B_TRUE)) { 127425e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 127525e8c5aaSvikram /* 127625e8c5aaSvikram * contract_device_free() destroys the type specific 127725e8c5aaSvikram * portion of a contract and frees the contract. 
127825e8c5aaSvikram * The "minor" path and "cred" is a part of the type specific 127925e8c5aaSvikram * portion of the contract and will be freed by 128025e8c5aaSvikram * contract_device_free() 128125e8c5aaSvikram */ 128225e8c5aaSvikram contract_device_free(&ctd->cond_contract); 128325e8c5aaSvikram 128425e8c5aaSvikram /* release barrier */ 128525e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 128625e8c5aaSvikram ct_barrier_release(dip); 128725e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 128825e8c5aaSvikram 128925e8c5aaSvikram ddi_release_devi(dip); 129025e8c5aaSvikram *errorp = EAGAIN; 129125e8c5aaSvikram return (NULL); 129225e8c5aaSvikram } 129325e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 129425e8c5aaSvikram 129525e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 129625e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; 129725e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; 129825e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 129925e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 130025e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 130125e8c5aaSvikram 130225e8c5aaSvikram /* 130325e8c5aaSvikram * Insert device contract into list hanging off the dip 130425e8c5aaSvikram * Bump up the ref-count on the contract to reflect this 130525e8c5aaSvikram */ 130625e8c5aaSvikram contract_hold(&ctd->cond_contract); 130725e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 130825e8c5aaSvikram list_insert_tail(&(DEVI(dip)->devi_ct), ctd); 130925e8c5aaSvikram 131025e8c5aaSvikram /* release barrier */ 131125e8c5aaSvikram ct_barrier_release(dip); 131225e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 131325e8c5aaSvikram 131425e8c5aaSvikram ddi_release_devi(dip); 131525e8c5aaSvikram 131625e8c5aaSvikram return (ctd); 131725e8c5aaSvikram } 131825e8c5aaSvikram 131925e8c5aaSvikram /* 132025e8c5aaSvikram * Called when a device is successfully opened to create an open-time contract 
132125e8c5aaSvikram * i.e. synchronously with a device open. 132225e8c5aaSvikram */ 132325e8c5aaSvikram int 132425e8c5aaSvikram contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) 132525e8c5aaSvikram { 132625e8c5aaSvikram ctmpl_device_t *dtmpl; 132725e8c5aaSvikram ct_template_t *tmpl; 132825e8c5aaSvikram cont_device_t *ctd; 132925e8c5aaSvikram char *path; 133025e8c5aaSvikram klwp_t *lwp; 133125e8c5aaSvikram int error; 133225e8c5aaSvikram 133325e8c5aaSvikram if (ctpp) 133425e8c5aaSvikram *ctpp = NULL; 133525e8c5aaSvikram 133625e8c5aaSvikram /* 133725e8c5aaSvikram * Check if we are in user-context i.e. if we have an lwp 133825e8c5aaSvikram */ 133925e8c5aaSvikram lwp = ttolwp(curthread); 134025e8c5aaSvikram if (lwp == NULL) { 134125e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); 134225e8c5aaSvikram return (0); 134325e8c5aaSvikram } 134425e8c5aaSvikram 134525e8c5aaSvikram tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); 134625e8c5aaSvikram if (tmpl == NULL) { 134725e8c5aaSvikram return (0); 134825e8c5aaSvikram } 134925e8c5aaSvikram dtmpl = tmpl->ctmpl_data; 135025e8c5aaSvikram 135125e8c5aaSvikram /* 135225e8c5aaSvikram * If the user set a minor path in the template before an open, 135325e8c5aaSvikram * ignore it. We use the minor path of the actual minor opened. 135425e8c5aaSvikram */ 135525e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 135625e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 135725e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " 135825e8c5aaSvikram "ignoring device minor path in active template: %s", 135925e8c5aaSvikram curproc->p_pid, dtmpl->ctd_minor)); 136025e8c5aaSvikram /* 136125e8c5aaSvikram * This is a copy of the actual activated template. 136225e8c5aaSvikram * Safe to make changes such as freeing the minor 136325e8c5aaSvikram * path in the template. 
136425e8c5aaSvikram */ 136525e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 136625e8c5aaSvikram dtmpl->ctd_minor = NULL; 136725e8c5aaSvikram } 136825e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 136925e8c5aaSvikram 137025e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 137125e8c5aaSvikram 137225e8c5aaSvikram if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { 137325e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " 137425e8c5aaSvikram "minor path from dev_t,spec {%lu, %d} for process (%d)", 137525e8c5aaSvikram dev, spec_type, curproc->p_pid)); 137625e8c5aaSvikram ctmpl_free(tmpl); 137725e8c5aaSvikram kmem_free(path, MAXPATHLEN); 137825e8c5aaSvikram return (1); 137925e8c5aaSvikram } 138025e8c5aaSvikram 138125e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 138225e8c5aaSvikram ASSERT(dtmpl->ctd_minor == NULL); 138325e8c5aaSvikram dtmpl->ctd_minor = path; 138425e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 138525e8c5aaSvikram 138625e8c5aaSvikram ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); 138725e8c5aaSvikram 138825e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 138925e8c5aaSvikram ASSERT(dtmpl->ctd_minor); 139025e8c5aaSvikram dtmpl->ctd_minor = NULL; 139125e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 139225e8c5aaSvikram ctmpl_free(tmpl); 139325e8c5aaSvikram kmem_free(path, MAXPATHLEN); 139425e8c5aaSvikram 139525e8c5aaSvikram if (ctd == NULL) { 139625e8c5aaSvikram cmn_err(CE_NOTE, "contract_device_open(): Failed to " 139725e8c5aaSvikram "create device contract for process (%d) holding " 139825e8c5aaSvikram "device (devt = %lu, spec_type = %d)", 139925e8c5aaSvikram curproc->p_pid, dev, spec_type); 140025e8c5aaSvikram return (1); 140125e8c5aaSvikram } 140225e8c5aaSvikram 140325e8c5aaSvikram if (ctpp) { 140425e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 140525e8c5aaSvikram *ctpp = &ctd->cond_contract; 140625e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 
140725e8c5aaSvikram } 140825e8c5aaSvikram return (0); 140925e8c5aaSvikram } 141025e8c5aaSvikram 141125e8c5aaSvikram /* 141225e8c5aaSvikram * Called during contract negotiation by the device contract framework to wait 141325e8c5aaSvikram * for ACKs or NACKs from contract holders. If all responses are not received 141425e8c5aaSvikram * before a specified timeout, this routine times out. 141525e8c5aaSvikram */ 141625e8c5aaSvikram static uint_t 141725e8c5aaSvikram wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) 141825e8c5aaSvikram { 141925e8c5aaSvikram cont_device_t *ctd; 142025e8c5aaSvikram int timed_out = 0; 142125e8c5aaSvikram int result = CT_NONE; 142225e8c5aaSvikram int ack; 142325e8c5aaSvikram char *f = "wait_for_acks"; 142425e8c5aaSvikram 142525e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 142625e8c5aaSvikram ASSERT(dip); 142725e8c5aaSvikram ASSERT(evtype & CT_DEV_ALLEVENT); 142825e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 142925e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 143025e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 143125e8c5aaSvikram 143225e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); 143325e8c5aaSvikram 143425e8c5aaSvikram if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { 143525e8c5aaSvikram /* 143625e8c5aaSvikram * some contract owner(s) didn't respond in time 143725e8c5aaSvikram */ 143825e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); 143925e8c5aaSvikram timed_out = 1; 144025e8c5aaSvikram } 144125e8c5aaSvikram 144225e8c5aaSvikram ack = 0; 144325e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 144425e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 144525e8c5aaSvikram 144625e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 144725e8c5aaSvikram 144825e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 144925e8c5aaSvikram 145025e8c5aaSvikram if (dev != DDI_DEV_T_ANY 
&& dev != ctd->cond_devt) { 145125e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 145225e8c5aaSvikram continue; 145325e8c5aaSvikram } 145425e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 145525e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 145625e8c5aaSvikram continue; 145725e8c5aaSvikram } 145825e8c5aaSvikram 145925e8c5aaSvikram /* skip if non-negotiable contract */ 146025e8c5aaSvikram if (ctd->cond_noneg) { 146125e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 146225e8c5aaSvikram continue; 146325e8c5aaSvikram } 146425e8c5aaSvikram 146525e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 146625e8c5aaSvikram if (ctd->cond_currev_ack == CT_NACK) { 146725e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", 146825e8c5aaSvikram f, (void *)dip)); 146925e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 147025e8c5aaSvikram return (CT_NACK); 147125e8c5aaSvikram } else if (ctd->cond_currev_ack == CT_ACK) { 147225e8c5aaSvikram ack = 1; 147325e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", 147425e8c5aaSvikram f, (void *)dip)); 147525e8c5aaSvikram } 147625e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 147725e8c5aaSvikram } 147825e8c5aaSvikram 147925e8c5aaSvikram if (ack) { 148025e8c5aaSvikram result = CT_ACK; 148125e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); 148225e8c5aaSvikram } else if (timed_out) { 148325e8c5aaSvikram result = CT_NONE; 148425e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", 148525e8c5aaSvikram f, (void *)dip)); 148625e8c5aaSvikram } else { 148725e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", 148825e8c5aaSvikram f, (void *)dip)); 148925e8c5aaSvikram } 149025e8c5aaSvikram 149125e8c5aaSvikram 149225e8c5aaSvikram return (result); 149325e8c5aaSvikram } 149425e8c5aaSvikram 149525e8c5aaSvikram /* 149625e8c5aaSvikram * Determines the current state of a device (i.e a devinfo node 149725e8c5aaSvikram */ 
149825e8c5aaSvikram static int 149925e8c5aaSvikram get_state(dev_info_t *dip) 150025e8c5aaSvikram { 150125e8c5aaSvikram if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) 150225e8c5aaSvikram return (CT_DEV_EV_OFFLINE); 150325e8c5aaSvikram else if (DEVI_IS_DEVICE_DEGRADED(dip)) 150425e8c5aaSvikram return (CT_DEV_EV_DEGRADED); 150525e8c5aaSvikram else 150625e8c5aaSvikram return (CT_DEV_EV_ONLINE); 150725e8c5aaSvikram } 150825e8c5aaSvikram 150925e8c5aaSvikram /* 151025e8c5aaSvikram * Sets the current state of a device in a device contract 151125e8c5aaSvikram */ 151225e8c5aaSvikram static void 151325e8c5aaSvikram set_cond_state(dev_info_t *dip) 151425e8c5aaSvikram { 151525e8c5aaSvikram uint_t state = get_state(dip); 151625e8c5aaSvikram cont_device_t *ctd; 151725e8c5aaSvikram 151825e8c5aaSvikram /* verify that barrier is held */ 151925e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 152025e8c5aaSvikram 152125e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 152225e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 152325e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 152425e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 152525e8c5aaSvikram ctd->cond_state = state; 152625e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 152725e8c5aaSvikram } 152825e8c5aaSvikram } 152925e8c5aaSvikram 153025e8c5aaSvikram /* 153125e8c5aaSvikram * Core routine called by event-specific routines when an event occurs. 153225e8c5aaSvikram * Determines if an event should be be published, and if it is to be 153325e8c5aaSvikram * published, whether a negotiation should take place. Also implements 153425e8c5aaSvikram * NEGEND events which publish the final disposition of an event after 153525e8c5aaSvikram * negotiations are complete. 
153625e8c5aaSvikram * 153725e8c5aaSvikram * When an event occurs on a minor node, this routine walks the list of 153825e8c5aaSvikram * contracts hanging off a devinfo node and for each contract on the affected 153925e8c5aaSvikram * dip, evaluates the following cases 154025e8c5aaSvikram * 154125e8c5aaSvikram * a. an event that is synchronous, breaks the contract and NONEG not set 154225e8c5aaSvikram * - bumps up the outstanding negotiation counts on the dip 154325e8c5aaSvikram * - marks the dip as undergoing negotiation (devi_ct_neg) 154425e8c5aaSvikram * - event of type CTE_NEG is published 154525e8c5aaSvikram * b. an event that is synchronous, breaks the contract and NONEG is set 154625e8c5aaSvikram * - sets the final result to CT_NACK, event is blocked 154725e8c5aaSvikram * - does not publish an event 154825e8c5aaSvikram * c. event is asynchronous and breaks the contract 154925e8c5aaSvikram * - publishes a critical event irrespect of whether the NONEG 155025e8c5aaSvikram * flag is set, since the contract will be broken and contract 155125e8c5aaSvikram * owner needs to be informed. 155225e8c5aaSvikram * d. No contract breakage but the owner has subscribed to the event 155325e8c5aaSvikram * - publishes the event irrespective of the NONEG event as the 155425e8c5aaSvikram * owner has explicitly subscribed to the event. 155525e8c5aaSvikram * e. NEGEND event 155625e8c5aaSvikram * - publishes a critical event. Should only be doing this if 155725e8c5aaSvikram * if NONEG is not set. 155825e8c5aaSvikram * f. all other events 155925e8c5aaSvikram * - Since a contract is not broken and this event has not been 156025e8c5aaSvikram * subscribed to, this event does not need to be published for 156125e8c5aaSvikram * for this contract. 156225e8c5aaSvikram * 156325e8c5aaSvikram * Once an event is published, what happens next depends on the type of 156425e8c5aaSvikram * event: 156525e8c5aaSvikram * 156625e8c5aaSvikram * a. 
NEGEND event 156725e8c5aaSvikram * - cleanup all state associated with the preceding negotiation 156825e8c5aaSvikram * and return CT_ACK to the caller of contract_device_publish() 156925e8c5aaSvikram * b. NACKed event 157025e8c5aaSvikram * - One or more contracts had the NONEG term, so the event was 157125e8c5aaSvikram * blocked. Return CT_NACK to the caller. 157225e8c5aaSvikram * c. Negotiated event 157325e8c5aaSvikram * - Call wait_for_acks() to wait for responses from contract 157425e8c5aaSvikram * holders. The end result is either CT_ACK (event is permitted), 157525e8c5aaSvikram * CT_NACK (event is blocked) or CT_NONE (no contract owner) 157625e8c5aaSvikram * responded. This result is returned back to the caller. 157725e8c5aaSvikram * d. All other events 157825e8c5aaSvikram * - If the event was asynchronous (i.e. not negotiated) or 157925e8c5aaSvikram * a contract was not broken return CT_ACK to the caller. 158025e8c5aaSvikram */ 158125e8c5aaSvikram static uint_t 158225e8c5aaSvikram contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, 158325e8c5aaSvikram uint_t evtype, nvlist_t *tnvl) 158425e8c5aaSvikram { 158525e8c5aaSvikram cont_device_t *ctd; 158625e8c5aaSvikram uint_t result = CT_NONE; 158725e8c5aaSvikram uint64_t evid = 0; 158825e8c5aaSvikram uint64_t nevid = 0; 158925e8c5aaSvikram char *path = NULL; 159025e8c5aaSvikram int negend; 159125e8c5aaSvikram int match; 159225e8c5aaSvikram int sync = 0; 159325e8c5aaSvikram contract_t *ct; 159425e8c5aaSvikram ct_kevent_t *event; 159525e8c5aaSvikram nvlist_t *nvl; 159625e8c5aaSvikram int broken = 0; 159725e8c5aaSvikram 159825e8c5aaSvikram ASSERT(dip); 159925e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 160025e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 160125e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 160225e8c5aaSvikram ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); 160325e8c5aaSvikram 160425e8c5aaSvikram /* Is this a synchronous state change ? 
*/ 160525e8c5aaSvikram if (evtype != CT_EV_NEGEND) { 160625e8c5aaSvikram sync = is_sync_neg(get_state(dip), evtype); 160725e8c5aaSvikram /* NOP if unsupported transition */ 160825e8c5aaSvikram if (sync == -2 || sync == -1) { 160925e8c5aaSvikram DEVI(dip)->devi_flags |= DEVI_CT_NOP; 161025e8c5aaSvikram result = (sync == -2) ? CT_ACK : CT_NONE; 161125e8c5aaSvikram goto out; 161225e8c5aaSvikram } 161325e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: is%s sync state change", 161425e8c5aaSvikram sync ? "" : " not")); 161525e8c5aaSvikram } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { 161625e8c5aaSvikram DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; 161725e8c5aaSvikram result = CT_ACK; 161825e8c5aaSvikram goto out; 161925e8c5aaSvikram } 162025e8c5aaSvikram 162125e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 162225e8c5aaSvikram (void) ddi_pathname(dip, path); 162325e8c5aaSvikram 162425e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 162525e8c5aaSvikram 162625e8c5aaSvikram /* 162725e8c5aaSvikram * Negotiation end - set the state of the device in the contract 162825e8c5aaSvikram */ 162925e8c5aaSvikram if (evtype == CT_EV_NEGEND) { 163025e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); 163125e8c5aaSvikram set_cond_state(dip); 163225e8c5aaSvikram } 163325e8c5aaSvikram 163425e8c5aaSvikram /* 163525e8c5aaSvikram * If this device didn't go through negotiation, don't publish 163625e8c5aaSvikram * a NEGEND event - simply release the barrier to allow other 163725e8c5aaSvikram * device events in. 163825e8c5aaSvikram */ 163925e8c5aaSvikram negend = 0; 164025e8c5aaSvikram if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { 164125e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: no negend reqd. 
release barrier")); 164225e8c5aaSvikram ct_barrier_release(dip); 164325e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 164425e8c5aaSvikram result = CT_ACK; 164525e8c5aaSvikram goto out; 164625e8c5aaSvikram } else if (evtype == CT_EV_NEGEND) { 164725e8c5aaSvikram /* 164825e8c5aaSvikram * There are negotiated contract breakages that 164925e8c5aaSvikram * need a NEGEND event 165025e8c5aaSvikram */ 165125e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 165225e8c5aaSvikram negend = 1; 165325e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: setting negend flag")); 165425e8c5aaSvikram } else { 165525e8c5aaSvikram /* 165625e8c5aaSvikram * This is a new event, not a NEGEND event. Wait for previous 165725e8c5aaSvikram * contract events to complete. 165825e8c5aaSvikram */ 165925e8c5aaSvikram ct_barrier_acquire(dip); 166025e8c5aaSvikram } 166125e8c5aaSvikram 166225e8c5aaSvikram 166325e8c5aaSvikram match = 0; 166425e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 166525e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 166625e8c5aaSvikram 166725e8c5aaSvikram ctid_t ctid; 166825e8c5aaSvikram size_t len = strlen(path); 166925e8c5aaSvikram 167025e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 167125e8c5aaSvikram 167225e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 167325e8c5aaSvikram ASSERT(ctd->cond_minor); 167425e8c5aaSvikram ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && 167525e8c5aaSvikram ctd->cond_minor[len] == ':'); 167625e8c5aaSvikram 167725e8c5aaSvikram if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 167825e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 167925e8c5aaSvikram continue; 168025e8c5aaSvikram } 168125e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 168225e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 168325e8c5aaSvikram continue; 168425e8c5aaSvikram } 168525e8c5aaSvikram 168625e8c5aaSvikram /* We have a matching contract */ 168725e8c5aaSvikram match = 1; 168825e8c5aaSvikram ctid = 
ctd->cond_contract.ct_id; 168925e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", 169025e8c5aaSvikram ctid)); 169125e8c5aaSvikram 169225e8c5aaSvikram /* 169325e8c5aaSvikram * There are 4 possible cases 169425e8c5aaSvikram * 1. A contract is broken (dev not in acceptable state) and 169525e8c5aaSvikram * the state change is synchronous - start negotiation 169625e8c5aaSvikram * by sending a CTE_NEG critical event. 169725e8c5aaSvikram * 2. A contract is broken and the state change is 169825e8c5aaSvikram * asynchronous - just send a critical event and 169925e8c5aaSvikram * break the contract. 170025e8c5aaSvikram * 3. Contract is not broken, but consumer has subscribed 170125e8c5aaSvikram * to the event as a critical or informative event 170225e8c5aaSvikram * - just send the appropriate event 170325e8c5aaSvikram * 4. contract waiting for negend event - just send the critical 170425e8c5aaSvikram * NEGEND event. 170525e8c5aaSvikram */ 170625e8c5aaSvikram broken = 0; 170725e8c5aaSvikram if (!negend && !(evtype & ctd->cond_aset)) { 170825e8c5aaSvikram broken = 1; 170925e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: Contract broken: %d", 171025e8c5aaSvikram ctid)); 171125e8c5aaSvikram } 171225e8c5aaSvikram 171325e8c5aaSvikram /* 171425e8c5aaSvikram * Don't send event if 171525e8c5aaSvikram * - contract is not broken AND 171625e8c5aaSvikram * - contract holder has not subscribed to this event AND 171725e8c5aaSvikram * - contract not waiting for a NEGEND event 171825e8c5aaSvikram */ 171925e8c5aaSvikram if (!broken && !EVSENDP(ctd, evtype) && 172025e8c5aaSvikram !ctd->cond_neg) { 172125e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_publish(): " 172225e8c5aaSvikram "contract (%d): no publish reqd: event %d", 172325e8c5aaSvikram ctd->cond_contract.ct_id, evtype)); 172425e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 172525e8c5aaSvikram continue; 172625e8c5aaSvikram } 172725e8c5aaSvikram 172825e8c5aaSvikram /* 172925e8c5aaSvikram * Note: need to 
kmem_zalloc() the event so mutexes are 173025e8c5aaSvikram * initialized automatically 173125e8c5aaSvikram */ 173225e8c5aaSvikram ct = &ctd->cond_contract; 173325e8c5aaSvikram event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); 173425e8c5aaSvikram event->cte_type = evtype; 173525e8c5aaSvikram 173625e8c5aaSvikram if (broken && sync) { 173725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: broken + sync: " 173825e8c5aaSvikram "ctid: %d", ctid)); 173925e8c5aaSvikram ASSERT(!negend); 174025e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 174125e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 174225e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 174325e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 174425e8c5aaSvikram if (ctd->cond_noneg) { 174525e8c5aaSvikram /* Nothing to publish. Event has been blocked */ 174625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync and noneg:" 174725e8c5aaSvikram "not publishing blocked ev: ctid: %d", 174825e8c5aaSvikram ctid)); 174925e8c5aaSvikram result = CT_NACK; 175025e8c5aaSvikram kmem_free(event, sizeof (ct_kevent_t)); 175125e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 175225e8c5aaSvikram continue; 175325e8c5aaSvikram } 175425e8c5aaSvikram event->cte_flags = CTE_NEG; /* critical neg. 
event */ 175525e8c5aaSvikram ctd->cond_currev_type = event->cte_type; 175625e8c5aaSvikram ct_barrier_incr(dip); 175725e8c5aaSvikram DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */ 175825e8c5aaSvikram ctd->cond_neg = 1; 175925e8c5aaSvikram } else if (broken && !sync) { 176025e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d", 176125e8c5aaSvikram ctid)); 176225e8c5aaSvikram ASSERT(!negend); 176325e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 176425e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 176525e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 176625e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 176725e8c5aaSvikram event->cte_flags = 0; /* critical event */ 176825e8c5aaSvikram } else if (EVSENDP(ctd, event->cte_type)) { 176925e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d", 177025e8c5aaSvikram ctid)); 177125e8c5aaSvikram ASSERT(!negend); 177225e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 177325e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 177425e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 177525e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 177625e8c5aaSvikram event->cte_flags = EVINFOP(ctd, event->cte_type) ? 177725e8c5aaSvikram CTE_INFO : 0; 177825e8c5aaSvikram } else if (ctd->cond_neg) { 177925e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid)); 178025e8c5aaSvikram ASSERT(negend); 178125e8c5aaSvikram ASSERT(ctd->cond_noneg == 0); 178225e8c5aaSvikram nevid = ctd->cond_contract.ct_nevent ? 
178325e8c5aaSvikram ctd->cond_contract.ct_nevent->cte_id : 0; 178425e8c5aaSvikram ASSERT(ctd->cond_currev_id == nevid); 178525e8c5aaSvikram event->cte_flags = 0; /* NEGEND is always critical */ 178625e8c5aaSvikram ctd->cond_currev_id = 0; 178725e8c5aaSvikram ctd->cond_currev_type = 0; 178825e8c5aaSvikram ctd->cond_currev_ack = 0; 178925e8c5aaSvikram ctd->cond_neg = 0; 179025e8c5aaSvikram } else { 179125e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: not publishing event for " 179225e8c5aaSvikram "ctid: %d, evtype: %d", 179325e8c5aaSvikram ctd->cond_contract.ct_id, event->cte_type)); 179425e8c5aaSvikram ASSERT(!negend); 179525e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 179625e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 179725e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 179825e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 179925e8c5aaSvikram kmem_free(event, sizeof (ct_kevent_t)); 180025e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 180125e8c5aaSvikram continue; 180225e8c5aaSvikram } 180325e8c5aaSvikram 180425e8c5aaSvikram nvl = NULL; 180525e8c5aaSvikram if (tnvl) { 180625e8c5aaSvikram VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0); 180725e8c5aaSvikram if (negend) { 180825e8c5aaSvikram int32_t newct = 0; 180925e8c5aaSvikram ASSERT(ctd->cond_noneg == 0); 181025e8c5aaSvikram VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid) 181125e8c5aaSvikram == 0); 181225e8c5aaSvikram VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT, 181325e8c5aaSvikram &newct) == 0); 181425e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 181525e8c5aaSvikram newct == 1 ? 0 : 181625e8c5aaSvikram ctd->cond_contract.ct_id) == 0); 181725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d " 181825e8c5aaSvikram "CTS_NEVID: %llu, CTS_NEWCT: %s", 181925e8c5aaSvikram ctid, (unsigned long long)nevid, 182025e8c5aaSvikram newct ? 
"success" : "failure")); 182125e8c5aaSvikram 182225e8c5aaSvikram } 182325e8c5aaSvikram } 182425e8c5aaSvikram 182525e8c5aaSvikram if (ctd->cond_neg) { 182625e8c5aaSvikram ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1); 182725e8c5aaSvikram ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1); 182825e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt(); 182925e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = 183025e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start; 183125e8c5aaSvikram } 183225e8c5aaSvikram 183325e8c5aaSvikram /* 183425e8c5aaSvikram * by holding the dip's devi_ct_lock we ensure that 183525e8c5aaSvikram * all ACK/NACKs are held up until we have finished 183625e8c5aaSvikram * publishing to all contracts. 183725e8c5aaSvikram */ 183825e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 183925e8c5aaSvikram evid = cte_publish_all(ct, event, nvl, NULL); 184025e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 184125e8c5aaSvikram 184225e8c5aaSvikram if (ctd->cond_neg) { 184325e8c5aaSvikram ASSERT(!negend); 184425e8c5aaSvikram ASSERT(broken); 184525e8c5aaSvikram ASSERT(sync); 184625e8c5aaSvikram ASSERT(!ctd->cond_noneg); 184725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync break, setting evid" 184825e8c5aaSvikram ": %d", ctid)); 184925e8c5aaSvikram ctd->cond_currev_id = evid; 185025e8c5aaSvikram } else if (negend) { 185125e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 185225e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 185325e8c5aaSvikram } 185425e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 185525e8c5aaSvikram } 185625e8c5aaSvikram 185725e8c5aaSvikram /* 185825e8c5aaSvikram * If "negend" set counter back to initial state (-1) so that 185925e8c5aaSvikram * other events can be published. Also clear the negotiation flag 186025e8c5aaSvikram * on dip. 186125e8c5aaSvikram * 186225e8c5aaSvikram * 0 .. n are used for counting. 186325e8c5aaSvikram * -1 indicates counter is available for use. 
186425e8c5aaSvikram */ 186525e8c5aaSvikram if (negend) { 186625e8c5aaSvikram /* 186725e8c5aaSvikram * devi_ct_count not necessarily 0. We may have 186825e8c5aaSvikram * timed out in which case, count will be non-zero. 186925e8c5aaSvikram */ 187025e8c5aaSvikram ct_barrier_release(dip); 187125e8c5aaSvikram DEVI(dip)->devi_ct_neg = 0; 187225e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p", 187325e8c5aaSvikram (void *)dip)); 187425e8c5aaSvikram } else if (DEVI(dip)->devi_ct_neg) { 187525e8c5aaSvikram ASSERT(match); 187625e8c5aaSvikram ASSERT(!ct_barrier_empty(dip)); 187725e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p", 187825e8c5aaSvikram DEVI(dip)->devi_ct_count, (void *)dip)); 187925e8c5aaSvikram } else { 188025e8c5aaSvikram /* 188125e8c5aaSvikram * for non-negotiated events or subscribed events or no 188225e8c5aaSvikram * matching contracts 188325e8c5aaSvikram */ 188425e8c5aaSvikram ASSERT(ct_barrier_empty(dip)); 188525e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_neg == 0); 188625e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: " 188725e8c5aaSvikram "dip=%p", (void *)dip)); 188825e8c5aaSvikram 188925e8c5aaSvikram /* 189025e8c5aaSvikram * only this function when called from contract_device_negend() 189125e8c5aaSvikram * can reset the counter to READY state i.e. -1. This function 189225e8c5aaSvikram * is so called for every event whether a NEGEND event is needed 189325e8c5aaSvikram * or not, but the negend event is only published if the event 189425e8c5aaSvikram * whose end they signal is a negotiated event for the contract. 
189525e8c5aaSvikram */ 189625e8c5aaSvikram } 189725e8c5aaSvikram 189825e8c5aaSvikram if (!match) { 189925e8c5aaSvikram /* No matching contracts */ 190025e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: No matching contract")); 190125e8c5aaSvikram result = CT_NONE; 190225e8c5aaSvikram } else if (result == CT_NACK) { 190325e8c5aaSvikram /* a non-negotiable contract exists and this is a neg. event */ 190425e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract")); 190525e8c5aaSvikram (void) wait_for_acks(dip, dev, spec_type, evtype); 190625e8c5aaSvikram } else if (DEVI(dip)->devi_ct_neg) { 190725e8c5aaSvikram /* one or more contracts going through negotations */ 190825e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync contract: waiting")); 190925e8c5aaSvikram result = wait_for_acks(dip, dev, spec_type, evtype); 191025e8c5aaSvikram } else { 191125e8c5aaSvikram /* no negotiated contracts or no broken contracts or NEGEND */ 191225e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: async/no-break/negend")); 191325e8c5aaSvikram result = CT_ACK; 191425e8c5aaSvikram } 191525e8c5aaSvikram 191625e8c5aaSvikram /* 191725e8c5aaSvikram * Release the lock only now so that the only point where we 191825e8c5aaSvikram * drop the lock is in wait_for_acks(). 
This is so that we don't 191925e8c5aaSvikram * miss cv_signal/cv_broadcast from contract holders 192025e8c5aaSvikram */ 192125e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock")); 192225e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 192325e8c5aaSvikram 192425e8c5aaSvikram out: 192525e8c5aaSvikram nvlist_free(tnvl); 192625e8c5aaSvikram if (path) 192725e8c5aaSvikram kmem_free(path, MAXPATHLEN); 192825e8c5aaSvikram 192925e8c5aaSvikram 193025e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result))); 193125e8c5aaSvikram return (result); 193225e8c5aaSvikram } 193325e8c5aaSvikram 193425e8c5aaSvikram 193525e8c5aaSvikram /* 193625e8c5aaSvikram * contract_device_offline 193725e8c5aaSvikram * 193825e8c5aaSvikram * Event publishing routine called by I/O framework when a device is offlined. 193925e8c5aaSvikram */ 194025e8c5aaSvikram ct_ack_t 194125e8c5aaSvikram contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type) 194225e8c5aaSvikram { 194325e8c5aaSvikram nvlist_t *nvl; 194425e8c5aaSvikram uint_t result; 194525e8c5aaSvikram uint_t evtype; 194625e8c5aaSvikram 194725e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 194825e8c5aaSvikram 194925e8c5aaSvikram evtype = CT_DEV_EV_OFFLINE; 195025e8c5aaSvikram result = contract_device_publish(dip, dev, spec_type, evtype, nvl); 195125e8c5aaSvikram 195225e8c5aaSvikram /* 195325e8c5aaSvikram * If a contract offline is NACKED, the framework expects us to call 195425e8c5aaSvikram * NEGEND ourselves, since we know the final result 195525e8c5aaSvikram */ 195625e8c5aaSvikram if (result == CT_NACK) { 195725e8c5aaSvikram contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); 195825e8c5aaSvikram } 195925e8c5aaSvikram 196025e8c5aaSvikram return (result); 196125e8c5aaSvikram } 196225e8c5aaSvikram 196325e8c5aaSvikram /* 196425e8c5aaSvikram * contract_device_degrade 196525e8c5aaSvikram * 196625e8c5aaSvikram * Event publishing routine called by I/O framework when a device 
196725e8c5aaSvikram * moves to degrade state. 196825e8c5aaSvikram */ 196925e8c5aaSvikram /*ARGSUSED*/ 197025e8c5aaSvikram void 197125e8c5aaSvikram contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) 197225e8c5aaSvikram { 197325e8c5aaSvikram nvlist_t *nvl; 197425e8c5aaSvikram uint_t evtype; 197525e8c5aaSvikram 197625e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 197725e8c5aaSvikram 197825e8c5aaSvikram evtype = CT_DEV_EV_DEGRADED; 197925e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 198025e8c5aaSvikram } 198125e8c5aaSvikram 198225e8c5aaSvikram /* 198325e8c5aaSvikram * contract_device_undegrade 198425e8c5aaSvikram * 198525e8c5aaSvikram * Event publishing routine called by I/O framework when a device 198625e8c5aaSvikram * moves from degraded state to online state. 198725e8c5aaSvikram */ 198825e8c5aaSvikram /*ARGSUSED*/ 198925e8c5aaSvikram void 199025e8c5aaSvikram contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) 199125e8c5aaSvikram { 199225e8c5aaSvikram nvlist_t *nvl; 199325e8c5aaSvikram uint_t evtype; 199425e8c5aaSvikram 199525e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 199625e8c5aaSvikram 199725e8c5aaSvikram evtype = CT_DEV_EV_ONLINE; 199825e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 199925e8c5aaSvikram } 200025e8c5aaSvikram 200125e8c5aaSvikram /* 200225e8c5aaSvikram * For all contracts which have undergone a negotiation (because the device 200325e8c5aaSvikram * moved out of the acceptable state for that contract and the state 200425e8c5aaSvikram * change is synchronous i.e. requires negotiation) this routine publishes 200525e8c5aaSvikram * a CT_EV_NEGEND event with the final disposition of the event. 200625e8c5aaSvikram * 200725e8c5aaSvikram * This event is always a critical event. 
200825e8c5aaSvikram */ 200925e8c5aaSvikram void 201025e8c5aaSvikram contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) 201125e8c5aaSvikram { 201225e8c5aaSvikram nvlist_t *nvl; 201325e8c5aaSvikram uint_t evtype; 201425e8c5aaSvikram 201525e8c5aaSvikram ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); 201625e8c5aaSvikram 201725e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " 201825e8c5aaSvikram "dip: %p", result, (void *)dip)); 201925e8c5aaSvikram 202025e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 202125e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 202225e8c5aaSvikram result == CT_EV_SUCCESS ? 1 : 0) == 0); 202325e8c5aaSvikram 202425e8c5aaSvikram evtype = CT_EV_NEGEND; 202525e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 202625e8c5aaSvikram 202725e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", 202825e8c5aaSvikram (void *)dip)); 202925e8c5aaSvikram } 203025e8c5aaSvikram 203125e8c5aaSvikram /* 203225e8c5aaSvikram * Wrapper routine called by other subsystems (such as LDI) to start 203325e8c5aaSvikram * negotiations when a synchronous device state change occurs. 203425e8c5aaSvikram * Returns CT_ACK or CT_NACK. 
203525e8c5aaSvikram */ 203625e8c5aaSvikram ct_ack_t 203725e8c5aaSvikram contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, 203825e8c5aaSvikram uint_t evtype) 203925e8c5aaSvikram { 204025e8c5aaSvikram int result; 204125e8c5aaSvikram 204225e8c5aaSvikram ASSERT(dip); 204325e8c5aaSvikram ASSERT(dev != NODEV); 204425e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 204525e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 204625e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 204725e8c5aaSvikram 204825e8c5aaSvikram switch (evtype) { 204925e8c5aaSvikram case CT_DEV_EV_OFFLINE: 205025e8c5aaSvikram result = contract_device_offline(dip, dev, spec_type); 205125e8c5aaSvikram break; 205225e8c5aaSvikram default: 205325e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " 205425e8c5aaSvikram "not supported: event (%d) for dev_t (%lu) and spec (%d), " 205525e8c5aaSvikram "dip (%p)", evtype, dev, spec_type, (void *)dip); 205625e8c5aaSvikram result = CT_NACK; 205725e8c5aaSvikram break; 205825e8c5aaSvikram } 205925e8c5aaSvikram 206025e8c5aaSvikram return (result); 206125e8c5aaSvikram } 206225e8c5aaSvikram 206325e8c5aaSvikram /* 206425e8c5aaSvikram * A wrapper routine called by other subsystems (such as the LDI) to 206525e8c5aaSvikram * finalize event processing for a state change event. For synchronous 206625e8c5aaSvikram * state changes, this publishes NEGEND events. For asynchronous i.e. 206725e8c5aaSvikram * non-negotiable events this publishes the event. 
206825e8c5aaSvikram */ 206925e8c5aaSvikram void 207025e8c5aaSvikram contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, 207125e8c5aaSvikram uint_t evtype, int ct_result) 207225e8c5aaSvikram { 207325e8c5aaSvikram ASSERT(dip); 207425e8c5aaSvikram ASSERT(dev != NODEV); 207525e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 207625e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 207725e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 207825e8c5aaSvikram 207925e8c5aaSvikram switch (evtype) { 208025e8c5aaSvikram case CT_DEV_EV_OFFLINE: 208125e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 208225e8c5aaSvikram break; 208325e8c5aaSvikram case CT_DEV_EV_DEGRADED: 208425e8c5aaSvikram contract_device_degrade(dip, dev, spec_type); 208525e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 208625e8c5aaSvikram break; 208725e8c5aaSvikram case CT_DEV_EV_ONLINE: 208825e8c5aaSvikram contract_device_undegrade(dip, dev, spec_type); 208925e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 209025e8c5aaSvikram break; 209125e8c5aaSvikram default: 209225e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " 209325e8c5aaSvikram "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", 209425e8c5aaSvikram evtype, dev, spec_type, (void *)dip); 209525e8c5aaSvikram break; 209625e8c5aaSvikram } 209725e8c5aaSvikram } 209825e8c5aaSvikram 209925e8c5aaSvikram /* 210025e8c5aaSvikram * Called by I/O framework when a devinfo node is freed to remove the 210125e8c5aaSvikram * association between a devinfo node and its contracts. 
210225e8c5aaSvikram */ 210325e8c5aaSvikram void 210425e8c5aaSvikram contract_device_remove_dip(dev_info_t *dip) 210525e8c5aaSvikram { 210625e8c5aaSvikram cont_device_t *ctd; 210725e8c5aaSvikram cont_device_t *next; 210825e8c5aaSvikram contract_t *ct; 210925e8c5aaSvikram 211025e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 211125e8c5aaSvikram ct_barrier_wait_for_release(dip); 211225e8c5aaSvikram 211325e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) { 211425e8c5aaSvikram next = list_next(&(DEVI(dip)->devi_ct), ctd); 211525e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 211625e8c5aaSvikram ct = &ctd->cond_contract; 211725e8c5aaSvikram /* 211825e8c5aaSvikram * Unlink the dip associated with this contract 211925e8c5aaSvikram */ 212025e8c5aaSvikram mutex_enter(&ct->ct_lock); 212125e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 212225e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 212325e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 212425e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: " 212525e8c5aaSvikram "ctid: %d", ct->ct_id)); 212625e8c5aaSvikram mutex_exit(&ct->ct_lock); 212725e8c5aaSvikram } 212825e8c5aaSvikram ASSERT(list_is_empty(&(DEVI(dip)->devi_ct))); 212925e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 213025e8c5aaSvikram } 213125e8c5aaSvikram 213225e8c5aaSvikram /* 213325e8c5aaSvikram * Barrier related routines 213425e8c5aaSvikram */ 213525e8c5aaSvikram static void 213625e8c5aaSvikram ct_barrier_acquire(dev_info_t *dip) 213725e8c5aaSvikram { 213825e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 213925e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier")); 214025e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 214125e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 214225e8c5aaSvikram DEVI(dip)->devi_ct_count = 0; 214325e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: 
thread owns barrier")); 214425e8c5aaSvikram } 214525e8c5aaSvikram 214625e8c5aaSvikram static void 214725e8c5aaSvikram ct_barrier_release(dev_info_t *dip) 214825e8c5aaSvikram { 214925e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 215025e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 215125e8c5aaSvikram DEVI(dip)->devi_ct_count = -1; 215225e8c5aaSvikram cv_broadcast(&(DEVI(dip)->devi_ct_cv)); 215325e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier")); 215425e8c5aaSvikram } 215525e8c5aaSvikram 215625e8c5aaSvikram static int 215725e8c5aaSvikram ct_barrier_held(dev_info_t *dip) 215825e8c5aaSvikram { 215925e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 216025e8c5aaSvikram return (DEVI(dip)->devi_ct_count != -1); 216125e8c5aaSvikram } 216225e8c5aaSvikram 216325e8c5aaSvikram static int 216425e8c5aaSvikram ct_barrier_empty(dev_info_t *dip) 216525e8c5aaSvikram { 216625e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 216725e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 216825e8c5aaSvikram return (DEVI(dip)->devi_ct_count == 0); 216925e8c5aaSvikram } 217025e8c5aaSvikram 217125e8c5aaSvikram static void 217225e8c5aaSvikram ct_barrier_wait_for_release(dev_info_t *dip) 217325e8c5aaSvikram { 217425e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 217525e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 217625e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 217725e8c5aaSvikram } 217825e8c5aaSvikram 217925e8c5aaSvikram static void 218025e8c5aaSvikram ct_barrier_decr(dev_info_t *dip) 218125e8c5aaSvikram { 218225e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d", 218325e8c5aaSvikram DEVI(dip)->devi_ct_count)); 218425e8c5aaSvikram 218525e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 218625e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count > 0); 218725e8c5aaSvikram 218825e8c5aaSvikram DEVI(dip)->devi_ct_count--; 218925e8c5aaSvikram if 
(DEVI(dip)->devi_ct_count == 0) { 219025e8c5aaSvikram cv_broadcast(&DEVI(dip)->devi_ct_cv); 219125e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast")); 219225e8c5aaSvikram } 219325e8c5aaSvikram } 219425e8c5aaSvikram 219525e8c5aaSvikram static void 219625e8c5aaSvikram ct_barrier_incr(dev_info_t *dip) 219725e8c5aaSvikram { 219825e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 219925e8c5aaSvikram DEVI(dip)->devi_ct_count++; 220025e8c5aaSvikram } 220125e8c5aaSvikram 220225e8c5aaSvikram static int 220325e8c5aaSvikram ct_barrier_wait_for_empty(dev_info_t *dip, int secs) 220425e8c5aaSvikram { 220525e8c5aaSvikram clock_t abstime; 220625e8c5aaSvikram 220725e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 220825e8c5aaSvikram 220925e8c5aaSvikram abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000); 221025e8c5aaSvikram while (DEVI(dip)->devi_ct_count) { 221125e8c5aaSvikram if (cv_timedwait(&(DEVI(dip)->devi_ct_cv), 221225e8c5aaSvikram &(DEVI(dip)->devi_ct_lock), abstime) == -1) { 221325e8c5aaSvikram return (-1); 221425e8c5aaSvikram } 221525e8c5aaSvikram } 221625e8c5aaSvikram return (0); 221725e8c5aaSvikram } 2218