/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/mutex.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/id_space.h>
#include <sys/avl.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/contract.h>
#include <sys/contract_impl.h>
#include <sys/contract/device.h>
#include <sys/contract/device_impl.h>
#include <sys/cmn_err.h>
#include <sys/nvpair.h>
#include <sys/policy.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi_implfuncs.h>
#include <sys/systm.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/ddi.h>
#include <sys/fs/dv_node.h>
#include <sys/sunndi.h>
#undef ct_lock	/* needed because clnt.h defines ct_lock as a macro */

/*
 * Device Contracts
 * -----------------
 * This file contains the core code for the device contracts framework.
 * A device contract is an agreement or a contract between a process and
 * the kernel regarding the state of the device. A device contract may be
 * created when a relationship is formed between a device and a process
 * i.e. at open(2) time, or it may be created at some point after the device
 * has been opened. A device contract once formed may be broken by either
 * party.
 * A device contract can be broken by the process by an explicit abandon of the
 * contract or by an implicit abandon when the process exits. A device contract
 * can be broken by the kernel either asynchronously (without negotiation) or
 * synchronously (with negotiation). Exactly which happens depends on the
 * device state transition. The following state diagram shows the transitions
 * between device states. Only the device state transitions currently supported
 * by device contracts are shown.
 *
 *                        <-- A -->
 *                 /-----------------> DEGRADED
 *                 |                      |
 *                 |                      |
 *                 |                      | S
 *                 |                      |
 *                 |                      v
 *                 v         S -->
 *              ONLINE ------------> OFFLINE
 *
 *
 * In the figure above, the arrows indicate the direction of transition. The
 * letter S refers to transitions which are inherently synchronous i.e.
 * require negotiation and the letter A indicates transitions which are
 * asynchronous i.e. are done without contract negotiations. A good example
 * of a synchronous transition is the ONLINE -> OFFLINE transition. This
 * transition cannot happen as long as there are consumers which have the
 * device open. Thus some form of negotiation needs to happen between the
 * consumers and the kernel to ensure that consumers either close devices
 * or disallow the move to OFFLINE. Certain other transitions, such as
 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
 * non-negotiable. A device that suffers a fault that degrades its
 * capabilities will become degraded irrespective of what consumers it has,
 * so a negotiation in this case is pointless.
 *
 * The following device states are currently defined for device contracts:
 *
 *      CT_DEV_EV_ONLINE
 *              The device is online and functioning normally
 *      CT_DEV_EV_DEGRADED
 *              The device is online but is functioning in a degraded capacity
 *      CT_DEV_EV_OFFLINE
 *              The device is offline and is no longer configured
 *
 * A typical consumer of device contracts starts out with a contract
 * template and adds terms to that template. These include the
 * "acceptable set" (A-set) term, which is a bitset of device states which
 * are guaranteed by the contract. If the device moves out of a state in
 * the A-set, the contract is broken. The breaking of the contract can
 * be asynchronous, in which case a critical contract event is sent to the
 * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
 * consumer and the kernel.
 * The kernel does this by sending a critical
 * event to the consumer with the CTE_NEG flag set indicating that this
 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
 * privileges, it can send a NACK message to the kernel which will block
 * the device state change. To NACK a negotiable event, a process must
 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 *
 * Other terms include the "minor path" term, specified explicitly if the
 * contract is not being created at open(2) time, or specified implicitly
 * if the contract is being created at open time via an activated template.
 *
 * A contract event is sent on any state change to which the contract
 * owner has subscribed via the informative or critical event sets. Only
 * critical events are guaranteed to be delivered. Since all device state
 * changes are controlled by the kernel and cannot be arbitrarily generated
 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
 * need to be asserted in a process's effective set to designate an event as
 * critical. To ensure privacy, a process must either have the same effective
 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
 * asserted in its effective set in order to observe device contract events
 * off the device contract type specific endpoint.
 *
 * Yet another term available with device contracts is the "non-negotiable"
 * term. This term is used to pre-specify a NACK to any contract negotiation.
 * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
 * non-negotiable. In this case, the device contract framework assumes a
 * NACK for any transition to OFFLINE and blocks the offline. If the A-set
 * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
 * are NACKed but transitions to DEGRADED succeed.
 *
 * The OFFLINE negotiation (if the OFFLINE state is not in the A-set for a
 * contract) happens just before the I/O framework attempts to offline a
 * device (i.e. detach a device and set the offline flag so that it cannot
 * be reattached). A device contract holder is expected to either NACK the
 * offline (if privileged) or release the device and allow the offline to
 * proceed.
 *
 * The DEGRADED contract event (if DEGRADED is not in the A-set for a contract)
 * is generated just before the I/O framework transitions the device state
 * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
 *
 * The contract holder is expected to ACK or NACK a negotiation event
 * within a certain period of time.
 * If the ACK/NACK is not received
 * within the timeout period, the device contract framework will behave
 * as if the contract does not exist and will proceed with the event.
 *
 * Unlike a process contract, a device contract does not need to exist
 * once it is abandoned, since it does not define a fault boundary. It
 * merely represents an agreement between a process and the kernel
 * regarding the state of the device. Once the process has abandoned
 * the contract (either implicitly via a process exit or explicitly)
 * the kernel has no reason to retain the contract. As a result,
 * device contracts are neither inheritable nor do they need to exist in an
 * orphan state.
 *
 * A device, unlike a process, may exist in multiple contracts and has
 * a "life" outside a device contract. A device, unlike a process,
 * may exist without an associated contract. Unlike a process contract,
 * a device contract may be formed after a binding relationship is
 * formed between a process and a device.
 *
 * IMPLEMENTATION NOTES
 * ====================
 * DATA STRUCTURES
 * ----------------
 * The heart of the device contracts implementation is the device contract
 * private cont_device_t (or ctd for short) data structure. It encapsulates
 * the generic contract_t data structure and has a number of private
 * fields.
 * These include:
 *      cond_minor:     The minor device that is the subject of the contract
 *      cond_aset:      The bitset of states which are guaranteed by the
 *                      contract
 *      cond_noneg:     If set, indicates that the result of negotiation has
 *                      been predefined to be a NACK
 * In addition, there are other device identifiers such as the devinfo node,
 * dev_t and spec_type of the minor node. There are also a few fields that
 * are used during negotiation to maintain state. See
 * uts/common/sys/contract/device_impl.h
 * for details.
 * The ctd structure represents the device private part of a contract of
 * type "device".
 *
 * Another data structure used by device contracts is ctmpl_device. It is
 * the device contract's private part of the contract template structure. It
 * encapsulates the generic template structure "ct_template_t" and includes
 * the following device contract specific fields:
 *      ctd_aset:       The bitset of states that should be guaranteed by a
 *                      contract
 *      ctd_noneg:      If set, indicates that the contract should NACK a
 *                      negotiation
 *      ctd_minor:      The devfs_path (without the /devices prefix) of the
 *                      minor node that is the subject of the contract.
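 *
 * To illustrate how these terms reach the structures above, the following is
 * a minimal userland sketch. It assumes the libcontract(3LIB) device contract
 * interfaces (ct_dev_tmpl_set_aset(3CONTRACT), ct_tmpl_create(3CONTRACT) and
 * friends) and the ctfs template file; the minor path shown is hypothetical
 * and error handling is omitted:
 *
 *      #include <fcntl.h>
 *      #include <libcontract.h>
 *      #include <sys/contract/device.h>
 *
 *      int fd;
 *      ctid_t ctid;
 *
 *      fd = open("/system/contract/device/template", O_RDWR);
 *
 *      (void) ct_dev_tmpl_set_minor(fd, "/pci@0,0/disk@0,0:a");
 *      (void) ct_dev_tmpl_set_aset(fd,
 *          CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED);
 *      (void) ct_tmpl_set_critical(fd, CT_DEV_EV_OFFLINE);
 *      (void) ct_tmpl_set_informative(fd, CT_DEV_EV_DEGRADED);
 *
 *      (void) ct_tmpl_create(fd, &ctid);
 *
 * or, to create the contract at open(2) time instead of explicitly,
 * ct_tmpl_activate(fd) is called before opening the device. A process with
 * {PRIV_SYS_DEVICES} may additionally pre-NACK negotiations with
 * ct_dev_tmpl_set_noneg(fd). The values set this way are recorded by
 * ctmpl_device_set() in ctd_aset, ctd_noneg and ctd_minor, and are copied
 * into the corresponding cond_* fields when the contract is created.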
 *
 * ALGORITHMS
 * ---------
 * There are three sets of routines in this file:
 *
 * Template related routines
 * -------------------------
 * These routines provide support for template related operations initiated
 * via the generic template operations. These include routines that dup
 * a template, free it, and set various terms in the template
 * (such as the minor node path, the acceptable state set (or A-set)
 * and the non-negotiable term) as well as a routine to query the
 * device specific portion of the template for the abovementioned terms.
 * There is also a create routine (ctmpl_device_create) that is used to
 * create a contract from a template. This routine calls (after initial
 * setup) the common function used to create a device contract
 * (contract_device_create).
 *
 * core device contract implementation
 * ----------------------------------
 * These routines support the generic contract framework to provide
 * functionality that allows contracts to be created, managed and
 * destroyed. The contract_device_create() routine is used
 * to create a contract from a template (either via an explicit create
 * operation on a template or implicitly via an open with an
 * activated template). The contract_device_free() routine assists
 * in freeing the device contract specific parts. There are routines
 * used to abandon (contract_device_abandon) a device contract as well
 * as a routine to destroy (which despite its name does not destroy,
 * it only moves a contract to a dead state) a contract.
 * There is also a routine to return status information about a
 * contract - the level of detail depends on what is requested by the
 * user. A value of CTD_FIXED only returns fixed length fields such
 * as the A-set, state of device and value of the "noneg" term. If
 * CTD_ALL is specified, the minor node path is returned as well.
 *
 * In addition there are interfaces (contract_device_ack/nack) which
 * are used to support negotiation between userland processes and
 * device contracts. These interfaces record the acknowledgement
 * or lack thereof for negotiation events and help determine if the
 * negotiated event should occur.
 *
 * "backend routines"
 * -----------------
 * The backend routines form the interface between the I/O framework
 * and the device contract subsystem. These routines allow the I/O
 * framework to call into the device contract subsystem to notify it of
 * impending changes to a device state as well as to inform it of the
 * final disposition of such attempted state changes.
 * Routines in this
 * class include contract_device_offline() that indicates an attempt to
 * offline a device, contract_device_degrade() that indicates that
 * a device is moving to the degraded state and contract_device_negend()
 * that is used by the I/O framework to inform the contracts subsystem of
 * the final disposition of an attempted operation.
 *
 * SUMMARY
 * -------
 * A contract starts its life as a template. A process allocates a device
 * contract template and sets various terms:
 *      The A-set
 *      The device minor node
 *      Critical and informative events
 *      The noneg, i.e. no-negotiation, term
 * Setting of these terms in the template is done via the
 * ctmpl_device_set() entry point in this file. A process can query a
 * template to determine the terms already set in the template - this is
 * facilitated by the ctmpl_device_get() routine.
 *
 * Once all the appropriate terms are set, the contract is instantiated via
 * one of two methods:
 * - via an explicit create operation - this is facilitated by the
 *   ctmpl_device_create() entry point
 * - synchronously with the open(2) system call - this is achieved via the
 *   contract_device_open() routine.
 * The core work for both of these functions is done by
 * contract_device_create().
 *
 * A contract once created can be queried for its status. Support for
 * status info is provided by both the common contracts framework and by
 * the "device" contract type. If the level of detail requested is
 * CTD_COMMON, only the common contract framework data is used. Higher
 * levels of detail result in calls to contract_device_status() to supply
 * device contract type specific status information.
 *
 * A contract once created may be abandoned either explicitly or implicitly.
 * In either case, the contract_device_abandon() function is invoked. This
 * function merely calls contract_destroy() which moves the contract to
 * the DEAD state. The device contract portion of destroy processing is
 * provided by contract_device_destroy() which merely disassociates the
 * contract from its device devinfo node. A contract in the DEAD state is
 * not freed. It hangs around until all references to the contract are
 * gone. When that happens, the contract is finally deallocated. The
 * device contract specific portion of the free is done by
 * contract_device_free() which finally frees the device contract specific
 * data structure (cont_device_t).
 *
 * When a device undergoes a state change, the I/O framework calls the
 * corresponding device contract entry point.
 * For example, when a device
 * is about to go OFFLINE, the routine contract_device_offline() is
 * invoked. Similarly, if a device moves to the DEGRADED state, the
 * contract_device_degrade() function is called. These functions call the
 * core routine contract_device_publish(). This function determines via
 * the function is_sync_neg() whether an event is a synchronous (i.e.
 * negotiable) event or not. In the former case, contract_device_publish()
 * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
 * and/or NACKs from contract holders. In the latter case, it simply
 * publishes the event and does not wait. In the negotiation case, ACKs or
 * NACKs from userland consumers result in contract_device_ack_nack()
 * being called where the result of the negotiation is recorded in the
 * contract data structure. Once all outstanding contract owners have
 * responded, the device contract code in wait_for_acks() determines the
 * final result of the negotiation. A single NACK overrides all other ACKs.
 * If there is no NACK, then a single ACK will result in an overall ACK
 * result. If there are no ACKs or NACKs, then the result CT_NONE is
 * returned back to the I/O framework. Once the event is permitted or
 * blocked, the I/O framework proceeds or aborts the state change. The
 * I/O framework then calls contract_device_negend() with a result code
 * indicating final disposition of the event. This call releases the
 * barrier and other state associated with the previous negotiation,
 * which permits the next event (if any) to come into the device contract
 * framework.
 *
 * Finally, a device that has outstanding contracts may be removed from
 * the system, which results in its devinfo node being freed. The devinfo
 * free routine in the I/O framework calls into the device contract
 * function contract_device_remove_dip(). This routine disassociates
 * the dip from all contracts associated with the devinfo node being freed,
 * allowing the devinfo node to be freed.
 *
 * LOCKING
 * ---------
 * There are four sets of data that need to be protected by locks:
 *
 * i) device contract specific portion of the contract template - This data
 *    is protected by the template lock ctmpl_lock.
 *
 * ii) device contract specific portion of the contract - This data is
 *     protected by the contract lock ct_lock
 *
 * iii) The linked list of contracts hanging off a devinfo node - This
 *      list is protected by the per-devinfo node lock devi_ct_lock
 *
 * iv) Finally, there is a barrier, controlled by devi_ct_lock, devi_ct_cv
 *     and devi_ct_count, that controls state changes to a dip
 *
 * The template lock is independent in that none of the other locks in this
 * file may be taken while holding the template lock (and vice versa).
 *
 * The remaining three locks have the following lock order:
 *
 *      devi_ct_lock -> ct_count barrier -> ct_lock
 *
 */

static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
    int spec_type, proc_t *owner, int *errorp);

/* barrier routines */
static void ct_barrier_acquire(dev_info_t *dip);
static void ct_barrier_release(dev_info_t *dip);
static int ct_barrier_held(dev_info_t *dip);
static int ct_barrier_empty(dev_info_t *dip);
static void ct_barrier_wait_for_release(dev_info_t *dip);
static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
static void ct_barrier_decr(dev_info_t *dip);
static void ct_barrier_incr(dev_info_t *dip);

ct_type_t *device_type;

/*
 * Macro predicates for determining when events should be sent and how.
 */
#define EVSENDP(ctd, flag) \
        ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag)

#define EVINFOP(ctd, flag) \
        ((ctd->cond_contract.ct_ev_crit & flag) == 0)

/*
 * State transition table showing which transitions are synchronous and which
 * are not.
 */
struct ct_dev_negtable {
        uint_t  st_old;
        uint_t  st_new;
        uint_t  st_neg;
} ct_dev_negtable[] = {
        {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1},
        {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0},
        {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0},
        {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1},
        {0}
};

/*
 * Device contract template implementation
 */

/*
 * ctmpl_device_dup
 *
 * The device contract template dup entry point.
 * This simply copies all the fields (generic as well as device contract
 * specific) of the original.
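 * Note that the minor path, if any, is duplicated (i_ddi_strdup(), a KM_SLEEP
 * allocation) with the source template lock dropped; the lock is reacquired
 * before returning, so the routine exits with the source template lock held,
 * as established by ctmpl_copy().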
 */
static struct ct_template *
ctmpl_device_dup(struct ct_template *template)
{
        ctmpl_device_t *new;
        ctmpl_device_t *old = template->ctmpl_data;
        char *buf;
        char *minor;

        new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
        buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

        /*
         * copy generic fields.
         * ctmpl_copy returns with old template lock held
         */
        ctmpl_copy(&new->ctd_ctmpl, template);

        new->ctd_ctmpl.ctmpl_data = new;
        new->ctd_aset = old->ctd_aset;
        new->ctd_minor = NULL;
        new->ctd_noneg = old->ctd_noneg;

        if (old->ctd_minor) {
                ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN);
                bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1);
        } else {
                kmem_free(buf, MAXPATHLEN);
                buf = NULL;
        }

        mutex_exit(&template->ctmpl_lock);
        if (buf) {
                minor = i_ddi_strdup(buf, KM_SLEEP);
                kmem_free(buf, MAXPATHLEN);
                buf = NULL;
        } else {
                minor = NULL;
        }
        mutex_enter(&template->ctmpl_lock);

        if (minor) {
                new->ctd_minor = minor;
        }

        ASSERT(buf == NULL);
        return (&new->ctd_ctmpl);
}

/*
 * ctmpl_device_free
 *
 * The device contract template free entry point.  Just
 * frees the template.
 */
static void
ctmpl_device_free(struct ct_template *template)
{
        ctmpl_device_t *dtmpl = template->ctmpl_data;

        if (dtmpl->ctd_minor)
                kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);

        kmem_free(dtmpl, sizeof (ctmpl_device_t));
}

/*
 * SAFE_EV is the set of events which a non-privileged process is
 * allowed to make critical. An unprivileged device contract owner has
 * no control over when a device changes state, so all device events
 * can be in the critical set.
 *
 * EXCESS tells us if "value", a critical event set, requires
 * additional privilege. For device contracts EXCESS currently
 * evaluates to 0.
 */
#define SAFE_EV         (CT_DEV_ALLEVENT)
#define EXCESS(value)   ((value) & ~SAFE_EV)


/*
 * ctmpl_device_set
 *
 * The device contract template set entry point.
 * Sets various terms in the template. The non-negotiable term can only be
 * set if the process has the {PRIV_SYS_DEVICES} privilege asserted in its
 * effective set.
 */
static int
ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr)
{
        ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
        char *buf;
        int error;
        dev_info_t *dip;
        int spec_type;

        ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));

        switch (param->ctpm_id) {
        case CTDP_ACCEPT:
                if (param->ctpm_value & ~CT_DEV_ALLEVENT)
                        return (EINVAL);
                if (param->ctpm_value == 0)
                        return (EINVAL);
                if (param->ctpm_value == CT_DEV_ALLEVENT)
                        return (EINVAL);

                dtmpl->ctd_aset = param->ctpm_value;
                break;
        case CTDP_NONEG:
                if (param->ctpm_value != CTDP_NONEG_SET &&
                    param->ctpm_value != CTDP_NONEG_CLEAR)
                        return (EINVAL);

                /*
                 * only privileged processes can designate a contract
                 * non-negotiable.
                 */
                if (param->ctpm_value == CTDP_NONEG_SET &&
                    (error = secpolicy_sys_devices(cr)) != 0) {
                        return (error);
                }

                dtmpl->ctd_noneg = param->ctpm_value;
                break;

        case CTDP_MINOR:
                if (param->ctpm_value == NULL)
                        return (EINVAL);

                buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

                /*
                 * Copyin the device path
                 */
                error = copyinstr((char *)(uintptr_t)param->ctpm_value, buf,
                    MAXPATHLEN, NULL);
                if (error != 0) {
                        kmem_free(buf, MAXPATHLEN);
                        return (error);
                }
                buf[MAXPATHLEN - 1] = '\0';

                if (*buf != '/' ||
                    strncmp(buf, "/devices/", strlen("/devices/")) == 0 ||
                    strstr(buf, "../devices/") || strchr(buf, ':') == NULL) {
                        kmem_free(buf, MAXPATHLEN);
                        return (EINVAL);
                }

                spec_type = 0;
                dip = NULL;
                if (resolve_pathname(buf, &dip, NULL, &spec_type) != 0) {
                        kmem_free(buf, MAXPATHLEN);
                        return (ERANGE);
                }
                ddi_release_devi(dip);

                if (spec_type != S_IFCHR && spec_type != S_IFBLK) {
                        kmem_free(buf, MAXPATHLEN);
                        return (EINVAL);
                }

                if (dtmpl->ctd_minor != NULL) {
                        kmem_free(dtmpl->ctd_minor,
                            strlen(dtmpl->ctd_minor) + 1);
                }
                dtmpl->ctd_minor = i_ddi_strdup(buf, KM_SLEEP);
                kmem_free(buf, MAXPATHLEN);
                break;
        case CTP_EV_CRITICAL:
                /*
                 * Currently for device contracts, any event
                 * may be added to the critical set. We retain the
                 * following code however for future enhancements.
                 */
                if (EXCESS(param->ctpm_value) &&
                    (error = secpolicy_contract_event(cr)) != 0)
                        return (error);
                tmpl->ctmpl_ev_crit = param->ctpm_value;
                break;
        default:
                return (EINVAL);
        }

        return (0);
}

/*
 * ctmpl_device_get
 *
 * The device contract template get entry point.  Simply fetches and
 * returns the value of the requested term.
 */
static int
ctmpl_device_get(struct ct_template *template, ct_param_t *param)
{
        ctmpl_device_t *dtmpl = template->ctmpl_data;
        int error;

        ASSERT(MUTEX_HELD(&template->ctmpl_lock));

        switch (param->ctpm_id) {
        case CTDP_ACCEPT:
                param->ctpm_value = dtmpl->ctd_aset;
                break;
        case CTDP_NONEG:
                param->ctpm_value = dtmpl->ctd_noneg;
                break;
        case CTDP_MINOR:
                if (dtmpl->ctd_minor) {
                        error = copyoutstr(dtmpl->ctd_minor,
                            (char *)(uintptr_t)param->ctpm_value,
                            MAXPATHLEN, NULL);
                        if (error != 0)
                                return (error);
                } else {
                        return (ENOENT);
                }
                break;
        default:
                return (EINVAL);
        }

        return (0);
}

/*
 * Device contract type specific portion of creating a contract using
 * a specified template
 */
/*ARGSUSED*/
int
ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
{
        ctmpl_device_t *dtmpl;
        char *buf;
        dev_t dev;
        int spec_type;
        int error;
        cont_device_t *ctd;

        if (ctidp == NULL)
                return (EINVAL);

        buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

        dtmpl = template->ctmpl_data;

        mutex_enter(&template->ctmpl_lock);
        if (dtmpl->ctd_minor == NULL) {
                /* incomplete template */
                mutex_exit(&template->ctmpl_lock);
                kmem_free(buf, MAXPATHLEN);
                return (EINVAL);
        } else {
                ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
                bcopy(dtmpl->ctd_minor, buf,
                    strlen(dtmpl->ctd_minor) + 1);
        }
        mutex_exit(&template->ctmpl_lock);

        spec_type = 0;
        dev = NODEV;
        if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 ||
            dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
            (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
                CT_DEBUG((CE_WARN,
                    "tmpl_create: failed to find device: %s", buf));
                kmem_free(buf, MAXPATHLEN);
                return (ERANGE);
        }
        kmem_free(buf, MAXPATHLEN);

        ctd = contract_device_create(template->ctmpl_data,
            dev, spec_type, curproc, &error);

        if (ctd == NULL) {
                CT_DEBUG((CE_WARN, "Failed to create device contract for "
                    "process (%d) with device (devt = %lu, spec_type = %s)",
                    curproc->p_pid, dev,
                    spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
                return (error);
        }

        mutex_enter(&ctd->cond_contract.ct_lock);
        *ctidp = ctd->cond_contract.ct_id;
        mutex_exit(&ctd->cond_contract.ct_lock);

        return (0);
}

/*
 * Device contract specific template entry points
 */
static ctmplops_t ctmpl_device_ops = {
        ctmpl_device_dup,               /* ctop_dup */
        ctmpl_device_free,              /* ctop_free */
        ctmpl_device_set,               /* ctop_set */
        ctmpl_device_get,               /* ctop_get */
        ctmpl_device_create,            /* ctop_create */
        CT_DEV_ALLEVENT                 /* all device events bitmask */
};


/*
 * Device contract implementation
 */

/*
 * contract_device_default
 *
 * The device contract default template entry point.  Creates a
 * device contract template with a default A-set and no "noneg",
 * with informative degrade events and critical offline events.
 * There is no default minor path.
 */
static ct_template_t *
contract_device_default(void)
{
        ctmpl_device_t *new;

        new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
        ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new);

        new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
        new->ctd_noneg = 0;
        new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED;
        new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE;

        return (&new->ctd_ctmpl);
}

/*
 * contract_device_free
 *
 * Destroys the device contract specific portion of a contract and
 * frees the contract.
 */
static void
contract_device_free(contract_t *ct)
{
        cont_device_t *ctd = ct->ct_data;

        ASSERT(ctd->cond_minor);
        ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
        kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);

        ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
            ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);

        ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);

        ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
        ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);

        ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
        ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));

        ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
        ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));

        ASSERT(!list_link_active(&ctd->cond_next));

        kmem_free(ctd, sizeof (cont_device_t));
}

/*
 * contract_device_abandon
 *
 * The device contract abandon entry point.
 */
static void
contract_device_abandon(contract_t *ct)
{
        ASSERT(MUTEX_HELD(&ct->ct_lock));

        /*
         * device contracts cannot be inherited or orphaned.
         * Move the contract to the DEAD_STATE. It will be freed
         * once all references to it are gone.
         */
        contract_destroy(ct);
}

/*
 * contract_device_destroy
 *
 * The device contract destroy entry point.
 * Called from contract_destroy() to do any type specific destroy. Note
 * that destroy is a misnomer - this does not free the contract, it only
 * moves it to the dead state.
 * A contract is actually freed via
 * contract_rele() -> contract_dtor(), contop_free()
 */
static void
contract_device_destroy(contract_t *ct)
{
        cont_device_t *ctd = ct->ct_data;
        dev_info_t *dip = ctd->cond_dip;

        ASSERT(MUTEX_HELD(&ct->ct_lock));

        if (dip == NULL) {
                /*
                 * The dip has been removed, this is a dangling contract
                 * Check that dip linkages are NULL
                 */
                ASSERT(!list_link_active(&ctd->cond_next));
                CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no "
                    "devinfo node. contract ctid : %d", ct->ct_id));
                return;
        }

        /*
         * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock
         */
        mutex_exit(&ct->ct_lock);

        /*
         * Waiting for the barrier to be released is strictly speaking not
         * necessary. But it simplifies the implementation of
         * contract_device_publish() by establishing the invariant that
         * device contracts cannot go away during negotiation.
         */
        mutex_enter(&(DEVI(dip)->devi_ct_lock));
        ct_barrier_wait_for_release(dip);
        mutex_enter(&ct->ct_lock);

        list_remove(&(DEVI(dip)->devi_ct), ctd);
        ctd->cond_dip = NULL;   /* no longer linked to dip */
        contract_rele(ct);      /* remove hold for dip linkage */

        mutex_exit(&ct->ct_lock);
        mutex_exit(&(DEVI(dip)->devi_ct_lock));
        mutex_enter(&ct->ct_lock);
}

/*
 * contract_device_status
 *
 * The device contract status entry point. Called when level of "detail"
 * is either CTD_FIXED or CTD_ALL
 *
 */
static void
contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
    void *status, model_t model)
{
        cont_device_t *ctd = ct->ct_data;

        ASSERT(detail == CTD_FIXED || detail == CTD_ALL);

        mutex_enter(&ct->ct_lock);
        contract_status_common(ct, zone, status, model);

        /*
         * There's no need to hold the contract lock while accessing static
         * data like aset or noneg. But since we need the lock to access other
         * data like state, we hold it anyway.
         */
        VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0);
        VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0);
        VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0);

        if (detail == CTD_FIXED) {
                mutex_exit(&ct->ct_lock);
                return;
        }

        ASSERT(ctd->cond_minor);
        VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0);

        mutex_exit(&ct->ct_lock);
}

/*
 * Converts a result integer into the corresponding string. Used for printing
 * messages
 */
static char *
result_str(uint_t result)
{
        switch (result) {
        case CT_ACK:
                return ("CT_ACK");
        case CT_NACK:
                return ("CT_NACK");
        case CT_NONE:
                return ("CT_NONE");
        default:
                return ("UNKNOWN");
        }
}

/*
 * Converts a device state integer constant into the corresponding string.
 * Used to print messages.
 */
static char *
state_str(uint_t state)
{
        switch (state) {
        case CT_DEV_EV_ONLINE:
                return ("ONLINE");
        case CT_DEV_EV_DEGRADED:
                return ("DEGRADED");
        case CT_DEV_EV_OFFLINE:
                return ("OFFLINE");
        default:
                return ("UNKNOWN");
        }
}

/*
 * Routine that determines if a particular CT_DEV_EV_? event corresponds to a
 * synchronous state change or not.
 */
static int
is_sync_neg(uint_t old, uint_t new)
{
        int i;

        ASSERT(old & CT_DEV_ALLEVENT);
        ASSERT(new & CT_DEV_ALLEVENT);

        if (old == new) {
                CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
                    state_str(new)));
                return (-2);
        }

        for (i = 0; ct_dev_negtable[i].st_new != 0; i++) {
                if (old == ct_dev_negtable[i].st_old &&
                    new == ct_dev_negtable[i].st_new) {
                        return (ct_dev_negtable[i].st_neg);
                }
        }

        CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
            "old = %s -> new = %s", state_str(old), state_str(new)));

        return (-1);
}

/*
 * Used to cleanup cached dv_nodes so that when a device is released by
 * a contract holder, its devinfo node can be successfully detached.
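 * If the parent devinfo node (or the node itself, when there is no parent)
 * is busy-owned, EDEADLOCK is returned; otherwise devfs_clean() is invoked
 * with DV_CLEAN_FORCE to prune the cached dv_nodes.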
 */
static int
contract_device_dvclean(dev_info_t *dip)
{
        char *devnm;
        dev_info_t *pdip;
        int error;

        ASSERT(dip);

        /* pdip can be NULL if we have contracts against the root dip */
        pdip = ddi_get_parent(dip);

        if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) {
                char *path;

                path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
                (void) ddi_pathname(dip, path);
                CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
                    "device=%s", path));
                kmem_free(path, MAXPATHLEN);
                return (EDEADLOCK);
        }

        if (pdip) {
                devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
                (void) ddi_deviname(dip, devnm);
                error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
                kmem_free(devnm, MAXNAMELEN + 1);
        } else {
                error = devfs_clean(dip, NULL, DV_CLEAN_FORCE);
        }

        return (error);
}

/*
 * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
 * Results in the ACK or NACK being recorded on the dip for one particular
 * contract. The device contracts framework evaluates the ACK/NACKs for all
 * contracts against a device to determine if a particular device state change
 * should be allowed.
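 *
 * For reference, the holder's side of this exchange might look roughly like
 * the sketch below. It assumes the ctfs per-contract "events" and "ctl"
 * files and the libcontract(3LIB) event interfaces
 * (ct_event_read_critical(3CONTRACT), ct_ctl_ack(3CONTRACT)); the paths are
 * illustrative and error handling is omitted. "ctid" is the id of a
 * previously created device contract.
 *
 *      #include <fcntl.h>
 *      #include <stdio.h>
 *      #include <limits.h>
 *      #include <libcontract.h>
 *      #include <sys/contract/device.h>
 *
 *      int efd, cfd;
 *      ct_evthdl_t ev;
 *      ctevid_t evid;
 *      char path[PATH_MAX];
 *
 *      (void) snprintf(path, sizeof (path),
 *          "/system/contract/device/%d/events", (int)ctid);
 *      efd = open(path, O_RDONLY);
 *      (void) snprintf(path, sizeof (path),
 *          "/system/contract/device/%d/ctl", (int)ctid);
 *      cfd = open(path, O_WRONLY);
 *
 *      (void) ct_event_read_critical(efd, &ev);
 *      evid = ct_event_get_evid(ev);
 *      if (ct_event_get_flags(ev) & CTE_NEG)
 *              (void) ct_ctl_ack(cfd, evid);
 *      ct_event_free(ev);
 *
 * A sufficiently privileged holder could call ct_ctl_nack() instead to block
 * the state change, as described in the block comment at the top of this
 * file.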
 */
static int
contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
    uint_t cmd)
{
        cont_device_t *ctd = ct->ct_data;
        dev_info_t *dip;
        ctid_t ctid;
        int error;

        ctid = ct->ct_id;

        CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));

        mutex_enter(&ct->ct_lock);
        CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));

        dip = ctd->cond_dip;

        ASSERT(ctd->cond_minor);
        ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);

        /*
         * Negotiation only if new state is not in A-set
         */
        ASSERT(!(ctd->cond_aset & evtype));

        /*
         * Negotiation only if transition is synchronous
         */
        ASSERT(is_sync_neg(ctd->cond_state, evtype));

        /*
         * We shouldn't be negotiating if the "noneg" flag is set
         */
        ASSERT(!ctd->cond_noneg);

        if (dip)
                ndi_hold_devi(dip);

        mutex_exit(&ct->ct_lock);

        /*
         * dv_clean only if !NACK and offline state change
         */
        if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
                CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
                error = contract_device_dvclean(dip);
                if (error != 0) {
                        CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
                            ctid));
                        ddi_release_devi(dip);
                }
        }

        mutex_enter(&ct->ct_lock);

        if (dip)
                ddi_release_devi(dip);

        if (dip == NULL) {
                if (ctd->cond_currev_id != evid) {
                        CT_DEBUG((CE_WARN, "%sACK for non-current event "
                            "(type=%s, id=%llu) on removed device",
                            cmd == CT_NACK ? "N" : "",
                            state_str(evtype), (unsigned long long)evid));
                        CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
                            ctid));
                } else {
                        ASSERT(ctd->cond_currev_type == evtype);
                        CT_DEBUG((CE_WARN, "contract_ack: no such device: "
                            "ctid: %d", ctid));
                }
                error = (ct->ct_state == CTS_DEAD) ? ESRCH :
                    ((cmd == CT_NACK) ?
                    ETIMEDOUT : 0);
                mutex_exit(&ct->ct_lock);
                return (error);
        }

        /*
         * Must follow lock order: devi_ct_lock -> ct_count barrier -> ct_lock
         */
        mutex_exit(&ct->ct_lock);

        mutex_enter(&DEVI(dip)->devi_ct_lock);
        mutex_enter(&ct->ct_lock);
        if (ctd->cond_currev_id != evid) {
                char *buf;
                mutex_exit(&ct->ct_lock);
                mutex_exit(&DEVI(dip)->devi_ct_lock);
                ndi_hold_devi(dip);
                buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
                (void) ddi_pathname(dip, buf);
                ddi_release_devi(dip);
                CT_DEBUG((CE_WARN, "%sACK for non-current event "
                    "(type=%s, id=%llu) on device %s",
                    cmd == CT_NACK ? "N" : "",
                    state_str(evtype), (unsigned long long)evid, buf));
                kmem_free(buf, MAXPATHLEN);
                CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
                    cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
                return (cmd == CT_ACK ? 0 : ETIMEDOUT);
        }

        ASSERT(ctd->cond_currev_type == evtype);
        ASSERT(cmd == CT_ACK || cmd == CT_NACK);

        CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
            cmd == CT_NACK ? "N" : "", ctid));

        ctd->cond_currev_ack = cmd;
        mutex_exit(&ct->ct_lock);

        ct_barrier_decr(dip);
        mutex_exit(&DEVI(dip)->devi_ct_lock);

        CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));

        return (0);
}

/*
 * Invoked when a userland contract holder approves (i.e. ACKs) a state change
 */
static int
contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
{
        return (contract_device_ack_nack(ct, evtype, evid, CT_ACK));
}

/*
 * Invoked when a userland contract holder blocks (i.e. NACKs) a state change
 */
static int
contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
{
        return (contract_device_ack_nack(ct, evtype, evid, CT_NACK));
}

/*
 * Creates a new contract synchronously with the breaking of an existing
 * contract. Currently not supported.
1143*25e8c5aaSvikram */ 1144*25e8c5aaSvikram /*ARGSUSED*/ 1145*25e8c5aaSvikram static int 1146*25e8c5aaSvikram contract_device_newct(contract_t *ct) 1147*25e8c5aaSvikram { 1148*25e8c5aaSvikram return (ENOTSUP); 1149*25e8c5aaSvikram } 1150*25e8c5aaSvikram 1151*25e8c5aaSvikram /* 1152*25e8c5aaSvikram * Core device contract implementation entry points 1153*25e8c5aaSvikram */ 1154*25e8c5aaSvikram static contops_t contract_device_ops = { 1155*25e8c5aaSvikram contract_device_free, /* contop_free */ 1156*25e8c5aaSvikram contract_device_abandon, /* contop_abandon */ 1157*25e8c5aaSvikram contract_device_destroy, /* contop_destroy */ 1158*25e8c5aaSvikram contract_device_status, /* contop_status */ 1159*25e8c5aaSvikram contract_device_ack, /* contop_ack */ 1160*25e8c5aaSvikram contract_device_nack, /* contop_nack */ 1161*25e8c5aaSvikram contract_qack_notsup, /* contop_qack */ 1162*25e8c5aaSvikram contract_device_newct /* contop_newct */ 1163*25e8c5aaSvikram }; 1164*25e8c5aaSvikram 1165*25e8c5aaSvikram /* 1166*25e8c5aaSvikram * contract_device_init 1167*25e8c5aaSvikram * 1168*25e8c5aaSvikram * Initializes the device contract type. 1169*25e8c5aaSvikram */ 1170*25e8c5aaSvikram void 1171*25e8c5aaSvikram contract_device_init(void) 1172*25e8c5aaSvikram { 1173*25e8c5aaSvikram device_type = contract_type_init(CTT_DEVICE, "device", 1174*25e8c5aaSvikram &contract_device_ops, contract_device_default); 1175*25e8c5aaSvikram } 1176*25e8c5aaSvikram 1177*25e8c5aaSvikram /* 1178*25e8c5aaSvikram * contract_device_create 1179*25e8c5aaSvikram * 1180*25e8c5aaSvikram * create a device contract given template "tmpl" and the "owner" process. 1181*25e8c5aaSvikram * May fail and return NULL if project.max-contracts would have been exceeded. 1182*25e8c5aaSvikram * 1183*25e8c5aaSvikram * Common device contract creation routine called for both open-time and 1184*25e8c5aaSvikram * non-open time device contract creation 1185*25e8c5aaSvikram */ 1186*25e8c5aaSvikram static cont_device_t * 1187*25e8c5aaSvikram contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, 1188*25e8c5aaSvikram proc_t *owner, int *errorp) 1189*25e8c5aaSvikram { 1190*25e8c5aaSvikram cont_device_t *ctd; 1191*25e8c5aaSvikram char *minor; 1192*25e8c5aaSvikram char *path; 1193*25e8c5aaSvikram dev_info_t *dip; 1194*25e8c5aaSvikram 1195*25e8c5aaSvikram ASSERT(dtmpl != NULL); 1196*25e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); 1197*25e8c5aaSvikram ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); 1198*25e8c5aaSvikram ASSERT(errorp); 1199*25e8c5aaSvikram 1200*25e8c5aaSvikram *errorp = 0; 1201*25e8c5aaSvikram 1202*25e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1203*25e8c5aaSvikram 1204*25e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 1205*25e8c5aaSvikram ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 1206*25e8c5aaSvikram bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); 1207*25e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1208*25e8c5aaSvikram 1209*25e8c5aaSvikram dip = e_ddi_hold_devi_by_path(path, 0); 1210*25e8c5aaSvikram if (dip == NULL) { 1211*25e8c5aaSvikram cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " 1212*25e8c5aaSvikram "for device path (%s)", path); 1213*25e8c5aaSvikram kmem_free(path, MAXPATHLEN); 1214*25e8c5aaSvikram *errorp = ERANGE; 1215*25e8c5aaSvikram return (NULL); 1216*25e8c5aaSvikram } 1217*25e8c5aaSvikram 1218*25e8c5aaSvikram /* 1219*25e8c5aaSvikram * Lock out any parallel contract negotiations 1220*25e8c5aaSvikram */ 
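/*
 * (The barrier taken here is dropped again below, on both the success
 * and the failure paths of this routine.)
 */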
1221*25e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1222*25e8c5aaSvikram ct_barrier_acquire(dip); 1223*25e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1224*25e8c5aaSvikram 1225*25e8c5aaSvikram minor = i_ddi_strdup(path, KM_SLEEP); 1226*25e8c5aaSvikram kmem_free(path, MAXPATHLEN); 1227*25e8c5aaSvikram 1228*25e8c5aaSvikram (void) contract_type_pbundle(device_type, owner); 1229*25e8c5aaSvikram 1230*25e8c5aaSvikram ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); 1231*25e8c5aaSvikram 1232*25e8c5aaSvikram /* 1233*25e8c5aaSvikram * Only we hold a reference to this contract. Safe to access 1234*25e8c5aaSvikram * the fields without a ct_lock 1235*25e8c5aaSvikram */ 1236*25e8c5aaSvikram ctd->cond_minor = minor; 1237*25e8c5aaSvikram /* 1238*25e8c5aaSvikram * It is safe to set the dip pointer in the contract 1239*25e8c5aaSvikram * as the contract will always be destroyed before the dip 1240*25e8c5aaSvikram * is released 1241*25e8c5aaSvikram */ 1242*25e8c5aaSvikram ctd->cond_dip = dip; 1243*25e8c5aaSvikram ctd->cond_devt = dev; 1244*25e8c5aaSvikram ctd->cond_spec = spec_type; 1245*25e8c5aaSvikram 1246*25e8c5aaSvikram /* 1247*25e8c5aaSvikram * Since we are able to look up the device, it is either 1248*25e8c5aaSvikram * online or degraded 1249*25e8c5aaSvikram */ 1250*25e8c5aaSvikram ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 1251*25e8c5aaSvikram CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; 1252*25e8c5aaSvikram 1253*25e8c5aaSvikram mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 1254*25e8c5aaSvikram ctd->cond_aset = dtmpl->ctd_aset; 1255*25e8c5aaSvikram ctd->cond_noneg = dtmpl->ctd_noneg; 1256*25e8c5aaSvikram 1257*25e8c5aaSvikram /* 1258*25e8c5aaSvikram * contract_ctor() initializes the common portion of a contract 1259*25e8c5aaSvikram * contract_dtor() destroys the common portion of a contract 1260*25e8c5aaSvikram */ 1261*25e8c5aaSvikram if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, 1262*25e8c5aaSvikram ctd, 0, owner, B_TRUE)) { 1263*25e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1264*25e8c5aaSvikram /* 1265*25e8c5aaSvikram * contract_device_free() destroys the type specific 1266*25e8c5aaSvikram * portion of a contract and frees the contract. 
1267*25e8c5aaSvikram * The "minor" path and "cred" is a part of the type specific 1268*25e8c5aaSvikram * portion of the contract and will be freed by 1269*25e8c5aaSvikram * contract_device_free() 1270*25e8c5aaSvikram */ 1271*25e8c5aaSvikram contract_device_free(&ctd->cond_contract); 1272*25e8c5aaSvikram 1273*25e8c5aaSvikram /* release barrier */ 1274*25e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1275*25e8c5aaSvikram ct_barrier_release(dip); 1276*25e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1277*25e8c5aaSvikram 1278*25e8c5aaSvikram ddi_release_devi(dip); 1279*25e8c5aaSvikram *errorp = EAGAIN; 1280*25e8c5aaSvikram return (NULL); 1281*25e8c5aaSvikram } 1282*25e8c5aaSvikram mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1283*25e8c5aaSvikram 1284*25e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 1285*25e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; 1286*25e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; 1287*25e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 1288*25e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 1289*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1290*25e8c5aaSvikram 1291*25e8c5aaSvikram /* 1292*25e8c5aaSvikram * Insert device contract into list hanging off the dip 1293*25e8c5aaSvikram * Bump up the ref-count on the contract to reflect this 1294*25e8c5aaSvikram */ 1295*25e8c5aaSvikram contract_hold(&ctd->cond_contract); 1296*25e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1297*25e8c5aaSvikram list_insert_tail(&(DEVI(dip)->devi_ct), ctd); 1298*25e8c5aaSvikram 1299*25e8c5aaSvikram /* release barrier */ 1300*25e8c5aaSvikram ct_barrier_release(dip); 1301*25e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1302*25e8c5aaSvikram 1303*25e8c5aaSvikram ddi_release_devi(dip); 1304*25e8c5aaSvikram 1305*25e8c5aaSvikram return (ctd); 1306*25e8c5aaSvikram } 1307*25e8c5aaSvikram 1308*25e8c5aaSvikram /* 1309*25e8c5aaSvikram * Called when a device is successfully opened to create an open-time contract 1310*25e8c5aaSvikram * i.e. synchronously with a device open. 1311*25e8c5aaSvikram */ 1312*25e8c5aaSvikram int 1313*25e8c5aaSvikram contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) 1314*25e8c5aaSvikram { 1315*25e8c5aaSvikram ctmpl_device_t *dtmpl; 1316*25e8c5aaSvikram ct_template_t *tmpl; 1317*25e8c5aaSvikram cont_device_t *ctd; 1318*25e8c5aaSvikram char *path; 1319*25e8c5aaSvikram klwp_t *lwp; 1320*25e8c5aaSvikram int error; 1321*25e8c5aaSvikram 1322*25e8c5aaSvikram if (ctpp) 1323*25e8c5aaSvikram *ctpp = NULL; 1324*25e8c5aaSvikram 1325*25e8c5aaSvikram /* 1326*25e8c5aaSvikram * Check if we are in user-context i.e. if we have an lwp 1327*25e8c5aaSvikram */ 1328*25e8c5aaSvikram lwp = ttolwp(curthread); 1329*25e8c5aaSvikram if (lwp == NULL) { 1330*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); 1331*25e8c5aaSvikram return (0); 1332*25e8c5aaSvikram } 1333*25e8c5aaSvikram 1334*25e8c5aaSvikram tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); 1335*25e8c5aaSvikram if (tmpl == NULL) { 1336*25e8c5aaSvikram return (0); 1337*25e8c5aaSvikram } 1338*25e8c5aaSvikram dtmpl = tmpl->ctmpl_data; 1339*25e8c5aaSvikram 1340*25e8c5aaSvikram /* 1341*25e8c5aaSvikram * If the user set a minor path in the template before an open, 1342*25e8c5aaSvikram * ignore it. We use the minor path of the actual minor opened. 
1343*25e8c5aaSvikram */ 1344*25e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 1345*25e8c5aaSvikram if (dtmpl->ctd_minor != NULL) { 1346*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " 1347*25e8c5aaSvikram "ignoring device minor path in active template: %s", 1348*25e8c5aaSvikram curproc->p_pid, dtmpl->ctd_minor)); 1349*25e8c5aaSvikram /* 1350*25e8c5aaSvikram * This is a copy of the actual activated template. 1351*25e8c5aaSvikram * Safe to make changes such as freeing the minor 1352*25e8c5aaSvikram * path in the template. 1353*25e8c5aaSvikram */ 1354*25e8c5aaSvikram kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 1355*25e8c5aaSvikram dtmpl->ctd_minor = NULL; 1356*25e8c5aaSvikram } 1357*25e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 1358*25e8c5aaSvikram 1359*25e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1360*25e8c5aaSvikram 1361*25e8c5aaSvikram if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { 1362*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " 1363*25e8c5aaSvikram "minor path from dev_t,spec {%lu, %d} for process (%d)", 1364*25e8c5aaSvikram dev, spec_type, curproc->p_pid)); 1365*25e8c5aaSvikram ctmpl_free(tmpl); 1366*25e8c5aaSvikram kmem_free(path, MAXPATHLEN); 1367*25e8c5aaSvikram return (1); 1368*25e8c5aaSvikram } 1369*25e8c5aaSvikram 1370*25e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 1371*25e8c5aaSvikram ASSERT(dtmpl->ctd_minor == NULL); 1372*25e8c5aaSvikram dtmpl->ctd_minor = path; 1373*25e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 1374*25e8c5aaSvikram 1375*25e8c5aaSvikram ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); 1376*25e8c5aaSvikram 1377*25e8c5aaSvikram mutex_enter(&tmpl->ctmpl_lock); 1378*25e8c5aaSvikram ASSERT(dtmpl->ctd_minor); 1379*25e8c5aaSvikram dtmpl->ctd_minor = NULL; 1380*25e8c5aaSvikram mutex_exit(&tmpl->ctmpl_lock); 1381*25e8c5aaSvikram ctmpl_free(tmpl); 1382*25e8c5aaSvikram kmem_free(path, MAXPATHLEN); 1383*25e8c5aaSvikram 1384*25e8c5aaSvikram if (ctd == NULL) { 1385*25e8c5aaSvikram cmn_err(CE_NOTE, "contract_device_open(): Failed to " 1386*25e8c5aaSvikram "create device contract for process (%d) holding " 1387*25e8c5aaSvikram "device (devt = %lu, spec_type = %d)", 1388*25e8c5aaSvikram curproc->p_pid, dev, spec_type); 1389*25e8c5aaSvikram return (1); 1390*25e8c5aaSvikram } 1391*25e8c5aaSvikram 1392*25e8c5aaSvikram if (ctpp) { 1393*25e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 1394*25e8c5aaSvikram *ctpp = &ctd->cond_contract; 1395*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1396*25e8c5aaSvikram } 1397*25e8c5aaSvikram return (0); 1398*25e8c5aaSvikram } 1399*25e8c5aaSvikram 1400*25e8c5aaSvikram /* 1401*25e8c5aaSvikram * Called during contract negotiation by the device contract framework to wait 1402*25e8c5aaSvikram * for ACKs or NACKs from contract holders. If all responses are not received 1403*25e8c5aaSvikram * before a specified timeout, this routine times out. 
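 * Returns CT_NACK if any holder NACKed the event, CT_ACK if at least one
 * holder ACKed it (and none NACKed), and CT_NONE if no holder responded
 * (for example, because the wait timed out).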
1404*25e8c5aaSvikram */ 1405*25e8c5aaSvikram static uint_t 1406*25e8c5aaSvikram wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) 1407*25e8c5aaSvikram { 1408*25e8c5aaSvikram cont_device_t *ctd; 1409*25e8c5aaSvikram int timed_out = 0; 1410*25e8c5aaSvikram int result = CT_NONE; 1411*25e8c5aaSvikram int ack; 1412*25e8c5aaSvikram char *f = "wait_for_acks"; 1413*25e8c5aaSvikram 1414*25e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 1415*25e8c5aaSvikram ASSERT(dip); 1416*25e8c5aaSvikram ASSERT(evtype & CT_DEV_ALLEVENT); 1417*25e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 1418*25e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 1419*25e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 1420*25e8c5aaSvikram 1421*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); 1422*25e8c5aaSvikram 1423*25e8c5aaSvikram if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { 1424*25e8c5aaSvikram /* 1425*25e8c5aaSvikram * some contract owner(s) didn't respond in time 1426*25e8c5aaSvikram */ 1427*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); 1428*25e8c5aaSvikram timed_out = 1; 1429*25e8c5aaSvikram } 1430*25e8c5aaSvikram 1431*25e8c5aaSvikram ack = 0; 1432*25e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1433*25e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1434*25e8c5aaSvikram 1435*25e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 1436*25e8c5aaSvikram 1437*25e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 1438*25e8c5aaSvikram 1439*25e8c5aaSvikram if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 1440*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1441*25e8c5aaSvikram continue; 1442*25e8c5aaSvikram } 1443*25e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 1444*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1445*25e8c5aaSvikram continue; 1446*25e8c5aaSvikram } 1447*25e8c5aaSvikram 1448*25e8c5aaSvikram /* skip if non-negotiable contract */ 1449*25e8c5aaSvikram if (ctd->cond_noneg) { 1450*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1451*25e8c5aaSvikram continue; 1452*25e8c5aaSvikram } 1453*25e8c5aaSvikram 1454*25e8c5aaSvikram ASSERT(ctd->cond_currev_type == evtype); 1455*25e8c5aaSvikram if (ctd->cond_currev_ack == CT_NACK) { 1456*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", 1457*25e8c5aaSvikram f, (void *)dip)); 1458*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1459*25e8c5aaSvikram return (CT_NACK); 1460*25e8c5aaSvikram } else if (ctd->cond_currev_ack == CT_ACK) { 1461*25e8c5aaSvikram ack = 1; 1462*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", 1463*25e8c5aaSvikram f, (void *)dip)); 1464*25e8c5aaSvikram } 1465*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1466*25e8c5aaSvikram } 1467*25e8c5aaSvikram 1468*25e8c5aaSvikram if (ack) { 1469*25e8c5aaSvikram result = CT_ACK; 1470*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); 1471*25e8c5aaSvikram } else if (timed_out) { 1472*25e8c5aaSvikram result = CT_NONE; 1473*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", 1474*25e8c5aaSvikram f, (void *)dip)); 1475*25e8c5aaSvikram } else { 1476*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", 1477*25e8c5aaSvikram f, (void *)dip)); 1478*25e8c5aaSvikram } 1479*25e8c5aaSvikram 1480*25e8c5aaSvikram 1481*25e8c5aaSvikram return (result); 1482*25e8c5aaSvikram } 
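/*
 * Note: the cv_timedwait() in ct_barrier_wait_for_empty() (see below) is
 * woken by ct_barrier_decr(), which contract_device_ack_nack() calls once
 * a holder ACKs or NACKs the negotiated event.
 */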
1483*25e8c5aaSvikram 1484*25e8c5aaSvikram /* 1485*25e8c5aaSvikram * Determines the current state of a device (i.e. a devinfo node) 1486*25e8c5aaSvikram */ 1487*25e8c5aaSvikram static int 1488*25e8c5aaSvikram get_state(dev_info_t *dip) 1489*25e8c5aaSvikram { 1490*25e8c5aaSvikram if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) 1491*25e8c5aaSvikram return (CT_DEV_EV_OFFLINE); 1492*25e8c5aaSvikram else if (DEVI_IS_DEVICE_DEGRADED(dip)) 1493*25e8c5aaSvikram return (CT_DEV_EV_DEGRADED); 1494*25e8c5aaSvikram else 1495*25e8c5aaSvikram return (CT_DEV_EV_ONLINE); 1496*25e8c5aaSvikram } 1497*25e8c5aaSvikram 1498*25e8c5aaSvikram /* 1499*25e8c5aaSvikram * Sets the current state of a device in a device contract 1500*25e8c5aaSvikram */ 1501*25e8c5aaSvikram static void 1502*25e8c5aaSvikram set_cond_state(dev_info_t *dip) 1503*25e8c5aaSvikram { 1504*25e8c5aaSvikram uint_t state = get_state(dip); 1505*25e8c5aaSvikram cont_device_t *ctd; 1506*25e8c5aaSvikram 1507*25e8c5aaSvikram /* verify that barrier is held */ 1508*25e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 1509*25e8c5aaSvikram 1510*25e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1511*25e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1512*25e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 1513*25e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 1514*25e8c5aaSvikram ctd->cond_state = state; 1515*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1516*25e8c5aaSvikram } 1517*25e8c5aaSvikram } 1518*25e8c5aaSvikram 1519*25e8c5aaSvikram /* 1520*25e8c5aaSvikram * Core routine called by event-specific routines when an event occurs. 1521*25e8c5aaSvikram * Determines if an event should be published, and if it is to be 1522*25e8c5aaSvikram * published, whether a negotiation should take place. Also implements 1523*25e8c5aaSvikram * NEGEND events which publish the final disposition of an event after 1524*25e8c5aaSvikram * negotiations are complete. 1525*25e8c5aaSvikram * 1526*25e8c5aaSvikram * When an event occurs on a minor node, this routine walks the list of 1527*25e8c5aaSvikram * contracts hanging off a devinfo node and for each contract on the affected 1528*25e8c5aaSvikram * dip, evaluates the following cases: 1529*25e8c5aaSvikram * 1530*25e8c5aaSvikram * a. an event that is synchronous, breaks the contract and NONEG not set 1531*25e8c5aaSvikram * - bumps up the outstanding negotiation counts on the dip 1532*25e8c5aaSvikram * - marks the dip as undergoing negotiation (devi_ct_neg) 1533*25e8c5aaSvikram * - event of type CTE_NEG is published 1534*25e8c5aaSvikram * b. an event that is synchronous, breaks the contract and NONEG is set 1535*25e8c5aaSvikram * - sets the final result to CT_NACK, event is blocked 1536*25e8c5aaSvikram * - does not publish an event 1537*25e8c5aaSvikram * c. event is asynchronous and breaks the contract 1538*25e8c5aaSvikram * - publishes a critical event irrespective of whether the NONEG 1539*25e8c5aaSvikram * flag is set, since the contract will be broken and the contract 1540*25e8c5aaSvikram * owner needs to be informed. 1541*25e8c5aaSvikram * d. No contract breakage but the owner has subscribed to the event 1542*25e8c5aaSvikram * - publishes the event irrespective of the NONEG flag as the 1543*25e8c5aaSvikram * owner has explicitly subscribed to the event. 1544*25e8c5aaSvikram * e. NEGEND event 1545*25e8c5aaSvikram * - publishes a critical event. Should only be doing this if 1546*25e8c5aaSvikram * NONEG is not set. 1547*25e8c5aaSvikram * f. 
all other events 1548*25e8c5aaSvikram * - Since a contract is not broken and this event has not been 1549*25e8c5aaSvikram * subscribed to, this event does not need to be published 1550*25e8c5aaSvikram * for this contract. 1551*25e8c5aaSvikram * 1552*25e8c5aaSvikram * Once an event is published, what happens next depends on the type of 1553*25e8c5aaSvikram * event: 1554*25e8c5aaSvikram * 1555*25e8c5aaSvikram * a. NEGEND event 1556*25e8c5aaSvikram * - clean up all state associated with the preceding negotiation 1557*25e8c5aaSvikram * and return CT_ACK to the caller of contract_device_publish() 1558*25e8c5aaSvikram * b. NACKed event 1559*25e8c5aaSvikram * - One or more contracts had the NONEG term, so the event was 1560*25e8c5aaSvikram * blocked. Return CT_NACK to the caller. 1561*25e8c5aaSvikram * c. Negotiated event 1562*25e8c5aaSvikram * - Call wait_for_acks() to wait for responses from contract 1563*25e8c5aaSvikram * holders. The end result is either CT_ACK (event is permitted), 1564*25e8c5aaSvikram * CT_NACK (event is blocked) or CT_NONE (no contract owner 1565*25e8c5aaSvikram * responded). This result is returned to the caller. 1566*25e8c5aaSvikram * d. All other events 1567*25e8c5aaSvikram * - If the event was asynchronous (i.e. not negotiated) or 1568*25e8c5aaSvikram * a contract was not broken, return CT_ACK to the caller. 1569*25e8c5aaSvikram */ 1570*25e8c5aaSvikram static uint_t 1571*25e8c5aaSvikram contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, 1572*25e8c5aaSvikram uint_t evtype, nvlist_t *tnvl) 1573*25e8c5aaSvikram { 1574*25e8c5aaSvikram cont_device_t *ctd; 1575*25e8c5aaSvikram uint_t result = CT_NONE; 1576*25e8c5aaSvikram uint64_t evid = 0; 1577*25e8c5aaSvikram uint64_t nevid = 0; 1578*25e8c5aaSvikram char *path = NULL; 1579*25e8c5aaSvikram int negend; 1580*25e8c5aaSvikram int match; 1581*25e8c5aaSvikram int sync = 0; 1582*25e8c5aaSvikram contract_t *ct; 1583*25e8c5aaSvikram ct_kevent_t *event; 1584*25e8c5aaSvikram nvlist_t *nvl; 1585*25e8c5aaSvikram int broken = 0; 1586*25e8c5aaSvikram 1587*25e8c5aaSvikram ASSERT(dip); 1588*25e8c5aaSvikram ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 1589*25e8c5aaSvikram ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 1590*25e8c5aaSvikram (spec_type == S_IFBLK || spec_type == S_IFCHR)); 1591*25e8c5aaSvikram ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); 1592*25e8c5aaSvikram 1593*25e8c5aaSvikram /* Is this a synchronous state change ? */ 1594*25e8c5aaSvikram if (evtype != CT_EV_NEGEND) { 1595*25e8c5aaSvikram sync = is_sync_neg(get_state(dip), evtype); 1596*25e8c5aaSvikram /* NOP if unsupported transition */ 1597*25e8c5aaSvikram if (sync == -2 || sync == -1) { 1598*25e8c5aaSvikram DEVI(dip)->devi_flags |= DEVI_CT_NOP; 1599*25e8c5aaSvikram result = (sync == -2) ? CT_ACK : CT_NONE; 1600*25e8c5aaSvikram goto out; 1601*25e8c5aaSvikram } 1602*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: is%s sync state change", 1603*25e8c5aaSvikram sync ? 
"" : " not")); 1604*25e8c5aaSvikram } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { 1605*25e8c5aaSvikram DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; 1606*25e8c5aaSvikram result = CT_ACK; 1607*25e8c5aaSvikram goto out; 1608*25e8c5aaSvikram } 1609*25e8c5aaSvikram 1610*25e8c5aaSvikram path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1611*25e8c5aaSvikram (void) ddi_pathname(dip, path); 1612*25e8c5aaSvikram 1613*25e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1614*25e8c5aaSvikram 1615*25e8c5aaSvikram /* 1616*25e8c5aaSvikram * Negotiation end - set the state of the device in the contract 1617*25e8c5aaSvikram */ 1618*25e8c5aaSvikram if (evtype == CT_EV_NEGEND) { 1619*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); 1620*25e8c5aaSvikram set_cond_state(dip); 1621*25e8c5aaSvikram } 1622*25e8c5aaSvikram 1623*25e8c5aaSvikram /* 1624*25e8c5aaSvikram * If this device didn't go through negotiation, don't publish 1625*25e8c5aaSvikram * a NEGEND event - simply release the barrier to allow other 1626*25e8c5aaSvikram * device events in. 1627*25e8c5aaSvikram */ 1628*25e8c5aaSvikram negend = 0; 1629*25e8c5aaSvikram if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { 1630*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier")); 1631*25e8c5aaSvikram ct_barrier_release(dip); 1632*25e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1633*25e8c5aaSvikram result = CT_ACK; 1634*25e8c5aaSvikram goto out; 1635*25e8c5aaSvikram } else if (evtype == CT_EV_NEGEND) { 1636*25e8c5aaSvikram /* 1637*25e8c5aaSvikram * There are negotiated contract breakages that 1638*25e8c5aaSvikram * need a NEGEND event 1639*25e8c5aaSvikram */ 1640*25e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 1641*25e8c5aaSvikram negend = 1; 1642*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: setting negend flag")); 1643*25e8c5aaSvikram } else { 1644*25e8c5aaSvikram /* 1645*25e8c5aaSvikram * This is a new event, not a NEGEND event. Wait for previous 1646*25e8c5aaSvikram * contract events to complete. 
1647*25e8c5aaSvikram */ 1648*25e8c5aaSvikram ct_barrier_acquire(dip); 1649*25e8c5aaSvikram } 1650*25e8c5aaSvikram 1651*25e8c5aaSvikram 1652*25e8c5aaSvikram match = 0; 1653*25e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1654*25e8c5aaSvikram ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1655*25e8c5aaSvikram 1656*25e8c5aaSvikram ctid_t ctid; 1657*25e8c5aaSvikram size_t len = strlen(path); 1658*25e8c5aaSvikram 1659*25e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 1660*25e8c5aaSvikram 1661*25e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 1662*25e8c5aaSvikram ASSERT(ctd->cond_minor); 1663*25e8c5aaSvikram ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && 1664*25e8c5aaSvikram ctd->cond_minor[len] == ':'); 1665*25e8c5aaSvikram 1666*25e8c5aaSvikram if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 1667*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1668*25e8c5aaSvikram continue; 1669*25e8c5aaSvikram } 1670*25e8c5aaSvikram if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 1671*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1672*25e8c5aaSvikram continue; 1673*25e8c5aaSvikram } 1674*25e8c5aaSvikram 1675*25e8c5aaSvikram /* We have a matching contract */ 1676*25e8c5aaSvikram match = 1; 1677*25e8c5aaSvikram ctid = ctd->cond_contract.ct_id; 1678*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", 1679*25e8c5aaSvikram ctid)); 1680*25e8c5aaSvikram 1681*25e8c5aaSvikram /* 1682*25e8c5aaSvikram * There are 4 possible cases 1683*25e8c5aaSvikram * 1. A contract is broken (dev not in acceptable state) and 1684*25e8c5aaSvikram * the state change is synchronous - start negotiation 1685*25e8c5aaSvikram * by sending a CTE_NEG critical event. 1686*25e8c5aaSvikram * 2. A contract is broken and the state change is 1687*25e8c5aaSvikram * asynchronous - just send a critical event and 1688*25e8c5aaSvikram * break the contract. 1689*25e8c5aaSvikram * 3. Contract is not broken, but consumer has subscribed 1690*25e8c5aaSvikram * to the event as a critical or informative event 1691*25e8c5aaSvikram * - just send the appropriate event 1692*25e8c5aaSvikram * 4. contract waiting for negend event - just send the critical 1693*25e8c5aaSvikram * NEGEND event. 
1694*25e8c5aaSvikram */ 1695*25e8c5aaSvikram broken = 0; 1696*25e8c5aaSvikram if (!negend && !(evtype & ctd->cond_aset)) { 1697*25e8c5aaSvikram broken = 1; 1698*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: Contract broken: %d", 1699*25e8c5aaSvikram ctid)); 1700*25e8c5aaSvikram } 1701*25e8c5aaSvikram 1702*25e8c5aaSvikram /* 1703*25e8c5aaSvikram * Don't send event if 1704*25e8c5aaSvikram * - contract is not broken AND 1705*25e8c5aaSvikram * - contract holder has not subscribed to this event AND 1706*25e8c5aaSvikram * - contract not waiting for a NEGEND event 1707*25e8c5aaSvikram */ 1708*25e8c5aaSvikram if (!broken && !EVSENDP(ctd, evtype) && 1709*25e8c5aaSvikram !ctd->cond_neg) { 1710*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_publish(): " 1711*25e8c5aaSvikram "contract (%d): no publish reqd: event %d", 1712*25e8c5aaSvikram ctd->cond_contract.ct_id, evtype)); 1713*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1714*25e8c5aaSvikram continue; 1715*25e8c5aaSvikram } 1716*25e8c5aaSvikram 1717*25e8c5aaSvikram /* 1718*25e8c5aaSvikram * Note: need to kmem_zalloc() the event so mutexes are 1719*25e8c5aaSvikram * initialized automatically 1720*25e8c5aaSvikram */ 1721*25e8c5aaSvikram ct = &ctd->cond_contract; 1722*25e8c5aaSvikram event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); 1723*25e8c5aaSvikram event->cte_type = evtype; 1724*25e8c5aaSvikram 1725*25e8c5aaSvikram if (broken && sync) { 1726*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: broken + sync: " 1727*25e8c5aaSvikram "ctid: %d", ctid)); 1728*25e8c5aaSvikram ASSERT(!negend); 1729*25e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 1730*25e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 1731*25e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 1732*25e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 1733*25e8c5aaSvikram if (ctd->cond_noneg) { 1734*25e8c5aaSvikram /* Nothing to publish. Event has been blocked */ 1735*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync and noneg:" 1736*25e8c5aaSvikram "not publishing blocked ev: ctid: %d", 1737*25e8c5aaSvikram ctid)); 1738*25e8c5aaSvikram result = CT_NACK; 1739*25e8c5aaSvikram kmem_free(event, sizeof (ct_kevent_t)); 1740*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1741*25e8c5aaSvikram continue; 1742*25e8c5aaSvikram } 1743*25e8c5aaSvikram event->cte_flags = CTE_NEG; /* critical neg. event */ 1744*25e8c5aaSvikram ctd->cond_currev_type = event->cte_type; 1745*25e8c5aaSvikram ct_barrier_incr(dip); 1746*25e8c5aaSvikram DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */ 1747*25e8c5aaSvikram ctd->cond_neg = 1; 1748*25e8c5aaSvikram } else if (broken && !sync) { 1749*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d", 1750*25e8c5aaSvikram ctid)); 1751*25e8c5aaSvikram ASSERT(!negend); 1752*25e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 1753*25e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 1754*25e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 1755*25e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 1756*25e8c5aaSvikram event->cte_flags = 0; /* critical event */ 1757*25e8c5aaSvikram } else if (EVSENDP(ctd, event->cte_type)) { 1758*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d", 1759*25e8c5aaSvikram ctid)); 1760*25e8c5aaSvikram ASSERT(!negend); 1761*25e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 1762*25e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 1763*25e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 1764*25e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 1765*25e8c5aaSvikram event->cte_flags = EVINFOP(ctd, event->cte_type) ? 
1766*25e8c5aaSvikram CTE_INFO : 0; 1767*25e8c5aaSvikram } else if (ctd->cond_neg) { 1768*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid)); 1769*25e8c5aaSvikram ASSERT(negend); 1770*25e8c5aaSvikram ASSERT(ctd->cond_noneg == 0); 1771*25e8c5aaSvikram nevid = ctd->cond_contract.ct_nevent ? 1772*25e8c5aaSvikram ctd->cond_contract.ct_nevent->cte_id : 0; 1773*25e8c5aaSvikram ASSERT(ctd->cond_currev_id == nevid); 1774*25e8c5aaSvikram event->cte_flags = 0; /* NEGEND is always critical */ 1775*25e8c5aaSvikram ctd->cond_currev_id = 0; 1776*25e8c5aaSvikram ctd->cond_currev_type = 0; 1777*25e8c5aaSvikram ctd->cond_currev_ack = 0; 1778*25e8c5aaSvikram ctd->cond_neg = 0; 1779*25e8c5aaSvikram } else { 1780*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: not publishing event for " 1781*25e8c5aaSvikram "ctid: %d, evtype: %d", 1782*25e8c5aaSvikram ctd->cond_contract.ct_id, event->cte_type)); 1783*25e8c5aaSvikram ASSERT(!negend); 1784*25e8c5aaSvikram ASSERT(ctd->cond_currev_id == 0); 1785*25e8c5aaSvikram ASSERT(ctd->cond_currev_type == 0); 1786*25e8c5aaSvikram ASSERT(ctd->cond_currev_ack == 0); 1787*25e8c5aaSvikram ASSERT(ctd->cond_neg == 0); 1788*25e8c5aaSvikram kmem_free(event, sizeof (ct_kevent_t)); 1789*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1790*25e8c5aaSvikram continue; 1791*25e8c5aaSvikram } 1792*25e8c5aaSvikram 1793*25e8c5aaSvikram nvl = NULL; 1794*25e8c5aaSvikram if (tnvl) { 1795*25e8c5aaSvikram VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0); 1796*25e8c5aaSvikram if (negend) { 1797*25e8c5aaSvikram int32_t newct = 0; 1798*25e8c5aaSvikram ASSERT(ctd->cond_noneg == 0); 1799*25e8c5aaSvikram VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid) 1800*25e8c5aaSvikram == 0); 1801*25e8c5aaSvikram VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT, 1802*25e8c5aaSvikram &newct) == 0); 1803*25e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 1804*25e8c5aaSvikram newct == 1 ? 0 : 1805*25e8c5aaSvikram ctd->cond_contract.ct_id) == 0); 1806*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d " 1807*25e8c5aaSvikram "CTS_NEVID: %llu, CTS_NEWCT: %s", 1808*25e8c5aaSvikram ctid, (unsigned long long)nevid, 1809*25e8c5aaSvikram newct ? "success" : "failure")); 1810*25e8c5aaSvikram 1811*25e8c5aaSvikram } 1812*25e8c5aaSvikram } 1813*25e8c5aaSvikram 1814*25e8c5aaSvikram if (ctd->cond_neg) { 1815*25e8c5aaSvikram ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1); 1816*25e8c5aaSvikram ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1); 1817*25e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt(); 1818*25e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = 1819*25e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start; 1820*25e8c5aaSvikram } 1821*25e8c5aaSvikram 1822*25e8c5aaSvikram /* 1823*25e8c5aaSvikram * by holding the dip's devi_ct_lock we ensure that 1824*25e8c5aaSvikram * all ACK/NACKs are held up until we have finished 1825*25e8c5aaSvikram * publishing to all contracts. 
1826*25e8c5aaSvikram */ 1827*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1828*25e8c5aaSvikram evid = cte_publish_all(ct, event, nvl, NULL); 1829*25e8c5aaSvikram mutex_enter(&ctd->cond_contract.ct_lock); 1830*25e8c5aaSvikram 1831*25e8c5aaSvikram if (ctd->cond_neg) { 1832*25e8c5aaSvikram ASSERT(!negend); 1833*25e8c5aaSvikram ASSERT(broken); 1834*25e8c5aaSvikram ASSERT(sync); 1835*25e8c5aaSvikram ASSERT(!ctd->cond_noneg); 1836*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync break, setting evid" 1837*25e8c5aaSvikram ": %d", ctid)); 1838*25e8c5aaSvikram ctd->cond_currev_id = evid; 1839*25e8c5aaSvikram } else if (negend) { 1840*25e8c5aaSvikram ctd->cond_contract.ct_ntime.ctm_start = -1; 1841*25e8c5aaSvikram ctd->cond_contract.ct_qtime.ctm_start = -1; 1842*25e8c5aaSvikram } 1843*25e8c5aaSvikram mutex_exit(&ctd->cond_contract.ct_lock); 1844*25e8c5aaSvikram } 1845*25e8c5aaSvikram 1846*25e8c5aaSvikram /* 1847*25e8c5aaSvikram * If "negend" set counter back to initial state (-1) so that 1848*25e8c5aaSvikram * other events can be published. Also clear the negotiation flag 1849*25e8c5aaSvikram * on dip. 1850*25e8c5aaSvikram * 1851*25e8c5aaSvikram * 0 .. n are used for counting. 1852*25e8c5aaSvikram * -1 indicates counter is available for use. 1853*25e8c5aaSvikram */ 1854*25e8c5aaSvikram if (negend) { 1855*25e8c5aaSvikram /* 1856*25e8c5aaSvikram * devi_ct_count not necessarily 0. We may have 1857*25e8c5aaSvikram * timed out in which case, count will be non-zero. 1858*25e8c5aaSvikram */ 1859*25e8c5aaSvikram ct_barrier_release(dip); 1860*25e8c5aaSvikram DEVI(dip)->devi_ct_neg = 0; 1861*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p", 1862*25e8c5aaSvikram (void *)dip)); 1863*25e8c5aaSvikram } else if (DEVI(dip)->devi_ct_neg) { 1864*25e8c5aaSvikram ASSERT(match); 1865*25e8c5aaSvikram ASSERT(!ct_barrier_empty(dip)); 1866*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p", 1867*25e8c5aaSvikram DEVI(dip)->devi_ct_count, (void *)dip)); 1868*25e8c5aaSvikram } else { 1869*25e8c5aaSvikram /* 1870*25e8c5aaSvikram * for non-negotiated events or subscribed events or no 1871*25e8c5aaSvikram * matching contracts 1872*25e8c5aaSvikram */ 1873*25e8c5aaSvikram ASSERT(ct_barrier_empty(dip)); 1874*25e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_neg == 0); 1875*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: " 1876*25e8c5aaSvikram "dip=%p", (void *)dip)); 1877*25e8c5aaSvikram 1878*25e8c5aaSvikram /* 1879*25e8c5aaSvikram * only this function when called from contract_device_negend() 1880*25e8c5aaSvikram * can reset the counter to READY state i.e. -1. This function 1881*25e8c5aaSvikram * is so called for every event whether a NEGEND event is needed 1882*25e8c5aaSvikram * or not, but the negend event is only published if the event 1883*25e8c5aaSvikram * whose end they signal is a negotiated event for the contract. 1884*25e8c5aaSvikram */ 1885*25e8c5aaSvikram } 1886*25e8c5aaSvikram 1887*25e8c5aaSvikram if (!match) { 1888*25e8c5aaSvikram /* No matching contracts */ 1889*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: No matching contract")); 1890*25e8c5aaSvikram result = CT_NONE; 1891*25e8c5aaSvikram } else if (result == CT_NACK) { 1892*25e8c5aaSvikram /* a non-negotiable contract exists and this is a neg. 
event */ 1893*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract")); 1894*25e8c5aaSvikram (void) wait_for_acks(dip, dev, spec_type, evtype); 1895*25e8c5aaSvikram } else if (DEVI(dip)->devi_ct_neg) { 1896*25e8c5aaSvikram /* one or more contracts going through negotiations */ 1897*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: sync contract: waiting")); 1898*25e8c5aaSvikram result = wait_for_acks(dip, dev, spec_type, evtype); 1899*25e8c5aaSvikram } else { 1900*25e8c5aaSvikram /* no negotiated contracts or no broken contracts or NEGEND */ 1901*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: async/no-break/negend")); 1902*25e8c5aaSvikram result = CT_ACK; 1903*25e8c5aaSvikram } 1904*25e8c5aaSvikram 1905*25e8c5aaSvikram /* 1906*25e8c5aaSvikram * Release the lock only now so that the only point where we 1907*25e8c5aaSvikram * drop the lock is in wait_for_acks(). This is so that we don't 1908*25e8c5aaSvikram * miss cv_signal/cv_broadcast from contract holders 1909*25e8c5aaSvikram */ 1910*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock")); 1911*25e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1912*25e8c5aaSvikram 1913*25e8c5aaSvikram out: 1914*25e8c5aaSvikram if (tnvl) 1915*25e8c5aaSvikram nvlist_free(tnvl); 1916*25e8c5aaSvikram if (path) 1917*25e8c5aaSvikram kmem_free(path, MAXPATHLEN); 1918*25e8c5aaSvikram 1919*25e8c5aaSvikram 1920*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result))); 1921*25e8c5aaSvikram return (result); 1922*25e8c5aaSvikram } 1923*25e8c5aaSvikram 1924*25e8c5aaSvikram 1925*25e8c5aaSvikram /* 1926*25e8c5aaSvikram * contract_device_offline 1927*25e8c5aaSvikram * 1928*25e8c5aaSvikram * Event publishing routine called by the I/O framework when a device is offlined. 1929*25e8c5aaSvikram */ 1930*25e8c5aaSvikram ct_ack_t 1931*25e8c5aaSvikram contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type) 1932*25e8c5aaSvikram { 1933*25e8c5aaSvikram nvlist_t *nvl; 1934*25e8c5aaSvikram uint_t result; 1935*25e8c5aaSvikram uint_t evtype; 1936*25e8c5aaSvikram 1937*25e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1938*25e8c5aaSvikram 1939*25e8c5aaSvikram evtype = CT_DEV_EV_OFFLINE; 1940*25e8c5aaSvikram result = contract_device_publish(dip, dev, spec_type, evtype, nvl); 1941*25e8c5aaSvikram 1942*25e8c5aaSvikram /* 1943*25e8c5aaSvikram * If a contract offline is NACKED, the framework expects us to call 1944*25e8c5aaSvikram * NEGEND ourselves, since we know the final result 1945*25e8c5aaSvikram */ 1946*25e8c5aaSvikram if (result == CT_NACK) { 1947*25e8c5aaSvikram contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); 1948*25e8c5aaSvikram } 1949*25e8c5aaSvikram 1950*25e8c5aaSvikram return (result); 1951*25e8c5aaSvikram } 1952*25e8c5aaSvikram 1953*25e8c5aaSvikram /* 1954*25e8c5aaSvikram * contract_device_degrade 1955*25e8c5aaSvikram * 1956*25e8c5aaSvikram * Event publishing routine called by the I/O framework when a device 1957*25e8c5aaSvikram * moves to the degraded state. 
1958*25e8c5aaSvikram */ 1959*25e8c5aaSvikram /*ARGSUSED*/ 1960*25e8c5aaSvikram void 1961*25e8c5aaSvikram contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) 1962*25e8c5aaSvikram { 1963*25e8c5aaSvikram nvlist_t *nvl; 1964*25e8c5aaSvikram uint_t evtype; 1965*25e8c5aaSvikram 1966*25e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1967*25e8c5aaSvikram 1968*25e8c5aaSvikram evtype = CT_DEV_EV_DEGRADED; 1969*25e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 1970*25e8c5aaSvikram } 1971*25e8c5aaSvikram 1972*25e8c5aaSvikram /* 1973*25e8c5aaSvikram * contract_device_undegrade 1974*25e8c5aaSvikram * 1975*25e8c5aaSvikram * Event publishing routine called by I/O framework when a device 1976*25e8c5aaSvikram * moves from degraded state to online state. 1977*25e8c5aaSvikram */ 1978*25e8c5aaSvikram /*ARGSUSED*/ 1979*25e8c5aaSvikram void 1980*25e8c5aaSvikram contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) 1981*25e8c5aaSvikram { 1982*25e8c5aaSvikram nvlist_t *nvl; 1983*25e8c5aaSvikram uint_t evtype; 1984*25e8c5aaSvikram 1985*25e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1986*25e8c5aaSvikram 1987*25e8c5aaSvikram evtype = CT_DEV_EV_ONLINE; 1988*25e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 1989*25e8c5aaSvikram } 1990*25e8c5aaSvikram 1991*25e8c5aaSvikram /* 1992*25e8c5aaSvikram * For all contracts which have undergone a negotiation (because the device 1993*25e8c5aaSvikram * moved out of the acceptable state for that contract and the state 1994*25e8c5aaSvikram * change is synchronous i.e. requires negotiation) this routine publishes 1995*25e8c5aaSvikram * a CT_EV_NEGEND event with the final disposition of the event. 1996*25e8c5aaSvikram * 1997*25e8c5aaSvikram * This event is always a critical event. 1998*25e8c5aaSvikram */ 1999*25e8c5aaSvikram void 2000*25e8c5aaSvikram contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) 2001*25e8c5aaSvikram { 2002*25e8c5aaSvikram nvlist_t *nvl; 2003*25e8c5aaSvikram uint_t evtype; 2004*25e8c5aaSvikram 2005*25e8c5aaSvikram ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); 2006*25e8c5aaSvikram 2007*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " 2008*25e8c5aaSvikram "dip: %p", result, (void *)dip)); 2009*25e8c5aaSvikram 2010*25e8c5aaSvikram VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2011*25e8c5aaSvikram VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 2012*25e8c5aaSvikram result == CT_EV_SUCCESS ? 1 : 0) == 0); 2013*25e8c5aaSvikram 2014*25e8c5aaSvikram evtype = CT_EV_NEGEND; 2015*25e8c5aaSvikram (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 2016*25e8c5aaSvikram 2017*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", 2018*25e8c5aaSvikram (void *)dip)); 2019*25e8c5aaSvikram } 2020*25e8c5aaSvikram 2021*25e8c5aaSvikram /* 2022*25e8c5aaSvikram * Wrapper routine called by other subsystems (such as LDI) to start 2023*25e8c5aaSvikram * negotiations when a synchronous device state change occurs. 2024*25e8c5aaSvikram * Returns CT_ACK or CT_NACK. 
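 *
 * Illustrative sketch only (not taken from this file): a hypothetical
 * caller, such as an offline path in a subsystem like the LDI, might drive
 * a negotiated offline roughly as follows, where my_try_offline() is a
 * placeholder for the caller's real work:
 *
 *	ct_ack_t ack;
 *	int ct_result;
 *
 *	ack = contract_device_negotiate(dip, dev, S_IFCHR, CT_DEV_EV_OFFLINE);
 *	if (ack == CT_NACK)
 *		return (EBUSY);
 *	ct_result = (my_try_offline(dip) == 0) ? CT_EV_SUCCESS : CT_EV_FAILURE;
 *	contract_device_finalize(dip, dev, S_IFCHR, CT_DEV_EV_OFFLINE,
 *	    ct_result);
 *
 * contract_device_finalize() (below) publishes the NEGEND event that closes
 * the negotiation started here; in the NACK case, contract_device_offline()
 * has already published the NEGEND itself.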
2025*25e8c5aaSvikram */ 2026*25e8c5aaSvikram ct_ack_t 2027*25e8c5aaSvikram contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, 2028*25e8c5aaSvikram uint_t evtype) 2029*25e8c5aaSvikram { 2030*25e8c5aaSvikram int result; 2031*25e8c5aaSvikram 2032*25e8c5aaSvikram ASSERT(dip); 2033*25e8c5aaSvikram ASSERT(dev != NODEV); 2034*25e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 2035*25e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 2036*25e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 2037*25e8c5aaSvikram 2038*25e8c5aaSvikram switch (evtype) { 2039*25e8c5aaSvikram case CT_DEV_EV_OFFLINE: 2040*25e8c5aaSvikram result = contract_device_offline(dip, dev, spec_type); 2041*25e8c5aaSvikram break; 2042*25e8c5aaSvikram default: 2043*25e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " 2044*25e8c5aaSvikram "not supported: event (%d) for dev_t (%lu) and spec (%d), " 2045*25e8c5aaSvikram "dip (%p)", evtype, dev, spec_type, (void *)dip); 2046*25e8c5aaSvikram result = CT_NACK; 2047*25e8c5aaSvikram break; 2048*25e8c5aaSvikram } 2049*25e8c5aaSvikram 2050*25e8c5aaSvikram return (result); 2051*25e8c5aaSvikram } 2052*25e8c5aaSvikram 2053*25e8c5aaSvikram /* 2054*25e8c5aaSvikram * A wrapper routine called by other subsystems (such as the LDI) to 2055*25e8c5aaSvikram * finalize event processing for a state change event. For synchronous 2056*25e8c5aaSvikram * state changes, this publishes NEGEND events. For asynchronous i.e. 2057*25e8c5aaSvikram * non-negotiable events this publishes the event. 2058*25e8c5aaSvikram */ 2059*25e8c5aaSvikram void 2060*25e8c5aaSvikram contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, 2061*25e8c5aaSvikram uint_t evtype, int ct_result) 2062*25e8c5aaSvikram { 2063*25e8c5aaSvikram ASSERT(dip); 2064*25e8c5aaSvikram ASSERT(dev != NODEV); 2065*25e8c5aaSvikram ASSERT(dev != DDI_DEV_T_ANY); 2066*25e8c5aaSvikram ASSERT(dev != DDI_DEV_T_NONE); 2067*25e8c5aaSvikram ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 2068*25e8c5aaSvikram 2069*25e8c5aaSvikram switch (evtype) { 2070*25e8c5aaSvikram case CT_DEV_EV_OFFLINE: 2071*25e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 2072*25e8c5aaSvikram break; 2073*25e8c5aaSvikram case CT_DEV_EV_DEGRADED: 2074*25e8c5aaSvikram contract_device_degrade(dip, dev, spec_type); 2075*25e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 2076*25e8c5aaSvikram break; 2077*25e8c5aaSvikram case CT_DEV_EV_ONLINE: 2078*25e8c5aaSvikram contract_device_undegrade(dip, dev, spec_type); 2079*25e8c5aaSvikram contract_device_negend(dip, dev, spec_type, ct_result); 2080*25e8c5aaSvikram break; 2081*25e8c5aaSvikram default: 2082*25e8c5aaSvikram cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " 2083*25e8c5aaSvikram "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", 2084*25e8c5aaSvikram evtype, dev, spec_type, (void *)dip); 2085*25e8c5aaSvikram break; 2086*25e8c5aaSvikram } 2087*25e8c5aaSvikram } 2088*25e8c5aaSvikram 2089*25e8c5aaSvikram /* 2090*25e8c5aaSvikram * Called by I/O framework when a devinfo node is freed to remove the 2091*25e8c5aaSvikram * association between a devinfo node and its contracts. 
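 * The contracts themselves are not destroyed here; each one simply loses
 * its dip linkage (cond_dip is set to NULL) and the hold taken for that
 * linkage is released.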
2092*25e8c5aaSvikram */ 2093*25e8c5aaSvikram void 2094*25e8c5aaSvikram contract_device_remove_dip(dev_info_t *dip) 2095*25e8c5aaSvikram { 2096*25e8c5aaSvikram cont_device_t *ctd; 2097*25e8c5aaSvikram cont_device_t *next; 2098*25e8c5aaSvikram contract_t *ct; 2099*25e8c5aaSvikram 2100*25e8c5aaSvikram mutex_enter(&(DEVI(dip)->devi_ct_lock)); 2101*25e8c5aaSvikram ct_barrier_wait_for_release(dip); 2102*25e8c5aaSvikram 2103*25e8c5aaSvikram for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) { 2104*25e8c5aaSvikram next = list_next(&(DEVI(dip)->devi_ct), ctd); 2105*25e8c5aaSvikram list_remove(&(DEVI(dip)->devi_ct), ctd); 2106*25e8c5aaSvikram ct = &ctd->cond_contract; 2107*25e8c5aaSvikram /* 2108*25e8c5aaSvikram * Unlink the dip associated with this contract 2109*25e8c5aaSvikram */ 2110*25e8c5aaSvikram mutex_enter(&ct->ct_lock); 2111*25e8c5aaSvikram ASSERT(ctd->cond_dip == dip); 2112*25e8c5aaSvikram ctd->cond_dip = NULL; /* no longer linked to dip */ 2113*25e8c5aaSvikram contract_rele(ct); /* remove hold for dip linkage */ 2114*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: " 2115*25e8c5aaSvikram "ctid: %d", ct->ct_id)); 2116*25e8c5aaSvikram mutex_exit(&ct->ct_lock); 2117*25e8c5aaSvikram } 2118*25e8c5aaSvikram ASSERT(list_is_empty(&(DEVI(dip)->devi_ct))); 2119*25e8c5aaSvikram mutex_exit(&(DEVI(dip)->devi_ct_lock)); 2120*25e8c5aaSvikram } 2121*25e8c5aaSvikram 2122*25e8c5aaSvikram /* 2123*25e8c5aaSvikram * Barrier related routines 2124*25e8c5aaSvikram */ 2125*25e8c5aaSvikram static void 2126*25e8c5aaSvikram ct_barrier_acquire(dev_info_t *dip) 2127*25e8c5aaSvikram { 2128*25e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2129*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier")); 2130*25e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 2131*25e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 2132*25e8c5aaSvikram DEVI(dip)->devi_ct_count = 0; 2133*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier")); 2134*25e8c5aaSvikram } 2135*25e8c5aaSvikram 2136*25e8c5aaSvikram static void 2137*25e8c5aaSvikram ct_barrier_release(dev_info_t *dip) 2138*25e8c5aaSvikram { 2139*25e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2140*25e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 2141*25e8c5aaSvikram DEVI(dip)->devi_ct_count = -1; 2142*25e8c5aaSvikram cv_broadcast(&(DEVI(dip)->devi_ct_cv)); 2143*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier")); 2144*25e8c5aaSvikram } 2145*25e8c5aaSvikram 2146*25e8c5aaSvikram static int 2147*25e8c5aaSvikram ct_barrier_held(dev_info_t *dip) 2148*25e8c5aaSvikram { 2149*25e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2150*25e8c5aaSvikram return (DEVI(dip)->devi_ct_count != -1); 2151*25e8c5aaSvikram } 2152*25e8c5aaSvikram 2153*25e8c5aaSvikram static int 2154*25e8c5aaSvikram ct_barrier_empty(dev_info_t *dip) 2155*25e8c5aaSvikram { 2156*25e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2157*25e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count != -1); 2158*25e8c5aaSvikram return (DEVI(dip)->devi_ct_count == 0); 2159*25e8c5aaSvikram } 2160*25e8c5aaSvikram 2161*25e8c5aaSvikram static void 2162*25e8c5aaSvikram ct_barrier_wait_for_release(dev_info_t *dip) 2163*25e8c5aaSvikram { 2164*25e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2165*25e8c5aaSvikram while (DEVI(dip)->devi_ct_count != -1) 2166*25e8c5aaSvikram cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 
2167*25e8c5aaSvikram } 2168*25e8c5aaSvikram 2169*25e8c5aaSvikram static void 2170*25e8c5aaSvikram ct_barrier_decr(dev_info_t *dip) 2171*25e8c5aaSvikram { 2172*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d", 2173*25e8c5aaSvikram DEVI(dip)->devi_ct_count)); 2174*25e8c5aaSvikram 2175*25e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2176*25e8c5aaSvikram ASSERT(DEVI(dip)->devi_ct_count > 0); 2177*25e8c5aaSvikram 2178*25e8c5aaSvikram DEVI(dip)->devi_ct_count--; 2179*25e8c5aaSvikram if (DEVI(dip)->devi_ct_count == 0) { 2180*25e8c5aaSvikram cv_broadcast(&DEVI(dip)->devi_ct_cv); 2181*25e8c5aaSvikram CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast")); 2182*25e8c5aaSvikram } 2183*25e8c5aaSvikram } 2184*25e8c5aaSvikram 2185*25e8c5aaSvikram static void 2186*25e8c5aaSvikram ct_barrier_incr(dev_info_t *dip) 2187*25e8c5aaSvikram { 2188*25e8c5aaSvikram ASSERT(ct_barrier_held(dip)); 2189*25e8c5aaSvikram DEVI(dip)->devi_ct_count++; 2190*25e8c5aaSvikram } 2191*25e8c5aaSvikram 2192*25e8c5aaSvikram static int 2193*25e8c5aaSvikram ct_barrier_wait_for_empty(dev_info_t *dip, int secs) 2194*25e8c5aaSvikram { 2195*25e8c5aaSvikram clock_t abstime; 2196*25e8c5aaSvikram 2197*25e8c5aaSvikram ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2198*25e8c5aaSvikram 2199*25e8c5aaSvikram abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000); 2200*25e8c5aaSvikram while (DEVI(dip)->devi_ct_count) { 2201*25e8c5aaSvikram if (cv_timedwait(&(DEVI(dip)->devi_ct_cv), 2202*25e8c5aaSvikram &(DEVI(dip)->devi_ct_lock), abstime) == -1) { 2203*25e8c5aaSvikram return (-1); 2204*25e8c5aaSvikram } 2205*25e8c5aaSvikram } 2206*25e8c5aaSvikram return (0); 2207*25e8c5aaSvikram } 2208
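/*
 * Descriptive summary only (the authoritative sequence is in
 * contract_device_publish() and contract_device_ack_nack() above): the
 * per-dip negotiation barrier is typically used as follows.
 *
 *	Publisher (contract_device_publish):
 *		mutex_enter(&DEVI(dip)->devi_ct_lock);
 *		ct_barrier_acquire(dip);	wait for count == -1, set it to 0
 *		ct_barrier_incr(dip);		once per negotiated contract
 *		... publish CTE_NEG events ...
 *		(void) ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME);
 *
 *	Holder response (contract_device_ack_nack):
 *		mutex_enter(&DEVI(dip)->devi_ct_lock);
 *		ct_barrier_decr(dip);		cv_broadcast when count hits 0
 *		mutex_exit(&DEVI(dip)->devi_ct_lock);
 *
 *	NEGEND (contract_device_negend -> contract_device_publish):
 *		ct_barrier_release(dip);	count back to -1, wake waiters
 */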