1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/mutex.h> 27 #include <sys/debug.h> 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/kmem.h> 31 #include <sys/thread.h> 32 #include <sys/id_space.h> 33 #include <sys/avl.h> 34 #include <sys/list.h> 35 #include <sys/sysmacros.h> 36 #include <sys/proc.h> 37 #include <sys/contract.h> 38 #include <sys/contract_impl.h> 39 #include <sys/contract/device.h> 40 #include <sys/contract/device_impl.h> 41 #include <sys/cmn_err.h> 42 #include <sys/nvpair.h> 43 #include <sys/policy.h> 44 #include <sys/ddi_impldefs.h> 45 #include <sys/ddi_implfuncs.h> 46 #include <sys/systm.h> 47 #include <sys/stat.h> 48 #include <sys/sunddi.h> 49 #include <sys/esunddi.h> 50 #include <sys/ddi.h> 51 #include <sys/fs/dv_node.h> 52 #include <sys/sunndi.h> 53 #undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ 54 55 /* 56 * Device Contracts 57 * ----------------- 58 * This file contains the core code for the device contracts framework. 
59 * A device contract is an agreement or a contract between a process and 60 * the kernel regarding the state of the device. A device contract may be 61 * created when a relationship is formed between a device and a process 62 * i.e. at open(2) time, or it may be created at some point after the device 63 * has been opened. A device contract once formed may be broken by either party. 64 * A device contract can be broken by the process by an explicit abandon of the 65 * contract or by an implicit abandon when the process exits. A device contract 66 * can be broken by the kernel either asynchronously (without negotiation) or 67 * synchronously (with negotiation). Exactly which happens depends on the device 68 * state transition. The following state diagram shows the transitions between 69 * device states. Only device state transitions currently supported by device 70 * contracts is shown. 71 * 72 * <-- A --> 73 * /-----------------> DEGRADED 74 * | | 75 * | | 76 * | | S 77 * | | | 78 * | | v 79 * v S --> v 80 * ONLINE ------------> OFFLINE 81 * 82 * 83 * In the figure above, the arrows indicate the direction of transition. The 84 * letter S refers to transitions which are inherently synchronous i.e. 85 * require negotiation and the letter A indicates transitions which are 86 * asynchronous i.e. are done without contract negotiations. A good example 87 * of a synchronous transition is the ONLINE -> OFFLINE transition. This 88 * transition cannot happen as long as there are consumers which have the 89 * device open. Thus some form of negotiation needs to happen between the 90 * consumers and the kernel to ensure that consumers either close devices 91 * or disallow the move to OFFLINE. Certain other transitions such as 92 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e. 93 * non-negotiable. 
A device that suffers a fault that degrades its 94 * capabilities will become degraded irrespective of what consumers it has, 95 * so a negotiation in this case is pointless. 96 * 97 * The following device states are currently defined for device contracts: 98 * 99 * CT_DEV_EV_ONLINE 100 * The device is online and functioning normally 101 * CT_DEV_EV_DEGRADED 102 * The device is online but is functioning in a degraded capacity 103 * CT_DEV_EV_OFFLINE 104 * The device is offline and is no longer configured 105 * 106 * A typical consumer of device contracts starts out with a contract 107 * template and adds terms to that template. These include the 108 * "acceptable set" (A-set) term, which is a bitset of device states which 109 * are guaranteed by the contract. If the device moves out of a state in 110 * the A-set, the contract is broken. The breaking of the contract can 111 * be asynchronous in which case a critical contract event is sent to the 112 * contract holder but no negotiations take place. If the breaking of the 113 * contract is synchronous, negotiations are opened between the affected 114 * consumer and the kernel. The kernel does this by sending a critical 115 * event to the consumer with the CTE_NEG flag set indicating that this 116 * is a negotiation event. The consumer can accept this change by sending 117 * an ACK message to the kernel. Alternatively, if it has the necessary 118 * privileges, it can send a NACK message to the kernel which will block 119 * the device state change. To NACK a negotiable event, a process must 120 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 121 * 122 * Other terms include the "minor path" term, specified explicitly if the 123 * contract is not being created at open(2) time or specified implicitly 124 * if the contract is being created at open time via an activated template.
125 * 126 * A contract event is sent on any state change to which the contract 127 * owner has subscribed via the informative or critical event sets. Only 128 * critical events are guaranteed to be delivered. Since all device state 129 * changes are controlled by the kernel and cannot be arbitrarily generated 130 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not 131 * need to be asserted in a process's effective set to designate an event as 132 * critical. To ensure privacy, a process must either have the same effective 133 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege 134 * asserted in its effective set in order to observe device contract events 135 * off the device contract type specific endpoint. 136 * 137 * Yet another term available with device contracts is the "non-negotiable" 138 * term. This term is used to pre-specify a NACK to any contract negotiation. 139 * This term is ignored for asynchronous state changes. For example, a 140 * process may have the A-set {ONLINE|DEGRADED} and make the contract 141 * non-negotiable. In this case, the device contract framework assumes a 142 * NACK for any transition to OFFLINE and blocks the offline. If the A-set 143 * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE 144 * are NACKed but transitions to DEGRADE succeed. 145 * 146 * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract) 147 * happens just before the I/O framework attempts to offline a device 148 * (i.e. detach a device and set the offline flag so that it cannot be 149 * reattached). A device contract holder is expected to either NACK the offline 150 * (if privileged) or release the device and allow the offline to proceed. 151 * 152 * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract) 153 * is generated just before the I/O framework transitions the device state 154 * to "degraded" (i.e.
DEVI_DEVICE_DEGRADED in I/O framework terminology). 155 * 156 * The contract holder is expected to ACK or NACK a negotiation event 157 * within a certain period of time. If the ACK/NACK is not received 158 * within the timeout period, the device contract framework will behave 159 * as if the contract does not exist and will proceed with the event. 160 * 161 * Unlike a process contract a device contract does not need to exist 162 * once it is abandoned, since it does not define a fault boundary. It 163 * merely represents an agreement between a process and the kernel 164 * regarding the state of the device. Once the process has abandoned 165 * the contract (either implicitly via a process exit or explicitly) 166 * the kernel has no reason to retain the contract. As a result 167 * device contracts are neither inheritable nor need to exist in an 168 * orphan state. 169 * 170 * A device unlike a process may exist in multiple contracts and has 171 * a "life" outside a device contract. A device unlike a process 172 * may exist without an associated contract. Unlike a process contract 173 * a device contract may be formed after a binding relationship is 174 * formed between a process and a device. 175 * 176 * IMPLEMENTATION NOTES 177 * ==================== 178 * DATA STRUCTURES 179 * ---------------- 180 * The heart of the device contracts implementation is the device contract 181 * private cont_device_t (or ctd for short) data structure. It encapsulates 182 * the generic contract_t data structure and has a number of private 183 * fields. 184 * These include: 185 * cond_minor: The minor device that is the subject of the contract 186 * cond_aset: The bitset of states which are guaranteed by the 187 * contract 188 * cond_noneg: If set, indicates that the result of negotiation has 189 * been predefined to be a NACK 190 * In addition, there are other device identifiers such as the devinfo node, 191 * dev_t and spec_type of the minor node.
There are also a few fields that 192 * are used during negotiation to maintain state. See 193 * uts/common/sys/contract/device_impl.h 194 * for details. 195 * The ctd structure represents the device private part of a contract of 196 * type "device" 197 * 198 * Another data structure used by device contracts is ctmpl_device. It is 199 * the device contracts private part of the contract template structure. It 200 * encapsulates the generic template structure "ct_template_t" and includes 201 * the following device contract specific fields 202 * ctd_aset: The bitset of states that should be guaranteed by a 203 * contract 204 * ctd_noneg: If set, indicates that contract should NACK a 205 * negotiation 206 * ctd_minor: The devfs_path (without the /devices prefix) of the 207 * minor node that is the subject of the contract. 208 * 209 * ALGORITHMS 210 * --------- 211 * There are three sets of routines in this file 212 * Template related routines 213 * ------------------------- 214 * These routines provide support for template related operations initiated 215 * via the generic template operations. These include routines that dup 216 * a template, free it, and set various terms in the template 217 * (such as the minor node path, the acceptable state set (or A-set) 218 * and the non-negotiable term) as well as a routine to query the 219 * device specific portion of the template for the abovementioned terms. 220 * There is also a routine to create (ctmpl_device_create) that is used to 221 * create a contract from a template. This routine calls (after initial 222 * setup) the common function used to create a device contract 223 * (contract_device_create). 224 * 225 * core device contract implementation 226 * ---------------------------------- 227 * These routines support the generic contract framework to provide 228 * functionality that allows contracts to be created, managed and 229 * destroyed.
The contract_device_create() routine is a routine used 230 * to create a contract from a template (either via an explicit create 231 * operation on a template or implicitly via an open with an 232 * activated template.). The contract_device_free() routine assists 233 * in freeing the device contract specific parts. There are routines 234 * used to abandon (contract_device_abandon) a device contract as well 235 * as a routine to destroy (which despite its name does not destroy, 236 * it only moves a contract to a dead state) a contract. 237 * There is also a routine to return status information about a 238 * contract - the level of detail depends on what is requested by the 239 * user. A value of CTD_FIXED only returns fixed length fields such 240 * as the A-set, state of device and value of the "noneg" term. If 241 * CTD_ALL is specified, the minor node path is returned as well. 242 * 243 * In addition there are interfaces (contract_device_ack/nack) which 244 * are used to support negotiation between userland processes and 245 * device contracts. These interfaces record the acknowledgement 246 * or lack thereof for negotiation events and help determine if the 247 * negotiated event should occur. 248 * 249 * "backend routines" 250 * ----------------- 251 * The backend routines form the interface between the I/O framework 252 * and the device contract subsystem. These routines, allow the I/O 253 * framework to call into the device contract subsystem to notify it of 254 * impending changes to a device state as well as to inform of the 255 * final disposition of such attempted state changes. Routines in this 256 * class include contract_device_offline() that indicates an attempt to 257 * offline a device, contract_device_degrade() that indicates that 258 * a device is moving to the degraded state and contract_device_negend() 259 * that is used by the I/O framework to inform the contracts subsystem of 260 * the final disposition of an attempted operation. 
261 * 262 * SUMMARY 263 * ------- 264 * A contract starts its life as a template. A process allocates a device 265 * contract template and sets various terms: 266 * The A-set 267 * The device minor node 268 * Critical and informative events 269 * The noneg i.e. no negotiation term 270 * Setting of these terms in the template is done via the 271 * ctmpl_device_set() entry point in this file. A process can query a 272 * template to determine the terms already set in the template - this is 273 * facilitated by the ctmpl_device_get() routine. 274 * 275 * Once all the appropriate terms are set, the contract is instantiated via 276 * one of two methods 277 * - via an explicit create operation - this is facilitated by the 278 * ctmpl_device_create() entry point 279 * - synchronously with the open(2) system call - this is achieved via the 280 * contract_device_open() routine. 281 * The core work for both these above functions is done by 282 * contract_device_create() 283 * 284 * A contract once created can be queried for its status. Support for 285 * status info is provided by both the common contracts framework and by 286 * the "device" contract type. If the level of detail requested is 287 * CTD_COMMON, only the common contract framework data is used. Higher 288 * levels of detail result in calls to contract_device_status() to supply 289 * device contract type specific status information. 290 * 291 * A contract once created may be abandoned either explicitly or implicitly. 292 * In either case, the contract_device_abandon() function is invoked. This 293 * function merely calls contract_destroy() which moves the contract to 294 * the DEAD state. The device contract portion of destroy processing is 295 * provided by contract_device_destroy() which merely disassociates the 296 * contract from its device devinfo node. A contract in the DEAD state is 297 * not freed. It hangs around until all references to the contract are 298 * gone.
When that happens, the contract is finally deallocated. The 299 * device contract specific portion of the free is done by 300 * contract_device_free() which finally frees the device contract specific 301 * data structure (cont_device_t). 302 * 303 * When a device undergoes a state change, the I/O framework calls the 304 * corresponding device contract entry point. For example, when a device 305 * is about to go OFFLINE, the routine contract_device_offline() is 306 * invoked. Similarly if a device moves to DEGRADED state, the routine 307 * contract_device_degrade() function is called. These functions call the 308 * core routine contract_device_publish(). This function determines via 309 * the function is_sync_neg() whether an event is a synchronous (i.e. 310 * negotiable) event or not. In the former case contract_device_publish() 311 * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs 312 * and/or NACKs from contract holders. In the latter case, it simply 313 * publishes the event and does not wait. In the negotiation case, ACKs or 314 * NACKs from userland consumers results in contract_device_ack_nack() 315 * being called where the result of the negotiation is recorded in the 316 * contract data structure. Once all outstanding contract owners have 317 * responded, the device contract code in wait_for_acks() determines the 318 * final result of the negotiation. A single NACK overrides all other ACKs 319 * If there is no NACK, then a single ACK will result in an overall ACK 320 * result. If there are no ACKs or NACKs, then the result CT_NONE is 321 * returned back to the I/O framework. Once the event is permitted or 322 * blocked, the I/O framework proceeds or aborts the state change. The 323 * I/O framework then calls contract_device_negend() with a result code 324 * indicating final disposition of the event. 
This call releases the 325 * barrier and other state associated with the previous negotiation, 326 * which permits the next event (if any) to come into the device contract 327 * framework. 328 * 329 * Finally, a device that has outstanding contracts may be removed from 330 * the system which results in its devinfo node being freed. The devinfo 331 * free routine in the I/O framework, calls into the device contract 332 * function - contract_device_remove_dip(). This routine, disassociates 333 * the dip from all contracts associated with the devinfo node being freed, 334 * allowing the devinfo node to be freed. 335 * 336 * LOCKING 337 * --------- 338 * There are four sets of data that need to be protected by locks 339 * 340 * i) device contract specific portion of the contract template - This data 341 * is protected by the template lock ctmpl_lock. 342 * 343 * ii) device contract specific portion of the contract - This data is 344 * protected by the contract lock ct_lock 345 * 346 * iii) The linked list of contracts hanging off a devinfo node - This 347 * list is protected by the per-devinfo node lock devi_ct_lock 348 * 349 * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv 350 * and devi_ct_count that controls state changes to a dip 351 * 352 * The template lock is independent in that none of the other locks in this 353 * file may be taken while holding the template lock (and vice versa).
354 * 355 * The remaining three locks have the following lock order 356 * 357 * devi_ct_lock -> ct_count barrier -> ct_lock 358 * 359 */ 360 361 static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, 362 int spec_type, proc_t *owner, int *errorp); 363 364 /* barrier routines */ 365 static void ct_barrier_acquire(dev_info_t *dip); 366 static void ct_barrier_release(dev_info_t *dip); 367 static int ct_barrier_held(dev_info_t *dip); 368 static int ct_barrier_empty(dev_info_t *dip); 369 static void ct_barrier_wait_for_release(dev_info_t *dip); 370 static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); 371 static void ct_barrier_decr(dev_info_t *dip); 372 static void ct_barrier_incr(dev_info_t *dip); 373 374 ct_type_t *device_type; 375 376 /* 377 * Macro predicates for determining when events should be sent and how. 378 */ 379 #define EVSENDP(ctd, flag) \ 380 ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) 381 382 #define EVINFOP(ctd, flag) \ 383 ((ctd->cond_contract.ct_ev_crit & flag) == 0) 384 385 /* 386 * State transition table showing which transitions are synchronous and which 387 * are not. 388 */ 389 struct ct_dev_negtable { 390 uint_t st_old; 391 uint_t st_new; 392 uint_t st_neg; 393 } ct_dev_negtable[] = { 394 {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, 395 {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, 396 {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, 397 {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, 398 {0} 399 }; 400 401 /* 402 * Device contract template implementation 403 */ 404 405 /* 406 * ctmpl_device_dup 407 * 408 * The device contract template dup entry point. 409 * This simply copies all the fields (generic as well as device contract 410 * specific) fields of the original. 
411 */ 412 static struct ct_template * 413 ctmpl_device_dup(struct ct_template *template) 414 { 415 ctmpl_device_t *new; 416 ctmpl_device_t *old = template->ctmpl_data; 417 char *buf; 418 char *minor; 419 420 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 421 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 422 423 /* 424 * copy generic fields. 425 * ctmpl_copy returns with old template lock held 426 */ 427 ctmpl_copy(&new->ctd_ctmpl, template); 428 429 new->ctd_ctmpl.ctmpl_data = new; 430 new->ctd_aset = old->ctd_aset; 431 new->ctd_minor = NULL; 432 new->ctd_noneg = old->ctd_noneg; 433 434 if (old->ctd_minor) { 435 ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); 436 bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); 437 } else { 438 kmem_free(buf, MAXPATHLEN); 439 buf = NULL; 440 } 441 442 mutex_exit(&template->ctmpl_lock); 443 if (buf) { 444 minor = i_ddi_strdup(buf, KM_SLEEP); 445 kmem_free(buf, MAXPATHLEN); 446 buf = NULL; 447 } else { 448 minor = NULL; 449 } 450 mutex_enter(&template->ctmpl_lock); 451 452 if (minor) { 453 new->ctd_minor = minor; 454 } 455 456 ASSERT(buf == NULL); 457 return (&new->ctd_ctmpl); 458 } 459 460 /* 461 * ctmpl_device_free 462 * 463 * The device contract template free entry point. Just 464 * frees the template. 465 */ 466 static void 467 ctmpl_device_free(struct ct_template *template) 468 { 469 ctmpl_device_t *dtmpl = template->ctmpl_data; 470 471 if (dtmpl->ctd_minor) 472 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 473 474 kmem_free(dtmpl, sizeof (ctmpl_device_t)); 475 } 476 477 /* 478 * SAFE_EV is the set of events which a non-privileged process is 479 * allowed to make critical. An unprivileged device contract owner has 480 * no control over when a device changes state, so all device events 481 * can be in the critical set. 482 * 483 * EXCESS tells us if "value", a critical event set, requires 484 * additional privilege. For device contracts EXCESS currently 485 * evaluates to 0. 
486 */ 487 #define SAFE_EV (CT_DEV_ALLEVENT) 488 #define EXCESS(value) ((value) & ~SAFE_EV) 489 490 491 /* 492 * ctmpl_device_set 493 * 494 * The device contract template set entry point. Sets various terms in the 495 * template. The non-negotiable term can only be set if the process has 496 * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 497 */ 498 static int 499 ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam, 500 const cred_t *cr) 501 { 502 ctmpl_device_t *dtmpl = tmpl->ctmpl_data; 503 ct_param_t *param = &kparam->param; 504 int error; 505 dev_info_t *dip; 506 int spec_type; 507 uint64_t param_value; 508 char *str_value; 509 510 ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); 511 512 if (param->ctpm_id == CTDP_MINOR) { 513 str_value = (char *)kparam->ctpm_kbuf; 514 str_value[param->ctpm_size - 1] = '\0'; 515 } else { 516 if (param->ctpm_size < sizeof (uint64_t)) 517 return (EINVAL); 518 param_value = *(uint64_t *)kparam->ctpm_kbuf; 519 } 520 521 switch (param->ctpm_id) { 522 case CTDP_ACCEPT: 523 if (param_value & ~CT_DEV_ALLEVENT) 524 return (EINVAL); 525 if (param_value == 0) 526 return (EINVAL); 527 if (param_value == CT_DEV_ALLEVENT) 528 return (EINVAL); 529 530 dtmpl->ctd_aset = param_value; 531 break; 532 case CTDP_NONEG: 533 if (param_value != CTDP_NONEG_SET && 534 param_value != CTDP_NONEG_CLEAR) 535 return (EINVAL); 536 537 /* 538 * only privileged processes can designate a contract 539 * non-negotiatble. 
540 */ 541 if (param_value == CTDP_NONEG_SET && 542 (error = secpolicy_sys_devices(cr)) != 0) { 543 return (error); 544 } 545 546 dtmpl->ctd_noneg = param_value; 547 break; 548 549 case CTDP_MINOR: 550 if (*str_value != '/' || 551 strncmp(str_value, "/devices/", 552 strlen("/devices/")) == 0 || 553 strstr(str_value, "../devices/") != NULL || 554 strchr(str_value, ':') == NULL) { 555 return (EINVAL); 556 } 557 558 spec_type = 0; 559 dip = NULL; 560 if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) { 561 return (ERANGE); 562 } 563 ddi_release_devi(dip); 564 565 if (spec_type != S_IFCHR && spec_type != S_IFBLK) { 566 return (EINVAL); 567 } 568 569 if (dtmpl->ctd_minor != NULL) { 570 kmem_free(dtmpl->ctd_minor, 571 strlen(dtmpl->ctd_minor) + 1); 572 } 573 dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP); 574 break; 575 case CTP_EV_CRITICAL: 576 /* 577 * Currently for device contracts, any event 578 * may be added to the critical set. We retain the 579 * following code however for future enhancements. 580 */ 581 if (EXCESS(param_value) && 582 (error = secpolicy_contract_event(cr)) != 0) 583 return (error); 584 tmpl->ctmpl_ev_crit = param_value; 585 break; 586 default: 587 return (EINVAL); 588 } 589 590 return (0); 591 } 592 593 /* 594 * ctmpl_device_get 595 * 596 * The device contract template get entry point. Simply fetches and 597 * returns the value of the requested term. 
598 */ 599 static int 600 ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam) 601 { 602 ctmpl_device_t *dtmpl = template->ctmpl_data; 603 ct_param_t *param = &kparam->param; 604 uint64_t *param_value = kparam->ctpm_kbuf; 605 606 ASSERT(MUTEX_HELD(&template->ctmpl_lock)); 607 608 if (param->ctpm_id == CTDP_ACCEPT || 609 param->ctpm_id == CTDP_NONEG) { 610 if (param->ctpm_size < sizeof (uint64_t)) 611 return (EINVAL); 612 kparam->ret_size = sizeof (uint64_t); 613 } 614 615 switch (param->ctpm_id) { 616 case CTDP_ACCEPT: 617 *param_value = dtmpl->ctd_aset; 618 break; 619 case CTDP_NONEG: 620 *param_value = dtmpl->ctd_noneg; 621 break; 622 case CTDP_MINOR: 623 if (dtmpl->ctd_minor) { 624 kparam->ret_size = strlcpy((char *)kparam->ctpm_kbuf, 625 dtmpl->ctd_minor, param->ctpm_size); 626 kparam->ret_size++; 627 } else { 628 return (ENOENT); 629 } 630 break; 631 default: 632 return (EINVAL); 633 } 634 635 return (0); 636 } 637 638 /* 639 * Device contract type specific portion of creating a contract using 640 * a specified template 641 */ 642 /*ARGSUSED*/ 643 int 644 ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) 645 { 646 ctmpl_device_t *dtmpl; 647 char *buf; 648 dev_t dev; 649 int spec_type; 650 int error; 651 cont_device_t *ctd; 652 653 if (ctidp == NULL) 654 return (EINVAL); 655 656 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 657 658 dtmpl = template->ctmpl_data; 659 660 mutex_enter(&template->ctmpl_lock); 661 if (dtmpl->ctd_minor == NULL) { 662 /* incomplete template */ 663 mutex_exit(&template->ctmpl_lock); 664 kmem_free(buf, MAXPATHLEN); 665 return (EINVAL); 666 } else { 667 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 668 bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); 669 } 670 mutex_exit(&template->ctmpl_lock); 671 672 spec_type = 0; 673 dev = NODEV; 674 if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || 675 dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || 676 (spec_type != S_IFCHR && spec_type != 
S_IFBLK)) { 677 CT_DEBUG((CE_WARN, 678 "tmpl_create: failed to find device: %s", buf)); 679 kmem_free(buf, MAXPATHLEN); 680 return (ERANGE); 681 } 682 kmem_free(buf, MAXPATHLEN); 683 684 ctd = contract_device_create(template->ctmpl_data, 685 dev, spec_type, curproc, &error); 686 687 if (ctd == NULL) { 688 CT_DEBUG((CE_WARN, "Failed to create device contract for " 689 "process (%d) with device (devt = %lu, spec_type = %s)", 690 curproc->p_pid, dev, 691 spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); 692 return (error); 693 } 694 695 mutex_enter(&ctd->cond_contract.ct_lock); 696 *ctidp = ctd->cond_contract.ct_id; 697 mutex_exit(&ctd->cond_contract.ct_lock); 698 699 return (0); 700 } 701 702 /* 703 * Device contract specific template entry points 704 */ 705 static ctmplops_t ctmpl_device_ops = { 706 ctmpl_device_dup, /* ctop_dup */ 707 ctmpl_device_free, /* ctop_free */ 708 ctmpl_device_set, /* ctop_set */ 709 ctmpl_device_get, /* ctop_get */ 710 ctmpl_device_create, /* ctop_create */ 711 CT_DEV_ALLEVENT /* all device events bitmask */ 712 }; 713 714 715 /* 716 * Device contract implementation 717 */ 718 719 /* 720 * contract_device_default 721 * 722 * The device contract default template entry point. Creates a 723 * device contract template with a default A-set and no "noneg" , 724 * with informative degrade events and critical offline events. 725 * There is no default minor path. 
726 */ 727 static ct_template_t * 728 contract_device_default(void) 729 { 730 ctmpl_device_t *new; 731 732 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 733 ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); 734 735 new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; 736 new->ctd_noneg = 0; 737 new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; 738 new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; 739 740 return (&new->ctd_ctmpl); 741 } 742 743 /* 744 * contract_device_free 745 * 746 * Destroys the device contract specific portion of a contract and 747 * frees the contract. 748 */ 749 static void 750 contract_device_free(contract_t *ct) 751 { 752 cont_device_t *ctd = ct->ct_data; 753 754 ASSERT(ctd->cond_minor); 755 ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 756 kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); 757 758 ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && 759 ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); 760 761 ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); 762 763 ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); 764 ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); 765 766 ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); 767 ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); 768 769 ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0)); 770 ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); 771 772 ASSERT(!list_link_active(&ctd->cond_next)); 773 774 kmem_free(ctd, sizeof (cont_device_t)); 775 } 776 777 /* 778 * contract_device_abandon 779 * 780 * The device contract abandon entry point. 781 */ 782 static void 783 contract_device_abandon(contract_t *ct) 784 { 785 ASSERT(MUTEX_HELD(&ct->ct_lock)); 786 787 /* 788 * device contracts cannot be inherited or orphaned. 789 * Move the contract to the DEAD_STATE. It will be freed 790 * once all references to it are gone. 
791 */ 792 contract_destroy(ct); 793 } 794 795 /* 796 * contract_device_destroy 797 * 798 * The device contract destroy entry point. 799 * Called from contract_destroy() to do any type specific destroy. Note 800 * that destroy is a misnomer - this does not free the contract, it only 801 * moves it to the dead state. A contract is actually freed via 802 * contract_rele() -> contract_dtor(), contop_free() 803 */ 804 static void 805 contract_device_destroy(contract_t *ct) 806 { 807 cont_device_t *ctd = ct->ct_data; 808 dev_info_t *dip = ctd->cond_dip; 809 810 ASSERT(MUTEX_HELD(&ct->ct_lock)); 811 812 if (dip == NULL) { 813 /* 814 * The dip has been removed, this is a dangling contract 815 * Check that dip linkages are NULL 816 */ 817 ASSERT(!list_link_active(&ctd->cond_next)); 818 CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no " 819 "devinfo node. contract ctid : %d", ct->ct_id)); 820 return; 821 } 822 823 /* 824 * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock 825 */ 826 mutex_exit(&ct->ct_lock); 827 828 /* 829 * Waiting for the barrier to be released is strictly speaking not 830 * necessary. But it simplifies the implementation of 831 * contract_device_publish() by establishing the invariant that 832 * device contracts cannot go away during negotiation. 833 */ 834 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 835 ct_barrier_wait_for_release(dip); 836 mutex_enter(&ct->ct_lock); 837 838 list_remove(&(DEVI(dip)->devi_ct), ctd); 839 ctd->cond_dip = NULL; /* no longer linked to dip */ 840 contract_rele(ct); /* remove hold for dip linkage */ 841 842 mutex_exit(&ct->ct_lock); 843 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 844 mutex_enter(&ct->ct_lock); 845 } 846 847 /* 848 * contract_device_status 849 * 850 * The device contract status entry point. 
Called when level of "detail" 851 * is either CTD_FIXED or CTD_ALL 852 * 853 */ 854 static void 855 contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, 856 void *status, model_t model) 857 { 858 cont_device_t *ctd = ct->ct_data; 859 860 ASSERT(detail == CTD_FIXED || detail == CTD_ALL); 861 862 mutex_enter(&ct->ct_lock); 863 contract_status_common(ct, zone, status, model); 864 865 /* 866 * There's no need to hold the contract lock while accessing static 867 * data like aset or noneg. But since we need the lock to access other 868 * data like state, we hold it anyway. 869 */ 870 VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); 871 VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); 872 VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); 873 874 if (detail == CTD_FIXED) { 875 mutex_exit(&ct->ct_lock); 876 return; 877 } 878 879 ASSERT(ctd->cond_minor); 880 VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); 881 882 mutex_exit(&ct->ct_lock); 883 } 884 885 /* 886 * Converts a result integer into the corresponding string. Used for printing 887 * messages 888 */ 889 static char * 890 result_str(uint_t result) 891 { 892 switch (result) { 893 case CT_ACK: 894 return ("CT_ACK"); 895 case CT_NACK: 896 return ("CT_NACK"); 897 case CT_NONE: 898 return ("CT_NONE"); 899 default: 900 return ("UNKNOWN"); 901 } 902 } 903 904 /* 905 * Converts a device state integer constant into the corresponding string. 906 * Used to print messages. 907 */ 908 static char * 909 state_str(uint_t state) 910 { 911 switch (state) { 912 case CT_DEV_EV_ONLINE: 913 return ("ONLINE"); 914 case CT_DEV_EV_DEGRADED: 915 return ("DEGRADED"); 916 case CT_DEV_EV_OFFLINE: 917 return ("OFFLINE"); 918 default: 919 return ("UNKNOWN"); 920 } 921 } 922 923 /* 924 * Routine that determines if a particular CT_DEV_EV_? event corresponds to a 925 * synchronous state change or not. 
926 */ 927 static int 928 is_sync_neg(uint_t old, uint_t new) 929 { 930 int i; 931 932 ASSERT(old & CT_DEV_ALLEVENT); 933 ASSERT(new & CT_DEV_ALLEVENT); 934 935 if (old == new) { 936 CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", 937 state_str(new))); 938 return (-2); 939 } 940 941 for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { 942 if (old == ct_dev_negtable[i].st_old && 943 new == ct_dev_negtable[i].st_new) { 944 return (ct_dev_negtable[i].st_neg); 945 } 946 } 947 948 CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " 949 "old = %s -> new = %s", state_str(old), state_str(new))); 950 951 return (-1); 952 } 953 954 /* 955 * Used to cleanup cached dv_nodes so that when a device is released by 956 * a contract holder, its devinfo node can be successfully detached. 957 */ 958 static int 959 contract_device_dvclean(dev_info_t *dip) 960 { 961 char *devnm; 962 dev_info_t *pdip; 963 int error; 964 965 ASSERT(dip); 966 967 /* pdip can be NULL if we have contracts against the root dip */ 968 pdip = ddi_get_parent(dip); 969 970 if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { 971 char *path; 972 973 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 974 (void) ddi_pathname(dip, path); 975 CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " 976 "device=%s", path)); 977 kmem_free(path, MAXPATHLEN); 978 return (EDEADLOCK); 979 } 980 981 if (pdip) { 982 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 983 (void) ddi_deviname(dip, devnm); 984 error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); 985 kmem_free(devnm, MAXNAMELEN + 1); 986 } else { 987 error = devfs_clean(dip, NULL, DV_CLEAN_FORCE); 988 } 989 990 return (error); 991 } 992 993 /* 994 * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. 995 * Results in the ACK or NACK being recorded on the dip for one particular 996 * contract. 
The device contracts framework evaluates the ACK/NACKs for all 997 * contracts against a device to determine if a particular device state change 998 * should be allowed. 999 */ 1000 static int 1001 contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, 1002 uint_t cmd) 1003 { 1004 cont_device_t *ctd = ct->ct_data; 1005 dev_info_t *dip; 1006 ctid_t ctid; 1007 int error; 1008 1009 ctid = ct->ct_id; 1010 1011 CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); 1012 1013 mutex_enter(&ct->ct_lock); 1014 CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); 1015 1016 dip = ctd->cond_dip; 1017 1018 ASSERT(ctd->cond_minor); 1019 ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 1020 1021 /* 1022 * Negotiation only if new state is not in A-set 1023 */ 1024 ASSERT(!(ctd->cond_aset & evtype)); 1025 1026 /* 1027 * Negotiation only if transition is synchronous 1028 */ 1029 ASSERT(is_sync_neg(ctd->cond_state, evtype)); 1030 1031 /* 1032 * We shouldn't be negotiating if the "noneg" flag is set 1033 */ 1034 ASSERT(!ctd->cond_noneg); 1035 1036 if (dip) 1037 ndi_hold_devi(dip); 1038 1039 mutex_exit(&ct->ct_lock); 1040 1041 /* 1042 * dv_clean only if !NACK and offline state change 1043 */ 1044 if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { 1045 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); 1046 error = contract_device_dvclean(dip); 1047 if (error != 0) { 1048 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", 1049 ctid)); 1050 ddi_release_devi(dip); 1051 } 1052 } 1053 1054 mutex_enter(&ct->ct_lock); 1055 1056 if (dip) 1057 ddi_release_devi(dip); 1058 1059 if (dip == NULL) { 1060 if (ctd->cond_currev_id != evid) { 1061 CT_DEBUG((CE_WARN, "%sACK for non-current event " 1062 "(type=%s, id=%llu) on removed device", 1063 cmd == CT_NACK ? 
"N" : "", 1064 state_str(evtype), (unsigned long long)evid)); 1065 CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", 1066 ctid)); 1067 } else { 1068 ASSERT(ctd->cond_currev_type == evtype); 1069 CT_DEBUG((CE_WARN, "contract_ack: no such device: " 1070 "ctid: %d", ctid)); 1071 } 1072 error = (ct->ct_state == CTS_DEAD) ? ESRCH : 1073 ((cmd == CT_NACK) ? ETIMEDOUT : 0); 1074 mutex_exit(&ct->ct_lock); 1075 return (error); 1076 } 1077 1078 /* 1079 * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock 1080 */ 1081 mutex_exit(&ct->ct_lock); 1082 1083 mutex_enter(&DEVI(dip)->devi_ct_lock); 1084 mutex_enter(&ct->ct_lock); 1085 if (ctd->cond_currev_id != evid) { 1086 char *buf; 1087 mutex_exit(&ct->ct_lock); 1088 mutex_exit(&DEVI(dip)->devi_ct_lock); 1089 ndi_hold_devi(dip); 1090 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1091 (void) ddi_pathname(dip, buf); 1092 ddi_release_devi(dip); 1093 CT_DEBUG((CE_WARN, "%sACK for non-current event" 1094 "(type=%s, id=%llu) on device %s", 1095 cmd == CT_NACK ? "N" : "", 1096 state_str(evtype), (unsigned long long)evid, buf)); 1097 kmem_free(buf, MAXPATHLEN); 1098 CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", 1099 cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); 1100 return (cmd == CT_ACK ? 0 : ETIMEDOUT); 1101 } 1102 1103 ASSERT(ctd->cond_currev_type == evtype); 1104 ASSERT(cmd == CT_ACK || cmd == CT_NACK); 1105 1106 CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", 1107 cmd == CT_NACK ? "N" : "", ctid)); 1108 1109 ctd->cond_currev_ack = cmd; 1110 mutex_exit(&ct->ct_lock); 1111 1112 ct_barrier_decr(dip); 1113 mutex_exit(&DEVI(dip)->devi_ct_lock); 1114 1115 CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); 1116 1117 return (0); 1118 } 1119 1120 /* 1121 * Invoked when a userland contract holder approves (i.e. 
ACKs) a state change 1122 */ 1123 static int 1124 contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) 1125 { 1126 return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); 1127 } 1128 1129 /* 1130 * Invoked when a userland contract holder blocks (i.e. NACKs) a state change 1131 */ 1132 static int 1133 contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) 1134 { 1135 return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); 1136 } 1137 1138 /* 1139 * Creates a new contract synchronously with the breaking of an existing 1140 * contract. Currently not supported. 1141 */ 1142 /*ARGSUSED*/ 1143 static int 1144 contract_device_newct(contract_t *ct) 1145 { 1146 return (ENOTSUP); 1147 } 1148 1149 /* 1150 * Core device contract implementation entry points 1151 */ 1152 static contops_t contract_device_ops = { 1153 contract_device_free, /* contop_free */ 1154 contract_device_abandon, /* contop_abandon */ 1155 contract_device_destroy, /* contop_destroy */ 1156 contract_device_status, /* contop_status */ 1157 contract_device_ack, /* contop_ack */ 1158 contract_device_nack, /* contop_nack */ 1159 contract_qack_notsup, /* contop_qack */ 1160 contract_device_newct /* contop_newct */ 1161 }; 1162 1163 /* 1164 * contract_device_init 1165 * 1166 * Initializes the device contract type. 1167 */ 1168 void 1169 contract_device_init(void) 1170 { 1171 device_type = contract_type_init(CTT_DEVICE, "device", 1172 &contract_device_ops, contract_device_default); 1173 } 1174 1175 /* 1176 * contract_device_create 1177 * 1178 * create a device contract given template "tmpl" and the "owner" process. 1179 * May fail and return NULL if project.max-contracts would have been exceeded. 
1180 * 1181 * Common device contract creation routine called for both open-time and 1182 * non-open time device contract creation 1183 */ 1184 static cont_device_t * 1185 contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, 1186 proc_t *owner, int *errorp) 1187 { 1188 cont_device_t *ctd; 1189 char *minor; 1190 char *path; 1191 dev_info_t *dip; 1192 1193 ASSERT(dtmpl != NULL); 1194 ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); 1195 ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); 1196 ASSERT(errorp); 1197 1198 *errorp = 0; 1199 1200 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1201 1202 mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 1203 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 1204 bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); 1205 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1206 1207 dip = e_ddi_hold_devi_by_path(path, 0); 1208 if (dip == NULL) { 1209 cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " 1210 "for device path (%s)", path); 1211 kmem_free(path, MAXPATHLEN); 1212 *errorp = ERANGE; 1213 return (NULL); 1214 } 1215 1216 /* 1217 * Lock out any parallel contract negotiations 1218 */ 1219 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1220 ct_barrier_acquire(dip); 1221 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1222 1223 minor = i_ddi_strdup(path, KM_SLEEP); 1224 kmem_free(path, MAXPATHLEN); 1225 1226 (void) contract_type_pbundle(device_type, owner); 1227 1228 ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); 1229 1230 /* 1231 * Only we hold a refernce to this contract. 
Safe to access 1232 * the fields without a ct_lock 1233 */ 1234 ctd->cond_minor = minor; 1235 /* 1236 * It is safe to set the dip pointer in the contract 1237 * as the contract will always be destroyed before the dip 1238 * is released 1239 */ 1240 ctd->cond_dip = dip; 1241 ctd->cond_devt = dev; 1242 ctd->cond_spec = spec_type; 1243 1244 /* 1245 * Since we are able to lookup the device, it is either 1246 * online or degraded 1247 */ 1248 ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 1249 CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; 1250 1251 mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 1252 ctd->cond_aset = dtmpl->ctd_aset; 1253 ctd->cond_noneg = dtmpl->ctd_noneg; 1254 1255 /* 1256 * contract_ctor() initailizes the common portion of a contract 1257 * contract_dtor() destroys the common portion of a contract 1258 */ 1259 if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, 1260 ctd, 0, owner, B_TRUE)) { 1261 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1262 /* 1263 * contract_device_free() destroys the type specific 1264 * portion of a contract and frees the contract. 
1265 * The "minor" path and "cred" is a part of the type specific 1266 * portion of the contract and will be freed by 1267 * contract_device_free() 1268 */ 1269 contract_device_free(&ctd->cond_contract); 1270 1271 /* release barrier */ 1272 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1273 ct_barrier_release(dip); 1274 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1275 1276 ddi_release_devi(dip); 1277 *errorp = EAGAIN; 1278 return (NULL); 1279 } 1280 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1281 1282 mutex_enter(&ctd->cond_contract.ct_lock); 1283 ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; 1284 ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; 1285 ctd->cond_contract.ct_ntime.ctm_start = -1; 1286 ctd->cond_contract.ct_qtime.ctm_start = -1; 1287 mutex_exit(&ctd->cond_contract.ct_lock); 1288 1289 /* 1290 * Insert device contract into list hanging off the dip 1291 * Bump up the ref-count on the contract to reflect this 1292 */ 1293 contract_hold(&ctd->cond_contract); 1294 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1295 list_insert_tail(&(DEVI(dip)->devi_ct), ctd); 1296 1297 /* release barrier */ 1298 ct_barrier_release(dip); 1299 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1300 1301 ddi_release_devi(dip); 1302 1303 return (ctd); 1304 } 1305 1306 /* 1307 * Called when a device is successfully opened to create an open-time contract 1308 * i.e. synchronously with a device open. 1309 */ 1310 int 1311 contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) 1312 { 1313 ctmpl_device_t *dtmpl; 1314 ct_template_t *tmpl; 1315 cont_device_t *ctd; 1316 char *path; 1317 klwp_t *lwp; 1318 int error; 1319 1320 if (ctpp) 1321 *ctpp = NULL; 1322 1323 /* 1324 * Check if we are in user-context i.e. 
if we have an lwp 1325 */ 1326 lwp = ttolwp(curthread); 1327 if (lwp == NULL) { 1328 CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); 1329 return (0); 1330 } 1331 1332 tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); 1333 if (tmpl == NULL) { 1334 return (0); 1335 } 1336 dtmpl = tmpl->ctmpl_data; 1337 1338 /* 1339 * If the user set a minor path in the template before an open, 1340 * ignore it. We use the minor path of the actual minor opened. 1341 */ 1342 mutex_enter(&tmpl->ctmpl_lock); 1343 if (dtmpl->ctd_minor != NULL) { 1344 CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " 1345 "ignoring device minor path in active template: %s", 1346 curproc->p_pid, dtmpl->ctd_minor)); 1347 /* 1348 * This is a copy of the actual activated template. 1349 * Safe to make changes such as freeing the minor 1350 * path in the template. 1351 */ 1352 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 1353 dtmpl->ctd_minor = NULL; 1354 } 1355 mutex_exit(&tmpl->ctmpl_lock); 1356 1357 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1358 1359 if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { 1360 CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " 1361 "minor path from dev_t,spec {%lu, %d} for process (%d)", 1362 dev, spec_type, curproc->p_pid)); 1363 ctmpl_free(tmpl); 1364 kmem_free(path, MAXPATHLEN); 1365 return (1); 1366 } 1367 1368 mutex_enter(&tmpl->ctmpl_lock); 1369 ASSERT(dtmpl->ctd_minor == NULL); 1370 dtmpl->ctd_minor = path; 1371 mutex_exit(&tmpl->ctmpl_lock); 1372 1373 ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); 1374 1375 mutex_enter(&tmpl->ctmpl_lock); 1376 ASSERT(dtmpl->ctd_minor); 1377 dtmpl->ctd_minor = NULL; 1378 mutex_exit(&tmpl->ctmpl_lock); 1379 ctmpl_free(tmpl); 1380 kmem_free(path, MAXPATHLEN); 1381 1382 if (ctd == NULL) { 1383 cmn_err(CE_NOTE, "contract_device_open(): Failed to " 1384 "create device contract for process (%d) holding " 1385 "device (devt = %lu, spec_type = %d)", 1386 
curproc->p_pid, dev, spec_type); 1387 return (1); 1388 } 1389 1390 if (ctpp) { 1391 mutex_enter(&ctd->cond_contract.ct_lock); 1392 *ctpp = &ctd->cond_contract; 1393 mutex_exit(&ctd->cond_contract.ct_lock); 1394 } 1395 return (0); 1396 } 1397 1398 /* 1399 * Called during contract negotiation by the device contract framework to wait 1400 * for ACKs or NACKs from contract holders. If all responses are not received 1401 * before a specified timeout, this routine times out. 1402 */ 1403 static uint_t 1404 wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) 1405 { 1406 cont_device_t *ctd; 1407 int timed_out = 0; 1408 int result = CT_NONE; 1409 int ack; 1410 char *f = "wait_for_acks"; 1411 1412 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 1413 ASSERT(dip); 1414 ASSERT(evtype & CT_DEV_ALLEVENT); 1415 ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 1416 ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 1417 (spec_type == S_IFBLK || spec_type == S_IFCHR)); 1418 1419 CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); 1420 1421 if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { 1422 /* 1423 * some contract owner(s) didn't respond in time 1424 */ 1425 CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); 1426 timed_out = 1; 1427 } 1428 1429 ack = 0; 1430 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1431 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1432 1433 mutex_enter(&ctd->cond_contract.ct_lock); 1434 1435 ASSERT(ctd->cond_dip == dip); 1436 1437 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 1438 mutex_exit(&ctd->cond_contract.ct_lock); 1439 continue; 1440 } 1441 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 1442 mutex_exit(&ctd->cond_contract.ct_lock); 1443 continue; 1444 } 1445 1446 /* skip if non-negotiable contract */ 1447 if (ctd->cond_noneg) { 1448 mutex_exit(&ctd->cond_contract.ct_lock); 1449 continue; 1450 } 1451 1452 ASSERT(ctd->cond_currev_type == evtype); 1453 if (ctd->cond_currev_ack 
== CT_NACK) { 1454 CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", 1455 f, (void *)dip)); 1456 mutex_exit(&ctd->cond_contract.ct_lock); 1457 return (CT_NACK); 1458 } else if (ctd->cond_currev_ack == CT_ACK) { 1459 ack = 1; 1460 CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", 1461 f, (void *)dip)); 1462 } 1463 mutex_exit(&ctd->cond_contract.ct_lock); 1464 } 1465 1466 if (ack) { 1467 result = CT_ACK; 1468 CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); 1469 } else if (timed_out) { 1470 result = CT_NONE; 1471 CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", 1472 f, (void *)dip)); 1473 } else { 1474 CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", 1475 f, (void *)dip)); 1476 } 1477 1478 1479 return (result); 1480 } 1481 1482 /* 1483 * Determines the current state of a device (i.e a devinfo node 1484 */ 1485 static int 1486 get_state(dev_info_t *dip) 1487 { 1488 if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) 1489 return (CT_DEV_EV_OFFLINE); 1490 else if (DEVI_IS_DEVICE_DEGRADED(dip)) 1491 return (CT_DEV_EV_DEGRADED); 1492 else 1493 return (CT_DEV_EV_ONLINE); 1494 } 1495 1496 /* 1497 * Sets the current state of a device in a device contract 1498 */ 1499 static void 1500 set_cond_state(dev_info_t *dip) 1501 { 1502 uint_t state = get_state(dip); 1503 cont_device_t *ctd; 1504 1505 /* verify that barrier is held */ 1506 ASSERT(ct_barrier_held(dip)); 1507 1508 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1509 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1510 mutex_enter(&ctd->cond_contract.ct_lock); 1511 ASSERT(ctd->cond_dip == dip); 1512 ctd->cond_state = state; 1513 mutex_exit(&ctd->cond_contract.ct_lock); 1514 } 1515 } 1516 1517 /* 1518 * Core routine called by event-specific routines when an event occurs. 1519 * Determines if an event should be be published, and if it is to be 1520 * published, whether a negotiation should take place. 
Also implements 1521 * NEGEND events which publish the final disposition of an event after 1522 * negotiations are complete. 1523 * 1524 * When an event occurs on a minor node, this routine walks the list of 1525 * contracts hanging off a devinfo node and for each contract on the affected 1526 * dip, evaluates the following cases 1527 * 1528 * a. an event that is synchronous, breaks the contract and NONEG not set 1529 * - bumps up the outstanding negotiation counts on the dip 1530 * - marks the dip as undergoing negotiation (devi_ct_neg) 1531 * - event of type CTE_NEG is published 1532 * b. an event that is synchronous, breaks the contract and NONEG is set 1533 * - sets the final result to CT_NACK, event is blocked 1534 * - does not publish an event 1535 * c. event is asynchronous and breaks the contract 1536 * - publishes a critical event irrespect of whether the NONEG 1537 * flag is set, since the contract will be broken and contract 1538 * owner needs to be informed. 1539 * d. No contract breakage but the owner has subscribed to the event 1540 * - publishes the event irrespective of the NONEG event as the 1541 * owner has explicitly subscribed to the event. 1542 * e. NEGEND event 1543 * - publishes a critical event. Should only be doing this if 1544 * if NONEG is not set. 1545 * f. all other events 1546 * - Since a contract is not broken and this event has not been 1547 * subscribed to, this event does not need to be published for 1548 * for this contract. 1549 * 1550 * Once an event is published, what happens next depends on the type of 1551 * event: 1552 * 1553 * a. NEGEND event 1554 * - cleanup all state associated with the preceding negotiation 1555 * and return CT_ACK to the caller of contract_device_publish() 1556 * b. NACKed event 1557 * - One or more contracts had the NONEG term, so the event was 1558 * blocked. Return CT_NACK to the caller. 1559 * c. Negotiated event 1560 * - Call wait_for_acks() to wait for responses from contract 1561 * holders. 
The end result is either CT_ACK (event is permitted), 1562 * CT_NACK (event is blocked) or CT_NONE (no contract owner) 1563 * responded. This result is returned back to the caller. 1564 * d. All other events 1565 * - If the event was asynchronous (i.e. not negotiated) or 1566 * a contract was not broken return CT_ACK to the caller. 1567 */ 1568 static uint_t 1569 contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, 1570 uint_t evtype, nvlist_t *tnvl) 1571 { 1572 cont_device_t *ctd; 1573 uint_t result = CT_NONE; 1574 uint64_t evid = 0; 1575 uint64_t nevid = 0; 1576 char *path = NULL; 1577 int negend; 1578 int match; 1579 int sync = 0; 1580 contract_t *ct; 1581 ct_kevent_t *event; 1582 nvlist_t *nvl; 1583 int broken = 0; 1584 1585 ASSERT(dip); 1586 ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 1587 ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 1588 (spec_type == S_IFBLK || spec_type == S_IFCHR)); 1589 ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); 1590 1591 /* Is this a synchronous state change ? */ 1592 if (evtype != CT_EV_NEGEND) { 1593 sync = is_sync_neg(get_state(dip), evtype); 1594 /* NOP if unsupported transition */ 1595 if (sync == -2 || sync == -1) { 1596 DEVI(dip)->devi_flags |= DEVI_CT_NOP; 1597 result = (sync == -2) ? CT_ACK : CT_NONE; 1598 goto out; 1599 } 1600 CT_DEBUG((CE_NOTE, "publish: is%s sync state change", 1601 sync ? 
"" : " not")); 1602 } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { 1603 DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; 1604 result = CT_ACK; 1605 goto out; 1606 } 1607 1608 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1609 (void) ddi_pathname(dip, path); 1610 1611 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1612 1613 /* 1614 * Negotiation end - set the state of the device in the contract 1615 */ 1616 if (evtype == CT_EV_NEGEND) { 1617 CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); 1618 set_cond_state(dip); 1619 } 1620 1621 /* 1622 * If this device didn't go through negotiation, don't publish 1623 * a NEGEND event - simply release the barrier to allow other 1624 * device events in. 1625 */ 1626 negend = 0; 1627 if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { 1628 CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier")); 1629 ct_barrier_release(dip); 1630 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1631 result = CT_ACK; 1632 goto out; 1633 } else if (evtype == CT_EV_NEGEND) { 1634 /* 1635 * There are negotiated contract breakages that 1636 * need a NEGEND event 1637 */ 1638 ASSERT(ct_barrier_held(dip)); 1639 negend = 1; 1640 CT_DEBUG((CE_NOTE, "publish: setting negend flag")); 1641 } else { 1642 /* 1643 * This is a new event, not a NEGEND event. Wait for previous 1644 * contract events to complete. 
1645 */ 1646 ct_barrier_acquire(dip); 1647 } 1648 1649 1650 match = 0; 1651 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1652 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1653 1654 ctid_t ctid; 1655 size_t len = strlen(path); 1656 1657 mutex_enter(&ctd->cond_contract.ct_lock); 1658 1659 ASSERT(ctd->cond_dip == dip); 1660 ASSERT(ctd->cond_minor); 1661 ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && 1662 ctd->cond_minor[len] == ':'); 1663 1664 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 1665 mutex_exit(&ctd->cond_contract.ct_lock); 1666 continue; 1667 } 1668 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 1669 mutex_exit(&ctd->cond_contract.ct_lock); 1670 continue; 1671 } 1672 1673 /* We have a matching contract */ 1674 match = 1; 1675 ctid = ctd->cond_contract.ct_id; 1676 CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", 1677 ctid)); 1678 1679 /* 1680 * There are 4 possible cases 1681 * 1. A contract is broken (dev not in acceptable state) and 1682 * the state change is synchronous - start negotiation 1683 * by sending a CTE_NEG critical event. 1684 * 2. A contract is broken and the state change is 1685 * asynchronous - just send a critical event and 1686 * break the contract. 1687 * 3. Contract is not broken, but consumer has subscribed 1688 * to the event as a critical or informative event 1689 * - just send the appropriate event 1690 * 4. contract waiting for negend event - just send the critical 1691 * NEGEND event. 
1692 */ 1693 broken = 0; 1694 if (!negend && !(evtype & ctd->cond_aset)) { 1695 broken = 1; 1696 CT_DEBUG((CE_NOTE, "publish: Contract broken: %d", 1697 ctid)); 1698 } 1699 1700 /* 1701 * Don't send event if 1702 * - contract is not broken AND 1703 * - contract holder has not subscribed to this event AND 1704 * - contract not waiting for a NEGEND event 1705 */ 1706 if (!broken && !EVSENDP(ctd, evtype) && 1707 !ctd->cond_neg) { 1708 CT_DEBUG((CE_NOTE, "contract_device_publish(): " 1709 "contract (%d): no publish reqd: event %d", 1710 ctd->cond_contract.ct_id, evtype)); 1711 mutex_exit(&ctd->cond_contract.ct_lock); 1712 continue; 1713 } 1714 1715 /* 1716 * Note: need to kmem_zalloc() the event so mutexes are 1717 * initialized automatically 1718 */ 1719 ct = &ctd->cond_contract; 1720 event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); 1721 event->cte_type = evtype; 1722 1723 if (broken && sync) { 1724 CT_DEBUG((CE_NOTE, "publish: broken + sync: " 1725 "ctid: %d", ctid)); 1726 ASSERT(!negend); 1727 ASSERT(ctd->cond_currev_id == 0); 1728 ASSERT(ctd->cond_currev_type == 0); 1729 ASSERT(ctd->cond_currev_ack == 0); 1730 ASSERT(ctd->cond_neg == 0); 1731 if (ctd->cond_noneg) { 1732 /* Nothing to publish. Event has been blocked */ 1733 CT_DEBUG((CE_NOTE, "publish: sync and noneg:" 1734 "not publishing blocked ev: ctid: %d", 1735 ctid)); 1736 result = CT_NACK; 1737 kmem_free(event, sizeof (ct_kevent_t)); 1738 mutex_exit(&ctd->cond_contract.ct_lock); 1739 continue; 1740 } 1741 event->cte_flags = CTE_NEG; /* critical neg. 
event */ 1742 ctd->cond_currev_type = event->cte_type; 1743 ct_barrier_incr(dip); 1744 DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */ 1745 ctd->cond_neg = 1; 1746 } else if (broken && !sync) { 1747 CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d", 1748 ctid)); 1749 ASSERT(!negend); 1750 ASSERT(ctd->cond_currev_id == 0); 1751 ASSERT(ctd->cond_currev_type == 0); 1752 ASSERT(ctd->cond_currev_ack == 0); 1753 ASSERT(ctd->cond_neg == 0); 1754 event->cte_flags = 0; /* critical event */ 1755 } else if (EVSENDP(ctd, event->cte_type)) { 1756 CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d", 1757 ctid)); 1758 ASSERT(!negend); 1759 ASSERT(ctd->cond_currev_id == 0); 1760 ASSERT(ctd->cond_currev_type == 0); 1761 ASSERT(ctd->cond_currev_ack == 0); 1762 ASSERT(ctd->cond_neg == 0); 1763 event->cte_flags = EVINFOP(ctd, event->cte_type) ? 1764 CTE_INFO : 0; 1765 } else if (ctd->cond_neg) { 1766 CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid)); 1767 ASSERT(negend); 1768 ASSERT(ctd->cond_noneg == 0); 1769 nevid = ctd->cond_contract.ct_nevent ? 
1770 ctd->cond_contract.ct_nevent->cte_id : 0; 1771 ASSERT(ctd->cond_currev_id == nevid); 1772 event->cte_flags = 0; /* NEGEND is always critical */ 1773 ctd->cond_currev_id = 0; 1774 ctd->cond_currev_type = 0; 1775 ctd->cond_currev_ack = 0; 1776 ctd->cond_neg = 0; 1777 } else { 1778 CT_DEBUG((CE_NOTE, "publish: not publishing event for " 1779 "ctid: %d, evtype: %d", 1780 ctd->cond_contract.ct_id, event->cte_type)); 1781 ASSERT(!negend); 1782 ASSERT(ctd->cond_currev_id == 0); 1783 ASSERT(ctd->cond_currev_type == 0); 1784 ASSERT(ctd->cond_currev_ack == 0); 1785 ASSERT(ctd->cond_neg == 0); 1786 kmem_free(event, sizeof (ct_kevent_t)); 1787 mutex_exit(&ctd->cond_contract.ct_lock); 1788 continue; 1789 } 1790 1791 nvl = NULL; 1792 if (tnvl) { 1793 VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0); 1794 if (negend) { 1795 int32_t newct = 0; 1796 ASSERT(ctd->cond_noneg == 0); 1797 VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid) 1798 == 0); 1799 VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT, 1800 &newct) == 0); 1801 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 1802 newct == 1 ? 0 : 1803 ctd->cond_contract.ct_id) == 0); 1804 CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d " 1805 "CTS_NEVID: %llu, CTS_NEWCT: %s", 1806 ctid, (unsigned long long)nevid, 1807 newct ? "success" : "failure")); 1808 1809 } 1810 } 1811 1812 if (ctd->cond_neg) { 1813 ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1); 1814 ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1); 1815 ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt(); 1816 ctd->cond_contract.ct_qtime.ctm_start = 1817 ctd->cond_contract.ct_ntime.ctm_start; 1818 } 1819 1820 /* 1821 * by holding the dip's devi_ct_lock we ensure that 1822 * all ACK/NACKs are held up until we have finished 1823 * publishing to all contracts. 
1824 */ 1825 mutex_exit(&ctd->cond_contract.ct_lock); 1826 evid = cte_publish_all(ct, event, nvl, NULL); 1827 mutex_enter(&ctd->cond_contract.ct_lock); 1828 1829 if (ctd->cond_neg) { 1830 ASSERT(!negend); 1831 ASSERT(broken); 1832 ASSERT(sync); 1833 ASSERT(!ctd->cond_noneg); 1834 CT_DEBUG((CE_NOTE, "publish: sync break, setting evid" 1835 ": %d", ctid)); 1836 ctd->cond_currev_id = evid; 1837 } else if (negend) { 1838 ctd->cond_contract.ct_ntime.ctm_start = -1; 1839 ctd->cond_contract.ct_qtime.ctm_start = -1; 1840 } 1841 mutex_exit(&ctd->cond_contract.ct_lock); 1842 } 1843 1844 /* 1845 * If "negend" set counter back to initial state (-1) so that 1846 * other events can be published. Also clear the negotiation flag 1847 * on dip. 1848 * 1849 * 0 .. n are used for counting. 1850 * -1 indicates counter is available for use. 1851 */ 1852 if (negend) { 1853 /* 1854 * devi_ct_count not necessarily 0. We may have 1855 * timed out in which case, count will be non-zero. 1856 */ 1857 ct_barrier_release(dip); 1858 DEVI(dip)->devi_ct_neg = 0; 1859 CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p", 1860 (void *)dip)); 1861 } else if (DEVI(dip)->devi_ct_neg) { 1862 ASSERT(match); 1863 ASSERT(!ct_barrier_empty(dip)); 1864 CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p", 1865 DEVI(dip)->devi_ct_count, (void *)dip)); 1866 } else { 1867 /* 1868 * for non-negotiated events or subscribed events or no 1869 * matching contracts 1870 */ 1871 ASSERT(ct_barrier_empty(dip)); 1872 ASSERT(DEVI(dip)->devi_ct_neg == 0); 1873 CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: " 1874 "dip=%p", (void *)dip)); 1875 1876 /* 1877 * only this function when called from contract_device_negend() 1878 * can reset the counter to READY state i.e. -1. This function 1879 * is so called for every event whether a NEGEND event is needed 1880 * or not, but the negend event is only published if the event 1881 * whose end they signal is a negotiated event for the contract. 
1882 */ 1883 } 1884 1885 if (!match) { 1886 /* No matching contracts */ 1887 CT_DEBUG((CE_NOTE, "publish: No matching contract")); 1888 result = CT_NONE; 1889 } else if (result == CT_NACK) { 1890 /* a non-negotiable contract exists and this is a neg. event */ 1891 CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract")); 1892 (void) wait_for_acks(dip, dev, spec_type, evtype); 1893 } else if (DEVI(dip)->devi_ct_neg) { 1894 /* one or more contracts going through negotations */ 1895 CT_DEBUG((CE_NOTE, "publish: sync contract: waiting")); 1896 result = wait_for_acks(dip, dev, spec_type, evtype); 1897 } else { 1898 /* no negotiated contracts or no broken contracts or NEGEND */ 1899 CT_DEBUG((CE_NOTE, "publish: async/no-break/negend")); 1900 result = CT_ACK; 1901 } 1902 1903 /* 1904 * Release the lock only now so that the only point where we 1905 * drop the lock is in wait_for_acks(). This is so that we don't 1906 * miss cv_signal/cv_broadcast from contract holders 1907 */ 1908 CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock")); 1909 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1910 1911 out: 1912 if (tnvl) 1913 nvlist_free(tnvl); 1914 if (path) 1915 kmem_free(path, MAXPATHLEN); 1916 1917 1918 CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result))); 1919 return (result); 1920 } 1921 1922 1923 /* 1924 * contract_device_offline 1925 * 1926 * Event publishing routine called by I/O framework when a device is offlined. 
1927 */ 1928 ct_ack_t 1929 contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type) 1930 { 1931 nvlist_t *nvl; 1932 uint_t result; 1933 uint_t evtype; 1934 1935 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1936 1937 evtype = CT_DEV_EV_OFFLINE; 1938 result = contract_device_publish(dip, dev, spec_type, evtype, nvl); 1939 1940 /* 1941 * If a contract offline is NACKED, the framework expects us to call 1942 * NEGEND ourselves, since we know the final result 1943 */ 1944 if (result == CT_NACK) { 1945 contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); 1946 } 1947 1948 return (result); 1949 } 1950 1951 /* 1952 * contract_device_degrade 1953 * 1954 * Event publishing routine called by I/O framework when a device 1955 * moves to degrade state. 1956 */ 1957 /*ARGSUSED*/ 1958 void 1959 contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) 1960 { 1961 nvlist_t *nvl; 1962 uint_t evtype; 1963 1964 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1965 1966 evtype = CT_DEV_EV_DEGRADED; 1967 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 1968 } 1969 1970 /* 1971 * contract_device_undegrade 1972 * 1973 * Event publishing routine called by I/O framework when a device 1974 * moves from degraded state to online state. 1975 */ 1976 /*ARGSUSED*/ 1977 void 1978 contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) 1979 { 1980 nvlist_t *nvl; 1981 uint_t evtype; 1982 1983 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1984 1985 evtype = CT_DEV_EV_ONLINE; 1986 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 1987 } 1988 1989 /* 1990 * For all contracts which have undergone a negotiation (because the device 1991 * moved out of the acceptable state for that contract and the state 1992 * change is synchronous i.e. requires negotiation) this routine publishes 1993 * a CT_EV_NEGEND event with the final disposition of the event. 
1994 * 1995 * This event is always a critical event. 1996 */ 1997 void 1998 contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) 1999 { 2000 nvlist_t *nvl; 2001 uint_t evtype; 2002 2003 ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); 2004 2005 CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " 2006 "dip: %p", result, (void *)dip)); 2007 2008 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2009 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 2010 result == CT_EV_SUCCESS ? 1 : 0) == 0); 2011 2012 evtype = CT_EV_NEGEND; 2013 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 2014 2015 CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", 2016 (void *)dip)); 2017 } 2018 2019 /* 2020 * Wrapper routine called by other subsystems (such as LDI) to start 2021 * negotiations when a synchronous device state change occurs. 2022 * Returns CT_ACK or CT_NACK. 2023 */ 2024 ct_ack_t 2025 contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, 2026 uint_t evtype) 2027 { 2028 int result; 2029 2030 ASSERT(dip); 2031 ASSERT(dev != NODEV); 2032 ASSERT(dev != DDI_DEV_T_ANY); 2033 ASSERT(dev != DDI_DEV_T_NONE); 2034 ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 2035 2036 switch (evtype) { 2037 case CT_DEV_EV_OFFLINE: 2038 result = contract_device_offline(dip, dev, spec_type); 2039 break; 2040 default: 2041 cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " 2042 "not supported: event (%d) for dev_t (%lu) and spec (%d), " 2043 "dip (%p)", evtype, dev, spec_type, (void *)dip); 2044 result = CT_NACK; 2045 break; 2046 } 2047 2048 return (result); 2049 } 2050 2051 /* 2052 * A wrapper routine called by other subsystems (such as the LDI) to 2053 * finalize event processing for a state change event. For synchronous 2054 * state changes, this publishes NEGEND events. For asynchronous i.e. 2055 * non-negotiable events this publishes the event. 
2056 */ 2057 void 2058 contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, 2059 uint_t evtype, int ct_result) 2060 { 2061 ASSERT(dip); 2062 ASSERT(dev != NODEV); 2063 ASSERT(dev != DDI_DEV_T_ANY); 2064 ASSERT(dev != DDI_DEV_T_NONE); 2065 ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 2066 2067 switch (evtype) { 2068 case CT_DEV_EV_OFFLINE: 2069 contract_device_negend(dip, dev, spec_type, ct_result); 2070 break; 2071 case CT_DEV_EV_DEGRADED: 2072 contract_device_degrade(dip, dev, spec_type); 2073 contract_device_negend(dip, dev, spec_type, ct_result); 2074 break; 2075 case CT_DEV_EV_ONLINE: 2076 contract_device_undegrade(dip, dev, spec_type); 2077 contract_device_negend(dip, dev, spec_type, ct_result); 2078 break; 2079 default: 2080 cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " 2081 "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", 2082 evtype, dev, spec_type, (void *)dip); 2083 break; 2084 } 2085 } 2086 2087 /* 2088 * Called by I/O framework when a devinfo node is freed to remove the 2089 * association between a devinfo node and its contracts. 
2090 */ 2091 void 2092 contract_device_remove_dip(dev_info_t *dip) 2093 { 2094 cont_device_t *ctd; 2095 cont_device_t *next; 2096 contract_t *ct; 2097 2098 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 2099 ct_barrier_wait_for_release(dip); 2100 2101 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) { 2102 next = list_next(&(DEVI(dip)->devi_ct), ctd); 2103 list_remove(&(DEVI(dip)->devi_ct), ctd); 2104 ct = &ctd->cond_contract; 2105 /* 2106 * Unlink the dip associated with this contract 2107 */ 2108 mutex_enter(&ct->ct_lock); 2109 ASSERT(ctd->cond_dip == dip); 2110 ctd->cond_dip = NULL; /* no longer linked to dip */ 2111 contract_rele(ct); /* remove hold for dip linkage */ 2112 CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: " 2113 "ctid: %d", ct->ct_id)); 2114 mutex_exit(&ct->ct_lock); 2115 } 2116 ASSERT(list_is_empty(&(DEVI(dip)->devi_ct))); 2117 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 2118 } 2119 2120 /* 2121 * Barrier related routines 2122 */ 2123 static void 2124 ct_barrier_acquire(dev_info_t *dip) 2125 { 2126 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2127 CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier")); 2128 while (DEVI(dip)->devi_ct_count != -1) 2129 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 2130 DEVI(dip)->devi_ct_count = 0; 2131 CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier")); 2132 } 2133 2134 static void 2135 ct_barrier_release(dev_info_t *dip) 2136 { 2137 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2138 ASSERT(DEVI(dip)->devi_ct_count != -1); 2139 DEVI(dip)->devi_ct_count = -1; 2140 cv_broadcast(&(DEVI(dip)->devi_ct_cv)); 2141 CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier")); 2142 } 2143 2144 static int 2145 ct_barrier_held(dev_info_t *dip) 2146 { 2147 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2148 return (DEVI(dip)->devi_ct_count != -1); 2149 } 2150 2151 static int 2152 ct_barrier_empty(dev_info_t *dip) 2153 { 2154 
ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2155 ASSERT(DEVI(dip)->devi_ct_count != -1); 2156 return (DEVI(dip)->devi_ct_count == 0); 2157 } 2158 2159 static void 2160 ct_barrier_wait_for_release(dev_info_t *dip) 2161 { 2162 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2163 while (DEVI(dip)->devi_ct_count != -1) 2164 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 2165 } 2166 2167 static void 2168 ct_barrier_decr(dev_info_t *dip) 2169 { 2170 CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d", 2171 DEVI(dip)->devi_ct_count)); 2172 2173 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2174 ASSERT(DEVI(dip)->devi_ct_count > 0); 2175 2176 DEVI(dip)->devi_ct_count--; 2177 if (DEVI(dip)->devi_ct_count == 0) { 2178 cv_broadcast(&DEVI(dip)->devi_ct_cv); 2179 CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast")); 2180 } 2181 } 2182 2183 static void 2184 ct_barrier_incr(dev_info_t *dip) 2185 { 2186 ASSERT(ct_barrier_held(dip)); 2187 DEVI(dip)->devi_ct_count++; 2188 } 2189 2190 static int 2191 ct_barrier_wait_for_empty(dev_info_t *dip, int secs) 2192 { 2193 clock_t abstime; 2194 2195 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2196 2197 abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000); 2198 while (DEVI(dip)->devi_ct_count) { 2199 if (cv_timedwait(&(DEVI(dip)->devi_ct_cv), 2200 &(DEVI(dip)->devi_ct_lock), abstime) == -1) { 2201 return (-1); 2202 } 2203 } 2204 return (0); 2205 } 2206