/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/mutex.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/id_space.h>
#include <sys/avl.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/contract.h>
#include <sys/contract_impl.h>
#include <sys/contract/device.h>
#include <sys/contract/device_impl.h>
#include <sys/cmn_err.h>
#include <sys/nvpair.h>
#include <sys/policy.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi_implfuncs.h>
#include <sys/systm.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/ddi.h>
#include <sys/fs/dv_node.h>
#include <sys/sunndi.h>
#undef ct_lock	/* needed because clnt.h defines ct_lock as a macro */

/*
 * Device Contracts
 * -----------------
 * This file contains the core code for the device contracts framework.
 * A device contract is an agreement or a contract between a process and
 * the kernel regarding the state of the device. A device contract may be
 * created when a relationship is formed between a device and a process,
 * i.e. at open(2) time, or it may be created at some point after the device
 * has been opened. A device contract, once formed, may be broken by either
 * party. A device contract can be broken by the process by an explicit
 * abandon of the contract or by an implicit abandon when the process exits.
 * A device contract can be broken by the kernel either asynchronously
 * (without negotiation) or synchronously (with negotiation). Exactly which
 * happens depends on the device state transition. The following state
 * diagram shows the transitions between device states. Only device state
 * transitions currently supported by device contracts are shown.
 *
 *                      <-- A -->
 *                /-----------------> DEGRADED
 *                |                      |
 *                |                      |
 *                |                      |  S
 *                |                      |  |
 *                |                      |  v
 *                v       S -->          v
 *            ONLINE ------------> OFFLINE
 *
 *
 * In the figure above, the arrows indicate the direction of transition. The
 * letter S refers to transitions which are inherently synchronous, i.e.
 * require negotiation, and the letter A indicates transitions which are
 * asynchronous, i.e. are done without contract negotiations. A good example
 * of a synchronous transition is the ONLINE -> OFFLINE transition. This
 * transition cannot happen as long as there are consumers which have the
 * device open.
 * Thus some form of negotiation needs to happen between the
 * consumers and the kernel to ensure that consumers either close devices
 * or disallow the move to OFFLINE. Certain other transitions, such as
 * ONLINE --> DEGRADED for example, are inherently asynchronous, i.e.
 * non-negotiable. A device that suffers a fault that degrades its
 * capabilities will become degraded irrespective of what consumers it has,
 * so a negotiation in this case is pointless.
 *
 * The following device states are currently defined for device contracts:
 *
 *	CT_DEV_EV_ONLINE
 *		The device is online and functioning normally
 *	CT_DEV_EV_DEGRADED
 *		The device is online but is functioning in a degraded capacity
 *	CT_DEV_EV_OFFLINE
 *		The device is offline and is no longer configured
 *
 * A typical consumer of device contracts starts out with a contract
 * template and adds terms to that template. These include the
 * "acceptable set" (A-set) term, which is a bitset of device states which
 * are guaranteed by the contract. If the device moves out of a state in
 * the A-set, the contract is broken. The breaking of the contract can
 * be asynchronous, in which case a critical contract event is sent to the
 * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
 * consumer and the kernel. The kernel does this by sending a critical
 * event to the consumer with the CTE_NEG flag set, indicating that this
 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
 * privileges, it can send a NACK message to the kernel, which will block
 * the device state change. To NACK a negotiable event, a process must
 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 *
 * Other terms include the "minor path" term, specified explicitly if the
 * contract is not being created at open(2) time, or specified implicitly
 * if the contract is being created at open time via an activated template.
 *
 * A contract event is sent on any state change to which the contract
 * owner has subscribed via the informative or critical event sets. Only
 * critical events are guaranteed to be delivered. Since all device state
 * changes are controlled by the kernel and cannot be arbitrarily generated
 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
 * need to be asserted in a process's effective set to designate an event as
 * critical. To ensure privacy, a process must either have the same effective
 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
 * asserted in its effective set in order to observe device contract events
 * off the device contract type specific endpoint.
 *
 * Yet another term available with device contracts is the "non-negotiable"
 * term. This term is used to pre-specify a NACK to any contract negotiation.
 * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
 * non-negotiable. In this case, the device contract framework assumes a
 * NACK for any transition to OFFLINE and blocks the offline. If the A-set
 * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
 * are NACKed but transitions to DEGRADED succeed.
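 *
 * As a rough illustration (not part of this file), a userland consumer
 * would typically drive the above through libcontract(3LIB) and the
 * contract(4) filesystem. The sketch below is an assumption based on those
 * documented interfaces, elides all error handling, and is meant only to
 * make the terms above concrete:
 *
 *	int tfd = open("/system/contract/device/template", O_RDWR);
 *	(void) ct_dev_tmpl_set_aset(tfd, CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED);
 *	(void) ct_tmpl_set_critical(tfd, CT_DEV_EV_OFFLINE);
 *	(void) ct_tmpl_activate(tfd);
 *
 * A subsequent open(2) of a minor node then creates a contract with these
 * terms (see contract_device_open() below). When a critical event with the
 * CTE_NEG flag is later read from the device contract type endpoint, the
 * holder is expected to respond with ct_ctl_ack() or, if privileged,
 * ct_ctl_nack() before the negotiation times out.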
 *
 * The OFFLINE negotiation (if the OFFLINE state is not in the A-set for a
 * contract) happens just before the I/O framework attempts to offline a
 * device (i.e. detach a device and set the offline flag so that it cannot be
 * reattached). A device contract holder is expected to either NACK the offline
 * (if privileged) or release the device and allow the offline to proceed.
 *
 * The DEGRADED contract event (if DEGRADED is not in the A-set for a contract)
 * is generated just before the I/O framework transitions the device state
 * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
 *
 * The contract holder is expected to ACK or NACK a negotiation event
 * within a certain period of time. If the ACK/NACK is not received
 * within the timeout period, the device contract framework will behave
 * as if the contract does not exist and will proceed with the event.
 *
 * Unlike a process contract, a device contract does not need to exist
 * once it is abandoned, since it does not define a fault boundary. It
 * merely represents an agreement between a process and the kernel
 * regarding the state of the device. Once the process has abandoned
 * the contract (either implicitly via a process exit or explicitly),
 * the kernel has no reason to retain the contract. As a result,
 * device contracts are neither inheritable nor do they need to exist in
 * an orphan state.
 *
 * A device, unlike a process, may be the subject of multiple contracts and
 * has a "life" outside a device contract. A device, unlike a process,
 * may exist without an associated contract. Unlike a process contract,
 * a device contract may be formed after a binding relationship is
 * formed between a process and a device.
 *
 * IMPLEMENTATION NOTES
 * ====================
 * DATA STRUCTURES
 * ----------------
 * The heart of the device contracts implementation is the device contract
 * private cont_device_t (or ctd for short) data structure. It encapsulates
 * the generic contract_t data structure and has a number of private
 * fields. These include:
 *	cond_minor: The minor device that is the subject of the contract
 *	cond_aset:  The bitset of states which are guaranteed by the
 *		    contract
 *	cond_noneg: If set, indicates that the result of negotiation has
 *		    been predefined to be a NACK
 * In addition, there are other device identifiers such as the devinfo node,
 * dev_t and spec_type of the minor node. There are also a few fields that
 * are used during negotiation to maintain state. See
 *	uts/common/sys/contract/device_impl.h
 * for details.
 * The ctd structure represents the device private part of a contract of
 * type "device".
 *
 * Another data structure used by device contracts is ctmpl_device. It is
 * the device contract private part of the contract template structure. It
 * encapsulates the generic template structure "ct_template_t" and includes
 * the following device contract specific fields:
 *	ctd_aset:   The bitset of states that should be guaranteed by a
 *		    contract
 *	ctd_noneg:  If set, indicates that the contract should NACK a
 *		    negotiation
 *	ctd_minor:  The devfs_path (without the /devices prefix) of the
 *		    minor node that is the subject of the contract.
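 *
 * Given these fields, whether a particular state change breaks a given
 * contract, and whether the change can be blocked outright, reduce to
 * simple bitset tests. Conceptually (a condensed restatement of the checks
 * made later in contract_device_publish() and is_sync_neg(), not a
 * separate interface):
 *
 *	broken = !(evtype & ctd->cond_aset);
 *	sync = is_sync_neg(ctd->cond_state, evtype);
 *	if (broken && sync && ctd->cond_noneg)
 *		result = CT_NACK;
 *
 * i.e. a synchronous break of a non-negotiable contract is blocked without
 * any negotiation, which is exactly the "assumed NACK" described above.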
 *
 * ALGORITHMS
 * ----------
 * There are three sets of routines in this file:
 *
 * Template related routines
 * -------------------------
 * These routines provide support for template related operations initiated
 * via the generic template operations. These include routines that dup
 * a template, free it, and set various terms in the template
 * (such as the minor node path, the acceptable state set (or A-set)
 * and the non-negotiable term), as well as a routine to query the
 * device specific portion of the template for the above-mentioned terms.
 * There is also a create routine (ctmpl_device_create) that is used to
 * create a contract from a template. This routine calls (after initial
 * setup) the common function used to create a device contract
 * (contract_device_create).
 *
 * Core device contract implementation
 * -----------------------------------
 * These routines support the generic contract framework to provide
 * functionality that allows contracts to be created, managed and
 * destroyed. The contract_device_create() routine is used
 * to create a contract from a template (either via an explicit create
 * operation on a template or implicitly via an open with an
 * activated template). The contract_device_free() routine assists
 * in freeing the device contract specific parts. There are routines
 * used to abandon (contract_device_abandon) a device contract as well
 * as a routine to destroy a contract (which, despite its name, does not
 * free the contract; it only moves it to the dead state).
 * There is also a routine to return status information about a
 * contract - the level of detail depends on what is requested by the
 * user. A value of CTD_FIXED only returns fixed length fields such
 * as the A-set, state of device and value of the "noneg" term. If
 * CTD_ALL is specified, the minor node path is returned as well.
 *
 * In addition there are interfaces (contract_device_ack/nack) which
 * are used to support negotiation between userland processes and
 * device contracts. These interfaces record the acknowledgement,
 * or lack thereof, for negotiation events and help determine if the
 * negotiated event should occur.
 *
 * "Backend routines"
 * ------------------
 * The backend routines form the interface between the I/O framework
 * and the device contract subsystem. These routines allow the I/O
 * framework to call into the device contract subsystem to notify it of
 * impending changes to a device state as well as to inform it of the
 * final disposition of such attempted state changes. Routines in this
 * class include contract_device_offline(), which indicates an attempt to
 * offline a device, contract_device_degrade(), which indicates that
 * a device is moving to the degraded state, and contract_device_negend(),
 * which is used by the I/O framework to inform the contracts subsystem of
 * the final disposition of an attempted operation.
 *
 * SUMMARY
 * -------
 * A contract starts its life as a template. A process allocates a device
 * contract template and sets various terms:
 *	The A-set
 *	The device minor node
 *	Critical and informative events
 *	The noneg (i.e. no negotiation) term
 * Setting of these terms in the template is done via the
 * ctmpl_device_set() entry point in this file.
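 *
 * As an example of the validation done there, the A-set term must be a
 * non-empty, proper subset of CT_DEV_ALLEVENT before it is accepted. The
 * following is simply a condensed restatement of the CTDP_ACCEPT case in
 * ctmpl_device_set() below (with aset standing in for the user-supplied
 * parameter value):
 *
 *	if ((aset & ~CT_DEV_ALLEVENT) != 0 || aset == 0 ||
 *	    aset == CT_DEV_ALLEVENT)
 *		return (EINVAL);
 *	dtmpl->ctd_aset = aset;
 *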
 * A process can query a template to determine the terms already set in
 * the template - this is facilitated by the ctmpl_device_get() routine.
 *
 * Once all the appropriate terms are set, the contract is instantiated via
 * one of two methods:
 *	- via an explicit create operation - this is facilitated by the
 *	  ctmpl_device_create() entry point
 *	- synchronously with the open(2) system call - this is achieved via
 *	  the contract_device_open() routine.
 * The core work for both of the above functions is done by
 * contract_device_create().
 *
 * A contract, once created, can be queried for its status. Support for
 * status info is provided by both the common contracts framework and by
 * the "device" contract type. If the level of detail requested is
 * CTD_COMMON, only the common contract framework data is used. Higher
 * levels of detail result in calls to contract_device_status() to supply
 * device contract type specific status information.
 *
 * A contract, once created, may be abandoned either explicitly or
 * implicitly. In either case, the contract_device_abandon() function is
 * invoked. This function merely calls contract_destroy(), which moves the
 * contract to the DEAD state. The device contract portion of destroy
 * processing is provided by contract_device_destroy(), which merely
 * disassociates the contract from its device devinfo node. A contract in
 * the DEAD state is not freed. It hangs around until all references to the
 * contract are gone. When that happens, the contract is finally
 * deallocated. The device contract specific portion of the free is done by
 * contract_device_free(), which finally frees the device contract specific
 * data structure (cont_device_t).
 *
 * When a device undergoes a state change, the I/O framework calls the
 * corresponding device contract entry point. For example, when a device
 * is about to go OFFLINE, the routine contract_device_offline() is
 * invoked. Similarly, if a device moves to the DEGRADED state, the
 * contract_device_degrade() function is called. These functions call the
 * core routine contract_device_publish(). This function determines via
 * the function is_sync_neg() whether an event is a synchronous (i.e.
 * negotiable) event or not. In the former case contract_device_publish()
 * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
 * and/or NACKs from contract holders. In the latter case, it simply
 * publishes the event and does not wait. In the negotiation case, ACKs or
 * NACKs from userland consumers result in contract_device_ack_nack()
 * being called, where the result of the negotiation is recorded in the
 * contract data structure. Once all outstanding contract owners have
 * responded, the device contract code in wait_for_acks() determines the
 * final result of the negotiation. A single NACK overrides all other ACKs.
 * If there is no NACK, then a single ACK will result in an overall ACK
 * result. If there are no ACKs or NACKs, then the result CT_NONE is
 * returned back to the I/O framework. Once the event is permitted or
 * blocked, the I/O framework proceeds or aborts the state change. The
 * I/O framework then calls contract_device_negend() with a result code
 * indicating the final disposition of the event.
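 *
 * To make that calling pattern concrete, an offline attempt from the I/O
 * framework is expected to look roughly like the sketch below. This is
 * illustrative only: the real callers live in the I/O framework rather
 * than in this file, and CT_EV_SUCCESS is assumed here as the counterpart
 * of the CT_EV_FAILURE result code used later in this file.
 *
 *	if (contract_device_offline(dip, DDI_DEV_T_ANY, 0) == CT_NACK) {
 *		fail the offline; the NEGEND has already been posted
 *		by contract_device_offline() itself
 *	} else {
 *		error = attempt the actual detach/offline;
 *		contract_device_negend(dip, DDI_DEV_T_ANY, 0,
 *		    error == 0 ? CT_EV_SUCCESS : CT_EV_FAILURE);
 *	}
 *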
 * The call to contract_device_negend() releases the barrier and other
 * state associated with the previous negotiation, which permits the next
 * event (if any) to come into the device contract framework.
 *
 * Finally, a device that has outstanding contracts may be removed from
 * the system, which results in its devinfo node being freed. The devinfo
 * free routine in the I/O framework calls into the device contract
 * function contract_device_remove_dip(). This routine disassociates
 * the dip from all contracts associated with the devinfo node being freed,
 * allowing the devinfo node to be freed.
 *
 * LOCKING
 * -------
 * There are four sets of data that need to be protected by locks:
 *
 *	i) the device contract specific portion of the contract template -
 *	this data is protected by the template lock ctmpl_lock.
 *
 *	ii) the device contract specific portion of the contract - this data
 *	is protected by the contract lock ct_lock.
 *
 *	iii) the linked list of contracts hanging off a devinfo node - this
 *	list is protected by the per-devinfo node lock devi_ct_lock.
 *
 *	iv) finally, there is a barrier, controlled by devi_ct_lock,
 *	devi_ct_cv and devi_ct_count, that controls state changes to a dip.
 *
 * The template lock is independent in that none of the other locks in this
 * file may be taken while holding the template lock (and vice versa).
 *
 * The remaining three locks have the following lock order:
 *
 *	devi_ct_lock -> ct_count barrier -> ct_lock
 *
 */

static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
    int spec_type, proc_t *owner, int *errorp);

/* barrier routines */
static void ct_barrier_acquire(dev_info_t *dip);
static void ct_barrier_release(dev_info_t *dip);
static int ct_barrier_held(dev_info_t *dip);
static int ct_barrier_empty(dev_info_t *dip);
static void ct_barrier_wait_for_release(dev_info_t *dip);
static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
static void ct_barrier_decr(dev_info_t *dip);
static void ct_barrier_incr(dev_info_t *dip);

ct_type_t *device_type;

/*
 * Macro predicates for determining when events should be sent and how.
 */
#define	EVSENDP(ctd, flag) \
	((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag)

#define	EVINFOP(ctd, flag) \
	((ctd->cond_contract.ct_ev_crit & flag) == 0)

/*
 * State transition table showing which transitions are synchronous and which
 * are not.
 */
struct ct_dev_negtable {
	uint_t	st_old;
	uint_t	st_new;
	uint_t	st_neg;
} ct_dev_negtable[] = {
	{CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1},
	{CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0},
	{CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0},
	{CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1},
	{0}
};

/*
 * Device contract template implementation
 */

/*
 * ctmpl_device_dup
 *
 * The device contract template dup entry point.
 * This simply copies all the fields (generic as well as device contract
 * specific) of the original.
 */
static struct ct_template *
ctmpl_device_dup(struct ct_template *template)
{
	ctmpl_device_t *new;
	ctmpl_device_t *old = template->ctmpl_data;
	char *buf;
	char *minor;

	new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
	buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

	/*
	 * copy generic fields.
427 * ctmpl_copy returns with old template lock held 428 */ 429 ctmpl_copy(&new->ctd_ctmpl, template); 430 431 new->ctd_ctmpl.ctmpl_data = new; 432 new->ctd_aset = old->ctd_aset; 433 new->ctd_minor = NULL; 434 new->ctd_noneg = old->ctd_noneg; 435 436 if (old->ctd_minor) { 437 ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); 438 bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); 439 } else { 440 kmem_free(buf, MAXPATHLEN); 441 buf = NULL; 442 } 443 444 mutex_exit(&template->ctmpl_lock); 445 if (buf) { 446 minor = i_ddi_strdup(buf, KM_SLEEP); 447 kmem_free(buf, MAXPATHLEN); 448 buf = NULL; 449 } else { 450 minor = NULL; 451 } 452 mutex_enter(&template->ctmpl_lock); 453 454 if (minor) { 455 new->ctd_minor = minor; 456 } 457 458 ASSERT(buf == NULL); 459 return (&new->ctd_ctmpl); 460 } 461 462 /* 463 * ctmpl_device_free 464 * 465 * The device contract template free entry point. Just 466 * frees the template. 467 */ 468 static void 469 ctmpl_device_free(struct ct_template *template) 470 { 471 ctmpl_device_t *dtmpl = template->ctmpl_data; 472 473 if (dtmpl->ctd_minor) 474 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 475 476 kmem_free(dtmpl, sizeof (ctmpl_device_t)); 477 } 478 479 /* 480 * SAFE_EV is the set of events which a non-privileged process is 481 * allowed to make critical. An unprivileged device contract owner has 482 * no control over when a device changes state, so all device events 483 * can be in the critical set. 484 * 485 * EXCESS tells us if "value", a critical event set, requires 486 * additional privilege. For device contracts EXCESS currently 487 * evaluates to 0. 488 */ 489 #define SAFE_EV (CT_DEV_ALLEVENT) 490 #define EXCESS(value) ((value) & ~SAFE_EV) 491 492 493 /* 494 * ctmpl_device_set 495 * 496 * The device contract template set entry point. Sets various terms in the 497 * template. The non-negotiable term can only be set if the process has 498 * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 499 */ 500 static int 501 ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr) 502 { 503 ctmpl_device_t *dtmpl = tmpl->ctmpl_data; 504 char *buf; 505 int error; 506 dev_info_t *dip; 507 int spec_type; 508 509 ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); 510 511 switch (param->ctpm_id) { 512 case CTDP_ACCEPT: 513 if (param->ctpm_value & ~CT_DEV_ALLEVENT) 514 return (EINVAL); 515 if (param->ctpm_value == 0) 516 return (EINVAL); 517 if (param->ctpm_value == CT_DEV_ALLEVENT) 518 return (EINVAL); 519 520 dtmpl->ctd_aset = param->ctpm_value; 521 break; 522 case CTDP_NONEG: 523 if (param->ctpm_value != CTDP_NONEG_SET && 524 param->ctpm_value != CTDP_NONEG_CLEAR) 525 return (EINVAL); 526 527 /* 528 * only privileged processes can designate a contract 529 * non-negotiatble. 
530 */ 531 if (param->ctpm_value == CTDP_NONEG_SET && 532 (error = secpolicy_sys_devices(cr)) != 0) { 533 return (error); 534 } 535 536 dtmpl->ctd_noneg = param->ctpm_value; 537 break; 538 539 case CTDP_MINOR: 540 if (param->ctpm_value == NULL) 541 return (EINVAL); 542 543 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 544 545 /* 546 * Copyin the device path 547 */ 548 error = copyinstr((char *)(uintptr_t)param->ctpm_value, buf, 549 MAXPATHLEN, NULL); 550 if (error != 0) { 551 kmem_free(buf, MAXPATHLEN); 552 return (error); 553 } 554 buf[MAXPATHLEN - 1] = '\0'; 555 556 if (*buf != '/' || 557 strncmp(buf, "/devices/", strlen("/devices/")) == 0 || 558 strstr(buf, "../devices/") || strchr(buf, ':') == NULL) { 559 kmem_free(buf, MAXPATHLEN); 560 return (EINVAL); 561 } 562 563 spec_type = 0; 564 dip = NULL; 565 if (resolve_pathname(buf, &dip, NULL, &spec_type) != 0) { 566 kmem_free(buf, MAXPATHLEN); 567 return (ERANGE); 568 } 569 ddi_release_devi(dip); 570 571 if (spec_type != S_IFCHR && spec_type != S_IFBLK) { 572 kmem_free(buf, MAXPATHLEN); 573 return (EINVAL); 574 } 575 576 if (dtmpl->ctd_minor != NULL) { 577 kmem_free(dtmpl->ctd_minor, 578 strlen(dtmpl->ctd_minor) + 1); 579 } 580 dtmpl->ctd_minor = i_ddi_strdup(buf, KM_SLEEP); 581 kmem_free(buf, MAXPATHLEN); 582 break; 583 case CTP_EV_CRITICAL: 584 /* 585 * Currently for device contracts, any event 586 * may be added to the critical set. We retain the 587 * following code however for future enhancements. 588 */ 589 if (EXCESS(param->ctpm_value) && 590 (error = secpolicy_contract_event(cr)) != 0) 591 return (error); 592 tmpl->ctmpl_ev_crit = param->ctpm_value; 593 break; 594 default: 595 return (EINVAL); 596 } 597 598 return (0); 599 } 600 601 /* 602 * ctmpl_device_get 603 * 604 * The device contract template get entry point. Simply fetches and 605 * returns the value of the requested term. 
606 */ 607 static int 608 ctmpl_device_get(struct ct_template *template, ct_param_t *param) 609 { 610 ctmpl_device_t *dtmpl = template->ctmpl_data; 611 int error; 612 613 ASSERT(MUTEX_HELD(&template->ctmpl_lock)); 614 615 switch (param->ctpm_id) { 616 case CTDP_ACCEPT: 617 param->ctpm_value = dtmpl->ctd_aset; 618 break; 619 case CTDP_NONEG: 620 param->ctpm_value = dtmpl->ctd_noneg; 621 break; 622 case CTDP_MINOR: 623 if (dtmpl->ctd_minor) { 624 error = copyoutstr(dtmpl->ctd_minor, 625 (char *)(uintptr_t)param->ctpm_value, 626 MAXPATHLEN, NULL); 627 if (error != 0) 628 return (error); 629 } else { 630 return (ENOENT); 631 } 632 break; 633 default: 634 return (EINVAL); 635 } 636 637 return (0); 638 } 639 640 /* 641 * Device contract type specific portion of creating a contract using 642 * a specified template 643 */ 644 /*ARGSUSED*/ 645 int 646 ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) 647 { 648 ctmpl_device_t *dtmpl; 649 char *buf; 650 dev_t dev; 651 int spec_type; 652 int error; 653 cont_device_t *ctd; 654 655 if (ctidp == NULL) 656 return (EINVAL); 657 658 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 659 660 dtmpl = template->ctmpl_data; 661 662 mutex_enter(&template->ctmpl_lock); 663 if (dtmpl->ctd_minor == NULL) { 664 /* incomplete template */ 665 mutex_exit(&template->ctmpl_lock); 666 kmem_free(buf, MAXPATHLEN); 667 return (EINVAL); 668 } else { 669 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 670 bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); 671 } 672 mutex_exit(&template->ctmpl_lock); 673 674 spec_type = 0; 675 dev = NODEV; 676 if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || 677 dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || 678 (spec_type != S_IFCHR && spec_type != S_IFBLK)) { 679 CT_DEBUG((CE_WARN, 680 "tmpl_create: failed to find device: %s", buf)); 681 kmem_free(buf, MAXPATHLEN); 682 return (ERANGE); 683 } 684 kmem_free(buf, MAXPATHLEN); 685 686 ctd = contract_device_create(template->ctmpl_data, 687 dev, spec_type, curproc, &error); 688 689 if (ctd == NULL) { 690 CT_DEBUG((CE_WARN, "Failed to create device contract for " 691 "process (%d) with device (devt = %lu, spec_type = %s)", 692 curproc->p_pid, dev, 693 spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); 694 return (error); 695 } 696 697 mutex_enter(&ctd->cond_contract.ct_lock); 698 *ctidp = ctd->cond_contract.ct_id; 699 mutex_exit(&ctd->cond_contract.ct_lock); 700 701 return (0); 702 } 703 704 /* 705 * Device contract specific template entry points 706 */ 707 static ctmplops_t ctmpl_device_ops = { 708 ctmpl_device_dup, /* ctop_dup */ 709 ctmpl_device_free, /* ctop_free */ 710 ctmpl_device_set, /* ctop_set */ 711 ctmpl_device_get, /* ctop_get */ 712 ctmpl_device_create, /* ctop_create */ 713 CT_DEV_ALLEVENT /* all device events bitmask */ 714 }; 715 716 717 /* 718 * Device contract implementation 719 */ 720 721 /* 722 * contract_device_default 723 * 724 * The device contract default template entry point. Creates a 725 * device contract template with a default A-set and no "noneg" , 726 * with informative degrade events and critical offline events. 727 * There is no default minor path. 
728 */ 729 static ct_template_t * 730 contract_device_default(void) 731 { 732 ctmpl_device_t *new; 733 734 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 735 ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); 736 737 new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; 738 new->ctd_noneg = 0; 739 new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; 740 new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; 741 742 return (&new->ctd_ctmpl); 743 } 744 745 /* 746 * contract_device_free 747 * 748 * Destroys the device contract specific portion of a contract and 749 * frees the contract. 750 */ 751 static void 752 contract_device_free(contract_t *ct) 753 { 754 cont_device_t *ctd = ct->ct_data; 755 756 ASSERT(ctd->cond_minor); 757 ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 758 kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); 759 760 ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && 761 ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); 762 763 ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); 764 765 ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); 766 ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); 767 768 ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); 769 ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); 770 771 ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0)); 772 ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); 773 774 ASSERT(!list_link_active(&ctd->cond_next)); 775 776 kmem_free(ctd, sizeof (cont_device_t)); 777 } 778 779 /* 780 * contract_device_abandon 781 * 782 * The device contract abandon entry point. 783 */ 784 static void 785 contract_device_abandon(contract_t *ct) 786 { 787 ASSERT(MUTEX_HELD(&ct->ct_lock)); 788 789 /* 790 * device contracts cannot be inherited or orphaned. 791 * Move the contract to the DEAD_STATE. It will be freed 792 * once all references to it are gone. 793 */ 794 contract_destroy(ct); 795 } 796 797 /* 798 * contract_device_destroy 799 * 800 * The device contract destroy entry point. 801 * Called from contract_destroy() to do any type specific destroy. Note 802 * that destroy is a misnomer - this does not free the contract, it only 803 * moves it to the dead state. A contract is actually freed via 804 * contract_rele() -> contract_dtor(), contop_free() 805 */ 806 static void 807 contract_device_destroy(contract_t *ct) 808 { 809 cont_device_t *ctd = ct->ct_data; 810 dev_info_t *dip = ctd->cond_dip; 811 812 ASSERT(MUTEX_HELD(&ct->ct_lock)); 813 814 if (dip == NULL) { 815 /* 816 * The dip has been removed, this is a dangling contract 817 * Check that dip linkages are NULL 818 */ 819 ASSERT(!list_link_active(&ctd->cond_next)); 820 CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no " 821 "devinfo node. contract ctid : %d", ct->ct_id)); 822 return; 823 } 824 825 /* 826 * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock 827 */ 828 mutex_exit(&ct->ct_lock); 829 830 /* 831 * Waiting for the barrier to be released is strictly speaking not 832 * necessary. But it simplifies the implementation of 833 * contract_device_publish() by establishing the invariant that 834 * device contracts cannot go away during negotiation. 
835 */ 836 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 837 ct_barrier_wait_for_release(dip); 838 mutex_enter(&ct->ct_lock); 839 840 list_remove(&(DEVI(dip)->devi_ct), ctd); 841 ctd->cond_dip = NULL; /* no longer linked to dip */ 842 contract_rele(ct); /* remove hold for dip linkage */ 843 844 mutex_exit(&ct->ct_lock); 845 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 846 mutex_enter(&ct->ct_lock); 847 } 848 849 /* 850 * contract_device_status 851 * 852 * The device contract status entry point. Called when level of "detail" 853 * is either CTD_FIXED or CTD_ALL 854 * 855 */ 856 static void 857 contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, 858 void *status, model_t model) 859 { 860 cont_device_t *ctd = ct->ct_data; 861 862 ASSERT(detail == CTD_FIXED || detail == CTD_ALL); 863 864 mutex_enter(&ct->ct_lock); 865 contract_status_common(ct, zone, status, model); 866 867 /* 868 * There's no need to hold the contract lock while accessing static 869 * data like aset or noneg. But since we need the lock to access other 870 * data like state, we hold it anyway. 871 */ 872 VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); 873 VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); 874 VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); 875 876 if (detail == CTD_FIXED) { 877 mutex_exit(&ct->ct_lock); 878 return; 879 } 880 881 ASSERT(ctd->cond_minor); 882 VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); 883 884 mutex_exit(&ct->ct_lock); 885 } 886 887 /* 888 * Converts a result integer into the corresponding string. Used for printing 889 * messages 890 */ 891 static char * 892 result_str(uint_t result) 893 { 894 switch (result) { 895 case CT_ACK: 896 return ("CT_ACK"); 897 case CT_NACK: 898 return ("CT_NACK"); 899 case CT_NONE: 900 return ("CT_NONE"); 901 default: 902 return ("UNKNOWN"); 903 } 904 } 905 906 /* 907 * Converts a device state integer constant into the corresponding string. 908 * Used to print messages. 909 */ 910 static char * 911 state_str(uint_t state) 912 { 913 switch (state) { 914 case CT_DEV_EV_ONLINE: 915 return ("ONLINE"); 916 case CT_DEV_EV_DEGRADED: 917 return ("DEGRADED"); 918 case CT_DEV_EV_OFFLINE: 919 return ("OFFLINE"); 920 default: 921 return ("UNKNOWN"); 922 } 923 } 924 925 /* 926 * Routine that determines if a particular CT_DEV_EV_? event corresponds to a 927 * synchronous state change or not. 928 */ 929 static int 930 is_sync_neg(uint_t old, uint_t new) 931 { 932 int i; 933 934 ASSERT(old & CT_DEV_ALLEVENT); 935 ASSERT(new & CT_DEV_ALLEVENT); 936 937 if (old == new) { 938 CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", 939 state_str(new))); 940 return (-2); 941 } 942 943 for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { 944 if (old == ct_dev_negtable[i].st_old && 945 new == ct_dev_negtable[i].st_new) { 946 return (ct_dev_negtable[i].st_neg); 947 } 948 } 949 950 CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " 951 "old = %s -> new = %s", state_str(old), state_str(new))); 952 953 return (-1); 954 } 955 956 /* 957 * Used to cleanup cached dv_nodes so that when a device is released by 958 * a contract holder, its devinfo node can be successfully detached. 
959 */ 960 static int 961 contract_device_dvclean(dev_info_t *dip) 962 { 963 char *devnm; 964 dev_info_t *pdip; 965 int error; 966 967 ASSERT(dip); 968 969 /* pdip can be NULL if we have contracts against the root dip */ 970 pdip = ddi_get_parent(dip); 971 972 if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { 973 char *path; 974 975 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 976 (void) ddi_pathname(dip, path); 977 CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " 978 "device=%s", path)); 979 kmem_free(path, MAXPATHLEN); 980 return (EDEADLOCK); 981 } 982 983 if (pdip) { 984 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 985 (void) ddi_deviname(dip, devnm); 986 error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); 987 kmem_free(devnm, MAXNAMELEN + 1); 988 } else { 989 error = devfs_clean(dip, NULL, DV_CLEAN_FORCE); 990 } 991 992 return (error); 993 } 994 995 /* 996 * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. 997 * Results in the ACK or NACK being recorded on the dip for one particular 998 * contract. The device contracts framework evaluates the ACK/NACKs for all 999 * contracts against a device to determine if a particular device state change 1000 * should be allowed. 1001 */ 1002 static int 1003 contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, 1004 uint_t cmd) 1005 { 1006 cont_device_t *ctd = ct->ct_data; 1007 dev_info_t *dip; 1008 ctid_t ctid; 1009 int error; 1010 1011 ctid = ct->ct_id; 1012 1013 CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); 1014 1015 mutex_enter(&ct->ct_lock); 1016 CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); 1017 1018 dip = ctd->cond_dip; 1019 1020 ASSERT(ctd->cond_minor); 1021 ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 1022 1023 /* 1024 * Negotiation only if new state is not in A-set 1025 */ 1026 ASSERT(!(ctd->cond_aset & evtype)); 1027 1028 /* 1029 * Negotiation only if transition is synchronous 1030 */ 1031 ASSERT(is_sync_neg(ctd->cond_state, evtype)); 1032 1033 /* 1034 * We shouldn't be negotiating if the "noneg" flag is set 1035 */ 1036 ASSERT(!ctd->cond_noneg); 1037 1038 if (dip) 1039 ndi_hold_devi(dip); 1040 1041 mutex_exit(&ct->ct_lock); 1042 1043 /* 1044 * dv_clean only if !NACK and offline state change 1045 */ 1046 if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { 1047 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); 1048 error = contract_device_dvclean(dip); 1049 if (error != 0) { 1050 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", 1051 ctid)); 1052 ddi_release_devi(dip); 1053 } 1054 } 1055 1056 mutex_enter(&ct->ct_lock); 1057 1058 if (dip) 1059 ddi_release_devi(dip); 1060 1061 if (dip == NULL) { 1062 if (ctd->cond_currev_id != evid) { 1063 CT_DEBUG((CE_WARN, "%sACK for non-current event " 1064 "(type=%s, id=%llu) on removed device", 1065 cmd == CT_NACK ? "N" : "", 1066 state_str(evtype), (unsigned long long)evid)); 1067 CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", 1068 ctid)); 1069 } else { 1070 ASSERT(ctd->cond_currev_type == evtype); 1071 CT_DEBUG((CE_WARN, "contract_ack: no such device: " 1072 "ctid: %d", ctid)); 1073 } 1074 error = (ct->ct_state == CTS_DEAD) ? ESRCH : 1075 ((cmd == CT_NACK) ? 
ETIMEDOUT : 0); 1076 mutex_exit(&ct->ct_lock); 1077 return (error); 1078 } 1079 1080 /* 1081 * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock 1082 */ 1083 mutex_exit(&ct->ct_lock); 1084 1085 mutex_enter(&DEVI(dip)->devi_ct_lock); 1086 mutex_enter(&ct->ct_lock); 1087 if (ctd->cond_currev_id != evid) { 1088 char *buf; 1089 mutex_exit(&ct->ct_lock); 1090 mutex_exit(&DEVI(dip)->devi_ct_lock); 1091 ndi_hold_devi(dip); 1092 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1093 (void) ddi_pathname(dip, buf); 1094 ddi_release_devi(dip); 1095 CT_DEBUG((CE_WARN, "%sACK for non-current event" 1096 "(type=%s, id=%llu) on device %s", 1097 cmd == CT_NACK ? "N" : "", 1098 state_str(evtype), (unsigned long long)evid, buf)); 1099 kmem_free(buf, MAXPATHLEN); 1100 CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", 1101 cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); 1102 return (cmd == CT_ACK ? 0 : ETIMEDOUT); 1103 } 1104 1105 ASSERT(ctd->cond_currev_type == evtype); 1106 ASSERT(cmd == CT_ACK || cmd == CT_NACK); 1107 1108 CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", 1109 cmd == CT_NACK ? "N" : "", ctid)); 1110 1111 ctd->cond_currev_ack = cmd; 1112 mutex_exit(&ct->ct_lock); 1113 1114 ct_barrier_decr(dip); 1115 mutex_exit(&DEVI(dip)->devi_ct_lock); 1116 1117 CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); 1118 1119 return (0); 1120 } 1121 1122 /* 1123 * Invoked when a userland contract holder approves (i.e. ACKs) a state change 1124 */ 1125 static int 1126 contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) 1127 { 1128 return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); 1129 } 1130 1131 /* 1132 * Invoked when a userland contract holder blocks (i.e. NACKs) a state change 1133 */ 1134 static int 1135 contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) 1136 { 1137 return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); 1138 } 1139 1140 /* 1141 * Creates a new contract synchronously with the breaking of an existing 1142 * contract. Currently not supported. 1143 */ 1144 /*ARGSUSED*/ 1145 static int 1146 contract_device_newct(contract_t *ct) 1147 { 1148 return (ENOTSUP); 1149 } 1150 1151 /* 1152 * Core device contract implementation entry points 1153 */ 1154 static contops_t contract_device_ops = { 1155 contract_device_free, /* contop_free */ 1156 contract_device_abandon, /* contop_abandon */ 1157 contract_device_destroy, /* contop_destroy */ 1158 contract_device_status, /* contop_status */ 1159 contract_device_ack, /* contop_ack */ 1160 contract_device_nack, /* contop_nack */ 1161 contract_qack_notsup, /* contop_qack */ 1162 contract_device_newct /* contop_newct */ 1163 }; 1164 1165 /* 1166 * contract_device_init 1167 * 1168 * Initializes the device contract type. 1169 */ 1170 void 1171 contract_device_init(void) 1172 { 1173 device_type = contract_type_init(CTT_DEVICE, "device", 1174 &contract_device_ops, contract_device_default); 1175 } 1176 1177 /* 1178 * contract_device_create 1179 * 1180 * create a device contract given template "tmpl" and the "owner" process. 1181 * May fail and return NULL if project.max-contracts would have been exceeded. 
1182 * 1183 * Common device contract creation routine called for both open-time and 1184 * non-open time device contract creation 1185 */ 1186 static cont_device_t * 1187 contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, 1188 proc_t *owner, int *errorp) 1189 { 1190 cont_device_t *ctd; 1191 char *minor; 1192 char *path; 1193 dev_info_t *dip; 1194 1195 ASSERT(dtmpl != NULL); 1196 ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); 1197 ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); 1198 ASSERT(errorp); 1199 1200 *errorp = 0; 1201 1202 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1203 1204 mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 1205 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 1206 bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); 1207 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1208 1209 dip = e_ddi_hold_devi_by_path(path, 0); 1210 if (dip == NULL) { 1211 cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " 1212 "for device path (%s)", path); 1213 kmem_free(path, MAXPATHLEN); 1214 *errorp = ERANGE; 1215 return (NULL); 1216 } 1217 1218 /* 1219 * Lock out any parallel contract negotiations 1220 */ 1221 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1222 ct_barrier_acquire(dip); 1223 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1224 1225 minor = i_ddi_strdup(path, KM_SLEEP); 1226 kmem_free(path, MAXPATHLEN); 1227 1228 (void) contract_type_pbundle(device_type, owner); 1229 1230 ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); 1231 1232 /* 1233 * Only we hold a refernce to this contract. Safe to access 1234 * the fields without a ct_lock 1235 */ 1236 ctd->cond_minor = minor; 1237 /* 1238 * It is safe to set the dip pointer in the contract 1239 * as the contract will always be destroyed before the dip 1240 * is released 1241 */ 1242 ctd->cond_dip = dip; 1243 ctd->cond_devt = dev; 1244 ctd->cond_spec = spec_type; 1245 1246 /* 1247 * Since we are able to lookup the device, it is either 1248 * online or degraded 1249 */ 1250 ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 1251 CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; 1252 1253 mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 1254 ctd->cond_aset = dtmpl->ctd_aset; 1255 ctd->cond_noneg = dtmpl->ctd_noneg; 1256 1257 /* 1258 * contract_ctor() initailizes the common portion of a contract 1259 * contract_dtor() destroys the common portion of a contract 1260 */ 1261 if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, 1262 ctd, 0, owner, B_TRUE)) { 1263 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1264 /* 1265 * contract_device_free() destroys the type specific 1266 * portion of a contract and frees the contract. 
1267 * The "minor" path and "cred" is a part of the type specific 1268 * portion of the contract and will be freed by 1269 * contract_device_free() 1270 */ 1271 contract_device_free(&ctd->cond_contract); 1272 1273 /* release barrier */ 1274 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1275 ct_barrier_release(dip); 1276 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1277 1278 ddi_release_devi(dip); 1279 *errorp = EAGAIN; 1280 return (NULL); 1281 } 1282 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1283 1284 mutex_enter(&ctd->cond_contract.ct_lock); 1285 ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; 1286 ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; 1287 ctd->cond_contract.ct_ntime.ctm_start = -1; 1288 ctd->cond_contract.ct_qtime.ctm_start = -1; 1289 mutex_exit(&ctd->cond_contract.ct_lock); 1290 1291 /* 1292 * Insert device contract into list hanging off the dip 1293 * Bump up the ref-count on the contract to reflect this 1294 */ 1295 contract_hold(&ctd->cond_contract); 1296 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1297 list_insert_tail(&(DEVI(dip)->devi_ct), ctd); 1298 1299 /* release barrier */ 1300 ct_barrier_release(dip); 1301 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1302 1303 ddi_release_devi(dip); 1304 1305 return (ctd); 1306 } 1307 1308 /* 1309 * Called when a device is successfully opened to create an open-time contract 1310 * i.e. synchronously with a device open. 1311 */ 1312 int 1313 contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) 1314 { 1315 ctmpl_device_t *dtmpl; 1316 ct_template_t *tmpl; 1317 cont_device_t *ctd; 1318 char *path; 1319 klwp_t *lwp; 1320 int error; 1321 1322 if (ctpp) 1323 *ctpp = NULL; 1324 1325 /* 1326 * Check if we are in user-context i.e. if we have an lwp 1327 */ 1328 lwp = ttolwp(curthread); 1329 if (lwp == NULL) { 1330 CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); 1331 return (0); 1332 } 1333 1334 tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); 1335 if (tmpl == NULL) { 1336 return (0); 1337 } 1338 dtmpl = tmpl->ctmpl_data; 1339 1340 /* 1341 * If the user set a minor path in the template before an open, 1342 * ignore it. We use the minor path of the actual minor opened. 1343 */ 1344 mutex_enter(&tmpl->ctmpl_lock); 1345 if (dtmpl->ctd_minor != NULL) { 1346 CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " 1347 "ignoring device minor path in active template: %s", 1348 curproc->p_pid, dtmpl->ctd_minor)); 1349 /* 1350 * This is a copy of the actual activated template. 1351 * Safe to make changes such as freeing the minor 1352 * path in the template. 
1353 */ 1354 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 1355 dtmpl->ctd_minor = NULL; 1356 } 1357 mutex_exit(&tmpl->ctmpl_lock); 1358 1359 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1360 1361 if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { 1362 CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " 1363 "minor path from dev_t,spec {%lu, %d} for process (%d)", 1364 dev, spec_type, curproc->p_pid)); 1365 ctmpl_free(tmpl); 1366 kmem_free(path, MAXPATHLEN); 1367 return (1); 1368 } 1369 1370 mutex_enter(&tmpl->ctmpl_lock); 1371 ASSERT(dtmpl->ctd_minor == NULL); 1372 dtmpl->ctd_minor = path; 1373 mutex_exit(&tmpl->ctmpl_lock); 1374 1375 ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); 1376 1377 mutex_enter(&tmpl->ctmpl_lock); 1378 ASSERT(dtmpl->ctd_minor); 1379 dtmpl->ctd_minor = NULL; 1380 mutex_exit(&tmpl->ctmpl_lock); 1381 ctmpl_free(tmpl); 1382 kmem_free(path, MAXPATHLEN); 1383 1384 if (ctd == NULL) { 1385 cmn_err(CE_NOTE, "contract_device_open(): Failed to " 1386 "create device contract for process (%d) holding " 1387 "device (devt = %lu, spec_type = %d)", 1388 curproc->p_pid, dev, spec_type); 1389 return (1); 1390 } 1391 1392 if (ctpp) { 1393 mutex_enter(&ctd->cond_contract.ct_lock); 1394 *ctpp = &ctd->cond_contract; 1395 mutex_exit(&ctd->cond_contract.ct_lock); 1396 } 1397 return (0); 1398 } 1399 1400 /* 1401 * Called during contract negotiation by the device contract framework to wait 1402 * for ACKs or NACKs from contract holders. If all responses are not received 1403 * before a specified timeout, this routine times out. 1404 */ 1405 static uint_t 1406 wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) 1407 { 1408 cont_device_t *ctd; 1409 int timed_out = 0; 1410 int result = CT_NONE; 1411 int ack; 1412 char *f = "wait_for_acks"; 1413 1414 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 1415 ASSERT(dip); 1416 ASSERT(evtype & CT_DEV_ALLEVENT); 1417 ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 1418 ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 1419 (spec_type == S_IFBLK || spec_type == S_IFCHR)); 1420 1421 CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); 1422 1423 if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { 1424 /* 1425 * some contract owner(s) didn't respond in time 1426 */ 1427 CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); 1428 timed_out = 1; 1429 } 1430 1431 ack = 0; 1432 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1433 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1434 1435 mutex_enter(&ctd->cond_contract.ct_lock); 1436 1437 ASSERT(ctd->cond_dip == dip); 1438 1439 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 1440 mutex_exit(&ctd->cond_contract.ct_lock); 1441 continue; 1442 } 1443 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 1444 mutex_exit(&ctd->cond_contract.ct_lock); 1445 continue; 1446 } 1447 1448 /* skip if non-negotiable contract */ 1449 if (ctd->cond_noneg) { 1450 mutex_exit(&ctd->cond_contract.ct_lock); 1451 continue; 1452 } 1453 1454 ASSERT(ctd->cond_currev_type == evtype); 1455 if (ctd->cond_currev_ack == CT_NACK) { 1456 CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", 1457 f, (void *)dip)); 1458 mutex_exit(&ctd->cond_contract.ct_lock); 1459 return (CT_NACK); 1460 } else if (ctd->cond_currev_ack == CT_ACK) { 1461 ack = 1; 1462 CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", 1463 f, (void *)dip)); 1464 } 1465 mutex_exit(&ctd->cond_contract.ct_lock); 1466 } 1467 1468 if (ack) { 1469 result = CT_ACK; 1470 
CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); 1471 } else if (timed_out) { 1472 result = CT_NONE; 1473 CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", 1474 f, (void *)dip)); 1475 } else { 1476 CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", 1477 f, (void *)dip)); 1478 } 1479 1480 1481 return (result); 1482 } 1483 1484 /* 1485 * Determines the current state of a device (i.e a devinfo node 1486 */ 1487 static int 1488 get_state(dev_info_t *dip) 1489 { 1490 if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) 1491 return (CT_DEV_EV_OFFLINE); 1492 else if (DEVI_IS_DEVICE_DEGRADED(dip)) 1493 return (CT_DEV_EV_DEGRADED); 1494 else 1495 return (CT_DEV_EV_ONLINE); 1496 } 1497 1498 /* 1499 * Sets the current state of a device in a device contract 1500 */ 1501 static void 1502 set_cond_state(dev_info_t *dip) 1503 { 1504 uint_t state = get_state(dip); 1505 cont_device_t *ctd; 1506 1507 /* verify that barrier is held */ 1508 ASSERT(ct_barrier_held(dip)); 1509 1510 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1511 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1512 mutex_enter(&ctd->cond_contract.ct_lock); 1513 ASSERT(ctd->cond_dip == dip); 1514 ctd->cond_state = state; 1515 mutex_exit(&ctd->cond_contract.ct_lock); 1516 } 1517 } 1518 1519 /* 1520 * Core routine called by event-specific routines when an event occurs. 1521 * Determines if an event should be be published, and if it is to be 1522 * published, whether a negotiation should take place. Also implements 1523 * NEGEND events which publish the final disposition of an event after 1524 * negotiations are complete. 1525 * 1526 * When an event occurs on a minor node, this routine walks the list of 1527 * contracts hanging off a devinfo node and for each contract on the affected 1528 * dip, evaluates the following cases 1529 * 1530 * a. an event that is synchronous, breaks the contract and NONEG not set 1531 * - bumps up the outstanding negotiation counts on the dip 1532 * - marks the dip as undergoing negotiation (devi_ct_neg) 1533 * - event of type CTE_NEG is published 1534 * b. an event that is synchronous, breaks the contract and NONEG is set 1535 * - sets the final result to CT_NACK, event is blocked 1536 * - does not publish an event 1537 * c. event is asynchronous and breaks the contract 1538 * - publishes a critical event irrespect of whether the NONEG 1539 * flag is set, since the contract will be broken and contract 1540 * owner needs to be informed. 1541 * d. No contract breakage but the owner has subscribed to the event 1542 * - publishes the event irrespective of the NONEG event as the 1543 * owner has explicitly subscribed to the event. 1544 * e. NEGEND event 1545 * - publishes a critical event. Should only be doing this if 1546 * if NONEG is not set. 1547 * f. all other events 1548 * - Since a contract is not broken and this event has not been 1549 * subscribed to, this event does not need to be published for 1550 * for this contract. 1551 * 1552 * Once an event is published, what happens next depends on the type of 1553 * event: 1554 * 1555 * a. NEGEND event 1556 * - cleanup all state associated with the preceding negotiation 1557 * and return CT_ACK to the caller of contract_device_publish() 1558 * b. NACKed event 1559 * - One or more contracts had the NONEG term, so the event was 1560 * blocked. Return CT_NACK to the caller. 1561 * c. Negotiated event 1562 * - Call wait_for_acks() to wait for responses from contract 1563 * holders. 
The end result is either CT_ACK (event is permitted), 1564 * CT_NACK (event is blocked) or CT_NONE (no contract owner) 1565 * responded. This result is returned back to the caller. 1566 * d. All other events 1567 * - If the event was asynchronous (i.e. not negotiated) or 1568 * a contract was not broken return CT_ACK to the caller. 1569 */ 1570 static uint_t 1571 contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, 1572 uint_t evtype, nvlist_t *tnvl) 1573 { 1574 cont_device_t *ctd; 1575 uint_t result = CT_NONE; 1576 uint64_t evid = 0; 1577 uint64_t nevid = 0; 1578 char *path = NULL; 1579 int negend; 1580 int match; 1581 int sync = 0; 1582 contract_t *ct; 1583 ct_kevent_t *event; 1584 nvlist_t *nvl; 1585 int broken = 0; 1586 1587 ASSERT(dip); 1588 ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 1589 ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 1590 (spec_type == S_IFBLK || spec_type == S_IFCHR)); 1591 ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); 1592 1593 /* Is this a synchronous state change ? */ 1594 if (evtype != CT_EV_NEGEND) { 1595 sync = is_sync_neg(get_state(dip), evtype); 1596 /* NOP if unsupported transition */ 1597 if (sync == -2 || sync == -1) { 1598 DEVI(dip)->devi_flags |= DEVI_CT_NOP; 1599 result = (sync == -2) ? CT_ACK : CT_NONE; 1600 goto out; 1601 } 1602 CT_DEBUG((CE_NOTE, "publish: is%s sync state change", 1603 sync ? "" : " not")); 1604 } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { 1605 DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; 1606 result = CT_ACK; 1607 goto out; 1608 } 1609 1610 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1611 (void) ddi_pathname(dip, path); 1612 1613 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1614 1615 /* 1616 * Negotiation end - set the state of the device in the contract 1617 */ 1618 if (evtype == CT_EV_NEGEND) { 1619 CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); 1620 set_cond_state(dip); 1621 } 1622 1623 /* 1624 * If this device didn't go through negotiation, don't publish 1625 * a NEGEND event - simply release the barrier to allow other 1626 * device events in. 1627 */ 1628 negend = 0; 1629 if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { 1630 CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier")); 1631 ct_barrier_release(dip); 1632 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1633 result = CT_ACK; 1634 goto out; 1635 } else if (evtype == CT_EV_NEGEND) { 1636 /* 1637 * There are negotiated contract breakages that 1638 * need a NEGEND event 1639 */ 1640 ASSERT(ct_barrier_held(dip)); 1641 negend = 1; 1642 CT_DEBUG((CE_NOTE, "publish: setting negend flag")); 1643 } else { 1644 /* 1645 * This is a new event, not a NEGEND event. Wait for previous 1646 * contract events to complete. 
1647 */ 1648 ct_barrier_acquire(dip); 1649 } 1650 1651 1652 match = 0; 1653 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1654 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1655 1656 ctid_t ctid; 1657 size_t len = strlen(path); 1658 1659 mutex_enter(&ctd->cond_contract.ct_lock); 1660 1661 ASSERT(ctd->cond_dip == dip); 1662 ASSERT(ctd->cond_minor); 1663 ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && 1664 ctd->cond_minor[len] == ':'); 1665 1666 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 1667 mutex_exit(&ctd->cond_contract.ct_lock); 1668 continue; 1669 } 1670 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 1671 mutex_exit(&ctd->cond_contract.ct_lock); 1672 continue; 1673 } 1674 1675 /* We have a matching contract */ 1676 match = 1; 1677 ctid = ctd->cond_contract.ct_id; 1678 CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", 1679 ctid)); 1680 1681 /* 1682 * There are 4 possible cases 1683 * 1. A contract is broken (dev not in acceptable state) and 1684 * the state change is synchronous - start negotiation 1685 * by sending a CTE_NEG critical event. 1686 * 2. A contract is broken and the state change is 1687 * asynchronous - just send a critical event and 1688 * break the contract. 1689 * 3. Contract is not broken, but consumer has subscribed 1690 * to the event as a critical or informative event 1691 * - just send the appropriate event 1692 * 4. contract waiting for negend event - just send the critical 1693 * NEGEND event. 1694 */ 1695 broken = 0; 1696 if (!negend && !(evtype & ctd->cond_aset)) { 1697 broken = 1; 1698 CT_DEBUG((CE_NOTE, "publish: Contract broken: %d", 1699 ctid)); 1700 } 1701 1702 /* 1703 * Don't send event if 1704 * - contract is not broken AND 1705 * - contract holder has not subscribed to this event AND 1706 * - contract not waiting for a NEGEND event 1707 */ 1708 if (!broken && !EVSENDP(ctd, evtype) && 1709 !ctd->cond_neg) { 1710 CT_DEBUG((CE_NOTE, "contract_device_publish(): " 1711 "contract (%d): no publish reqd: event %d", 1712 ctd->cond_contract.ct_id, evtype)); 1713 mutex_exit(&ctd->cond_contract.ct_lock); 1714 continue; 1715 } 1716 1717 /* 1718 * Note: need to kmem_zalloc() the event so mutexes are 1719 * initialized automatically 1720 */ 1721 ct = &ctd->cond_contract; 1722 event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); 1723 event->cte_type = evtype; 1724 1725 if (broken && sync) { 1726 CT_DEBUG((CE_NOTE, "publish: broken + sync: " 1727 "ctid: %d", ctid)); 1728 ASSERT(!negend); 1729 ASSERT(ctd->cond_currev_id == 0); 1730 ASSERT(ctd->cond_currev_type == 0); 1731 ASSERT(ctd->cond_currev_ack == 0); 1732 ASSERT(ctd->cond_neg == 0); 1733 if (ctd->cond_noneg) { 1734 /* Nothing to publish. Event has been blocked */ 1735 CT_DEBUG((CE_NOTE, "publish: sync and noneg:" 1736 "not publishing blocked ev: ctid: %d", 1737 ctid)); 1738 result = CT_NACK; 1739 kmem_free(event, sizeof (ct_kevent_t)); 1740 mutex_exit(&ctd->cond_contract.ct_lock); 1741 continue; 1742 } 1743 event->cte_flags = CTE_NEG; /* critical neg. 
1744 ctd->cond_currev_type = event->cte_type;
1745 ct_barrier_incr(dip);
1746 DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
1747 ctd->cond_neg = 1;
1748 } else if (broken && !sync) {
1749 CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
1750 ctid));
1751 ASSERT(!negend);
1752 ASSERT(ctd->cond_currev_id == 0);
1753 ASSERT(ctd->cond_currev_type == 0);
1754 ASSERT(ctd->cond_currev_ack == 0);
1755 ASSERT(ctd->cond_neg == 0);
1756 event->cte_flags = 0; /* critical event */
1757 } else if (EVSENDP(ctd, event->cte_type)) {
1758 CT_DEBUG((CE_NOTE, "publish: event subscrib: ctid: %d",
1759 ctid));
1760 ASSERT(!negend);
1761 ASSERT(ctd->cond_currev_id == 0);
1762 ASSERT(ctd->cond_currev_type == 0);
1763 ASSERT(ctd->cond_currev_ack == 0);
1764 ASSERT(ctd->cond_neg == 0);
1765 event->cte_flags = EVINFOP(ctd, event->cte_type) ?
1766 CTE_INFO : 0;
1767 } else if (ctd->cond_neg) {
1768 CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
1769 ASSERT(negend);
1770 ASSERT(ctd->cond_noneg == 0);
1771 nevid = ctd->cond_contract.ct_nevent ?
1772 ctd->cond_contract.ct_nevent->cte_id : 0;
1773 ASSERT(ctd->cond_currev_id == nevid);
1774 event->cte_flags = 0; /* NEGEND is always critical */
1775 ctd->cond_currev_id = 0;
1776 ctd->cond_currev_type = 0;
1777 ctd->cond_currev_ack = 0;
1778 ctd->cond_neg = 0;
1779 } else {
1780 CT_DEBUG((CE_NOTE, "publish: not publishing event for "
1781 "ctid: %d, evtype: %d",
1782 ctd->cond_contract.ct_id, event->cte_type));
1783 ASSERT(!negend);
1784 ASSERT(ctd->cond_currev_id == 0);
1785 ASSERT(ctd->cond_currev_type == 0);
1786 ASSERT(ctd->cond_currev_ack == 0);
1787 ASSERT(ctd->cond_neg == 0);
1788 kmem_free(event, sizeof (ct_kevent_t));
1789 mutex_exit(&ctd->cond_contract.ct_lock);
1790 continue;
1791 }
1792
1793 nvl = NULL;
1794 if (tnvl) {
1795 VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
1796 if (negend) {
1797 int32_t newct = 0;
1798 ASSERT(ctd->cond_noneg == 0);
1799 VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
1800 == 0);
1801 VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
1802 &newct) == 0);
1803 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
1804 newct == 1 ? 0 :
1805 ctd->cond_contract.ct_id) == 0);
1806 CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
1807 "CTS_NEVID: %llu, CTS_NEWCT: %s",
1808 ctid, (unsigned long long)nevid,
1809 newct ? "success" : "failure"));
1810
1811 }
1812 }
1813
1814 if (ctd->cond_neg) {
1815 ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
1816 ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
1817 ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
1818 ctd->cond_contract.ct_qtime.ctm_start =
1819 ctd->cond_contract.ct_ntime.ctm_start;
1820 }
1821
1822 /*
1823 * By holding the dip's devi_ct_lock we ensure that
1824 * all ACK/NACKs are held up until we have finished
1825 * publishing to all contracts.
1826 */
1827 mutex_exit(&ctd->cond_contract.ct_lock);
1828 evid = cte_publish_all(ct, event, nvl, NULL);
1829 mutex_enter(&ctd->cond_contract.ct_lock);
1830
1831 if (ctd->cond_neg) {
1832 ASSERT(!negend);
1833 ASSERT(broken);
1834 ASSERT(sync);
1835 ASSERT(!ctd->cond_noneg);
1836 CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
1837 ": %d", ctid));
1838 ctd->cond_currev_id = evid;
1839 } else if (negend) {
1840 ctd->cond_contract.ct_ntime.ctm_start = -1;
1841 ctd->cond_contract.ct_qtime.ctm_start = -1;
1842 }
1843 mutex_exit(&ctd->cond_contract.ct_lock);
1844 }
1845
1846 /*
1847 * If "negend", set the counter back to its initial state (-1) so that
1848 * other events can be published. Also clear the negotiation flag
1849 * on dip.
1850 *
1851 * 0 .. n are used for counting.
1852 * -1 indicates counter is available for use.
1853 */
1854 if (negend) {
1855 /*
1856 * devi_ct_count is not necessarily 0. We may have
1857 * timed out, in which case the count will be non-zero.
1858 */
1859 ct_barrier_release(dip);
1860 DEVI(dip)->devi_ct_neg = 0;
1861 CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
1862 (void *)dip));
1863 } else if (DEVI(dip)->devi_ct_neg) {
1864 ASSERT(match);
1865 ASSERT(!ct_barrier_empty(dip));
1866 CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
1867 DEVI(dip)->devi_ct_count, (void *)dip));
1868 } else {
1869 /*
1870 * For non-negotiated events or subscribed events or no
1871 * matching contracts
1872 */
1873 ASSERT(ct_barrier_empty(dip));
1874 ASSERT(DEVI(dip)->devi_ct_neg == 0);
1875 CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
1876 "dip=%p", (void *)dip));
1877
1878 /*
1879 * Only this function, when called from contract_device_negend()
1880 * (i.e. with a CT_EV_NEGEND event), can reset the counter to the
1881 * READY state (-1). contract_device_negend() is called for every
1882 * event whether a NEGEND event is needed or not, but the NEGEND is
1883 * only published if the event whose end it signals was negotiated.
1884 */
1885 }
1886
1887 if (!match) {
1888 /* No matching contracts */
1889 CT_DEBUG((CE_NOTE, "publish: No matching contract"));
1890 result = CT_NONE;
1891 } else if (result == CT_NACK) {
1892 /* a non-negotiable contract exists and this is a neg. event */
1893 CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
1894 (void) wait_for_acks(dip, dev, spec_type, evtype);
1895 } else if (DEVI(dip)->devi_ct_neg) {
1896 /* one or more contracts going through negotiations */
1897 CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
1898 result = wait_for_acks(dip, dev, spec_type, evtype);
1899 } else {
1900 /* no negotiated contracts or no broken contracts or NEGEND */
1901 CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
1902 result = CT_ACK;
1903 }
1904
1905 /*
1906 * Release the lock only now so that the only point where we
1907 * drop the lock is in wait_for_acks(). This is so that we don't
1908 * miss cv_signal/cv_broadcast from contract holders.
1909 */
1910 CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
1911 mutex_exit(&(DEVI(dip)->devi_ct_lock));
1912
1913 out:
1914 if (tnvl)
1915 nvlist_free(tnvl);
1916 if (path)
1917 kmem_free(path, MAXPATHLEN);
1918
1919
1920 CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
1921 return (result);
1922 }
1923
1924
1925 /*
1926 * contract_device_offline
1927 *
1928 * Event publishing routine called by I/O framework when a device is offlined.
1929 */
1930 ct_ack_t
1931 contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
1932 {
1933 nvlist_t *nvl;
1934 uint_t result;
1935 uint_t evtype;
1936
1937 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1938
1939 evtype = CT_DEV_EV_OFFLINE;
1940 result = contract_device_publish(dip, dev, spec_type, evtype, nvl);
1941
1942 /*
1943 * If a contract offline is NACKED, the framework expects us to call
1944 * NEGEND ourselves, since we know the final result.
1945 */
1946 if (result == CT_NACK) {
1947 contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
1948 }
1949
1950 return (result);
1951 }
1952
1953 /*
1954 * contract_device_degrade
1955 *
1956 * Event publishing routine called by I/O framework when a device
1957 * moves to the degraded state.
1958 */
1959 /*ARGSUSED*/
1960 void
1961 contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
1962 {
1963 nvlist_t *nvl;
1964 uint_t evtype;
1965
1966 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1967
1968 evtype = CT_DEV_EV_DEGRADED;
1969 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
1970 }
1971
1972 /*
1973 * contract_device_undegrade
1974 *
1975 * Event publishing routine called by I/O framework when a device
1976 * moves from degraded state to online state.
1977 */
1978 /*ARGSUSED*/
1979 void
1980 contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
1981 {
1982 nvlist_t *nvl;
1983 uint_t evtype;
1984
1985 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1986
1987 evtype = CT_DEV_EV_ONLINE;
1988 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
1989 }
1990
1991 /*
1992 * For all contracts which have undergone a negotiation (because the device
1993 * moved out of the acceptable state for that contract and the state
1994 * change is synchronous i.e. requires negotiation) this routine publishes
1995 * a CT_EV_NEGEND event with the final disposition of the event.
1996 *
1997 * This event is always a critical event.
1998 */
1999 void
2000 contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
2001 {
2002 nvlist_t *nvl;
2003 uint_t evtype;
2004
2005 ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);
2006
2007 CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
2008 "dip: %p", result, (void *)dip));
2009
2010 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2011 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
2012 result == CT_EV_SUCCESS ? 1 : 0) == 0);
2013
2014 evtype = CT_EV_NEGEND;
2015 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
2016
2017 CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
2018 (void *)dip));
2019 }
2020
2021 /*
2022 * Wrapper routine called by other subsystems (such as LDI) to start
2023 * negotiations when a synchronous device state change occurs.
2024 * Returns CT_ACK or CT_NACK.
2025 */
2026 ct_ack_t
2027 contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
2028 uint_t evtype)
2029 {
2030 int result;
2031
2032 ASSERT(dip);
2033 ASSERT(dev != NODEV);
2034 ASSERT(dev != DDI_DEV_T_ANY);
2035 ASSERT(dev != DDI_DEV_T_NONE);
2036 ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
2037
2038 switch (evtype) {
2039 case CT_DEV_EV_OFFLINE:
2040 result = contract_device_offline(dip, dev, spec_type);
2041 break;
2042 default:
2043 cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
2044 "not supported: event (%d) for dev_t (%lu) and spec (%d), "
2045 "dip (%p)", evtype, dev, spec_type, (void *)dip);
2046 result = CT_NACK;
2047 break;
2048 }
2049
2050 return (result);
2051 }
2052
2053 /*
2054 * A wrapper routine called by other subsystems (such as the LDI) to
2055 * finalize event processing for a state change event. For synchronous
2056 * state changes, this publishes NEGEND events. For asynchronous i.e.
2057 * non-negotiable events this publishes the event.
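 *
 * For illustration only (do_the_offline() below is a hypothetical helper,
 * not part of this framework), a subsystem driving a synchronous offline
 * would typically pair this routine with contract_device_negotiate():
 *
 *	if (contract_device_negotiate(dip, dev, S_IFCHR,
 *	    CT_DEV_EV_OFFLINE) == CT_ACK) {
 *		int error = do_the_offline(dip);
 *
 *		contract_device_finalize(dip, dev, S_IFCHR,
 *		    CT_DEV_EV_OFFLINE,
 *		    error == 0 ? CT_EV_SUCCESS : CT_EV_FAILURE);
 *	}
 *
 * On CT_NACK, contract_device_offline() has already published the failing
 * NEGEND on the caller's behalf, so contract_device_finalize() should not
 * be called again for that event.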
2058 */
2059 void
2060 contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
2061 uint_t evtype, int ct_result)
2062 {
2063 ASSERT(dip);
2064 ASSERT(dev != NODEV);
2065 ASSERT(dev != DDI_DEV_T_ANY);
2066 ASSERT(dev != DDI_DEV_T_NONE);
2067 ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
2068
2069 switch (evtype) {
2070 case CT_DEV_EV_OFFLINE:
2071 contract_device_negend(dip, dev, spec_type, ct_result);
2072 break;
2073 case CT_DEV_EV_DEGRADED:
2074 contract_device_degrade(dip, dev, spec_type);
2075 contract_device_negend(dip, dev, spec_type, ct_result);
2076 break;
2077 case CT_DEV_EV_ONLINE:
2078 contract_device_undegrade(dip, dev, spec_type);
2079 contract_device_negend(dip, dev, spec_type, ct_result);
2080 break;
2081 default:
2082 cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
2083 "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
2084 evtype, dev, spec_type, (void *)dip);
2085 break;
2086 }
2087 }
2088
2089 /*
2090 * Called by I/O framework when a devinfo node is freed to remove the
2091 * association between a devinfo node and its contracts.
2092 */
2093 void
2094 contract_device_remove_dip(dev_info_t *dip)
2095 {
2096 cont_device_t *ctd;
2097 cont_device_t *next;
2098 contract_t *ct;
2099
2100 mutex_enter(&(DEVI(dip)->devi_ct_lock));
2101 ct_barrier_wait_for_release(dip);
2102
2103 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) {
2104 next = list_next(&(DEVI(dip)->devi_ct), ctd);
2105 list_remove(&(DEVI(dip)->devi_ct), ctd);
2106 ct = &ctd->cond_contract;
2107 /*
2108 * Unlink the dip associated with this contract
2109 */
2110 mutex_enter(&ct->ct_lock);
2111 ASSERT(ctd->cond_dip == dip);
2112 ctd->cond_dip = NULL; /* no longer linked to dip */
2113 contract_rele(ct); /* remove hold for dip linkage */
2114 CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
2115 "ctid: %d", ct->ct_id));
2116 mutex_exit(&ct->ct_lock);
2117 }
2118 ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
2119 mutex_exit(&(DEVI(dip)->devi_ct_lock));
2120 }
2121
2122 /*
2123 * Barrier related routines
2124 */
2125 static void
2126 ct_barrier_acquire(dev_info_t *dip)
2127 {
2128 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2129 CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
2130 while (DEVI(dip)->devi_ct_count != -1)
2131 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
2132 DEVI(dip)->devi_ct_count = 0;
2133 CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
2134 }
2135
2136 static void
2137 ct_barrier_release(dev_info_t *dip)
2138 {
2139 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2140 ASSERT(DEVI(dip)->devi_ct_count != -1);
2141 DEVI(dip)->devi_ct_count = -1;
2142 cv_broadcast(&(DEVI(dip)->devi_ct_cv));
2143 CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
2144 }
2145
2146 static int
2147 ct_barrier_held(dev_info_t *dip)
2148 {
2149 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2150 return (DEVI(dip)->devi_ct_count != -1);
2151 }
2152
2153 static int
2154 ct_barrier_empty(dev_info_t *dip)
2155 {
2156 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2157 ASSERT(DEVI(dip)->devi_ct_count != -1);
2158 return (DEVI(dip)->devi_ct_count == 0);
2159 }
2160
2161 static void
2162 ct_barrier_wait_for_release(dev_info_t *dip)
2163 {
2164 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2165 while (DEVI(dip)->devi_ct_count != -1)
2166 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
2167 }
2168
2169 static void
2170 ct_barrier_decr(dev_info_t *dip)
2171 {
2172 CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d",
2173 DEVI(dip)->devi_ct_count));
2174
2175 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2176 ASSERT(DEVI(dip)->devi_ct_count > 0);
2177
2178 DEVI(dip)->devi_ct_count--;
2179 if (DEVI(dip)->devi_ct_count == 0) {
2180 cv_broadcast(&DEVI(dip)->devi_ct_cv);
2181 CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
2182 }
2183 }
2184
2185 static void
2186 ct_barrier_incr(dev_info_t *dip)
2187 {
2188 ASSERT(ct_barrier_held(dip));
2189 DEVI(dip)->devi_ct_count++;
2190 }
2191
2192 static int
2193 ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
2194 {
2195 clock_t abstime;
2196
2197 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
2198
2199 abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000);
2200 while (DEVI(dip)->devi_ct_count) {
2201 if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
2202 &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
2203 return (-1);
2204 }
2205 }
2206 return (0);
2207 }
2208