1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _SYS_MDI_IMPLDEFS_H 27 #define _SYS_MDI_IMPLDEFS_H 28 29 30 #include <sys/note.h> 31 #include <sys/types.h> 32 #include <sys/sunmdi.h> 33 #include <sys/modhash.h> 34 #include <sys/callb.h> 35 #include <sys/devctl.h> 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #ifdef _KERNEL 42 43 /* 44 * Multipath Driver Interfaces 45 * 46 * The multipathing framework is provided in two modules. The 'mpxio' misc. 47 * module provides the core multipath framework and the 'scsi_vhci' nexus 48 * driver provides the SCSI-III command set driver functionality for 49 * managing Fibre-Channel storage devices. 50 * 51 * As in any multipathing solution there are three major problems to solve: 52 * 53 * 1) Identification and enumeration of multipath client devices. 54 * 2) Optimal path selection when routing I/O requests. 55 * 3) Observability interfaces to snapshot the multipath configuration, 56 * and infrastructure to provide performance and error statistics. 57 * 58 * The mpxio framework consists of several major components: 59 * 60 * 1) The MDI is the Multiplexed Device Interface; this is the core glue which 61 * holds the following components together. 62 * 2) vHCI (Virtual Host Controller Interconnect) drivers provide multipathing 63 * services for a given bus technology (example: 'scsi_vhci' provides 64 * multipathing support for SCSI-III fibre-channel devices). 65 * 3) pHCI (Physical Host Controller Interconnect) drivers provide transport 66 * services for a given host controller (example: 'fcp' provides transport 67 * for fibre-channel devices). 68 * 4) Client Devices are standard Solaris target (or leaf) drivers 69 * (example: 'ssd' is the standard disk driver for fibre-channel arrays). 70 * 5) Multipath information nodes ('pathinfo' nodes) connect client device 71 * nodes and pHCI device nodes in the device tree. 72 * 73 * With the scsi_vhci, a QLC card, and mpxio enabled, the device tree might 74 * look like this: 75 * 76 * /\ 77 * / ............ 78 * <vHCI>:/ \ 79 * +-----------+ +-----------+ 80 * | scsi_vhci | | pci@1f,0 | 81 * +-----------+ +-----------+ 82 * / \ \ 83 * <Client>: / \ :<Client> \ :parent(pHCI) 84 * +----------+ +-----------+ +-------------+ 85 * | ssd 1 | | ssd 2 | | qlc@0,0 | 86 * +----------+ +-----------+ +-------------+ 87 * | | / \ 88 * | | <pHCI>: / \ :<pHCI> 89 * | | +-------------+ +-------------+ 90 * | | | pHCI 1 (fp) | | pHCI 2 (fp) | 91 * | | +-------------+ +-------------+ 92 * | | / | / | 93 * | | +------+ | +------+ | 94 * | | | ssd 3| | | ssd | | 95 * | | |!mpxio| | | (OBP)| | 96 * | | +------+ | +------+ | 97 * | | | | 98 * | | <pathinfo>: | | 99 * | | +-------+ +--------+ 100 * | +-------------->| path |-------->| path | 101 * | | info | | info | 102 * | | node 1| | node 3 | 103 * | +-------+ +--------+ 104 * | | | 105 * | | +~~~~~~~~+ 106 * | +-------+ :+--------+ 107 * +--------------------------->| path |-------->| path | 108 * | info | :| info | 109 * | node 2| +| node 4 | 110 * +-------+ +--------+ 111 * 112 * The multipath information nodes (mdi_pathinfo nodes) establish the 113 * relationship between the pseudo client driver instance nodes (children 114 * of the vHCI) and the physical host controller interconnect (pHCI 115 * drivers) forming a matrix structure. 116 * 117 * The mpxio module implements locking at multiple granularity levels to 118 * support the needs of various consumers. The multipath matrix can be 119 * column locked, or row locked depending on the consumer. The intention 120 * is to balance simplicity and performance. 121 * 122 * Locking: 123 * 124 * The devinfo locking still applies: 125 * 126 * 1) An ndi_devi_enter of a parent protects linkage/state of children. 127 * 2) state >= DS_INITIALIZED adds devi_ref of parent 128 * 3) devi_ref at state >= DS_ATTACHED prevents detach(9E). 129 * 130 * The ordering of 1) is (vHCI, pHCI). For a DEBUG kernel this ordering 131 * is asserted by the ndi_devi_enter() implementation. There is also an 132 * ndi_devi_enter(Client), which is atypical since the client is a leaf. 133 * This is done to synchronize pathinfo nodes during devinfo snapshot (see 134 * di_register_pip) by pretending that the pathinfo nodes are children 135 * of the client. 136 * 137 * In addition to devinfo locking the current implementation utilizes 138 * the following locks: 139 * 140 * mdi_mutex: protects the global list of vHCIs. 141 * 142 * vh_phci_mutex: per-vHCI (mutex) lock: protects list of pHCIs registered 143 * with vHCI. 144 * 145 * vh_client_mutex: per-vHCI (mutex) lock: protects list/hash of Clients 146 * associated with vHCI. 147 * 148 * ph_mutex: per-pHCI (mutex) lock: protects the column (pHCI-mdi_pathinfo 149 * node list) and per-pHCI structure fields. mdi_pathinfo node creation, 150 * deletion and child mdi_pathinfo node state changes are serialized on per 151 * pHCI basis (Protection against DR). 152 * 153 * ct_mutex: per-client (mutex) lock: protects the row (client-mdi_pathinfo 154 * node list) and per-client structure fields. The client-mdi_pathinfo node 155 * list is typically walked to select an optimal path when routing I/O 156 * requests. 157 * 158 * pi_mutex: per-mdi_pathinfo (mutex) lock: protects the mdi_pathinfo node 159 * structure fields. 160 * 161 * Note that per-Client structure and per-pHCI fields are freely readable when 162 * corresponding mdi_pathinfo locks are held, since holding an mdi_pathinfo 163 * node guarantees that its corresponding client and pHCI devices will not be 164 * freed. 165 */ 166 167 /* 168 * MDI Client global unique identifier property name string definition 169 */ 170 extern const char *mdi_client_guid_prop; 171 #define MDI_CLIENT_GUID_PROP (char *)mdi_client_guid_prop 172 173 /* 174 * MDI Client load balancing policy definitions 175 * 176 * Load balancing policies are determined on a per-vHCI basis and are 177 * configurable via the vHCI's driver.conf file. 178 */ 179 typedef enum { 180 LOAD_BALANCE_NONE, /* Alternate pathing */ 181 LOAD_BALANCE_RR, /* Round Robin */ 182 LOAD_BALANCE_LBA /* Logical Block Addressing */ 183 } client_lb_t; 184 185 typedef struct { 186 int region_size; 187 }client_lb_args_t; 188 189 /* 190 * MDI client load balancing property name/value string definitions 191 */ 192 extern const char *mdi_load_balance; 193 extern const char *mdi_load_balance_none; 194 extern const char *mdi_load_balance_ap; 195 extern const char *mdi_load_balance_rr; 196 extern const char *mdi_load_balance_lba; 197 198 #define LOAD_BALANCE_PROP (char *)mdi_load_balance 199 #define LOAD_BALANCE_PROP_NONE (char *)mdi_load_balance_none 200 #define LOAD_BALANCE_PROP_AP (char *)mdi_load_balance_ap 201 #define LOAD_BALANCE_PROP_RR (char *)mdi_load_balance_rr 202 #define LOAD_BALANCE_PROP_LBA (char *)mdi_load_balance_lba 203 204 /* default for region size */ 205 #define LOAD_BALANCE_DEFAULT_REGION_SIZE 18 206 207 /* 208 * vHCI drivers: 209 * 210 * vHCI drivers are pseudo nexus drivers which implement multipath services 211 * for a specific command set or bus architecture ('class'). There is a 212 * single instance of the vHCI driver for each command set which supports 213 * multipath devices. 214 * 215 * Each vHCI driver registers the following callbacks from attach(9e). 216 */ 217 #define MDI_VHCI_OPS_REV_1 1 218 #define MDI_VHCI_OPS_REV MDI_VHCI_OPS_REV_1 219 220 typedef struct mdi_vhci_ops { 221 /* revision management */ 222 int vo_revision; 223 224 /* mdi_pathinfo node init callback */ 225 int (*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags); 226 227 /* mdi_pathinfo node uninit callback */ 228 int (*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip, 229 int flags); 230 231 /* mdi_pathinfo node state change callback */ 232 int (*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip, 233 mdi_pathinfo_state_t state, uint32_t, int flags); 234 235 /* Client path failover callback */ 236 int (*vo_failover)(dev_info_t *vdip, dev_info_t *cdip, int flags); 237 238 /* Client attached callback */ 239 void (*vo_client_attached)(dev_info_t *cdip); 240 241 /* Ask vHCI if 'cinfo' device is support as a client */ 242 int (*vo_is_dev_supported)(dev_info_t *vdip, dev_info_t *pdip, 243 void *cinfo); 244 } mdi_vhci_ops_t; 245 246 /* 247 * An mdi_vhci structure is created and bound to the devinfo node of every 248 * registered vHCI class driver; this happens when a vHCI registers itself from 249 * attach(9e). This structure is unbound and freed when the vHCI unregisters 250 * at detach(9e) time; 251 * 252 * Each vHCI driver is associated with a vHCI class name; this is the handle 253 * used to register and unregister pHCI drivers for a given transport. 254 * 255 * Locking: Different parts of this structure are guarded by different 256 * locks: global threading of multiple vHCIs and initialization is protected 257 * by mdi_mutex, the list of pHCIs associated with a vHCI is protected by 258 * vh_phci_mutex, and Clients are protected by vh_client_mutex. 259 * 260 * XXX Depending on the context, some of the fields can be freely read without 261 * holding any locks (ex. holding vh_client_mutex lock also guarantees that 262 * the vHCI (parent) cannot be unexpectedly freed). 263 */ 264 typedef struct mdi_vhci { 265 /* protected by mdi_mutex... */ 266 struct mdi_vhci *vh_next; /* next vHCI link */ 267 struct mdi_vhci *vh_prev; /* prev vHCI link */ 268 char *vh_class; /* vHCI class name */ 269 dev_info_t *vh_dip; /* vHCI devi handle */ 270 int vh_refcnt; /* vHCI reference count */ 271 struct mdi_vhci_config *vh_config; /* vHCI config */ 272 client_lb_t vh_lb; /* vHCI load-balancing */ 273 struct mdi_vhci_ops *vh_ops; /* vHCI callback vectors */ 274 275 /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */ 276 kmutex_t vh_phci_mutex; /* pHCI mutex */ 277 int vh_phci_count; /* pHCI device count */ 278 struct mdi_phci *vh_phci_head; /* pHCI list head */ 279 struct mdi_phci *vh_phci_tail; /* pHCI list tail */ 280 281 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 282 kmutex_t vh_client_mutex; /* Client mutex */ 283 int vh_client_count; /* Client count */ 284 struct client_hash *vh_client_table; /* Client hash */ 285 } mdi_vhci_t; 286 287 /* 288 * per-vHCI lock macros 289 */ 290 #define MDI_VHCI_PHCI_LOCK(vh) mutex_enter(&(vh)->vh_phci_mutex) 291 #define MDI_VHCI_PHCI_TRYLOCK(vh) mutex_tryenter(&(vh)->vh_phci_mutex) 292 #define MDI_VHCI_PHCI_UNLOCK(vh) mutex_exit(&(vh)->vh_phci_mutex) 293 #ifdef DEBUG 294 #define MDI_VHCI_PCHI_LOCKED(vh) MUTEX_HELD(&(vh)->vh_phci_mutex) 295 #endif /* DEBUG */ 296 #define MDI_VHCI_CLIENT_LOCK(vh) mutex_enter(&(vh)->vh_client_mutex) 297 #define MDI_VHCI_CLIENT_TRYLOCK(vh) mutex_tryenter(&(vh)->vh_client_mutex) 298 #define MDI_VHCI_CLIENT_UNLOCK(vh) mutex_exit(&(vh)->vh_client_mutex) 299 #ifdef DEBUG 300 #define MDI_VHCI_CLIENT_LOCKED(vh) MUTEX_HELD(&(vh)->vh_client_mutex) 301 #endif /* DEBUG */ 302 303 304 /* 305 * GUID Hash definitions 306 * 307 * Since all the mpxio managed devices for a given class are enumerated under 308 * the single vHCI instance for that class, sequentially walking through the 309 * client device link to find a client would be prohibitively slow. 310 */ 311 312 #define CLIENT_HASH_TABLE_SIZE (32) /* GUID hash */ 313 314 /* 315 * Client hash table structure 316 */ 317 struct client_hash { 318 struct mdi_client *ct_hash_head; /* Client hash head */ 319 int ct_hash_count; /* Client hash count */ 320 }; 321 322 323 /* 324 * pHCI Drivers: 325 * 326 * Physical HBA drivers provide transport services for mpxio-managed devices. 327 * As each pHCI instance is attached, it must register itself with the mpxio 328 * framework using mdi_phci_register(). When the pHCI is detached it must 329 * similarly call mdi_phci_unregister(). 330 * 331 * The framework maintains a list of registered pHCI device instances for each 332 * vHCI. This list involves (vh_phci_count, vh_phci_head, vh_phci_tail) and 333 * (ph_next, ph_prev, ph_vhci) and is protected by vh_phci_mutex. 334 * 335 * Locking order: 336 * 337 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) XXX 338 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex devinfo_tree_lock)) XXX 339 */ 340 typedef struct mdi_phci { 341 /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */ 342 struct mdi_phci *ph_next; /* next pHCI link */ 343 struct mdi_phci *ph_prev; /* prev pHCI link */ 344 dev_info_t *ph_dip; /* pHCI devi handle */ 345 struct mdi_vhci *ph_vhci; /* pHCI back ref. to vHCI */ 346 347 /* protected by MDI_PHCI_LOCK ph_mutex... */ 348 kmutex_t ph_mutex; /* per-pHCI mutex */ 349 int ph_path_count; /* pi count */ 350 mdi_pathinfo_t *ph_path_head; /* pi list head */ 351 mdi_pathinfo_t *ph_path_tail; /* pi list tail */ 352 int ph_flags; /* pHCI operation flags */ 353 int ph_unstable; /* Paths in transient state */ 354 kcondvar_t ph_unstable_cv; /* Paths in transient state */ 355 356 /* protected by mdi_phci_[gs]et_vhci_private caller... */ 357 void *ph_vprivate; /* vHCI driver private */ 358 } mdi_phci_t; 359 360 /* 361 * A pHCI device is 'unstable' while one or more paths are in a transitional 362 * state. Hotplugging is prevented during this state. 363 */ 364 #define MDI_PHCI_UNSTABLE(ph) (ph)->ph_unstable++; 365 #define MDI_PHCI_STABLE(ph) { \ 366 (ph)->ph_unstable--; \ 367 if ((ph)->ph_unstable == 0) { \ 368 cv_broadcast(&(ph)->ph_unstable_cv); \ 369 } \ 370 } 371 372 /* 373 * per-pHCI lock macros 374 */ 375 #define MDI_PHCI_LOCK(ph) mutex_enter(&(ph)->ph_mutex) 376 #define MDI_PHCI_TRYLOCK(ph) mutex_tryenter(&(ph)->ph_mutex) 377 #define MDI_PHCI_UNLOCK(ph) mutex_exit(&(ph)->ph_mutex) 378 #ifdef DEBUG 379 #define MDI_PHCI_LOCKED(vh) MUTEX_HELD(&(ph)->ph_mutex) 380 #endif /* DEBUG */ 381 382 /* 383 * pHCI state definitions and macros to track the pHCI driver instance state 384 */ 385 #define MDI_PHCI_FLAGS_OFFLINE 0x1 /* pHCI is offline */ 386 #define MDI_PHCI_FLAGS_SUSPEND 0x2 /* pHCI is suspended */ 387 #define MDI_PHCI_FLAGS_POWER_DOWN 0x4 /* pHCI is power down */ 388 #define MDI_PHCI_FLAGS_DETACH 0x8 /* pHCI is detached */ 389 #define MDI_PHCI_FLAGS_USER_DISABLE 0x10 /* pHCI is disabled,user */ 390 #define MDI_PHCI_FLAGS_D_DISABLE 0x20 /* pHCI is disabled,driver */ 391 #define MDI_PHCI_FLAGS_D_DISABLE_TRANS 0x40 /* pHCI is disabled,transient */ 392 #define MDI_PHCI_FLAGS_POWER_TRANSITION 0x80 /* pHCI is power transition */ 393 394 #define MDI_PHCI_DISABLE_MASK \ 395 (MDI_PHCI_FLAGS_USER_DISABLE | MDI_PHCI_FLAGS_D_DISABLE | \ 396 MDI_PHCI_FLAGS_D_DISABLE_TRANS) 397 398 #define MDI_PHCI_IS_READY(ph) \ 399 (((ph)->ph_flags & MDI_PHCI_DISABLE_MASK) == 0) 400 401 #define MDI_PHCI_SET_OFFLINE(ph) {\ 402 ASSERT(MDI_PHCI_LOCKED(ph)); \ 403 (ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE; } 404 #define MDI_PHCI_SET_ONLINE(ph) {\ 405 ASSERT(MDI_PHCI_LOCKED(ph)); \ 406 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE; } 407 #define MDI_PHCI_IS_OFFLINE(ph) \ 408 ((ph)->ph_flags & MDI_PHCI_FLAGS_OFFLINE) 409 410 #define MDI_PHCI_SET_SUSPEND(ph) {\ 411 ASSERT(MDI_PHCI_LOCKED(ph)); \ 412 (ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND; } 413 #define MDI_PHCI_SET_RESUME(ph) {\ 414 ASSERT(MDI_PHCI_LOCKED(ph)); \ 415 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_SUSPEND; } 416 #define MDI_PHCI_IS_SUSPENDED(ph) \ 417 ((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND) 418 419 #define MDI_PHCI_SET_DETACH(ph) {\ 420 ASSERT(MDI_PHCI_LOCKED(ph)); \ 421 (ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH; } 422 #define MDI_PHCI_SET_ATTACH(ph) {\ 423 ASSERT(MDI_PHCI_LOCKED(ph)); \ 424 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH; } 425 426 #define MDI_PHCI_SET_POWER_DOWN(ph) {\ 427 ASSERT(MDI_PHCI_LOCKED(ph)); \ 428 (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN; } 429 #define MDI_PHCI_SET_POWER_UP(ph) {\ 430 ASSERT(MDI_PHCI_LOCKED(ph)); \ 431 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN; } 432 #define MDI_PHCI_IS_POWERED_DOWN(ph) \ 433 ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_DOWN) 434 435 #define MDI_PHCI_SET_USER_ENABLE(ph) {\ 436 ASSERT(MDI_PHCI_LOCKED(ph)); \ 437 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_USER_DISABLE; } 438 #define MDI_PHCI_SET_USER_DISABLE(ph) {\ 439 ASSERT(MDI_PHCI_LOCKED(ph)); \ 440 (ph)->ph_flags |= MDI_PHCI_FLAGS_USER_DISABLE; } 441 #define MDI_PHCI_IS_USER_DISABLED(ph) \ 442 ((ph)->ph_flags & MDI_PHCI_FLAGS_USER_DISABLE) 443 444 #define MDI_PHCI_SET_DRV_ENABLE(ph) {\ 445 ASSERT(MDI_PHCI_LOCKED(ph)); \ 446 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE; } 447 #define MDI_PHCI_SET_DRV_DISABLE(ph) {\ 448 ASSERT(MDI_PHCI_LOCKED(ph)); \ 449 (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE; } 450 #define MDI_PHCI_IS_DRV_DISABLED(ph) \ 451 ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE) 452 453 #define MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph) {\ 454 ASSERT(MDI_PHCI_LOCKED(ph)); \ 455 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE_TRANS; } 456 #define MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph) {\ 457 ASSERT(MDI_PHCI_LOCKED(ph)); \ 458 (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE_TRANS; } 459 #define MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph) \ 460 ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE_TRANS) 461 462 #define MDI_PHCI_SET_POWER_TRANSITION(ph) {\ 463 ASSERT(MDI_PHCI_LOCKED(ph)); \ 464 (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_TRANSITION; } 465 #define MDI_PHCI_CLEAR_POWER_TRANSITION(ph) {\ 466 ASSERT(MDI_PHCI_LOCKED(ph)); \ 467 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_TRANSITION; } 468 #define MDI_PHCI_IS_POWER_TRANSITION(ph) \ 469 ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_TRANSITION) 470 471 /* 472 * mpxio Managed Clients: 473 * 474 * This framework creates a struct mdi_client for every client device created 475 * by the framework as a result of self-enumeration of target devices by the 476 * registered pHCI devices. This structure is bound to client device dev_info 477 * node at the time of client device allocation (ndi_devi_alloc(9e)). This 478 * structure is unbound from the dev_info node when mpxio framework removes a 479 * client device node from the system. 480 * 481 * This structure is created when a first path is enumerated and removed when 482 * last path is de-enumerated from the system. 483 * 484 * Multipath client devices are instantiated as children of corresponding vHCI 485 * driver instance. Each client device is uniquely identified by a GUID 486 * provided by target device itself. The parent vHCI device also maintains a 487 * hashed list of client devices, protected by vh_client_mutex. 488 * 489 * Typically pHCI devices self-enumerate their child devices using taskq, 490 * resulting in multiple paths to the same client device to be enumerated by 491 * competing threads. 492 * 493 * Currently this framework supports two kinds of load-balancing policy 494 * configurable through the vHCI driver configuration files. 495 * 496 * NONE - Legacy AP mode 497 * Round Robin - Balance the pHCI load in a Round Robin fashion. 498 * 499 * This framework identifies the client device in three distinct states: 500 * 501 * OPTIMAL - Client device has at least one redundant path. 502 * DEGRADED - No redundant paths (critical). Failure in the current active 503 * path would result in data access failures. 504 * FAILED - No paths are available to access this device. 505 * 506 * Locking order: 507 * 508 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) XXX 509 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex devinfo_tree_lock)) XXX 510 */ 511 typedef struct mdi_client { 512 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 513 struct mdi_client *ct_hnext; /* next client */ 514 struct mdi_client *ct_hprev; /* prev client */ 515 dev_info_t *ct_dip; /* client devi handle */ 516 struct mdi_vhci *ct_vhci; /* vHCI back ref */ 517 char *ct_drvname; /* client driver name */ 518 char *ct_guid; /* client guid */ 519 client_lb_t ct_lb; /* load balancing scheme */ 520 client_lb_args_t *ct_lb_args; /* load balancing args */ 521 522 523 /* protected by MDI_CLIENT_LOCK ct_mutex... */ 524 kmutex_t ct_mutex; /* per-client mutex */ 525 int ct_path_count; /* multi path count */ 526 mdi_pathinfo_t *ct_path_head; /* multi path list head */ 527 mdi_pathinfo_t *ct_path_tail; /* multi path list tail */ 528 mdi_pathinfo_t *ct_path_last; /* last path used for i/o */ 529 int ct_state; /* state information */ 530 int ct_flags; /* Driver op. flags */ 531 int ct_failover_flags; /* Failover args */ 532 int ct_failover_status; /* last fo status */ 533 kcondvar_t ct_failover_cv; /* Failover status cv */ 534 int ct_unstable; /* Paths in transient state */ 535 kcondvar_t ct_unstable_cv; /* Paths in transient state */ 536 537 int ct_power_cnt; /* Hold count on parent power */ 538 kcondvar_t ct_powerchange_cv; 539 /* Paths in power transient state */ 540 short ct_powercnt_config; 541 /* held in pre/post config */ 542 short ct_powercnt_unconfig; 543 /* held in pre/post unconfig */ 544 int ct_powercnt_reset; 545 /* ct_power_cnt was reset */ 546 547 void *ct_cprivate; /* client driver private */ 548 void *ct_vprivate; /* vHCI driver private */ 549 } mdi_client_t; 550 551 /* 552 * per-Client device locking definitions 553 */ 554 #define MDI_CLIENT_LOCK(ct) mutex_enter(&(ct)->ct_mutex) 555 #define MDI_CLIENT_TRYLOCK(ct) mutex_tryenter(&(ct)->ct_mutex) 556 #define MDI_CLIENT_UNLOCK(ct) mutex_exit(&(ct)->ct_mutex) 557 #ifdef DEBUG 558 #define MDI_CLIENT_LOCKED(ct) MUTEX_HELD(&(ct)->ct_mutex) 559 #endif /* DEBUG */ 560 561 /* 562 * A Client device is in unstable while one or more paths are in transitional 563 * state. We do not allow failover to take place while paths are in transient 564 * state. Similarly we do not allow state transition while client device 565 * failover is in progress. 566 */ 567 #define MDI_CLIENT_UNSTABLE(ct) (ct)->ct_unstable++; 568 #define MDI_CLIENT_STABLE(ct) { \ 569 (ct)->ct_unstable--; \ 570 if ((ct)->ct_unstable == 0) { \ 571 cv_broadcast(&(ct)->ct_unstable_cv); \ 572 } \ 573 } 574 575 /* 576 * Client driver instance state definitions: 577 */ 578 #define MDI_CLIENT_FLAGS_OFFLINE 0x00000001 579 #define MDI_CLIENT_FLAGS_SUSPEND 0x00000002 580 #define MDI_CLIENT_FLAGS_POWER_DOWN 0x00000004 581 #define MDI_CLIENT_FLAGS_DETACH 0x00000008 582 #define MDI_CLIENT_FLAGS_FAILOVER 0x00000010 583 #define MDI_CLIENT_FLAGS_REPORT_DEV 0x00000020 584 #define MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS 0x00000040 585 #define MDI_CLIENT_FLAGS_ASYNC_FREE 0x00000080 586 #define MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED 0x00000100 587 #define MDI_CLIENT_FLAGS_POWER_TRANSITION 0x00000200 588 589 #define MDI_CLIENT_SET_OFFLINE(ct) {\ 590 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 591 (ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE; } 592 #define MDI_CLIENT_SET_ONLINE(ct) {\ 593 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 594 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE; } 595 #define MDI_CLIENT_IS_OFFLINE(ct) \ 596 ((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE) 597 598 #define MDI_CLIENT_SET_SUSPEND(ct) {\ 599 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 600 (ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND; } 601 #define MDI_CLIENT_SET_RESUME(ct) {\ 602 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 603 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND; } 604 #define MDI_CLIENT_IS_SUSPENDED(ct) \ 605 ((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND) 606 607 #define MDI_CLIENT_SET_POWER_DOWN(ct) {\ 608 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 609 (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN; } 610 #define MDI_CLIENT_SET_POWER_UP(ct) {\ 611 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 612 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN; } 613 #define MDI_CLIENT_IS_POWERED_DOWN(ct) \ 614 ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_DOWN) 615 616 #define MDI_CLIENT_SET_POWER_TRANSITION(ct) {\ 617 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 618 (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_TRANSITION; } 619 #define MDI_CLIENT_CLEAR_POWER_TRANSITION(ct) {\ 620 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 621 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_TRANSITION; } 622 #define MDI_CLIENT_IS_POWER_TRANSITION(ct) \ 623 ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_TRANSITION) 624 625 #define MDI_CLIENT_SET_DETACH(ct) {\ 626 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 627 (ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH; } 628 #define MDI_CLIENT_SET_ATTACH(ct) {\ 629 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 630 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH; } 631 #define MDI_CLIENT_IS_DETACHED(ct) \ 632 ((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH) 633 634 #define MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct) {\ 635 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 636 (ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER; } 637 #define MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct) {\ 638 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 639 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER; } 640 #define MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \ 641 ((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER) 642 643 #define MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct) {\ 644 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 645 (ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV; } 646 #define MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct) {\ 647 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 648 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV; } 649 #define MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \ 650 ((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV) 651 652 #define MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct) {\ 653 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 654 (ct)->ct_flags |= MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS; } 655 #define MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct) {\ 656 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 657 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS; } 658 #define MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct) \ 659 ((ct)->ct_flags & MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS) 660 661 #define MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct) {\ 662 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 663 (ct)->ct_flags |= MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED; } 664 #define MDI_CLIENT_IS_DEV_NOT_SUPPORTED(ct) \ 665 ((ct)->ct_flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED) 666 667 /* 668 * Client operating states. 669 */ 670 #define MDI_CLIENT_STATE_OPTIMAL 1 671 #define MDI_CLIENT_STATE_DEGRADED 2 672 #define MDI_CLIENT_STATE_FAILED 3 673 674 #define MDI_CLIENT_STATE(ct) ((ct)->ct_state) 675 #define MDI_CLIENT_SET_STATE(ct, state) ((ct)->ct_state = state) 676 677 #define MDI_CLIENT_IS_FAILED(ct) \ 678 ((ct)->ct_state == MDI_CLIENT_STATE_FAILED) 679 680 /* 681 * mdi_pathinfo nodes: 682 * 683 * From this framework's perspective, a 'path' is a tuple consisting of a 684 * client or end device, a host controller which provides device 685 * identification and transport services (pHCI), and bus specific unit 686 * addressing information. A path may be decorated with properties which 687 * describe the capabilities of the path; such properties are analogous to 688 * device node and minor node properties. 689 * 690 * The framework maintains link list of mdi_pathinfo nodes created by every 691 * pHCI driver instance via the pi_phci_link linkage; this is used (for example) 692 * to make sure that all relevant pathinfo nodes are freed before the pHCI 693 * is unregistered. 694 * 695 * Locking order: 696 * 697 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) XXX 698 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) XXX 699 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) XXX 700 * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex)) XXX 701 * 702 * mdi_pathinfo node structure definition 703 */ 704 struct mdi_pathinfo { 705 /* protected by MDI_PHCI_LOCK ph_mutex... */ 706 struct mdi_pathinfo *pi_phci_link; /* next path in phci list */ 707 mdi_phci_t *pi_phci; /* pHCI dev_info node */ 708 709 /* protected by MDI_CLIENT_LOCK ct_mutex... */ 710 struct mdi_pathinfo *pi_client_link; /* next path in client list */ 711 mdi_client_t *pi_client; /* client */ 712 713 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 714 char *pi_addr; /* path unit address */ 715 int pi_path_instance; /* path instance */ 716 717 /* protected by MDI_PI_LOCK pi_mutex... */ 718 kmutex_t pi_mutex; /* per path mutex */ 719 mdi_pathinfo_state_t pi_state; /* path state */ 720 mdi_pathinfo_state_t pi_old_state; /* path state */ 721 kcondvar_t pi_state_cv; /* path state condvar */ 722 nvlist_t *pi_prop; /* Properties */ 723 void *pi_cprivate; /* client private info */ 724 void *pi_pprivate; /* phci private info */ 725 int pi_ref_cnt; /* pi reference count */ 726 kcondvar_t pi_ref_cv; /* condition variable */ 727 struct mdi_pi_kstats *pi_kstats; /* aggregate kstats */ 728 int pi_pm_held; /* phci's kidsup incremented */ 729 int pi_preferred; /* Preferred path */ 730 void *pi_vprivate; /* vhci private info */ 731 uint_t pi_flags; /* path flags */ 732 }; 733 734 /* 735 * pathinfo statistics: 736 * 737 * The mpxio architecture allows for multiple pathinfo nodes for each 738 * client-pHCI combination. For statistics purposes, these statistics are 739 * aggregated into a single client-pHCI set of kstats. 740 */ 741 struct mdi_pi_kstats { 742 int pi_kstat_ref; /* # paths aggregated, also a ref cnt */ 743 kstat_t *pi_kstat_iostats; /* mdi:iopath statistic set */ 744 kstat_t *pi_kstat_errstats; /* error statistics */ 745 }; 746 747 /* 748 * pathinfo error kstat 749 */ 750 struct pi_errs { 751 struct kstat_named pi_softerrs; /* "Soft" Error */ 752 struct kstat_named pi_harderrs; /* "Hard" Error */ 753 struct kstat_named pi_transerrs; /* Transport Errors */ 754 struct kstat_named pi_icnt_busy; /* Interconnect Busy */ 755 struct kstat_named pi_icnt_errors; /* Interconnect Errors */ 756 struct kstat_named pi_phci_rsrc; /* pHCI No Resources */ 757 struct kstat_named pi_phci_localerr; /* pHCI Local Errors */ 758 struct kstat_named pi_phci_invstate; /* pHCI Invalid State */ 759 struct kstat_named pi_failedfrom; /* Failover: Failed From */ 760 struct kstat_named pi_failedto; /* Failover: Failed To */ 761 }; 762 763 /* 764 * increment an error counter 765 */ 766 #define MDI_PI_ERRSTAT(pip, x) { \ 767 if (MDI_PI((pip))->pi_kstats != NULL) { \ 768 struct pi_errs *pep; \ 769 pep = MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \ 770 pep->x.value.ui32++; \ 771 } \ 772 } 773 774 /* 775 * error codes which can be passed to MDI_PI_ERRSTAT 776 */ 777 #define MDI_PI_SOFTERR pi_softerrs 778 #define MDI_PI_HARDERR pi_harderrs 779 #define MDI_PI_TRANSERR pi_transerrs 780 #define MDI_PI_ICNTBUSY pi_icnt_busy 781 #define MDI_PI_ICNTERR pi_icnt_errors 782 #define MDI_PI_PHCIRSRC pi_phci_rsrc 783 #define MDI_PI_PHCILOCL pi_phci_localerr 784 #define MDI_PI_PHCIINVS pi_phci_invstate 785 #define MDI_PI_FAILFROM pi_failedfrom 786 #define MDI_PI_FAILTO pi_failedto 787 788 #define MDI_PI(type) ((struct mdi_pathinfo *)(type)) 789 790 #define MDI_PI_LOCK(pip) mutex_enter(&MDI_PI(pip)->pi_mutex) 791 #define MDI_PI_TRYLOCK(pip) mutex_tryenter(&MDI_PI(pip)->pi_mutex) 792 #define MDI_PI_UNLOCK(pip) mutex_exit(&MDI_PI(pip)->pi_mutex) 793 #ifdef DEBUG 794 #define MDI_PI_LOCKED(pip) MUTEX_HELD(&MDI_PI(pip)->pi_mutex) 795 #endif /* DEBUG */ 796 797 #define MDI_PI_HOLD(pip) (++MDI_PI(pip)->pi_ref_cnt) 798 #define MDI_PI_RELE(pip) (--MDI_PI(pip)->pi_ref_cnt) 799 800 #define MDI_EXT_STATE_CHANGE 0x10000000 801 802 803 #define MDI_DISABLE_OP 0x1 804 #define MDI_ENABLE_OP 0x2 805 #define MDI_BEFORE_STATE_CHANGE 0x4 806 #define MDI_AFTER_STATE_CHANGE 0x8 807 #define MDI_SYNC_FLAG 0x10 808 809 #define MDI_PI_STATE(pip) \ 810 (MDI_PI((pip))->pi_state & MDI_PATHINFO_STATE_MASK) 811 #define MDI_PI_OLD_STATE(pip) \ 812 (MDI_PI((pip))->pi_old_state & MDI_PATHINFO_STATE_MASK) 813 814 #define MDI_PI_EXT_STATE(pip) \ 815 (MDI_PI((pip))->pi_state & MDI_PATHINFO_EXT_STATE_MASK) 816 #define MDI_PI_OLD_EXT_STATE(pip) \ 817 (MDI_PI((pip))->pi_old_state & MDI_PATHINFO_EXT_STATE_MASK) 818 819 #define MDI_PI_SET_TRANSIENT(pip) {\ 820 ASSERT(MDI_PI_LOCKED(pip)); \ 821 MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_TRANSIENT; } 822 #define MDI_PI_CLEAR_TRANSIENT(pip) {\ 823 ASSERT(MDI_PI_LOCKED(pip)); \ 824 MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_TRANSIENT; } 825 #define MDI_PI_IS_TRANSIENT(pip) \ 826 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT) 827 828 #define MDI_PI_SET_USER_DISABLE(pip) {\ 829 ASSERT(MDI_PI_LOCKED(pip)); \ 830 MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_USER_DISABLE; } 831 #define MDI_PI_SET_DRV_DISABLE(pip) {\ 832 ASSERT(MDI_PI_LOCKED(pip)); \ 833 MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE; } 834 #define MDI_PI_SET_DRV_DISABLE_TRANS(pip) {\ 835 ASSERT(MDI_PI_LOCKED(pip)); \ 836 MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; } 837 838 #define MDI_PI_SET_USER_ENABLE(pip) {\ 839 ASSERT(MDI_PI_LOCKED(pip)); \ 840 MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_USER_DISABLE; } 841 #define MDI_PI_SET_DRV_ENABLE(pip) {\ 842 ASSERT(MDI_PI_LOCKED(pip)); \ 843 MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE; } 844 #define MDI_PI_SET_DRV_ENABLE_TRANS(pip) {\ 845 ASSERT(MDI_PI_LOCKED(pip)); \ 846 MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; } 847 848 #define MDI_PI_IS_USER_DISABLE(pip) \ 849 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_USER_DISABLE) 850 #define MDI_PI_IS_DRV_DISABLE(pip) \ 851 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE) 852 #define MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) \ 853 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT) 854 855 #define MDI_PI_IS_DISABLE(pip) \ 856 (MDI_PI_IS_USER_DISABLE(pip) || \ 857 MDI_PI_IS_DRV_DISABLE(pip) || \ 858 MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip)) 859 860 #define MDI_PI_IS_INIT(pip) \ 861 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 862 MDI_PATHINFO_STATE_INIT) 863 864 #define MDI_PI_IS_INITING(pip) \ 865 ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \ 866 (MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT)) 867 868 #define MDI_PI_SET_INIT(pip) {\ 869 ASSERT(MDI_PI_LOCKED(pip)); \ 870 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT; } 871 872 #define MDI_PI_SET_ONLINING(pip) {\ 873 uint32_t ext_state; \ 874 ASSERT(MDI_PI_LOCKED(pip)); \ 875 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 876 MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \ 877 MDI_PI(pip)->pi_state = \ 878 (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT); \ 879 MDI_PI(pip)->pi_state |= ext_state; } 880 881 #define MDI_PI_IS_ONLINING(pip) \ 882 ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \ 883 (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT)) 884 885 #define MDI_PI_SET_ONLINE(pip) {\ 886 uint32_t ext_state; \ 887 ASSERT(MDI_PI_LOCKED(pip)); \ 888 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 889 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_ONLINE; \ 890 MDI_PI(pip)->pi_state |= ext_state; } 891 892 #define MDI_PI_IS_ONLINE(pip) \ 893 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 894 MDI_PATHINFO_STATE_ONLINE) 895 896 #define MDI_PI_SET_OFFLINING(pip) {\ 897 uint32_t ext_state; \ 898 ASSERT(MDI_PI_LOCKED(pip)); \ 899 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 900 MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \ 901 MDI_PI(pip)->pi_state = \ 902 (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT); \ 903 MDI_PI(pip)->pi_state |= ext_state; } 904 905 #define MDI_PI_IS_OFFLINING(pip) \ 906 ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \ 907 (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT)) 908 909 #define MDI_PI_SET_OFFLINE(pip) {\ 910 uint32_t ext_state; \ 911 ASSERT(MDI_PI_LOCKED(pip)); \ 912 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 913 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_OFFLINE; \ 914 MDI_PI(pip)->pi_state |= ext_state; } 915 916 #define MDI_PI_IS_OFFLINE(pip) \ 917 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 918 MDI_PATHINFO_STATE_OFFLINE) 919 920 #define MDI_PI_SET_STANDBYING(pip) {\ 921 uint32_t ext_state; \ 922 ASSERT(MDI_PI_LOCKED(pip)); \ 923 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 924 MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \ 925 MDI_PI(pip)->pi_state = \ 926 (MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT); \ 927 MDI_PI(pip)->pi_state |= ext_state; } 928 929 #define MDI_PI_SET_STANDBY(pip) {\ 930 uint32_t ext_state; \ 931 ASSERT(MDI_PI_LOCKED(pip)); \ 932 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 933 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_STANDBY; \ 934 MDI_PI(pip)->pi_state |= ext_state; } 935 936 #define MDI_PI_IS_STANDBY(pip) \ 937 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 938 MDI_PATHINFO_STATE_STANDBY) 939 940 #define MDI_PI_SET_FAULTING(pip) {\ 941 uint32_t ext_state; \ 942 ASSERT(MDI_PI_LOCKED(pip)); \ 943 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 944 MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \ 945 MDI_PI(pip)->pi_state = \ 946 (MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT); \ 947 MDI_PI(pip)->pi_state |= ext_state; } 948 949 #define MDI_PI_SET_FAULT(pip) {\ 950 uint32_t ext_state; \ 951 ASSERT(MDI_PI_LOCKED(pip)); \ 952 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 953 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_FAULT; \ 954 MDI_PI(pip)->pi_state |= ext_state; } 955 956 #define MDI_PI_IS_FAULT(pip) \ 957 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 958 MDI_PATHINFO_STATE_FAULT) 959 960 #define MDI_PI_IS_SUSPENDED(pip) \ 961 ((MDI_PI(pip))->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND) 962 963 #define MDI_PI_FLAGS_SET_HIDDEN(pip) {\ 964 ASSERT(MDI_PI_LOCKED(pip)); \ 965 MDI_PI(pip)->pi_flags |= MDI_PATHINFO_FLAGS_HIDDEN; } 966 #define MDI_PI_FLAGS_CLR_HIDDEN(pip) {\ 967 ASSERT(MDI_PI_LOCKED(pip)); \ 968 MDI_PI(pip)->pi_flags &= ~MDI_PATHINFO_FLAGS_HIDDEN; } 969 #define MDI_PI_FLAGS_IS_HIDDEN(pip) \ 970 ((MDI_PI(pip)->pi_flags & MDI_PATHINFO_FLAGS_HIDDEN) == \ 971 MDI_PATHINFO_FLAGS_HIDDEN) 972 973 #define MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip) {\ 974 ASSERT(MDI_PI_LOCKED(pip)); \ 975 MDI_PI(pip)->pi_flags |= MDI_PATHINFO_FLAGS_DEVICE_REMOVED; } 976 #define MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip) {\ 977 ASSERT(MDI_PI_LOCKED(pip)); \ 978 MDI_PI(pip)->pi_flags &= ~MDI_PATHINFO_FLAGS_DEVICE_REMOVED; } 979 #define MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip) \ 980 ((MDI_PI(pip)->pi_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED) == \ 981 MDI_PATHINFO_FLAGS_DEVICE_REMOVED) 982 983 /* 984 * mdi_vhcache_client, mdi_vhcache_pathinfo, and mdi_vhcache_phci structures 985 * hold the vhci to phci client mappings of the on-disk vhci busconfig cache. 986 */ 987 988 /* phci structure of vhci cache */ 989 typedef struct mdi_vhcache_phci { 990 char *cphci_path; /* phci path name */ 991 uint32_t cphci_id; /* used when building nvlist */ 992 mdi_phci_t *cphci_phci; /* pointer to actual phci */ 993 struct mdi_vhcache_phci *cphci_next; /* next in vhci phci list */ 994 } mdi_vhcache_phci_t; 995 996 /* pathinfo structure of vhci cache */ 997 typedef struct mdi_vhcache_pathinfo { 998 char *cpi_addr; /* path address */ 999 mdi_vhcache_phci_t *cpi_cphci; /* phci the path belongs to */ 1000 struct mdi_pathinfo *cpi_pip; /* ptr to actual pathinfo */ 1001 uint32_t cpi_flags; /* see below */ 1002 struct mdi_vhcache_pathinfo *cpi_next; /* next path for the client */ 1003 } mdi_vhcache_pathinfo_t; 1004 1005 /* 1006 * cpi_flags 1007 * 1008 * MDI_CPI_HINT_PATH_DOES_NOT_EXIST - set when configuration of the path has 1009 * failed. 1010 */ 1011 #define MDI_CPI_HINT_PATH_DOES_NOT_EXIST 0x0001 1012 1013 /* client structure of vhci cache */ 1014 typedef struct mdi_vhcache_client { 1015 char *cct_name_addr; /* client address */ 1016 mdi_vhcache_pathinfo_t *cct_cpi_head; /* client's path list head */ 1017 mdi_vhcache_pathinfo_t *cct_cpi_tail; /* client's path list tail */ 1018 struct mdi_vhcache_client *cct_next; /* next in vhci client list */ 1019 } mdi_vhcache_client_t; 1020 1021 /* vhci cache structure - one for vhci instance */ 1022 typedef struct mdi_vhci_cache { 1023 mdi_vhcache_phci_t *vhcache_phci_head; /* phci list head */ 1024 mdi_vhcache_phci_t *vhcache_phci_tail; /* phci list tail */ 1025 mdi_vhcache_client_t *vhcache_client_head; /* client list head */ 1026 mdi_vhcache_client_t *vhcache_client_tail; /* client list tail */ 1027 mod_hash_t *vhcache_client_hash; /* client hash */ 1028 int vhcache_flags; /* see below */ 1029 int64_t vhcache_clean_time; /* last clean time */ 1030 krwlock_t vhcache_lock; /* cache lock */ 1031 } mdi_vhci_cache_t; 1032 1033 /* vhcache_flags */ 1034 #define MDI_VHCI_CACHE_SETUP_DONE 0x0001 /* cache setup completed */ 1035 1036 /* vhci bus config structure - one for vhci instance */ 1037 typedef struct mdi_vhci_config { 1038 char *vhc_vhcache_filename; /* on-disk file name */ 1039 mdi_vhci_cache_t vhc_vhcache; /* vhci cache */ 1040 kmutex_t vhc_lock; /* vhci config lock */ 1041 kcondvar_t vhc_cv; 1042 int vhc_flags; /* see below */ 1043 1044 /* flush vhci cache when lbolt reaches vhc_flush_at_ticks */ 1045 clock_t vhc_flush_at_ticks; 1046 1047 /* 1048 * Head and tail of the client list whose paths are being configured 1049 * asynchronously. vhc_acc_count is the number of clients on this list. 1050 * vhc_acc_thrcount is the number threads running to configure 1051 * the paths for these clients. 1052 */ 1053 struct mdi_async_client_config *vhc_acc_list_head; 1054 struct mdi_async_client_config *vhc_acc_list_tail; 1055 int vhc_acc_count; 1056 int vhc_acc_thrcount; 1057 1058 /* callback id - for flushing the cache during system shutdown */ 1059 callb_id_t vhc_cbid; 1060 1061 /* 1062 * vhc_path_discovery_boot - number of times path discovery will be 1063 * attempted during early boot. 1064 * vhc_path_discovery_postboot number of times path discovery will be 1065 * attempted during late boot. 1066 * vhc_path_discovery_cutoff_time - time at which paths were last 1067 * discovered + some timeout 1068 */ 1069 int vhc_path_discovery_boot; 1070 int vhc_path_discovery_postboot; 1071 int64_t vhc_path_discovery_cutoff_time; 1072 } mdi_vhci_config_t; 1073 1074 /* vhc_flags */ 1075 #define MDI_VHC_SINGLE_THREADED 0x0001 /* config single threaded */ 1076 #define MDI_VHC_EXIT 0x0002 /* exit all config activity */ 1077 #define MDI_VHC_VHCACHE_DIRTY 0x0004 /* cache dirty */ 1078 #define MDI_VHC_VHCACHE_FLUSH_THREAD 0x0008 /* cache flush thead running */ 1079 #define MDI_VHC_VHCACHE_FLUSH_ERROR 0x0010 /* failed to flush cache */ 1080 #define MDI_VHC_READONLY_FS 0x0020 /* filesys is readonly */ 1081 1082 typedef struct mdi_phys_path { 1083 char *phys_path; 1084 struct mdi_phys_path *phys_path_next; 1085 } mdi_phys_path_t; 1086 1087 /* 1088 * Lookup tokens are used to cache the result of the vhci cache client lookup 1089 * operations (to reduce the number of real lookup operations). 1090 */ 1091 typedef struct mdi_vhcache_lookup_token { 1092 mdi_vhcache_client_t *lt_cct; /* vhcache client */ 1093 int64_t lt_cct_lookup_time; /* last lookup time */ 1094 } mdi_vhcache_lookup_token_t; 1095 1096 /* asynchronous configuration of client paths */ 1097 typedef struct mdi_async_client_config { 1098 char *acc_ct_name; /* client name */ 1099 char *acc_ct_addr; /* client address */ 1100 mdi_phys_path_t *acc_phclient_path_list_head; /* path head */ 1101 mdi_vhcache_lookup_token_t acc_token; /* lookup token */ 1102 struct mdi_async_client_config *acc_next; /* next in vhci acc list */ 1103 } mdi_async_client_config_t; 1104 1105 /* 1106 * vHCI driver instance registration/unregistration 1107 * 1108 * mdi_vhci_register() is called by a vHCI driver to register itself as the 1109 * manager of devices from a particular 'class'. This should be called from 1110 * attach(9e). 1111 * 1112 * mdi_vhci_unregister() is called from detach(9E) to unregister a vHCI 1113 * instance from the framework. 1114 */ 1115 int mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int); 1116 int mdi_vhci_unregister(dev_info_t *, int); 1117 1118 /* 1119 * Utility functions 1120 */ 1121 int mdi_phci_get_path_count(dev_info_t *); 1122 dev_info_t *mdi_phci_path2devinfo(dev_info_t *, caddr_t); 1123 1124 1125 /* 1126 * Path Selection Functions: 1127 * 1128 * mdi_select_path() is called by a vHCI driver to select to which path an 1129 * I/O request should be routed. The caller passes the 'buf' structure as 1130 * one of the parameters. The mpxio framework uses the buf's contents to 1131 * maintain per path statistics (total I/O size / count pending). If more 1132 * than one online path is available, the framework automatically selects 1133 * a suitable one. If a failover operation is active for this client device 1134 * the call fails, returning MDI_BUSY. 1135 * 1136 * By default this function returns a suitable path in the 'online' state, 1137 * based on the current load balancing policy. Currently we support 1138 * LOAD_BALANCE_NONE (Previously selected online path will continue to be 1139 * used as long as the path is usable) and LOAD_BALANCE_RR (Online paths 1140 * will be selected in a round robin fashion). The load balancing scheme 1141 * can be configured in the vHCI driver's configuration file (driver.conf). 1142 * 1143 * vHCI drivers may override this default behavior by specifying appropriate 1144 * flags. If start_pip is specified (non NULL), it is used as the routine's 1145 * starting point; it starts walking from there to find the next appropriate 1146 * path. 1147 * 1148 * The following values for 'flags' are currently defined, the third argument 1149 * to mdi_select_path depends on the flags used. 1150 * 1151 * <none>: default, arg is pip 1152 * MDI_SELECT_ONLINE_PATH: select an ONLINE path preferred-first, 1153 * arg is pip 1154 * MDI_SELECT_STANDBY_PATH: select a STANDBY path, arg is pip 1155 * MDI_SELECT_USER_DISABLE_PATH: select user disable for failover and 1156 * auto_failback 1157 * MDI_SELECT_PATH_INSTANCE: select a specific path, arg is 1158 * path instance 1159 * MDI_SELECT_NO_PREFERRED: select path without preferred-first 1160 * 1161 * The selected paths are returned in an mdi_hold_path() state (pi_ref_cnt), 1162 * caller should release the hold by calling mdi_rele_path() at the end of 1163 * operation. 1164 */ 1165 int mdi_select_path(dev_info_t *, struct buf *, int, 1166 void *, mdi_pathinfo_t **); 1167 int mdi_set_lb_policy(dev_info_t *, client_lb_t); 1168 int mdi_set_lb_region_size(dev_info_t *, int); 1169 client_lb_t mdi_get_lb_policy(dev_info_t *); 1170 1171 /* 1172 * flags for mdi_select_path() routine 1173 */ 1174 #define MDI_SELECT_ONLINE_PATH 0x0001 1175 #define MDI_SELECT_STANDBY_PATH 0x0002 1176 #define MDI_SELECT_USER_DISABLE_PATH 0x0004 1177 #define MDI_SELECT_PATH_INSTANCE 0x0008 1178 #define MDI_SELECT_NO_PREFERRED 0x0010 1179 1180 /* 1181 * MDI client device utility functions 1182 */ 1183 int mdi_client_get_path_count(dev_info_t *); 1184 dev_info_t *mdi_client_path2devinfo(dev_info_t *, caddr_t); 1185 1186 /* 1187 * Failover: 1188 * 1189 * The vHCI driver calls mdi_failover() to initiate a failover operation. 1190 * mdi_failover() calls back into the vHCI driver's vo_failover() 1191 * entry point to perform the actual failover operation. The reason 1192 * for requiring the vHCI driver to initiate failover by calling 1193 * mdi_failover(), instead of directly executing vo_failover() itself, 1194 * is to ensure that the mdi framework can keep track of the client 1195 * state properly. Additionally, mdi_failover() provides as a 1196 * convenience the option of performing the failover operation 1197 * synchronously or asynchronously 1198 * 1199 * Upon successful completion of the failover operation, the paths that were 1200 * previously ONLINE will be in the STANDBY state, and the newly activated 1201 * paths will be in the ONLINE state. 1202 * 1203 * The flags modifier determines whether the activation is done synchronously 1204 */ 1205 int mdi_failover(dev_info_t *, dev_info_t *, int); 1206 1207 /* 1208 * Client device failover mode of operation 1209 */ 1210 #define MDI_FAILOVER_SYNC 1 /* Synchronous Failover */ 1211 #define MDI_FAILOVER_ASYNC 2 /* Asynchronous Failover */ 1212 1213 /* 1214 * mdi_is_dev_supported: The pHCI driver bus_config implementation calls 1215 * mdi_is_dev_supported to determine if a child device should is supported as 1216 * a vHCI child (i.e. as a client). The method used to specify the child 1217 * device, via the cinfo argument, is by agreement between the pHCI and the 1218 * vHCI. In the case of SCSA and scsi_vhci cinfo is a pointer to the pHCI 1219 * probe dev_info node, which is decorated with the device idenity information 1220 * necessary to determine scsi_vhci support. 1221 */ 1222 int mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo); 1223 1224 /* 1225 * mdi_pathinfo node kstat functions. 1226 */ 1227 int mdi_pi_kstat_exists(mdi_pathinfo_t *); 1228 int mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ks_name); 1229 void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *); 1230 1231 /* 1232 * mdi_pathinfo node extended state change functions. 1233 */ 1234 int mdi_pi_get_state2(mdi_pathinfo_t *, mdi_pathinfo_state_t *, uint32_t *); 1235 int mdi_pi_get_preferred(mdi_pathinfo_t *); 1236 1237 /* 1238 * mdi_pathinfo node member functions 1239 */ 1240 void *mdi_pi_get_client_private(mdi_pathinfo_t *); 1241 void mdi_pi_set_client_private(mdi_pathinfo_t *, void *); 1242 void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t); 1243 void mdi_pi_set_preferred(mdi_pathinfo_t *, int); 1244 1245 /* get/set vhci private data */ 1246 void *mdi_client_get_vhci_private(dev_info_t *); 1247 void mdi_client_set_vhci_private(dev_info_t *, void *); 1248 void *mdi_phci_get_vhci_private(dev_info_t *); 1249 void mdi_phci_set_vhci_private(dev_info_t *, void *); 1250 void *mdi_pi_get_vhci_private(mdi_pathinfo_t *); 1251 void mdi_pi_set_vhci_private(mdi_pathinfo_t *, void *); 1252 int mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp); 1253 1254 /* 1255 * mdi_pathinfo Property utilities 1256 */ 1257 int mdi_prop_size(mdi_pathinfo_t *, size_t *); 1258 int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t); 1259 1260 /* obsolete interface, to be removed */ 1261 void mdi_get_next_path(dev_info_t *, mdi_pathinfo_t *, mdi_pathinfo_t **); 1262 int mdi_get_component_type(dev_info_t *); 1263 1264 #endif /* _KERNEL */ 1265 1266 #ifdef __cplusplus 1267 } 1268 #endif 1269 1270 #endif /* _SYS_MDI_IMPLDEFS_H */ 1271