1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _SYS_MDI_IMPLDEFS_H 27 #define _SYS_MDI_IMPLDEFS_H 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/note.h> 32 #include <sys/types.h> 33 #include <sys/sunmdi.h> 34 #include <sys/modhash.h> 35 #include <sys/callb.h> 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #ifdef _KERNEL 42 43 /* 44 * Multipath Driver Interfaces 45 * 46 * The multipathing framework is provided in two modules. The 'mpxio' misc. 47 * module provides the core multipath framework and the 'scsi_vhci' nexus 48 * driver provides the SCSI-III command set driver functionality for 49 * managing Fibre-Channel storage devices. 50 * 51 * As in any multipathing solution there are three major problems to solve: 52 * 53 * 1) Identification and enumeration of multipath client devices. 54 * 2) Optimal path selection when routing I/O requests. 55 * 3) Observability interfaces to snapshot the multipath configuration, 56 * and infrastructure to provide performance and error statistics. 57 * 58 * The mpxio framework consists of several major components: 59 * 60 * 1) The MDI is the Multiplexed Device Interface; this is the core glue which 61 * holds the following components together. 62 * 2) vHCI (Virtual Host Controller Interconnect) drivers provide multipathing 63 * services for a given bus technology (example: 'scsi_vhci' provides 64 * multipathing support for SCSI-III fibre-channel devices). 65 * 3) pHCI (Physical Host Controller Interconnect) drivers provide transport 66 * services for a given host controller (example: 'fcp' provides transport 67 * for fibre-channel devices). 68 * 4) Client Devices are standard Solaris target (or leaf) drivers 69 * (example: 'ssd' is the standard disk driver for fibre-channel arrays). 70 * 5) Multipath information nodes ('pathinfo' nodes) connect client device 71 * nodes and pHCI device nodes in the device tree. 72 * 73 * With the scsi_vhci, a QLC card, and mpxio enabled, the device tree might 74 * look like this: 75 * 76 * /\ 77 * / ............ 78 * <vHCI>:/ \ 79 * +-----------+ +-----------+ 80 * | scsi_vhci | | pci@1f,0 | 81 * +-----------+ +-----------+ 82 * / \ \ 83 * <Client>: / \ :<Client> \ :parent(pHCI) 84 * +----------+ +-----------+ +-------------+ 85 * | ssd 1 | | ssd 2 | | qlc@0,0 | 86 * +----------+ +-----------+ +-------------+ 87 * | | / \ 88 * | | <pHCI>: / \ :<pHCI> 89 * | | +-------------+ +-------------+ 90 * | | | pHCI 1 (fp) | | pHCI 2 (fp) | 91 * | | +-------------+ +-------------+ 92 * | | / | / | 93 * | | +------+ | +------+ | 94 * | | | ssd 3| | | ssd | | 95 * | | |!mpxio| | | (OBP)| | 96 * | | +------+ | +------+ | 97 * | | | | 98 * | | <pathinfo>: | | 99 * | | +-------+ +--------+ 100 * | +-------------->| path |-------->| path | 101 * | | info | | info | 102 * | | node 1| | node 3 | 103 * | +-------+ +--------+ 104 * | | | 105 * | | +~~~~~~~~+ 106 * | +-------+ :+--------+ 107 * +--------------------------->| path |-------->| path | 108 * | info | :| info | 109 * | node 2| +| node 4 | 110 * +-------+ +--------+ 111 * 112 * The multipath information nodes (mdi_pathinfo nodes) establish the 113 * relationship between the pseudo client driver instance nodes (children 114 * of the vHCI) and the physical host controller interconnect (pHCI 115 * drivers) forming a matrix structure. 116 * 117 * The mpxio module implements locking at multiple granularity levels to 118 * support the needs of various consumers. The multipath matrix can be 119 * column locked, or row locked depending on the consumer. The intention 120 * is to balance simplicity and performance. 121 * 122 * Locking: 123 * 124 * The devinfo locking still applies: 125 * 126 * 1) An ndi_devi_enter of a parent protects linkage/state of children. 127 * 2) state >= DS_INITIALIZED adds devi_ref of parent 128 * 3) devi_ref at state >= DS_ATTACHED prevents detach(9E). 129 * 130 * The ordering of 1) is (vHCI, pHCI). For a DEBUG kernel this ordering 131 * is asserted by the ndi_devi_enter() implementation. There is also an 132 * ndi_devi_enter(Client), which is atypical since the client is a leaf. 133 * This is done to synchronize pathinfo nodes during devinfo snapshot (see 134 * di_register_pip) by pretending that the pathinfo nodes are children 135 * of the client. 136 * 137 * In addition to devinfo locking the current implementation utilizes 138 * the following locks: 139 * 140 * mdi_mutex: protects the global list of vHCIs. 141 * 142 * vh_phci_mutex: per-vHCI (mutex) lock: protects list of pHCIs registered 143 * with vHCI. 144 * 145 * vh_client_mutex: per-vHCI (mutex) lock: protects list/hash of Clients 146 * associated with vHCI. 147 * 148 * ph_mutex: per-pHCI (mutex) lock: protects the column (pHCI-mdi_pathinfo 149 * node list) and per-pHCI structure fields. mdi_pathinfo node creation, 150 * deletion and child mdi_pathinfo node state changes are serialized on per 151 * pHCI basis (Protection against DR). 152 * 153 * ct_mutex: per-client (mutex) lock: protects the row (client-mdi_pathinfo 154 * node list) and per-client structure fields. The client-mdi_pathinfo node 155 * list is typically walked to select an optimal path when routing I/O 156 * requests. 157 * 158 * pi_mutex: per-mdi_pathinfo (mutex) lock: protects the mdi_pathinfo node 159 * structure fields. 160 * 161 * Note that per-Client structure and per-pHCI fields are freely readable when 162 * corresponding mdi_pathinfo locks are held, since holding an mdi_pathinfo 163 * node guarantees that its corresponding client and pHCI devices will not be 164 * freed. 165 */ 166 167 /* 168 * MDI Client global unique identifier property name string definition 169 */ 170 extern const char *mdi_client_guid_prop; 171 #define MDI_CLIENT_GUID_PROP (char *)mdi_client_guid_prop 172 173 /* 174 * MDI Client load balancing policy definitions 175 * 176 * Load balancing policies are determined on a per-vHCI basis and are 177 * configurable via the vHCI's driver.conf file. 178 */ 179 typedef enum { 180 LOAD_BALANCE_NONE, /* Alternate pathing */ 181 LOAD_BALANCE_RR, /* Round Robin */ 182 LOAD_BALANCE_LBA /* Logical Block Addressing */ 183 } client_lb_t; 184 185 typedef struct { 186 int region_size; 187 }client_lb_args_t; 188 189 /* 190 * MDI client load balancing property name/value string definitions 191 */ 192 extern const char *mdi_load_balance; 193 extern const char *mdi_load_balance_none; 194 extern const char *mdi_load_balance_ap; 195 extern const char *mdi_load_balance_rr; 196 extern const char *mdi_load_balance_lba; 197 198 #define LOAD_BALANCE_PROP (char *)mdi_load_balance 199 #define LOAD_BALANCE_PROP_NONE (char *)mdi_load_balance_none 200 #define LOAD_BALANCE_PROP_AP (char *)mdi_load_balance_ap 201 #define LOAD_BALANCE_PROP_RR (char *)mdi_load_balance_rr 202 #define LOAD_BALANCE_PROP_LBA (char *)mdi_load_balance_lba 203 204 /* default for region size */ 205 #define LOAD_BALANCE_DEFAULT_REGION_SIZE 18 206 207 /* 208 * vHCI drivers: 209 * 210 * vHCI drivers are pseudo nexus drivers which implement multipath services 211 * for a specific command set or bus architecture ('class'). There is a 212 * single instance of the vHCI driver for each command set which supports 213 * multipath devices. 214 * 215 * Each vHCI driver registers the following callbacks from attach(9e). 216 */ 217 #define MDI_VHCI_OPS_REV_1 1 218 /* 219 * Change MDI_VHCI_OPS_REV_NAME as per MDI_VHCI_OPS_REV 220 */ 221 #define MDI_VHCI_OPS_REV MDI_VHCI_OPS_REV_1 222 #define MDI_VHCI_OPS_REV_NAME "1" 223 224 typedef struct mdi_vhci_ops { 225 /* revision management */ 226 int vo_revision; 227 228 /* mdi_pathinfo node init callback */ 229 int (*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags); 230 231 /* mdi_pathinfo node uninit callback */ 232 int (*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip, 233 int flags); 234 235 /* mdi_pathinfo node state change callback */ 236 int (*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip, 237 mdi_pathinfo_state_t state, uint32_t, int flags); 238 239 /* Client path failover callback */ 240 int (*vo_failover)(dev_info_t *vdip, dev_info_t *cdip, int flags); 241 } mdi_vhci_ops_t; 242 243 /* 244 * An mdi_vhci structure is created and bound to the devinfo node of every 245 * registered vHCI class driver; this happens when a vHCI registers itself from 246 * attach(9e). This structure is unbound and freed when the vHCI unregisters 247 * at detach(9e) time; 248 * 249 * Each vHCI driver is associated with a vHCI class name; this is the handle 250 * used to register and unregister pHCI drivers for a given transport. 251 * 252 * Locking: Different parts of this structure are guarded by different 253 * locks: global threading of multiple vHCIs and initialization is protected 254 * by mdi_mutex, the list of pHCIs associated with a vHCI is protected by 255 * vh_phci_mutex, and Clients are protected by vh_client_mutex. 256 * 257 * XXX Depending on the context, some of the fields can be freely read without 258 * holding any locks (ex. holding vh_client_mutex lock also guarantees that 259 * the vHCI (parent) cannot be unexpectedly freed). 260 */ 261 typedef struct mdi_vhci { 262 /* protected by mdi_mutex... */ 263 struct mdi_vhci *vh_next; /* next vHCI link */ 264 struct mdi_vhci *vh_prev; /* prev vHCI link */ 265 char *vh_class; /* vHCI class name */ 266 dev_info_t *vh_dip; /* vHCI devi handle */ 267 int vh_refcnt; /* vHCI reference count */ 268 struct mdi_vhci_config *vh_config; /* vHCI config */ 269 client_lb_t vh_lb; /* vHCI load-balancing */ 270 struct mdi_vhci_ops *vh_ops; /* vHCI callback vectors */ 271 272 /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */ 273 kmutex_t vh_phci_mutex; /* pHCI mutex */ 274 int vh_phci_count; /* pHCI device count */ 275 struct mdi_phci *vh_phci_head; /* pHCI list head */ 276 struct mdi_phci *vh_phci_tail; /* pHCI list tail */ 277 278 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 279 kmutex_t vh_client_mutex; /* Client mutex */ 280 int vh_client_count; /* Client count */ 281 struct client_hash *vh_client_table; /* Client hash */ 282 } mdi_vhci_t; 283 284 /* 285 * per-vHCI lock macros 286 */ 287 #define MDI_VHCI_PHCI_LOCK(vh) mutex_enter(&(vh)->vh_phci_mutex) 288 #define MDI_VHCI_PHCI_TRYLOCK(vh) mutex_tryenter(&(vh)->vh_phci_mutex) 289 #define MDI_VHCI_PHCI_UNLOCK(vh) mutex_exit(&(vh)->vh_phci_mutex) 290 #ifdef DEBUG 291 #define MDI_VHCI_PCHI_LOCKED(vh) MUTEX_HELD(&(vh)->vh_phci_mutex) 292 #endif /* DEBUG */ 293 #define MDI_VHCI_CLIENT_LOCK(vh) mutex_enter(&(vh)->vh_client_mutex) 294 #define MDI_VHCI_CLIENT_TRYLOCK(vh) mutex_tryenter(&(vh)->vh_client_mutex) 295 #define MDI_VHCI_CLIENT_UNLOCK(vh) mutex_exit(&(vh)->vh_client_mutex) 296 #ifdef DEBUG 297 #define MDI_VHCI_CLIENT_LOCKED(vh) MUTEX_HELD(&(vh)->vh_client_mutex) 298 #endif /* DEBUG */ 299 300 301 /* 302 * GUID Hash definitions 303 * 304 * Since all the mpxio managed devices for a given class are enumerated under 305 * the single vHCI instance for that class, sequentially walking through the 306 * client device link to find a client would be prohibitively slow. 307 */ 308 309 #define CLIENT_HASH_TABLE_SIZE (32) /* GUID hash */ 310 311 /* 312 * Client hash table structure 313 */ 314 struct client_hash { 315 struct mdi_client *ct_hash_head; /* Client hash head */ 316 int ct_hash_count; /* Client hash count */ 317 }; 318 319 320 /* 321 * pHCI Drivers: 322 * 323 * Physical HBA drivers provide transport services for mpxio-managed devices. 324 * As each pHCI instance is attached, it must register itself with the mpxio 325 * framework using mdi_phci_register(). When the pHCI is detached it must 326 * similarly call mdi_phci_unregister(). 327 * 328 * The framework maintains a list of registered pHCI device instances for each 329 * vHCI. This list involves (vh_phci_count, vh_phci_head, vh_phci_tail) and 330 * (ph_next, ph_prev, ph_vhci) and is protected by vh_phci_mutex. 331 * 332 * Locking order: 333 * 334 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) XXX 335 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex devinfo_tree_lock)) XXX 336 */ 337 typedef struct mdi_phci { 338 /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */ 339 struct mdi_phci *ph_next; /* next pHCI link */ 340 struct mdi_phci *ph_prev; /* prev pHCI link */ 341 dev_info_t *ph_dip; /* pHCI devi handle */ 342 struct mdi_vhci *ph_vhci; /* pHCI back ref. to vHCI */ 343 344 /* protected by MDI_PHCI_LOCK ph_mutex... */ 345 kmutex_t ph_mutex; /* per-pHCI mutex */ 346 int ph_path_count; /* pi count */ 347 mdi_pathinfo_t *ph_path_head; /* pi list head */ 348 mdi_pathinfo_t *ph_path_tail; /* pi list tail */ 349 int ph_flags; /* pHCI operation flags */ 350 int ph_unstable; /* Paths in transient state */ 351 kcondvar_t ph_unstable_cv; /* Paths in transient state */ 352 353 /* protected by mdi_phci_[gs]et_vhci_private caller... */ 354 void *ph_vprivate; /* vHCI driver private */ 355 } mdi_phci_t; 356 357 /* 358 * A pHCI device is 'unstable' while one or more paths are in a transitional 359 * state. Hotplugging is prevented during this state. 360 */ 361 #define MDI_PHCI_UNSTABLE(ph) (ph)->ph_unstable++; 362 #define MDI_PHCI_STABLE(ph) { \ 363 (ph)->ph_unstable--; \ 364 if ((ph)->ph_unstable == 0) { \ 365 cv_broadcast(&(ph)->ph_unstable_cv); \ 366 } \ 367 } 368 369 /* 370 * per-pHCI lock macros 371 */ 372 #define MDI_PHCI_LOCK(ph) mutex_enter(&(ph)->ph_mutex) 373 #define MDI_PHCI_TRYLOCK(ph) mutex_tryenter(&(ph)->ph_mutex) 374 #define MDI_PHCI_UNLOCK(ph) mutex_exit(&(ph)->ph_mutex) 375 #ifdef DEBUG 376 #define MDI_PHCI_LOCKED(vh) MUTEX_HELD(&(ph)->ph_mutex) 377 #endif /* DEBUG */ 378 379 /* 380 * pHCI state definitions and macros to track the pHCI driver instance state 381 */ 382 #define MDI_PHCI_FLAGS_OFFLINE 0x1 /* pHCI is offline */ 383 #define MDI_PHCI_FLAGS_SUSPEND 0x2 /* pHCI is suspended */ 384 #define MDI_PHCI_FLAGS_POWER_DOWN 0x4 /* pHCI is power down */ 385 #define MDI_PHCI_FLAGS_DETACH 0x8 /* pHCI is detached */ 386 #define MDI_PHCI_FLAGS_USER_DISABLE 0x10 /* pHCI is disabled,user */ 387 #define MDI_PHCI_FLAGS_D_DISABLE 0x20 /* pHCI is disabled,driver */ 388 #define MDI_PHCI_FLAGS_D_DISABLE_TRANS 0x40 /* pHCI is disabled,transient */ 389 #define MDI_PHCI_FLAGS_POWER_TRANSITION 0x80 /* pHCI is power transition */ 390 391 #define MDI_PHCI_DISABLE_MASK \ 392 (MDI_PHCI_FLAGS_USER_DISABLE | MDI_PHCI_FLAGS_D_DISABLE | \ 393 MDI_PHCI_FLAGS_D_DISABLE_TRANS) 394 395 #define MDI_PHCI_IS_READY(ph) \ 396 (((ph)->ph_flags & MDI_PHCI_DISABLE_MASK) == 0) 397 398 #define MDI_PHCI_SET_OFFLINE(ph) {\ 399 ASSERT(MDI_PHCI_LOCKED(ph)); \ 400 (ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE; } 401 #define MDI_PHCI_SET_ONLINE(ph) {\ 402 ASSERT(MDI_PHCI_LOCKED(ph)); \ 403 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE; } 404 #define MDI_PHCI_IS_OFFLINE(ph) \ 405 ((ph)->ph_flags & MDI_PHCI_FLAGS_OFFLINE) 406 407 #define MDI_PHCI_SET_SUSPEND(ph) {\ 408 ASSERT(MDI_PHCI_LOCKED(ph)); \ 409 (ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND; } 410 #define MDI_PHCI_SET_RESUME(ph) {\ 411 ASSERT(MDI_PHCI_LOCKED(ph)); \ 412 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_SUSPEND; } 413 #define MDI_PHCI_IS_SUSPENDED(ph) \ 414 ((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND) 415 416 #define MDI_PHCI_SET_DETACH(ph) {\ 417 ASSERT(MDI_PHCI_LOCKED(ph)); \ 418 (ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH; } 419 #define MDI_PHCI_SET_ATTACH(ph) {\ 420 ASSERT(MDI_PHCI_LOCKED(ph)); \ 421 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH; } 422 423 #define MDI_PHCI_SET_POWER_DOWN(ph) {\ 424 ASSERT(MDI_PHCI_LOCKED(ph)); \ 425 (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN; } 426 #define MDI_PHCI_SET_POWER_UP(ph) {\ 427 ASSERT(MDI_PHCI_LOCKED(ph)); \ 428 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN; } 429 #define MDI_PHCI_IS_POWERED_DOWN(ph) \ 430 ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_DOWN) 431 432 #define MDI_PHCI_SET_USER_ENABLE(ph) {\ 433 ASSERT(MDI_PHCI_LOCKED(ph)); \ 434 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_USER_DISABLE; } 435 #define MDI_PHCI_SET_USER_DISABLE(ph) {\ 436 ASSERT(MDI_PHCI_LOCKED(ph)); \ 437 (ph)->ph_flags |= MDI_PHCI_FLAGS_USER_DISABLE; } 438 #define MDI_PHCI_IS_USER_DISABLED(ph) \ 439 ((ph)->ph_flags & MDI_PHCI_FLAGS_USER_DISABLE) 440 441 #define MDI_PHCI_SET_DRV_ENABLE(ph) {\ 442 ASSERT(MDI_PHCI_LOCKED(ph)); \ 443 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE; } 444 #define MDI_PHCI_SET_DRV_DISABLE(ph) {\ 445 ASSERT(MDI_PHCI_LOCKED(ph)); \ 446 (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE; } 447 #define MDI_PHCI_IS_DRV_DISABLED(ph) \ 448 ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE) 449 450 #define MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph) {\ 451 ASSERT(MDI_PHCI_LOCKED(ph)); \ 452 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE_TRANS; } 453 #define MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph) {\ 454 ASSERT(MDI_PHCI_LOCKED(ph)); \ 455 (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE_TRANS; } 456 #define MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph) \ 457 ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE_TRANS) 458 459 #define MDI_PHCI_SET_POWER_TRANSITION(ph) {\ 460 ASSERT(MDI_PHCI_LOCKED(ph)); \ 461 (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_TRANSITION; } 462 #define MDI_PHCI_CLEAR_POWER_TRANSITION(ph) {\ 463 ASSERT(MDI_PHCI_LOCKED(ph)); \ 464 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_TRANSITION; } 465 #define MDI_PHCI_IS_POWER_TRANSITION(ph) \ 466 ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_TRANSITION) 467 468 /* 469 * mpxio Managed Clients: 470 * 471 * This framework creates a struct mdi_client for every client device created 472 * by the framework as a result of self-enumeration of target devices by the 473 * registered pHCI devices. This structure is bound to client device dev_info 474 * node at the time of client device allocation (ndi_devi_alloc(9e)). This 475 * structure is unbound from the dev_info node when mpxio framework removes a 476 * client device node from the system. 477 * 478 * This structure is created when a first path is enumerated and removed when 479 * last path is de-enumerated from the system. 480 * 481 * Multipath client devices are instantiated as children of corresponding vHCI 482 * driver instance. Each client device is uniquely identified by a GUID 483 * provided by target device itself. The parent vHCI device also maintains a 484 * hashed list of client devices, protected by vh_client_mutex. 485 * 486 * Typically pHCI devices self-enumerate their child devices using taskq, 487 * resulting in multiple paths to the same client device to be enumerated by 488 * competing threads. 489 * 490 * Currently this framework supports two kinds of load-balancing policy 491 * configurable through the vHCI driver configuration files. 492 * 493 * NONE - Legacy AP mode 494 * Round Robin - Balance the pHCI load in a Round Robin fashion. 495 * 496 * This framework identifies the client device in three distinct states: 497 * 498 * OPTIMAL - Client device has atleast one redundant path. 499 * DEGRADED - No redundant paths (critical). Failure in the current active 500 * path would result in data access failures. 501 * FAILED - No paths are available to access this device. 502 * 503 * Locking order: 504 * 505 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex)) XXX 506 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex devinfo_tree_lock)) XXX 507 */ 508 typedef struct mdi_client { 509 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 510 struct mdi_client *ct_hnext; /* next client */ 511 struct mdi_client *ct_hprev; /* prev client */ 512 dev_info_t *ct_dip; /* client devi handle */ 513 struct mdi_vhci *ct_vhci; /* vHCI back ref */ 514 char *ct_drvname; /* client driver name */ 515 char *ct_guid; /* client guid */ 516 client_lb_t ct_lb; /* load balancing scheme */ 517 client_lb_args_t *ct_lb_args; /* load balancing args */ 518 519 520 /* protected by MDI_CLIENT_LOCK ct_mutex... */ 521 kmutex_t ct_mutex; /* per-client mutex */ 522 int ct_path_count; /* multi path count */ 523 mdi_pathinfo_t *ct_path_head; /* multi path list head */ 524 mdi_pathinfo_t *ct_path_tail; /* multi path list tail */ 525 mdi_pathinfo_t *ct_path_last; /* last path used for i/o */ 526 int ct_state; /* state information */ 527 int ct_flags; /* Driver op. flags */ 528 int ct_failover_flags; /* Failover args */ 529 int ct_failover_status; /* last fo status */ 530 kcondvar_t ct_failover_cv; /* Failover status cv */ 531 int ct_unstable; /* Paths in transient state */ 532 kcondvar_t ct_unstable_cv; /* Paths in transient state */ 533 534 int ct_power_cnt; /* Hold count on parent power */ 535 kcondvar_t ct_powerchange_cv; 536 /* Paths in power transient state */ 537 short ct_powercnt_config; 538 /* held in pre/post config */ 539 short ct_powercnt_unconfig; 540 /* held in pre/post unconfig */ 541 int ct_powercnt_reset; 542 /* ct_power_cnt was resetted */ 543 544 void *ct_cprivate; /* client driver private */ 545 void *ct_vprivate; /* vHCI driver private */ 546 } mdi_client_t; 547 548 /* 549 * per-Client device locking definitions 550 */ 551 #define MDI_CLIENT_LOCK(ct) mutex_enter(&(ct)->ct_mutex) 552 #define MDI_CLIENT_TRYLOCK(ct) mutex_tryenter(&(ct)->ct_mutex) 553 #define MDI_CLIENT_UNLOCK(ct) mutex_exit(&(ct)->ct_mutex) 554 #ifdef DEBUG 555 #define MDI_CLIENT_LOCKED(ct) MUTEX_HELD(&(ct)->ct_mutex) 556 #endif /* DEBUG */ 557 558 /* 559 * A Client device is in unstable while one or more paths are in transitional 560 * state. We do not allow failover to take place while paths are in transient 561 * state. Similarly we do not allow state transition while client device 562 * failover is in progress. 563 */ 564 #define MDI_CLIENT_UNSTABLE(ct) (ct)->ct_unstable++; 565 #define MDI_CLIENT_STABLE(ct) { \ 566 (ct)->ct_unstable--; \ 567 if ((ct)->ct_unstable == 0) { \ 568 cv_broadcast(&(ct)->ct_unstable_cv); \ 569 } \ 570 } 571 572 /* 573 * Client driver instance state definitions: 574 */ 575 #define MDI_CLIENT_FLAGS_OFFLINE 0x00000001 576 #define MDI_CLIENT_FLAGS_SUSPEND 0x00000002 577 #define MDI_CLIENT_FLAGS_POWER_DOWN 0x00000004 578 #define MDI_CLIENT_FLAGS_DETACH 0x00000008 579 #define MDI_CLIENT_FLAGS_FAILOVER 0x00000010 580 #define MDI_CLIENT_FLAGS_REPORT_DEV 0x00000020 581 #define MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS 0x00000040 582 #define MDI_CLIENT_FLAGS_ASYNC_FREE 0x00000080 583 #define MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED 0x00000100 584 #define MDI_CLIENT_FLAGS_POWER_TRANSITION 0x00000200 585 586 #define MDI_CLIENT_SET_OFFLINE(ct) {\ 587 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 588 (ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE; } 589 #define MDI_CLIENT_SET_ONLINE(ct) {\ 590 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 591 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE; } 592 #define MDI_CLIENT_IS_OFFLINE(ct) \ 593 ((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE) 594 595 #define MDI_CLIENT_SET_SUSPEND(ct) {\ 596 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 597 (ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND; } 598 #define MDI_CLIENT_SET_RESUME(ct) {\ 599 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 600 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND; } 601 #define MDI_CLIENT_IS_SUSPENDED(ct) \ 602 ((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND) 603 604 #define MDI_CLIENT_SET_POWER_DOWN(ct) {\ 605 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 606 (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN; } 607 #define MDI_CLIENT_SET_POWER_UP(ct) {\ 608 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 609 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN; } 610 #define MDI_CLIENT_IS_POWERED_DOWN(ct) \ 611 ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_DOWN) 612 613 #define MDI_CLIENT_SET_POWER_TRANSITION(ct) {\ 614 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 615 (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_TRANSITION; } 616 #define MDI_CLIENT_CLEAR_POWER_TRANSITION(ct) {\ 617 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 618 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_TRANSITION; } 619 #define MDI_CLIENT_IS_POWER_TRANSITION(ct) \ 620 ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_TRANSITION) 621 622 #define MDI_CLIENT_SET_DETACH(ct) {\ 623 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 624 (ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH; } 625 #define MDI_CLIENT_SET_ATTACH(ct) {\ 626 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 627 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH; } 628 #define MDI_CLIENT_IS_DETACHED(ct) \ 629 ((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH) 630 631 #define MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct) {\ 632 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 633 (ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER; } 634 #define MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct) {\ 635 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 636 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER; } 637 #define MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \ 638 ((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER) 639 640 #define MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct) {\ 641 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 642 (ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV; } 643 #define MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct) {\ 644 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 645 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV; } 646 #define MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \ 647 ((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV) 648 649 #define MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct) {\ 650 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 651 (ct)->ct_flags |= MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS; } 652 #define MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct) {\ 653 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 654 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS; } 655 #define MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct) \ 656 ((ct)->ct_flags & MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS) 657 658 #define MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct) {\ 659 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 660 (ct)->ct_flags |= MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED; } 661 #define MDI_CLIENT_IS_DEV_NOT_SUPPORTED(ct) \ 662 ((ct)->ct_flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED) 663 664 /* 665 * Client operating states. 666 */ 667 #define MDI_CLIENT_STATE_OPTIMAL 1 668 #define MDI_CLIENT_STATE_DEGRADED 2 669 #define MDI_CLIENT_STATE_FAILED 3 670 671 #define MDI_CLIENT_STATE(ct) ((ct)->ct_state) 672 #define MDI_CLIENT_SET_STATE(ct, state) ((ct)->ct_state = state) 673 674 #define MDI_CLIENT_IS_FAILED(ct) \ 675 ((ct)->ct_state == MDI_CLIENT_STATE_FAILED) 676 677 /* 678 * mdi_pathinfo nodes: 679 * 680 * From this framework's perspective, a 'path' is a tuple consisting of a 681 * client or end device, a host controller which provides device 682 * identification and transport services (pHCI), and bus specific unit 683 * addressing information. A path may be decorated with properties which 684 * describe the capabilities of the path; such properties are analogous to 685 * device node and minor node properties. 686 * 687 * The framework maintains link list of mdi_pathinfo nodes created by every 688 * pHCI driver instance via the pi_phci_link linkage; this is used (for example) 689 * to make sure that all relevant pathinfo nodes are freed before the pHCI 690 * is unregistered. 691 * 692 * Locking order: 693 * 694 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) XXX 695 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) XXX 696 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) XXX 697 * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex)) XXX 698 * 699 * mdi_pathinfo node structure definition 700 */ 701 struct mdi_pathinfo { 702 /* protected by MDI_PHCI_LOCK ph_mutex... */ 703 struct mdi_pathinfo *pi_phci_link; /* next path in phci list */ 704 mdi_phci_t *pi_phci; /* pHCI dev_info node */ 705 706 /* protected by MDI_CLIENT_LOCK ct_mutex... */ 707 struct mdi_pathinfo *pi_client_link; /* next path in client list */ 708 mdi_client_t *pi_client; /* client */ 709 710 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 711 char *pi_addr; /* path unit address */ 712 713 /* protected by MDI_PI_LOCK pi_mutex... */ 714 kmutex_t pi_mutex; /* per path mutex */ 715 mdi_pathinfo_state_t pi_state; /* path state */ 716 mdi_pathinfo_state_t pi_old_state; /* path state */ 717 kcondvar_t pi_state_cv; /* path state condvar */ 718 nvlist_t *pi_prop; /* Properties */ 719 void *pi_cprivate; /* client private info */ 720 void *pi_pprivate; /* phci private info */ 721 int pi_ref_cnt; /* pi reference count */ 722 kcondvar_t pi_ref_cv; /* condition variable */ 723 struct mdi_pi_kstats *pi_kstats; /* aggregate kstats */ 724 int pi_pm_held; /* phci's kidsup incremented */ 725 int pi_preferred; /* Preferred path */ 726 void *pi_vprivate; /* vhci private info */ 727 }; 728 729 /* 730 * pathinfo statistics: 731 * 732 * The mpxio architecture allows for multiple pathinfo nodes for each 733 * client-pHCI combination. For statistics purposes, these statistics are 734 * aggregated into a single client-pHCI set of kstats. 735 */ 736 struct mdi_pi_kstats { 737 int pi_kstat_ref; /* # paths aggregated, also a ref cnt */ 738 kstat_t *pi_kstat_iostats; /* mdi:iopath statistic set */ 739 kstat_t *pi_kstat_errstats; /* error statistics */ 740 }; 741 742 /* 743 * pathinfo error kstat 744 */ 745 struct pi_errs { 746 struct kstat_named pi_softerrs; /* "Soft" Error */ 747 struct kstat_named pi_harderrs; /* "Hard" Error */ 748 struct kstat_named pi_transerrs; /* Transport Errors */ 749 struct kstat_named pi_icnt_busy; /* Interconnect Busy */ 750 struct kstat_named pi_icnt_errors; /* Interconnect Errors */ 751 struct kstat_named pi_phci_rsrc; /* pHCI No Resources */ 752 struct kstat_named pi_phci_localerr; /* pHCI Local Errors */ 753 struct kstat_named pi_phci_invstate; /* pHCI Invalid State */ 754 struct kstat_named pi_failedfrom; /* Failover: Failed From */ 755 struct kstat_named pi_failedto; /* Failover: Failed To */ 756 }; 757 758 /* 759 * increment an error counter 760 */ 761 #define MDI_PI_ERRSTAT(pip, x) { \ 762 if (MDI_PI((pip))->pi_kstats != NULL) { \ 763 struct pi_errs *pep; \ 764 pep = MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \ 765 pep->x.value.ui32++; \ 766 } \ 767 } 768 769 /* 770 * error codes which can be passed to MDI_PI_ERRSTAT 771 */ 772 #define MDI_PI_SOFTERR pi_softerrs 773 #define MDI_PI_HARDERR pi_harderrs 774 #define MDI_PI_TRANSERR pi_transerrs 775 #define MDI_PI_ICNTBUSY pi_icnt_busy 776 #define MDI_PI_ICNTERR pi_icnt_errors 777 #define MDI_PI_PHCIRSRC pi_phci_rsrc 778 #define MDI_PI_PHCILOCL pi_phci_localerr 779 #define MDI_PI_PHCIINVS pi_phci_invstate 780 #define MDI_PI_FAILFROM pi_failedfrom 781 #define MDI_PI_FAILTO pi_failedto 782 783 #define MDI_PI(type) ((struct mdi_pathinfo *)(type)) 784 785 #define MDI_PI_LOCK(pip) mutex_enter(&MDI_PI(pip)->pi_mutex) 786 #define MDI_PI_TRYLOCK(pip) mutex_tryenter(&MDI_PI(pip)->pi_mutex) 787 #define MDI_PI_UNLOCK(pip) mutex_exit(&MDI_PI(pip)->pi_mutex) 788 #ifdef DEBUG 789 #define MDI_PI_LOCKED(pip) MUTEX_HELD(&MDI_PI(pip)->pi_mutex) 790 #endif /* DEBUG */ 791 792 #define MDI_PI_HOLD(pip) (++MDI_PI(pip)->pi_ref_cnt) 793 #define MDI_PI_RELE(pip) (--MDI_PI(pip)->pi_ref_cnt) 794 795 #define MDI_EXT_STATE_CHANGE 0x10000000 796 797 798 #define MDI_DISABLE_OP 0x1 799 #define MDI_ENABLE_OP 0x2 800 #define MDI_BEFORE_STATE_CHANGE 0x4 801 #define MDI_AFTER_STATE_CHANGE 0x8 802 #define MDI_SYNC_FLAG 0x10 803 804 #define MDI_PI_STATE(pip) \ 805 (MDI_PI((pip))->pi_state & MDI_PATHINFO_STATE_MASK) 806 #define MDI_PI_OLD_STATE(pip) \ 807 (MDI_PI((pip))->pi_old_state & MDI_PATHINFO_STATE_MASK) 808 809 #define MDI_PI_EXT_STATE(pip) \ 810 (MDI_PI((pip))->pi_state & MDI_PATHINFO_EXT_STATE_MASK) 811 #define MDI_PI_OLD_EXT_STATE(pip) \ 812 (MDI_PI((pip))->pi_old_state & MDI_PATHINFO_EXT_STATE_MASK) 813 814 #define MDI_PI_SET_TRANSIENT(pip) {\ 815 ASSERT(MDI_PI_LOCKED(pip)); \ 816 MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_TRANSIENT; } 817 #define MDI_PI_CLEAR_TRANSIENT(pip) {\ 818 ASSERT(MDI_PI_LOCKED(pip)); \ 819 MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_TRANSIENT; } 820 #define MDI_PI_IS_TRANSIENT(pip) \ 821 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT) 822 823 #define MDI_PI_SET_USER_DISABLE(pip) {\ 824 ASSERT(MDI_PI_LOCKED(pip)); \ 825 MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_USER_DISABLE; } 826 #define MDI_PI_SET_DRV_DISABLE(pip) {\ 827 ASSERT(MDI_PI_LOCKED(pip)); \ 828 MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE; } 829 #define MDI_PI_SET_DRV_DISABLE_TRANS(pip) {\ 830 ASSERT(MDI_PI_LOCKED(pip)); \ 831 MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; } 832 833 #define MDI_PI_SET_USER_ENABLE(pip) {\ 834 ASSERT(MDI_PI_LOCKED(pip)); \ 835 MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_USER_DISABLE; } 836 #define MDI_PI_SET_DRV_ENABLE(pip) {\ 837 ASSERT(MDI_PI_LOCKED(pip)); \ 838 MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE; } 839 #define MDI_PI_SET_DRV_ENABLE_TRANS(pip) {\ 840 ASSERT(MDI_PI_LOCKED(pip)); \ 841 MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; } 842 843 #define MDI_PI_IS_USER_DISABLE(pip) \ 844 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_USER_DISABLE) 845 #define MDI_PI_IS_DRV_DISABLE(pip) \ 846 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE) 847 #define MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) \ 848 (MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT) 849 850 #define MDI_PI_IS_DISABLE(pip) \ 851 (MDI_PI_IS_USER_DISABLE(pip) || \ 852 MDI_PI_IS_DRV_DISABLE(pip) || \ 853 MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip)) 854 855 #define MDI_PI_IS_INIT(pip) \ 856 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 857 MDI_PATHINFO_STATE_INIT) 858 859 #define MDI_PI_IS_INITING(pip) \ 860 ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \ 861 (MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT)) 862 863 #define MDI_PI_SET_INIT(pip) {\ 864 ASSERT(MDI_PI_LOCKED(pip)); \ 865 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT; } 866 867 #define MDI_PI_SET_ONLINING(pip) {\ 868 uint32_t ext_state; \ 869 ASSERT(MDI_PI_LOCKED(pip)); \ 870 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 871 MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \ 872 MDI_PI(pip)->pi_state = \ 873 (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT); \ 874 MDI_PI(pip)->pi_state |= ext_state; } 875 876 #define MDI_PI_IS_ONLINING(pip) \ 877 ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \ 878 (MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT)) 879 880 #define MDI_PI_SET_ONLINE(pip) {\ 881 uint32_t ext_state; \ 882 ASSERT(MDI_PI_LOCKED(pip)); \ 883 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 884 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_ONLINE; \ 885 MDI_PI(pip)->pi_state |= ext_state; } 886 887 #define MDI_PI_IS_ONLINE(pip) \ 888 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 889 MDI_PATHINFO_STATE_ONLINE) 890 891 #define MDI_PI_SET_OFFLINING(pip) {\ 892 uint32_t ext_state; \ 893 ASSERT(MDI_PI_LOCKED(pip)); \ 894 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 895 MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \ 896 MDI_PI(pip)->pi_state = \ 897 (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT); \ 898 MDI_PI(pip)->pi_state |= ext_state; } 899 900 #define MDI_PI_IS_OFFLINING(pip) \ 901 ((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \ 902 (MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT)) 903 904 #define MDI_PI_SET_OFFLINE(pip) {\ 905 uint32_t ext_state; \ 906 ASSERT(MDI_PI_LOCKED(pip)); \ 907 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 908 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_OFFLINE; \ 909 MDI_PI(pip)->pi_state |= ext_state; } 910 911 #define MDI_PI_IS_OFFLINE(pip) \ 912 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 913 MDI_PATHINFO_STATE_OFFLINE) 914 915 #define MDI_PI_SET_STANDBYING(pip) {\ 916 uint32_t ext_state; \ 917 ASSERT(MDI_PI_LOCKED(pip)); \ 918 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 919 MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \ 920 MDI_PI(pip)->pi_state = \ 921 (MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT); \ 922 MDI_PI(pip)->pi_state |= ext_state; } 923 924 #define MDI_PI_SET_STANDBY(pip) {\ 925 uint32_t ext_state; \ 926 ASSERT(MDI_PI_LOCKED(pip)); \ 927 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 928 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_STANDBY; \ 929 MDI_PI(pip)->pi_state |= ext_state; } 930 931 #define MDI_PI_IS_STANDBY(pip) \ 932 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 933 MDI_PATHINFO_STATE_STANDBY) 934 935 #define MDI_PI_SET_FAULTING(pip) {\ 936 uint32_t ext_state; \ 937 ASSERT(MDI_PI_LOCKED(pip)); \ 938 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 939 MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \ 940 MDI_PI(pip)->pi_state = \ 941 (MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT); \ 942 MDI_PI(pip)->pi_state |= ext_state; } 943 944 #define MDI_PI_SET_FAULT(pip) {\ 945 uint32_t ext_state; \ 946 ASSERT(MDI_PI_LOCKED(pip)); \ 947 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 948 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_FAULT; \ 949 MDI_PI(pip)->pi_state |= ext_state; } 950 951 #define MDI_PI_IS_FAULT(pip) \ 952 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 953 MDI_PATHINFO_STATE_FAULT) 954 955 #define MDI_PI_IS_SUSPENDED(pip) \ 956 ((MDI_PI(pip))->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND) 957 958 /* 959 * mdi_vhcache_client, mdi_vhcache_pathinfo, and mdi_vhcache_phci structures 960 * hold the vhci to phci client mappings of the on-disk vhci busconfig cache. 961 */ 962 963 /* phci structure of vhci cache */ 964 typedef struct mdi_vhcache_phci { 965 char *cphci_path; /* phci path name */ 966 uint32_t cphci_id; /* used when building nvlist */ 967 mdi_phci_t *cphci_phci; /* pointer to actual phci */ 968 struct mdi_vhcache_phci *cphci_next; /* next in vhci phci list */ 969 } mdi_vhcache_phci_t; 970 971 /* pathinfo structure of vhci cache */ 972 typedef struct mdi_vhcache_pathinfo { 973 char *cpi_addr; /* path address */ 974 mdi_vhcache_phci_t *cpi_cphci; /* phci the path belongs to */ 975 struct mdi_pathinfo *cpi_pip; /* ptr to actual pathinfo */ 976 uint32_t cpi_flags; /* see below */ 977 struct mdi_vhcache_pathinfo *cpi_next; /* next path for the client */ 978 } mdi_vhcache_pathinfo_t; 979 980 /* 981 * cpi_flags 982 * 983 * MDI_CPI_HINT_PATH_DOES_NOT_EXIST - set when configuration of the path has 984 * failed. 985 */ 986 #define MDI_CPI_HINT_PATH_DOES_NOT_EXIST 0x0001 987 988 /* client structure of vhci cache */ 989 typedef struct mdi_vhcache_client { 990 char *cct_name_addr; /* client address */ 991 mdi_vhcache_pathinfo_t *cct_cpi_head; /* client's path list head */ 992 mdi_vhcache_pathinfo_t *cct_cpi_tail; /* client's path list tail */ 993 struct mdi_vhcache_client *cct_next; /* next in vhci client list */ 994 } mdi_vhcache_client_t; 995 996 /* vhci cache structure - one for vhci instance */ 997 typedef struct mdi_vhci_cache { 998 mdi_vhcache_phci_t *vhcache_phci_head; /* phci list head */ 999 mdi_vhcache_phci_t *vhcache_phci_tail; /* phci list tail */ 1000 mdi_vhcache_client_t *vhcache_client_head; /* client list head */ 1001 mdi_vhcache_client_t *vhcache_client_tail; /* client list tail */ 1002 mod_hash_t *vhcache_client_hash; /* client hash */ 1003 int vhcache_flags; /* see below */ 1004 int64_t vhcache_clean_time; /* last clean time */ 1005 krwlock_t vhcache_lock; /* cache lock */ 1006 } mdi_vhci_cache_t; 1007 1008 /* vhcache_flags */ 1009 #define MDI_VHCI_CACHE_SETUP_DONE 0x0001 /* cache setup completed */ 1010 1011 /* vhci bus config structure - one for vhci instance */ 1012 typedef struct mdi_vhci_config { 1013 char *vhc_vhcache_filename; /* on-disk file name */ 1014 mdi_vhci_cache_t vhc_vhcache; /* vhci cache */ 1015 kmutex_t vhc_lock; /* vhci config lock */ 1016 kcondvar_t vhc_cv; 1017 int vhc_flags; /* see below */ 1018 1019 /* flush vhci cache when lbolt reaches vhc_flush_at_ticks */ 1020 clock_t vhc_flush_at_ticks; 1021 1022 /* 1023 * Head and tail of the client list whose paths are being configured 1024 * asynchronously. vhc_acc_count is the number of clients on this list. 1025 * vhc_acc_thrcount is the number threads running to configure 1026 * the paths for these clients. 1027 */ 1028 struct mdi_async_client_config *vhc_acc_list_head; 1029 struct mdi_async_client_config *vhc_acc_list_tail; 1030 int vhc_acc_count; 1031 int vhc_acc_thrcount; 1032 1033 /* callback id - for flushing the cache during system shutdown */ 1034 callb_id_t vhc_cbid; 1035 1036 /* 1037 * vhc_path_discovery_boot - number of times path discovery will be 1038 * attempted during early boot. 1039 * vhc_path_discovery_postboot number of times path discovery will be 1040 * attempted during late boot. 1041 * vhc_path_discovery_cutoff_time - time at which paths were last 1042 * discovered + some timeout 1043 */ 1044 int vhc_path_discovery_boot; 1045 int vhc_path_discovery_postboot; 1046 int64_t vhc_path_discovery_cutoff_time; 1047 } mdi_vhci_config_t; 1048 1049 /* vhc_flags */ 1050 #define MDI_VHC_SINGLE_THREADED 0x0001 /* config single threaded */ 1051 #define MDI_VHC_EXIT 0x0002 /* exit all config activity */ 1052 #define MDI_VHC_VHCACHE_DIRTY 0x0004 /* cache dirty */ 1053 #define MDI_VHC_VHCACHE_FLUSH_THREAD 0x0008 /* cache flush thead running */ 1054 #define MDI_VHC_VHCACHE_FLUSH_ERROR 0x0010 /* failed to flush cache */ 1055 #define MDI_VHC_READONLY_FS 0x0020 /* filesys is readonly */ 1056 1057 typedef struct mdi_phys_path { 1058 char *phys_path; 1059 struct mdi_phys_path *phys_path_next; 1060 } mdi_phys_path_t; 1061 1062 /* 1063 * Lookup tokens are used to cache the result of the vhci cache client lookup 1064 * operations (to reduce the number of real lookup operations). 1065 */ 1066 typedef struct mdi_vhcache_lookup_token { 1067 mdi_vhcache_client_t *lt_cct; /* vhcache client */ 1068 int64_t lt_cct_lookup_time; /* last lookup time */ 1069 } mdi_vhcache_lookup_token_t; 1070 1071 /* asynchronous configuration of client paths */ 1072 typedef struct mdi_async_client_config { 1073 char *acc_ct_name; /* client name */ 1074 char *acc_ct_addr; /* client address */ 1075 mdi_phys_path_t *acc_phclient_path_list_head; /* path head */ 1076 mdi_vhcache_lookup_token_t acc_token; /* lookup token */ 1077 struct mdi_async_client_config *acc_next; /* next in vhci acc list */ 1078 } mdi_async_client_config_t; 1079 1080 /* 1081 * vHCI driver instance registration/unregistration 1082 * 1083 * mdi_vhci_register() is called by a vHCI driver to register itself as the 1084 * manager of devices from a particular 'class'. This should be called from 1085 * attach(9e). 1086 * 1087 * mdi_vhci_unregister() is called from detach(9E) to unregister a vHCI 1088 * instance from the framework. 1089 */ 1090 int mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int); 1091 int mdi_vhci_unregister(dev_info_t *, int); 1092 1093 /* 1094 * Utility functions 1095 */ 1096 int mdi_phci_get_path_count(dev_info_t *); 1097 dev_info_t *mdi_phci_path2devinfo(dev_info_t *, caddr_t); 1098 1099 1100 /* 1101 * Path Selection Functions: 1102 * 1103 * mdi_select_path() is called by a vHCI driver to select to which path an 1104 * I/O request should be routed. The caller passes the 'buf' structure as 1105 * one of the parameters. The mpxio framework uses the buf's contents to 1106 * maintain per path statistics (total I/O size / count pending). If more 1107 * than one online path is available, the framework automatically selects 1108 * a suitable one. If a failover operation is active for this client device 1109 * the call fails, returning MDI_BUSY. 1110 * 1111 * By default this function returns a suitable path in the 'online' state, 1112 * based on the current load balancing policy. Currently we support 1113 * LOAD_BALANCE_NONE (Previously selected online path will continue to be 1114 * used as long as the path is usable) and LOAD_BALANCE_RR (Online paths 1115 * will be selected in a round robin fashion). The load balancing scheme 1116 * can be configured in the vHCI driver's configuration file (driver.conf). 1117 * 1118 * vHCI drivers may override this default behaviour by specifying appropriate 1119 * flags. If start_pip is specified (non NULL), it is used as the routine's 1120 * starting point; it starts walking from there to find the next appropriate 1121 * path. 1122 * 1123 * The following values for 'flags' are currently defined: 1124 * 1125 * MDI_SELECT_ONLINE_PATH: select an ONLINE path 1126 * MDI_SELECT_STANDBY_PATH: select a STANDBY path 1127 * MDI_SELECT_USER_DISABLE_PATH: select user disable for failover and 1128 * auto_failback 1129 * 1130 * The selected paths are returned in an mdi_hold_path() state (pi_ref_cnt), 1131 * caller should release the hold by calling mdi_rele_path() at the end of 1132 * operation. 1133 */ 1134 int mdi_select_path(dev_info_t *, struct buf *, int, 1135 mdi_pathinfo_t *, mdi_pathinfo_t **); 1136 int mdi_set_lb_policy(dev_info_t *, client_lb_t); 1137 int mdi_set_lb_region_size(dev_info_t *, int); 1138 client_lb_t mdi_get_lb_policy(dev_info_t *); 1139 1140 /* 1141 * flags for mdi_select_path() routine 1142 */ 1143 #define MDI_SELECT_ONLINE_PATH 0x0001 1144 #define MDI_SELECT_STANDBY_PATH 0x0002 1145 #define MDI_SELECT_USER_DISABLE_PATH 0x0004 1146 1147 /* 1148 * MDI client device utility functions 1149 */ 1150 int mdi_client_get_path_count(dev_info_t *); 1151 dev_info_t *mdi_client_path2devinfo(dev_info_t *, caddr_t); 1152 1153 /* 1154 * Failover: 1155 * 1156 * The vHCI driver calls mdi_failover() to initiate a failover operation. 1157 * mdi_failover() calls back into the vHCI driver's vo_failover() 1158 * entry point to perform the actual failover operation. The reason 1159 * for requiring the vHCI driver to initiate failover by calling 1160 * mdi_failover(), instead of directly executing vo_failover() itself, 1161 * is to ensure that the mdi framework can keep track of the client 1162 * state properly. Additionally, mdi_failover() provides as a 1163 * convenience the option of performing the failover operation 1164 * synchronously or asynchronously 1165 * 1166 * Upon successful completion of the failover operation, the paths that were 1167 * previously ONLINE will be in the STANDBY state, and the newly activated 1168 * paths will be in the ONLINE state. 1169 * 1170 * The flags modifier determines whether the activation is done synchronously 1171 */ 1172 int mdi_failover(dev_info_t *, dev_info_t *, int); 1173 1174 /* 1175 * Client device failover mode of operation 1176 */ 1177 #define MDI_FAILOVER_SYNC 1 /* Syncronous Failover */ 1178 #define MDI_FAILOVER_ASYNC 2 /* Asyncronous Failover */ 1179 1180 /* 1181 * mdi_pathinfo node state change functions. 1182 */ 1183 void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *); 1184 1185 /* 1186 * mdi_pathinfo node extended state change functions. 1187 */ 1188 int mdi_pi_get_state2(mdi_pathinfo_t *, mdi_pathinfo_state_t *, uint32_t *); 1189 int mdi_pi_get_preferred(mdi_pathinfo_t *); 1190 1191 /* 1192 * mdi_pathinfo node member functions 1193 */ 1194 void *mdi_pi_get_client_private(mdi_pathinfo_t *); 1195 void mdi_pi_set_client_private(mdi_pathinfo_t *, void *); 1196 void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t); 1197 void mdi_pi_set_preferred(mdi_pathinfo_t *, int); 1198 1199 /* get/set vhci private data */ 1200 void *mdi_client_get_vhci_private(dev_info_t *); 1201 void mdi_client_set_vhci_private(dev_info_t *, void *); 1202 void *mdi_phci_get_vhci_private(dev_info_t *); 1203 void mdi_phci_set_vhci_private(dev_info_t *, void *); 1204 void *mdi_pi_get_vhci_private(mdi_pathinfo_t *); 1205 void mdi_pi_set_vhci_private(mdi_pathinfo_t *, void *); 1206 1207 /* 1208 * mdi_pathinfo Property utilities 1209 */ 1210 int mdi_prop_size(mdi_pathinfo_t *, size_t *); 1211 int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t); 1212 1213 /* obsolete interface, to be removed */ 1214 void mdi_get_next_path(dev_info_t *, mdi_pathinfo_t *, mdi_pathinfo_t **); 1215 int mdi_get_component_type(dev_info_t *); 1216 1217 #endif /* _KERNEL */ 1218 1219 #ifdef __cplusplus 1220 } 1221 #endif 1222 1223 #endif /* _SYS_MDI_IMPLDEFS_H */ 1224