1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _SYS_MDI_IMPLDEFS_H 27 #define _SYS_MDI_IMPLDEFS_H 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/note.h> 32 #include <sys/types.h> 33 #include <sys/sunmdi.h> 34 #include <sys/modhash.h> 35 #include <sys/callb.h> 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #ifdef _KERNEL 42 43 /* 44 * Multipath Driver Interfaces 45 * 46 * The multipathing framework is provided in two modules. The 'mpxio' misc. 47 * module provides the core multipath framework and the 'scsi_vhci' nexus 48 * driver provides the SCSI-III command set driver functionality for 49 * managing Fibre-Channel storage devices. 50 * 51 * As in any multipathing solution there are three major problems to solve: 52 * 53 * 1) Identification and enumeration of multipath client devices. 54 * 2) Optimal path selection when routing I/O requests. 55 * 3) Observability interfaces to snapshot the multipath configuration, 56 * and infrastructure to provide performance and error statistics. 
57 * 58 * The mpxio framework consists of several major components: 59 * 60 * 1) The MDI is the Multiplexed Device Interface; this is the core glue which 61 * holds the following components together. 62 * 2) vHCI (Virtual Host Controller Interconnect) drivers provide multipathing 63 * services for a given bus technology (example: 'scsi_vhci' provides 64 * multipathing support for SCSI-III fibre-channel devices). 65 * 3) pHCI (Physical Host Controller Interconnect) drivers provide transport 66 * services for a given host controller (example: 'fcp' provides transport 67 * for fibre-channel devices). 68 * 4) Client Devices are standard Solaris target (or leaf) drivers 69 * (example: 'ssd' is the standard disk driver for fibre-channel arrays). 70 * 5) Multipath information nodes ('pathinfo' nodes) connect client device 71 * nodes and pHCI device nodes in the device tree. 72 * 73 * With the scsi_vhci, a QLC card, and mpxio enabled, the device tree might 74 * look like this: 75 * 76 * /\ 77 * / ............ 
78 * <vHCI>:/ \ 79 * +-----------+ +-----------+ 80 * | scsi_vhci | | pci@1f,0 | 81 * +-----------+ +-----------+ 82 * / \ \ 83 * <Client>: / \ :<Client> \ :parent(pHCI) 84 * +----------+ +-----------+ +-------------+ 85 * | ssd 1 | | ssd 2 | | qlc@0,0 | 86 * +----------+ +-----------+ +-------------+ 87 * | | / \ 88 * | | <pHCI>: / \ :<pHCI> 89 * | | +-------------+ +-------------+ 90 * | | | pHCI 1 (fp) | | pHCI 2 (fp) | 91 * | | +-------------+ +-------------+ 92 * | | / | / | 93 * | | +------+ | +------+ | 94 * | | | ssd 3| | | ssd | | 95 * | | |!mpxio| | | (OBP)| | 96 * | | +------+ | +------+ | 97 * | | | | 98 * | | <pathinfo>: | | 99 * | | +-------+ +--------+ 100 * | +-------------->| path |-------->| path | 101 * | | info | | info | 102 * | | node 1| | node 3 | 103 * | +-------+ +--------+ 104 * | | | 105 * | | +~~~~~~~~+ 106 * | +-------+ :+--------+ 107 * +--------------------------->| path |-------->| path | 108 * | info | :| info | 109 * | node 2| +| node 4 | 110 * +-------+ +--------+ 111 * 112 * The multipath information nodes (mdi_pathinfo nodes) establish the 113 * relationship between the pseudo client driver instance nodes (children 114 * of the vHCI) and the physical host controller interconnect (pHCI 115 * drivers) forming a matrix structure. 116 * 117 * The mpxio module implements locking at multiple granularity levels to 118 * support the needs of various consumers. The multipath matrix can be 119 * column locked, or row locked depending on the consumer. The intention 120 * is to balance simplicity and performance. 121 * 122 * Locking: 123 * 124 * The devinfo locking still applies: 125 * 126 * 1) An ndi_devi_enter of a parent protects linkage/state of children. 127 * 2) state >= DS_INITIALIZED adds devi_ref of parent 128 * 3) devi_ref at state >= DS_ATTACHED prevents detach(9E). 129 * 130 * The ordering of 1) is (vHCI, pHCI). For a DEBUG kernel this ordering 131 * is asserted by the ndi_devi_enter() implementation. 
There is also an 132 * ndi_devi_enter(Client), which is atypical since the client is a leaf. 133 * This is done to synchronize pathinfo nodes during devinfo snapshot (see 134 * di_register_pip) by pretending that the pathinfo nodes are children 135 * of the client. 136 * 137 * In addition to devinfo locking the current implementation utilizes 138 * the following locks: 139 * 140 * mdi_mutex: protects the global list of vHCIs. 141 * 142 * vh_phci_mutex: per-vHCI (mutex) lock: protects list of pHCIs registered 143 * with vHCI. 144 * 145 * vh_client_mutex: per-vHCI (mutex) lock: protects list/hash of Clients 146 * associated with vHCI. 147 * 148 * ph_mutex: per-pHCI (mutex) lock: protects the column (pHCI-mdi_pathinfo 149 * node list) and per-pHCI structure fields. mdi_pathinfo node creation, 150 * deletion and child mdi_pathinfo node state changes are serialized on per 151 * pHCI basis (Protection against DR). 152 * 153 * ct_mutex: per-client (mutex) lock: protects the row (client-mdi_pathinfo 154 * node list) and per-client structure fields. The client-mdi_pathinfo node 155 * list is typically walked to select an optimal path when routing I/O 156 * requests. 157 * 158 * pi_mutex: per-mdi_pathinfo (mutex) lock: protects the mdi_pathinfo node 159 * structure fields. 160 * 161 * Note that per-Client structure and per-pHCI fields are freely readable when 162 * corresponding mdi_pathinfo locks are held, since holding an mdi_pathinfo 163 * node guarantees that its corresponding client and pHCI devices will not be 164 * freed. 165 */ 166 167 /* 168 * MDI Client global unique identifier property name string definition 169 */ 170 extern const char *mdi_client_guid_prop; 171 #define MDI_CLIENT_GUID_PROP (char *)mdi_client_guid_prop 172 173 /* 174 * MDI Client load balancing policy definitions 175 * 176 * Load balancing policies are determined on a per-vHCI basis and are 177 * configurable via the vHCI's driver.conf file. 
178 */ 179 typedef enum { 180 LOAD_BALANCE_NONE, /* Alternate pathing */ 181 LOAD_BALANCE_RR, /* Round Robin */ 182 LOAD_BALANCE_LBA /* Logical Block Addressing */ 183 } client_lb_t; 184 185 typedef struct { 186 int region_size; 187 }client_lb_args_t; 188 189 /* 190 * MDI client load balancing property name/value string definitions 191 */ 192 extern const char *mdi_load_balance; 193 extern const char *mdi_load_balance_none; 194 extern const char *mdi_load_balance_ap; 195 extern const char *mdi_load_balance_rr; 196 extern const char *mdi_load_balance_lba; 197 198 #define LOAD_BALANCE_PROP (char *)mdi_load_balance 199 #define LOAD_BALANCE_PROP_NONE (char *)mdi_load_balance_none 200 #define LOAD_BALANCE_PROP_AP (char *)mdi_load_balance_ap 201 #define LOAD_BALANCE_PROP_RR (char *)mdi_load_balance_rr 202 #define LOAD_BALANCE_PROP_LBA (char *)mdi_load_balance_lba 203 204 /* default for region size */ 205 #define LOAD_BALANCE_DEFAULT_REGION_SIZE 18 206 207 /* 208 * vHCI drivers: 209 * 210 * vHCI drivers are pseudo nexus drivers which implement multipath services 211 * for a specific command set or bus architecture ('class'). There is a 212 * single instance of the vHCI driver for each command set which supports 213 * multipath devices. 214 * 215 * Each vHCI driver registers the following callbacks from attach(9e). 
216 */ 217 #define MDI_VHCI_OPS_REV_1 1 218 /* 219 * Change MDI_VHCI_OPS_REV_NAME as per MDI_VHCI_OPS_REV 220 */ 221 #define MDI_VHCI_OPS_REV MDI_VHCI_OPS_REV_1 222 #define MDI_VHCI_OPS_REV_NAME "1" 223 224 typedef struct mdi_vhci_ops { 225 /* revision management */ 226 int vo_revision; 227 228 /* mdi_pathinfo node init callback */ 229 int (*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags); 230 231 /* mdi_pathinfo node uninit callback */ 232 int (*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip, 233 int flags); 234 235 /* mdi_pathinfo node state change callback */ 236 int (*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip, 237 mdi_pathinfo_state_t state, uint32_t, int flags); 238 239 /* Client path failover callback */ 240 int (*vo_failover)(dev_info_t *vdip, dev_info_t *cdip, int flags); 241 242 /* Client attached callback */ 243 void (*vo_client_attached)(dev_info_t *cdip); 244 } mdi_vhci_ops_t; 245 246 /* 247 * An mdi_vhci structure is created and bound to the devinfo node of every 248 * registered vHCI class driver; this happens when a vHCI registers itself from 249 * attach(9e). This structure is unbound and freed when the vHCI unregisters 250 * at detach(9e) time; 251 * 252 * Each vHCI driver is associated with a vHCI class name; this is the handle 253 * used to register and unregister pHCI drivers for a given transport. 254 * 255 * Locking: Different parts of this structure are guarded by different 256 * locks: global threading of multiple vHCIs and initialization is protected 257 * by mdi_mutex, the list of pHCIs associated with a vHCI is protected by 258 * vh_phci_mutex, and Clients are protected by vh_client_mutex. 259 * 260 * XXX Depending on the context, some of the fields can be freely read without 261 * holding any locks (ex. holding vh_client_mutex lock also guarantees that 262 * the vHCI (parent) cannot be unexpectedly freed). 263 */ 264 typedef struct mdi_vhci { 265 /* protected by mdi_mutex... 
*/ 266 struct mdi_vhci *vh_next; /* next vHCI link */ 267 struct mdi_vhci *vh_prev; /* prev vHCI link */ 268 char *vh_class; /* vHCI class name */ 269 dev_info_t *vh_dip; /* vHCI devi handle */ 270 int vh_refcnt; /* vHCI reference count */ 271 struct mdi_vhci_config *vh_config; /* vHCI config */ 272 client_lb_t vh_lb; /* vHCI load-balancing */ 273 struct mdi_vhci_ops *vh_ops; /* vHCI callback vectors */ 274 275 /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */ 276 kmutex_t vh_phci_mutex; /* pHCI mutex */ 277 int vh_phci_count; /* pHCI device count */ 278 struct mdi_phci *vh_phci_head; /* pHCI list head */ 279 struct mdi_phci *vh_phci_tail; /* pHCI list tail */ 280 281 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 282 kmutex_t vh_client_mutex; /* Client mutex */ 283 int vh_client_count; /* Client count */ 284 struct client_hash *vh_client_table; /* Client hash */ 285 } mdi_vhci_t; 286 287 /* 288 * per-vHCI lock macros 289 */ 290 #define MDI_VHCI_PHCI_LOCK(vh) mutex_enter(&(vh)->vh_phci_mutex) 291 #define MDI_VHCI_PHCI_TRYLOCK(vh) mutex_tryenter(&(vh)->vh_phci_mutex) 292 #define MDI_VHCI_PHCI_UNLOCK(vh) mutex_exit(&(vh)->vh_phci_mutex) 293 #ifdef DEBUG 294 #define MDI_VHCI_PCHI_LOCKED(vh) MUTEX_HELD(&(vh)->vh_phci_mutex) 295 #endif /* DEBUG */ 296 #define MDI_VHCI_CLIENT_LOCK(vh) mutex_enter(&(vh)->vh_client_mutex) 297 #define MDI_VHCI_CLIENT_TRYLOCK(vh) mutex_tryenter(&(vh)->vh_client_mutex) 298 #define MDI_VHCI_CLIENT_UNLOCK(vh) mutex_exit(&(vh)->vh_client_mutex) 299 #ifdef DEBUG 300 #define MDI_VHCI_CLIENT_LOCKED(vh) MUTEX_HELD(&(vh)->vh_client_mutex) 301 #endif /* DEBUG */ 302 303 304 /* 305 * GUID Hash definitions 306 * 307 * Since all the mpxio managed devices for a given class are enumerated under 308 * the single vHCI instance for that class, sequentially walking through the 309 * client device link to find a client would be prohibitively slow. 
310 */ 311 312 #define CLIENT_HASH_TABLE_SIZE (32) /* GUID hash */ 313 314 /* 315 * Client hash table structure 316 */ 317 struct client_hash { 318 struct mdi_client *ct_hash_head; /* Client hash head */ 319 int ct_hash_count; /* Client hash count */ 320 }; 321 322 323 /* 324 * pHCI Drivers: 325 * 326 * Physical HBA drivers provide transport services for mpxio-managed devices. 327 * As each pHCI instance is attached, it must register itself with the mpxio 328 * framework using mdi_phci_register(). When the pHCI is detached it must 329 * similarly call mdi_phci_unregister(). 330 * 331 * The framework maintains a list of registered pHCI device instances for each 332 * vHCI. This list involves (vh_phci_count, vh_phci_head, vh_phci_tail) and 333 * (ph_next, ph_prev, ph_vhci) and is protected by vh_phci_mutex. 334 * 335 * Locking order: 336 * 337 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex)) XXX 338 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex devinfo_tree_lock)) XXX 339 */ 340 typedef struct mdi_phci { 341 /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */ 342 struct mdi_phci *ph_next; /* next pHCI link */ 343 struct mdi_phci *ph_prev; /* prev pHCI link */ 344 dev_info_t *ph_dip; /* pHCI devi handle */ 345 struct mdi_vhci *ph_vhci; /* pHCI back ref. to vHCI */ 346 347 /* protected by MDI_PHCI_LOCK ph_mutex... */ 348 kmutex_t ph_mutex; /* per-pHCI mutex */ 349 int ph_path_count; /* pi count */ 350 mdi_pathinfo_t *ph_path_head; /* pi list head */ 351 mdi_pathinfo_t *ph_path_tail; /* pi list tail */ 352 int ph_flags; /* pHCI operation flags */ 353 int ph_unstable; /* Paths in transient state */ 354 kcondvar_t ph_unstable_cv; /* Paths in transient state */ 355 356 /* protected by mdi_phci_[gs]et_vhci_private caller... */ 357 void *ph_vprivate; /* vHCI driver private */ 358 } mdi_phci_t; 359 360 /* 361 * A pHCI device is 'unstable' while one or more paths are in a transitional 362 * state. Hotplugging is prevented during this state. 
363 */ 364 #define MDI_PHCI_UNSTABLE(ph) (ph)->ph_unstable++; 365 #define MDI_PHCI_STABLE(ph) { \ 366 (ph)->ph_unstable--; \ 367 if ((ph)->ph_unstable == 0) { \ 368 cv_broadcast(&(ph)->ph_unstable_cv); \ 369 } \ 370 } 371 372 /* 373 * per-pHCI lock macros 374 */ 375 #define MDI_PHCI_LOCK(ph) mutex_enter(&(ph)->ph_mutex) 376 #define MDI_PHCI_TRYLOCK(ph) mutex_tryenter(&(ph)->ph_mutex) 377 #define MDI_PHCI_UNLOCK(ph) mutex_exit(&(ph)->ph_mutex) 378 #ifdef DEBUG 379 #define MDI_PHCI_LOCKED(vh) MUTEX_HELD(&(ph)->ph_mutex) 380 #endif /* DEBUG */ 381 382 /* 383 * pHCI state definitions and macros to track the pHCI driver instance state 384 */ 385 #define MDI_PHCI_FLAGS_OFFLINE 0x1 /* pHCI is offline */ 386 #define MDI_PHCI_FLAGS_SUSPEND 0x2 /* pHCI is suspended */ 387 #define MDI_PHCI_FLAGS_POWER_DOWN 0x4 /* pHCI is power down */ 388 #define MDI_PHCI_FLAGS_DETACH 0x8 /* pHCI is detached */ 389 #define MDI_PHCI_FLAGS_USER_DISABLE 0x10 /* pHCI is disabled,user */ 390 #define MDI_PHCI_FLAGS_D_DISABLE 0x20 /* pHCI is disabled,driver */ 391 #define MDI_PHCI_FLAGS_D_DISABLE_TRANS 0x40 /* pHCI is disabled,transient */ 392 #define MDI_PHCI_FLAGS_POWER_TRANSITION 0x80 /* pHCI is power transition */ 393 394 #define MDI_PHCI_DISABLE_MASK \ 395 (MDI_PHCI_FLAGS_USER_DISABLE | MDI_PHCI_FLAGS_D_DISABLE | \ 396 MDI_PHCI_FLAGS_D_DISABLE_TRANS) 397 398 #define MDI_PHCI_IS_READY(ph) \ 399 (((ph)->ph_flags & MDI_PHCI_DISABLE_MASK) == 0) 400 401 #define MDI_PHCI_SET_OFFLINE(ph) {\ 402 ASSERT(MDI_PHCI_LOCKED(ph)); \ 403 (ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE; } 404 #define MDI_PHCI_SET_ONLINE(ph) {\ 405 ASSERT(MDI_PHCI_LOCKED(ph)); \ 406 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE; } 407 #define MDI_PHCI_IS_OFFLINE(ph) \ 408 ((ph)->ph_flags & MDI_PHCI_FLAGS_OFFLINE) 409 410 #define MDI_PHCI_SET_SUSPEND(ph) {\ 411 ASSERT(MDI_PHCI_LOCKED(ph)); \ 412 (ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND; } 413 #define MDI_PHCI_SET_RESUME(ph) {\ 414 ASSERT(MDI_PHCI_LOCKED(ph)); \ 415 (ph)->ph_flags &= 
~MDI_PHCI_FLAGS_SUSPEND; } 416 #define MDI_PHCI_IS_SUSPENDED(ph) \ 417 ((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND) 418 419 #define MDI_PHCI_SET_DETACH(ph) {\ 420 ASSERT(MDI_PHCI_LOCKED(ph)); \ 421 (ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH; } 422 #define MDI_PHCI_SET_ATTACH(ph) {\ 423 ASSERT(MDI_PHCI_LOCKED(ph)); \ 424 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH; } 425 426 #define MDI_PHCI_SET_POWER_DOWN(ph) {\ 427 ASSERT(MDI_PHCI_LOCKED(ph)); \ 428 (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN; } 429 #define MDI_PHCI_SET_POWER_UP(ph) {\ 430 ASSERT(MDI_PHCI_LOCKED(ph)); \ 431 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN; } 432 #define MDI_PHCI_IS_POWERED_DOWN(ph) \ 433 ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_DOWN) 434 435 #define MDI_PHCI_SET_USER_ENABLE(ph) {\ 436 ASSERT(MDI_PHCI_LOCKED(ph)); \ 437 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_USER_DISABLE; } 438 #define MDI_PHCI_SET_USER_DISABLE(ph) {\ 439 ASSERT(MDI_PHCI_LOCKED(ph)); \ 440 (ph)->ph_flags |= MDI_PHCI_FLAGS_USER_DISABLE; } 441 #define MDI_PHCI_IS_USER_DISABLED(ph) \ 442 ((ph)->ph_flags & MDI_PHCI_FLAGS_USER_DISABLE) 443 444 #define MDI_PHCI_SET_DRV_ENABLE(ph) {\ 445 ASSERT(MDI_PHCI_LOCKED(ph)); \ 446 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE; } 447 #define MDI_PHCI_SET_DRV_DISABLE(ph) {\ 448 ASSERT(MDI_PHCI_LOCKED(ph)); \ 449 (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE; } 450 #define MDI_PHCI_IS_DRV_DISABLED(ph) \ 451 ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE) 452 453 #define MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph) {\ 454 ASSERT(MDI_PHCI_LOCKED(ph)); \ 455 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE_TRANS; } 456 #define MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph) {\ 457 ASSERT(MDI_PHCI_LOCKED(ph)); \ 458 (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE_TRANS; } 459 #define MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph) \ 460 ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE_TRANS) 461 462 #define MDI_PHCI_SET_POWER_TRANSITION(ph) {\ 463 ASSERT(MDI_PHCI_LOCKED(ph)); \ 464 (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_TRANSITION; } 465 #define 
MDI_PHCI_CLEAR_POWER_TRANSITION(ph) {\ 466 ASSERT(MDI_PHCI_LOCKED(ph)); \ 467 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_TRANSITION; } 468 #define MDI_PHCI_IS_POWER_TRANSITION(ph) \ 469 ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_TRANSITION) 470 471 /* 472 * mpxio Managed Clients: 473 * 474 * This framework creates a struct mdi_client for every client device created 475 * by the framework as a result of self-enumeration of target devices by the 476 * registered pHCI devices. This structure is bound to client device dev_info 477 * node at the time of client device allocation (ndi_devi_alloc(9e)). This 478 * structure is unbound from the dev_info node when mpxio framework removes a 479 * client device node from the system. 480 * 481 * This structure is created when a first path is enumerated and removed when 482 * last path is de-enumerated from the system. 483 * 484 * Multipath client devices are instantiated as children of corresponding vHCI 485 * driver instance. Each client device is uniquely identified by a GUID 486 * provided by target device itself. The parent vHCI device also maintains a 487 * hashed list of client devices, protected by vh_client_mutex. 488 * 489 * Typically pHCI devices self-enumerate their child devices using taskq, 490 * resulting in multiple paths to the same client device to be enumerated by 491 * competing threads. 492 * 493 * Currently this framework supports two kinds of load-balancing policy 494 * configurable through the vHCI driver configuration files. 495 * 496 * NONE - Legacy AP mode 497 * Round Robin - Balance the pHCI load in a Round Robin fashion. 498 * 499 * This framework identifies the client device in three distinct states: 500 * 501 * OPTIMAL - Client device has atleast one redundant path. 502 * DEGRADED - No redundant paths (critical). Failure in the current active 503 * path would result in data access failures. 504 * FAILED - No paths are available to access this device. 
 *
 * Locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))			XXX
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex devinfo_tree_lock))		XXX
 */
typedef struct mdi_client {
	/* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */
	struct mdi_client	*ct_hnext;	/* next client */
	struct mdi_client	*ct_hprev;	/* prev client */
	dev_info_t		*ct_dip;	/* client devi handle */
	struct mdi_vhci		*ct_vhci;	/* vHCI back ref */
	char			*ct_drvname;	/* client driver name */
	char			*ct_guid;	/* client guid */
	client_lb_t		ct_lb;		/* load balancing scheme */
	client_lb_args_t	*ct_lb_args;	/* load balancing args */


	/* protected by MDI_CLIENT_LOCK ct_mutex... */
	kmutex_t		ct_mutex;	/* per-client mutex */
	int			ct_path_count;	/* multi path count */
	mdi_pathinfo_t		*ct_path_head;	/* multi path list head */
	mdi_pathinfo_t		*ct_path_tail;	/* multi path list tail */
	mdi_pathinfo_t		*ct_path_last;	/* last path used for i/o */
	int			ct_state;	/* state information */
	int			ct_flags;	/* Driver op. flags */
	int			ct_failover_flags;	/* Failover args */
	int			ct_failover_status;	/* last fo status */
	kcondvar_t		ct_failover_cv;	/* Failover status cv */
	int			ct_unstable;	/* Paths in transient state */
	kcondvar_t		ct_unstable_cv;	/* Paths in transient state */

	int			ct_power_cnt;	/* Hold count on parent power */
	kcondvar_t		ct_powerchange_cv;
					/* Paths in power transient state */
	short			ct_powercnt_config;
					/* held in pre/post config */
	short			ct_powercnt_unconfig;
					/* held in pre/post unconfig */
	int			ct_powercnt_reset;
					/* ct_power_cnt was reset */

	void			*ct_cprivate;	/* client driver private */
	void			*ct_vprivate;	/* vHCI driver private */
} mdi_client_t;

/*
 * per-Client device locking definitions
 *
 * Thin wrappers around mutex_enter/mutex_tryenter/mutex_exit on ct_mutex.
 * MDI_CLIENT_LOCKED() is only defined on DEBUG kernels.
 */
#define	MDI_CLIENT_LOCK(ct)		mutex_enter(&(ct)->ct_mutex)
#define	MDI_CLIENT_TRYLOCK(ct)		mutex_tryenter(&(ct)->ct_mutex)
#define	MDI_CLIENT_UNLOCK(ct)		mutex_exit(&(ct)->ct_mutex)
#ifdef	DEBUG
#define	MDI_CLIENT_LOCKED(ct)		MUTEX_HELD(&(ct)->ct_mutex)
#endif	/* DEBUG */

/*
 * A Client device is unstable while one or more paths are in transitional
 * state.  We do not allow failover to take place while paths are in transient
 * state. Similarly we do not allow state transition while client device
 * failover is in progress.
566 */ 567 #define MDI_CLIENT_UNSTABLE(ct) (ct)->ct_unstable++; 568 #define MDI_CLIENT_STABLE(ct) { \ 569 (ct)->ct_unstable--; \ 570 if ((ct)->ct_unstable == 0) { \ 571 cv_broadcast(&(ct)->ct_unstable_cv); \ 572 } \ 573 } 574 575 /* 576 * Client driver instance state definitions: 577 */ 578 #define MDI_CLIENT_FLAGS_OFFLINE 0x00000001 579 #define MDI_CLIENT_FLAGS_SUSPEND 0x00000002 580 #define MDI_CLIENT_FLAGS_POWER_DOWN 0x00000004 581 #define MDI_CLIENT_FLAGS_DETACH 0x00000008 582 #define MDI_CLIENT_FLAGS_FAILOVER 0x00000010 583 #define MDI_CLIENT_FLAGS_REPORT_DEV 0x00000020 584 #define MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS 0x00000040 585 #define MDI_CLIENT_FLAGS_ASYNC_FREE 0x00000080 586 #define MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED 0x00000100 587 #define MDI_CLIENT_FLAGS_POWER_TRANSITION 0x00000200 588 589 #define MDI_CLIENT_SET_OFFLINE(ct) {\ 590 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 591 (ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE; } 592 #define MDI_CLIENT_SET_ONLINE(ct) {\ 593 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 594 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE; } 595 #define MDI_CLIENT_IS_OFFLINE(ct) \ 596 ((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE) 597 598 #define MDI_CLIENT_SET_SUSPEND(ct) {\ 599 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 600 (ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND; } 601 #define MDI_CLIENT_SET_RESUME(ct) {\ 602 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 603 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND; } 604 #define MDI_CLIENT_IS_SUSPENDED(ct) \ 605 ((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND) 606 607 #define MDI_CLIENT_SET_POWER_DOWN(ct) {\ 608 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 609 (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN; } 610 #define MDI_CLIENT_SET_POWER_UP(ct) {\ 611 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 612 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN; } 613 #define MDI_CLIENT_IS_POWERED_DOWN(ct) \ 614 ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_DOWN) 615 616 #define MDI_CLIENT_SET_POWER_TRANSITION(ct) {\ 617 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 618 (ct)->ct_flags |= 
MDI_CLIENT_FLAGS_POWER_TRANSITION; } 619 #define MDI_CLIENT_CLEAR_POWER_TRANSITION(ct) {\ 620 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 621 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_TRANSITION; } 622 #define MDI_CLIENT_IS_POWER_TRANSITION(ct) \ 623 ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_TRANSITION) 624 625 #define MDI_CLIENT_SET_DETACH(ct) {\ 626 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 627 (ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH; } 628 #define MDI_CLIENT_SET_ATTACH(ct) {\ 629 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 630 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH; } 631 #define MDI_CLIENT_IS_DETACHED(ct) \ 632 ((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH) 633 634 #define MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct) {\ 635 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 636 (ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER; } 637 #define MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct) {\ 638 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 639 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER; } 640 #define MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \ 641 ((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER) 642 643 #define MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct) {\ 644 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 645 (ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV; } 646 #define MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct) {\ 647 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 648 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV; } 649 #define MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \ 650 ((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV) 651 652 #define MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct) {\ 653 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 654 (ct)->ct_flags |= MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS; } 655 #define MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct) {\ 656 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 657 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS; } 658 #define MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct) \ 659 ((ct)->ct_flags & MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS) 660 661 #define MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct) {\ 662 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 663 (ct)->ct_flags |= 
MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED; } 664 #define MDI_CLIENT_IS_DEV_NOT_SUPPORTED(ct) \ 665 ((ct)->ct_flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED) 666 667 /* 668 * Client operating states. 669 */ 670 #define MDI_CLIENT_STATE_OPTIMAL 1 671 #define MDI_CLIENT_STATE_DEGRADED 2 672 #define MDI_CLIENT_STATE_FAILED 3 673 674 #define MDI_CLIENT_STATE(ct) ((ct)->ct_state) 675 #define MDI_CLIENT_SET_STATE(ct, state) ((ct)->ct_state = state) 676 677 #define MDI_CLIENT_IS_FAILED(ct) \ 678 ((ct)->ct_state == MDI_CLIENT_STATE_FAILED) 679 680 /* 681 * mdi_pathinfo nodes: 682 * 683 * From this framework's perspective, a 'path' is a tuple consisting of a 684 * client or end device, a host controller which provides device 685 * identification and transport services (pHCI), and bus specific unit 686 * addressing information. A path may be decorated with properties which 687 * describe the capabilities of the path; such properties are analogous to 688 * device node and minor node properties. 689 * 690 * The framework maintains link list of mdi_pathinfo nodes created by every 691 * pHCI driver instance via the pi_phci_link linkage; this is used (for example) 692 * to make sure that all relevant pathinfo nodes are freed before the pHCI 693 * is unregistered. 694 * 695 * Locking order: 696 * 697 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) XXX 698 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) XXX 699 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) XXX 700 * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex)) XXX 701 * 702 * mdi_pathinfo node structure definition 703 */ 704 struct mdi_pathinfo { 705 /* protected by MDI_PHCI_LOCK ph_mutex... */ 706 struct mdi_pathinfo *pi_phci_link; /* next path in phci list */ 707 mdi_phci_t *pi_phci; /* pHCI dev_info node */ 708 709 /* protected by MDI_CLIENT_LOCK ct_mutex... 
*/ 710 struct mdi_pathinfo *pi_client_link; /* next path in client list */ 711 mdi_client_t *pi_client; /* client */ 712 713 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 714 char *pi_addr; /* path unit address */ 715 int pi_path_instance; /* path instance */ 716 717 /* protected by MDI_PI_LOCK pi_mutex... */ 718 kmutex_t pi_mutex; /* per path mutex */ 719 mdi_pathinfo_state_t pi_state; /* path state */ 720 mdi_pathinfo_state_t pi_old_state; /* path state */ 721 kcondvar_t pi_state_cv; /* path state condvar */ 722 nvlist_t *pi_prop; /* Properties */ 723 void *pi_cprivate; /* client private info */ 724 void *pi_pprivate; /* phci private info */ 725 int pi_ref_cnt; /* pi reference count */ 726 kcondvar_t pi_ref_cv; /* condition variable */ 727 struct mdi_pi_kstats *pi_kstats; /* aggregate kstats */ 728 int pi_pm_held; /* phci's kidsup incremented */ 729 int pi_preferred; /* Preferred path */ 730 void *pi_vprivate; /* vhci private info */ 731 }; 732 733 /* 734 * pathinfo statistics: 735 * 736 * The mpxio architecture allows for multiple pathinfo nodes for each 737 * client-pHCI combination. For statistics purposes, these statistics are 738 * aggregated into a single client-pHCI set of kstats. 
 */
struct mdi_pi_kstats {
	int	pi_kstat_ref;		/* # paths aggregated, also a ref cnt */
	kstat_t	*pi_kstat_iostats;	/* mdi:iopath statistic set */
	kstat_t *pi_kstat_errstats;	/* error statistics */
};

/*
 * pathinfo error kstat
 */
struct pi_errs {
	struct kstat_named pi_softerrs;		/* "Soft" Error */
	struct kstat_named pi_harderrs;		/* "Hard" Error */
	struct kstat_named pi_transerrs;	/* Transport Errors */
	struct kstat_named pi_icnt_busy;	/* Interconnect Busy */
	struct kstat_named pi_icnt_errors;	/* Interconnect Errors */
	struct kstat_named pi_phci_rsrc;	/* pHCI No Resources */
	struct kstat_named pi_phci_localerr;	/* pHCI Local Errors */
	struct kstat_named pi_phci_invstate;	/* pHCI Invalid State */
	struct kstat_named pi_failedfrom;	/* Failover: Failed From */
	struct kstat_named pi_failedto;		/* Failover: Failed To */
};

/*
 * increment an error counter
 *
 * 'x' is the name of a struct pi_errs member (use the MDI_PI_* codes
 * defined below).  Nothing is done when the path has no pi_kstats; the
 * pi_kstat_errstats pointer itself is dereferenced without a NULL check.
 */
#define	MDI_PI_ERRSTAT(pip, x) { \
	if (MDI_PI((pip))->pi_kstats != NULL) { \
		struct pi_errs *pep; \
		pep = MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \
		pep->x.value.ui32++; \
	} \
}

/*
 * error codes which can be passed to MDI_PI_ERRSTAT
 */
#define	MDI_PI_SOFTERR	pi_softerrs
#define	MDI_PI_HARDERR	pi_harderrs
#define	MDI_PI_TRANSERR	pi_transerrs
#define	MDI_PI_ICNTBUSY	pi_icnt_busy
#define	MDI_PI_ICNTERR	pi_icnt_errors
#define	MDI_PI_PHCIRSRC	pi_phci_rsrc
#define	MDI_PI_PHCILOCL	pi_phci_localerr
#define	MDI_PI_PHCIINVS	pi_phci_invstate
#define	MDI_PI_FAILFROM	pi_failedfrom
#define	MDI_PI_FAILTO	pi_failedto

/* Cast an opaque pathinfo handle to the implementation structure. */
#define	MDI_PI(type)			((struct mdi_pathinfo *)(type))

/*
 * per-pathinfo lock macros (wrappers around pi_mutex).
 * MDI_PI_LOCKED() is only defined on DEBUG kernels.
 */
#define	MDI_PI_LOCK(pip)		mutex_enter(&MDI_PI(pip)->pi_mutex)
#define	MDI_PI_TRYLOCK(pip)		mutex_tryenter(&MDI_PI(pip)->pi_mutex)
#define	MDI_PI_UNLOCK(pip)		mutex_exit(&MDI_PI(pip)->pi_mutex)
#ifdef	DEBUG
#define	MDI_PI_LOCKED(pip)		MUTEX_HELD(&MDI_PI(pip)->pi_mutex)
#endif	/* DEBUG */

/*
 * Reference count helpers: plain pre-increment/pre-decrement of pi_ref_cnt,
 * evaluating to the updated count.  No locking or assertion is performed
 * here.
 */
#define	MDI_PI_HOLD(pip)		(++MDI_PI(pip)->pi_ref_cnt)
#define	MDI_PI_RELE(pip)		(--MDI_PI(pip)->pi_ref_cnt)

#define	MDI_EXT_STATE_CHANGE		0x10000000


#define	MDI_DISABLE_OP			0x1
#define	MDI_ENABLE_OP			0x2
#define	MDI_BEFORE_STATE_CHANGE		0x4
#define	MDI_AFTER_STATE_CHANGE		0x8
#define	MDI_SYNC_FLAG			0x10

/*
 * Accessors that split pi_state / pi_old_state into the base state
 * (MDI_PATHINFO_STATE_MASK) and the extended state
 * (MDI_PATHINFO_EXT_STATE_MASK) portions.
 */
#define	MDI_PI_STATE(pip)						\
	(MDI_PI((pip))->pi_state & MDI_PATHINFO_STATE_MASK)
#define	MDI_PI_OLD_STATE(pip)						\
	(MDI_PI((pip))->pi_old_state & MDI_PATHINFO_STATE_MASK)

#define	MDI_PI_EXT_STATE(pip)						\
	(MDI_PI((pip))->pi_state & MDI_PATHINFO_EXT_STATE_MASK)
#define	MDI_PI_OLD_EXT_STATE(pip)					\
	(MDI_PI((pip))->pi_old_state & MDI_PATHINFO_EXT_STATE_MASK)

/*
 * Single-bit modifiers of pi_state.  Each SET/CLEAR macro asserts that
 * pi_mutex is held; the IS_* predicates return the masked bit.
 */
#define	MDI_PI_SET_TRANSIENT(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_TRANSIENT;		}
#define	MDI_PI_CLEAR_TRANSIENT(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_TRANSIENT;		}
#define	MDI_PI_IS_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT)

#define	MDI_PI_SET_USER_DISABLE(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_USER_DISABLE;	}
#define	MDI_PI_SET_DRV_DISABLE(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE;	}
#define	MDI_PI_SET_DRV_DISABLE_TRANS(pip)				{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; }

#define	MDI_PI_SET_USER_ENABLE(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_USER_DISABLE;	}
#define	MDI_PI_SET_DRV_ENABLE(pip)					{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE;	}
#define	MDI_PI_SET_DRV_ENABLE_TRANS(pip)				{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; }

#define	MDI_PI_IS_USER_DISABLE(pip)					\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_USER_DISABLE)
#define	MDI_PI_IS_DRV_DISABLE(pip)					\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE)
#define	MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip)				\
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)

/* True when any of the three disable bits is set. */
#define	MDI_PI_IS_DISABLE(pip)						\
	(MDI_PI_IS_USER_DISABLE(pip) ||					\
	MDI_PI_IS_DRV_DISABLE(pip) ||					\
	MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip))

/*
 * Base state transitions.
 *
 * The "...ING" setters (ONLINING, OFFLINING, STANDBYING, FAULTING) save the
 * current base state in pi_old_state, then store the target state combined
 * with MDI_PATHINFO_STATE_TRANSIENT while carrying over the extended-state
 * bits.  The final-state setters (ONLINE, OFFLINE, STANDBY) store the target
 * state and carry over the extended-state bits without touching
 * pi_old_state.  MDI_PI_SET_INIT() overwrites pi_state outright, so any
 * extended-state bits are dropped.
 *
 * These macros declare a block-local 'ext_state'; an argument expression
 * that itself refers to a variable of that name would be captured.
 */
#define	MDI_PI_IS_INIT(pip)						\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==		\
		MDI_PATHINFO_STATE_INIT)

#define	MDI_PI_IS_INITING(pip)						\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==	\
		(MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_INIT(pip)						{\
	ASSERT(MDI_PI_LOCKED(pip));					\
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT;		}

#define	MDI_PI_SET_ONLINING(pip)					{\
	uint32_t	ext_state;					\
	ASSERT(MDI_PI_LOCKED(pip));					\
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	(MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT);	\
	MDI_PI(pip)->pi_state |= ext_state;				}

#define	MDI_PI_IS_ONLINING(pip)						\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==	\
	(MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_ONLINE(pip)						{\
	uint32_t	ext_state;					\
	ASSERT(MDI_PI_LOCKED(pip));					\
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_ONLINE;		\
	MDI_PI(pip)->pi_state |= ext_state;				}

#define	MDI_PI_IS_ONLINE(pip)						\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==		\
	MDI_PATHINFO_STATE_ONLINE)

#define	MDI_PI_SET_OFFLINING(pip)					{\
	uint32_t	ext_state;					\
	ASSERT(MDI_PI_LOCKED(pip));					\
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	(MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT);	\
	MDI_PI(pip)->pi_state |= ext_state;				}

#define	MDI_PI_IS_OFFLINING(pip)					\
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) ==	\
	(MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_OFFLINE(pip)						{\
	uint32_t	ext_state;					\
	ASSERT(MDI_PI_LOCKED(pip));					\
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_OFFLINE;		\
	MDI_PI(pip)->pi_state |= ext_state;				}

#define	MDI_PI_IS_OFFLINE(pip)						\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==		\
	MDI_PATHINFO_STATE_OFFLINE)

#define	MDI_PI_SET_STANDBYING(pip)					{\
	uint32_t	ext_state;					\
	ASSERT(MDI_PI_LOCKED(pip));					\
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	(MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT);	\
	MDI_PI(pip)->pi_state |= ext_state;				}

#define	MDI_PI_SET_STANDBY(pip)						{\
	uint32_t	ext_state;					\
	ASSERT(MDI_PI_LOCKED(pip));					\
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_STANDBY;		\
	MDI_PI(pip)->pi_state |= ext_state;				}

#define	MDI_PI_IS_STANDBY(pip)						\
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) ==		\
	MDI_PATHINFO_STATE_STANDBY)

#define	MDI_PI_SET_FAULTING(pip)					{\
	uint32_t	ext_state;					\
	ASSERT(MDI_PI_LOCKED(pip));					\
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip);			\
	MDI_PI(pip)->pi_state =						\
	    (MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT);	\
	MDI_PI(pip)->pi_state
|= ext_state; } 947 948 #define MDI_PI_SET_FAULT(pip) {\ 949 uint32_t ext_state; \ 950 ASSERT(MDI_PI_LOCKED(pip)); \ 951 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 952 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_FAULT; \ 953 MDI_PI(pip)->pi_state |= ext_state; } 954 955 #define MDI_PI_IS_FAULT(pip) \ 956 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 957 MDI_PATHINFO_STATE_FAULT) 958 959 #define MDI_PI_IS_SUSPENDED(pip) \ 960 ((MDI_PI(pip))->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND) 961 962 /* 963 * mdi_vhcache_client, mdi_vhcache_pathinfo, and mdi_vhcache_phci structures 964 * hold the vhci to phci client mappings of the on-disk vhci busconfig cache. 965 */ 966 967 /* phci structure of vhci cache */ 968 typedef struct mdi_vhcache_phci { 969 char *cphci_path; /* phci path name */ 970 uint32_t cphci_id; /* used when building nvlist */ 971 mdi_phci_t *cphci_phci; /* pointer to actual phci */ 972 struct mdi_vhcache_phci *cphci_next; /* next in vhci phci list */ 973 } mdi_vhcache_phci_t; 974 975 /* pathinfo structure of vhci cache */ 976 typedef struct mdi_vhcache_pathinfo { 977 char *cpi_addr; /* path address */ 978 mdi_vhcache_phci_t *cpi_cphci; /* phci the path belongs to */ 979 struct mdi_pathinfo *cpi_pip; /* ptr to actual pathinfo */ 980 uint32_t cpi_flags; /* see below */ 981 struct mdi_vhcache_pathinfo *cpi_next; /* next path for the client */ 982 } mdi_vhcache_pathinfo_t; 983 984 /* 985 * cpi_flags 986 * 987 * MDI_CPI_HINT_PATH_DOES_NOT_EXIST - set when configuration of the path has 988 * failed. 
989 */ 990 #define MDI_CPI_HINT_PATH_DOES_NOT_EXIST 0x0001 991 992 /* client structure of vhci cache */ 993 typedef struct mdi_vhcache_client { 994 char *cct_name_addr; /* client address */ 995 mdi_vhcache_pathinfo_t *cct_cpi_head; /* client's path list head */ 996 mdi_vhcache_pathinfo_t *cct_cpi_tail; /* client's path list tail */ 997 struct mdi_vhcache_client *cct_next; /* next in vhci client list */ 998 } mdi_vhcache_client_t; 999 1000 /* vhci cache structure - one for vhci instance */ 1001 typedef struct mdi_vhci_cache { 1002 mdi_vhcache_phci_t *vhcache_phci_head; /* phci list head */ 1003 mdi_vhcache_phci_t *vhcache_phci_tail; /* phci list tail */ 1004 mdi_vhcache_client_t *vhcache_client_head; /* client list head */ 1005 mdi_vhcache_client_t *vhcache_client_tail; /* client list tail */ 1006 mod_hash_t *vhcache_client_hash; /* client hash */ 1007 int vhcache_flags; /* see below */ 1008 int64_t vhcache_clean_time; /* last clean time */ 1009 krwlock_t vhcache_lock; /* cache lock */ 1010 } mdi_vhci_cache_t; 1011 1012 /* vhcache_flags */ 1013 #define MDI_VHCI_CACHE_SETUP_DONE 0x0001 /* cache setup completed */ 1014 1015 /* vhci bus config structure - one for vhci instance */ 1016 typedef struct mdi_vhci_config { 1017 char *vhc_vhcache_filename; /* on-disk file name */ 1018 mdi_vhci_cache_t vhc_vhcache; /* vhci cache */ 1019 kmutex_t vhc_lock; /* vhci config lock */ 1020 kcondvar_t vhc_cv; 1021 int vhc_flags; /* see below */ 1022 1023 /* flush vhci cache when lbolt reaches vhc_flush_at_ticks */ 1024 clock_t vhc_flush_at_ticks; 1025 1026 /* 1027 * Head and tail of the client list whose paths are being configured 1028 * asynchronously. vhc_acc_count is the number of clients on this list. 1029 * vhc_acc_thrcount is the number threads running to configure 1030 * the paths for these clients. 
1031 */ 1032 struct mdi_async_client_config *vhc_acc_list_head; 1033 struct mdi_async_client_config *vhc_acc_list_tail; 1034 int vhc_acc_count; 1035 int vhc_acc_thrcount; 1036 1037 /* callback id - for flushing the cache during system shutdown */ 1038 callb_id_t vhc_cbid; 1039 1040 /* 1041 * vhc_path_discovery_boot - number of times path discovery will be 1042 * attempted during early boot. 1043 * vhc_path_discovery_postboot number of times path discovery will be 1044 * attempted during late boot. 1045 * vhc_path_discovery_cutoff_time - time at which paths were last 1046 * discovered + some timeout 1047 */ 1048 int vhc_path_discovery_boot; 1049 int vhc_path_discovery_postboot; 1050 int64_t vhc_path_discovery_cutoff_time; 1051 } mdi_vhci_config_t; 1052 1053 /* vhc_flags */ 1054 #define MDI_VHC_SINGLE_THREADED 0x0001 /* config single threaded */ 1055 #define MDI_VHC_EXIT 0x0002 /* exit all config activity */ 1056 #define MDI_VHC_VHCACHE_DIRTY 0x0004 /* cache dirty */ 1057 #define MDI_VHC_VHCACHE_FLUSH_THREAD 0x0008 /* cache flush thead running */ 1058 #define MDI_VHC_VHCACHE_FLUSH_ERROR 0x0010 /* failed to flush cache */ 1059 #define MDI_VHC_READONLY_FS 0x0020 /* filesys is readonly */ 1060 1061 typedef struct mdi_phys_path { 1062 char *phys_path; 1063 struct mdi_phys_path *phys_path_next; 1064 } mdi_phys_path_t; 1065 1066 /* 1067 * Lookup tokens are used to cache the result of the vhci cache client lookup 1068 * operations (to reduce the number of real lookup operations). 
1069 */ 1070 typedef struct mdi_vhcache_lookup_token { 1071 mdi_vhcache_client_t *lt_cct; /* vhcache client */ 1072 int64_t lt_cct_lookup_time; /* last lookup time */ 1073 } mdi_vhcache_lookup_token_t; 1074 1075 /* asynchronous configuration of client paths */ 1076 typedef struct mdi_async_client_config { 1077 char *acc_ct_name; /* client name */ 1078 char *acc_ct_addr; /* client address */ 1079 mdi_phys_path_t *acc_phclient_path_list_head; /* path head */ 1080 mdi_vhcache_lookup_token_t acc_token; /* lookup token */ 1081 struct mdi_async_client_config *acc_next; /* next in vhci acc list */ 1082 } mdi_async_client_config_t; 1083 1084 /* 1085 * vHCI driver instance registration/unregistration 1086 * 1087 * mdi_vhci_register() is called by a vHCI driver to register itself as the 1088 * manager of devices from a particular 'class'. This should be called from 1089 * attach(9e). 1090 * 1091 * mdi_vhci_unregister() is called from detach(9E) to unregister a vHCI 1092 * instance from the framework. 1093 */ 1094 int mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int); 1095 int mdi_vhci_unregister(dev_info_t *, int); 1096 1097 /* 1098 * Utility functions 1099 */ 1100 int mdi_phci_get_path_count(dev_info_t *); 1101 dev_info_t *mdi_phci_path2devinfo(dev_info_t *, caddr_t); 1102 1103 1104 /* 1105 * Path Selection Functions: 1106 * 1107 * mdi_select_path() is called by a vHCI driver to select to which path an 1108 * I/O request should be routed. The caller passes the 'buf' structure as 1109 * one of the parameters. The mpxio framework uses the buf's contents to 1110 * maintain per path statistics (total I/O size / count pending). If more 1111 * than one online path is available, the framework automatically selects 1112 * a suitable one. If a failover operation is active for this client device 1113 * the call fails, returning MDI_BUSY. 
 *
 * By default this function returns a suitable path in the 'online' state,
 * based on the current load balancing policy.  Currently we support
 * LOAD_BALANCE_NONE (the previously selected online path will continue to be
 * used as long as the path is usable) and LOAD_BALANCE_RR (online paths
 * will be selected in a round robin fashion).  The load balancing scheme
 * can be configured in the vHCI driver's configuration file (driver.conf).
 *
 * vHCI drivers may override this default behaviour by specifying appropriate
 * flags.  If start_pip is specified (non NULL), it is used as the routine's
 * starting point; it starts walking from there to find the next appropriate
 * path.
 *
 * The following values for 'flags' are currently defined; the meaning of
 * the 'arg' argument (the void * that follows 'flags') passed to
 * mdi_select_path() depends on the flags used.
 *
 *   <none>:				default, arg is pip
 *   MDI_SELECT_ONLINE_PATH:		select an ONLINE path, arg is pip
 *   MDI_SELECT_STANDBY_PATH:		select a STANDBY path, arg is pip
 *   MDI_SELECT_USER_DISABLE_PATH:	select user disable for failover and
 *					auto_failback
 *   MDI_SELECT_PATH_INSTANCE:		select a specific path, arg is
 *					path instance
 *
 * The selected paths are returned in an mdi_hold_path() state (pi_ref_cnt);
 * the caller should release the hold by calling mdi_rele_path() at the end
 * of the operation.
1141 */ 1142 int mdi_select_path(dev_info_t *, struct buf *, int, 1143 void *, mdi_pathinfo_t **); 1144 int mdi_set_lb_policy(dev_info_t *, client_lb_t); 1145 int mdi_set_lb_region_size(dev_info_t *, int); 1146 client_lb_t mdi_get_lb_policy(dev_info_t *); 1147 1148 /* 1149 * flags for mdi_select_path() routine 1150 */ 1151 #define MDI_SELECT_ONLINE_PATH 0x0001 1152 #define MDI_SELECT_STANDBY_PATH 0x0002 1153 #define MDI_SELECT_USER_DISABLE_PATH 0x0004 1154 #define MDI_SELECT_PATH_INSTANCE 0x0008 1155 1156 /* 1157 * MDI client device utility functions 1158 */ 1159 int mdi_client_get_path_count(dev_info_t *); 1160 dev_info_t *mdi_client_path2devinfo(dev_info_t *, caddr_t); 1161 1162 /* 1163 * Failover: 1164 * 1165 * The vHCI driver calls mdi_failover() to initiate a failover operation. 1166 * mdi_failover() calls back into the vHCI driver's vo_failover() 1167 * entry point to perform the actual failover operation. The reason 1168 * for requiring the vHCI driver to initiate failover by calling 1169 * mdi_failover(), instead of directly executing vo_failover() itself, 1170 * is to ensure that the mdi framework can keep track of the client 1171 * state properly. Additionally, mdi_failover() provides as a 1172 * convenience the option of performing the failover operation 1173 * synchronously or asynchronously 1174 * 1175 * Upon successful completion of the failover operation, the paths that were 1176 * previously ONLINE will be in the STANDBY state, and the newly activated 1177 * paths will be in the ONLINE state. 1178 * 1179 * The flags modifier determines whether the activation is done synchronously 1180 */ 1181 int mdi_failover(dev_info_t *, dev_info_t *, int); 1182 1183 /* 1184 * Client device failover mode of operation 1185 */ 1186 #define MDI_FAILOVER_SYNC 1 /* Syncronous Failover */ 1187 #define MDI_FAILOVER_ASYNC 2 /* Asyncronous Failover */ 1188 1189 /* 1190 * mdi_pathinfo node kstat functions. 
1191 */ 1192 int mdi_pi_kstat_exists(mdi_pathinfo_t *); 1193 int mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ks_name); 1194 void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *); 1195 1196 /* 1197 * mdi_pathinfo node extended state change functions. 1198 */ 1199 int mdi_pi_get_state2(mdi_pathinfo_t *, mdi_pathinfo_state_t *, uint32_t *); 1200 int mdi_pi_get_preferred(mdi_pathinfo_t *); 1201 1202 /* 1203 * mdi_pathinfo node member functions 1204 */ 1205 void *mdi_pi_get_client_private(mdi_pathinfo_t *); 1206 void mdi_pi_set_client_private(mdi_pathinfo_t *, void *); 1207 void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t); 1208 void mdi_pi_set_preferred(mdi_pathinfo_t *, int); 1209 1210 /* get/set vhci private data */ 1211 void *mdi_client_get_vhci_private(dev_info_t *); 1212 void mdi_client_set_vhci_private(dev_info_t *, void *); 1213 void *mdi_phci_get_vhci_private(dev_info_t *); 1214 void mdi_phci_set_vhci_private(dev_info_t *, void *); 1215 void *mdi_pi_get_vhci_private(mdi_pathinfo_t *); 1216 void mdi_pi_set_vhci_private(mdi_pathinfo_t *, void *); 1217 1218 /* 1219 * mdi_pathinfo Property utilities 1220 */ 1221 int mdi_prop_size(mdi_pathinfo_t *, size_t *); 1222 int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t); 1223 1224 /* obsolete interface, to be removed */ 1225 void mdi_get_next_path(dev_info_t *, mdi_pathinfo_t *, mdi_pathinfo_t **); 1226 int mdi_get_component_type(dev_info_t *); 1227 1228 #endif /* _KERNEL */ 1229 1230 #ifdef __cplusplus 1231 } 1232 #endif 1233 1234 #endif /* _SYS_MDI_IMPLDEFS_H */ 1235