1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _SYS_MDI_IMPLDEFS_H 27 #define _SYS_MDI_IMPLDEFS_H 28 29 30 #include <sys/note.h> 31 #include <sys/types.h> 32 #include <sys/sunmdi.h> 33 #include <sys/modhash.h> 34 #include <sys/callb.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 #ifdef _KERNEL 41 42 /* 43 * Multipath Driver Interfaces 44 * 45 * The multipathing framework is provided in two modules. The 'mpxio' misc. 46 * module provides the core multipath framework and the 'scsi_vhci' nexus 47 * driver provides the SCSI-III command set driver functionality for 48 * managing Fibre-Channel storage devices. 49 * 50 * As in any multipathing solution there are three major problems to solve: 51 * 52 * 1) Identification and enumeration of multipath client devices. 53 * 2) Optimal path selection when routing I/O requests. 54 * 3) Observability interfaces to snapshot the multipath configuration, 55 * and infrastructure to provide performance and error statistics. 
56 * 57 * The mpxio framework consists of several major components: 58 * 59 * 1) The MDI is the Multiplexed Device Interface; this is the core glue which 60 * holds the following components together. 61 * 2) vHCI (Virtual Host Controller Interconnect) drivers provide multipathing 62 * services for a given bus technology (example: 'scsi_vhci' provides 63 * multipathing support for SCSI-III fibre-channel devices). 64 * 3) pHCI (Physical Host Controller Interconnect) drivers provide transport 65 * services for a given host controller (example: 'fcp' provides transport 66 * for fibre-channel devices). 67 * 4) Client Devices are standard Solaris target (or leaf) drivers 68 * (example: 'ssd' is the standard disk driver for fibre-channel arrays). 69 * 5) Multipath information nodes ('pathinfo' nodes) connect client device 70 * nodes and pHCI device nodes in the device tree. 71 * 72 * With the scsi_vhci, a QLC card, and mpxio enabled, the device tree might 73 * look like this: 74 * 75 * /\ 76 * / ............ 
77 * <vHCI>:/ \ 78 * +-----------+ +-----------+ 79 * | scsi_vhci | | pci@1f,0 | 80 * +-----------+ +-----------+ 81 * / \ \ 82 * <Client>: / \ :<Client> \ :parent(pHCI) 83 * +----------+ +-----------+ +-------------+ 84 * | ssd 1 | | ssd 2 | | qlc@0,0 | 85 * +----------+ +-----------+ +-------------+ 86 * | | / \ 87 * | | <pHCI>: / \ :<pHCI> 88 * | | +-------------+ +-------------+ 89 * | | | pHCI 1 (fp) | | pHCI 2 (fp) | 90 * | | +-------------+ +-------------+ 91 * | | / | / | 92 * | | +------+ | +------+ | 93 * | | | ssd 3| | | ssd | | 94 * | | |!mpxio| | | (OBP)| | 95 * | | +------+ | +------+ | 96 * | | | | 97 * | | <pathinfo>: | | 98 * | | +-------+ +--------+ 99 * | +-------------->| path |-------->| path | 100 * | | info | | info | 101 * | | node 1| | node 3 | 102 * | +-------+ +--------+ 103 * | | | 104 * | | +~~~~~~~~+ 105 * | +-------+ :+--------+ 106 * +--------------------------->| path |-------->| path | 107 * | info | :| info | 108 * | node 2| +| node 4 | 109 * +-------+ +--------+ 110 * 111 * The multipath information nodes (mdi_pathinfo nodes) establish the 112 * relationship between the pseudo client driver instance nodes (children 113 * of the vHCI) and the physical host controller interconnect (pHCI 114 * drivers) forming a matrix structure. 115 * 116 * The mpxio module implements locking at multiple granularity levels to 117 * support the needs of various consumers. The multipath matrix can be 118 * column locked, or row locked depending on the consumer. The intention 119 * is to balance simplicity and performance. 120 * 121 * Locking: 122 * 123 * The devinfo locking still applies: 124 * 125 * 1) An ndi_devi_enter of a parent protects linkage/state of children. 126 * 2) state >= DS_INITIALIZED adds devi_ref of parent 127 * 3) devi_ref at state >= DS_ATTACHED prevents detach(9E). 128 * 129 * The ordering of 1) is (vHCI, pHCI). For a DEBUG kernel this ordering 130 * is asserted by the ndi_devi_enter() implementation. 
There is also an 131 * ndi_devi_enter(Client), which is atypical since the client is a leaf. 132 * This is done to synchronize pathinfo nodes during devinfo snapshot (see 133 * di_register_pip) by pretending that the pathinfo nodes are children 134 * of the client. 135 * 136 * In addition to devinfo locking the current implementation utilizes 137 * the following locks: 138 * 139 * mdi_mutex: protects the global list of vHCIs. 140 * 141 * vh_phci_mutex: per-vHCI (mutex) lock: protects list of pHCIs registered 142 * with vHCI. 143 * 144 * vh_client_mutex: per-vHCI (mutex) lock: protects list/hash of Clients 145 * associated with vHCI. 146 * 147 * ph_mutex: per-pHCI (mutex) lock: protects the column (pHCI-mdi_pathinfo 148 * node list) and per-pHCI structure fields. mdi_pathinfo node creation, 149 * deletion and child mdi_pathinfo node state changes are serialized on per 150 * pHCI basis (Protection against DR). 151 * 152 * ct_mutex: per-client (mutex) lock: protects the row (client-mdi_pathinfo 153 * node list) and per-client structure fields. The client-mdi_pathinfo node 154 * list is typically walked to select an optimal path when routing I/O 155 * requests. 156 * 157 * pi_mutex: per-mdi_pathinfo (mutex) lock: protects the mdi_pathinfo node 158 * structure fields. 159 * 160 * Note that per-Client structure and per-pHCI fields are freely readable when 161 * corresponding mdi_pathinfo locks are held, since holding an mdi_pathinfo 162 * node guarantees that its corresponding client and pHCI devices will not be 163 * freed. 164 */ 165 166 /* 167 * MDI Client global unique identifier property name string definition 168 */ 169 extern const char *mdi_client_guid_prop; 170 #define MDI_CLIENT_GUID_PROP (char *)mdi_client_guid_prop 171 172 /* 173 * MDI Client load balancing policy definitions 174 * 175 * Load balancing policies are determined on a per-vHCI basis and are 176 * configurable via the vHCI's driver.conf file. 
177 */ 178 typedef enum { 179 LOAD_BALANCE_NONE, /* Alternate pathing */ 180 LOAD_BALANCE_RR, /* Round Robin */ 181 LOAD_BALANCE_LBA /* Logical Block Addressing */ 182 } client_lb_t; 183 184 typedef struct { 185 int region_size; 186 }client_lb_args_t; 187 188 /* 189 * MDI client load balancing property name/value string definitions 190 */ 191 extern const char *mdi_load_balance; 192 extern const char *mdi_load_balance_none; 193 extern const char *mdi_load_balance_ap; 194 extern const char *mdi_load_balance_rr; 195 extern const char *mdi_load_balance_lba; 196 197 #define LOAD_BALANCE_PROP (char *)mdi_load_balance 198 #define LOAD_BALANCE_PROP_NONE (char *)mdi_load_balance_none 199 #define LOAD_BALANCE_PROP_AP (char *)mdi_load_balance_ap 200 #define LOAD_BALANCE_PROP_RR (char *)mdi_load_balance_rr 201 #define LOAD_BALANCE_PROP_LBA (char *)mdi_load_balance_lba 202 203 /* default for region size */ 204 #define LOAD_BALANCE_DEFAULT_REGION_SIZE 18 205 206 /* 207 * vHCI drivers: 208 * 209 * vHCI drivers are pseudo nexus drivers which implement multipath services 210 * for a specific command set or bus architecture ('class'). There is a 211 * single instance of the vHCI driver for each command set which supports 212 * multipath devices. 213 * 214 * Each vHCI driver registers the following callbacks from attach(9e). 
215 */ 216 #define MDI_VHCI_OPS_REV_1 1 217 /* 218 * Change MDI_VHCI_OPS_REV_NAME as per MDI_VHCI_OPS_REV 219 */ 220 #define MDI_VHCI_OPS_REV MDI_VHCI_OPS_REV_1 221 #define MDI_VHCI_OPS_REV_NAME "1" 222 223 typedef struct mdi_vhci_ops { 224 /* revision management */ 225 int vo_revision; 226 227 /* mdi_pathinfo node init callback */ 228 int (*vo_pi_init)(dev_info_t *vdip, mdi_pathinfo_t *pip, int flags); 229 230 /* mdi_pathinfo node uninit callback */ 231 int (*vo_pi_uninit)(dev_info_t *vdip, mdi_pathinfo_t *pip, 232 int flags); 233 234 /* mdi_pathinfo node state change callback */ 235 int (*vo_pi_state_change)(dev_info_t *vdip, mdi_pathinfo_t *pip, 236 mdi_pathinfo_state_t state, uint32_t, int flags); 237 238 /* Client path failover callback */ 239 int (*vo_failover)(dev_info_t *vdip, dev_info_t *cdip, int flags); 240 241 /* Client attached callback */ 242 void (*vo_client_attached)(dev_info_t *cdip); 243 } mdi_vhci_ops_t; 244 245 /* 246 * An mdi_vhci structure is created and bound to the devinfo node of every 247 * registered vHCI class driver; this happens when a vHCI registers itself from 248 * attach(9e). This structure is unbound and freed when the vHCI unregisters 249 * at detach(9e) time; 250 * 251 * Each vHCI driver is associated with a vHCI class name; this is the handle 252 * used to register and unregister pHCI drivers for a given transport. 253 * 254 * Locking: Different parts of this structure are guarded by different 255 * locks: global threading of multiple vHCIs and initialization is protected 256 * by mdi_mutex, the list of pHCIs associated with a vHCI is protected by 257 * vh_phci_mutex, and Clients are protected by vh_client_mutex. 258 * 259 * XXX Depending on the context, some of the fields can be freely read without 260 * holding any locks (ex. holding vh_client_mutex lock also guarantees that 261 * the vHCI (parent) cannot be unexpectedly freed). 262 */ 263 typedef struct mdi_vhci { 264 /* protected by mdi_mutex... 
*/ 265 struct mdi_vhci *vh_next; /* next vHCI link */ 266 struct mdi_vhci *vh_prev; /* prev vHCI link */ 267 char *vh_class; /* vHCI class name */ 268 dev_info_t *vh_dip; /* vHCI devi handle */ 269 int vh_refcnt; /* vHCI reference count */ 270 struct mdi_vhci_config *vh_config; /* vHCI config */ 271 client_lb_t vh_lb; /* vHCI load-balancing */ 272 struct mdi_vhci_ops *vh_ops; /* vHCI callback vectors */ 273 274 /* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */ 275 kmutex_t vh_phci_mutex; /* pHCI mutex */ 276 int vh_phci_count; /* pHCI device count */ 277 struct mdi_phci *vh_phci_head; /* pHCI list head */ 278 struct mdi_phci *vh_phci_tail; /* pHCI list tail */ 279 280 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 281 kmutex_t vh_client_mutex; /* Client mutex */ 282 int vh_client_count; /* Client count */ 283 struct client_hash *vh_client_table; /* Client hash */ 284 } mdi_vhci_t; 285 286 /* 287 * per-vHCI lock macros 288 */ 289 #define MDI_VHCI_PHCI_LOCK(vh) mutex_enter(&(vh)->vh_phci_mutex) 290 #define MDI_VHCI_PHCI_TRYLOCK(vh) mutex_tryenter(&(vh)->vh_phci_mutex) 291 #define MDI_VHCI_PHCI_UNLOCK(vh) mutex_exit(&(vh)->vh_phci_mutex) 292 #ifdef DEBUG 293 #define MDI_VHCI_PCHI_LOCKED(vh) MUTEX_HELD(&(vh)->vh_phci_mutex) 294 #endif /* DEBUG */ 295 #define MDI_VHCI_CLIENT_LOCK(vh) mutex_enter(&(vh)->vh_client_mutex) 296 #define MDI_VHCI_CLIENT_TRYLOCK(vh) mutex_tryenter(&(vh)->vh_client_mutex) 297 #define MDI_VHCI_CLIENT_UNLOCK(vh) mutex_exit(&(vh)->vh_client_mutex) 298 #ifdef DEBUG 299 #define MDI_VHCI_CLIENT_LOCKED(vh) MUTEX_HELD(&(vh)->vh_client_mutex) 300 #endif /* DEBUG */ 301 302 303 /* 304 * GUID Hash definitions 305 * 306 * Since all the mpxio managed devices for a given class are enumerated under 307 * the single vHCI instance for that class, sequentially walking through the 308 * client device link to find a client would be prohibitively slow. 
 */

#define	CLIENT_HASH_TABLE_SIZE	(32)	/* GUID hash */

/*
 * Client hash table structure
 *
 * NOTE(review): mdi_vhci::vh_client_table points at this type; presumably it
 * is an array of CLIENT_HASH_TABLE_SIZE of these buckets -- confirm against
 * the allocation site.
 */
struct client_hash {
	struct mdi_client	*ct_hash_head;	/* Client hash head */
	int			ct_hash_count;	/* Client hash count */
};


/*
 * pHCI Drivers:
 *
 * Physical HBA drivers provide transport services for mpxio-managed devices.
 * As each pHCI instance is attached, it must register itself with the mpxio
 * framework using mdi_phci_register().  When the pHCI is detached it must
 * similarly call mdi_phci_unregister().
 *
 * The framework maintains a list of registered pHCI device instances for each
 * vHCI.  This list involves (vh_phci_count, vh_phci_head, vh_phci_tail) and
 * (ph_next, ph_prev, ph_vhci) and is protected by vh_phci_mutex.
 *
 * Locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_phci::ph_mutex))		XXX
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex devinfo_tree_lock))	XXX
 */
typedef struct mdi_phci {
	/* protected by MDI_VHCI_PHCI_LOCK vh_phci_mutex... */
	struct mdi_phci		*ph_next;	/* next pHCI link */
	struct mdi_phci		*ph_prev;	/* prev pHCI link */
	dev_info_t		*ph_dip;	/* pHCI devi handle */
	struct mdi_vhci		*ph_vhci;	/* pHCI back ref. to vHCI */

	/* protected by MDI_PHCI_LOCK ph_mutex... */
	kmutex_t		ph_mutex;	/* per-pHCI mutex */
	int			ph_path_count;	/* pi count */
	mdi_pathinfo_t		*ph_path_head;	/* pi list head */
	mdi_pathinfo_t		*ph_path_tail;	/* pi list tail */
	int			ph_flags;	/* pHCI operation flags */
						/* (MDI_PHCI_FLAGS_* bits) */
	int			ph_unstable;	/* Paths in transient state */
						/* (count; see */
						/* MDI_PHCI_UNSTABLE/STABLE) */
	kcondvar_t		ph_unstable_cv;	/* broadcast by */
						/* MDI_PHCI_STABLE when */
						/* ph_unstable reaches 0 */

	/* protected by mdi_phci_[gs]et_vhci_private caller... */
	void			*ph_vprivate;	/* vHCI driver private */
} mdi_phci_t;

/*
 * A pHCI device is 'unstable' while one or more paths are in a transitional
 * state.  Hotplugging is prevented during this state.
362 */ 363 #define MDI_PHCI_UNSTABLE(ph) (ph)->ph_unstable++; 364 #define MDI_PHCI_STABLE(ph) { \ 365 (ph)->ph_unstable--; \ 366 if ((ph)->ph_unstable == 0) { \ 367 cv_broadcast(&(ph)->ph_unstable_cv); \ 368 } \ 369 } 370 371 /* 372 * per-pHCI lock macros 373 */ 374 #define MDI_PHCI_LOCK(ph) mutex_enter(&(ph)->ph_mutex) 375 #define MDI_PHCI_TRYLOCK(ph) mutex_tryenter(&(ph)->ph_mutex) 376 #define MDI_PHCI_UNLOCK(ph) mutex_exit(&(ph)->ph_mutex) 377 #ifdef DEBUG 378 #define MDI_PHCI_LOCKED(vh) MUTEX_HELD(&(ph)->ph_mutex) 379 #endif /* DEBUG */ 380 381 /* 382 * pHCI state definitions and macros to track the pHCI driver instance state 383 */ 384 #define MDI_PHCI_FLAGS_OFFLINE 0x1 /* pHCI is offline */ 385 #define MDI_PHCI_FLAGS_SUSPEND 0x2 /* pHCI is suspended */ 386 #define MDI_PHCI_FLAGS_POWER_DOWN 0x4 /* pHCI is power down */ 387 #define MDI_PHCI_FLAGS_DETACH 0x8 /* pHCI is detached */ 388 #define MDI_PHCI_FLAGS_USER_DISABLE 0x10 /* pHCI is disabled,user */ 389 #define MDI_PHCI_FLAGS_D_DISABLE 0x20 /* pHCI is disabled,driver */ 390 #define MDI_PHCI_FLAGS_D_DISABLE_TRANS 0x40 /* pHCI is disabled,transient */ 391 #define MDI_PHCI_FLAGS_POWER_TRANSITION 0x80 /* pHCI is power transition */ 392 393 #define MDI_PHCI_DISABLE_MASK \ 394 (MDI_PHCI_FLAGS_USER_DISABLE | MDI_PHCI_FLAGS_D_DISABLE | \ 395 MDI_PHCI_FLAGS_D_DISABLE_TRANS) 396 397 #define MDI_PHCI_IS_READY(ph) \ 398 (((ph)->ph_flags & MDI_PHCI_DISABLE_MASK) == 0) 399 400 #define MDI_PHCI_SET_OFFLINE(ph) {\ 401 ASSERT(MDI_PHCI_LOCKED(ph)); \ 402 (ph)->ph_flags |= MDI_PHCI_FLAGS_OFFLINE; } 403 #define MDI_PHCI_SET_ONLINE(ph) {\ 404 ASSERT(MDI_PHCI_LOCKED(ph)); \ 405 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_OFFLINE; } 406 #define MDI_PHCI_IS_OFFLINE(ph) \ 407 ((ph)->ph_flags & MDI_PHCI_FLAGS_OFFLINE) 408 409 #define MDI_PHCI_SET_SUSPEND(ph) {\ 410 ASSERT(MDI_PHCI_LOCKED(ph)); \ 411 (ph)->ph_flags |= MDI_PHCI_FLAGS_SUSPEND; } 412 #define MDI_PHCI_SET_RESUME(ph) {\ 413 ASSERT(MDI_PHCI_LOCKED(ph)); \ 414 (ph)->ph_flags &= 
~MDI_PHCI_FLAGS_SUSPEND; } 415 #define MDI_PHCI_IS_SUSPENDED(ph) \ 416 ((ph)->ph_flags & MDI_PHCI_FLAGS_SUSPEND) 417 418 #define MDI_PHCI_SET_DETACH(ph) {\ 419 ASSERT(MDI_PHCI_LOCKED(ph)); \ 420 (ph)->ph_flags |= MDI_PHCI_FLAGS_DETACH; } 421 #define MDI_PHCI_SET_ATTACH(ph) {\ 422 ASSERT(MDI_PHCI_LOCKED(ph)); \ 423 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_DETACH; } 424 425 #define MDI_PHCI_SET_POWER_DOWN(ph) {\ 426 ASSERT(MDI_PHCI_LOCKED(ph)); \ 427 (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_DOWN; } 428 #define MDI_PHCI_SET_POWER_UP(ph) {\ 429 ASSERT(MDI_PHCI_LOCKED(ph)); \ 430 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_DOWN; } 431 #define MDI_PHCI_IS_POWERED_DOWN(ph) \ 432 ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_DOWN) 433 434 #define MDI_PHCI_SET_USER_ENABLE(ph) {\ 435 ASSERT(MDI_PHCI_LOCKED(ph)); \ 436 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_USER_DISABLE; } 437 #define MDI_PHCI_SET_USER_DISABLE(ph) {\ 438 ASSERT(MDI_PHCI_LOCKED(ph)); \ 439 (ph)->ph_flags |= MDI_PHCI_FLAGS_USER_DISABLE; } 440 #define MDI_PHCI_IS_USER_DISABLED(ph) \ 441 ((ph)->ph_flags & MDI_PHCI_FLAGS_USER_DISABLE) 442 443 #define MDI_PHCI_SET_DRV_ENABLE(ph) {\ 444 ASSERT(MDI_PHCI_LOCKED(ph)); \ 445 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE; } 446 #define MDI_PHCI_SET_DRV_DISABLE(ph) {\ 447 ASSERT(MDI_PHCI_LOCKED(ph)); \ 448 (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE; } 449 #define MDI_PHCI_IS_DRV_DISABLED(ph) \ 450 ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE) 451 452 #define MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph) {\ 453 ASSERT(MDI_PHCI_LOCKED(ph)); \ 454 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_D_DISABLE_TRANS; } 455 #define MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph) {\ 456 ASSERT(MDI_PHCI_LOCKED(ph)); \ 457 (ph)->ph_flags |= MDI_PHCI_FLAGS_D_DISABLE_TRANS; } 458 #define MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph) \ 459 ((ph)->ph_flags & MDI_PHCI_FLAGS_D_DISABLE_TRANS) 460 461 #define MDI_PHCI_SET_POWER_TRANSITION(ph) {\ 462 ASSERT(MDI_PHCI_LOCKED(ph)); \ 463 (ph)->ph_flags |= MDI_PHCI_FLAGS_POWER_TRANSITION; } 464 #define 
MDI_PHCI_CLEAR_POWER_TRANSITION(ph) {\ 465 ASSERT(MDI_PHCI_LOCKED(ph)); \ 466 (ph)->ph_flags &= ~MDI_PHCI_FLAGS_POWER_TRANSITION; } 467 #define MDI_PHCI_IS_POWER_TRANSITION(ph) \ 468 ((ph)->ph_flags & MDI_PHCI_FLAGS_POWER_TRANSITION) 469 470 /* 471 * mpxio Managed Clients: 472 * 473 * This framework creates a struct mdi_client for every client device created 474 * by the framework as a result of self-enumeration of target devices by the 475 * registered pHCI devices. This structure is bound to client device dev_info 476 * node at the time of client device allocation (ndi_devi_alloc(9e)). This 477 * structure is unbound from the dev_info node when mpxio framework removes a 478 * client device node from the system. 479 * 480 * This structure is created when a first path is enumerated and removed when 481 * last path is de-enumerated from the system. 482 * 483 * Multipath client devices are instantiated as children of corresponding vHCI 484 * driver instance. Each client device is uniquely identified by a GUID 485 * provided by target device itself. The parent vHCI device also maintains a 486 * hashed list of client devices, protected by vh_client_mutex. 487 * 488 * Typically pHCI devices self-enumerate their child devices using taskq, 489 * resulting in multiple paths to the same client device to be enumerated by 490 * competing threads. 491 * 492 * Currently this framework supports two kinds of load-balancing policy 493 * configurable through the vHCI driver configuration files. 494 * 495 * NONE - Legacy AP mode 496 * Round Robin - Balance the pHCI load in a Round Robin fashion. 497 * 498 * This framework identifies the client device in three distinct states: 499 * 500 * OPTIMAL - Client device has atleast one redundant path. 501 * DEGRADED - No redundant paths (critical). Failure in the current active 502 * path would result in data access failures. 503 * FAILED - No paths are available to access this device. 
 *
 * Locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_client::ct_mutex))		XXX
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex devinfo_tree_lock))	XXX
 */
typedef struct mdi_client {
	/* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */
	struct mdi_client	*ct_hnext;	/* next client */
	struct mdi_client	*ct_hprev;	/* prev client */
	dev_info_t		*ct_dip;	/* client devi handle */
	struct mdi_vhci		*ct_vhci;	/* vHCI back ref */
	char			*ct_drvname;	/* client driver name */
	char			*ct_guid;	/* client guid */
	client_lb_t		ct_lb;		/* load balancing scheme */
	client_lb_args_t	*ct_lb_args;	/* load balancing args */


	/* protected by MDI_CLIENT_LOCK ct_mutex... */
	kmutex_t		ct_mutex;	/* per-client mutex */
	int			ct_path_count;	/* multi path count */
	mdi_pathinfo_t		*ct_path_head;	/* multi path list head */
	mdi_pathinfo_t		*ct_path_tail;	/* multi path list tail */
	mdi_pathinfo_t		*ct_path_last;	/* last path used for i/o */
	int			ct_state;	/* state information */
						/* (MDI_CLIENT_STATE_*) */
	int			ct_flags;	/* Driver op. flags */
						/* (MDI_CLIENT_FLAGS_* bits) */
	int			ct_failover_flags;	/* Failover args */
	int			ct_failover_status;	/* last fo status */
	kcondvar_t		ct_failover_cv;	/* Failover status cv */
	int			ct_unstable;	/* Paths in transient state */
						/* (count; see */
						/* MDI_CLIENT_UNSTABLE/STABLE) */
	kcondvar_t		ct_unstable_cv;	/* broadcast by */
						/* MDI_CLIENT_STABLE when */
						/* ct_unstable reaches 0 */

	int			ct_power_cnt;	/* Hold count on parent power */
	kcondvar_t		ct_powerchange_cv;
					/* Paths in power transient state */
	short			ct_powercnt_config;
					/* held in pre/post config */
	short			ct_powercnt_unconfig;
					/* held in pre/post unconfig */
	int			ct_powercnt_reset;
					/* ct_power_cnt was reset */

	void			*ct_cprivate;	/* client driver private */
	void			*ct_vprivate;	/* vHCI driver private */
} mdi_client_t;

/*
 * per-Client device locking definitions
 *
 * MDI_CLIENT_LOCKED() exists only in DEBUG kernels and is intended for use
 * inside ASSERT().
 */
#define	MDI_CLIENT_LOCK(ct)		mutex_enter(&(ct)->ct_mutex)
#define	MDI_CLIENT_TRYLOCK(ct)		mutex_tryenter(&(ct)->ct_mutex)
#define	MDI_CLIENT_UNLOCK(ct)		mutex_exit(&(ct)->ct_mutex)
#ifdef	DEBUG
#define	MDI_CLIENT_LOCKED(ct)		MUTEX_HELD(&(ct)->ct_mutex)
#endif	/* DEBUG */

/*
 * A Client device is unstable while one or more paths are in a transitional
 * state.  We do not allow failover to take place while paths are in transient
 * state. Similarly we do not allow state transition while client device
 * failover is in progress.
565 */ 566 #define MDI_CLIENT_UNSTABLE(ct) (ct)->ct_unstable++; 567 #define MDI_CLIENT_STABLE(ct) { \ 568 (ct)->ct_unstable--; \ 569 if ((ct)->ct_unstable == 0) { \ 570 cv_broadcast(&(ct)->ct_unstable_cv); \ 571 } \ 572 } 573 574 /* 575 * Client driver instance state definitions: 576 */ 577 #define MDI_CLIENT_FLAGS_OFFLINE 0x00000001 578 #define MDI_CLIENT_FLAGS_SUSPEND 0x00000002 579 #define MDI_CLIENT_FLAGS_POWER_DOWN 0x00000004 580 #define MDI_CLIENT_FLAGS_DETACH 0x00000008 581 #define MDI_CLIENT_FLAGS_FAILOVER 0x00000010 582 #define MDI_CLIENT_FLAGS_REPORT_DEV 0x00000020 583 #define MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS 0x00000040 584 #define MDI_CLIENT_FLAGS_ASYNC_FREE 0x00000080 585 #define MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED 0x00000100 586 #define MDI_CLIENT_FLAGS_POWER_TRANSITION 0x00000200 587 588 #define MDI_CLIENT_SET_OFFLINE(ct) {\ 589 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 590 (ct)->ct_flags |= MDI_CLIENT_FLAGS_OFFLINE; } 591 #define MDI_CLIENT_SET_ONLINE(ct) {\ 592 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 593 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_OFFLINE; } 594 #define MDI_CLIENT_IS_OFFLINE(ct) \ 595 ((ct)->ct_flags & MDI_CLIENT_FLAGS_OFFLINE) 596 597 #define MDI_CLIENT_SET_SUSPEND(ct) {\ 598 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 599 (ct)->ct_flags |= MDI_CLIENT_FLAGS_SUSPEND; } 600 #define MDI_CLIENT_SET_RESUME(ct) {\ 601 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 602 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_SUSPEND; } 603 #define MDI_CLIENT_IS_SUSPENDED(ct) \ 604 ((ct)->ct_flags & MDI_CLIENT_FLAGS_SUSPEND) 605 606 #define MDI_CLIENT_SET_POWER_DOWN(ct) {\ 607 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 608 (ct)->ct_flags |= MDI_CLIENT_FLAGS_POWER_DOWN; } 609 #define MDI_CLIENT_SET_POWER_UP(ct) {\ 610 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 611 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_DOWN; } 612 #define MDI_CLIENT_IS_POWERED_DOWN(ct) \ 613 ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_DOWN) 614 615 #define MDI_CLIENT_SET_POWER_TRANSITION(ct) {\ 616 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 617 (ct)->ct_flags |= 
MDI_CLIENT_FLAGS_POWER_TRANSITION; } 618 #define MDI_CLIENT_CLEAR_POWER_TRANSITION(ct) {\ 619 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 620 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_POWER_TRANSITION; } 621 #define MDI_CLIENT_IS_POWER_TRANSITION(ct) \ 622 ((ct)->ct_flags & MDI_CLIENT_FLAGS_POWER_TRANSITION) 623 624 #define MDI_CLIENT_SET_DETACH(ct) {\ 625 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 626 (ct)->ct_flags |= MDI_CLIENT_FLAGS_DETACH; } 627 #define MDI_CLIENT_SET_ATTACH(ct) {\ 628 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 629 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_DETACH; } 630 #define MDI_CLIENT_IS_DETACHED(ct) \ 631 ((ct)->ct_flags & MDI_CLIENT_FLAGS_DETACH) 632 633 #define MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct) {\ 634 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 635 (ct)->ct_flags |= MDI_CLIENT_FLAGS_FAILOVER; } 636 #define MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct) {\ 637 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 638 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_FAILOVER; } 639 #define MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct) \ 640 ((ct)->ct_flags & MDI_CLIENT_FLAGS_FAILOVER) 641 642 #define MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct) {\ 643 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 644 (ct)->ct_flags |= MDI_CLIENT_FLAGS_REPORT_DEV; } 645 #define MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct) {\ 646 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 647 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_REPORT_DEV; } 648 #define MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) \ 649 ((ct)->ct_flags & MDI_CLIENT_FLAGS_REPORT_DEV) 650 651 #define MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct) {\ 652 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 653 (ct)->ct_flags |= MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS; } 654 #define MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct) {\ 655 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 656 (ct)->ct_flags &= ~MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS; } 657 #define MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct) \ 658 ((ct)->ct_flags & MDI_CLIENT_FLAGS_PATH_FREE_IN_PROGRESS) 659 660 #define MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct) {\ 661 ASSERT(MDI_CLIENT_LOCKED(ct)); \ 662 (ct)->ct_flags |= 
MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED; } 663 #define MDI_CLIENT_IS_DEV_NOT_SUPPORTED(ct) \ 664 ((ct)->ct_flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED) 665 666 /* 667 * Client operating states. 668 */ 669 #define MDI_CLIENT_STATE_OPTIMAL 1 670 #define MDI_CLIENT_STATE_DEGRADED 2 671 #define MDI_CLIENT_STATE_FAILED 3 672 673 #define MDI_CLIENT_STATE(ct) ((ct)->ct_state) 674 #define MDI_CLIENT_SET_STATE(ct, state) ((ct)->ct_state = state) 675 676 #define MDI_CLIENT_IS_FAILED(ct) \ 677 ((ct)->ct_state == MDI_CLIENT_STATE_FAILED) 678 679 /* 680 * mdi_pathinfo nodes: 681 * 682 * From this framework's perspective, a 'path' is a tuple consisting of a 683 * client or end device, a host controller which provides device 684 * identification and transport services (pHCI), and bus specific unit 685 * addressing information. A path may be decorated with properties which 686 * describe the capabilities of the path; such properties are analogous to 687 * device node and minor node properties. 688 * 689 * The framework maintains link list of mdi_pathinfo nodes created by every 690 * pHCI driver instance via the pi_phci_link linkage; this is used (for example) 691 * to make sure that all relevant pathinfo nodes are freed before the pHCI 692 * is unregistered. 693 * 694 * Locking order: 695 * 696 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) XXX 697 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) XXX 698 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) XXX 699 * _NOTE(LOCK_ORDER(devinfo_tree_lock mdi_pathinfo::pi_mutex)) XXX 700 * 701 * mdi_pathinfo node structure definition 702 */ 703 struct mdi_pathinfo { 704 /* protected by MDI_PHCI_LOCK ph_mutex... */ 705 struct mdi_pathinfo *pi_phci_link; /* next path in phci list */ 706 mdi_phci_t *pi_phci; /* pHCI dev_info node */ 707 708 /* protected by MDI_CLIENT_LOCK ct_mutex... 
*/ 709 struct mdi_pathinfo *pi_client_link; /* next path in client list */ 710 mdi_client_t *pi_client; /* client */ 711 712 /* protected by MDI_VHCI_CLIENT_LOCK vh_client_mutex... */ 713 char *pi_addr; /* path unit address */ 714 int pi_path_instance; /* path instance */ 715 716 /* protected by MDI_PI_LOCK pi_mutex... */ 717 kmutex_t pi_mutex; /* per path mutex */ 718 mdi_pathinfo_state_t pi_state; /* path state */ 719 mdi_pathinfo_state_t pi_old_state; /* path state */ 720 kcondvar_t pi_state_cv; /* path state condvar */ 721 nvlist_t *pi_prop; /* Properties */ 722 void *pi_cprivate; /* client private info */ 723 void *pi_pprivate; /* phci private info */ 724 int pi_ref_cnt; /* pi reference count */ 725 kcondvar_t pi_ref_cv; /* condition variable */ 726 struct mdi_pi_kstats *pi_kstats; /* aggregate kstats */ 727 int pi_pm_held; /* phci's kidsup incremented */ 728 int pi_preferred; /* Preferred path */ 729 void *pi_vprivate; /* vhci private info */ 730 }; 731 732 /* 733 * pathinfo statistics: 734 * 735 * The mpxio architecture allows for multiple pathinfo nodes for each 736 * client-pHCI combination. For statistics purposes, these statistics are 737 * aggregated into a single client-pHCI set of kstats. 
 */
struct mdi_pi_kstats {
	int	pi_kstat_ref;		/* # paths aggregated, also a ref cnt */
	kstat_t	*pi_kstat_iostats;	/* mdi:iopath statistic set */
	kstat_t *pi_kstat_errstats;	/* error statistics */
};

/*
 * pathinfo error kstat
 */
struct pi_errs {
	struct kstat_named pi_softerrs;		/* "Soft" Error */
	struct kstat_named pi_harderrs;		/* "Hard" Error */
	struct kstat_named pi_transerrs;	/* Transport Errors */
	struct kstat_named pi_icnt_busy;	/* Interconnect Busy */
	struct kstat_named pi_icnt_errors;	/* Interconnect Errors */
	struct kstat_named pi_phci_rsrc;	/* pHCI No Resources */
	struct kstat_named pi_phci_localerr;	/* pHCI Local Errors */
	struct kstat_named pi_phci_invstate;	/* pHCI Invalid State */
	struct kstat_named pi_failedfrom;	/* Failover: Failed From */
	struct kstat_named pi_failedto;		/* Failover: Failed To */
};

/*
 * increment an error counter
 *
 * 'x' is the name of a struct pi_errs member (use the MDI_PI_* codes below).
 * The macro is a no-op when the path has no pi_kstats; otherwise it bumps
 * the 32-bit value of that member in the error kstat's ks_data.  Only
 * pi_kstats itself is checked for NULL; pi_kstat_errstats is dereferenced
 * unconditionally.
 */
#define	MDI_PI_ERRSTAT(pip, x) { \
	if (MDI_PI((pip))->pi_kstats != NULL) { \
		struct pi_errs *pep; \
		pep = MDI_PI(pip)->pi_kstats->pi_kstat_errstats->ks_data; \
		pep->x.value.ui32++; \
	} \
}

/*
 * error codes which can be passed to MDI_PI_ERRSTAT
 */
#define	MDI_PI_SOFTERR	pi_softerrs
#define	MDI_PI_HARDERR	pi_harderrs
#define	MDI_PI_TRANSERR	pi_transerrs
#define	MDI_PI_ICNTBUSY	pi_icnt_busy
#define	MDI_PI_ICNTERR	pi_icnt_errors
#define	MDI_PI_PHCIRSRC	pi_phci_rsrc
#define	MDI_PI_PHCILOCL	pi_phci_localerr
#define	MDI_PI_PHCIINVS	pi_phci_invstate
#define	MDI_PI_FAILFROM	pi_failedfrom
#define	MDI_PI_FAILTO	pi_failedto

/* Cast an opaque path handle to the implementation structure. */
#define	MDI_PI(type)			((struct mdi_pathinfo *)(type))

/*
 * per-path lock macros; MDI_PI_LOCKED() exists only in DEBUG kernels and is
 * intended for use inside ASSERT().
 */
#define	MDI_PI_LOCK(pip)		mutex_enter(&MDI_PI(pip)->pi_mutex)
#define	MDI_PI_TRYLOCK(pip)		mutex_tryenter(&MDI_PI(pip)->pi_mutex)
#define	MDI_PI_UNLOCK(pip)		mutex_exit(&MDI_PI(pip)->pi_mutex)
#ifdef	DEBUG
#define	MDI_PI_LOCKED(pip)		MUTEX_HELD(&MDI_PI(pip)->pi_mutex)
#endif	/* DEBUG */

/*
 * Reference count helpers.  Each evaluates to the updated pi_ref_cnt.  The
 * macros do no locking of their own; pi_ref_cnt is listed among the fields
 * protected by pi_mutex in struct mdi_pathinfo.
 */
#define	MDI_PI_HOLD(pip)		(++MDI_PI(pip)->pi_ref_cnt)
#define	MDI_PI_RELE(pip)		(--MDI_PI(pip)->pi_ref_cnt)

#define	MDI_EXT_STATE_CHANGE		0x10000000


#define	MDI_DISABLE_OP			0x1
#define	MDI_ENABLE_OP			0x2
#define	MDI_BEFORE_STATE_CHANGE		0x4
#define	MDI_AFTER_STATE_CHANGE		0x8
#define	MDI_SYNC_FLAG			0x10

/*
 * pi_state and pi_old_state each hold a base state (selected with
 * MDI_PATHINFO_STATE_MASK) plus extended state bits (selected with
 * MDI_PATHINFO_EXT_STATE_MASK).  The accessors below extract one part.
 */
#define	MDI_PI_STATE(pip) \
	(MDI_PI((pip))->pi_state & MDI_PATHINFO_STATE_MASK)
#define	MDI_PI_OLD_STATE(pip) \
	(MDI_PI((pip))->pi_old_state & MDI_PATHINFO_STATE_MASK)

#define	MDI_PI_EXT_STATE(pip) \
	(MDI_PI((pip))->pi_state & MDI_PATHINFO_EXT_STATE_MASK)
#define	MDI_PI_OLD_EXT_STATE(pip) \
	(MDI_PI((pip))->pi_old_state & MDI_PATHINFO_EXT_STATE_MASK)

/*
 * All of the SET/CLEAR macros below assert that pi_mutex is held.  The IS
 * macros that test a single bit return the raw masked value (non-zero when
 * set).
 */
#define	MDI_PI_SET_TRANSIENT(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_TRANSIENT; }
#define	MDI_PI_CLEAR_TRANSIENT(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_TRANSIENT; }
#define	MDI_PI_IS_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_TRANSIENT)

#define	MDI_PI_SET_USER_DISABLE(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_USER_DISABLE; }
#define	MDI_PI_SET_DRV_DISABLE(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE; }
#define	MDI_PI_SET_DRV_DISABLE_TRANS(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state |= MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; }

#define	MDI_PI_SET_USER_ENABLE(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_USER_DISABLE; }
#define	MDI_PI_SET_DRV_ENABLE(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE; }
#define	MDI_PI_SET_DRV_ENABLE_TRANS(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state &= ~MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT; }

#define	MDI_PI_IS_USER_DISABLE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_USER_DISABLE)
#define	MDI_PI_IS_DRV_DISABLE(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE)
#define	MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) \
	(MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_DRV_DISABLE_TRANSIENT)

/* True when any of the three disable bits is set. */
#define	MDI_PI_IS_DISABLE(pip) \
	(MDI_PI_IS_USER_DISABLE(pip) || \
	MDI_PI_IS_DRV_DISABLE(pip) || \
	MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip))

#define	MDI_PI_IS_INIT(pip) \
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	MDI_PATHINFO_STATE_INIT)

#define	MDI_PI_IS_INITING(pip) \
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	(MDI_PATHINFO_STATE_INIT | MDI_PATHINFO_STATE_TRANSIENT))

/*
 * Unlike the other state setters, MDI_PI_SET_INIT() overwrites pi_state
 * outright and therefore also discards any extended state bits.
 */
#define	MDI_PI_SET_INIT(pip) {\
	ASSERT(MDI_PI_LOCKED(pip)); \
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT; }

/*
 * The "-ING" setters (ONLINING, OFFLINING, STANDBYING, FAULTING) record the
 * current base state in pi_old_state, then install the target state with
 * MDI_PATHINFO_STATE_TRANSIENT or'ed in, carrying the extended state bits
 * across unchanged.  The plain setters (ONLINE, OFFLINE, STANDBY) install
 * the target state without the transient bit and without touching
 * pi_old_state, again preserving the extended state bits.
 */
#define	MDI_PI_SET_ONLINING(pip) {\
	uint32_t ext_state; \
	ASSERT(MDI_PI_LOCKED(pip)); \
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	(MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT); \
	MDI_PI(pip)->pi_state |= ext_state; }

#define	MDI_PI_IS_ONLINING(pip) \
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	(MDI_PATHINFO_STATE_ONLINE | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_ONLINE(pip) {\
	uint32_t ext_state; \
	ASSERT(MDI_PI_LOCKED(pip)); \
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_ONLINE; \
	MDI_PI(pip)->pi_state |= ext_state; }

#define	MDI_PI_IS_ONLINE(pip) \
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	MDI_PATHINFO_STATE_ONLINE)

#define	MDI_PI_SET_OFFLINING(pip) {\
	uint32_t ext_state; \
	ASSERT(MDI_PI_LOCKED(pip)); \
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	(MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT); \
	MDI_PI(pip)->pi_state |= ext_state; }

#define	MDI_PI_IS_OFFLINING(pip) \
	((MDI_PI(pip)->pi_state & ~MDI_PATHINFO_EXT_STATE_MASK) == \
	(MDI_PATHINFO_STATE_OFFLINE | MDI_PATHINFO_STATE_TRANSIENT))

#define	MDI_PI_SET_OFFLINE(pip) {\
	uint32_t ext_state; \
	ASSERT(MDI_PI_LOCKED(pip)); \
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_OFFLINE; \
	MDI_PI(pip)->pi_state |= ext_state; }

#define	MDI_PI_IS_OFFLINE(pip) \
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	MDI_PATHINFO_STATE_OFFLINE)

#define	MDI_PI_SET_STANDBYING(pip) {\
	uint32_t ext_state; \
	ASSERT(MDI_PI_LOCKED(pip)); \
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	(MDI_PATHINFO_STATE_STANDBY | MDI_PATHINFO_STATE_TRANSIENT); \
	MDI_PI(pip)->pi_state |= ext_state; }

#define	MDI_PI_SET_STANDBY(pip) {\
	uint32_t ext_state; \
	ASSERT(MDI_PI_LOCKED(pip)); \
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_STANDBY; \
	MDI_PI(pip)->pi_state |= ext_state; }

#define	MDI_PI_IS_STANDBY(pip) \
	((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \
	MDI_PATHINFO_STATE_STANDBY)

#define	MDI_PI_SET_FAULTING(pip) {\
	uint32_t ext_state; \
	ASSERT(MDI_PI_LOCKED(pip)); \
	ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \
	MDI_PI(pip)->pi_old_state = MDI_PI_STATE(pip); \
	MDI_PI(pip)->pi_state = \
	(MDI_PATHINFO_STATE_FAULT | MDI_PATHINFO_STATE_TRANSIENT); \
	MDI_PI(pip)->pi_state
|= ext_state; } 946 947 #define MDI_PI_SET_FAULT(pip) {\ 948 uint32_t ext_state; \ 949 ASSERT(MDI_PI_LOCKED(pip)); \ 950 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; \ 951 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_FAULT; \ 952 MDI_PI(pip)->pi_state |= ext_state; } 953 954 #define MDI_PI_IS_FAULT(pip) \ 955 ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) == \ 956 MDI_PATHINFO_STATE_FAULT) 957 958 #define MDI_PI_IS_SUSPENDED(pip) \ 959 ((MDI_PI(pip))->pi_phci->ph_flags & MDI_PHCI_FLAGS_SUSPEND) 960 961 /* 962 * mdi_vhcache_client, mdi_vhcache_pathinfo, and mdi_vhcache_phci structures 963 * hold the vhci to phci client mappings of the on-disk vhci busconfig cache. 964 */ 965 966 /* phci structure of vhci cache */ 967 typedef struct mdi_vhcache_phci { 968 char *cphci_path; /* phci path name */ 969 uint32_t cphci_id; /* used when building nvlist */ 970 mdi_phci_t *cphci_phci; /* pointer to actual phci */ 971 struct mdi_vhcache_phci *cphci_next; /* next in vhci phci list */ 972 } mdi_vhcache_phci_t; 973 974 /* pathinfo structure of vhci cache */ 975 typedef struct mdi_vhcache_pathinfo { 976 char *cpi_addr; /* path address */ 977 mdi_vhcache_phci_t *cpi_cphci; /* phci the path belongs to */ 978 struct mdi_pathinfo *cpi_pip; /* ptr to actual pathinfo */ 979 uint32_t cpi_flags; /* see below */ 980 struct mdi_vhcache_pathinfo *cpi_next; /* next path for the client */ 981 } mdi_vhcache_pathinfo_t; 982 983 /* 984 * cpi_flags 985 * 986 * MDI_CPI_HINT_PATH_DOES_NOT_EXIST - set when configuration of the path has 987 * failed. 
988 */ 989 #define MDI_CPI_HINT_PATH_DOES_NOT_EXIST 0x0001 990 991 /* client structure of vhci cache */ 992 typedef struct mdi_vhcache_client { 993 char *cct_name_addr; /* client address */ 994 mdi_vhcache_pathinfo_t *cct_cpi_head; /* client's path list head */ 995 mdi_vhcache_pathinfo_t *cct_cpi_tail; /* client's path list tail */ 996 struct mdi_vhcache_client *cct_next; /* next in vhci client list */ 997 } mdi_vhcache_client_t; 998 999 /* vhci cache structure - one for vhci instance */ 1000 typedef struct mdi_vhci_cache { 1001 mdi_vhcache_phci_t *vhcache_phci_head; /* phci list head */ 1002 mdi_vhcache_phci_t *vhcache_phci_tail; /* phci list tail */ 1003 mdi_vhcache_client_t *vhcache_client_head; /* client list head */ 1004 mdi_vhcache_client_t *vhcache_client_tail; /* client list tail */ 1005 mod_hash_t *vhcache_client_hash; /* client hash */ 1006 int vhcache_flags; /* see below */ 1007 int64_t vhcache_clean_time; /* last clean time */ 1008 krwlock_t vhcache_lock; /* cache lock */ 1009 } mdi_vhci_cache_t; 1010 1011 /* vhcache_flags */ 1012 #define MDI_VHCI_CACHE_SETUP_DONE 0x0001 /* cache setup completed */ 1013 1014 /* vhci bus config structure - one for vhci instance */ 1015 typedef struct mdi_vhci_config { 1016 char *vhc_vhcache_filename; /* on-disk file name */ 1017 mdi_vhci_cache_t vhc_vhcache; /* vhci cache */ 1018 kmutex_t vhc_lock; /* vhci config lock */ 1019 kcondvar_t vhc_cv; 1020 int vhc_flags; /* see below */ 1021 1022 /* flush vhci cache when lbolt reaches vhc_flush_at_ticks */ 1023 clock_t vhc_flush_at_ticks; 1024 1025 /* 1026 * Head and tail of the client list whose paths are being configured 1027 * asynchronously. vhc_acc_count is the number of clients on this list. 1028 * vhc_acc_thrcount is the number threads running to configure 1029 * the paths for these clients. 
1030 */ 1031 struct mdi_async_client_config *vhc_acc_list_head; 1032 struct mdi_async_client_config *vhc_acc_list_tail; 1033 int vhc_acc_count; 1034 int vhc_acc_thrcount; 1035 1036 /* callback id - for flushing the cache during system shutdown */ 1037 callb_id_t vhc_cbid; 1038 1039 /* 1040 * vhc_path_discovery_boot - number of times path discovery will be 1041 * attempted during early boot. 1042 * vhc_path_discovery_postboot number of times path discovery will be 1043 * attempted during late boot. 1044 * vhc_path_discovery_cutoff_time - time at which paths were last 1045 * discovered + some timeout 1046 */ 1047 int vhc_path_discovery_boot; 1048 int vhc_path_discovery_postboot; 1049 int64_t vhc_path_discovery_cutoff_time; 1050 } mdi_vhci_config_t; 1051 1052 /* vhc_flags */ 1053 #define MDI_VHC_SINGLE_THREADED 0x0001 /* config single threaded */ 1054 #define MDI_VHC_EXIT 0x0002 /* exit all config activity */ 1055 #define MDI_VHC_VHCACHE_DIRTY 0x0004 /* cache dirty */ 1056 #define MDI_VHC_VHCACHE_FLUSH_THREAD 0x0008 /* cache flush thead running */ 1057 #define MDI_VHC_VHCACHE_FLUSH_ERROR 0x0010 /* failed to flush cache */ 1058 #define MDI_VHC_READONLY_FS 0x0020 /* filesys is readonly */ 1059 1060 typedef struct mdi_phys_path { 1061 char *phys_path; 1062 struct mdi_phys_path *phys_path_next; 1063 } mdi_phys_path_t; 1064 1065 /* 1066 * Lookup tokens are used to cache the result of the vhci cache client lookup 1067 * operations (to reduce the number of real lookup operations). 
1068 */ 1069 typedef struct mdi_vhcache_lookup_token { 1070 mdi_vhcache_client_t *lt_cct; /* vhcache client */ 1071 int64_t lt_cct_lookup_time; /* last lookup time */ 1072 } mdi_vhcache_lookup_token_t; 1073 1074 /* asynchronous configuration of client paths */ 1075 typedef struct mdi_async_client_config { 1076 char *acc_ct_name; /* client name */ 1077 char *acc_ct_addr; /* client address */ 1078 mdi_phys_path_t *acc_phclient_path_list_head; /* path head */ 1079 mdi_vhcache_lookup_token_t acc_token; /* lookup token */ 1080 struct mdi_async_client_config *acc_next; /* next in vhci acc list */ 1081 } mdi_async_client_config_t; 1082 1083 /* 1084 * vHCI driver instance registration/unregistration 1085 * 1086 * mdi_vhci_register() is called by a vHCI driver to register itself as the 1087 * manager of devices from a particular 'class'. This should be called from 1088 * attach(9e). 1089 * 1090 * mdi_vhci_unregister() is called from detach(9E) to unregister a vHCI 1091 * instance from the framework. 1092 */ 1093 int mdi_vhci_register(char *, dev_info_t *, mdi_vhci_ops_t *, int); 1094 int mdi_vhci_unregister(dev_info_t *, int); 1095 1096 /* 1097 * Utility functions 1098 */ 1099 int mdi_phci_get_path_count(dev_info_t *); 1100 dev_info_t *mdi_phci_path2devinfo(dev_info_t *, caddr_t); 1101 1102 1103 /* 1104 * Path Selection Functions: 1105 * 1106 * mdi_select_path() is called by a vHCI driver to select to which path an 1107 * I/O request should be routed. The caller passes the 'buf' structure as 1108 * one of the parameters. The mpxio framework uses the buf's contents to 1109 * maintain per path statistics (total I/O size / count pending). If more 1110 * than one online path is available, the framework automatically selects 1111 * a suitable one. If a failover operation is active for this client device 1112 * the call fails, returning MDI_BUSY. 
1113 * 1114 * By default this function returns a suitable path in the 'online' state, 1115 * based on the current load balancing policy. Currently we support 1116 * LOAD_BALANCE_NONE (Previously selected online path will continue to be 1117 * used as long as the path is usable) and LOAD_BALANCE_RR (Online paths 1118 * will be selected in a round robin fashion). The load balancing scheme 1119 * can be configured in the vHCI driver's configuration file (driver.conf). 1120 * 1121 * vHCI drivers may override this default behaviour by specifying appropriate 1122 * flags. If start_pip is specified (non NULL), it is used as the routine's 1123 * starting point; it starts walking from there to find the next appropriate 1124 * path. 1125 * 1126 * The following values for 'flags' are currently defined, the third argument 1127 * to mdi_select_path depends on the flags used. 1128 * 1129 * <none>: default, arg is pip 1130 * MDI_SELECT_ONLINE_PATH: select an ONLINE path preferred-first, 1131 * arg is pip 1132 * MDI_SELECT_STANDBY_PATH: select a STANDBY path, arg is pip 1133 * MDI_SELECT_USER_DISABLE_PATH: select user disable for failover and 1134 * auto_failback 1135 * MDI_SELECT_PATH_INSTANCE: select a specific path, arg is 1136 * path instance 1137 * MDI_SELECT_NO_PREFERRED: select path without preferred-first 1138 * 1139 * The selected paths are returned in an mdi_hold_path() state (pi_ref_cnt), 1140 * caller should release the hold by calling mdi_rele_path() at the end of 1141 * operation. 
1142 */ 1143 int mdi_select_path(dev_info_t *, struct buf *, int, 1144 void *, mdi_pathinfo_t **); 1145 int mdi_set_lb_policy(dev_info_t *, client_lb_t); 1146 int mdi_set_lb_region_size(dev_info_t *, int); 1147 client_lb_t mdi_get_lb_policy(dev_info_t *); 1148 1149 /* 1150 * flags for mdi_select_path() routine 1151 */ 1152 #define MDI_SELECT_ONLINE_PATH 0x0001 1153 #define MDI_SELECT_STANDBY_PATH 0x0002 1154 #define MDI_SELECT_USER_DISABLE_PATH 0x0004 1155 #define MDI_SELECT_PATH_INSTANCE 0x0008 1156 #define MDI_SELECT_NO_PREFERRED 0x0010 1157 1158 /* 1159 * MDI client device utility functions 1160 */ 1161 int mdi_client_get_path_count(dev_info_t *); 1162 dev_info_t *mdi_client_path2devinfo(dev_info_t *, caddr_t); 1163 1164 /* 1165 * Failover: 1166 * 1167 * The vHCI driver calls mdi_failover() to initiate a failover operation. 1168 * mdi_failover() calls back into the vHCI driver's vo_failover() 1169 * entry point to perform the actual failover operation. The reason 1170 * for requiring the vHCI driver to initiate failover by calling 1171 * mdi_failover(), instead of directly executing vo_failover() itself, 1172 * is to ensure that the mdi framework can keep track of the client 1173 * state properly. Additionally, mdi_failover() provides as a 1174 * convenience the option of performing the failover operation 1175 * synchronously or asynchronously 1176 * 1177 * Upon successful completion of the failover operation, the paths that were 1178 * previously ONLINE will be in the STANDBY state, and the newly activated 1179 * paths will be in the ONLINE state. 1180 * 1181 * The flags modifier determines whether the activation is done synchronously 1182 */ 1183 int mdi_failover(dev_info_t *, dev_info_t *, int); 1184 1185 /* 1186 * Client device failover mode of operation 1187 */ 1188 #define MDI_FAILOVER_SYNC 1 /* Syncronous Failover */ 1189 #define MDI_FAILOVER_ASYNC 2 /* Asyncronous Failover */ 1190 1191 /* 1192 * mdi_pathinfo node kstat functions. 
1193 */ 1194 int mdi_pi_kstat_exists(mdi_pathinfo_t *); 1195 int mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ks_name); 1196 void mdi_pi_kstat_iosupdate(mdi_pathinfo_t *, struct buf *); 1197 1198 /* 1199 * mdi_pathinfo node extended state change functions. 1200 */ 1201 int mdi_pi_get_state2(mdi_pathinfo_t *, mdi_pathinfo_state_t *, uint32_t *); 1202 int mdi_pi_get_preferred(mdi_pathinfo_t *); 1203 1204 /* 1205 * mdi_pathinfo node member functions 1206 */ 1207 void *mdi_pi_get_client_private(mdi_pathinfo_t *); 1208 void mdi_pi_set_client_private(mdi_pathinfo_t *, void *); 1209 void mdi_pi_set_state(mdi_pathinfo_t *, mdi_pathinfo_state_t); 1210 void mdi_pi_set_preferred(mdi_pathinfo_t *, int); 1211 1212 /* get/set vhci private data */ 1213 void *mdi_client_get_vhci_private(dev_info_t *); 1214 void mdi_client_set_vhci_private(dev_info_t *, void *); 1215 void *mdi_phci_get_vhci_private(dev_info_t *); 1216 void mdi_phci_set_vhci_private(dev_info_t *, void *); 1217 void *mdi_pi_get_vhci_private(mdi_pathinfo_t *); 1218 void mdi_pi_set_vhci_private(mdi_pathinfo_t *, void *); 1219 1220 /* 1221 * mdi_pathinfo Property utilities 1222 */ 1223 int mdi_prop_size(mdi_pathinfo_t *, size_t *); 1224 int mdi_prop_pack(mdi_pathinfo_t *, char **, uint_t); 1225 1226 /* obsolete interface, to be removed */ 1227 void mdi_get_next_path(dev_info_t *, mdi_pathinfo_t *, mdi_pathinfo_t **); 1228 int mdi_get_component_type(dev_info_t *); 1229 1230 #endif /* _KERNEL */ 1231 1232 #ifdef __cplusplus 1233 } 1234 #endif 1235 1236 #endif /* _SYS_MDI_IMPLDEFS_H */ 1237