1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
*
 * Default locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 */

#include <sys/note.h>
#include <sys/types.h>
#include <sys/varargs.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/kmem.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/bootconf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddipropdefs.h>
#include <sys/sunndi.h>
#include <sys/ndi_impldefs.h>
#include <sys/promif.h>
#include <sys/sunmdi.h>
#include <sys/mdi_impldefs.h>
#include <sys/taskq.h>
#include <sys/epm.h>
#include <sys/sunpm.h>
#include <sys/modhash.h>
#include <sys/disp.h>
#include <sys/autoconf.h>
#include <sys/sysmacros.h>

/*
 * Debug logging.
 *
 * On DEBUG kernels MDI_DEBUG(level, (args)) calls i_mdi_log with the
 * parenthesized argument list when mdi_debug >= level.  MDI_WARN, MDI_NOTE
 * and MDI_CONT each expand to the first two i_mdi_log arguments: a cmn_err
 * level followed by the name of the calling function.
 *
 * On non-DEBUG kernels MDI_DEBUG expands to nothing, so its arguments are
 * never evaluated.
 */
#ifdef	DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
#define	MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))	i_mdi_log pargs
#define	MDI_WARN	CE_WARN, __func__
#define	MDI_NOTE	CE_NOTE, __func__
#define	MDI_CONT	CE_CONT, __func__
static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
#else	/* !DEBUG */
#define	MDI_DEBUG(dbglevel, pargs)
#endif	/* DEBUG */
int	mdi_debug_consoleonly = 0;

extern pri_t	minclsyspri;
extern int	modrootloaded;

/*
 * Global mutex:
 * Protects vHCI list and structure members.
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 *
 * The min/max allocation counts scale with the (tunable) thread count
 * mdi_taskq_n_threads rather than with the compile-time default.
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* number of clock ticks in one second (1000000 microseconds) */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
*/
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 *
 * The three hashes below are created with mdi_pathmap_hash_size buckets.
 */
static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
static int		mdi_pathmap_hash_size = 256;
static kmutex_t		mdi_pathmap_mutex;
static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */
static mod_hash_t	*mdi_pathmap_sbyinstance;	/* inst->shortpath */

/*
 * MDI component property name/value string definitions
 */
const char 		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/*
 * cmn_err format used when a second vHCI driver tries to register for a
 * class that already has one; the %s argument is the class name.
 */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";

/*
 * Static function prototypes
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int 		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
/*
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
				int, int);
static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
				mdi_vhci_t *vh, int flags, int op);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
290 static boolean_t stop_vhcache_flush_thread(void *, int); 291 static void free_string_array(char **, int); 292 static void free_vhcache_phci(mdi_vhcache_phci_t *); 293 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 294 static void free_vhcache_client(mdi_vhcache_client_t *); 295 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 296 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 297 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 298 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 299 static void vhcache_pi_add(mdi_vhci_config_t *, 300 struct mdi_pathinfo *); 301 static void vhcache_pi_remove(mdi_vhci_config_t *, 302 struct mdi_pathinfo *); 303 static void free_phclient_path_list(mdi_phys_path_t *); 304 static void sort_vhcache_paths(mdi_vhcache_client_t *); 305 static int flush_vhcache(mdi_vhci_config_t *, int); 306 static void vhcache_dirty(mdi_vhci_config_t *); 307 static void free_async_client_config(mdi_async_client_config_t *); 308 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 309 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 310 static nvlist_t *read_on_disk_vhci_cache(char *); 311 extern int fread_nvlist(char *, nvlist_t **); 312 extern int fwrite_nvlist(char *, nvlist_t *); 313 314 /* called once when first vhci registers with mdi */ 315 static void 316 i_mdi_init() 317 { 318 static int initialized = 0; 319 320 if (initialized) 321 return; 322 initialized = 1; 323 324 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 325 326 /* Create our taskq resources */ 327 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 328 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 329 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 330 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 331 332 /* Allocate ['path_instance' <-> "path"] maps */ 333 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 334 mdi_pathmap_bypath = mod_hash_create_strhash( 
335 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 336 mod_hash_null_valdtor); 337 mdi_pathmap_byinstance = mod_hash_create_idhash( 338 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 339 mod_hash_null_valdtor); 340 mdi_pathmap_sbyinstance = mod_hash_create_idhash( 341 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size, 342 mod_hash_null_valdtor); 343 } 344 345 /* 346 * mdi_get_component_type(): 347 * Return mpxio component type 348 * Return Values: 349 * MDI_COMPONENT_NONE 350 * MDI_COMPONENT_VHCI 351 * MDI_COMPONENT_PHCI 352 * MDI_COMPONENT_CLIENT 353 * XXX This doesn't work under multi-level MPxIO and should be 354 * removed when clients migrate mdi_component_is_*() interfaces. 355 */ 356 int 357 mdi_get_component_type(dev_info_t *dip) 358 { 359 return (DEVI(dip)->devi_mdi_component); 360 } 361 362 /* 363 * mdi_vhci_register(): 364 * Register a vHCI module with the mpxio framework 365 * mdi_vhci_register() is called by vHCI drivers to register the 366 * 'class_driver' vHCI driver and its MDI entrypoints with the 367 * mpxio framework. The vHCI driver must call this interface as 368 * part of its attach(9e) handler. 369 * Competing threads may try to attach mdi_vhci_register() as 370 * the vHCI drivers are loaded and attached as a result of pHCI 371 * driver instance registration (mdi_phci_register()) with the 372 * framework. 373 * Return Values: 374 * MDI_SUCCESS 375 * MDI_FAILURE 376 */ 377 /*ARGSUSED*/ 378 int 379 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 380 int flags) 381 { 382 mdi_vhci_t *vh = NULL; 383 384 /* Registrant can't be older */ 385 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 386 387 #ifdef DEBUG 388 /* 389 * IB nexus driver is loaded only when IB hardware is present. 390 * In order to be able to do this there is a need to drive the loading 391 * and attaching of the IB nexus driver (especially when an IB hardware 392 * is dynamically plugged in) when an IB HCA driver (PHCI) 393 * is being attached. 
Unfortunately this gets into the limitations 394 * of devfs as there seems to be no clean way to drive configuration 395 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 396 * for IB. 397 */ 398 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 399 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 400 #endif 401 402 i_mdi_init(); 403 404 mutex_enter(&mdi_mutex); 405 /* 406 * Scan for already registered vhci 407 */ 408 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 409 if (strcmp(vh->vh_class, class) == 0) { 410 /* 411 * vHCI has already been created. Check for valid 412 * vHCI ops registration. We only support one vHCI 413 * module per class 414 */ 415 if (vh->vh_ops != NULL) { 416 mutex_exit(&mdi_mutex); 417 cmn_err(CE_NOTE, vhci_greeting, class); 418 return (MDI_FAILURE); 419 } 420 break; 421 } 422 } 423 424 /* 425 * if not yet created, create the vHCI component 426 */ 427 if (vh == NULL) { 428 struct client_hash *hash = NULL; 429 char *load_balance; 430 431 /* 432 * Allocate and initialize the mdi extensions 433 */ 434 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 435 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 436 KM_SLEEP); 437 vh->vh_client_table = hash; 438 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 439 (void) strcpy(vh->vh_class, class); 440 vh->vh_lb = LOAD_BALANCE_RR; 441 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 442 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 443 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 444 vh->vh_lb = LOAD_BALANCE_NONE; 445 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 446 == 0) { 447 vh->vh_lb = LOAD_BALANCE_LBA; 448 } 449 ddi_prop_free(load_balance); 450 } 451 452 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 453 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 454 455 /* 456 * Store the vHCI ops vectors 457 */ 458 vh->vh_dip = vdip; 459 vh->vh_ops = vops; 460 461 setup_vhci_cache(vh); 462 463 if (mdi_vhci_head == 
NULL) { 464 mdi_vhci_head = vh; 465 } 466 if (mdi_vhci_tail) { 467 mdi_vhci_tail->vh_next = vh; 468 } 469 mdi_vhci_tail = vh; 470 mdi_vhci_count++; 471 } 472 473 /* 474 * Claim the devfs node as a vhci component 475 */ 476 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 477 478 /* 479 * Initialize our back reference from dev_info node 480 */ 481 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 482 mutex_exit(&mdi_mutex); 483 return (MDI_SUCCESS); 484 } 485 486 /* 487 * mdi_vhci_unregister(): 488 * Unregister a vHCI module from mpxio framework 489 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 490 * of a vhci to unregister it from the framework. 491 * Return Values: 492 * MDI_SUCCESS 493 * MDI_FAILURE 494 */ 495 /*ARGSUSED*/ 496 int 497 mdi_vhci_unregister(dev_info_t *vdip, int flags) 498 { 499 mdi_vhci_t *found, *vh, *prev = NULL; 500 501 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 502 503 /* 504 * Check for invalid VHCI 505 */ 506 if ((vh = i_devi_get_vhci(vdip)) == NULL) 507 return (MDI_FAILURE); 508 509 /* 510 * Scan the list of registered vHCIs for a match 511 */ 512 mutex_enter(&mdi_mutex); 513 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 514 if (found == vh) 515 break; 516 prev = found; 517 } 518 519 if (found == NULL) { 520 mutex_exit(&mdi_mutex); 521 return (MDI_FAILURE); 522 } 523 524 /* 525 * Check the vHCI, pHCI and client count. All the pHCIs and clients 526 * should have been unregistered, before a vHCI can be 527 * unregistered. 
528 */ 529 MDI_VHCI_PHCI_LOCK(vh); 530 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 531 MDI_VHCI_PHCI_UNLOCK(vh); 532 mutex_exit(&mdi_mutex); 533 return (MDI_FAILURE); 534 } 535 MDI_VHCI_PHCI_UNLOCK(vh); 536 537 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 538 mutex_exit(&mdi_mutex); 539 return (MDI_FAILURE); 540 } 541 542 /* 543 * Remove the vHCI from the global list 544 */ 545 if (vh == mdi_vhci_head) { 546 mdi_vhci_head = vh->vh_next; 547 } else { 548 prev->vh_next = vh->vh_next; 549 } 550 if (vh == mdi_vhci_tail) { 551 mdi_vhci_tail = prev; 552 } 553 mdi_vhci_count--; 554 mutex_exit(&mdi_mutex); 555 556 vh->vh_ops = NULL; 557 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 558 DEVI(vdip)->devi_mdi_xhci = NULL; 559 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 560 kmem_free(vh->vh_client_table, 561 mdi_client_table_size * sizeof (struct client_hash)); 562 mutex_destroy(&vh->vh_phci_mutex); 563 mutex_destroy(&vh->vh_client_mutex); 564 565 kmem_free(vh, sizeof (mdi_vhci_t)); 566 return (MDI_SUCCESS); 567 } 568 569 /* 570 * i_mdi_vhci_class2vhci(): 571 * Look for a matching vHCI module given a vHCI class name 572 * Return Values: 573 * Handle to a vHCI component 574 * NULL 575 */ 576 static mdi_vhci_t * 577 i_mdi_vhci_class2vhci(char *class) 578 { 579 mdi_vhci_t *vh = NULL; 580 581 ASSERT(!MUTEX_HELD(&mdi_mutex)); 582 583 mutex_enter(&mdi_mutex); 584 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 585 if (strcmp(vh->vh_class, class) == 0) { 586 break; 587 } 588 } 589 mutex_exit(&mdi_mutex); 590 return (vh); 591 } 592 593 /* 594 * i_devi_get_vhci(): 595 * Utility function to get the handle to a vHCI component 596 * Return Values: 597 * Handle to a vHCI component 598 * NULL 599 */ 600 mdi_vhci_t * 601 i_devi_get_vhci(dev_info_t *vdip) 602 { 603 mdi_vhci_t *vh = NULL; 604 if (MDI_VHCI(vdip)) { 605 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 606 } 607 return (vh); 608 } 609 610 /* 611 * mdi_phci_register(): 612 * Register a pHCI 
module with mpxio framework 613 * mdi_phci_register() is called by pHCI drivers to register with 614 * the mpxio framework and a specific 'class_driver' vHCI. The 615 * pHCI driver must call this interface as part of its attach(9e) 616 * handler. 617 * Return Values: 618 * MDI_SUCCESS 619 * MDI_FAILURE 620 */ 621 /*ARGSUSED*/ 622 int 623 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 624 { 625 mdi_phci_t *ph; 626 mdi_vhci_t *vh; 627 char *data; 628 629 /* 630 * Some subsystems, like fcp, perform pHCI registration from a 631 * different thread than the one doing the pHCI attach(9E) - the 632 * driver attach code is waiting for this other thread to complete. 633 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 634 * (indicating that some thread has done an ndi_devi_enter of parent) 635 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 636 */ 637 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 638 639 /* 640 * Check for mpxio-disable property. Enable mpxio if the property is 641 * missing or not set to "yes". 642 * If the property is set to "yes" then emit a brief message. 
643 */ 644 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 645 &data) == DDI_SUCCESS)) { 646 if (strcmp(data, "yes") == 0) { 647 MDI_DEBUG(1, (MDI_CONT, pdip, 648 "?multipath capabilities disabled via %s.conf.", 649 ddi_driver_name(pdip))); 650 ddi_prop_free(data); 651 return (MDI_FAILURE); 652 } 653 ddi_prop_free(data); 654 } 655 656 /* 657 * Search for a matching vHCI 658 */ 659 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 660 if (vh == NULL) { 661 return (MDI_FAILURE); 662 } 663 664 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 665 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 666 ph->ph_dip = pdip; 667 ph->ph_vhci = vh; 668 ph->ph_next = NULL; 669 ph->ph_unstable = 0; 670 ph->ph_vprivate = 0; 671 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 672 673 MDI_PHCI_LOCK(ph); 674 MDI_PHCI_SET_POWER_UP(ph); 675 MDI_PHCI_UNLOCK(ph); 676 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 677 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 678 679 vhcache_phci_add(vh->vh_config, ph); 680 681 MDI_VHCI_PHCI_LOCK(vh); 682 if (vh->vh_phci_head == NULL) { 683 vh->vh_phci_head = ph; 684 } 685 if (vh->vh_phci_tail) { 686 vh->vh_phci_tail->ph_next = ph; 687 } 688 vh->vh_phci_tail = ph; 689 vh->vh_phci_count++; 690 MDI_VHCI_PHCI_UNLOCK(vh); 691 692 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 693 return (MDI_SUCCESS); 694 } 695 696 /* 697 * mdi_phci_unregister(): 698 * Unregister a pHCI module from mpxio framework 699 * mdi_phci_unregister() is called by the pHCI drivers from their 700 * detach(9E) handler to unregister their instances from the 701 * framework. 
702 * Return Values: 703 * MDI_SUCCESS 704 * MDI_FAILURE 705 */ 706 /*ARGSUSED*/ 707 int 708 mdi_phci_unregister(dev_info_t *pdip, int flags) 709 { 710 mdi_vhci_t *vh; 711 mdi_phci_t *ph; 712 mdi_phci_t *tmp; 713 mdi_phci_t *prev = NULL; 714 mdi_pathinfo_t *pip; 715 716 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 717 718 ph = i_devi_get_phci(pdip); 719 if (ph == NULL) { 720 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI")); 721 return (MDI_FAILURE); 722 } 723 724 vh = ph->ph_vhci; 725 ASSERT(vh != NULL); 726 if (vh == NULL) { 727 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI")); 728 return (MDI_FAILURE); 729 } 730 731 MDI_VHCI_PHCI_LOCK(vh); 732 tmp = vh->vh_phci_head; 733 while (tmp) { 734 if (tmp == ph) { 735 break; 736 } 737 prev = tmp; 738 tmp = tmp->ph_next; 739 } 740 741 if (ph == vh->vh_phci_head) { 742 vh->vh_phci_head = ph->ph_next; 743 } else { 744 prev->ph_next = ph->ph_next; 745 } 746 747 if (ph == vh->vh_phci_tail) { 748 vh->vh_phci_tail = prev; 749 } 750 751 vh->vh_phci_count--; 752 MDI_VHCI_PHCI_UNLOCK(vh); 753 754 /* Walk remaining pathinfo nodes and disassociate them from pHCI */ 755 MDI_PHCI_LOCK(ph); 756 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip; 757 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) 758 MDI_PI(pip)->pi_phci = NULL; 759 MDI_PHCI_UNLOCK(ph); 760 761 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 762 ESC_DDI_INITIATOR_UNREGISTER); 763 vhcache_phci_remove(vh->vh_config, ph); 764 cv_destroy(&ph->ph_unstable_cv); 765 mutex_destroy(&ph->ph_mutex); 766 kmem_free(ph, sizeof (mdi_phci_t)); 767 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 768 DEVI(pdip)->devi_mdi_xhci = NULL; 769 return (MDI_SUCCESS); 770 } 771 772 /* 773 * i_devi_get_phci(): 774 * Utility function to return the phci extensions. 
775 */ 776 static mdi_phci_t * 777 i_devi_get_phci(dev_info_t *pdip) 778 { 779 mdi_phci_t *ph = NULL; 780 781 if (MDI_PHCI(pdip)) { 782 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 783 } 784 return (ph); 785 } 786 787 /* 788 * Single thread mdi entry into devinfo node for modifying its children. 789 * If necessary we perform an ndi_devi_enter of the vHCI before doing 790 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 791 * for the vHCI and one for the pHCI. 792 */ 793 void 794 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 795 { 796 dev_info_t *vdip; 797 int vcircular, pcircular; 798 799 /* Verify calling context */ 800 ASSERT(MDI_PHCI(phci_dip)); 801 vdip = mdi_devi_get_vdip(phci_dip); 802 ASSERT(vdip); /* A pHCI always has a vHCI */ 803 804 /* 805 * If pHCI is detaching then the framework has already entered the 806 * vHCI on a threads that went down the code path leading to 807 * detach_node(). This framework enter of the vHCI during pHCI 808 * detach is done to avoid deadlock with vHCI power management 809 * operations which enter the vHCI and the enter down the path 810 * to the pHCI. If pHCI is detaching then we piggyback this calls 811 * enter of the vHCI on frameworks vHCI enter that has already 812 * occurred - this is OK because we know that the framework thread 813 * doing detach is waiting for our completion. 814 * 815 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 816 * race with detach - but we can't do that because the framework has 817 * already entered the parent, so we have some complexity instead. 
818 */ 819 for (;;) { 820 if (ndi_devi_tryenter(vdip, &vcircular)) { 821 ASSERT(vcircular != -1); 822 if (DEVI_IS_DETACHING(phci_dip)) { 823 ndi_devi_exit(vdip, vcircular); 824 vcircular = -1; 825 } 826 break; 827 } else if (DEVI_IS_DETACHING(phci_dip)) { 828 vcircular = -1; 829 break; 830 } else if (servicing_interrupt()) { 831 /* 832 * Don't delay an interrupt (and ensure adaptive 833 * mutex inversion support). 834 */ 835 ndi_devi_enter(vdip, &vcircular); 836 break; 837 } else { 838 delay_random(2); 839 } 840 } 841 842 ndi_devi_enter(phci_dip, &pcircular); 843 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 844 } 845 846 /* 847 * Attempt to mdi_devi_enter. 848 */ 849 int 850 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 851 { 852 dev_info_t *vdip; 853 int vcircular, pcircular; 854 855 /* Verify calling context */ 856 ASSERT(MDI_PHCI(phci_dip)); 857 vdip = mdi_devi_get_vdip(phci_dip); 858 ASSERT(vdip); /* A pHCI always has a vHCI */ 859 860 if (ndi_devi_tryenter(vdip, &vcircular)) { 861 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 862 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 863 return (1); /* locked */ 864 } 865 ndi_devi_exit(vdip, vcircular); 866 } 867 return (0); /* busy */ 868 } 869 870 /* 871 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
872 */ 873 void 874 mdi_devi_exit(dev_info_t *phci_dip, int circular) 875 { 876 dev_info_t *vdip; 877 int vcircular, pcircular; 878 879 /* Verify calling context */ 880 ASSERT(MDI_PHCI(phci_dip)); 881 vdip = mdi_devi_get_vdip(phci_dip); 882 ASSERT(vdip); /* A pHCI always has a vHCI */ 883 884 /* extract two circular recursion values from single int */ 885 pcircular = (short)(circular & 0xFFFF); 886 vcircular = (short)((circular >> 16) & 0xFFFF); 887 888 ndi_devi_exit(phci_dip, pcircular); 889 if (vcircular != -1) 890 ndi_devi_exit(vdip, vcircular); 891 } 892 893 /* 894 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 895 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 896 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 897 * with vHCI power management code during path online/offline. Each 898 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 899 * occur within the scope of an active mdi_devi_enter that establishes the 900 * circular value. 901 */ 902 void 903 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 904 { 905 int pcircular; 906 907 /* Verify calling context */ 908 ASSERT(MDI_PHCI(phci_dip)); 909 910 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 911 ndi_hold_devi(phci_dip); 912 913 pcircular = (short)(circular & 0xFFFF); 914 ndi_devi_exit(phci_dip, pcircular); 915 } 916 917 void 918 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 919 { 920 int pcircular; 921 922 /* Verify calling context */ 923 ASSERT(MDI_PHCI(phci_dip)); 924 925 ndi_devi_enter(phci_dip, &pcircular); 926 927 /* Drop hold from mdi_devi_exit_phci. 
*/ 928 ndi_rele_devi(phci_dip); 929 930 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 931 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 932 } 933 934 /* 935 * mdi_devi_get_vdip(): 936 * given a pHCI dip return vHCI dip 937 */ 938 dev_info_t * 939 mdi_devi_get_vdip(dev_info_t *pdip) 940 { 941 mdi_phci_t *ph; 942 943 ph = i_devi_get_phci(pdip); 944 if (ph && ph->ph_vhci) 945 return (ph->ph_vhci->vh_dip); 946 return (NULL); 947 } 948 949 /* 950 * mdi_devi_pdip_entered(): 951 * Return 1 if we are vHCI and have done an ndi_devi_enter 952 * of a pHCI 953 */ 954 int 955 mdi_devi_pdip_entered(dev_info_t *vdip) 956 { 957 mdi_vhci_t *vh; 958 mdi_phci_t *ph; 959 960 vh = i_devi_get_vhci(vdip); 961 if (vh == NULL) 962 return (0); 963 964 MDI_VHCI_PHCI_LOCK(vh); 965 ph = vh->vh_phci_head; 966 while (ph) { 967 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 968 MDI_VHCI_PHCI_UNLOCK(vh); 969 return (1); 970 } 971 ph = ph->ph_next; 972 } 973 MDI_VHCI_PHCI_UNLOCK(vh); 974 return (0); 975 } 976 977 /* 978 * mdi_phci_path2devinfo(): 979 * Utility function to search for a valid phci device given 980 * the devfs pathname. 
981 */ 982 dev_info_t * 983 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 984 { 985 char *temp_pathname; 986 mdi_vhci_t *vh; 987 mdi_phci_t *ph; 988 dev_info_t *pdip = NULL; 989 990 vh = i_devi_get_vhci(vdip); 991 ASSERT(vh != NULL); 992 993 if (vh == NULL) { 994 /* 995 * Invalid vHCI component, return failure 996 */ 997 return (NULL); 998 } 999 1000 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1001 MDI_VHCI_PHCI_LOCK(vh); 1002 ph = vh->vh_phci_head; 1003 while (ph != NULL) { 1004 pdip = ph->ph_dip; 1005 ASSERT(pdip != NULL); 1006 *temp_pathname = '\0'; 1007 (void) ddi_pathname(pdip, temp_pathname); 1008 if (strcmp(temp_pathname, pathname) == 0) { 1009 break; 1010 } 1011 ph = ph->ph_next; 1012 } 1013 if (ph == NULL) { 1014 pdip = NULL; 1015 } 1016 MDI_VHCI_PHCI_UNLOCK(vh); 1017 kmem_free(temp_pathname, MAXPATHLEN); 1018 return (pdip); 1019 } 1020 1021 /* 1022 * mdi_phci_get_path_count(): 1023 * get number of path information nodes associated with a given 1024 * pHCI device. 1025 */ 1026 int 1027 mdi_phci_get_path_count(dev_info_t *pdip) 1028 { 1029 mdi_phci_t *ph; 1030 int count = 0; 1031 1032 ph = i_devi_get_phci(pdip); 1033 if (ph != NULL) { 1034 count = ph->ph_path_count; 1035 } 1036 return (count); 1037 } 1038 1039 /* 1040 * i_mdi_phci_lock(): 1041 * Lock a pHCI device 1042 * Return Values: 1043 * None 1044 * Note: 1045 * The default locking order is: 1046 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1047 * But there are number of situations where locks need to be 1048 * grabbed in reverse order. This routine implements try and lock 1049 * mechanism depending on the requested parameter option. 1050 */ 1051 static void 1052 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1053 { 1054 if (pip) { 1055 /* Reverse locking is requested. 
*/ 1056 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1057 if (servicing_interrupt()) { 1058 MDI_PI_HOLD(pip); 1059 MDI_PI_UNLOCK(pip); 1060 MDI_PHCI_LOCK(ph); 1061 MDI_PI_LOCK(pip); 1062 MDI_PI_RELE(pip); 1063 break; 1064 } else { 1065 /* 1066 * tryenter failed. Try to grab again 1067 * after a small delay 1068 */ 1069 MDI_PI_HOLD(pip); 1070 MDI_PI_UNLOCK(pip); 1071 delay_random(2); 1072 MDI_PI_LOCK(pip); 1073 MDI_PI_RELE(pip); 1074 } 1075 } 1076 } else { 1077 MDI_PHCI_LOCK(ph); 1078 } 1079 } 1080 1081 /* 1082 * i_mdi_phci_unlock(): 1083 * Unlock the pHCI component 1084 */ 1085 static void 1086 i_mdi_phci_unlock(mdi_phci_t *ph) 1087 { 1088 MDI_PHCI_UNLOCK(ph); 1089 } 1090 1091 /* 1092 * i_mdi_devinfo_create(): 1093 * create client device's devinfo node 1094 * Return Values: 1095 * dev_info 1096 * NULL 1097 * Notes: 1098 */ 1099 static dev_info_t * 1100 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1101 char **compatible, int ncompatible) 1102 { 1103 dev_info_t *cdip = NULL; 1104 1105 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1106 1107 /* Verify for duplicate entry */ 1108 cdip = i_mdi_devinfo_find(vh, name, guid); 1109 ASSERT(cdip == NULL); 1110 if (cdip) { 1111 cmn_err(CE_WARN, 1112 "i_mdi_devinfo_create: client %s@%s already exists", 1113 name ? name : "", guid ? 
guid : ""); 1114 } 1115 1116 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1117 if (cdip == NULL) 1118 goto fail; 1119 1120 /* 1121 * Create component type and Global unique identifier 1122 * properties 1123 */ 1124 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1125 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1126 goto fail; 1127 } 1128 1129 /* Decorate the node with compatible property */ 1130 if (compatible && 1131 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1132 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1133 goto fail; 1134 } 1135 1136 return (cdip); 1137 1138 fail: 1139 if (cdip) { 1140 (void) ndi_prop_remove_all(cdip); 1141 (void) ndi_devi_free(cdip); 1142 } 1143 return (NULL); 1144 } 1145 1146 /* 1147 * i_mdi_devinfo_find(): 1148 * Find a matching devinfo node for given client node name 1149 * and its guid. 1150 * Return Values: 1151 * Handle to a dev_info node or NULL 1152 */ 1153 static dev_info_t * 1154 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1155 { 1156 char *data; 1157 dev_info_t *cdip = NULL; 1158 dev_info_t *ndip = NULL; 1159 int circular; 1160 1161 ndi_devi_enter(vh->vh_dip, &circular); 1162 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1163 while ((cdip = ndip) != NULL) { 1164 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1165 1166 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1167 continue; 1168 } 1169 1170 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1171 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1172 &data) != DDI_PROP_SUCCESS) { 1173 continue; 1174 } 1175 1176 if (strcmp(data, guid) != 0) { 1177 ddi_prop_free(data); 1178 continue; 1179 } 1180 ddi_prop_free(data); 1181 break; 1182 } 1183 ndi_devi_exit(vh->vh_dip, circular); 1184 return (cdip); 1185 } 1186 1187 /* 1188 * i_mdi_devinfo_remove(): 1189 * Remove a client device node 1190 */ 1191 static int 1192 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1193 { 1194 int rv = MDI_SUCCESS; 1195 
1196 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1197 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1198 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1199 if (rv != NDI_SUCCESS) { 1200 MDI_DEBUG(1, (MDI_NOTE, cdip, 1201 "!failed: cdip %p", (void *)cdip)); 1202 } 1203 /* 1204 * Convert to MDI error code 1205 */ 1206 switch (rv) { 1207 case NDI_SUCCESS: 1208 rv = MDI_SUCCESS; 1209 break; 1210 case NDI_BUSY: 1211 rv = MDI_BUSY; 1212 break; 1213 default: 1214 rv = MDI_FAILURE; 1215 break; 1216 } 1217 } 1218 return (rv); 1219 } 1220 1221 /* 1222 * i_devi_get_client() 1223 * Utility function to get mpxio component extensions 1224 */ 1225 static mdi_client_t * 1226 i_devi_get_client(dev_info_t *cdip) 1227 { 1228 mdi_client_t *ct = NULL; 1229 1230 if (MDI_CLIENT(cdip)) { 1231 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1232 } 1233 return (ct); 1234 } 1235 1236 /* 1237 * i_mdi_is_child_present(): 1238 * Search for the presence of client device dev_info node 1239 */ 1240 static int 1241 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1242 { 1243 int rv = MDI_FAILURE; 1244 struct dev_info *dip; 1245 int circular; 1246 1247 ndi_devi_enter(vdip, &circular); 1248 dip = DEVI(vdip)->devi_child; 1249 while (dip) { 1250 if (dip == DEVI(cdip)) { 1251 rv = MDI_SUCCESS; 1252 break; 1253 } 1254 dip = dip->devi_sibling; 1255 } 1256 ndi_devi_exit(vdip, circular); 1257 return (rv); 1258 } 1259 1260 1261 /* 1262 * i_mdi_client_lock(): 1263 * Grab client component lock 1264 * Return Values: 1265 * None 1266 * Note: 1267 * The default locking order is: 1268 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1269 * But there are number of situations where locks need to be 1270 * grabbed in reverse order. This routine implements try and lock 1271 * mechanism depending on the requested parameter option. 
1272 */ 1273 static void 1274 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1275 { 1276 if (pip) { 1277 /* 1278 * Reverse locking is requested. 1279 */ 1280 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1281 if (servicing_interrupt()) { 1282 MDI_PI_HOLD(pip); 1283 MDI_PI_UNLOCK(pip); 1284 MDI_CLIENT_LOCK(ct); 1285 MDI_PI_LOCK(pip); 1286 MDI_PI_RELE(pip); 1287 break; 1288 } else { 1289 /* 1290 * tryenter failed. Try to grab again 1291 * after a small delay 1292 */ 1293 MDI_PI_HOLD(pip); 1294 MDI_PI_UNLOCK(pip); 1295 delay_random(2); 1296 MDI_PI_LOCK(pip); 1297 MDI_PI_RELE(pip); 1298 } 1299 } 1300 } else { 1301 MDI_CLIENT_LOCK(ct); 1302 } 1303 } 1304 1305 /* 1306 * i_mdi_client_unlock(): 1307 * Unlock a client component 1308 */ 1309 static void 1310 i_mdi_client_unlock(mdi_client_t *ct) 1311 { 1312 MDI_CLIENT_UNLOCK(ct); 1313 } 1314 1315 /* 1316 * i_mdi_client_alloc(): 1317 * Allocate and initialize a client structure. Caller should 1318 * hold the vhci client lock. 1319 * Return Values: 1320 * Handle to a client component 1321 */ 1322 /*ARGSUSED*/ 1323 static mdi_client_t * 1324 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1325 { 1326 mdi_client_t *ct; 1327 1328 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1329 1330 /* 1331 * Allocate and initialize a component structure. 
1332 */ 1333 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1334 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1335 ct->ct_hnext = NULL; 1336 ct->ct_hprev = NULL; 1337 ct->ct_dip = NULL; 1338 ct->ct_vhci = vh; 1339 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1340 (void) strcpy(ct->ct_drvname, name); 1341 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1342 (void) strcpy(ct->ct_guid, lguid); 1343 ct->ct_cprivate = NULL; 1344 ct->ct_vprivate = NULL; 1345 ct->ct_flags = 0; 1346 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1347 MDI_CLIENT_LOCK(ct); 1348 MDI_CLIENT_SET_OFFLINE(ct); 1349 MDI_CLIENT_SET_DETACH(ct); 1350 MDI_CLIENT_SET_POWER_UP(ct); 1351 MDI_CLIENT_UNLOCK(ct); 1352 ct->ct_failover_flags = 0; 1353 ct->ct_failover_status = 0; 1354 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1355 ct->ct_unstable = 0; 1356 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1357 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1358 ct->ct_lb = vh->vh_lb; 1359 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1360 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1361 ct->ct_path_count = 0; 1362 ct->ct_path_head = NULL; 1363 ct->ct_path_tail = NULL; 1364 ct->ct_path_last = NULL; 1365 1366 /* 1367 * Add this client component to our client hash queue 1368 */ 1369 i_mdi_client_enlist_table(vh, ct); 1370 return (ct); 1371 } 1372 1373 /* 1374 * i_mdi_client_enlist_table(): 1375 * Attach the client device to the client hash table. Caller 1376 * should hold the vhci client lock. 
1377 */ 1378 static void 1379 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1380 { 1381 int index; 1382 struct client_hash *head; 1383 1384 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1385 1386 index = i_mdi_get_hash_key(ct->ct_guid); 1387 head = &vh->vh_client_table[index]; 1388 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1389 head->ct_hash_head = ct; 1390 head->ct_hash_count++; 1391 vh->vh_client_count++; 1392 } 1393 1394 /* 1395 * i_mdi_client_delist_table(): 1396 * Attach the client device to the client hash table. 1397 * Caller should hold the vhci client lock. 1398 */ 1399 static void 1400 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1401 { 1402 int index; 1403 char *guid; 1404 struct client_hash *head; 1405 mdi_client_t *next; 1406 mdi_client_t *last; 1407 1408 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1409 1410 guid = ct->ct_guid; 1411 index = i_mdi_get_hash_key(guid); 1412 head = &vh->vh_client_table[index]; 1413 1414 last = NULL; 1415 next = (mdi_client_t *)head->ct_hash_head; 1416 while (next != NULL) { 1417 if (next == ct) { 1418 break; 1419 } 1420 last = next; 1421 next = next->ct_hnext; 1422 } 1423 1424 if (next) { 1425 head->ct_hash_count--; 1426 if (last == NULL) { 1427 head->ct_hash_head = ct->ct_hnext; 1428 } else { 1429 last->ct_hnext = ct->ct_hnext; 1430 } 1431 ct->ct_hnext = NULL; 1432 vh->vh_client_count--; 1433 } 1434 } 1435 1436 1437 /* 1438 * i_mdi_client_free(): 1439 * Free a client component 1440 */ 1441 static int 1442 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1443 { 1444 int rv = MDI_SUCCESS; 1445 int flags = ct->ct_flags; 1446 dev_info_t *cdip; 1447 dev_info_t *vdip; 1448 1449 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1450 1451 vdip = vh->vh_dip; 1452 cdip = ct->ct_dip; 1453 1454 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1455 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1456 DEVI(cdip)->devi_mdi_client = NULL; 1457 1458 /* 1459 * Clear out back ref. 
to dev_info_t node 1460 */ 1461 ct->ct_dip = NULL; 1462 1463 /* 1464 * Remove this client from our hash queue 1465 */ 1466 i_mdi_client_delist_table(vh, ct); 1467 1468 /* 1469 * Uninitialize and free the component 1470 */ 1471 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1472 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1473 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1474 cv_destroy(&ct->ct_failover_cv); 1475 cv_destroy(&ct->ct_unstable_cv); 1476 cv_destroy(&ct->ct_powerchange_cv); 1477 mutex_destroy(&ct->ct_mutex); 1478 kmem_free(ct, sizeof (*ct)); 1479 1480 if (cdip != NULL) { 1481 MDI_VHCI_CLIENT_UNLOCK(vh); 1482 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1483 MDI_VHCI_CLIENT_LOCK(vh); 1484 } 1485 return (rv); 1486 } 1487 1488 /* 1489 * i_mdi_client_find(): 1490 * Find the client structure corresponding to a given guid 1491 * Caller should hold the vhci client lock. 1492 */ 1493 static mdi_client_t * 1494 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1495 { 1496 int index; 1497 struct client_hash *head; 1498 mdi_client_t *ct; 1499 1500 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1501 1502 index = i_mdi_get_hash_key(guid); 1503 head = &vh->vh_client_table[index]; 1504 1505 ct = head->ct_hash_head; 1506 while (ct != NULL) { 1507 if (strcmp(ct->ct_guid, guid) == 0 && 1508 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1509 break; 1510 } 1511 ct = ct->ct_hnext; 1512 } 1513 return (ct); 1514 } 1515 1516 /* 1517 * i_mdi_client_update_state(): 1518 * Compute and update client device state 1519 * Notes: 1520 * A client device can be in any of three possible states: 1521 * 1522 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1523 * one online/standby paths. Can tolerate failures. 1524 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1525 * no alternate paths available as standby. A failure on the online 1526 * would result in loss of access to device data. 
1527 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1528 * no paths available to access the device. 1529 */ 1530 static void 1531 i_mdi_client_update_state(mdi_client_t *ct) 1532 { 1533 int state; 1534 1535 ASSERT(MDI_CLIENT_LOCKED(ct)); 1536 state = i_mdi_client_compute_state(ct, NULL); 1537 MDI_CLIENT_SET_STATE(ct, state); 1538 } 1539 1540 /* 1541 * i_mdi_client_compute_state(): 1542 * Compute client device state 1543 * 1544 * mdi_phci_t * Pointer to pHCI structure which should 1545 * while computing the new value. Used by 1546 * i_mdi_phci_offline() to find the new 1547 * client state after DR of a pHCI. 1548 */ 1549 static int 1550 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1551 { 1552 int state; 1553 int online_count = 0; 1554 int standby_count = 0; 1555 mdi_pathinfo_t *pip, *next; 1556 1557 ASSERT(MDI_CLIENT_LOCKED(ct)); 1558 pip = ct->ct_path_head; 1559 while (pip != NULL) { 1560 MDI_PI_LOCK(pip); 1561 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1562 if (MDI_PI(pip)->pi_phci == ph) { 1563 MDI_PI_UNLOCK(pip); 1564 pip = next; 1565 continue; 1566 } 1567 1568 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1569 == MDI_PATHINFO_STATE_ONLINE) 1570 online_count++; 1571 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1572 == MDI_PATHINFO_STATE_STANDBY) 1573 standby_count++; 1574 MDI_PI_UNLOCK(pip); 1575 pip = next; 1576 } 1577 1578 if (online_count == 0) { 1579 if (standby_count == 0) { 1580 state = MDI_CLIENT_STATE_FAILED; 1581 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1582 "client state failed: ct = %p", (void *)ct)); 1583 } else if (standby_count == 1) { 1584 state = MDI_CLIENT_STATE_DEGRADED; 1585 } else { 1586 state = MDI_CLIENT_STATE_OPTIMAL; 1587 } 1588 } else if (online_count == 1) { 1589 if (standby_count == 0) { 1590 state = MDI_CLIENT_STATE_DEGRADED; 1591 } else { 1592 state = MDI_CLIENT_STATE_OPTIMAL; 1593 } 1594 } else { 1595 state = MDI_CLIENT_STATE_OPTIMAL; 1596 } 1597 return (state); 1598 } 1599 
1600 /* 1601 * i_mdi_client2devinfo(): 1602 * Utility function 1603 */ 1604 dev_info_t * 1605 i_mdi_client2devinfo(mdi_client_t *ct) 1606 { 1607 return (ct->ct_dip); 1608 } 1609 1610 /* 1611 * mdi_client_path2_devinfo(): 1612 * Given the parent devinfo and child devfs pathname, search for 1613 * a valid devfs node handle. 1614 */ 1615 dev_info_t * 1616 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1617 { 1618 dev_info_t *cdip = NULL; 1619 dev_info_t *ndip = NULL; 1620 char *temp_pathname; 1621 int circular; 1622 1623 /* 1624 * Allocate temp buffer 1625 */ 1626 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1627 1628 /* 1629 * Lock parent against changes 1630 */ 1631 ndi_devi_enter(vdip, &circular); 1632 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1633 while ((cdip = ndip) != NULL) { 1634 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1635 1636 *temp_pathname = '\0'; 1637 (void) ddi_pathname(cdip, temp_pathname); 1638 if (strcmp(temp_pathname, pathname) == 0) { 1639 break; 1640 } 1641 } 1642 /* 1643 * Release devinfo lock 1644 */ 1645 ndi_devi_exit(vdip, circular); 1646 1647 /* 1648 * Free the temp buffer 1649 */ 1650 kmem_free(temp_pathname, MAXPATHLEN); 1651 return (cdip); 1652 } 1653 1654 /* 1655 * mdi_client_get_path_count(): 1656 * Utility function to get number of path information nodes 1657 * associated with a given client device. 
1658 */ 1659 int 1660 mdi_client_get_path_count(dev_info_t *cdip) 1661 { 1662 mdi_client_t *ct; 1663 int count = 0; 1664 1665 ct = i_devi_get_client(cdip); 1666 if (ct != NULL) { 1667 count = ct->ct_path_count; 1668 } 1669 return (count); 1670 } 1671 1672 1673 /* 1674 * i_mdi_get_hash_key(): 1675 * Create a hash using strings as keys 1676 * 1677 */ 1678 static int 1679 i_mdi_get_hash_key(char *str) 1680 { 1681 uint32_t g, hash = 0; 1682 char *p; 1683 1684 for (p = str; *p != '\0'; p++) { 1685 g = *p; 1686 hash += g; 1687 } 1688 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1689 } 1690 1691 /* 1692 * mdi_get_lb_policy(): 1693 * Get current load balancing policy for a given client device 1694 */ 1695 client_lb_t 1696 mdi_get_lb_policy(dev_info_t *cdip) 1697 { 1698 client_lb_t lb = LOAD_BALANCE_NONE; 1699 mdi_client_t *ct; 1700 1701 ct = i_devi_get_client(cdip); 1702 if (ct != NULL) { 1703 lb = ct->ct_lb; 1704 } 1705 return (lb); 1706 } 1707 1708 /* 1709 * mdi_set_lb_region_size(): 1710 * Set current region size for the load-balance 1711 */ 1712 int 1713 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1714 { 1715 mdi_client_t *ct; 1716 int rv = MDI_FAILURE; 1717 1718 ct = i_devi_get_client(cdip); 1719 if (ct != NULL && ct->ct_lb_args != NULL) { 1720 ct->ct_lb_args->region_size = region_size; 1721 rv = MDI_SUCCESS; 1722 } 1723 return (rv); 1724 } 1725 1726 /* 1727 * mdi_Set_lb_policy(): 1728 * Set current load balancing policy for a given client device 1729 */ 1730 int 1731 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1732 { 1733 mdi_client_t *ct; 1734 int rv = MDI_FAILURE; 1735 1736 ct = i_devi_get_client(cdip); 1737 if (ct != NULL) { 1738 ct->ct_lb = lb; 1739 rv = MDI_SUCCESS; 1740 } 1741 return (rv); 1742 } 1743 1744 /* 1745 * mdi_failover(): 1746 * failover function called by the vHCI drivers to initiate 1747 * a failover operation. This is typically due to non-availability 1748 * of online paths to route I/O requests. 
Failover can be 1749 * triggered through user application also. 1750 * 1751 * The vHCI driver calls mdi_failover() to initiate a failover 1752 * operation. mdi_failover() calls back into the vHCI driver's 1753 * vo_failover() entry point to perform the actual failover 1754 * operation. The reason for requiring the vHCI driver to 1755 * initiate failover by calling mdi_failover(), instead of directly 1756 * executing vo_failover() itself, is to ensure that the mdi 1757 * framework can keep track of the client state properly. 1758 * Additionally, mdi_failover() provides as a convenience the 1759 * option of performing the failover operation synchronously or 1760 * asynchronously 1761 * 1762 * Upon successful completion of the failover operation, the 1763 * paths that were previously ONLINE will be in the STANDBY state, 1764 * and the newly activated paths will be in the ONLINE state. 1765 * 1766 * The flags modifier determines whether the activation is done 1767 * synchronously: MDI_FAILOVER_SYNC 1768 * Return Values: 1769 * MDI_SUCCESS 1770 * MDI_FAILURE 1771 * MDI_BUSY 1772 */ 1773 /*ARGSUSED*/ 1774 int 1775 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1776 { 1777 int rv; 1778 mdi_client_t *ct; 1779 1780 ct = i_devi_get_client(cdip); 1781 ASSERT(ct != NULL); 1782 if (ct == NULL) { 1783 /* cdip is not a valid client device. Nothing more to do. */ 1784 return (MDI_FAILURE); 1785 } 1786 1787 MDI_CLIENT_LOCK(ct); 1788 1789 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1790 /* A path to the client is being freed */ 1791 MDI_CLIENT_UNLOCK(ct); 1792 return (MDI_BUSY); 1793 } 1794 1795 1796 if (MDI_CLIENT_IS_FAILED(ct)) { 1797 /* 1798 * Client is in failed state. Nothing more to do. 
1799 */ 1800 MDI_CLIENT_UNLOCK(ct); 1801 return (MDI_FAILURE); 1802 } 1803 1804 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1805 /* 1806 * Failover is already in progress; return BUSY 1807 */ 1808 MDI_CLIENT_UNLOCK(ct); 1809 return (MDI_BUSY); 1810 } 1811 /* 1812 * Make sure that mdi_pathinfo node state changes are processed. 1813 * We do not allow failovers to progress while client path state 1814 * changes are in progress 1815 */ 1816 if (ct->ct_unstable) { 1817 if (flags == MDI_FAILOVER_ASYNC) { 1818 MDI_CLIENT_UNLOCK(ct); 1819 return (MDI_BUSY); 1820 } else { 1821 while (ct->ct_unstable) 1822 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1823 } 1824 } 1825 1826 /* 1827 * Client device is in stable state. Before proceeding, perform sanity 1828 * checks again. 1829 */ 1830 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1831 (!i_ddi_devi_attached(ct->ct_dip))) { 1832 /* 1833 * Client is in failed state. Nothing more to do. 1834 */ 1835 MDI_CLIENT_UNLOCK(ct); 1836 return (MDI_FAILURE); 1837 } 1838 1839 /* 1840 * Set the client state as failover in progress. 1841 */ 1842 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1843 ct->ct_failover_flags = flags; 1844 MDI_CLIENT_UNLOCK(ct); 1845 1846 if (flags == MDI_FAILOVER_ASYNC) { 1847 /* 1848 * Submit the initiate failover request via CPR safe 1849 * taskq threads. 1850 */ 1851 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1852 ct, KM_SLEEP); 1853 return (MDI_ACCEPT); 1854 } else { 1855 /* 1856 * Synchronous failover mode. Typically invoked from the user 1857 * land. 1858 */ 1859 rv = i_mdi_failover(ct); 1860 } 1861 return (rv); 1862 } 1863 1864 /* 1865 * i_mdi_failover(): 1866 * internal failover function. Invokes vHCI drivers failover 1867 * callback function and process the failover status 1868 * Return Values: 1869 * None 1870 * 1871 * Note: A client device in failover state can not be detached or freed. 
1872 */ 1873 static int 1874 i_mdi_failover(void *arg) 1875 { 1876 int rv = MDI_SUCCESS; 1877 mdi_client_t *ct = (mdi_client_t *)arg; 1878 mdi_vhci_t *vh = ct->ct_vhci; 1879 1880 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1881 1882 if (vh->vh_ops->vo_failover != NULL) { 1883 /* 1884 * Call vHCI drivers callback routine 1885 */ 1886 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1887 ct->ct_failover_flags); 1888 } 1889 1890 MDI_CLIENT_LOCK(ct); 1891 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1892 1893 /* 1894 * Save the failover return status 1895 */ 1896 ct->ct_failover_status = rv; 1897 1898 /* 1899 * As a result of failover, client status would have been changed. 1900 * Update the client state and wake up anyone waiting on this client 1901 * device. 1902 */ 1903 i_mdi_client_update_state(ct); 1904 1905 cv_broadcast(&ct->ct_failover_cv); 1906 MDI_CLIENT_UNLOCK(ct); 1907 return (rv); 1908 } 1909 1910 /* 1911 * Load balancing is logical block. 1912 * IOs within the range described by region_size 1913 * would go on the same path. This would improve the 1914 * performance by cache-hit on some of the RAID devices. 1915 * Search only for online paths(At some point we 1916 * may want to balance across target ports). 1917 * If no paths are found then default to round-robin. 
1918 */ 1919 static int 1920 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1921 { 1922 int path_index = -1; 1923 int online_path_count = 0; 1924 int online_nonpref_path_count = 0; 1925 int region_size = ct->ct_lb_args->region_size; 1926 mdi_pathinfo_t *pip; 1927 mdi_pathinfo_t *next; 1928 int preferred, path_cnt; 1929 1930 pip = ct->ct_path_head; 1931 while (pip) { 1932 MDI_PI_LOCK(pip); 1933 if (MDI_PI(pip)->pi_state == 1934 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1935 online_path_count++; 1936 } else if (MDI_PI(pip)->pi_state == 1937 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1938 online_nonpref_path_count++; 1939 } 1940 next = (mdi_pathinfo_t *) 1941 MDI_PI(pip)->pi_client_link; 1942 MDI_PI_UNLOCK(pip); 1943 pip = next; 1944 } 1945 /* if found any online/preferred then use this type */ 1946 if (online_path_count > 0) { 1947 path_cnt = online_path_count; 1948 preferred = 1; 1949 } else if (online_nonpref_path_count > 0) { 1950 path_cnt = online_nonpref_path_count; 1951 preferred = 0; 1952 } else { 1953 path_cnt = 0; 1954 } 1955 if (path_cnt) { 1956 path_index = (bp->b_blkno >> region_size) % path_cnt; 1957 pip = ct->ct_path_head; 1958 while (pip && path_index != -1) { 1959 MDI_PI_LOCK(pip); 1960 if (path_index == 0 && 1961 (MDI_PI(pip)->pi_state == 1962 MDI_PATHINFO_STATE_ONLINE) && 1963 MDI_PI(pip)->pi_preferred == preferred) { 1964 MDI_PI_HOLD(pip); 1965 MDI_PI_UNLOCK(pip); 1966 *ret_pip = pip; 1967 return (MDI_SUCCESS); 1968 } 1969 path_index --; 1970 next = (mdi_pathinfo_t *) 1971 MDI_PI(pip)->pi_client_link; 1972 MDI_PI_UNLOCK(pip); 1973 pip = next; 1974 } 1975 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1976 "lba %llx: path %s %p", 1977 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1978 } 1979 return (MDI_FAILURE); 1980 } 1981 1982 /* 1983 * mdi_select_path(): 1984 * select a path to access a client device. 
*
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online path is available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request.  If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is configured through the vHCI driver's configuration
 *		file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the "arg" argument depends
 *		on the flags specified.  If MDI_SELECT_PATH_INSTANCE is set
 *		then the argument is the "path instance" of the path to select.
 *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *		"start_pip".  A non NULL "start_pip" is the starting point to
 *		walk and find the next appropriate path.  The following values
 *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select a
 *		STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg.
during 2019 * attach of client devices (to avoid an unnecessary failover 2020 * when the STANDBY path comes up first), during failover 2021 * (to activate a STANDBY path as ONLINE). 2022 * 2023 * The selected path is returned in a a mdi_hold_path() state 2024 * (pi_ref_cnt). Caller should release the hold by calling 2025 * mdi_rele_path(). 2026 * 2027 * Return Values: 2028 * MDI_SUCCESS - Completed successfully 2029 * MDI_BUSY - Client device is busy failing over 2030 * MDI_NOPATH - Client device is online, but no valid path are 2031 * available to access this client device 2032 * MDI_FAILURE - Invalid client device or state 2033 * MDI_DEVI_ONLINING 2034 * - Client device (struct dev_info state) is in 2035 * onlining state. 2036 */ 2037 2038 /*ARGSUSED*/ 2039 int 2040 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2041 void *arg, mdi_pathinfo_t **ret_pip) 2042 { 2043 mdi_client_t *ct; 2044 mdi_pathinfo_t *pip; 2045 mdi_pathinfo_t *next; 2046 mdi_pathinfo_t *head; 2047 mdi_pathinfo_t *start; 2048 client_lb_t lbp; /* load balancing policy */ 2049 int sb = 1; /* standard behavior */ 2050 int preferred = 1; /* preferred path */ 2051 int cond, cont = 1; 2052 int retry = 0; 2053 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2054 int path_instance; /* request specific path instance */ 2055 2056 /* determine type of arg based on flags */ 2057 if (flags & MDI_SELECT_PATH_INSTANCE) { 2058 path_instance = (int)(intptr_t)arg; 2059 start_pip = NULL; 2060 } else { 2061 path_instance = 0; 2062 start_pip = (mdi_pathinfo_t *)arg; 2063 } 2064 2065 if (flags != 0) { 2066 /* 2067 * disable default behavior 2068 */ 2069 sb = 0; 2070 } 2071 2072 *ret_pip = NULL; 2073 ct = i_devi_get_client(cdip); 2074 if (ct == NULL) { 2075 /* mdi extensions are NULL, Nothing more to do */ 2076 return (MDI_FAILURE); 2077 } 2078 2079 MDI_CLIENT_LOCK(ct); 2080 2081 if (sb) { 2082 if (MDI_CLIENT_IS_FAILED(ct)) { 2083 /* 2084 * Client is not ready to accept any I/O requests. 
2085 * Fail this request. 2086 */ 2087 MDI_DEBUG(2, (MDI_NOTE, cdip, 2088 "client state offline ct = %p", (void *)ct)); 2089 MDI_CLIENT_UNLOCK(ct); 2090 return (MDI_FAILURE); 2091 } 2092 2093 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2094 /* 2095 * Check for Failover is in progress. If so tell the 2096 * caller that this device is busy. 2097 */ 2098 MDI_DEBUG(2, (MDI_NOTE, cdip, 2099 "client failover in progress ct = %p", 2100 (void *)ct)); 2101 MDI_CLIENT_UNLOCK(ct); 2102 return (MDI_BUSY); 2103 } 2104 2105 /* 2106 * Check to see whether the client device is attached. 2107 * If not so, let the vHCI driver manually select a path 2108 * (standby) and let the probe/attach process to continue. 2109 */ 2110 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2111 MDI_DEBUG(4, (MDI_NOTE, cdip, 2112 "devi is onlining ct = %p", (void *)ct)); 2113 MDI_CLIENT_UNLOCK(ct); 2114 return (MDI_DEVI_ONLINING); 2115 } 2116 } 2117 2118 /* 2119 * Cache in the client list head. If head of the list is NULL 2120 * return MDI_NOPATH 2121 */ 2122 head = ct->ct_path_head; 2123 if (head == NULL) { 2124 MDI_CLIENT_UNLOCK(ct); 2125 return (MDI_NOPATH); 2126 } 2127 2128 /* Caller is specifying a specific pathinfo path by path_instance */ 2129 if (path_instance) { 2130 /* search for pathinfo with correct path_instance */ 2131 for (pip = head; 2132 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2133 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2134 ; 2135 2136 /* If path can't be selected then MDI_FAILURE is returned. */ 2137 if (pip == NULL) { 2138 MDI_CLIENT_UNLOCK(ct); 2139 return (MDI_FAILURE); 2140 } 2141 2142 /* 2143 * Verify state of path. When asked to select a specific 2144 * path_instance, we select the requested path in any 2145 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2146 * We don't however select paths where the pHCI has detached. 
2147 * NOTE: last pathinfo node of an opened client device may 2148 * exist in an OFFLINE state after the pHCI associated with 2149 * that path has detached (but pi_phci will be NULL if that 2150 * has occurred). 2151 */ 2152 MDI_PI_LOCK(pip); 2153 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2154 (MDI_PI(pip)->pi_phci == NULL)) { 2155 MDI_PI_UNLOCK(pip); 2156 MDI_CLIENT_UNLOCK(ct); 2157 return (MDI_FAILURE); 2158 } 2159 2160 /* 2161 * Return the path in hold state. Caller should release the 2162 * lock by calling mdi_rele_path() 2163 */ 2164 MDI_PI_HOLD(pip); 2165 MDI_PI_UNLOCK(pip); 2166 *ret_pip = pip; 2167 MDI_CLIENT_UNLOCK(ct); 2168 return (MDI_SUCCESS); 2169 } 2170 2171 /* 2172 * for non default behavior, bypass current 2173 * load balancing policy and always use LOAD_BALANCE_RR 2174 * except that the start point will be adjusted based 2175 * on the provided start_pip 2176 */ 2177 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2178 2179 switch (lbp) { 2180 case LOAD_BALANCE_NONE: 2181 /* 2182 * Load balancing is None or Alternate path mode 2183 * Start looking for a online mdi_pathinfo node starting from 2184 * last known selected path 2185 */ 2186 preferred = 1; 2187 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2188 if (pip == NULL) { 2189 pip = head; 2190 } 2191 start = pip; 2192 do { 2193 MDI_PI_LOCK(pip); 2194 /* 2195 * No need to explicitly check if the path is disabled. 2196 * Since we are checking for state == ONLINE and the 2197 * same variable is used for DISABLE/ENABLE information. 2198 */ 2199 if ((MDI_PI(pip)->pi_state == 2200 MDI_PATHINFO_STATE_ONLINE) && 2201 preferred == MDI_PI(pip)->pi_preferred) { 2202 /* 2203 * Return the path in hold state. Caller should 2204 * release the lock by calling mdi_rele_path() 2205 */ 2206 MDI_PI_HOLD(pip); 2207 MDI_PI_UNLOCK(pip); 2208 ct->ct_path_last = pip; 2209 *ret_pip = pip; 2210 MDI_CLIENT_UNLOCK(ct); 2211 return (MDI_SUCCESS); 2212 } 2213 2214 /* 2215 * Path is busy. 
2216 */ 2217 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2218 MDI_PI_IS_TRANSIENT(pip)) 2219 retry = 1; 2220 /* 2221 * Keep looking for a next available online path 2222 */ 2223 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2224 if (next == NULL) { 2225 next = head; 2226 } 2227 MDI_PI_UNLOCK(pip); 2228 pip = next; 2229 if (start == pip && preferred) { 2230 preferred = 0; 2231 } else if (start == pip && !preferred) { 2232 cont = 0; 2233 } 2234 } while (cont); 2235 break; 2236 2237 case LOAD_BALANCE_LBA: 2238 /* 2239 * Make sure we are looking 2240 * for an online path. Otherwise, if it is for a STANDBY 2241 * path request, it will go through and fetch an ONLINE 2242 * path which is not desirable. 2243 */ 2244 if ((ct->ct_lb_args != NULL) && 2245 (ct->ct_lb_args->region_size) && bp && 2246 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2247 if (i_mdi_lba_lb(ct, ret_pip, bp) 2248 == MDI_SUCCESS) { 2249 MDI_CLIENT_UNLOCK(ct); 2250 return (MDI_SUCCESS); 2251 } 2252 } 2253 /* FALLTHROUGH */ 2254 case LOAD_BALANCE_RR: 2255 /* 2256 * Load balancing is Round Robin. Start looking for a online 2257 * mdi_pathinfo node starting from last known selected path 2258 * as the start point. If override flags are specified, 2259 * process accordingly. 2260 * If the search is already in effect(start_pip not null), 2261 * then lets just use the same path preference to continue the 2262 * traversal. 2263 */ 2264 2265 if (start_pip != NULL) { 2266 preferred = MDI_PI(start_pip)->pi_preferred; 2267 } else { 2268 preferred = 1; 2269 } 2270 2271 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2272 if (start == NULL) { 2273 pip = head; 2274 } else { 2275 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2276 if (pip == NULL) { 2277 if ( flags & MDI_SELECT_NO_PREFERRED) { 2278 /* 2279 * Return since we hit the end of list 2280 */ 2281 MDI_CLIENT_UNLOCK(ct); 2282 return (MDI_NOPATH); 2283 } 2284 2285 if (!sb) { 2286 if (preferred == 0) { 2287 /* 2288 * Looks like we have completed 2289 * the traversal as preferred 2290 * value is 0. Time to bail out. 2291 */ 2292 *ret_pip = NULL; 2293 MDI_CLIENT_UNLOCK(ct); 2294 return (MDI_NOPATH); 2295 } else { 2296 /* 2297 * Looks like we reached the 2298 * end of the list. Lets enable 2299 * traversal of non preferred 2300 * paths. 2301 */ 2302 preferred = 0; 2303 } 2304 } 2305 pip = head; 2306 } 2307 } 2308 start = pip; 2309 do { 2310 MDI_PI_LOCK(pip); 2311 if (sb) { 2312 cond = ((MDI_PI(pip)->pi_state == 2313 MDI_PATHINFO_STATE_ONLINE && 2314 MDI_PI(pip)->pi_preferred == 2315 preferred) ? 1 : 0); 2316 } else { 2317 if (flags == MDI_SELECT_ONLINE_PATH) { 2318 cond = ((MDI_PI(pip)->pi_state == 2319 MDI_PATHINFO_STATE_ONLINE && 2320 MDI_PI(pip)->pi_preferred == 2321 preferred) ? 1 : 0); 2322 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2323 cond = ((MDI_PI(pip)->pi_state == 2324 MDI_PATHINFO_STATE_STANDBY && 2325 MDI_PI(pip)->pi_preferred == 2326 preferred) ? 1 : 0); 2327 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2328 MDI_SELECT_STANDBY_PATH)) { 2329 cond = (((MDI_PI(pip)->pi_state == 2330 MDI_PATHINFO_STATE_ONLINE || 2331 (MDI_PI(pip)->pi_state == 2332 MDI_PATHINFO_STATE_STANDBY)) && 2333 MDI_PI(pip)->pi_preferred == 2334 preferred) ? 
1 : 0); 2335 } else if (flags == 2336 (MDI_SELECT_STANDBY_PATH | 2337 MDI_SELECT_ONLINE_PATH | 2338 MDI_SELECT_USER_DISABLE_PATH)) { 2339 cond = (((MDI_PI(pip)->pi_state == 2340 MDI_PATHINFO_STATE_ONLINE || 2341 (MDI_PI(pip)->pi_state == 2342 MDI_PATHINFO_STATE_STANDBY) || 2343 (MDI_PI(pip)->pi_state == 2344 (MDI_PATHINFO_STATE_ONLINE| 2345 MDI_PATHINFO_STATE_USER_DISABLE)) || 2346 (MDI_PI(pip)->pi_state == 2347 (MDI_PATHINFO_STATE_STANDBY | 2348 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2349 MDI_PI(pip)->pi_preferred == 2350 preferred) ? 1 : 0); 2351 } else if (flags == 2352 (MDI_SELECT_STANDBY_PATH | 2353 MDI_SELECT_ONLINE_PATH | 2354 MDI_SELECT_NO_PREFERRED)) { 2355 cond = (((MDI_PI(pip)->pi_state == 2356 MDI_PATHINFO_STATE_ONLINE) || 2357 (MDI_PI(pip)->pi_state == 2358 MDI_PATHINFO_STATE_STANDBY)) 2359 ? 1 : 0); 2360 } else { 2361 cond = 0; 2362 } 2363 } 2364 /* 2365 * No need to explicitly check if the path is disabled. 2366 * Since we are checking for state == ONLINE and the 2367 * same variable is used for DISABLE/ENABLE information. 2368 */ 2369 if (cond) { 2370 /* 2371 * Return the path in hold state. Caller should 2372 * release the lock by calling mdi_rele_path() 2373 */ 2374 MDI_PI_HOLD(pip); 2375 MDI_PI_UNLOCK(pip); 2376 if (sb) 2377 ct->ct_path_last = pip; 2378 *ret_pip = pip; 2379 MDI_CLIENT_UNLOCK(ct); 2380 return (MDI_SUCCESS); 2381 } 2382 /* 2383 * Path is busy. 2384 */ 2385 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2386 MDI_PI_IS_TRANSIENT(pip)) 2387 retry = 1; 2388 2389 /* 2390 * Keep looking for a next available online path 2391 */ 2392 do_again: 2393 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2394 if (next == NULL) { 2395 if ( flags & MDI_SELECT_NO_PREFERRED) { 2396 /* 2397 * Bail out since we hit the end of list 2398 */ 2399 MDI_PI_UNLOCK(pip); 2400 break; 2401 } 2402 2403 if (!sb) { 2404 if (preferred == 1) { 2405 /* 2406 * Looks like we reached the 2407 * end of the list. Lets enable 2408 * traversal of non preferred 2409 * paths. 
2410 */ 2411 preferred = 0; 2412 next = head; 2413 } else { 2414 /* 2415 * We have done both the passes 2416 * Preferred as well as for 2417 * Non-preferred. Bail out now. 2418 */ 2419 cont = 0; 2420 } 2421 } else { 2422 /* 2423 * Standard behavior case. 2424 */ 2425 next = head; 2426 } 2427 } 2428 MDI_PI_UNLOCK(pip); 2429 if (cont == 0) { 2430 break; 2431 } 2432 pip = next; 2433 2434 if (!sb) { 2435 /* 2436 * We need to handle the selection of 2437 * non-preferred path in the following 2438 * case: 2439 * 2440 * +------+ +------+ +------+ +-----+ 2441 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2442 * +------+ +------+ +------+ +-----+ 2443 * 2444 * If we start the search with B, we need to 2445 * skip beyond B to pick C which is non - 2446 * preferred in the second pass. The following 2447 * test, if true, will allow us to skip over 2448 * the 'start'(B in the example) to select 2449 * other non preferred elements. 2450 */ 2451 if ((start_pip != NULL) && (start_pip == pip) && 2452 (MDI_PI(start_pip)->pi_preferred 2453 != preferred)) { 2454 /* 2455 * try again after going past the start 2456 * pip 2457 */ 2458 MDI_PI_LOCK(pip); 2459 goto do_again; 2460 } 2461 } else { 2462 /* 2463 * Standard behavior case 2464 */ 2465 if (start == pip && preferred) { 2466 /* look for nonpreferred paths */ 2467 preferred = 0; 2468 } else if (start == pip && !preferred) { 2469 /* 2470 * Exit condition 2471 */ 2472 cont = 0; 2473 } 2474 } 2475 } while (cont); 2476 break; 2477 } 2478 2479 MDI_CLIENT_UNLOCK(ct); 2480 if (retry == 1) { 2481 return (MDI_BUSY); 2482 } else { 2483 return (MDI_NOPATH); 2484 } 2485 } 2486 2487 /* 2488 * For a client, return the next available path to any phci 2489 * 2490 * Note: 2491 * Caller should hold the branch's devinfo node to get a consistent 2492 * snap shot of the mdi_pathinfo nodes. 2493 * 2494 * Please note that even the list is stable the mdi_pathinfo 2495 * node state and properties are volatile. 
The caller should lock 2496 * and unlock the nodes by calling mdi_pi_lock() and 2497 * mdi_pi_unlock() functions to get a stable properties. 2498 * 2499 * If there is a need to use the nodes beyond the hold of the 2500 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2501 * need to be held against unexpected removal by calling 2502 * mdi_hold_path() and should be released by calling 2503 * mdi_rele_path() on completion. 2504 */ 2505 mdi_pathinfo_t * 2506 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2507 { 2508 mdi_client_t *ct; 2509 2510 if (!MDI_CLIENT(ct_dip)) 2511 return (NULL); 2512 2513 /* 2514 * Walk through client link 2515 */ 2516 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2517 ASSERT(ct != NULL); 2518 2519 if (pip == NULL) 2520 return ((mdi_pathinfo_t *)ct->ct_path_head); 2521 2522 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2523 } 2524 2525 /* 2526 * For a phci, return the next available path to any client 2527 * Note: ditto mdi_get_next_phci_path() 2528 */ 2529 mdi_pathinfo_t * 2530 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2531 { 2532 mdi_phci_t *ph; 2533 2534 if (!MDI_PHCI(ph_dip)) 2535 return (NULL); 2536 2537 /* 2538 * Walk through pHCI link 2539 */ 2540 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2541 ASSERT(ph != NULL); 2542 2543 if (pip == NULL) 2544 return ((mdi_pathinfo_t *)ph->ph_path_head); 2545 2546 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2547 } 2548 2549 /* 2550 * mdi_hold_path(): 2551 * Hold the mdi_pathinfo node against unwanted unexpected free. 2552 * Return Values: 2553 * None 2554 */ 2555 void 2556 mdi_hold_path(mdi_pathinfo_t *pip) 2557 { 2558 if (pip) { 2559 MDI_PI_LOCK(pip); 2560 MDI_PI_HOLD(pip); 2561 MDI_PI_UNLOCK(pip); 2562 } 2563 } 2564 2565 2566 /* 2567 * mdi_rele_path(): 2568 * Release the mdi_pathinfo node which was selected 2569 * through mdi_select_path() mechanism or manually held by 2570 * calling mdi_hold_path(). 
2571 * Return Values: 2572 * None 2573 */ 2574 void 2575 mdi_rele_path(mdi_pathinfo_t *pip) 2576 { 2577 if (pip) { 2578 MDI_PI_LOCK(pip); 2579 MDI_PI_RELE(pip); 2580 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2581 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2582 } 2583 MDI_PI_UNLOCK(pip); 2584 } 2585 } 2586 2587 /* 2588 * mdi_pi_lock(): 2589 * Lock the mdi_pathinfo node. 2590 * Note: 2591 * The caller should release the lock by calling mdi_pi_unlock() 2592 */ 2593 void 2594 mdi_pi_lock(mdi_pathinfo_t *pip) 2595 { 2596 ASSERT(pip != NULL); 2597 if (pip) { 2598 MDI_PI_LOCK(pip); 2599 } 2600 } 2601 2602 2603 /* 2604 * mdi_pi_unlock(): 2605 * Unlock the mdi_pathinfo node. 2606 * Note: 2607 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2608 */ 2609 void 2610 mdi_pi_unlock(mdi_pathinfo_t *pip) 2611 { 2612 ASSERT(pip != NULL); 2613 if (pip) { 2614 MDI_PI_UNLOCK(pip); 2615 } 2616 } 2617 2618 /* 2619 * mdi_pi_find(): 2620 * Search the list of mdi_pathinfo nodes attached to the 2621 * pHCI/Client device node whose path address matches "paddr". 2622 * Returns a pointer to the mdi_pathinfo node if a matching node is 2623 * found. 2624 * Return Values: 2625 * mdi_pathinfo node handle 2626 * NULL 2627 * Notes: 2628 * Caller need not hold any locks to call this function. 2629 */ 2630 mdi_pathinfo_t * 2631 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2632 { 2633 mdi_phci_t *ph; 2634 mdi_vhci_t *vh; 2635 mdi_client_t *ct; 2636 mdi_pathinfo_t *pip = NULL; 2637 2638 MDI_DEBUG(2, (MDI_NOTE, pdip, 2639 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : "")); 2640 if ((pdip == NULL) || (paddr == NULL)) { 2641 return (NULL); 2642 } 2643 ph = i_devi_get_phci(pdip); 2644 if (ph == NULL) { 2645 /* 2646 * Invalid pHCI device, Nothing more to do. 2647 */ 2648 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci")); 2649 return (NULL); 2650 } 2651 2652 vh = ph->ph_vhci; 2653 if (vh == NULL) { 2654 /* 2655 * Invalid vHCI device, Nothing more to do. 
2656 */ 2657 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci")); 2658 return (NULL); 2659 } 2660 2661 /* 2662 * Look for pathinfo node identified by paddr. 2663 */ 2664 if (caddr == NULL) { 2665 /* 2666 * Find a mdi_pathinfo node under pHCI list for a matching 2667 * unit address. 2668 */ 2669 MDI_PHCI_LOCK(ph); 2670 if (MDI_PHCI_IS_OFFLINE(ph)) { 2671 MDI_DEBUG(2, (MDI_WARN, pdip, 2672 "offline phci %p", (void *)ph)); 2673 MDI_PHCI_UNLOCK(ph); 2674 return (NULL); 2675 } 2676 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2677 2678 while (pip != NULL) { 2679 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2680 break; 2681 } 2682 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2683 } 2684 MDI_PHCI_UNLOCK(ph); 2685 MDI_DEBUG(2, (MDI_NOTE, pdip, 2686 "found %s %p", mdi_pi_spathname(pip), (void *)pip)); 2687 return (pip); 2688 } 2689 2690 /* 2691 * XXX - Is the rest of the code in this function really necessary? 2692 * The consumers of mdi_pi_find() can search for the desired pathinfo 2693 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2694 * whether the search is based on the pathinfo nodes attached to 2695 * the pHCI or the client node, the result will be the same. 2696 */ 2697 2698 /* 2699 * Find the client device corresponding to 'caddr' 2700 */ 2701 MDI_VHCI_CLIENT_LOCK(vh); 2702 2703 /* 2704 * XXX - Passing NULL to the following function works as long as the 2705 * the client addresses (caddr) are unique per vhci basis. 2706 */ 2707 ct = i_mdi_client_find(vh, NULL, caddr); 2708 if (ct == NULL) { 2709 /* 2710 * Client not found, Obviously mdi_pathinfo node has not been 2711 * created yet. 2712 */ 2713 MDI_VHCI_CLIENT_UNLOCK(vh); 2714 MDI_DEBUG(2, (MDI_NOTE, pdip, 2715 "client not found for caddr @%s", caddr ? 
caddr : "")); 2716 return (NULL); 2717 } 2718 2719 /* 2720 * Hold the client lock and look for a mdi_pathinfo node with matching 2721 * pHCI and paddr 2722 */ 2723 MDI_CLIENT_LOCK(ct); 2724 2725 /* 2726 * Release the global mutex as it is no more needed. Note: We always 2727 * respect the locking order while acquiring. 2728 */ 2729 MDI_VHCI_CLIENT_UNLOCK(vh); 2730 2731 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2732 while (pip != NULL) { 2733 /* 2734 * Compare the unit address 2735 */ 2736 if ((MDI_PI(pip)->pi_phci == ph) && 2737 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2738 break; 2739 } 2740 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2741 } 2742 MDI_CLIENT_UNLOCK(ct); 2743 MDI_DEBUG(2, (MDI_NOTE, pdip, 2744 "found: %s %p", mdi_pi_spathname(pip), (void *)pip)); 2745 return (pip); 2746 } 2747 2748 /* 2749 * mdi_pi_alloc(): 2750 * Allocate and initialize a new instance of a mdi_pathinfo node. 2751 * The mdi_pathinfo node returned by this function identifies a 2752 * unique device path is capable of having properties attached 2753 * and passed to mdi_pi_online() to fully attach and online the 2754 * path and client device node. 2755 * The mdi_pathinfo node returned by this function must be 2756 * destroyed using mdi_pi_free() if the path is no longer 2757 * operational or if the caller fails to attach a client device 2758 * node when calling mdi_pi_online(). The framework will not free 2759 * the resources allocated. 2760 * This function can be called from both interrupt and kernel 2761 * contexts. DDI_NOSLEEP flag should be used while calling 2762 * from interrupt contexts. 
2763 * Return Values: 2764 * MDI_SUCCESS 2765 * MDI_FAILURE 2766 * MDI_NOMEM 2767 */ 2768 /*ARGSUSED*/ 2769 int 2770 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2771 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2772 { 2773 mdi_vhci_t *vh; 2774 mdi_phci_t *ph; 2775 mdi_client_t *ct; 2776 mdi_pathinfo_t *pip = NULL; 2777 dev_info_t *cdip; 2778 int rv = MDI_NOMEM; 2779 int path_allocated = 0; 2780 2781 MDI_DEBUG(2, (MDI_NOTE, pdip, 2782 "cname %s: caddr@%s paddr@%s", 2783 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : "")); 2784 2785 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2786 ret_pip == NULL) { 2787 /* Nothing more to do */ 2788 return (MDI_FAILURE); 2789 } 2790 2791 *ret_pip = NULL; 2792 2793 /* No allocations on detaching pHCI */ 2794 if (DEVI_IS_DETACHING(pdip)) { 2795 /* Invalid pHCI device, return failure */ 2796 MDI_DEBUG(1, (MDI_WARN, pdip, 2797 "!detaching pHCI=%p", (void *)pdip)); 2798 return (MDI_FAILURE); 2799 } 2800 2801 ph = i_devi_get_phci(pdip); 2802 ASSERT(ph != NULL); 2803 if (ph == NULL) { 2804 /* Invalid pHCI device, return failure */ 2805 MDI_DEBUG(1, (MDI_WARN, pdip, 2806 "!invalid pHCI=%p", (void *)pdip)); 2807 return (MDI_FAILURE); 2808 } 2809 2810 MDI_PHCI_LOCK(ph); 2811 vh = ph->ph_vhci; 2812 if (vh == NULL) { 2813 /* Invalid vHCI device, return failure */ 2814 MDI_DEBUG(1, (MDI_WARN, pdip, 2815 "!invalid vHCI=%p", (void *)pdip)); 2816 MDI_PHCI_UNLOCK(ph); 2817 return (MDI_FAILURE); 2818 } 2819 2820 if (MDI_PHCI_IS_READY(ph) == 0) { 2821 /* 2822 * Do not allow new node creation when pHCI is in 2823 * offline/suspended states 2824 */ 2825 MDI_DEBUG(1, (MDI_WARN, pdip, 2826 "pHCI=%p is not ready", (void *)ph)); 2827 MDI_PHCI_UNLOCK(ph); 2828 return (MDI_BUSY); 2829 } 2830 MDI_PHCI_UNSTABLE(ph); 2831 MDI_PHCI_UNLOCK(ph); 2832 2833 /* look for a matching client, create one if not found */ 2834 MDI_VHCI_CLIENT_LOCK(vh); 2835 ct = 
i_mdi_client_find(vh, cname, caddr); 2836 if (ct == NULL) { 2837 ct = i_mdi_client_alloc(vh, cname, caddr); 2838 ASSERT(ct != NULL); 2839 } 2840 2841 if (ct->ct_dip == NULL) { 2842 /* 2843 * Allocate a devinfo node 2844 */ 2845 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2846 compatible, ncompatible); 2847 if (ct->ct_dip == NULL) { 2848 (void) i_mdi_client_free(vh, ct); 2849 goto fail; 2850 } 2851 } 2852 cdip = ct->ct_dip; 2853 2854 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2855 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2856 2857 MDI_CLIENT_LOCK(ct); 2858 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2859 while (pip != NULL) { 2860 /* 2861 * Compare the unit address 2862 */ 2863 if ((MDI_PI(pip)->pi_phci == ph) && 2864 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2865 break; 2866 } 2867 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2868 } 2869 MDI_CLIENT_UNLOCK(ct); 2870 2871 if (pip == NULL) { 2872 /* 2873 * This is a new path for this client device. Allocate and 2874 * initialize a new pathinfo node 2875 */ 2876 pip = i_mdi_pi_alloc(ph, paddr, ct); 2877 ASSERT(pip != NULL); 2878 path_allocated = 1; 2879 } 2880 rv = MDI_SUCCESS; 2881 2882 fail: 2883 /* 2884 * Release the global mutex. 
2885 */ 2886 MDI_VHCI_CLIENT_UNLOCK(vh); 2887 2888 /* 2889 * Mark the pHCI as stable 2890 */ 2891 MDI_PHCI_LOCK(ph); 2892 MDI_PHCI_STABLE(ph); 2893 MDI_PHCI_UNLOCK(ph); 2894 *ret_pip = pip; 2895 2896 MDI_DEBUG(2, (MDI_NOTE, pdip, 2897 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip)); 2898 2899 if (path_allocated) 2900 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2901 2902 return (rv); 2903 } 2904 2905 /*ARGSUSED*/ 2906 int 2907 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2908 int flags, mdi_pathinfo_t **ret_pip) 2909 { 2910 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2911 flags, ret_pip)); 2912 } 2913 2914 /* 2915 * i_mdi_pi_alloc(): 2916 * Allocate a mdi_pathinfo node and add to the pHCI path list 2917 * Return Values: 2918 * mdi_pathinfo 2919 */ 2920 /*ARGSUSED*/ 2921 static mdi_pathinfo_t * 2922 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2923 { 2924 mdi_pathinfo_t *pip; 2925 int ct_circular; 2926 int ph_circular; 2927 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */ 2928 char *path_persistent; 2929 int path_instance; 2930 mod_hash_val_t hv; 2931 2932 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2933 2934 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2935 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2936 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2937 MDI_PATHINFO_STATE_TRANSIENT; 2938 2939 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2940 MDI_PI_SET_USER_DISABLE(pip); 2941 2942 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2943 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2944 2945 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2946 MDI_PI_SET_DRV_DISABLE(pip); 2947 2948 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2949 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2950 MDI_PI(pip)->pi_client = ct; 2951 MDI_PI(pip)->pi_phci = ph; 2952 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2953 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2954 
2955 /* 2956 * We form the "path" to the pathinfo node, and see if we have 2957 * already allocated a 'path_instance' for that "path". If so, 2958 * we use the already allocated 'path_instance'. If not, we 2959 * allocate a new 'path_instance' and associate it with a copy of 2960 * the "path" string (which is never freed). The association 2961 * between a 'path_instance' this "path" string persists until 2962 * reboot. 2963 */ 2964 mutex_enter(&mdi_pathmap_mutex); 2965 (void) ddi_pathname(ph->ph_dip, path); 2966 (void) sprintf(path + strlen(path), "/%s@%s", 2967 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2968 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2969 path_instance = (uint_t)(intptr_t)hv; 2970 } else { 2971 /* allocate a new 'path_instance' and persistent "path" */ 2972 path_instance = mdi_pathmap_instance++; 2973 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2974 (void) mod_hash_insert(mdi_pathmap_bypath, 2975 (mod_hash_key_t)path_persistent, 2976 (mod_hash_val_t)(intptr_t)path_instance); 2977 (void) mod_hash_insert(mdi_pathmap_byinstance, 2978 (mod_hash_key_t)(intptr_t)path_instance, 2979 (mod_hash_val_t)path_persistent); 2980 2981 /* create shortpath name */ 2982 (void) snprintf(path, sizeof(path), "%s%d/%s@%s", 2983 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip), 2984 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2985 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2986 (void) mod_hash_insert(mdi_pathmap_sbyinstance, 2987 (mod_hash_key_t)(intptr_t)path_instance, 2988 (mod_hash_val_t)path_persistent); 2989 } 2990 mutex_exit(&mdi_pathmap_mutex); 2991 MDI_PI(pip)->pi_path_instance = path_instance; 2992 2993 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2994 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2995 MDI_PI(pip)->pi_pprivate = NULL; 2996 MDI_PI(pip)->pi_cprivate = NULL; 2997 MDI_PI(pip)->pi_vprivate = NULL; 2998 MDI_PI(pip)->pi_client_link = NULL; 2999 MDI_PI(pip)->pi_phci_link = 
NULL; 3000 MDI_PI(pip)->pi_ref_cnt = 0; 3001 MDI_PI(pip)->pi_kstats = NULL; 3002 MDI_PI(pip)->pi_preferred = 1; 3003 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 3004 3005 /* 3006 * Lock both dev_info nodes against changes in parallel. 3007 * 3008 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 3009 * This atypical operation is done to synchronize pathinfo nodes 3010 * during devinfo snapshot (see di_register_pip) by 'pretending' that 3011 * the pathinfo nodes are children of the Client. 3012 */ 3013 ndi_devi_enter(ct->ct_dip, &ct_circular); 3014 ndi_devi_enter(ph->ph_dip, &ph_circular); 3015 3016 i_mdi_phci_add_path(ph, pip); 3017 i_mdi_client_add_path(ct, pip); 3018 3019 ndi_devi_exit(ph->ph_dip, ph_circular); 3020 ndi_devi_exit(ct->ct_dip, ct_circular); 3021 3022 return (pip); 3023 } 3024 3025 /* 3026 * mdi_pi_pathname_by_instance(): 3027 * Lookup of "path" by 'path_instance'. Return "path". 3028 * NOTE: returned "path" remains valid forever (until reboot). 3029 */ 3030 char * 3031 mdi_pi_pathname_by_instance(int path_instance) 3032 { 3033 char *path; 3034 mod_hash_val_t hv; 3035 3036 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3037 mutex_enter(&mdi_pathmap_mutex); 3038 if (mod_hash_find(mdi_pathmap_byinstance, 3039 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3040 path = (char *)hv; 3041 else 3042 path = NULL; 3043 mutex_exit(&mdi_pathmap_mutex); 3044 return (path); 3045 } 3046 3047 /* 3048 * mdi_pi_spathname_by_instance(): 3049 * Lookup of "shortpath" by 'path_instance'. Return "shortpath". 3050 * NOTE: returned "shortpath" remains valid forever (until reboot). 
3051 */ 3052 char * 3053 mdi_pi_spathname_by_instance(int path_instance) 3054 { 3055 char *path; 3056 mod_hash_val_t hv; 3057 3058 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3059 mutex_enter(&mdi_pathmap_mutex); 3060 if (mod_hash_find(mdi_pathmap_sbyinstance, 3061 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3062 path = (char *)hv; 3063 else 3064 path = NULL; 3065 mutex_exit(&mdi_pathmap_mutex); 3066 return (path); 3067 } 3068 3069 3070 /* 3071 * i_mdi_phci_add_path(): 3072 * Add a mdi_pathinfo node to pHCI list. 3073 * Notes: 3074 * Caller should per-pHCI mutex 3075 */ 3076 static void 3077 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3078 { 3079 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3080 3081 MDI_PHCI_LOCK(ph); 3082 if (ph->ph_path_head == NULL) { 3083 ph->ph_path_head = pip; 3084 } else { 3085 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3086 } 3087 ph->ph_path_tail = pip; 3088 ph->ph_path_count++; 3089 MDI_PHCI_UNLOCK(ph); 3090 } 3091 3092 /* 3093 * i_mdi_client_add_path(): 3094 * Add mdi_pathinfo node to client list 3095 */ 3096 static void 3097 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3098 { 3099 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3100 3101 MDI_CLIENT_LOCK(ct); 3102 if (ct->ct_path_head == NULL) { 3103 ct->ct_path_head = pip; 3104 } else { 3105 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3106 } 3107 ct->ct_path_tail = pip; 3108 ct->ct_path_count++; 3109 MDI_CLIENT_UNLOCK(ct); 3110 } 3111 3112 /* 3113 * mdi_pi_free(): 3114 * Free the mdi_pathinfo node and also client device node if this 3115 * is the last path to the device 3116 * Return Values: 3117 * MDI_SUCCESS 3118 * MDI_FAILURE 3119 * MDI_BUSY 3120 */ 3121 /*ARGSUSED*/ 3122 int 3123 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3124 { 3125 int rv = MDI_FAILURE; 3126 mdi_vhci_t *vh; 3127 mdi_phci_t *ph; 3128 mdi_client_t *ct; 3129 int (*f)(); 3130 int client_held = 0; 3131 3132 MDI_PI_LOCK(pip); 3133 ph = MDI_PI(pip)->pi_phci; 3134 
ASSERT(ph != NULL); 3135 if (ph == NULL) { 3136 /* 3137 * Invalid pHCI device, return failure 3138 */ 3139 MDI_DEBUG(1, (MDI_WARN, NULL, 3140 "!invalid pHCI: pip %s %p", 3141 mdi_pi_spathname(pip), (void *)pip)); 3142 MDI_PI_UNLOCK(pip); 3143 return (MDI_FAILURE); 3144 } 3145 3146 vh = ph->ph_vhci; 3147 ASSERT(vh != NULL); 3148 if (vh == NULL) { 3149 /* Invalid pHCI device, return failure */ 3150 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3151 "!invalid vHCI: pip %s %p", 3152 mdi_pi_spathname(pip), (void *)pip)); 3153 MDI_PI_UNLOCK(pip); 3154 return (MDI_FAILURE); 3155 } 3156 3157 ct = MDI_PI(pip)->pi_client; 3158 ASSERT(ct != NULL); 3159 if (ct == NULL) { 3160 /* 3161 * Invalid Client device, return failure 3162 */ 3163 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3164 "!invalid client: pip %s %p", 3165 mdi_pi_spathname(pip), (void *)pip)); 3166 MDI_PI_UNLOCK(pip); 3167 return (MDI_FAILURE); 3168 } 3169 3170 /* 3171 * Check to see for busy condition. A mdi_pathinfo can only be freed 3172 * if the node state is either offline or init and the reference count 3173 * is zero. 3174 */ 3175 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3176 MDI_PI_IS_INITING(pip))) { 3177 /* 3178 * Node is busy 3179 */ 3180 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3181 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3182 MDI_PI_UNLOCK(pip); 3183 return (MDI_BUSY); 3184 } 3185 3186 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3187 /* 3188 * Give a chance for pending I/Os to complete. 3189 */ 3190 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3191 "!%d cmds still pending on path: %s %p", 3192 MDI_PI(pip)->pi_ref_cnt, 3193 mdi_pi_spathname(pip), (void *)pip)); 3194 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3195 &MDI_PI(pip)->pi_mutex, 3196 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3197 /* 3198 * The timeout time reached without ref_cnt being zero 3199 * being signaled. 
3200 */ 3201 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3202 "!Timeout reached on path %s %p without the cond", 3203 mdi_pi_spathname(pip), (void *)pip)); 3204 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3205 "!%d cmds still pending on path %s %p", 3206 MDI_PI(pip)->pi_ref_cnt, 3207 mdi_pi_spathname(pip), (void *)pip)); 3208 MDI_PI_UNLOCK(pip); 3209 return (MDI_BUSY); 3210 } 3211 } 3212 if (MDI_PI(pip)->pi_pm_held) { 3213 client_held = 1; 3214 } 3215 MDI_PI_UNLOCK(pip); 3216 3217 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3218 3219 MDI_CLIENT_LOCK(ct); 3220 3221 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3222 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3223 3224 /* 3225 * Wait till failover is complete before removing this node. 3226 */ 3227 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3228 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3229 3230 MDI_CLIENT_UNLOCK(ct); 3231 MDI_VHCI_CLIENT_LOCK(vh); 3232 MDI_CLIENT_LOCK(ct); 3233 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3234 3235 if (!MDI_PI_IS_INITING(pip)) { 3236 f = vh->vh_ops->vo_pi_uninit; 3237 if (f != NULL) { 3238 rv = (*f)(vh->vh_dip, pip, 0); 3239 } 3240 } 3241 /* 3242 * If vo_pi_uninit() completed successfully. 3243 */ 3244 if (rv == MDI_SUCCESS) { 3245 if (client_held) { 3246 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3247 "i_mdi_pm_rele_client\n")); 3248 i_mdi_pm_rele_client(ct, 1); 3249 } 3250 i_mdi_pi_free(ph, pip, ct); 3251 if (ct->ct_path_count == 0) { 3252 /* 3253 * Client lost its last path. 
3254 * Clean up the client device 3255 */ 3256 MDI_CLIENT_UNLOCK(ct); 3257 (void) i_mdi_client_free(ct->ct_vhci, ct); 3258 MDI_VHCI_CLIENT_UNLOCK(vh); 3259 return (rv); 3260 } 3261 } 3262 MDI_CLIENT_UNLOCK(ct); 3263 MDI_VHCI_CLIENT_UNLOCK(vh); 3264 3265 if (rv == MDI_FAILURE) 3266 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3267 3268 return (rv); 3269 } 3270 3271 /* 3272 * i_mdi_pi_free(): 3273 * Free the mdi_pathinfo node 3274 */ 3275 static void 3276 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3277 { 3278 int ct_circular; 3279 int ph_circular; 3280 3281 ASSERT(MDI_CLIENT_LOCKED(ct)); 3282 3283 /* 3284 * remove any per-path kstats 3285 */ 3286 i_mdi_pi_kstat_destroy(pip); 3287 3288 /* See comments in i_mdi_pi_alloc() */ 3289 ndi_devi_enter(ct->ct_dip, &ct_circular); 3290 ndi_devi_enter(ph->ph_dip, &ph_circular); 3291 3292 i_mdi_client_remove_path(ct, pip); 3293 i_mdi_phci_remove_path(ph, pip); 3294 3295 ndi_devi_exit(ph->ph_dip, ph_circular); 3296 ndi_devi_exit(ct->ct_dip, ct_circular); 3297 3298 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3299 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3300 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3301 if (MDI_PI(pip)->pi_addr) { 3302 kmem_free(MDI_PI(pip)->pi_addr, 3303 strlen(MDI_PI(pip)->pi_addr) + 1); 3304 MDI_PI(pip)->pi_addr = NULL; 3305 } 3306 3307 if (MDI_PI(pip)->pi_prop) { 3308 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3309 MDI_PI(pip)->pi_prop = NULL; 3310 } 3311 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3312 } 3313 3314 3315 /* 3316 * i_mdi_phci_remove_path(): 3317 * Remove a mdi_pathinfo node from pHCI list. 
3318 * Notes: 3319 * Caller should hold per-pHCI mutex 3320 */ 3321 static void 3322 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3323 { 3324 mdi_pathinfo_t *prev = NULL; 3325 mdi_pathinfo_t *path = NULL; 3326 3327 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3328 3329 MDI_PHCI_LOCK(ph); 3330 path = ph->ph_path_head; 3331 while (path != NULL) { 3332 if (path == pip) { 3333 break; 3334 } 3335 prev = path; 3336 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3337 } 3338 3339 if (path) { 3340 ph->ph_path_count--; 3341 if (prev) { 3342 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3343 } else { 3344 ph->ph_path_head = 3345 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3346 } 3347 if (ph->ph_path_tail == path) { 3348 ph->ph_path_tail = prev; 3349 } 3350 } 3351 3352 /* 3353 * Clear the pHCI link 3354 */ 3355 MDI_PI(pip)->pi_phci_link = NULL; 3356 MDI_PI(pip)->pi_phci = NULL; 3357 MDI_PHCI_UNLOCK(ph); 3358 } 3359 3360 /* 3361 * i_mdi_client_remove_path(): 3362 * Remove a mdi_pathinfo node from client path list. 
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * Walk the client's singly linked path list looking for pip,
	 * remembering the predecessor so the node can be unlinked.
	 */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			/* pip was the first path on the list */
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		/*
		 * Fix up the tail and ct_path_last pointers if they
		 * referenced the path that was just unlinked.
		 */
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}

	/*
	 * The path's client linkage is cleared unconditionally, i.e. even
	 * when pip was not found on the client's list.
	 */
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Common state transition routine for a mdi_pathinfo node; it
 *		is used to move the node to the online, standby, fault or
 *		offline state.  The node is first marked transient, the vHCI
 *		driver's vo_pi_state_change() entry point (if any) is called
 *		and, on success, the client state is re-evaluated, which may
 *		in turn online or offline the client dev_info node.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY	(pHCI not ready, or NDI reported busy)
 *		Any other failure code returned by vo_pi_state_change()
 *		(e.g. MDI_NOT_SUPPORTED) is passed back unchanged.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, call back the vHCI
	 * driver's pathinfo node initialize entry point.  The pathinfo
	 * lock is dropped across the callback and re-taken afterwards.
	 */

	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				/* pip is not locked here; just fail */
				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
				    "!vo_pi_init failed: vHCI %p, pip %s %p",
				    (void *)vh, mdi_pi_spathname(pip),
				    (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!pHCI not ready, pHCI=%p", (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/*
	 * From here on every exit path must go through state_change_exit
	 * (or fall into it) so that the pHCI is marked stable again.
	 */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);

		/*
		 * If this is a user initiated path online->offline operation
		 * whose success would transition a client from DEGRADED to
		 * FAILED then only proceed if we can offline the client first.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_USER_REQ) &&
		    MDI_PI_IS_ONLINE(pip) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/* neither ct nor pip is locked here */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI state change entry point with no MDI locks held.
	 * If the vHCI does not provide one, rv keeps its current value.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
		    "vo_pi_state_change failed: rv %x", rv));
	}
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			/* commit the new state */
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* roll back to the state saved before the request */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * The mdi_pathinfo state change will impact the
			 * client state.  Update the client and dev_info node
			 * state accordingly.  Note that rv carries NDI_*
			 * codes until it is converted back below.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_online failed "
						    "error %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_USER_REQ) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
 *
 *		After a successful state change, if the path does not have
 *		pi_pm_held set, i_mdi_pm_hold_pip() is called for the path
 *		and i_mdi_pm_hold_client() for its client; the pHCIs are
 *		powered via i_mdi_power_all_phci() first when the client's
 *		ct_power_cnt is zero.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		Otherwise the error returned by i_mdi_pi_state_change() or
 *		i_mdi_power_all_phci().
 */
int
mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
	int		client_held = 0;
	int		rv;

	ASSERT(ct != NULL);
	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
	if (rv != MDI_SUCCESS)
		return (rv);

	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held == 0) {
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_pip %p", (void *)pip));
		i_mdi_pm_hold_pip(pip);
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	/* The client hold is taken under the client lock, not the pip lock */
	if (client_held) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_power_cnt == 0) {
			rv = i_mdi_power_all_phci(ct);
		}

		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_client %p", (void *)ct));
		i_mdi_pm_hold_client(ct, 1);
		MDI_CLIENT_UNLOCK(ct);
	}

	return (rv);
}

/*
 * mdi_pi_standby():
 *		Place the mdi_pathinfo node in standby state
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		Otherwise the error returned by i_mdi_pi_state_change().
 */
int
mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
}

/*
 * mdi_pi_fault():
 *		Place the mdi_pathinfo node in fault'ed state
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		Otherwise the error returned by i_mdi_pi_state_change().
 */
int
mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
}

/*
 * mdi_pi_offline():
 *		Offline a mdi_pathinfo node.
3803 * Return Values: 3804 * MDI_SUCCESS 3805 * MDI_FAILURE 3806 */ 3807 int 3808 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3809 { 3810 int ret, client_held = 0; 3811 mdi_client_t *ct; 3812 3813 /* 3814 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3815 * used it to mean "user initiated operation" (i.e. devctl). Callers 3816 * should now just use NDI_USER_REQ. 3817 */ 3818 if (flags & NDI_DEVI_REMOVE) { 3819 flags &= ~NDI_DEVI_REMOVE; 3820 flags |= NDI_USER_REQ; 3821 } 3822 3823 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3824 3825 if (ret == MDI_SUCCESS) { 3826 MDI_PI_LOCK(pip); 3827 if (MDI_PI(pip)->pi_pm_held) { 3828 client_held = 1; 3829 } 3830 MDI_PI_UNLOCK(pip); 3831 3832 if (client_held) { 3833 ct = MDI_PI(pip)->pi_client; 3834 MDI_CLIENT_LOCK(ct); 3835 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3836 "i_mdi_pm_rele_client\n")); 3837 i_mdi_pm_rele_client(ct, 1); 3838 MDI_CLIENT_UNLOCK(ct); 3839 } 3840 } 3841 3842 return (ret); 3843 } 3844 3845 /* 3846 * i_mdi_pi_offline(): 3847 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3848 */ 3849 static int 3850 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3851 { 3852 dev_info_t *vdip = NULL; 3853 mdi_vhci_t *vh = NULL; 3854 mdi_client_t *ct = NULL; 3855 int (*f)(); 3856 int rv; 3857 3858 MDI_PI_LOCK(pip); 3859 ct = MDI_PI(pip)->pi_client; 3860 ASSERT(ct != NULL); 3861 3862 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3863 /* 3864 * Give a chance for pending I/Os to complete. 3865 */ 3866 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3867 "!%d cmds still pending on path %s %p", 3868 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3869 (void *)pip)); 3870 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3871 &MDI_PI(pip)->pi_mutex, 3872 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3873 /* 3874 * The timeout time reached without ref_cnt being zero 3875 * being signaled. 
3876 */ 3877 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3878 "!Timeout reached on path %s %p without the cond", 3879 mdi_pi_spathname(pip), (void *)pip)); 3880 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3881 "!%d cmds still pending on path %s %p", 3882 MDI_PI(pip)->pi_ref_cnt, 3883 mdi_pi_spathname(pip), (void *)pip)); 3884 } 3885 } 3886 vh = ct->ct_vhci; 3887 vdip = vh->vh_dip; 3888 3889 /* 3890 * Notify vHCI that has registered this event 3891 */ 3892 ASSERT(vh->vh_ops); 3893 f = vh->vh_ops->vo_pi_state_change; 3894 3895 if (f != NULL) { 3896 MDI_PI_UNLOCK(pip); 3897 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3898 flags)) != MDI_SUCCESS) { 3899 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3900 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3901 ddi_driver_name(vdip), ddi_get_instance(vdip), 3902 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3903 } 3904 MDI_PI_LOCK(pip); 3905 } 3906 3907 /* 3908 * Set the mdi_pathinfo node state and clear the transient condition 3909 */ 3910 MDI_PI_SET_OFFLINE(pip); 3911 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3912 MDI_PI_UNLOCK(pip); 3913 3914 MDI_CLIENT_LOCK(ct); 3915 if (rv == MDI_SUCCESS) { 3916 if (ct->ct_unstable == 0) { 3917 dev_info_t *cdip = ct->ct_dip; 3918 3919 /* 3920 * Onlining the mdi_pathinfo node will impact the 3921 * client state Update the client and dev_info node 3922 * state accordingly 3923 */ 3924 i_mdi_client_update_state(ct); 3925 rv = NDI_SUCCESS; 3926 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3927 if (cdip && 3928 (i_ddi_node_state(cdip) >= 3929 DS_INITIALIZED)) { 3930 MDI_CLIENT_UNLOCK(ct); 3931 rv = ndi_devi_offline(cdip, 3932 NDI_DEVFS_CLEAN); 3933 MDI_CLIENT_LOCK(ct); 3934 if (rv != NDI_SUCCESS) { 3935 /* 3936 * ndi_devi_offline failed. 3937 * Reset client flags to 3938 * online. 
3939 */ 3940 MDI_DEBUG(4, (MDI_WARN, cdip, 3941 "ndi_devi_offline failed: " 3942 "error %x", rv)); 3943 MDI_CLIENT_SET_ONLINE(ct); 3944 } 3945 } 3946 } 3947 /* 3948 * Convert to MDI error code 3949 */ 3950 switch (rv) { 3951 case NDI_SUCCESS: 3952 rv = MDI_SUCCESS; 3953 break; 3954 case NDI_BUSY: 3955 rv = MDI_BUSY; 3956 break; 3957 default: 3958 rv = MDI_FAILURE; 3959 break; 3960 } 3961 } 3962 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3963 i_mdi_report_path_state(ct, pip); 3964 } 3965 3966 MDI_CLIENT_UNLOCK(ct); 3967 3968 /* 3969 * Change in the mdi_pathinfo node state will impact the client state 3970 */ 3971 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3972 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3973 return (rv); 3974 } 3975 3976 /* 3977 * mdi_pi_get_node_name(): 3978 * Get the name associated with a mdi_pathinfo node. 3979 * Since pathinfo nodes are not directly named, we 3980 * return the node_name of the client. 3981 * 3982 * Return Values: 3983 * char * 3984 */ 3985 char * 3986 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 3987 { 3988 mdi_client_t *ct; 3989 3990 if (pip == NULL) 3991 return (NULL); 3992 ct = MDI_PI(pip)->pi_client; 3993 if ((ct == NULL) || (ct->ct_dip == NULL)) 3994 return (NULL); 3995 return (ddi_node_name(ct->ct_dip)); 3996 } 3997 3998 /* 3999 * mdi_pi_get_addr(): 4000 * Get the unit address associated with a mdi_pathinfo node 4001 * 4002 * Return Values: 4003 * char * 4004 */ 4005 char * 4006 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4007 { 4008 if (pip == NULL) 4009 return (NULL); 4010 4011 return (MDI_PI(pip)->pi_addr); 4012 } 4013 4014 /* 4015 * mdi_pi_get_path_instance(): 4016 * Get the 'path_instance' of a mdi_pathinfo node 4017 * 4018 * Return Values: 4019 * path_instance 4020 */ 4021 int 4022 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4023 { 4024 if (pip == NULL) 4025 return (0); 4026 4027 return (MDI_PI(pip)->pi_path_instance); 4028 } 4029 4030 /* 4031 * mdi_pi_pathname(): 4032 * Return pointer to path to pathinfo node. 
4033 */ 4034 char * 4035 mdi_pi_pathname(mdi_pathinfo_t *pip) 4036 { 4037 if (pip == NULL) 4038 return (NULL); 4039 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4040 } 4041 4042 /* 4043 * mdi_pi_spathname(): 4044 * Return pointer to shortpath to pathinfo node. Used for debug 4045 * messages, so return "" instead of NULL when unknown. 4046 */ 4047 char * 4048 mdi_pi_spathname(mdi_pathinfo_t *pip) 4049 { 4050 char *spath = ""; 4051 4052 if (pip) { 4053 spath = mdi_pi_spathname_by_instance( 4054 mdi_pi_get_path_instance(pip)); 4055 if (spath == NULL) 4056 spath = ""; 4057 } 4058 return (spath); 4059 } 4060 4061 char * 4062 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4063 { 4064 char *obp_path = NULL; 4065 if ((pip == NULL) || (path == NULL)) 4066 return (NULL); 4067 4068 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4069 (void) strcpy(path, obp_path); 4070 (void) mdi_prop_free(obp_path); 4071 } else { 4072 path = NULL; 4073 } 4074 return (path); 4075 } 4076 4077 int 4078 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4079 { 4080 dev_info_t *pdip; 4081 char *obp_path = NULL; 4082 int rc = MDI_FAILURE; 4083 4084 if (pip == NULL) 4085 return (MDI_FAILURE); 4086 4087 pdip = mdi_pi_get_phci(pip); 4088 if (pdip == NULL) 4089 return (MDI_FAILURE); 4090 4091 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4092 4093 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4094 (void) ddi_pathname(pdip, obp_path); 4095 } 4096 4097 if (component) { 4098 (void) strncat(obp_path, "/", MAXPATHLEN); 4099 (void) strncat(obp_path, component, MAXPATHLEN); 4100 } 4101 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4102 4103 if (obp_path) 4104 kmem_free(obp_path, MAXPATHLEN); 4105 return (rc); 4106 } 4107 4108 /* 4109 * mdi_pi_get_client(): 4110 * Get the client devinfo associated with a mdi_pathinfo node 4111 * 4112 * Return Values: 4113 * Handle to client device dev_info node 4114 */ 4115 dev_info_t * 4116 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4117 { 4118 dev_info_t *dip = NULL; 4119 if (pip) { 4120 dip = MDI_PI(pip)->pi_client->ct_dip; 4121 } 4122 return (dip); 4123 } 4124 4125 /* 4126 * mdi_pi_get_phci(): 4127 * Get the pHCI devinfo associated with the mdi_pathinfo node 4128 * Return Values: 4129 * Handle to dev_info node 4130 */ 4131 dev_info_t * 4132 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4133 { 4134 dev_info_t *dip = NULL; 4135 mdi_phci_t *ph; 4136 4137 if (pip) { 4138 ph = MDI_PI(pip)->pi_phci; 4139 if (ph) 4140 dip = ph->ph_dip; 4141 } 4142 return (dip); 4143 } 4144 4145 /* 4146 * mdi_pi_get_client_private(): 4147 * Get the client private information associated with the 4148 * mdi_pathinfo node 4149 */ 4150 void * 4151 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4152 { 4153 void *cprivate = NULL; 4154 if (pip) { 4155 cprivate = MDI_PI(pip)->pi_cprivate; 4156 } 4157 return (cprivate); 4158 } 4159 4160 /* 4161 * mdi_pi_set_client_private(): 4162 * Set the client private information in the mdi_pathinfo node 4163 */ 4164 void 4165 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4166 { 4167 if (pip) { 4168 MDI_PI(pip)->pi_cprivate = priv; 4169 } 4170 } 4171 4172 /* 4173 * mdi_pi_get_phci_private(): 4174 * Get the pHCI private information associated with the 4175 * mdi_pathinfo node 4176 */ 4177 caddr_t 4178 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4179 { 4180 caddr_t pprivate = NULL; 4181 4182 if (pip) { 4183 pprivate = MDI_PI(pip)->pi_pprivate; 4184 } 4185 return (pprivate); 4186 } 4187 4188 /* 4189 * mdi_pi_set_phci_private(): 4190 * Set the pHCI private information in the mdi_pathinfo node 4191 */ 4192 void 4193 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4194 { 4195 if (pip) { 4196 MDI_PI(pip)->pi_pprivate = priv; 4197 } 4198 } 4199 4200 /* 4201 * mdi_pi_get_state(): 4202 * Get the mdi_pathinfo node state. 
Transient states are internal 4203 * and not provided to the users 4204 */ 4205 mdi_pathinfo_state_t 4206 mdi_pi_get_state(mdi_pathinfo_t *pip) 4207 { 4208 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4209 4210 if (pip) { 4211 if (MDI_PI_IS_TRANSIENT(pip)) { 4212 /* 4213 * mdi_pathinfo is in state transition. Return the 4214 * last good state. 4215 */ 4216 state = MDI_PI_OLD_STATE(pip); 4217 } else { 4218 state = MDI_PI_STATE(pip); 4219 } 4220 } 4221 return (state); 4222 } 4223 4224 /* 4225 * mdi_pi_get_flags(): 4226 * Get the mdi_pathinfo node flags. 4227 */ 4228 uint_t 4229 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4230 { 4231 return (pip ? MDI_PI(pip)->pi_flags : 0); 4232 } 4233 4234 /* 4235 * Note that the following function needs to be the new interface for 4236 * mdi_pi_get_state when mpxio gets integrated to ON. 4237 */ 4238 int 4239 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4240 uint32_t *ext_state) 4241 { 4242 *state = MDI_PATHINFO_STATE_INIT; 4243 4244 if (pip) { 4245 if (MDI_PI_IS_TRANSIENT(pip)) { 4246 /* 4247 * mdi_pathinfo is in state transition. Return the 4248 * last good state. 
4249 */ 4250 *state = MDI_PI_OLD_STATE(pip); 4251 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4252 } else { 4253 *state = MDI_PI_STATE(pip); 4254 *ext_state = MDI_PI_EXT_STATE(pip); 4255 } 4256 } 4257 return (MDI_SUCCESS); 4258 } 4259 4260 /* 4261 * mdi_pi_get_preferred: 4262 * Get the preferred path flag 4263 */ 4264 int 4265 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4266 { 4267 if (pip) { 4268 return (MDI_PI(pip)->pi_preferred); 4269 } 4270 return (0); 4271 } 4272 4273 /* 4274 * mdi_pi_set_preferred: 4275 * Set the preferred path flag 4276 */ 4277 void 4278 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4279 { 4280 if (pip) { 4281 MDI_PI(pip)->pi_preferred = preferred; 4282 } 4283 } 4284 4285 /* 4286 * mdi_pi_set_state(): 4287 * Set the mdi_pathinfo node state 4288 */ 4289 void 4290 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4291 { 4292 uint32_t ext_state; 4293 4294 if (pip) { 4295 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4296 MDI_PI(pip)->pi_state = state; 4297 MDI_PI(pip)->pi_state |= ext_state; 4298 4299 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4300 i_ddi_di_cache_invalidate(); 4301 } 4302 } 4303 4304 /* 4305 * Property functions: 4306 */ 4307 int 4308 i_map_nvlist_error_to_mdi(int val) 4309 { 4310 int rv; 4311 4312 switch (val) { 4313 case 0: 4314 rv = DDI_PROP_SUCCESS; 4315 break; 4316 case EINVAL: 4317 case ENOTSUP: 4318 rv = DDI_PROP_INVAL_ARG; 4319 break; 4320 case ENOMEM: 4321 rv = DDI_PROP_NO_MEMORY; 4322 break; 4323 default: 4324 rv = DDI_PROP_NOT_FOUND; 4325 break; 4326 } 4327 return (rv); 4328 } 4329 4330 /* 4331 * mdi_pi_get_next_prop(): 4332 * Property walk function. The caller should hold mdi_pi_lock() 4333 * and release by calling mdi_pi_unlock() at the end of walk to 4334 * get a consistent value. 
4335 */ 4336 nvpair_t * 4337 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4338 { 4339 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4340 return (NULL); 4341 } 4342 ASSERT(MDI_PI_LOCKED(pip)); 4343 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4344 } 4345 4346 /* 4347 * mdi_prop_remove(): 4348 * Remove the named property from the named list. 4349 */ 4350 int 4351 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4352 { 4353 if (pip == NULL) { 4354 return (DDI_PROP_NOT_FOUND); 4355 } 4356 ASSERT(!MDI_PI_LOCKED(pip)); 4357 MDI_PI_LOCK(pip); 4358 if (MDI_PI(pip)->pi_prop == NULL) { 4359 MDI_PI_UNLOCK(pip); 4360 return (DDI_PROP_NOT_FOUND); 4361 } 4362 if (name) { 4363 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4364 } else { 4365 char nvp_name[MAXNAMELEN]; 4366 nvpair_t *nvp; 4367 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4368 while (nvp) { 4369 nvpair_t *next; 4370 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4371 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4372 nvpair_name(nvp)); 4373 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4374 nvp_name); 4375 nvp = next; 4376 } 4377 } 4378 MDI_PI_UNLOCK(pip); 4379 return (DDI_PROP_SUCCESS); 4380 } 4381 4382 /* 4383 * mdi_prop_size(): 4384 * Get buffer size needed to pack the property data. 4385 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4386 * buffer size. 4387 */ 4388 int 4389 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4390 { 4391 int rv; 4392 size_t bufsize; 4393 4394 *buflenp = 0; 4395 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4396 return (DDI_PROP_NOT_FOUND); 4397 } 4398 ASSERT(MDI_PI_LOCKED(pip)); 4399 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4400 &bufsize, NV_ENCODE_NATIVE); 4401 *buflenp = bufsize; 4402 return (i_map_nvlist_error_to_mdi(rv)); 4403 } 4404 4405 /* 4406 * mdi_prop_pack(): 4407 * pack the property list. 
The caller should hold the 4408 * mdi_pathinfo_t node to get a consistent data 4409 */ 4410 int 4411 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4412 { 4413 int rv; 4414 size_t bufsize; 4415 4416 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4417 return (DDI_PROP_NOT_FOUND); 4418 } 4419 4420 ASSERT(MDI_PI_LOCKED(pip)); 4421 4422 bufsize = buflen; 4423 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4424 NV_ENCODE_NATIVE, KM_SLEEP); 4425 4426 return (i_map_nvlist_error_to_mdi(rv)); 4427 } 4428 4429 /* 4430 * mdi_prop_update_byte(): 4431 * Create/Update a byte property 4432 */ 4433 int 4434 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4435 { 4436 int rv; 4437 4438 if (pip == NULL) { 4439 return (DDI_PROP_INVAL_ARG); 4440 } 4441 ASSERT(!MDI_PI_LOCKED(pip)); 4442 MDI_PI_LOCK(pip); 4443 if (MDI_PI(pip)->pi_prop == NULL) { 4444 MDI_PI_UNLOCK(pip); 4445 return (DDI_PROP_NOT_FOUND); 4446 } 4447 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4448 MDI_PI_UNLOCK(pip); 4449 return (i_map_nvlist_error_to_mdi(rv)); 4450 } 4451 4452 /* 4453 * mdi_prop_update_byte_array(): 4454 * Create/Update a byte array property 4455 */ 4456 int 4457 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4458 uint_t nelements) 4459 { 4460 int rv; 4461 4462 if (pip == NULL) { 4463 return (DDI_PROP_INVAL_ARG); 4464 } 4465 ASSERT(!MDI_PI_LOCKED(pip)); 4466 MDI_PI_LOCK(pip); 4467 if (MDI_PI(pip)->pi_prop == NULL) { 4468 MDI_PI_UNLOCK(pip); 4469 return (DDI_PROP_NOT_FOUND); 4470 } 4471 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4472 MDI_PI_UNLOCK(pip); 4473 return (i_map_nvlist_error_to_mdi(rv)); 4474 } 4475 4476 /* 4477 * mdi_prop_update_int(): 4478 * Create/Update a 32 bit integer property 4479 */ 4480 int 4481 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4482 { 4483 int rv; 4484 4485 if (pip == NULL) { 4486 return (DDI_PROP_INVAL_ARG); 4487 } 4488 
ASSERT(!MDI_PI_LOCKED(pip)); 4489 MDI_PI_LOCK(pip); 4490 if (MDI_PI(pip)->pi_prop == NULL) { 4491 MDI_PI_UNLOCK(pip); 4492 return (DDI_PROP_NOT_FOUND); 4493 } 4494 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4495 MDI_PI_UNLOCK(pip); 4496 return (i_map_nvlist_error_to_mdi(rv)); 4497 } 4498 4499 /* 4500 * mdi_prop_update_int64(): 4501 * Create/Update a 64 bit integer property 4502 */ 4503 int 4504 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4505 { 4506 int rv; 4507 4508 if (pip == NULL) { 4509 return (DDI_PROP_INVAL_ARG); 4510 } 4511 ASSERT(!MDI_PI_LOCKED(pip)); 4512 MDI_PI_LOCK(pip); 4513 if (MDI_PI(pip)->pi_prop == NULL) { 4514 MDI_PI_UNLOCK(pip); 4515 return (DDI_PROP_NOT_FOUND); 4516 } 4517 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4518 MDI_PI_UNLOCK(pip); 4519 return (i_map_nvlist_error_to_mdi(rv)); 4520 } 4521 4522 /* 4523 * mdi_prop_update_int_array(): 4524 * Create/Update a int array property 4525 */ 4526 int 4527 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4528 uint_t nelements) 4529 { 4530 int rv; 4531 4532 if (pip == NULL) { 4533 return (DDI_PROP_INVAL_ARG); 4534 } 4535 ASSERT(!MDI_PI_LOCKED(pip)); 4536 MDI_PI_LOCK(pip); 4537 if (MDI_PI(pip)->pi_prop == NULL) { 4538 MDI_PI_UNLOCK(pip); 4539 return (DDI_PROP_NOT_FOUND); 4540 } 4541 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4542 nelements); 4543 MDI_PI_UNLOCK(pip); 4544 return (i_map_nvlist_error_to_mdi(rv)); 4545 } 4546 4547 /* 4548 * mdi_prop_update_string(): 4549 * Create/Update a string property 4550 */ 4551 int 4552 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4553 { 4554 int rv; 4555 4556 if (pip == NULL) { 4557 return (DDI_PROP_INVAL_ARG); 4558 } 4559 ASSERT(!MDI_PI_LOCKED(pip)); 4560 MDI_PI_LOCK(pip); 4561 if (MDI_PI(pip)->pi_prop == NULL) { 4562 MDI_PI_UNLOCK(pip); 4563 return (DDI_PROP_NOT_FOUND); 4564 } 4565 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4566 MDI_PI_UNLOCK(pip); 4567 return (i_map_nvlist_error_to_mdi(rv)); 4568 } 4569 4570 /* 4571 * mdi_prop_update_string_array(): 4572 * Create/Update a string array property 4573 */ 4574 int 4575 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4576 uint_t nelements) 4577 { 4578 int rv; 4579 4580 if (pip == NULL) { 4581 return (DDI_PROP_INVAL_ARG); 4582 } 4583 ASSERT(!MDI_PI_LOCKED(pip)); 4584 MDI_PI_LOCK(pip); 4585 if (MDI_PI(pip)->pi_prop == NULL) { 4586 MDI_PI_UNLOCK(pip); 4587 return (DDI_PROP_NOT_FOUND); 4588 } 4589 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4590 nelements); 4591 MDI_PI_UNLOCK(pip); 4592 return (i_map_nvlist_error_to_mdi(rv)); 4593 } 4594 4595 /* 4596 * mdi_prop_lookup_byte(): 4597 * Look for byte property identified by name. The data returned 4598 * is the actual property and valid as long as mdi_pathinfo_t node 4599 * is alive. 4600 */ 4601 int 4602 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4603 { 4604 int rv; 4605 4606 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4607 return (DDI_PROP_NOT_FOUND); 4608 } 4609 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4610 return (i_map_nvlist_error_to_mdi(rv)); 4611 } 4612 4613 4614 /* 4615 * mdi_prop_lookup_byte_array(): 4616 * Look for byte array property identified by name. The data 4617 * returned is the actual property and valid as long as 4618 * mdi_pathinfo_t node is alive. 4619 */ 4620 int 4621 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4622 uint_t *nelements) 4623 { 4624 int rv; 4625 4626 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4627 return (DDI_PROP_NOT_FOUND); 4628 } 4629 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4630 nelements); 4631 return (i_map_nvlist_error_to_mdi(rv)); 4632 } 4633 4634 /* 4635 * mdi_prop_lookup_int(): 4636 * Look for int property identified by name. 
The data returned 4637 * is the actual property and valid as long as mdi_pathinfo_t 4638 * node is alive. 4639 */ 4640 int 4641 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4642 { 4643 int rv; 4644 4645 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4646 return (DDI_PROP_NOT_FOUND); 4647 } 4648 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4649 return (i_map_nvlist_error_to_mdi(rv)); 4650 } 4651 4652 /* 4653 * mdi_prop_lookup_int64(): 4654 * Look for int64 property identified by name. The data returned 4655 * is the actual property and valid as long as mdi_pathinfo_t node 4656 * is alive. 4657 */ 4658 int 4659 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4660 { 4661 int rv; 4662 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4663 return (DDI_PROP_NOT_FOUND); 4664 } 4665 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4666 return (i_map_nvlist_error_to_mdi(rv)); 4667 } 4668 4669 /* 4670 * mdi_prop_lookup_int_array(): 4671 * Look for int array property identified by name. The data 4672 * returned is the actual property and valid as long as 4673 * mdi_pathinfo_t node is alive. 4674 */ 4675 int 4676 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4677 uint_t *nelements) 4678 { 4679 int rv; 4680 4681 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4682 return (DDI_PROP_NOT_FOUND); 4683 } 4684 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4685 (int32_t **)data, nelements); 4686 return (i_map_nvlist_error_to_mdi(rv)); 4687 } 4688 4689 /* 4690 * mdi_prop_lookup_string(): 4691 * Look for string property identified by name. The data 4692 * returned is the actual property and valid as long as 4693 * mdi_pathinfo_t node is alive. 
4694 */ 4695 int 4696 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4697 { 4698 int rv; 4699 4700 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4701 return (DDI_PROP_NOT_FOUND); 4702 } 4703 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4704 return (i_map_nvlist_error_to_mdi(rv)); 4705 } 4706 4707 /* 4708 * mdi_prop_lookup_string_array(): 4709 * Look for string array property identified by name. The data 4710 * returned is the actual property and valid as long as 4711 * mdi_pathinfo_t node is alive. 4712 */ 4713 int 4714 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4715 uint_t *nelements) 4716 { 4717 int rv; 4718 4719 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4720 return (DDI_PROP_NOT_FOUND); 4721 } 4722 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4723 nelements); 4724 return (i_map_nvlist_error_to_mdi(rv)); 4725 } 4726 4727 /* 4728 * mdi_prop_free(): 4729 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4730 * functions return the pointer to actual property data and not a 4731 * copy of it. So the data returned is valid as long as 4732 * mdi_pathinfo_t node is valid. 
4733 */ 4734 /*ARGSUSED*/ 4735 int 4736 mdi_prop_free(void *data) 4737 { 4738 return (DDI_PROP_SUCCESS); 4739 } 4740 4741 /*ARGSUSED*/ 4742 static void 4743 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4744 { 4745 char *ct_path; 4746 char *ct_status; 4747 char *status; 4748 dev_info_t *cdip = ct->ct_dip; 4749 char lb_buf[64]; 4750 int report_lb_c = 0, report_lb_p = 0; 4751 4752 ASSERT(MDI_CLIENT_LOCKED(ct)); 4753 if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) || 4754 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4755 return; 4756 } 4757 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4758 ct_status = "optimal"; 4759 report_lb_c = 1; 4760 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4761 ct_status = "degraded"; 4762 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4763 ct_status = "failed"; 4764 } else { 4765 ct_status = "unknown"; 4766 } 4767 4768 lb_buf[0] = 0; /* not interested in load balancing config */ 4769 4770 if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) { 4771 status = "removed"; 4772 } else if (MDI_PI_IS_OFFLINE(pip)) { 4773 status = "offline"; 4774 } else if (MDI_PI_IS_ONLINE(pip)) { 4775 status = "online"; 4776 report_lb_p = 1; 4777 } else if (MDI_PI_IS_STANDBY(pip)) { 4778 status = "standby"; 4779 } else if (MDI_PI_IS_FAULT(pip)) { 4780 status = "faulted"; 4781 } else { 4782 status = "unknown"; 4783 } 4784 4785 if (cdip) { 4786 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4787 4788 /* 4789 * NOTE: Keeping "multipath status: %s" and 4790 * "Load balancing: %s" format unchanged in case someone 4791 * scrubs /var/adm/messages looking for these messages. 
4792 */ 4793 if (report_lb_c && report_lb_p) { 4794 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4795 (void) snprintf(lb_buf, sizeof (lb_buf), 4796 "%s, region-size: %d", mdi_load_balance_lba, 4797 ct->ct_lb_args->region_size); 4798 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4799 (void) snprintf(lb_buf, sizeof (lb_buf), 4800 "%s", mdi_load_balance_none); 4801 } else { 4802 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4803 mdi_load_balance_rr); 4804 } 4805 4806 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4807 "?%s (%s%d) multipath status: %s: " 4808 "path %d %s is %s: Load balancing: %s\n", 4809 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4810 ddi_get_instance(cdip), ct_status, 4811 mdi_pi_get_path_instance(pip), 4812 mdi_pi_spathname(pip), status, lb_buf); 4813 } else { 4814 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4815 "?%s (%s%d) multipath status: %s: " 4816 "path %d %s is %s\n", 4817 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4818 ddi_get_instance(cdip), ct_status, 4819 mdi_pi_get_path_instance(pip), 4820 mdi_pi_spathname(pip), status); 4821 } 4822 4823 kmem_free(ct_path, MAXPATHLEN); 4824 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4825 } 4826 } 4827 4828 #ifdef DEBUG 4829 /* 4830 * i_mdi_log(): 4831 * Utility function for error message management 4832 * 4833 * NOTE: Implementation takes care of trailing \n for cmn_err, 4834 * MDI_DEBUG should not terminate fmt strings with \n. 4835 * 4836 * NOTE: If the level is >= 2, and there is no leading !?^ 4837 * then a leading ! is implied (but can be overriden via 4838 * mdi_debug_consoleonly). If you are using kmdb on the console, 4839 * consider setting mdi_debug_consoleonly to 1 as an aid. 4840 */ 4841 /*PRINTFLIKE4*/ 4842 static void 4843 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...) 
#ifdef	DEBUG
/*
 * i_mdi_log():
 *		Utility function for error message management
 *
 *		NOTE: Implementation takes care of trailing \n for cmn_err,
 *		MDI_DEBUG should not terminate fmt strings with \n.
 *
 *		NOTE: If the level is >= 2, and there is no leading !?^
 *		then a leading ! is implied (but can be overridden via
 *		mdi_debug_consoleonly). If you are using kmdb on the console,
 *		consider setting mdi_debug_consoleonly to 1 as an aid.
 *
 *		A leading '!', '?' or '^' in the formatted message is
 *		stripped and re-applied in front of the "mdi: " prefix.
 *		mdi_debug_logonly forces '!'; mdi_debug_consoleonly overrides
 *		everything and emits the message as a '^' CE_NOTE.
 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
	char	name[MAXNAMELEN];
	char	buf[512];
	char	*bp = buf;
	va_list	ap;
	int	log_only = 0;
	int	boot_only = 0;
	int	console_only = 0;

	/* "<driver><instance>: " when a dip is supplied, else empty */
	name[0] = 0;
	if (dip) {
		(void) snprintf(name, sizeof (name), "%s%d: ",
		    ddi_driver_name(dip), ddi_get_instance(dip));
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	/* Peel off an explicit destination prefix, if present. */
	if (buf[0] == '!') {
		log_only = 1;
		bp = &buf[1];
	} else if (buf[0] == '?') {
		boot_only = 1;
		bp = &buf[1];
	} else if (buf[0] == '^') {
		console_only = 1;
		bp = &buf[1];
	} else if (level >= 2) {
		log_only = 1;		/* ! implied */
	}

	/* Console-only wins over every other routing choice. */
	if (mdi_debug_consoleonly) {
		cmn_err(CE_NOTE, "^mdi: %s%s: %s", name, func, bp);
		return;
	}

	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}

	/* Notes are demoted to continuation messages. */
	if (level == CE_NOTE)
		level = CE_CONT;

	switch (level) {
	case CE_CONT:
		/* CE_CONT does not add a newline, so supply one. */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
		}
		break;

	case CE_WARN:
	case CE_PANIC:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
		}
		break;

	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */
*ct_dip) 4934 { 4935 mdi_client_t *ct; 4936 4937 /* 4938 * Client online notification. Mark client state as online 4939 * restore our binding with dev_info node 4940 */ 4941 ct = i_devi_get_client(ct_dip); 4942 ASSERT(ct != NULL); 4943 MDI_CLIENT_LOCK(ct); 4944 MDI_CLIENT_SET_ONLINE(ct); 4945 /* catch for any memory leaks */ 4946 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4947 ct->ct_dip = ct_dip; 4948 4949 if (ct->ct_power_cnt == 0) 4950 (void) i_mdi_power_all_phci(ct); 4951 4952 MDI_DEBUG(4, (MDI_NOTE, ct_dip, 4953 "i_mdi_pm_hold_client %p", (void *)ct)); 4954 i_mdi_pm_hold_client(ct, 1); 4955 4956 MDI_CLIENT_UNLOCK(ct); 4957 } 4958 4959 void 4960 i_mdi_phci_online(dev_info_t *ph_dip) 4961 { 4962 mdi_phci_t *ph; 4963 4964 /* pHCI online notification. Mark state accordingly */ 4965 ph = i_devi_get_phci(ph_dip); 4966 ASSERT(ph != NULL); 4967 MDI_PHCI_LOCK(ph); 4968 MDI_PHCI_SET_ONLINE(ph); 4969 MDI_PHCI_UNLOCK(ph); 4970 } 4971 4972 /* 4973 * mdi_devi_online(): 4974 * Online notification from NDI framework on pHCI/client 4975 * device online. 4976 * Return Values: 4977 * NDI_SUCCESS 4978 * MDI_FAILURE 4979 */ 4980 /*ARGSUSED*/ 4981 int 4982 mdi_devi_online(dev_info_t *dip, uint_t flags) 4983 { 4984 if (MDI_PHCI(dip)) { 4985 i_mdi_phci_online(dip); 4986 } 4987 4988 if (MDI_CLIENT(dip)) { 4989 i_mdi_client_online(dip); 4990 } 4991 return (NDI_SUCCESS); 4992 } 4993 4994 /* 4995 * mdi_devi_offline(): 4996 * Offline notification from NDI framework on pHCI/Client device 4997 * offline. 
/*
 * mdi_devi_offline():
 *		Offline notification from NDI framework on pHCI/Client device
 *		offline.  The client role is offlined first; if the node is
 *		also a pHCI and the pHCI offline then fails, the client is
 *		put back online before the error is returned.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		otherwise the error from the component offline routine
 *		(i_mdi_phci_offline() below returns NDI_BUSY)
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*
 * i_mdi_phci_offline():
 *		Offline a pHCI.  Works in three passes over the pHCI's path
 *		list:
 *		  1. for every path whose client would be left in the FAILED
 *		     state, offline the client node; a failure here aborts
 *		     the operation and the clients already visited are
 *		     onlined/offlined again according to their current state;
 *		  2. mark the pHCI offline and every path as offlining, then
 *		     wait briefly so pending commands can drain;
 *		  3. offline each path; if a path does not reach the offline
 *		     state the pHCI is marked online again.
 *
 *		The pHCI lock is dropped around every call into the NDI
 *		framework and around the delay.
 *
 * Return Values:
 *		NDI_SUCCESS	offlined, already offline, or dip has no pHCI
 *		NDI_BUSY	pHCI/client unstable, failover in progress,
 *				or a client or path could not be offlined
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!pHCI already offlined: %p", (void *)dip));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more target devices are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* fetch the successor now; locks are dropped further down */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* no MDI locks are held across the NDI call */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip,
			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (MDI_WARN, dip,
				    "!pHCI device is busy. "
				    "This device can not be removed at this "
				    "moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Roll back: revisit every path that precedes the one that
		 * failed and bring its client node in line with the
		 * client's current state.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* no client node to act on: just release the locks */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(5);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not used afterwards */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* undo the offline mark set above */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
Mark client dip as retiring */ 5265 i_mdi_client_unlock(ct); 5266 MDI_PHCI_UNLOCK(ph); 5267 (void) e_ddi_mark_retiring(cdip, cons_array); 5268 MDI_PHCI_LOCK(ph); 5269 pip = next; 5270 } else { 5271 i_mdi_client_unlock(ct); 5272 pip = next; 5273 } 5274 } 5275 5276 MDI_PHCI_UNLOCK(ph); 5277 5278 return; 5279 } 5280 5281 void 5282 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5283 { 5284 mdi_phci_t *ph; 5285 mdi_client_t *ct; 5286 mdi_pathinfo_t *pip; 5287 mdi_pathinfo_t *next; 5288 dev_info_t *cdip; 5289 5290 if (!MDI_PHCI(dip)) 5291 return; 5292 5293 ph = i_devi_get_phci(dip); 5294 if (ph == NULL) 5295 return; 5296 5297 MDI_PHCI_LOCK(ph); 5298 5299 if (MDI_PHCI_IS_OFFLINE(ph)) { 5300 MDI_PHCI_UNLOCK(ph); 5301 /* not last path */ 5302 return; 5303 } 5304 5305 if (ph->ph_unstable) { 5306 MDI_PHCI_UNLOCK(ph); 5307 /* can't check for constraints */ 5308 *constraint = 0; 5309 return; 5310 } 5311 5312 pip = ph->ph_path_head; 5313 while (pip != NULL) { 5314 MDI_PI_LOCK(pip); 5315 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5316 5317 /* 5318 * The mdi_pathinfo state is OK. Check the client state. 5319 * If failover in progress fail the pHCI from offlining 5320 */ 5321 ct = MDI_PI(pip)->pi_client; 5322 i_mdi_client_lock(ct, pip); 5323 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5324 (ct->ct_unstable)) { 5325 /* 5326 * Failover is in progress, can't check for constraints 5327 */ 5328 MDI_PI_UNLOCK(pip); 5329 i_mdi_client_unlock(ct); 5330 MDI_PHCI_UNLOCK(ph); 5331 *constraint = 0; 5332 return; 5333 } 5334 MDI_PI_UNLOCK(pip); 5335 5336 /* 5337 * Check to see of we are retiring the last path of this 5338 * client device... 
5339 */ 5340 cdip = ct->ct_dip; 5341 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5342 (i_mdi_client_compute_state(ct, ph) == 5343 MDI_CLIENT_STATE_FAILED)) { 5344 i_mdi_client_unlock(ct); 5345 MDI_PHCI_UNLOCK(ph); 5346 (void) e_ddi_retire_notify(cdip, constraint); 5347 MDI_PHCI_LOCK(ph); 5348 pip = next; 5349 } else { 5350 i_mdi_client_unlock(ct); 5351 pip = next; 5352 } 5353 } 5354 5355 MDI_PHCI_UNLOCK(ph); 5356 5357 return; 5358 } 5359 5360 /* 5361 * offline the path(s) hanging off the pHCI. If the 5362 * last path to any client, check that constraints 5363 * have been applied. 5364 */ 5365 void 5366 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) 5367 { 5368 mdi_phci_t *ph; 5369 mdi_client_t *ct; 5370 mdi_pathinfo_t *pip; 5371 mdi_pathinfo_t *next; 5372 dev_info_t *cdip; 5373 int unstable = 0; 5374 int constraint; 5375 5376 if (!MDI_PHCI(dip)) 5377 return; 5378 5379 ph = i_devi_get_phci(dip); 5380 if (ph == NULL) { 5381 /* no last path and no pips */ 5382 return; 5383 } 5384 5385 MDI_PHCI_LOCK(ph); 5386 5387 if (MDI_PHCI_IS_OFFLINE(ph)) { 5388 MDI_PHCI_UNLOCK(ph); 5389 /* no last path and no pips */ 5390 return; 5391 } 5392 5393 /* 5394 * Check to see if the pHCI can be offlined 5395 */ 5396 if (ph->ph_unstable) { 5397 unstable = 1; 5398 } 5399 5400 pip = ph->ph_path_head; 5401 while (pip != NULL) { 5402 MDI_PI_LOCK(pip); 5403 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5404 5405 /* 5406 * if failover in progress fail the pHCI from offlining 5407 */ 5408 ct = MDI_PI(pip)->pi_client; 5409 i_mdi_client_lock(ct, pip); 5410 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5411 (ct->ct_unstable)) { 5412 unstable = 1; 5413 } 5414 MDI_PI_UNLOCK(pip); 5415 5416 /* 5417 * Check to see of we are removing the last path of this 5418 * client device... 
5419 */ 5420 cdip = ct->ct_dip; 5421 if (!phci_only && cdip && 5422 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5423 (i_mdi_client_compute_state(ct, ph) == 5424 MDI_CLIENT_STATE_FAILED)) { 5425 i_mdi_client_unlock(ct); 5426 MDI_PHCI_UNLOCK(ph); 5427 /* 5428 * We don't retire clients we just retire the 5429 * path to a client. If it is the last path 5430 * to a client, constraints are checked and 5431 * if we pass the last path is offlined. MPXIO will 5432 * then fail all I/Os to the client. Since we don't 5433 * want to retire the client on a path error 5434 * set constraint = 0 so that the client dip 5435 * is not retired. 5436 */ 5437 constraint = 0; 5438 (void) e_ddi_retire_finalize(cdip, &constraint); 5439 MDI_PHCI_LOCK(ph); 5440 pip = next; 5441 } else { 5442 i_mdi_client_unlock(ct); 5443 pip = next; 5444 } 5445 } 5446 5447 /* 5448 * Cannot offline pip(s) 5449 */ 5450 if (unstable) { 5451 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: " 5452 "pHCI in transient state, cannot retire", 5453 ddi_driver_name(dip), ddi_get_instance(dip)); 5454 MDI_PHCI_UNLOCK(ph); 5455 return; 5456 } 5457 5458 /* 5459 * Mark the pHCI as offline 5460 */ 5461 MDI_PHCI_SET_OFFLINE(ph); 5462 5463 /* 5464 * Mark the child mdi_pathinfo nodes as transient 5465 */ 5466 pip = ph->ph_path_head; 5467 while (pip != NULL) { 5468 MDI_PI_LOCK(pip); 5469 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5470 MDI_PI_SET_OFFLINING(pip); 5471 MDI_PI_UNLOCK(pip); 5472 pip = next; 5473 } 5474 MDI_PHCI_UNLOCK(ph); 5475 /* 5476 * Give a chance for any pending commands to execute 5477 */ 5478 delay_random(5); 5479 MDI_PHCI_LOCK(ph); 5480 pip = ph->ph_path_head; 5481 while (pip != NULL) { 5482 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5483 (void) i_mdi_pi_offline(pip, 0); 5484 MDI_PI_LOCK(pip); 5485 ct = MDI_PI(pip)->pi_client; 5486 if (!MDI_PI_IS_OFFLINE(pip)) { 5487 cmn_err(CE_WARN, "mdi_phci_retire_finalize: " 5488 "path %d %s busy, cannot offline", 5489 mdi_pi_get_path_instance(pip), 
5490 mdi_pi_spathname(pip)); 5491 MDI_PI_UNLOCK(pip); 5492 MDI_PHCI_SET_ONLINE(ph); 5493 MDI_PHCI_UNLOCK(ph); 5494 return; 5495 } 5496 MDI_PI_UNLOCK(pip); 5497 pip = next; 5498 } 5499 MDI_PHCI_UNLOCK(ph); 5500 5501 return; 5502 } 5503 5504 void 5505 mdi_phci_unretire(dev_info_t *dip) 5506 { 5507 ASSERT(MDI_PHCI(dip)); 5508 5509 /* 5510 * Online the phci 5511 */ 5512 i_mdi_phci_online(dip); 5513 } 5514 5515 /*ARGSUSED*/ 5516 static int 5517 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5518 { 5519 int rv = NDI_SUCCESS; 5520 mdi_client_t *ct; 5521 5522 /* 5523 * Client component to go offline. Make sure that we are 5524 * not in failing over state and update client state 5525 * accordingly 5526 */ 5527 ct = i_devi_get_client(dip); 5528 MDI_DEBUG(2, (MDI_NOTE, dip, 5529 "called %p %p", (void *)dip, (void *)ct)); 5530 if (ct != NULL) { 5531 MDI_CLIENT_LOCK(ct); 5532 if (ct->ct_unstable) { 5533 /* 5534 * One or more paths are in transient state, 5535 * Dont allow offline of a client device 5536 */ 5537 MDI_DEBUG(1, (MDI_WARN, dip, 5538 "!One or more paths to " 5539 "this device are in transient state. " 5540 "This device can not be removed at this moment. " 5541 "Please try again later.")); 5542 MDI_CLIENT_UNLOCK(ct); 5543 return (NDI_BUSY); 5544 } 5545 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5546 /* 5547 * Failover is in progress, Dont allow DR of 5548 * a client device 5549 */ 5550 MDI_DEBUG(1, (MDI_WARN, dip, 5551 "!Client device is Busy. " 5552 "This device can not be removed at this moment. 
" 5553 "Please try again later.")); 5554 MDI_CLIENT_UNLOCK(ct); 5555 return (NDI_BUSY); 5556 } 5557 MDI_CLIENT_SET_OFFLINE(ct); 5558 5559 /* 5560 * Unbind our relationship with the dev_info node 5561 */ 5562 if (flags & NDI_DEVI_REMOVE) { 5563 ct->ct_dip = NULL; 5564 } 5565 MDI_CLIENT_UNLOCK(ct); 5566 } 5567 return (rv); 5568 } 5569 5570 /* 5571 * mdi_pre_attach(): 5572 * Pre attach() notification handler 5573 */ 5574 /*ARGSUSED*/ 5575 int 5576 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5577 { 5578 /* don't support old DDI_PM_RESUME */ 5579 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5580 (cmd == DDI_PM_RESUME)) 5581 return (DDI_FAILURE); 5582 5583 return (DDI_SUCCESS); 5584 } 5585 5586 /* 5587 * mdi_post_attach(): 5588 * Post attach() notification handler 5589 */ 5590 /*ARGSUSED*/ 5591 void 5592 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5593 { 5594 mdi_phci_t *ph; 5595 mdi_client_t *ct; 5596 mdi_vhci_t *vh; 5597 5598 if (MDI_PHCI(dip)) { 5599 ph = i_devi_get_phci(dip); 5600 ASSERT(ph != NULL); 5601 5602 MDI_PHCI_LOCK(ph); 5603 switch (cmd) { 5604 case DDI_ATTACH: 5605 MDI_DEBUG(2, (MDI_NOTE, dip, 5606 "phci post_attach called %p", (void *)ph)); 5607 if (error == DDI_SUCCESS) { 5608 MDI_PHCI_SET_ATTACH(ph); 5609 } else { 5610 MDI_DEBUG(1, (MDI_NOTE, dip, 5611 "!pHCI post_attach failed: error %d", 5612 error)); 5613 MDI_PHCI_SET_DETACH(ph); 5614 } 5615 break; 5616 5617 case DDI_RESUME: 5618 MDI_DEBUG(2, (MDI_NOTE, dip, 5619 "pHCI post_resume: called %p", (void *)ph)); 5620 if (error == DDI_SUCCESS) { 5621 MDI_PHCI_SET_RESUME(ph); 5622 } else { 5623 MDI_DEBUG(1, (MDI_NOTE, dip, 5624 "!pHCI post_resume failed: error %d", 5625 error)); 5626 MDI_PHCI_SET_SUSPEND(ph); 5627 } 5628 break; 5629 } 5630 MDI_PHCI_UNLOCK(ph); 5631 } 5632 5633 if (MDI_CLIENT(dip)) { 5634 ct = i_devi_get_client(dip); 5635 ASSERT(ct != NULL); 5636 5637 MDI_CLIENT_LOCK(ct); 5638 switch (cmd) { 5639 case DDI_ATTACH: 5640 MDI_DEBUG(2, (MDI_NOTE, 
dip, 5641 "client post_attach called %p", (void *)ct)); 5642 if (error != DDI_SUCCESS) { 5643 MDI_DEBUG(1, (MDI_NOTE, dip, 5644 "!client post_attach failed: error %d", 5645 error)); 5646 MDI_CLIENT_SET_DETACH(ct); 5647 MDI_DEBUG(4, (MDI_WARN, dip, 5648 "i_mdi_pm_reset_client")); 5649 i_mdi_pm_reset_client(ct); 5650 break; 5651 } 5652 5653 /* 5654 * Client device has successfully attached, inform 5655 * the vhci. 5656 */ 5657 vh = ct->ct_vhci; 5658 if (vh->vh_ops->vo_client_attached) 5659 (*vh->vh_ops->vo_client_attached)(dip); 5660 5661 MDI_CLIENT_SET_ATTACH(ct); 5662 break; 5663 5664 case DDI_RESUME: 5665 MDI_DEBUG(2, (MDI_NOTE, dip, 5666 "client post_attach: called %p", (void *)ct)); 5667 if (error == DDI_SUCCESS) { 5668 MDI_CLIENT_SET_RESUME(ct); 5669 } else { 5670 MDI_DEBUG(1, (MDI_NOTE, dip, 5671 "!client post_resume failed: error %d", 5672 error)); 5673 MDI_CLIENT_SET_SUSPEND(ct); 5674 } 5675 break; 5676 } 5677 MDI_CLIENT_UNLOCK(ct); 5678 } 5679 } 5680 5681 /* 5682 * mdi_pre_detach(): 5683 * Pre detach notification handler 5684 */ 5685 /*ARGSUSED*/ 5686 int 5687 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5688 { 5689 int rv = DDI_SUCCESS; 5690 5691 if (MDI_CLIENT(dip)) { 5692 (void) i_mdi_client_pre_detach(dip, cmd); 5693 } 5694 5695 if (MDI_PHCI(dip)) { 5696 rv = i_mdi_phci_pre_detach(dip, cmd); 5697 } 5698 5699 return (rv); 5700 } 5701 5702 /*ARGSUSED*/ 5703 static int 5704 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5705 { 5706 int rv = DDI_SUCCESS; 5707 mdi_phci_t *ph; 5708 mdi_client_t *ct; 5709 mdi_pathinfo_t *pip; 5710 mdi_pathinfo_t *failed_pip = NULL; 5711 mdi_pathinfo_t *next; 5712 5713 ph = i_devi_get_phci(dip); 5714 if (ph == NULL) { 5715 return (rv); 5716 } 5717 5718 MDI_PHCI_LOCK(ph); 5719 switch (cmd) { 5720 case DDI_DETACH: 5721 MDI_DEBUG(2, (MDI_NOTE, dip, 5722 "pHCI pre_detach: called %p", (void *)ph)); 5723 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5724 /* 5725 * mdi_pathinfo nodes are still attached to 5726 * this pHCI. 
Fail the detach for this pHCI. 5727 */ 5728 MDI_DEBUG(2, (MDI_WARN, dip, 5729 "pHCI pre_detach: paths are still attached %p", 5730 (void *)ph)); 5731 rv = DDI_FAILURE; 5732 break; 5733 } 5734 MDI_PHCI_SET_DETACH(ph); 5735 break; 5736 5737 case DDI_SUSPEND: 5738 /* 5739 * pHCI is getting suspended. Since mpxio client 5740 * devices may not be suspended at this point, to avoid 5741 * a potential stack overflow, it is important to suspend 5742 * client devices before pHCI can be suspended. 5743 */ 5744 5745 MDI_DEBUG(2, (MDI_NOTE, dip, 5746 "pHCI pre_suspend: called %p", (void *)ph)); 5747 /* 5748 * Suspend all the client devices accessible through this pHCI 5749 */ 5750 pip = ph->ph_path_head; 5751 while (pip != NULL && rv == DDI_SUCCESS) { 5752 dev_info_t *cdip; 5753 MDI_PI_LOCK(pip); 5754 next = 5755 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5756 ct = MDI_PI(pip)->pi_client; 5757 i_mdi_client_lock(ct, pip); 5758 cdip = ct->ct_dip; 5759 MDI_PI_UNLOCK(pip); 5760 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5761 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5762 i_mdi_client_unlock(ct); 5763 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5764 DDI_SUCCESS) { 5765 /* 5766 * Suspend of one of the client 5767 * device has failed. 5768 */ 5769 MDI_DEBUG(1, (MDI_WARN, dip, 5770 "!suspend of device (%s%d) failed.", 5771 ddi_driver_name(cdip), 5772 ddi_get_instance(cdip))); 5773 failed_pip = pip; 5774 break; 5775 } 5776 } else { 5777 i_mdi_client_unlock(ct); 5778 } 5779 pip = next; 5780 } 5781 5782 if (rv == DDI_SUCCESS) { 5783 /* 5784 * Suspend of client devices is complete. Proceed 5785 * with pHCI suspend. 5786 */ 5787 MDI_PHCI_SET_SUSPEND(ph); 5788 } else { 5789 /* 5790 * Revert back all the suspended client device states 5791 * to converse. 
5792 */ 5793 pip = ph->ph_path_head; 5794 while (pip != failed_pip) { 5795 dev_info_t *cdip; 5796 MDI_PI_LOCK(pip); 5797 next = 5798 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5799 ct = MDI_PI(pip)->pi_client; 5800 i_mdi_client_lock(ct, pip); 5801 cdip = ct->ct_dip; 5802 MDI_PI_UNLOCK(pip); 5803 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5804 i_mdi_client_unlock(ct); 5805 (void) devi_attach(cdip, DDI_RESUME); 5806 } else { 5807 i_mdi_client_unlock(ct); 5808 } 5809 pip = next; 5810 } 5811 } 5812 break; 5813 5814 default: 5815 rv = DDI_FAILURE; 5816 break; 5817 } 5818 MDI_PHCI_UNLOCK(ph); 5819 return (rv); 5820 } 5821 5822 /*ARGSUSED*/ 5823 static int 5824 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5825 { 5826 int rv = DDI_SUCCESS; 5827 mdi_client_t *ct; 5828 5829 ct = i_devi_get_client(dip); 5830 if (ct == NULL) { 5831 return (rv); 5832 } 5833 5834 MDI_CLIENT_LOCK(ct); 5835 switch (cmd) { 5836 case DDI_DETACH: 5837 MDI_DEBUG(2, (MDI_NOTE, dip, 5838 "client pre_detach: called %p", 5839 (void *)ct)); 5840 MDI_CLIENT_SET_DETACH(ct); 5841 break; 5842 5843 case DDI_SUSPEND: 5844 MDI_DEBUG(2, (MDI_NOTE, dip, 5845 "client pre_suspend: called %p", 5846 (void *)ct)); 5847 MDI_CLIENT_SET_SUSPEND(ct); 5848 break; 5849 5850 default: 5851 rv = DDI_FAILURE; 5852 break; 5853 } 5854 MDI_CLIENT_UNLOCK(ct); 5855 return (rv); 5856 } 5857 5858 /* 5859 * mdi_post_detach(): 5860 * Post detach notification handler 5861 */ 5862 /*ARGSUSED*/ 5863 void 5864 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5865 { 5866 /* 5867 * Detach/Suspend of mpxio component failed. Update our state 5868 * too 5869 */ 5870 if (MDI_PHCI(dip)) 5871 i_mdi_phci_post_detach(dip, cmd, error); 5872 5873 if (MDI_CLIENT(dip)) 5874 i_mdi_client_post_detach(dip, cmd, error); 5875 } 5876 5877 /*ARGSUSED*/ 5878 static void 5879 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5880 { 5881 mdi_phci_t *ph; 5882 5883 /* 5884 * Detach/Suspend of phci component failed. 
Update our state 5885 * too 5886 */ 5887 ph = i_devi_get_phci(dip); 5888 if (ph == NULL) { 5889 return; 5890 } 5891 5892 MDI_PHCI_LOCK(ph); 5893 /* 5894 * Detach of pHCI failed. Restore back converse 5895 * state 5896 */ 5897 switch (cmd) { 5898 case DDI_DETACH: 5899 MDI_DEBUG(2, (MDI_NOTE, dip, 5900 "pHCI post_detach: called %p", 5901 (void *)ph)); 5902 if (error != DDI_SUCCESS) 5903 MDI_PHCI_SET_ATTACH(ph); 5904 break; 5905 5906 case DDI_SUSPEND: 5907 MDI_DEBUG(2, (MDI_NOTE, dip, 5908 "pHCI post_suspend: called %p", 5909 (void *)ph)); 5910 if (error != DDI_SUCCESS) 5911 MDI_PHCI_SET_RESUME(ph); 5912 break; 5913 } 5914 MDI_PHCI_UNLOCK(ph); 5915 } 5916 5917 /*ARGSUSED*/ 5918 static void 5919 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5920 { 5921 mdi_client_t *ct; 5922 5923 ct = i_devi_get_client(dip); 5924 if (ct == NULL) { 5925 return; 5926 } 5927 MDI_CLIENT_LOCK(ct); 5928 /* 5929 * Detach of Client failed. Restore back converse 5930 * state 5931 */ 5932 switch (cmd) { 5933 case DDI_DETACH: 5934 MDI_DEBUG(2, (MDI_NOTE, dip, 5935 "client post_detach: called %p", (void *)ct)); 5936 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5937 MDI_DEBUG(4, (MDI_NOTE, dip, 5938 "i_mdi_pm_rele_client\n")); 5939 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5940 } else { 5941 MDI_DEBUG(4, (MDI_NOTE, dip, 5942 "i_mdi_pm_reset_client\n")); 5943 i_mdi_pm_reset_client(ct); 5944 } 5945 if (error != DDI_SUCCESS) 5946 MDI_CLIENT_SET_ATTACH(ct); 5947 break; 5948 5949 case DDI_SUSPEND: 5950 MDI_DEBUG(2, (MDI_NOTE, dip, 5951 "called %p", (void *)ct)); 5952 if (error != DDI_SUCCESS) 5953 MDI_CLIENT_SET_RESUME(ct); 5954 break; 5955 } 5956 MDI_CLIENT_UNLOCK(ct); 5957 } 5958 5959 int 5960 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5961 { 5962 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5963 } 5964 5965 /* 5966 * create and install per-path (client - pHCI) statistics 5967 * I/O stats supported: nread, nwritten, reads, and writes 5968 * Error stats - hard errors, soft errors, & transport errors 5969 */ 5970 int 5971 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5972 { 5973 kstat_t *kiosp, *kerrsp; 5974 struct pi_errs *nsp; 5975 struct mdi_pi_kstats *mdi_statp; 5976 5977 if (MDI_PI(pip)->pi_kstats != NULL) 5978 return (MDI_SUCCESS); 5979 5980 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5981 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5982 return (MDI_FAILURE); 5983 } 5984 5985 (void) strcat(ksname, ",err"); 5986 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5987 KSTAT_TYPE_NAMED, 5988 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5989 if (kerrsp == NULL) { 5990 kstat_delete(kiosp); 5991 return (MDI_FAILURE); 5992 } 5993 5994 nsp = (struct pi_errs *)kerrsp->ks_data; 5995 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5996 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5997 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5998 KSTAT_DATA_UINT32); 5999 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 6000 KSTAT_DATA_UINT32); 6001 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 6002 KSTAT_DATA_UINT32); 6003 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 6004 KSTAT_DATA_UINT32); 6005 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 6006 KSTAT_DATA_UINT32); 6007 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 6008 KSTAT_DATA_UINT32); 6009 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 6010 KSTAT_DATA_UINT32); 6011 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 6012 6013 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 6014 mdi_statp->pi_kstat_ref = 1; 6015 mdi_statp->pi_kstat_iostats = kiosp; 6016 mdi_statp->pi_kstat_errstats = kerrsp; 6017 
kstat_install(kiosp); 6018 kstat_install(kerrsp); 6019 MDI_PI(pip)->pi_kstats = mdi_statp; 6020 return (MDI_SUCCESS); 6021 } 6022 6023 /* 6024 * destroy per-path properties 6025 */ 6026 static void 6027 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 6028 { 6029 6030 struct mdi_pi_kstats *mdi_statp; 6031 6032 if (MDI_PI(pip)->pi_kstats == NULL) 6033 return; 6034 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 6035 return; 6036 6037 MDI_PI(pip)->pi_kstats = NULL; 6038 6039 /* 6040 * the kstat may be shared between multiple pathinfo nodes 6041 * decrement this pathinfo's usage, removing the kstats 6042 * themselves when the last pathinfo reference is removed. 6043 */ 6044 ASSERT(mdi_statp->pi_kstat_ref > 0); 6045 if (--mdi_statp->pi_kstat_ref != 0) 6046 return; 6047 6048 kstat_delete(mdi_statp->pi_kstat_iostats); 6049 kstat_delete(mdi_statp->pi_kstat_errstats); 6050 kmem_free(mdi_statp, sizeof (*mdi_statp)); 6051 } 6052 6053 /* 6054 * update I/O paths KSTATS 6055 */ 6056 void 6057 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 6058 { 6059 kstat_t *iostatp; 6060 size_t xfer_cnt; 6061 6062 ASSERT(pip != NULL); 6063 6064 /* 6065 * I/O can be driven across a path prior to having path 6066 * statistics available, i.e. probe(9e). 6067 */ 6068 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 6069 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 6070 xfer_cnt = bp->b_bcount - bp->b_resid; 6071 if (bp->b_flags & B_READ) { 6072 KSTAT_IO_PTR(iostatp)->reads++; 6073 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 6074 } else { 6075 KSTAT_IO_PTR(iostatp)->writes++; 6076 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 6077 } 6078 } 6079 } 6080 6081 /* 6082 * Enable the path(specific client/target/initiator) 6083 * Enabling a path means that MPxIO may select the enabled path for routing 6084 * future I/O requests, subject to other path state constraints. 
6085 */ 6086 int 6087 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 6088 { 6089 mdi_phci_t *ph; 6090 6091 ph = MDI_PI(pip)->pi_phci; 6092 if (ph == NULL) { 6093 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6094 "!failed: path %s %p: NULL ph", 6095 mdi_pi_spathname(pip), (void *)pip)); 6096 return (MDI_FAILURE); 6097 } 6098 6099 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 6100 MDI_ENABLE_OP); 6101 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6102 "!returning success pip = %p. ph = %p", 6103 (void *)pip, (void *)ph)); 6104 return (MDI_SUCCESS); 6105 6106 } 6107 6108 /* 6109 * Disable the path (specific client/target/initiator) 6110 * Disabling a path means that MPxIO will not select the disabled path for 6111 * routing any new I/O requests. 6112 */ 6113 int 6114 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 6115 { 6116 mdi_phci_t *ph; 6117 6118 ph = MDI_PI(pip)->pi_phci; 6119 if (ph == NULL) { 6120 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6121 "!failed: path %s %p: NULL ph", 6122 mdi_pi_spathname(pip), (void *)pip)); 6123 return (MDI_FAILURE); 6124 } 6125 6126 (void) i_mdi_enable_disable_path(pip, 6127 ph->ph_vhci, flags, MDI_DISABLE_OP); 6128 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6129 "!returning success pip = %p. ph = %p", 6130 (void *)pip, (void *)ph)); 6131 return (MDI_SUCCESS); 6132 } 6133 6134 /* 6135 * disable the path to a particular pHCI (pHCI specified in the phci_path 6136 * argument) for a particular client (specified in the client_path argument). 6137 * Disabling a path means that MPxIO will not select the disabled path for 6138 * routing any new I/O requests. 
6139 * NOTE: this will be removed once the NWS files are changed to use the new 6140 * mdi_{enable,disable}_path interfaces 6141 */ 6142 int 6143 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6144 { 6145 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 6146 } 6147 6148 /* 6149 * Enable the path to a particular pHCI (pHCI specified in the phci_path 6150 * argument) for a particular client (specified in the client_path argument). 6151 * Enabling a path means that MPxIO may select the enabled path for routing 6152 * future I/O requests, subject to other path state constraints. 6153 * NOTE: this will be removed once the NWS files are changed to use the new 6154 * mdi_{enable,disable}_path interfaces 6155 */ 6156 6157 int 6158 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6159 { 6160 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 6161 } 6162 6163 /* 6164 * Common routine for doing enable/disable. 6165 */ 6166 static mdi_pathinfo_t * 6167 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6168 int op) 6169 { 6170 int sync_flag = 0; 6171 int rv; 6172 mdi_pathinfo_t *next; 6173 int (*f)() = NULL; 6174 6175 /* 6176 * Check to make sure the path is not already in the 6177 * requested state. If it is just return the next path 6178 * as we have nothing to do here. 6179 */ 6180 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6181 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6182 MDI_PI_LOCK(pip); 6183 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6184 MDI_PI_UNLOCK(pip); 6185 return (next); 6186 } 6187 6188 f = vh->vh_ops->vo_pi_state_change; 6189 6190 sync_flag = (flags << 8) & 0xf00; 6191 6192 /* 6193 * Do a callback into the mdi consumer to let it 6194 * know that path is about to get enabled/disabled. 
6195 */ 6196 if (f != NULL) { 6197 rv = (*f)(vh->vh_dip, pip, 0, 6198 MDI_PI_EXT_STATE(pip), 6199 MDI_EXT_STATE_CHANGE | sync_flag | 6200 op | MDI_BEFORE_STATE_CHANGE); 6201 if (rv != MDI_SUCCESS) { 6202 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6203 "vo_pi_state_change: failed rv = %x", rv)); 6204 } 6205 } 6206 MDI_PI_LOCK(pip); 6207 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6208 6209 switch (flags) { 6210 case USER_DISABLE: 6211 if (op == MDI_DISABLE_OP) { 6212 MDI_PI_SET_USER_DISABLE(pip); 6213 } else { 6214 MDI_PI_SET_USER_ENABLE(pip); 6215 } 6216 break; 6217 case DRIVER_DISABLE: 6218 if (op == MDI_DISABLE_OP) { 6219 MDI_PI_SET_DRV_DISABLE(pip); 6220 } else { 6221 MDI_PI_SET_DRV_ENABLE(pip); 6222 } 6223 break; 6224 case DRIVER_DISABLE_TRANSIENT: 6225 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6226 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6227 } else { 6228 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6229 } 6230 break; 6231 } 6232 MDI_PI_UNLOCK(pip); 6233 /* 6234 * Do a callback into the mdi consumer to let it 6235 * know that path is now enabled/disabled. 6236 */ 6237 if (f != NULL) { 6238 rv = (*f)(vh->vh_dip, pip, 0, 6239 MDI_PI_EXT_STATE(pip), 6240 MDI_EXT_STATE_CHANGE | sync_flag | 6241 op | MDI_AFTER_STATE_CHANGE); 6242 if (rv != MDI_SUCCESS) { 6243 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6244 "vo_pi_state_change failed: rv = %x", rv)); 6245 } 6246 } 6247 return (next); 6248 } 6249 6250 /* 6251 * Common routine for doing enable/disable. 6252 * NOTE: this will be removed once the NWS files are changed to use the new 6253 * mdi_{enable,disable}_path has been putback 6254 */ 6255 int 6256 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6257 { 6258 6259 mdi_phci_t *ph; 6260 mdi_vhci_t *vh = NULL; 6261 mdi_client_t *ct; 6262 mdi_pathinfo_t *next, *pip; 6263 int found_it; 6264 6265 ph = i_devi_get_phci(pdip); 6266 MDI_DEBUG(5, (MDI_NOTE, cdip ? 
cdip : pdip, 6267 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6268 (void *)cdip)); 6269 if (ph == NULL) { 6270 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6271 "!failed: operation %d: NULL ph", op)); 6272 return (MDI_FAILURE); 6273 } 6274 6275 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6276 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6277 "!failed: invalid operation %d", op)); 6278 return (MDI_FAILURE); 6279 } 6280 6281 vh = ph->ph_vhci; 6282 6283 if (cdip == NULL) { 6284 /* 6285 * Need to mark the Phci as enabled/disabled. 6286 */ 6287 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip, 6288 "op %d for the phci", op)); 6289 MDI_PHCI_LOCK(ph); 6290 switch (flags) { 6291 case USER_DISABLE: 6292 if (op == MDI_DISABLE_OP) { 6293 MDI_PHCI_SET_USER_DISABLE(ph); 6294 } else { 6295 MDI_PHCI_SET_USER_ENABLE(ph); 6296 } 6297 break; 6298 case DRIVER_DISABLE: 6299 if (op == MDI_DISABLE_OP) { 6300 MDI_PHCI_SET_DRV_DISABLE(ph); 6301 } else { 6302 MDI_PHCI_SET_DRV_ENABLE(ph); 6303 } 6304 break; 6305 case DRIVER_DISABLE_TRANSIENT: 6306 if (op == MDI_DISABLE_OP) { 6307 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6308 } else { 6309 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6310 } 6311 break; 6312 default: 6313 MDI_PHCI_UNLOCK(ph); 6314 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6315 "!invalid flag argument= %d", flags)); 6316 } 6317 6318 /* 6319 * Phci has been disabled. Now try to enable/disable 6320 * path info's to each client. 6321 */ 6322 pip = ph->ph_path_head; 6323 while (pip != NULL) { 6324 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6325 } 6326 MDI_PHCI_UNLOCK(ph); 6327 } else { 6328 6329 /* 6330 * Disable a specific client. 6331 */ 6332 ct = i_devi_get_client(cdip); 6333 if (ct == NULL) { 6334 MDI_DEBUG(1, (MDI_NOTE, cdip ? 
cdip : pdip, 6335 "!failed: operation = %d: NULL ct", op)); 6336 return (MDI_FAILURE); 6337 } 6338 6339 MDI_CLIENT_LOCK(ct); 6340 pip = ct->ct_path_head; 6341 found_it = 0; 6342 while (pip != NULL) { 6343 MDI_PI_LOCK(pip); 6344 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6345 if (MDI_PI(pip)->pi_phci == ph) { 6346 MDI_PI_UNLOCK(pip); 6347 found_it = 1; 6348 break; 6349 } 6350 MDI_PI_UNLOCK(pip); 6351 pip = next; 6352 } 6353 6354 6355 MDI_CLIENT_UNLOCK(ct); 6356 if (found_it == 0) { 6357 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6358 "!failed. Could not find corresponding pip\n")); 6359 return (MDI_FAILURE); 6360 } 6361 6362 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6363 } 6364 6365 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6366 "!op %d returning success pdip = %p cdip = %p", 6367 op, (void *)pdip, (void *)cdip)); 6368 return (MDI_SUCCESS); 6369 } 6370 6371 /* 6372 * Ensure phci powered up 6373 */ 6374 static void 6375 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6376 { 6377 dev_info_t *ph_dip; 6378 6379 ASSERT(pip != NULL); 6380 ASSERT(MDI_PI_LOCKED(pip)); 6381 6382 if (MDI_PI(pip)->pi_pm_held) { 6383 return; 6384 } 6385 6386 ph_dip = mdi_pi_get_phci(pip); 6387 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6388 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6389 if (ph_dip == NULL) { 6390 return; 6391 } 6392 6393 MDI_PI_UNLOCK(pip); 6394 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d", 6395 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6396 pm_hold_power(ph_dip); 6397 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d", 6398 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6399 MDI_PI_LOCK(pip); 6400 6401 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6402 if (DEVI(ph_dip)->devi_pm_info) 6403 MDI_PI(pip)->pi_pm_held = 1; 6404 } 6405 6406 /* 6407 * Allow phci powered down 6408 */ 6409 static void 6410 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6411 { 6412 dev_info_t *ph_dip = NULL; 6413 6414 ASSERT(pip != NULL); 6415 ASSERT(MDI_PI_LOCKED(pip)); 6416 6417 if 
(MDI_PI(pip)->pi_pm_held == 0) { 6418 return; 6419 } 6420 6421 ph_dip = mdi_pi_get_phci(pip); 6422 ASSERT(ph_dip != NULL); 6423 6424 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6425 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6426 6427 MDI_PI_UNLOCK(pip); 6428 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6429 "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6430 pm_rele_power(ph_dip); 6431 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6432 "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6433 MDI_PI_LOCK(pip); 6434 6435 MDI_PI(pip)->pi_pm_held = 0; 6436 } 6437 6438 static void 6439 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6440 { 6441 ASSERT(MDI_CLIENT_LOCKED(ct)); 6442 6443 ct->ct_power_cnt += incr; 6444 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6445 "%p ct_power_cnt = %d incr = %d", 6446 (void *)ct, ct->ct_power_cnt, incr)); 6447 ASSERT(ct->ct_power_cnt >= 0); 6448 } 6449 6450 static void 6451 i_mdi_rele_all_phci(mdi_client_t *ct) 6452 { 6453 mdi_pathinfo_t *pip; 6454 6455 ASSERT(MDI_CLIENT_LOCKED(ct)); 6456 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6457 while (pip != NULL) { 6458 mdi_hold_path(pip); 6459 MDI_PI_LOCK(pip); 6460 i_mdi_pm_rele_pip(pip); 6461 MDI_PI_UNLOCK(pip); 6462 mdi_rele_path(pip); 6463 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6464 } 6465 } 6466 6467 static void 6468 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6469 { 6470 ASSERT(MDI_CLIENT_LOCKED(ct)); 6471 6472 if (i_ddi_devi_attached(ct->ct_dip)) { 6473 ct->ct_power_cnt -= decr; 6474 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6475 "%p ct_power_cnt = %d decr = %d", 6476 (void *)ct, ct->ct_power_cnt, decr)); 6477 } 6478 6479 ASSERT(ct->ct_power_cnt >= 0); 6480 if (ct->ct_power_cnt == 0) { 6481 i_mdi_rele_all_phci(ct); 6482 return; 6483 } 6484 } 6485 6486 static void 6487 i_mdi_pm_reset_client(mdi_client_t *ct) 6488 { 6489 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6490 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt)); 6491 ASSERT(MDI_CLIENT_LOCKED(ct)); 6492 ct->ct_power_cnt = 0; 6493 i_mdi_rele_all_phci(ct); 
6494 ct->ct_powercnt_config = 0; 6495 ct->ct_powercnt_unconfig = 0; 6496 ct->ct_powercnt_reset = 1; 6497 } 6498 6499 static int 6500 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6501 { 6502 int ret; 6503 dev_info_t *ph_dip; 6504 6505 MDI_PI_LOCK(pip); 6506 i_mdi_pm_hold_pip(pip); 6507 6508 ph_dip = mdi_pi_get_phci(pip); 6509 MDI_PI_UNLOCK(pip); 6510 6511 /* bring all components of phci to full power */ 6512 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6513 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip), 6514 ddi_get_instance(ph_dip), (void *)pip)); 6515 6516 ret = pm_powerup(ph_dip); 6517 6518 if (ret == DDI_FAILURE) { 6519 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6520 "pm_powerup FAILED for %s%d %p", 6521 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), 6522 (void *)pip)); 6523 6524 MDI_PI_LOCK(pip); 6525 i_mdi_pm_rele_pip(pip); 6526 MDI_PI_UNLOCK(pip); 6527 return (MDI_FAILURE); 6528 } 6529 6530 return (MDI_SUCCESS); 6531 } 6532 6533 static int 6534 i_mdi_power_all_phci(mdi_client_t *ct) 6535 { 6536 mdi_pathinfo_t *pip; 6537 int succeeded = 0; 6538 6539 ASSERT(MDI_CLIENT_LOCKED(ct)); 6540 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6541 while (pip != NULL) { 6542 /* 6543 * Don't power if MDI_PATHINFO_STATE_FAULT 6544 * or MDI_PATHINFO_STATE_OFFLINE. 6545 */ 6546 if (MDI_PI_IS_INIT(pip) || 6547 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6548 mdi_hold_path(pip); 6549 MDI_CLIENT_UNLOCK(ct); 6550 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6551 succeeded = 1; 6552 6553 ASSERT(ct == MDI_PI(pip)->pi_client); 6554 MDI_CLIENT_LOCK(ct); 6555 mdi_rele_path(pip); 6556 } 6557 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6558 } 6559 6560 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6561 } 6562 6563 /* 6564 * mdi_bus_power(): 6565 * 1. Place the phci(s) into powered up state so that 6566 * client can do power management 6567 * 2. 
Ensure phci powered up as client power managing 6568 * Return Values: 6569 * MDI_SUCCESS 6570 * MDI_FAILURE 6571 */ 6572 int 6573 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6574 void *arg, void *result) 6575 { 6576 int ret = MDI_SUCCESS; 6577 pm_bp_child_pwrchg_t *bpc; 6578 mdi_client_t *ct; 6579 dev_info_t *cdip; 6580 pm_bp_has_changed_t *bphc; 6581 6582 /* 6583 * BUS_POWER_NOINVOL not supported 6584 */ 6585 if (op == BUS_POWER_NOINVOL) 6586 return (MDI_FAILURE); 6587 6588 /* 6589 * ignore other OPs. 6590 * return quickly to save cou cycles on the ct processing 6591 */ 6592 switch (op) { 6593 case BUS_POWER_PRE_NOTIFICATION: 6594 case BUS_POWER_POST_NOTIFICATION: 6595 bpc = (pm_bp_child_pwrchg_t *)arg; 6596 cdip = bpc->bpc_dip; 6597 break; 6598 case BUS_POWER_HAS_CHANGED: 6599 bphc = (pm_bp_has_changed_t *)arg; 6600 cdip = bphc->bphc_dip; 6601 break; 6602 default: 6603 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6604 } 6605 6606 ASSERT(MDI_CLIENT(cdip)); 6607 6608 ct = i_devi_get_client(cdip); 6609 if (ct == NULL) 6610 return (MDI_FAILURE); 6611 6612 /* 6613 * wait till the mdi_pathinfo node state change are processed 6614 */ 6615 MDI_CLIENT_LOCK(ct); 6616 switch (op) { 6617 case BUS_POWER_PRE_NOTIFICATION: 6618 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6619 "BUS_POWER_PRE_NOTIFICATION:" 6620 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6621 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6622 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6623 6624 /* serialize power level change per client */ 6625 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6626 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6627 6628 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6629 6630 if (ct->ct_power_cnt == 0) { 6631 ret = i_mdi_power_all_phci(ct); 6632 } 6633 6634 /* 6635 * if new_level > 0: 6636 * - hold phci(s) 6637 * - power up phci(s) if not already 6638 * ignore power down 6639 */ 6640 if (bpc->bpc_nlevel > 0) { 6641 if 
(!DEVI_IS_ATTACHING(ct->ct_dip)) { 6642 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6643 "i_mdi_pm_hold_client\n")); 6644 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6645 } 6646 } 6647 break; 6648 case BUS_POWER_POST_NOTIFICATION: 6649 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6650 "BUS_POWER_POST_NOTIFICATION:" 6651 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d", 6652 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6653 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6654 *(int *)result)); 6655 6656 if (*(int *)result == DDI_SUCCESS) { 6657 if (bpc->bpc_nlevel > 0) { 6658 MDI_CLIENT_SET_POWER_UP(ct); 6659 } else { 6660 MDI_CLIENT_SET_POWER_DOWN(ct); 6661 } 6662 } 6663 6664 /* release the hold we did in pre-notification */ 6665 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6666 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6667 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6668 "i_mdi_pm_rele_client\n")); 6669 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6670 } 6671 6672 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6673 /* another thread might started attaching */ 6674 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6675 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6676 "i_mdi_pm_rele_client\n")); 6677 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6678 /* detaching has been taken care in pm_post_unconfig */ 6679 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6680 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6681 "i_mdi_pm_reset_client\n")); 6682 i_mdi_pm_reset_client(ct); 6683 } 6684 } 6685 6686 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6687 cv_broadcast(&ct->ct_powerchange_cv); 6688 6689 break; 6690 6691 /* need to do more */ 6692 case BUS_POWER_HAS_CHANGED: 6693 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6694 "BUS_POWER_HAS_CHANGED:" 6695 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6696 ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6697 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6698 6699 if (bphc->bphc_nlevel > 0 && 6700 bphc->bphc_nlevel > bphc->bphc_olevel) { 
6701 if (ct->ct_power_cnt == 0) { 6702 ret = i_mdi_power_all_phci(ct); 6703 } 6704 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6705 "i_mdi_pm_hold_client\n")); 6706 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6707 } 6708 6709 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6710 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6711 "i_mdi_pm_rele_client\n")); 6712 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6713 } 6714 break; 6715 } 6716 6717 MDI_CLIENT_UNLOCK(ct); 6718 return (ret); 6719 } 6720 6721 static int 6722 i_mdi_pm_pre_config_one(dev_info_t *child) 6723 { 6724 int ret = MDI_SUCCESS; 6725 mdi_client_t *ct; 6726 6727 ct = i_devi_get_client(child); 6728 if (ct == NULL) 6729 return (MDI_FAILURE); 6730 6731 MDI_CLIENT_LOCK(ct); 6732 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6733 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6734 6735 if (!MDI_CLIENT_IS_FAILED(ct)) { 6736 MDI_CLIENT_UNLOCK(ct); 6737 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n")); 6738 return (MDI_SUCCESS); 6739 } 6740 6741 if (ct->ct_powercnt_config) { 6742 MDI_CLIENT_UNLOCK(ct); 6743 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n")); 6744 return (MDI_SUCCESS); 6745 } 6746 6747 if (ct->ct_power_cnt == 0) { 6748 ret = i_mdi_power_all_phci(ct); 6749 } 6750 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6751 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6752 ct->ct_powercnt_config = 1; 6753 ct->ct_powercnt_reset = 0; 6754 MDI_CLIENT_UNLOCK(ct); 6755 return (ret); 6756 } 6757 6758 static int 6759 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6760 { 6761 int ret = MDI_SUCCESS; 6762 dev_info_t *cdip; 6763 int circ; 6764 6765 ASSERT(MDI_VHCI(vdip)); 6766 6767 /* ndi_devi_config_one */ 6768 if (child) { 6769 ASSERT(DEVI_BUSY_OWNED(vdip)); 6770 return (i_mdi_pm_pre_config_one(child)); 6771 } 6772 6773 /* devi_config_common */ 6774 ndi_devi_enter(vdip, &circ); 6775 cdip = ddi_get_child(vdip); 6776 while (cdip) { 6777 dev_info_t *next = ddi_get_next_sibling(cdip); 6778 6779 
ret = i_mdi_pm_pre_config_one(cdip); 6780 if (ret != MDI_SUCCESS) 6781 break; 6782 cdip = next; 6783 } 6784 ndi_devi_exit(vdip, circ); 6785 return (ret); 6786 } 6787 6788 static int 6789 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6790 { 6791 int ret = MDI_SUCCESS; 6792 mdi_client_t *ct; 6793 6794 ct = i_devi_get_client(child); 6795 if (ct == NULL) 6796 return (MDI_FAILURE); 6797 6798 MDI_CLIENT_LOCK(ct); 6799 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6800 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6801 6802 if (!i_ddi_devi_attached(ct->ct_dip)) { 6803 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n")); 6804 MDI_CLIENT_UNLOCK(ct); 6805 return (MDI_SUCCESS); 6806 } 6807 6808 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6809 (flags & NDI_AUTODETACH)) { 6810 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n")); 6811 MDI_CLIENT_UNLOCK(ct); 6812 return (MDI_FAILURE); 6813 } 6814 6815 if (ct->ct_powercnt_unconfig) { 6816 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n")); 6817 MDI_CLIENT_UNLOCK(ct); 6818 *held = 1; 6819 return (MDI_SUCCESS); 6820 } 6821 6822 if (ct->ct_power_cnt == 0) { 6823 ret = i_mdi_power_all_phci(ct); 6824 } 6825 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6826 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6827 ct->ct_powercnt_unconfig = 1; 6828 ct->ct_powercnt_reset = 0; 6829 MDI_CLIENT_UNLOCK(ct); 6830 if (ret == MDI_SUCCESS) 6831 *held = 1; 6832 return (ret); 6833 } 6834 6835 static int 6836 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6837 int flags) 6838 { 6839 int ret = MDI_SUCCESS; 6840 dev_info_t *cdip; 6841 int circ; 6842 6843 ASSERT(MDI_VHCI(vdip)); 6844 *held = 0; 6845 6846 /* ndi_devi_unconfig_one */ 6847 if (child) { 6848 ASSERT(DEVI_BUSY_OWNED(vdip)); 6849 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6850 } 6851 6852 /* devi_unconfig_common */ 6853 ndi_devi_enter(vdip, &circ); 6854 cdip = ddi_get_child(vdip); 6855 while (cdip) { 6856 dev_info_t *next = 
ddi_get_next_sibling(cdip); 6857 6858 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6859 cdip = next; 6860 } 6861 ndi_devi_exit(vdip, circ); 6862 6863 if (*held) 6864 ret = MDI_SUCCESS; 6865 6866 return (ret); 6867 } 6868 6869 static void 6870 i_mdi_pm_post_config_one(dev_info_t *child) 6871 { 6872 mdi_client_t *ct; 6873 6874 ct = i_devi_get_client(child); 6875 if (ct == NULL) 6876 return; 6877 6878 MDI_CLIENT_LOCK(ct); 6879 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6880 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6881 6882 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6883 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n")); 6884 MDI_CLIENT_UNLOCK(ct); 6885 return; 6886 } 6887 6888 /* client has not been updated */ 6889 if (MDI_CLIENT_IS_FAILED(ct)) { 6890 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n")); 6891 MDI_CLIENT_UNLOCK(ct); 6892 return; 6893 } 6894 6895 /* another thread might have powered it down or detached it */ 6896 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6897 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6898 (!i_ddi_devi_attached(ct->ct_dip) && 6899 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6900 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 6901 i_mdi_pm_reset_client(ct); 6902 } else { 6903 mdi_pathinfo_t *pip, *next; 6904 int valid_path_count = 0; 6905 6906 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 6907 pip = ct->ct_path_head; 6908 while (pip != NULL) { 6909 MDI_PI_LOCK(pip); 6910 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6911 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6912 valid_path_count ++; 6913 MDI_PI_UNLOCK(pip); 6914 pip = next; 6915 } 6916 i_mdi_pm_rele_client(ct, valid_path_count); 6917 } 6918 ct->ct_powercnt_config = 0; 6919 MDI_CLIENT_UNLOCK(ct); 6920 } 6921 6922 static void 6923 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6924 { 6925 int circ; 6926 dev_info_t *cdip; 6927 6928 ASSERT(MDI_VHCI(vdip)); 6929 6930 /* ndi_devi_config_one */ 6931 if (child) { 6932 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6933 i_mdi_pm_post_config_one(child); 6934 return; 6935 } 6936 6937 /* devi_config_common */ 6938 ndi_devi_enter(vdip, &circ); 6939 cdip = ddi_get_child(vdip); 6940 while (cdip) { 6941 dev_info_t *next = ddi_get_next_sibling(cdip); 6942 6943 i_mdi_pm_post_config_one(cdip); 6944 cdip = next; 6945 } 6946 ndi_devi_exit(vdip, circ); 6947 } 6948 6949 static void 6950 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6951 { 6952 mdi_client_t *ct; 6953 6954 ct = i_devi_get_client(child); 6955 if (ct == NULL) 6956 return; 6957 6958 MDI_CLIENT_LOCK(ct); 6959 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6960 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6961 6962 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6963 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n")); 6964 MDI_CLIENT_UNLOCK(ct); 6965 return; 6966 } 6967 6968 /* failure detaching or another thread just attached it */ 6969 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6970 i_ddi_devi_attached(ct->ct_dip)) || 6971 (!i_ddi_devi_attached(ct->ct_dip) && 6972 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6973 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 6974 i_mdi_pm_reset_client(ct); 6975 } else { 6976 mdi_pathinfo_t *pip, *next; 6977 int valid_path_count = 0; 6978 6979 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 6980 pip = ct->ct_path_head; 6981 while (pip != NULL) { 6982 MDI_PI_LOCK(pip); 6983 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6984 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6985 valid_path_count ++; 6986 MDI_PI_UNLOCK(pip); 6987 pip = next; 6988 } 6989 i_mdi_pm_rele_client(ct, valid_path_count); 6990 ct->ct_powercnt_unconfig = 0; 6991 } 6992 6993 MDI_CLIENT_UNLOCK(ct); 6994 } 6995 6996 static void 6997 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6998 { 6999 int circ; 7000 dev_info_t *cdip; 7001 7002 ASSERT(MDI_VHCI(vdip)); 7003 7004 if (!held) { 7005 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held)); 7006 return; 
7007 } 7008 7009 if (child) { 7010 ASSERT(DEVI_BUSY_OWNED(vdip)); 7011 i_mdi_pm_post_unconfig_one(child); 7012 return; 7013 } 7014 7015 ndi_devi_enter(vdip, &circ); 7016 cdip = ddi_get_child(vdip); 7017 while (cdip) { 7018 dev_info_t *next = ddi_get_next_sibling(cdip); 7019 7020 i_mdi_pm_post_unconfig_one(cdip); 7021 cdip = next; 7022 } 7023 ndi_devi_exit(vdip, circ); 7024 } 7025 7026 int 7027 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 7028 { 7029 int circ, ret = MDI_SUCCESS; 7030 dev_info_t *client_dip = NULL; 7031 mdi_client_t *ct; 7032 7033 /* 7034 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 7035 * Power up pHCI for the named client device. 7036 * Note: Before the client is enumerated under vhci by phci, 7037 * client_dip can be NULL. Then proceed to power up all the 7038 * pHCIs. 7039 */ 7040 if (devnm != NULL) { 7041 ndi_devi_enter(vdip, &circ); 7042 client_dip = ndi_devi_findchild(vdip, devnm); 7043 } 7044 7045 MDI_DEBUG(4, (MDI_NOTE, vdip, 7046 "op = %d %s %p", op, devnm ? 
devnm : "", (void *)client_dip)); 7047 7048 switch (op) { 7049 case MDI_PM_PRE_CONFIG: 7050 ret = i_mdi_pm_pre_config(vdip, client_dip); 7051 break; 7052 7053 case MDI_PM_PRE_UNCONFIG: 7054 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 7055 flags); 7056 break; 7057 7058 case MDI_PM_POST_CONFIG: 7059 i_mdi_pm_post_config(vdip, client_dip); 7060 break; 7061 7062 case MDI_PM_POST_UNCONFIG: 7063 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 7064 break; 7065 7066 case MDI_PM_HOLD_POWER: 7067 case MDI_PM_RELE_POWER: 7068 ASSERT(args); 7069 7070 client_dip = (dev_info_t *)args; 7071 ASSERT(MDI_CLIENT(client_dip)); 7072 7073 ct = i_devi_get_client(client_dip); 7074 MDI_CLIENT_LOCK(ct); 7075 7076 if (op == MDI_PM_HOLD_POWER) { 7077 if (ct->ct_power_cnt == 0) { 7078 (void) i_mdi_power_all_phci(ct); 7079 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7080 "i_mdi_pm_hold_client\n")); 7081 i_mdi_pm_hold_client(ct, ct->ct_path_count); 7082 } 7083 } else { 7084 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 7085 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7086 "i_mdi_pm_rele_client\n")); 7087 i_mdi_pm_rele_client(ct, ct->ct_path_count); 7088 } else { 7089 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7090 "i_mdi_pm_reset_client\n")); 7091 i_mdi_pm_reset_client(ct); 7092 } 7093 } 7094 7095 MDI_CLIENT_UNLOCK(ct); 7096 break; 7097 7098 default: 7099 break; 7100 } 7101 7102 if (devnm) 7103 ndi_devi_exit(vdip, circ); 7104 7105 return (ret); 7106 } 7107 7108 int 7109 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 7110 { 7111 mdi_vhci_t *vhci; 7112 7113 if (!MDI_VHCI(dip)) 7114 return (MDI_FAILURE); 7115 7116 if (mdi_class) { 7117 vhci = DEVI(dip)->devi_mdi_xhci; 7118 ASSERT(vhci); 7119 *mdi_class = vhci->vh_class; 7120 } 7121 7122 return (MDI_SUCCESS); 7123 } 7124 7125 int 7126 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 7127 { 7128 mdi_phci_t *phci; 7129 7130 if (!MDI_PHCI(dip)) 7131 return (MDI_FAILURE); 7132 7133 if (mdi_class) { 7134 phci = 
DEVI(dip)->devi_mdi_xhci; 7135 ASSERT(phci); 7136 *mdi_class = phci->ph_vhci->vh_class; 7137 } 7138 7139 return (MDI_SUCCESS); 7140 } 7141 7142 int 7143 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 7144 { 7145 mdi_client_t *client; 7146 7147 if (!MDI_CLIENT(dip)) 7148 return (MDI_FAILURE); 7149 7150 if (mdi_class) { 7151 client = DEVI(dip)->devi_mdi_client; 7152 ASSERT(client); 7153 *mdi_class = client->ct_vhci->vh_class; 7154 } 7155 7156 return (MDI_SUCCESS); 7157 } 7158 7159 void * 7160 mdi_client_get_vhci_private(dev_info_t *dip) 7161 { 7162 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7163 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7164 mdi_client_t *ct; 7165 ct = i_devi_get_client(dip); 7166 return (ct->ct_vprivate); 7167 } 7168 return (NULL); 7169 } 7170 7171 void 7172 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7173 { 7174 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7175 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7176 mdi_client_t *ct; 7177 ct = i_devi_get_client(dip); 7178 ct->ct_vprivate = data; 7179 } 7180 } 7181 /* 7182 * mdi_pi_get_vhci_private(): 7183 * Get the vhci private information associated with the 7184 * mdi_pathinfo node 7185 */ 7186 void * 7187 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7188 { 7189 caddr_t vprivate = NULL; 7190 if (pip) { 7191 vprivate = MDI_PI(pip)->pi_vprivate; 7192 } 7193 return (vprivate); 7194 } 7195 7196 /* 7197 * mdi_pi_set_vhci_private(): 7198 * Set the vhci private information in the mdi_pathinfo node 7199 */ 7200 void 7201 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 7202 { 7203 if (pip) { 7204 MDI_PI(pip)->pi_vprivate = priv; 7205 } 7206 } 7207 7208 /* 7209 * mdi_phci_get_vhci_private(): 7210 * Get the vhci private information associated with the 7211 * mdi_phci node 7212 */ 7213 void * 7214 mdi_phci_get_vhci_private(dev_info_t *dip) 7215 { 7216 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7217 if 
(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7218 mdi_phci_t *ph; 7219 ph = i_devi_get_phci(dip); 7220 return (ph->ph_vprivate); 7221 } 7222 return (NULL); 7223 } 7224 7225 /* 7226 * mdi_phci_set_vhci_private(): 7227 * Set the vhci private information in the mdi_phci node 7228 */ 7229 void 7230 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7231 { 7232 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7233 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7234 mdi_phci_t *ph; 7235 ph = i_devi_get_phci(dip); 7236 ph->ph_vprivate = priv; 7237 } 7238 } 7239 7240 int 7241 mdi_pi_ishidden(mdi_pathinfo_t *pip) 7242 { 7243 return (MDI_PI_FLAGS_IS_HIDDEN(pip)); 7244 } 7245 7246 int 7247 mdi_pi_device_isremoved(mdi_pathinfo_t *pip) 7248 { 7249 return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)); 7250 } 7251 7252 /* 7253 * When processing hotplug, if mdi_pi_offline-mdi_pi_free fails then this 7254 * interface is used to represent device removal. 7255 */ 7256 int 7257 mdi_pi_device_remove(mdi_pathinfo_t *pip) 7258 { 7259 MDI_PI_LOCK(pip); 7260 if (mdi_pi_device_isremoved(pip)) { 7261 MDI_PI_UNLOCK(pip); 7262 return (0); 7263 } 7264 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip); 7265 MDI_PI_FLAGS_SET_HIDDEN(pip); 7266 MDI_PI_UNLOCK(pip); 7267 7268 i_ddi_di_cache_invalidate(); 7269 7270 return (1); 7271 } 7272 7273 /* 7274 * When processing hotplug, if a path marked mdi_pi_device_isremoved() 7275 * is now accessible then this interfaces is used to represent device insertion. 
7276 */ 7277 int 7278 mdi_pi_device_insert(mdi_pathinfo_t *pip) 7279 { 7280 MDI_PI_LOCK(pip); 7281 if (!mdi_pi_device_isremoved(pip)) { 7282 MDI_PI_UNLOCK(pip); 7283 return (0); 7284 } 7285 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip); 7286 MDI_PI_FLAGS_CLR_HIDDEN(pip); 7287 MDI_PI_UNLOCK(pip); 7288 7289 i_ddi_di_cache_invalidate(); 7290 7291 return (1); 7292 } 7293 7294 /* 7295 * List of vhci class names: 7296 * A vhci class name must be in this list only if the corresponding vhci 7297 * driver intends to use the mdi provided bus config implementation 7298 * (i.e., mdi_vhci_bus_config()). 7299 */ 7300 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7301 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7302 7303 /* 7304 * During boot time, the on-disk vhci cache for every vhci class is read 7305 * in the form of an nvlist and stored here. 7306 */ 7307 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7308 7309 /* nvpair names in vhci cache nvlist */ 7310 #define MDI_VHCI_CACHE_VERSION 1 7311 #define MDI_NVPNAME_VERSION "version" 7312 #define MDI_NVPNAME_PHCIS "phcis" 7313 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7314 7315 /* 7316 * Given vhci class name, return its on-disk vhci cache filename. 7317 * Memory for the returned filename which includes the full path is allocated 7318 * by this function. 7319 */ 7320 static char * 7321 vhclass2vhcache_filename(char *vhclass) 7322 { 7323 char *filename; 7324 int len; 7325 static char *fmt = "/etc/devices/mdi_%s_cache"; 7326 7327 /* 7328 * fmt contains the on-disk vhci cache file name format; 7329 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
7330 */ 7331 7332 /* the -1 below is to account for "%s" in the format string */ 7333 len = strlen(fmt) + strlen(vhclass) - 1; 7334 filename = kmem_alloc(len, KM_SLEEP); 7335 (void) snprintf(filename, len, fmt, vhclass); 7336 ASSERT(len == (strlen(filename) + 1)); 7337 return (filename); 7338 } 7339 7340 /* 7341 * initialize the vhci cache related data structures and read the on-disk 7342 * vhci cached data into memory. 7343 */ 7344 static void 7345 setup_vhci_cache(mdi_vhci_t *vh) 7346 { 7347 mdi_vhci_config_t *vhc; 7348 mdi_vhci_cache_t *vhcache; 7349 int i; 7350 nvlist_t *nvl = NULL; 7351 7352 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7353 vh->vh_config = vhc; 7354 vhcache = &vhc->vhc_vhcache; 7355 7356 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7357 7358 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7359 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7360 7361 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7362 7363 /* 7364 * Create string hash; same as mod_hash_create_strhash() except that 7365 * we use NULL key destructor. 7366 */ 7367 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7368 mdi_bus_config_cache_hash_size, 7369 mod_hash_null_keydtor, mod_hash_null_valdtor, 7370 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7371 7372 /* 7373 * The on-disk vhci cache is read during booting prior to the 7374 * lights-out period by mdi_read_devices_files(). 7375 */ 7376 for (i = 0; i < N_VHCI_CLASSES; i++) { 7377 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7378 nvl = vhcache_nvl[i]; 7379 vhcache_nvl[i] = NULL; 7380 break; 7381 } 7382 } 7383 7384 /* 7385 * this is to cover the case of some one manually causing unloading 7386 * (or detaching) and reloading (or attaching) of a vhci driver. 
7387 */ 7388 if (nvl == NULL && modrootloaded) 7389 nvl = read_on_disk_vhci_cache(vh->vh_class); 7390 7391 if (nvl != NULL) { 7392 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7393 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7394 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7395 else { 7396 cmn_err(CE_WARN, 7397 "%s: data file corrupted, will recreate", 7398 vhc->vhc_vhcache_filename); 7399 } 7400 rw_exit(&vhcache->vhcache_lock); 7401 nvlist_free(nvl); 7402 } 7403 7404 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7405 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7406 7407 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7408 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7409 } 7410 7411 /* 7412 * free all vhci cache related resources 7413 */ 7414 static int 7415 destroy_vhci_cache(mdi_vhci_t *vh) 7416 { 7417 mdi_vhci_config_t *vhc = vh->vh_config; 7418 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7419 mdi_vhcache_phci_t *cphci, *cphci_next; 7420 mdi_vhcache_client_t *cct, *cct_next; 7421 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7422 7423 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7424 return (MDI_FAILURE); 7425 7426 kmem_free(vhc->vhc_vhcache_filename, 7427 strlen(vhc->vhc_vhcache_filename) + 1); 7428 7429 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7430 7431 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7432 cphci = cphci_next) { 7433 cphci_next = cphci->cphci_next; 7434 free_vhcache_phci(cphci); 7435 } 7436 7437 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7438 cct_next = cct->cct_next; 7439 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7440 cpi_next = cpi->cpi_next; 7441 free_vhcache_pathinfo(cpi); 7442 } 7443 free_vhcache_client(cct); 7444 } 7445 7446 rw_destroy(&vhcache->vhcache_lock); 7447 7448 mutex_destroy(&vhc->vhc_lock); 7449 cv_destroy(&vhc->vhc_cv); 7450 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7451 return (MDI_SUCCESS); 
7452 } 7453 7454 /* 7455 * Stop all vhci cache related async threads and free their resources. 7456 */ 7457 static int 7458 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7459 { 7460 mdi_async_client_config_t *acc, *acc_next; 7461 7462 mutex_enter(&vhc->vhc_lock); 7463 vhc->vhc_flags |= MDI_VHC_EXIT; 7464 ASSERT(vhc->vhc_acc_thrcount >= 0); 7465 cv_broadcast(&vhc->vhc_cv); 7466 7467 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7468 vhc->vhc_acc_thrcount != 0) { 7469 mutex_exit(&vhc->vhc_lock); 7470 delay_random(5); 7471 mutex_enter(&vhc->vhc_lock); 7472 } 7473 7474 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7475 7476 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7477 acc_next = acc->acc_next; 7478 free_async_client_config(acc); 7479 } 7480 vhc->vhc_acc_list_head = NULL; 7481 vhc->vhc_acc_list_tail = NULL; 7482 vhc->vhc_acc_count = 0; 7483 7484 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7485 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7486 mutex_exit(&vhc->vhc_lock); 7487 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7488 vhcache_dirty(vhc); 7489 return (MDI_FAILURE); 7490 } 7491 } else 7492 mutex_exit(&vhc->vhc_lock); 7493 7494 if (callb_delete(vhc->vhc_cbid) != 0) 7495 return (MDI_FAILURE); 7496 7497 return (MDI_SUCCESS); 7498 } 7499 7500 /* 7501 * Stop vhci cache flush thread 7502 */ 7503 /* ARGSUSED */ 7504 static boolean_t 7505 stop_vhcache_flush_thread(void *arg, int code) 7506 { 7507 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7508 7509 mutex_enter(&vhc->vhc_lock); 7510 vhc->vhc_flags |= MDI_VHC_EXIT; 7511 cv_broadcast(&vhc->vhc_cv); 7512 7513 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7514 mutex_exit(&vhc->vhc_lock); 7515 delay_random(5); 7516 mutex_enter(&vhc->vhc_lock); 7517 } 7518 7519 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7520 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7521 mutex_exit(&vhc->vhc_lock); 7522 (void) flush_vhcache(vhc, 1); 7523 } else 7524 mutex_exit(&vhc->vhc_lock); 7525 7526 return 
(B_TRUE); 7527 } 7528 7529 /* 7530 * Enqueue the vhcache phci (cphci) at the tail of the list 7531 */ 7532 static void 7533 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7534 { 7535 cphci->cphci_next = NULL; 7536 if (vhcache->vhcache_phci_head == NULL) 7537 vhcache->vhcache_phci_head = cphci; 7538 else 7539 vhcache->vhcache_phci_tail->cphci_next = cphci; 7540 vhcache->vhcache_phci_tail = cphci; 7541 } 7542 7543 /* 7544 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7545 */ 7546 static void 7547 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7548 mdi_vhcache_pathinfo_t *cpi) 7549 { 7550 cpi->cpi_next = NULL; 7551 if (cct->cct_cpi_head == NULL) 7552 cct->cct_cpi_head = cpi; 7553 else 7554 cct->cct_cpi_tail->cpi_next = cpi; 7555 cct->cct_cpi_tail = cpi; 7556 } 7557 7558 /* 7559 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7560 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7561 * flag set come at the beginning of the list. All cpis which have this 7562 * flag set come at the end of the list. 
7563 */ 7564 static void 7565 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7566 mdi_vhcache_pathinfo_t *newcpi) 7567 { 7568 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7569 7570 if (cct->cct_cpi_head == NULL || 7571 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7572 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7573 else { 7574 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7575 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7576 prev_cpi = cpi, cpi = cpi->cpi_next) 7577 ; 7578 7579 if (prev_cpi == NULL) 7580 cct->cct_cpi_head = newcpi; 7581 else 7582 prev_cpi->cpi_next = newcpi; 7583 7584 newcpi->cpi_next = cpi; 7585 7586 if (cpi == NULL) 7587 cct->cct_cpi_tail = newcpi; 7588 } 7589 } 7590 7591 /* 7592 * Enqueue the vhcache client (cct) at the tail of the list 7593 */ 7594 static void 7595 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7596 mdi_vhcache_client_t *cct) 7597 { 7598 cct->cct_next = NULL; 7599 if (vhcache->vhcache_client_head == NULL) 7600 vhcache->vhcache_client_head = cct; 7601 else 7602 vhcache->vhcache_client_tail->cct_next = cct; 7603 vhcache->vhcache_client_tail = cct; 7604 } 7605 7606 static void 7607 free_string_array(char **str, int nelem) 7608 { 7609 int i; 7610 7611 if (str) { 7612 for (i = 0; i < nelem; i++) { 7613 if (str[i]) 7614 kmem_free(str[i], strlen(str[i]) + 1); 7615 } 7616 kmem_free(str, sizeof (char *) * nelem); 7617 } 7618 } 7619 7620 static void 7621 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7622 { 7623 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7624 kmem_free(cphci, sizeof (*cphci)); 7625 } 7626 7627 static void 7628 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7629 { 7630 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7631 kmem_free(cpi, sizeof (*cpi)); 7632 } 7633 7634 static void 7635 free_vhcache_client(mdi_vhcache_client_t *cct) 7636 { 7637 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7638 kmem_free(cct, sizeof (*cct)); 7639 } 7640 7641 
static char * 7642 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7643 { 7644 char *name_addr; 7645 int len; 7646 7647 len = strlen(ct_name) + strlen(ct_addr) + 2; 7648 name_addr = kmem_alloc(len, KM_SLEEP); 7649 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7650 7651 if (ret_len) 7652 *ret_len = len; 7653 return (name_addr); 7654 } 7655 7656 /* 7657 * Copy the contents of paddrnvl to vhci cache. 7658 * paddrnvl nvlist contains path information for a vhci client. 7659 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7660 */ 7661 static void 7662 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7663 mdi_vhcache_client_t *cct) 7664 { 7665 nvpair_t *nvp = NULL; 7666 mdi_vhcache_pathinfo_t *cpi; 7667 uint_t nelem; 7668 uint32_t *val; 7669 7670 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7671 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7672 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7673 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7674 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7675 ASSERT(nelem == 2); 7676 cpi->cpi_cphci = cphci_list[val[0]]; 7677 cpi->cpi_flags = val[1]; 7678 enqueue_tail_vhcache_pathinfo(cct, cpi); 7679 } 7680 } 7681 7682 /* 7683 * Copy the contents of caddrmapnvl to vhci cache. 7684 * caddrmapnvl nvlist contains vhci client address to phci client address 7685 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7686 * this nvlist. 
7687 */ 7688 static void 7689 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7690 mdi_vhcache_phci_t *cphci_list[]) 7691 { 7692 nvpair_t *nvp = NULL; 7693 nvlist_t *paddrnvl; 7694 mdi_vhcache_client_t *cct; 7695 7696 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7697 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7698 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7699 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7700 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7701 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7702 /* the client must contain at least one path */ 7703 ASSERT(cct->cct_cpi_head != NULL); 7704 7705 enqueue_vhcache_client(vhcache, cct); 7706 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7707 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7708 } 7709 } 7710 7711 /* 7712 * Copy the contents of the main nvlist to vhci cache. 7713 * 7714 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7715 * The nvlist contains the mappings between the vhci client addresses and 7716 * their corresponding phci client addresses. 7717 * 7718 * The structure of the nvlist is as follows: 7719 * 7720 * Main nvlist: 7721 * NAME TYPE DATA 7722 * version int32 version number 7723 * phcis string array array of phci paths 7724 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7725 * 7726 * structure of c2paddrs_nvl: 7727 * NAME TYPE DATA 7728 * caddr1 nvlist_t paddrs_nvl1 7729 * caddr2 nvlist_t paddrs_nvl2 7730 * ... 7731 * where caddr1, caddr2, ... are vhci client name and addresses in the 7732 * form of "<clientname>@<clientaddress>". 7733 * (for example: "ssd@2000002037cd9f72"); 7734 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7735 * 7736 * structure of paddrs_nvl: 7737 * NAME TYPE DATA 7738 * pi_addr1 uint32_array (phci-id, cpi_flags) 7739 * pi_addr2 uint32_array (phci-id, cpi_flags) 7740 * ... 7741 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7742 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7743 * phci-ids are integers that identify pHCIs to which the 7744 * the bus specific address belongs to. These integers are used as an index 7745 * into to the phcis string array in the main nvlist to get the pHCI path. 7746 */ 7747 static int 7748 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7749 { 7750 char **phcis, **phci_namep; 7751 uint_t nphcis; 7752 mdi_vhcache_phci_t *cphci, **cphci_list; 7753 nvlist_t *caddrmapnvl; 7754 int32_t ver; 7755 int i; 7756 size_t cphci_list_size; 7757 7758 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7759 7760 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7761 ver != MDI_VHCI_CACHE_VERSION) 7762 return (MDI_FAILURE); 7763 7764 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7765 &nphcis) != 0) 7766 return (MDI_SUCCESS); 7767 7768 ASSERT(nphcis > 0); 7769 7770 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7771 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7772 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7773 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7774 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7775 enqueue_vhcache_phci(vhcache, cphci); 7776 cphci_list[i] = cphci; 7777 } 7778 7779 ASSERT(vhcache->vhcache_phci_head != NULL); 7780 7781 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7782 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7783 7784 kmem_free(cphci_list, cphci_list_size); 7785 return (MDI_SUCCESS); 7786 } 7787 7788 /* 7789 * Build paddrnvl for the specified client using the information in the 7790 * vhci cache and add it to the caddrmapnnvl. 7791 * Returns 0 on success, errno on failure. 
7792 */ 7793 static int 7794 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7795 nvlist_t *caddrmapnvl) 7796 { 7797 mdi_vhcache_pathinfo_t *cpi; 7798 nvlist_t *nvl; 7799 int err; 7800 uint32_t val[2]; 7801 7802 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7803 7804 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7805 return (err); 7806 7807 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7808 val[0] = cpi->cpi_cphci->cphci_id; 7809 val[1] = cpi->cpi_flags; 7810 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7811 != 0) 7812 goto out; 7813 } 7814 7815 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7816 out: 7817 nvlist_free(nvl); 7818 return (err); 7819 } 7820 7821 /* 7822 * Build caddrmapnvl using the information in the vhci cache 7823 * and add it to the mainnvl. 7824 * Returns 0 on success, errno on failure. 7825 */ 7826 static int 7827 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7828 { 7829 mdi_vhcache_client_t *cct; 7830 nvlist_t *nvl; 7831 int err; 7832 7833 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7834 7835 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7836 return (err); 7837 7838 for (cct = vhcache->vhcache_client_head; cct != NULL; 7839 cct = cct->cct_next) { 7840 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7841 goto out; 7842 } 7843 7844 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7845 out: 7846 nvlist_free(nvl); 7847 return (err); 7848 } 7849 7850 /* 7851 * Build nvlist using the information in the vhci cache. 7852 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7853 * Returns nvl on success, NULL on failure. 
7854 */ 7855 static nvlist_t * 7856 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7857 { 7858 mdi_vhcache_phci_t *cphci; 7859 uint_t phci_count; 7860 char **phcis; 7861 nvlist_t *nvl; 7862 int err, i; 7863 7864 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7865 nvl = NULL; 7866 goto out; 7867 } 7868 7869 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7870 MDI_VHCI_CACHE_VERSION)) != 0) 7871 goto out; 7872 7873 rw_enter(&vhcache->vhcache_lock, RW_READER); 7874 if (vhcache->vhcache_phci_head == NULL) { 7875 rw_exit(&vhcache->vhcache_lock); 7876 return (nvl); 7877 } 7878 7879 phci_count = 0; 7880 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7881 cphci = cphci->cphci_next) 7882 cphci->cphci_id = phci_count++; 7883 7884 /* build phci pathname list */ 7885 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7886 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7887 cphci = cphci->cphci_next, i++) 7888 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7889 7890 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7891 phci_count); 7892 free_string_array(phcis, phci_count); 7893 7894 if (err == 0 && 7895 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7896 rw_exit(&vhcache->vhcache_lock); 7897 return (nvl); 7898 } 7899 7900 rw_exit(&vhcache->vhcache_lock); 7901 out: 7902 if (nvl) 7903 nvlist_free(nvl); 7904 return (NULL); 7905 } 7906 7907 /* 7908 * Lookup vhcache phci structure for the specified phci path. 7909 */ 7910 static mdi_vhcache_phci_t * 7911 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7912 { 7913 mdi_vhcache_phci_t *cphci; 7914 7915 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7916 7917 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7918 cphci = cphci->cphci_next) { 7919 if (strcmp(cphci->cphci_path, phci_path) == 0) 7920 return (cphci); 7921 } 7922 7923 return (NULL); 7924 } 7925 7926 /* 7927 * Lookup vhcache phci structure for the specified phci. 
7928 */ 7929 static mdi_vhcache_phci_t * 7930 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7931 { 7932 mdi_vhcache_phci_t *cphci; 7933 7934 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7935 7936 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7937 cphci = cphci->cphci_next) { 7938 if (cphci->cphci_phci == ph) 7939 return (cphci); 7940 } 7941 7942 return (NULL); 7943 } 7944 7945 /* 7946 * Add the specified phci to the vhci cache if not already present. 7947 */ 7948 static void 7949 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7950 { 7951 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7952 mdi_vhcache_phci_t *cphci; 7953 char *pathname; 7954 int cache_updated; 7955 7956 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7957 7958 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7959 (void) ddi_pathname(ph->ph_dip, pathname); 7960 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7961 != NULL) { 7962 cphci->cphci_phci = ph; 7963 cache_updated = 0; 7964 } else { 7965 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7966 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7967 cphci->cphci_phci = ph; 7968 enqueue_vhcache_phci(vhcache, cphci); 7969 cache_updated = 1; 7970 } 7971 7972 rw_exit(&vhcache->vhcache_lock); 7973 7974 /* 7975 * Since a new phci has been added, reset 7976 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7977 * during next vhcache_discover_paths(). 7978 */ 7979 mutex_enter(&vhc->vhc_lock); 7980 vhc->vhc_path_discovery_cutoff_time = 0; 7981 mutex_exit(&vhc->vhc_lock); 7982 7983 kmem_free(pathname, MAXPATHLEN); 7984 if (cache_updated) 7985 vhcache_dirty(vhc); 7986 } 7987 7988 /* 7989 * Remove the reference to the specified phci from the vhci cache. 
7990 */ 7991 static void 7992 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7993 { 7994 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7995 mdi_vhcache_phci_t *cphci; 7996 7997 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7998 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7999 /* do not remove the actual mdi_vhcache_phci structure */ 8000 cphci->cphci_phci = NULL; 8001 } 8002 rw_exit(&vhcache->vhcache_lock); 8003 } 8004 8005 static void 8006 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 8007 mdi_vhcache_lookup_token_t *src) 8008 { 8009 if (src == NULL) { 8010 dst->lt_cct = NULL; 8011 dst->lt_cct_lookup_time = 0; 8012 } else { 8013 dst->lt_cct = src->lt_cct; 8014 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 8015 } 8016 } 8017 8018 /* 8019 * Look up vhcache client for the specified client. 8020 */ 8021 static mdi_vhcache_client_t * 8022 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 8023 mdi_vhcache_lookup_token_t *token) 8024 { 8025 mod_hash_val_t hv; 8026 char *name_addr; 8027 int len; 8028 8029 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8030 8031 /* 8032 * If no vhcache clean occurred since the last lookup, we can 8033 * simply return the cct from the last lookup operation. 8034 * It works because ccts are never freed except during the vhcache 8035 * cleanup operation. 
8036 */ 8037 if (token != NULL && 8038 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 8039 return (token->lt_cct); 8040 8041 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 8042 if (mod_hash_find(vhcache->vhcache_client_hash, 8043 (mod_hash_key_t)name_addr, &hv) == 0) { 8044 if (token) { 8045 token->lt_cct = (mdi_vhcache_client_t *)hv; 8046 token->lt_cct_lookup_time = lbolt64; 8047 } 8048 } else { 8049 if (token) { 8050 token->lt_cct = NULL; 8051 token->lt_cct_lookup_time = 0; 8052 } 8053 hv = NULL; 8054 } 8055 kmem_free(name_addr, len); 8056 return ((mdi_vhcache_client_t *)hv); 8057 } 8058 8059 /* 8060 * Add the specified path to the vhci cache if not already present. 8061 * Also add the vhcache client for the client corresponding to this path 8062 * if it doesn't already exist. 8063 */ 8064 static void 8065 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8066 { 8067 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8068 mdi_vhcache_client_t *cct; 8069 mdi_vhcache_pathinfo_t *cpi; 8070 mdi_phci_t *ph = pip->pi_phci; 8071 mdi_client_t *ct = pip->pi_client; 8072 int cache_updated = 0; 8073 8074 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8075 8076 /* if vhcache client for this pip doesn't already exist, add it */ 8077 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8078 NULL)) == NULL) { 8079 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 8080 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 8081 ct->ct_guid, NULL); 8082 enqueue_vhcache_client(vhcache, cct); 8083 (void) mod_hash_insert(vhcache->vhcache_client_hash, 8084 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 8085 cache_updated = 1; 8086 } 8087 8088 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8089 if (cpi->cpi_cphci->cphci_phci == ph && 8090 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 8091 cpi->cpi_pip = pip; 8092 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 8093 cpi->cpi_flags &= 8094 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8095 sort_vhcache_paths(cct); 8096 cache_updated = 1; 8097 } 8098 break; 8099 } 8100 } 8101 8102 if (cpi == NULL) { 8103 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 8104 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 8105 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 8106 ASSERT(cpi->cpi_cphci != NULL); 8107 cpi->cpi_pip = pip; 8108 enqueue_vhcache_pathinfo(cct, cpi); 8109 cache_updated = 1; 8110 } 8111 8112 rw_exit(&vhcache->vhcache_lock); 8113 8114 if (cache_updated) 8115 vhcache_dirty(vhc); 8116 } 8117 8118 /* 8119 * Remove the reference to the specified path from the vhci cache. 8120 */ 8121 static void 8122 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8123 { 8124 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8125 mdi_client_t *ct = pip->pi_client; 8126 mdi_vhcache_client_t *cct; 8127 mdi_vhcache_pathinfo_t *cpi; 8128 8129 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8130 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8131 NULL)) != NULL) { 8132 for (cpi = cct->cct_cpi_head; cpi != NULL; 8133 cpi = cpi->cpi_next) { 8134 if (cpi->cpi_pip == pip) { 8135 cpi->cpi_pip = NULL; 8136 break; 8137 } 8138 } 8139 } 8140 rw_exit(&vhcache->vhcache_lock); 8141 } 8142 8143 /* 8144 * Flush the vhci cache to disk. 8145 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 8146 */ 8147 static int 8148 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 8149 { 8150 nvlist_t *nvl; 8151 int err; 8152 int rv; 8153 8154 /* 8155 * It is possible that the system may shutdown before 8156 * i_ddi_io_initialized (during stmsboot for example). To allow for 8157 * flushing the cache in this case do not check for 8158 * i_ddi_io_initialized when force flag is set. 
8159 */ 8160 if (force_flag == 0 && !i_ddi_io_initialized()) 8161 return (MDI_FAILURE); 8162 8163 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 8164 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 8165 nvlist_free(nvl); 8166 } else 8167 err = EFAULT; 8168 8169 rv = MDI_SUCCESS; 8170 mutex_enter(&vhc->vhc_lock); 8171 if (err != 0) { 8172 if (err == EROFS) { 8173 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 8174 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 8175 MDI_VHC_VHCACHE_DIRTY); 8176 } else { 8177 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 8178 cmn_err(CE_CONT, "%s: update failed\n", 8179 vhc->vhc_vhcache_filename); 8180 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 8181 } 8182 rv = MDI_FAILURE; 8183 } 8184 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 8185 cmn_err(CE_CONT, 8186 "%s: update now ok\n", vhc->vhc_vhcache_filename); 8187 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 8188 } 8189 mutex_exit(&vhc->vhc_lock); 8190 8191 return (rv); 8192 } 8193 8194 /* 8195 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 8196 * Exits itself if left idle for the idle timeout period. 
8197 */ 8198 static void 8199 vhcache_flush_thread(void *arg) 8200 { 8201 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8202 clock_t idle_time, quit_at_ticks; 8203 callb_cpr_t cprinfo; 8204 8205 /* number of seconds to sleep idle before exiting */ 8206 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8207 8208 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8209 "mdi_vhcache_flush"); 8210 mutex_enter(&vhc->vhc_lock); 8211 for (; ; ) { 8212 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8213 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8214 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8215 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8216 (void) cv_timedwait(&vhc->vhc_cv, 8217 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8218 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8219 } else { 8220 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8221 mutex_exit(&vhc->vhc_lock); 8222 8223 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8224 vhcache_dirty(vhc); 8225 8226 mutex_enter(&vhc->vhc_lock); 8227 } 8228 } 8229 8230 quit_at_ticks = ddi_get_lbolt() + idle_time; 8231 8232 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8233 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8234 ddi_get_lbolt() < quit_at_ticks) { 8235 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8236 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8237 quit_at_ticks); 8238 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8239 } 8240 8241 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8242 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8243 goto out; 8244 } 8245 8246 out: 8247 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8248 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8249 CALLB_CPR_EXIT(&cprinfo); 8250 } 8251 8252 /* 8253 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
8254 */ 8255 static void 8256 vhcache_dirty(mdi_vhci_config_t *vhc) 8257 { 8258 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8259 int create_thread; 8260 8261 rw_enter(&vhcache->vhcache_lock, RW_READER); 8262 /* do not flush cache until the cache is fully built */ 8263 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8264 rw_exit(&vhcache->vhcache_lock); 8265 return; 8266 } 8267 rw_exit(&vhcache->vhcache_lock); 8268 8269 mutex_enter(&vhc->vhc_lock); 8270 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8271 mutex_exit(&vhc->vhc_lock); 8272 return; 8273 } 8274 8275 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8276 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8277 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8278 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8279 cv_broadcast(&vhc->vhc_cv); 8280 create_thread = 0; 8281 } else { 8282 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8283 create_thread = 1; 8284 } 8285 mutex_exit(&vhc->vhc_lock); 8286 8287 if (create_thread) 8288 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8289 0, &p0, TS_RUN, minclsyspri); 8290 } 8291 8292 /* 8293 * phci bus config structure - one for for each phci bus config operation that 8294 * we initiate on behalf of a vhci. 
8295 */ 8296 typedef struct mdi_phci_bus_config_s { 8297 char *phbc_phci_path; 8298 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8299 struct mdi_phci_bus_config_s *phbc_next; 8300 } mdi_phci_bus_config_t; 8301 8302 /* vhci bus config structure - one for each vhci bus config operation */ 8303 typedef struct mdi_vhci_bus_config_s { 8304 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8305 major_t vhbc_op_major; /* bus config op major */ 8306 uint_t vhbc_op_flags; /* bus config op flags */ 8307 kmutex_t vhbc_lock; 8308 kcondvar_t vhbc_cv; 8309 int vhbc_thr_count; 8310 } mdi_vhci_bus_config_t; 8311 8312 /* 8313 * bus config the specified phci 8314 */ 8315 static void 8316 bus_config_phci(void *arg) 8317 { 8318 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8319 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8320 dev_info_t *ph_dip; 8321 8322 /* 8323 * first configure all path components upto phci and then configure 8324 * the phci children. 8325 */ 8326 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8327 != NULL) { 8328 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8329 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8330 (void) ndi_devi_config_driver(ph_dip, 8331 vhbc->vhbc_op_flags, 8332 vhbc->vhbc_op_major); 8333 } else 8334 (void) ndi_devi_config(ph_dip, 8335 vhbc->vhbc_op_flags); 8336 8337 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8338 ndi_rele_devi(ph_dip); 8339 } 8340 8341 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8342 kmem_free(phbc, sizeof (*phbc)); 8343 8344 mutex_enter(&vhbc->vhbc_lock); 8345 vhbc->vhbc_thr_count--; 8346 if (vhbc->vhbc_thr_count == 0) 8347 cv_broadcast(&vhbc->vhbc_cv); 8348 mutex_exit(&vhbc->vhbc_lock); 8349 } 8350 8351 /* 8352 * Bus config all phcis associated with the vhci in parallel. 8353 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8354 */ 8355 static void 8356 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8357 ddi_bus_config_op_t op, major_t maj) 8358 { 8359 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8360 mdi_vhci_bus_config_t *vhbc; 8361 mdi_vhcache_phci_t *cphci; 8362 8363 rw_enter(&vhcache->vhcache_lock, RW_READER); 8364 if (vhcache->vhcache_phci_head == NULL) { 8365 rw_exit(&vhcache->vhcache_lock); 8366 return; 8367 } 8368 8369 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8370 8371 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8372 cphci = cphci->cphci_next) { 8373 /* skip phcis that haven't attached before root is available */ 8374 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8375 continue; 8376 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8377 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8378 KM_SLEEP); 8379 phbc->phbc_vhbusconfig = vhbc; 8380 phbc->phbc_next = phbc_head; 8381 phbc_head = phbc; 8382 vhbc->vhbc_thr_count++; 8383 } 8384 rw_exit(&vhcache->vhcache_lock); 8385 8386 vhbc->vhbc_op = op; 8387 vhbc->vhbc_op_major = maj; 8388 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8389 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8390 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8391 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8392 8393 /* now create threads to initiate bus config on all phcis in parallel */ 8394 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8395 phbc_next = phbc->phbc_next; 8396 if (mdi_mtc_off) 8397 bus_config_phci((void *)phbc); 8398 else 8399 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8400 0, &p0, TS_RUN, minclsyspri); 8401 } 8402 8403 mutex_enter(&vhbc->vhbc_lock); 8404 /* wait until all threads exit */ 8405 while (vhbc->vhbc_thr_count > 0) 8406 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8407 mutex_exit(&vhbc->vhbc_lock); 8408 8409 mutex_destroy(&vhbc->vhbc_lock); 8410 cv_destroy(&vhbc->vhbc_cv); 8411 kmem_free(vhbc, sizeof (*vhbc)); 8412 } 8413 8414 /* 8415 * Single 
threaded version of bus_config_all_phcis() 8416 */ 8417 static void 8418 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8419 ddi_bus_config_op_t op, major_t maj) 8420 { 8421 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8422 8423 single_threaded_vhconfig_enter(vhc); 8424 bus_config_all_phcis(vhcache, flags, op, maj); 8425 single_threaded_vhconfig_exit(vhc); 8426 } 8427 8428 /* 8429 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8430 * The path includes the child component in addition to the phci path. 8431 */ 8432 static int 8433 bus_config_one_phci_child(char *path) 8434 { 8435 dev_info_t *ph_dip, *child; 8436 char *devnm; 8437 int rv = MDI_FAILURE; 8438 8439 /* extract the child component of the phci */ 8440 devnm = strrchr(path, '/'); 8441 *devnm++ = '\0'; 8442 8443 /* 8444 * first configure all path components upto phci and then 8445 * configure the phci child. 8446 */ 8447 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8448 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8449 NDI_SUCCESS) { 8450 /* 8451 * release the hold that ndi_devi_config_one() placed 8452 */ 8453 ndi_rele_devi(child); 8454 rv = MDI_SUCCESS; 8455 } 8456 8457 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8458 ndi_rele_devi(ph_dip); 8459 } 8460 8461 devnm--; 8462 *devnm = '/'; 8463 return (rv); 8464 } 8465 8466 /* 8467 * Build a list of phci client paths for the specified vhci client. 8468 * The list includes only those phci client paths which aren't configured yet. 8469 */ 8470 static mdi_phys_path_t * 8471 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8472 { 8473 mdi_vhcache_pathinfo_t *cpi; 8474 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8475 int config_path, len; 8476 8477 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8478 /* 8479 * include only those paths that aren't configured. 
8480 */ 8481 config_path = 0; 8482 if (cpi->cpi_pip == NULL) 8483 config_path = 1; 8484 else { 8485 MDI_PI_LOCK(cpi->cpi_pip); 8486 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8487 config_path = 1; 8488 MDI_PI_UNLOCK(cpi->cpi_pip); 8489 } 8490 8491 if (config_path) { 8492 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8493 len = strlen(cpi->cpi_cphci->cphci_path) + 8494 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8495 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8496 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8497 cpi->cpi_cphci->cphci_path, ct_name, 8498 cpi->cpi_addr); 8499 pp->phys_path_next = NULL; 8500 8501 if (pp_head == NULL) 8502 pp_head = pp; 8503 else 8504 pp_tail->phys_path_next = pp; 8505 pp_tail = pp; 8506 } 8507 } 8508 8509 return (pp_head); 8510 } 8511 8512 /* 8513 * Free the memory allocated for phci client path list. 8514 */ 8515 static void 8516 free_phclient_path_list(mdi_phys_path_t *pp_head) 8517 { 8518 mdi_phys_path_t *pp, *pp_next; 8519 8520 for (pp = pp_head; pp != NULL; pp = pp_next) { 8521 pp_next = pp->phys_path_next; 8522 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8523 kmem_free(pp, sizeof (*pp)); 8524 } 8525 } 8526 8527 /* 8528 * Allocated async client structure and initialize with the specified values. 8529 */ 8530 static mdi_async_client_config_t * 8531 alloc_async_client_config(char *ct_name, char *ct_addr, 8532 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8533 { 8534 mdi_async_client_config_t *acc; 8535 8536 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8537 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8538 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8539 acc->acc_phclient_path_list_head = pp_head; 8540 init_vhcache_lookup_token(&acc->acc_token, tok); 8541 acc->acc_next = NULL; 8542 return (acc); 8543 } 8544 8545 /* 8546 * Free the memory allocated for the async client structure and their members. 
8547 */ 8548 static void 8549 free_async_client_config(mdi_async_client_config_t *acc) 8550 { 8551 if (acc->acc_phclient_path_list_head) 8552 free_phclient_path_list(acc->acc_phclient_path_list_head); 8553 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8554 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8555 kmem_free(acc, sizeof (*acc)); 8556 } 8557 8558 /* 8559 * Sort vhcache pathinfos (cpis) of the specified client. 8560 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8561 * flag set come at the beginning of the list. All cpis which have this 8562 * flag set come at the end of the list. 8563 */ 8564 static void 8565 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8566 { 8567 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8568 8569 cpi_head = cct->cct_cpi_head; 8570 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8571 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8572 cpi_next = cpi->cpi_next; 8573 enqueue_vhcache_pathinfo(cct, cpi); 8574 } 8575 } 8576 8577 /* 8578 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8579 * every vhcache pathinfo of the specified client. If not adjust the flag 8580 * setting appropriately. 8581 * 8582 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8583 * on-disk vhci cache. So every time this flag is updated the cache must be 8584 * flushed. 8585 */ 8586 static void 8587 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8588 mdi_vhcache_lookup_token_t *tok) 8589 { 8590 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8591 mdi_vhcache_client_t *cct; 8592 mdi_vhcache_pathinfo_t *cpi; 8593 8594 rw_enter(&vhcache->vhcache_lock, RW_READER); 8595 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8596 == NULL) { 8597 rw_exit(&vhcache->vhcache_lock); 8598 return; 8599 } 8600 8601 /* 8602 * to avoid unnecessary on-disk cache updates, first check if an 8603 * update is really needed. 
If no update is needed simply return. 8604 */ 8605 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8606 if ((cpi->cpi_pip != NULL && 8607 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8608 (cpi->cpi_pip == NULL && 8609 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8610 break; 8611 } 8612 } 8613 if (cpi == NULL) { 8614 rw_exit(&vhcache->vhcache_lock); 8615 return; 8616 } 8617 8618 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8619 rw_exit(&vhcache->vhcache_lock); 8620 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8621 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8622 tok)) == NULL) { 8623 rw_exit(&vhcache->vhcache_lock); 8624 return; 8625 } 8626 } 8627 8628 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8629 if (cpi->cpi_pip != NULL) 8630 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8631 else 8632 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8633 } 8634 sort_vhcache_paths(cct); 8635 8636 rw_exit(&vhcache->vhcache_lock); 8637 vhcache_dirty(vhc); 8638 } 8639 8640 /* 8641 * Configure all specified paths of the client. 8642 */ 8643 static void 8644 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8645 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8646 { 8647 mdi_phys_path_t *pp; 8648 8649 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8650 (void) bus_config_one_phci_child(pp->phys_path); 8651 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8652 } 8653 8654 /* 8655 * Dequeue elements from vhci async client config list and bus configure 8656 * their corresponding phci clients. 
8657 */ 8658 static void 8659 config_client_paths_thread(void *arg) 8660 { 8661 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8662 mdi_async_client_config_t *acc; 8663 clock_t quit_at_ticks; 8664 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8665 callb_cpr_t cprinfo; 8666 8667 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8668 "mdi_config_client_paths"); 8669 8670 for (; ; ) { 8671 quit_at_ticks = ddi_get_lbolt() + idle_time; 8672 8673 mutex_enter(&vhc->vhc_lock); 8674 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8675 vhc->vhc_acc_list_head == NULL && 8676 ddi_get_lbolt() < quit_at_ticks) { 8677 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8678 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8679 quit_at_ticks); 8680 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8681 } 8682 8683 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8684 vhc->vhc_acc_list_head == NULL) 8685 goto out; 8686 8687 acc = vhc->vhc_acc_list_head; 8688 vhc->vhc_acc_list_head = acc->acc_next; 8689 if (vhc->vhc_acc_list_head == NULL) 8690 vhc->vhc_acc_list_tail = NULL; 8691 vhc->vhc_acc_count--; 8692 mutex_exit(&vhc->vhc_lock); 8693 8694 config_client_paths_sync(vhc, acc->acc_ct_name, 8695 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8696 &acc->acc_token); 8697 8698 free_async_client_config(acc); 8699 } 8700 8701 out: 8702 vhc->vhc_acc_thrcount--; 8703 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8704 CALLB_CPR_EXIT(&cprinfo); 8705 } 8706 8707 /* 8708 * Arrange for all the phci client paths (pp_head) for the specified client 8709 * to be bus configured asynchronously by a thread. 
8710 */ 8711 static void 8712 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8713 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8714 { 8715 mdi_async_client_config_t *acc, *newacc; 8716 int create_thread; 8717 8718 if (pp_head == NULL) 8719 return; 8720 8721 if (mdi_mtc_off) { 8722 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8723 free_phclient_path_list(pp_head); 8724 return; 8725 } 8726 8727 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8728 ASSERT(newacc); 8729 8730 mutex_enter(&vhc->vhc_lock); 8731 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8732 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8733 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8734 free_async_client_config(newacc); 8735 mutex_exit(&vhc->vhc_lock); 8736 return; 8737 } 8738 } 8739 8740 if (vhc->vhc_acc_list_head == NULL) 8741 vhc->vhc_acc_list_head = newacc; 8742 else 8743 vhc->vhc_acc_list_tail->acc_next = newacc; 8744 vhc->vhc_acc_list_tail = newacc; 8745 vhc->vhc_acc_count++; 8746 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8747 cv_broadcast(&vhc->vhc_cv); 8748 create_thread = 0; 8749 } else { 8750 vhc->vhc_acc_thrcount++; 8751 create_thread = 1; 8752 } 8753 mutex_exit(&vhc->vhc_lock); 8754 8755 if (create_thread) 8756 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8757 0, &p0, TS_RUN, minclsyspri); 8758 } 8759 8760 /* 8761 * Return number of online paths for the specified client. 
8762 */ 8763 static int 8764 nonline_paths(mdi_vhcache_client_t *cct) 8765 { 8766 mdi_vhcache_pathinfo_t *cpi; 8767 int online_count = 0; 8768 8769 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8770 if (cpi->cpi_pip != NULL) { 8771 MDI_PI_LOCK(cpi->cpi_pip); 8772 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8773 online_count++; 8774 MDI_PI_UNLOCK(cpi->cpi_pip); 8775 } 8776 } 8777 8778 return (online_count); 8779 } 8780 8781 /* 8782 * Bus configure all paths for the specified vhci client. 8783 * If at least one path for the client is already online, the remaining paths 8784 * will be configured asynchronously. Otherwise, it synchronously configures 8785 * the paths until at least one path is online and then rest of the paths 8786 * will be configured asynchronously. 8787 */ 8788 static void 8789 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8790 { 8791 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8792 mdi_phys_path_t *pp_head, *pp; 8793 mdi_vhcache_client_t *cct; 8794 mdi_vhcache_lookup_token_t tok; 8795 8796 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8797 8798 init_vhcache_lookup_token(&tok, NULL); 8799 8800 if (ct_name == NULL || ct_addr == NULL || 8801 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8802 == NULL || 8803 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8804 rw_exit(&vhcache->vhcache_lock); 8805 return; 8806 } 8807 8808 /* if at least one path is online, configure the rest asynchronously */ 8809 if (nonline_paths(cct) > 0) { 8810 rw_exit(&vhcache->vhcache_lock); 8811 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8812 return; 8813 } 8814 8815 rw_exit(&vhcache->vhcache_lock); 8816 8817 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8818 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8819 rw_enter(&vhcache->vhcache_lock, RW_READER); 8820 8821 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8822 ct_addr, &tok)) 
== NULL) { 8823 rw_exit(&vhcache->vhcache_lock); 8824 goto out; 8825 } 8826 8827 if (nonline_paths(cct) > 0 && 8828 pp->phys_path_next != NULL) { 8829 rw_exit(&vhcache->vhcache_lock); 8830 config_client_paths_async(vhc, ct_name, ct_addr, 8831 pp->phys_path_next, &tok); 8832 pp->phys_path_next = NULL; 8833 goto out; 8834 } 8835 8836 rw_exit(&vhcache->vhcache_lock); 8837 } 8838 } 8839 8840 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8841 out: 8842 free_phclient_path_list(pp_head); 8843 } 8844 8845 static void 8846 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8847 { 8848 mutex_enter(&vhc->vhc_lock); 8849 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8850 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8851 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8852 mutex_exit(&vhc->vhc_lock); 8853 } 8854 8855 static void 8856 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8857 { 8858 mutex_enter(&vhc->vhc_lock); 8859 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8860 cv_broadcast(&vhc->vhc_cv); 8861 mutex_exit(&vhc->vhc_lock); 8862 } 8863 8864 typedef struct mdi_phci_driver_info { 8865 char *phdriver_name; /* name of the phci driver */ 8866 8867 /* set to non zero if the phci driver supports root device */ 8868 int phdriver_root_support; 8869 } mdi_phci_driver_info_t; 8870 8871 /* 8872 * vhci class and root support capability of a phci driver can be 8873 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8874 * phci driver.conf file. The built-in tables below contain this information 8875 * for those phci drivers whose driver.conf files don't yet contain this info. 8876 * 8877 * All phci drivers expect iscsi have root device support. 
8878 */ 8879 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8880 { "fp", 1 }, 8881 { "iscsi", 0 }, 8882 { "ibsrp", 1 } 8883 }; 8884 8885 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8886 8887 static void * 8888 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8889 { 8890 void *new_ptr; 8891 8892 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8893 if (old_ptr) { 8894 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8895 kmem_free(old_ptr, old_size); 8896 } 8897 return (new_ptr); 8898 } 8899 8900 static void 8901 add_to_phci_list(char ***driver_list, int **root_support_list, 8902 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8903 { 8904 ASSERT(*cur_elements <= *max_elements); 8905 if (*cur_elements == *max_elements) { 8906 *max_elements += 10; 8907 *driver_list = mdi_realloc(*driver_list, 8908 sizeof (char *) * (*cur_elements), 8909 sizeof (char *) * (*max_elements)); 8910 *root_support_list = mdi_realloc(*root_support_list, 8911 sizeof (int) * (*cur_elements), 8912 sizeof (int) * (*max_elements)); 8913 } 8914 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8915 (*root_support_list)[*cur_elements] = root_support; 8916 (*cur_elements)++; 8917 } 8918 8919 static void 8920 get_phci_driver_list(char *vhci_class, char ***driver_list, 8921 int **root_support_list, int *cur_elements, int *max_elements) 8922 { 8923 mdi_phci_driver_info_t *st_driver_list, *p; 8924 int st_ndrivers, root_support, i, j, driver_conf_count; 8925 major_t m; 8926 struct devnames *dnp; 8927 ddi_prop_t *propp; 8928 8929 *driver_list = NULL; 8930 *root_support_list = NULL; 8931 *cur_elements = 0; 8932 *max_elements = 0; 8933 8934 /* add the phci drivers derived from the phci driver.conf files */ 8935 for (m = 0; m < devcnt; m++) { 8936 dnp = &devnamesp[m]; 8937 8938 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8939 LOCK_DEV_OPS(&dnp->dn_lock); 8940 if (dnp->dn_global_prop_ptr != NULL && 8941 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8942 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8943 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8944 strcmp(propp->prop_val, vhci_class) == 0) { 8945 8946 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8947 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8948 &dnp->dn_global_prop_ptr->prop_list) 8949 == NULL) ? 1 : 0; 8950 8951 add_to_phci_list(driver_list, root_support_list, 8952 cur_elements, max_elements, dnp->dn_name, 8953 root_support); 8954 8955 UNLOCK_DEV_OPS(&dnp->dn_lock); 8956 } else 8957 UNLOCK_DEV_OPS(&dnp->dn_lock); 8958 } 8959 } 8960 8961 driver_conf_count = *cur_elements; 8962 8963 /* add the phci drivers specified in the built-in tables */ 8964 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8965 st_driver_list = scsi_phci_driver_list; 8966 st_ndrivers = sizeof (scsi_phci_driver_list) / 8967 sizeof (mdi_phci_driver_info_t); 8968 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8969 st_driver_list = ib_phci_driver_list; 8970 st_ndrivers = sizeof (ib_phci_driver_list) / 8971 sizeof (mdi_phci_driver_info_t); 8972 } else { 8973 st_driver_list = NULL; 8974 st_ndrivers = 0; 8975 } 8976 8977 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8978 /* add this phci driver if not already added before */ 8979 for (j = 0; j < driver_conf_count; j++) { 8980 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8981 break; 8982 } 8983 if (j == driver_conf_count) { 8984 add_to_phci_list(driver_list, root_support_list, 8985 cur_elements, max_elements, p->phdriver_name, 8986 p->phdriver_root_support); 8987 } 8988 } 8989 } 8990 8991 /* 8992 * Attach the phci driver instances associated with the specified vhci class. 8993 * If root is mounted attach all phci driver instances. 8994 * If root is not mounted, attach the instances of only those phci 8995 * drivers that have the root support. 
8996 */ 8997 static void 8998 attach_phci_drivers(char *vhci_class) 8999 { 9000 char **driver_list, **p; 9001 int *root_support_list; 9002 int cur_elements, max_elements, i; 9003 major_t m; 9004 9005 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9006 &cur_elements, &max_elements); 9007 9008 for (i = 0; i < cur_elements; i++) { 9009 if (modrootloaded || root_support_list[i]) { 9010 m = ddi_name_to_major(driver_list[i]); 9011 if (m != DDI_MAJOR_T_NONE && 9012 ddi_hold_installed_driver(m)) 9013 ddi_rele_driver(m); 9014 } 9015 } 9016 9017 if (driver_list) { 9018 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 9019 kmem_free(*p, strlen(*p) + 1); 9020 kmem_free(driver_list, sizeof (char *) * max_elements); 9021 kmem_free(root_support_list, sizeof (int) * max_elements); 9022 } 9023 } 9024 9025 /* 9026 * Build vhci cache: 9027 * 9028 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 9029 * the phci driver instances. During this process the cache gets built. 9030 * 9031 * Cache is built fully if the root is mounted. 9032 * If the root is not mounted, phci drivers that do not have root support 9033 * are not attached. As a result the cache is built partially. The entries 9034 * in the cache reflect only those phci drivers that have root support. 
9035 */ 9036 static int 9037 build_vhci_cache(mdi_vhci_t *vh) 9038 { 9039 mdi_vhci_config_t *vhc = vh->vh_config; 9040 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9041 9042 single_threaded_vhconfig_enter(vhc); 9043 9044 rw_enter(&vhcache->vhcache_lock, RW_READER); 9045 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 9046 rw_exit(&vhcache->vhcache_lock); 9047 single_threaded_vhconfig_exit(vhc); 9048 return (0); 9049 } 9050 rw_exit(&vhcache->vhcache_lock); 9051 9052 attach_phci_drivers(vh->vh_class); 9053 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 9054 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9055 9056 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9057 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 9058 rw_exit(&vhcache->vhcache_lock); 9059 9060 single_threaded_vhconfig_exit(vhc); 9061 vhcache_dirty(vhc); 9062 return (1); 9063 } 9064 9065 /* 9066 * Determine if discovery of paths is needed. 9067 */ 9068 static int 9069 vhcache_do_discovery(mdi_vhci_config_t *vhc) 9070 { 9071 int rv = 1; 9072 9073 mutex_enter(&vhc->vhc_lock); 9074 if (i_ddi_io_initialized() == 0) { 9075 if (vhc->vhc_path_discovery_boot > 0) { 9076 vhc->vhc_path_discovery_boot--; 9077 goto out; 9078 } 9079 } else { 9080 if (vhc->vhc_path_discovery_postboot > 0) { 9081 vhc->vhc_path_discovery_postboot--; 9082 goto out; 9083 } 9084 } 9085 9086 /* 9087 * Do full path discovery at most once per mdi_path_discovery_interval. 9088 * This is to avoid a series of full path discoveries when opening 9089 * stale /dev/[r]dsk links. 9090 */ 9091 if (mdi_path_discovery_interval != -1 && 9092 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 9093 goto out; 9094 9095 rv = 0; 9096 out: 9097 mutex_exit(&vhc->vhc_lock); 9098 return (rv); 9099 } 9100 9101 /* 9102 * Discover all paths: 9103 * 9104 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 9105 * driver instances. During this process all paths will be discovered. 
9106 */ 9107 static int 9108 vhcache_discover_paths(mdi_vhci_t *vh) 9109 { 9110 mdi_vhci_config_t *vhc = vh->vh_config; 9111 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9112 int rv = 0; 9113 9114 single_threaded_vhconfig_enter(vhc); 9115 9116 if (vhcache_do_discovery(vhc)) { 9117 attach_phci_drivers(vh->vh_class); 9118 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 9119 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9120 9121 mutex_enter(&vhc->vhc_lock); 9122 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 9123 mdi_path_discovery_interval * TICKS_PER_SECOND; 9124 mutex_exit(&vhc->vhc_lock); 9125 rv = 1; 9126 } 9127 9128 single_threaded_vhconfig_exit(vhc); 9129 return (rv); 9130 } 9131 9132 /* 9133 * Generic vhci bus config implementation: 9134 * 9135 * Parameters 9136 * vdip vhci dip 9137 * flags bus config flags 9138 * op bus config operation 9139 * The remaining parameters are bus config operation specific 9140 * 9141 * for BUS_CONFIG_ONE 9142 * arg pointer to name@addr 9143 * child upon successful return from this function, *child will be 9144 * set to the configured and held devinfo child node of vdip. 9145 * ct_addr pointer to client address (i.e. GUID) 9146 * 9147 * for BUS_CONFIG_DRIVER 9148 * arg major number of the driver 9149 * child and ct_addr parameters are ignored 9150 * 9151 * for BUS_CONFIG_ALL 9152 * arg, child, and ct_addr parameters are ignored 9153 * 9154 * Note that for the rest of the bus config operations, this function simply 9155 * calls the framework provided default bus config routine. 
9156 */ 9157 int 9158 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 9159 void *arg, dev_info_t **child, char *ct_addr) 9160 { 9161 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9162 mdi_vhci_config_t *vhc = vh->vh_config; 9163 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9164 int rv = 0; 9165 int params_valid = 0; 9166 char *cp; 9167 9168 /* 9169 * To bus config vhcis we relay operation, possibly using another 9170 * thread, to phcis. The phci driver then interacts with MDI to cause 9171 * vhci child nodes to be enumerated under the vhci node. Adding a 9172 * vhci child requires an ndi_devi_enter of the vhci. Since another 9173 * thread may be adding the child, to avoid deadlock we can't wait 9174 * for the relayed operations to complete if we have already entered 9175 * the vhci node. 9176 */ 9177 if (DEVI_BUSY_OWNED(vdip)) { 9178 MDI_DEBUG(2, (MDI_NOTE, vdip, 9179 "vhci dip is busy owned %p", (void *)vdip)); 9180 goto default_bus_config; 9181 } 9182 9183 rw_enter(&vhcache->vhcache_lock, RW_READER); 9184 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 9185 rw_exit(&vhcache->vhcache_lock); 9186 rv = build_vhci_cache(vh); 9187 rw_enter(&vhcache->vhcache_lock, RW_READER); 9188 } 9189 9190 switch (op) { 9191 case BUS_CONFIG_ONE: 9192 if (arg != NULL && ct_addr != NULL) { 9193 /* extract node name */ 9194 cp = (char *)arg; 9195 while (*cp != '\0' && *cp != '@') 9196 cp++; 9197 if (*cp == '@') { 9198 params_valid = 1; 9199 *cp = '\0'; 9200 config_client_paths(vhc, (char *)arg, ct_addr); 9201 /* config_client_paths() releases cache_lock */ 9202 *cp = '@'; 9203 break; 9204 } 9205 } 9206 9207 rw_exit(&vhcache->vhcache_lock); 9208 break; 9209 9210 case BUS_CONFIG_DRIVER: 9211 rw_exit(&vhcache->vhcache_lock); 9212 if (rv == 0) 9213 st_bus_config_all_phcis(vhc, flags, op, 9214 (major_t)(uintptr_t)arg); 9215 break; 9216 9217 case BUS_CONFIG_ALL: 9218 rw_exit(&vhcache->vhcache_lock); 9219 if (rv == 0) 9220 st_bus_config_all_phcis(vhc, 
flags, op, -1); 9221 break; 9222 9223 default: 9224 rw_exit(&vhcache->vhcache_lock); 9225 break; 9226 } 9227 9228 9229 default_bus_config: 9230 /* 9231 * All requested child nodes are enumerated under the vhci. 9232 * Now configure them. 9233 */ 9234 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9235 NDI_SUCCESS) { 9236 return (MDI_SUCCESS); 9237 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 9238 /* discover all paths and try configuring again */ 9239 if (vhcache_discover_paths(vh) && 9240 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9241 NDI_SUCCESS) 9242 return (MDI_SUCCESS); 9243 } 9244 9245 return (MDI_FAILURE); 9246 } 9247 9248 /* 9249 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 9250 */ 9251 static nvlist_t * 9252 read_on_disk_vhci_cache(char *vhci_class) 9253 { 9254 nvlist_t *nvl; 9255 int err; 9256 char *filename; 9257 9258 filename = vhclass2vhcache_filename(vhci_class); 9259 9260 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9261 kmem_free(filename, strlen(filename) + 1); 9262 return (nvl); 9263 } else if (err == EIO) 9264 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename); 9265 else if (err == EINVAL) 9266 cmn_err(CE_WARN, 9267 "%s: data file corrupted, will recreate", filename); 9268 9269 kmem_free(filename, strlen(filename) + 1); 9270 return (NULL); 9271 } 9272 9273 /* 9274 * Read on-disk vhci cache into nvlists for all vhci classes. 9275 * Called during booting by i_ddi_read_devices_files(). 9276 */ 9277 void 9278 mdi_read_devices_files(void) 9279 { 9280 int i; 9281 9282 for (i = 0; i < N_VHCI_CLASSES; i++) 9283 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9284 } 9285 9286 /* 9287 * Remove all stale entries from vhci cache. 
9288 */ 9289 static void 9290 clean_vhcache(mdi_vhci_config_t *vhc) 9291 { 9292 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9293 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 9294 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 9295 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 9296 9297 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9298 9299 cct_head = vhcache->vhcache_client_head; 9300 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9301 for (cct = cct_head; cct != NULL; cct = cct_next) { 9302 cct_next = cct->cct_next; 9303 9304 cpi_head = cct->cct_cpi_head; 9305 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 9306 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 9307 cpi_next = cpi->cpi_next; 9308 if (cpi->cpi_pip != NULL) { 9309 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 9310 enqueue_tail_vhcache_pathinfo(cct, cpi); 9311 } else 9312 free_vhcache_pathinfo(cpi); 9313 } 9314 9315 if (cct->cct_cpi_head != NULL) 9316 enqueue_vhcache_client(vhcache, cct); 9317 else { 9318 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9319 (mod_hash_key_t)cct->cct_name_addr); 9320 free_vhcache_client(cct); 9321 } 9322 } 9323 9324 cphci_head = vhcache->vhcache_phci_head; 9325 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9326 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 9327 cphci_next = cphci->cphci_next; 9328 if (cphci->cphci_phci != NULL) 9329 enqueue_vhcache_phci(vhcache, cphci); 9330 else 9331 free_vhcache_phci(cphci); 9332 } 9333 9334 vhcache->vhcache_clean_time = lbolt64; 9335 rw_exit(&vhcache->vhcache_lock); 9336 vhcache_dirty(vhc); 9337 } 9338 9339 /* 9340 * Remove all stale entries from vhci cache. 
9341 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9342 */ 9343 void 9344 mdi_clean_vhcache(void) 9345 { 9346 mdi_vhci_t *vh; 9347 9348 mutex_enter(&mdi_mutex); 9349 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9350 vh->vh_refcnt++; 9351 mutex_exit(&mdi_mutex); 9352 clean_vhcache(vh->vh_config); 9353 mutex_enter(&mdi_mutex); 9354 vh->vh_refcnt--; 9355 } 9356 mutex_exit(&mdi_mutex); 9357 } 9358 9359 /* 9360 * mdi_vhci_walk_clients(): 9361 * Walker routine to traverse client dev_info nodes 9362 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9363 * below the client, including nexus devices, which we dont want. 9364 * So we just traverse the immediate siblings, starting from 1st client. 9365 */ 9366 void 9367 mdi_vhci_walk_clients(dev_info_t *vdip, 9368 int (*f)(dev_info_t *, void *), void *arg) 9369 { 9370 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9371 dev_info_t *cdip; 9372 mdi_client_t *ct; 9373 9374 MDI_VHCI_CLIENT_LOCK(vh); 9375 cdip = ddi_get_child(vdip); 9376 while (cdip) { 9377 ct = i_devi_get_client(cdip); 9378 MDI_CLIENT_LOCK(ct); 9379 9380 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9381 cdip = ddi_get_next_sibling(cdip); 9382 else 9383 cdip = NULL; 9384 9385 MDI_CLIENT_UNLOCK(ct); 9386 } 9387 MDI_VHCI_CLIENT_UNLOCK(vh); 9388 } 9389 9390 /* 9391 * mdi_vhci_walk_phcis(): 9392 * Walker routine to traverse phci dev_info nodes 9393 */ 9394 void 9395 mdi_vhci_walk_phcis(dev_info_t *vdip, 9396 int (*f)(dev_info_t *, void *), void *arg) 9397 { 9398 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9399 mdi_phci_t *ph, *next; 9400 9401 MDI_VHCI_PHCI_LOCK(vh); 9402 ph = vh->vh_phci_head; 9403 while (ph) { 9404 MDI_PHCI_LOCK(ph); 9405 9406 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9407 next = ph->ph_next; 9408 else 9409 next = NULL; 9410 9411 MDI_PHCI_UNLOCK(ph); 9412 ph = next; 9413 } 9414 MDI_VHCI_PHCI_UNLOCK(vh); 9415 } 9416 9417 9418 /* 9419 * mdi_walk_vhcis(): 9420 * Walker routine to traverse vhci 
dev_info nodes 9421 */ 9422 void 9423 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9424 { 9425 mdi_vhci_t *vh = NULL; 9426 9427 mutex_enter(&mdi_mutex); 9428 /* 9429 * Scan for already registered vhci 9430 */ 9431 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9432 vh->vh_refcnt++; 9433 mutex_exit(&mdi_mutex); 9434 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9435 mutex_enter(&mdi_mutex); 9436 vh->vh_refcnt--; 9437 break; 9438 } else { 9439 mutex_enter(&mdi_mutex); 9440 vh->vh_refcnt--; 9441 } 9442 } 9443 9444 mutex_exit(&mdi_mutex); 9445 } 9446 9447 /* 9448 * i_mdi_log_sysevent(): 9449 * Logs events for pickup by syseventd 9450 */ 9451 static void 9452 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9453 { 9454 char *path_name; 9455 nvlist_t *attr_list; 9456 9457 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9458 KM_SLEEP) != DDI_SUCCESS) { 9459 goto alloc_failed; 9460 } 9461 9462 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9463 (void) ddi_pathname(dip, path_name); 9464 9465 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9466 ddi_driver_name(dip)) != DDI_SUCCESS) { 9467 goto error; 9468 } 9469 9470 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9471 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9472 goto error; 9473 } 9474 9475 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9476 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9477 goto error; 9478 } 9479 9480 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9481 path_name) != DDI_SUCCESS) { 9482 goto error; 9483 } 9484 9485 if (nvlist_add_string(attr_list, DDI_CLASS, 9486 ph_vh_class) != DDI_SUCCESS) { 9487 goto error; 9488 } 9489 9490 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9491 attr_list, NULL, DDI_SLEEP); 9492 9493 error: 9494 kmem_free(path_name, MAXPATHLEN); 9495 nvlist_free(attr_list); 9496 return; 9497 9498 alloc_failed: 9499 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9500 } 9501 9502 char ** 9503 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9504 { 9505 char **driver_list, **ret_driver_list = NULL; 9506 int *root_support_list; 9507 int cur_elements, max_elements; 9508 9509 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9510 &cur_elements, &max_elements); 9511 9512 9513 if (driver_list) { 9514 kmem_free(root_support_list, sizeof (int) * max_elements); 9515 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9516 * max_elements, sizeof (char *) * cur_elements); 9517 } 9518 *ndrivers = cur_elements; 9519 9520 return (ret_driver_list); 9521 9522 } 9523 9524 void 9525 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9526 { 9527 char **p; 9528 int i; 9529 9530 if (driver_list) { 9531 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9532 kmem_free(*p, strlen(*p) + 1); 9533 kmem_free(driver_list, sizeof (char *) * ndrivers); 9534 } 9535 } 9536 9537 /* 9538 * mdi_is_dev_supported(): 9539 * function called by pHCI bus config operation to determine if a 9540 * device should be represented as a child of the vHCI or the 9541 * pHCI. This decision is made by the vHCI, using cinfo idenity 9542 * information passed by the pHCI - specifics of the cinfo 9543 * representation are by agreement between the pHCI and vHCI. 9544 * Return Values: 9545 * MDI_SUCCESS 9546 * MDI_FAILURE 9547 */ 9548 int 9549 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9550 { 9551 mdi_vhci_t *vh; 9552 9553 ASSERT(class && pdip); 9554 9555 /* 9556 * For dev_supported, mdi_phci_register() must have established pdip as 9557 * a pHCI. 9558 * 9559 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9560 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9561 */ 9562 if (!MDI_PHCI(pdip)) 9563 return (MDI_FAILURE); 9564 9565 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9566 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9567 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9568 return (MDI_FAILURE); 9569 } 9570 9571 /* Return vHCI answer */ 9572 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9573 } 9574 9575 int 9576 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9577 { 9578 uint_t devstate = 0; 9579 dev_info_t *cdip; 9580 9581 if ((pip == NULL) || (dcp == NULL)) 9582 return (MDI_FAILURE); 9583 9584 cdip = mdi_pi_get_client(pip); 9585 9586 switch (mdi_pi_get_state(pip)) { 9587 case MDI_PATHINFO_STATE_INIT: 9588 devstate = DEVICE_DOWN; 9589 break; 9590 case MDI_PATHINFO_STATE_ONLINE: 9591 devstate = DEVICE_ONLINE; 9592 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9593 devstate |= DEVICE_BUSY; 9594 break; 9595 case MDI_PATHINFO_STATE_STANDBY: 9596 devstate = DEVICE_ONLINE; 9597 break; 9598 case MDI_PATHINFO_STATE_FAULT: 9599 devstate = DEVICE_DOWN; 9600 break; 9601 case MDI_PATHINFO_STATE_OFFLINE: 9602 devstate = DEVICE_OFFLINE; 9603 break; 9604 default: 9605 ASSERT(MDI_PI(pip)->pi_state); 9606 } 9607 9608 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9609 return (MDI_FAILURE); 9610 9611 return (MDI_SUCCESS); 9612 } 9613