1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
29 * 30 * Default locking order: 31 * 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 #include <sys/sysmacros.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 77 #define MDI_WARN CE_WARN, __func__ 78 #define MDI_NOTE CE_NOTE, __func__ 79 #define MDI_CONT CE_CONT, __func__ 80 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 81 #else /* !DEBUG */ 82 #define MDI_DEBUG(dbglevel, pargs) 83 #endif /* DEBUG */ 84 int mdi_debug_consoleonly = 0; 85 86 extern pri_t minclsyspri; 87 extern int modrootloaded; 88 89 /* 90 * Global mutex: 91 * Protects vHCI list and structure members. 
92 */ 93 kmutex_t mdi_mutex; 94 95 /* 96 * Registered vHCI class driver lists 97 */ 98 int mdi_vhci_count; 99 mdi_vhci_t *mdi_vhci_head; 100 mdi_vhci_t *mdi_vhci_tail; 101 102 /* 103 * Client Hash Table size 104 */ 105 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 106 107 /* 108 * taskq interface definitions 109 */ 110 #define MDI_TASKQ_N_THREADS 8 111 #define MDI_TASKQ_PRI minclsyspri 112 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 113 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 114 115 taskq_t *mdi_taskq; 116 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 117 118 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 119 120 /* 121 * The data should be "quiet" for this interval (in seconds) before the 122 * vhci cached data is flushed to the disk. 123 */ 124 static int mdi_vhcache_flush_delay = 10; 125 126 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 127 static int mdi_vhcache_flush_daemon_idle_time = 60; 128 129 /* 130 * MDI falls back to discovery of all paths when a bus_config_one fails. 131 * The following parameters can be used to tune this operation. 132 * 133 * mdi_path_discovery_boot 134 * Number of times path discovery will be attempted during early boot. 135 * Probably there is no reason to ever set this value to greater than one. 136 * 137 * mdi_path_discovery_postboot 138 * Number of times path discovery will be attempted after early boot. 139 * Set it to a minimum of two to allow for discovery of iscsi paths which 140 * may happen very late during booting. 141 * 142 * mdi_path_discovery_interval 143 * Minimum number of seconds MDI will wait between successive discovery 144 * of all paths. Set it to -1 to disable discovery of all paths. 
145 */ 146 static int mdi_path_discovery_boot = 1; 147 static int mdi_path_discovery_postboot = 2; 148 static int mdi_path_discovery_interval = 10; 149 150 /* 151 * number of seconds the asynchronous configuration thread will sleep idle 152 * before exiting. 153 */ 154 static int mdi_async_config_idle_time = 600; 155 156 static int mdi_bus_config_cache_hash_size = 256; 157 158 /* turns off multithreaded configuration for certain operations */ 159 static int mdi_mtc_off = 0; 160 161 /* 162 * The "path" to a pathinfo node is identical to the /devices path to a 163 * devinfo node had the device been enumerated under a pHCI instead of 164 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 165 * This association persists across create/delete of the pathinfo nodes, 166 * but not across reboot. 167 */ 168 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 169 static int mdi_pathmap_hash_size = 256; 170 static kmutex_t mdi_pathmap_mutex; 171 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 172 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 173 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */ 174 175 /* 176 * MDI component property name/value string definitions 177 */ 178 const char *mdi_component_prop = "mpxio-component"; 179 const char *mdi_component_prop_vhci = "vhci"; 180 const char *mdi_component_prop_phci = "phci"; 181 const char *mdi_component_prop_client = "client"; 182 183 /* 184 * MDI client global unique identifier property name 185 */ 186 const char *mdi_client_guid_prop = "client-guid"; 187 188 /* 189 * MDI client load balancing property name/value string definitions 190 */ 191 const char *mdi_load_balance = "load-balance"; 192 const char *mdi_load_balance_none = "none"; 193 const char *mdi_load_balance_rr = "round-robin"; 194 const char *mdi_load_balance_lba = "logical-block"; 195 196 /* 197 * Obsolete vHCI class definition; to be removed after Leadville update 198 */ 199 const char 
*mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 200 201 static char vhci_greeting[] = 202 "\tThere already exists one vHCI driver for class %s\n" 203 "\tOnly one vHCI driver for each class is allowed\n"; 204 205 /* 206 * Static function prototypes 207 */ 208 static int i_mdi_phci_offline(dev_info_t *, uint_t); 209 static int i_mdi_client_offline(dev_info_t *, uint_t); 210 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 211 static void i_mdi_phci_post_detach(dev_info_t *, 212 ddi_detach_cmd_t, int); 213 static int i_mdi_client_pre_detach(dev_info_t *, 214 ddi_detach_cmd_t); 215 static void i_mdi_client_post_detach(dev_info_t *, 216 ddi_detach_cmd_t, int); 217 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 218 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 219 static int i_mdi_lba_lb(mdi_client_t *ct, 220 mdi_pathinfo_t **ret_pip, struct buf *buf); 221 static void i_mdi_pm_hold_client(mdi_client_t *, int); 222 static void i_mdi_pm_rele_client(mdi_client_t *, int); 223 static void i_mdi_pm_reset_client(mdi_client_t *); 224 static int i_mdi_power_all_phci(mdi_client_t *); 225 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 226 227 228 /* 229 * Internal mdi_pathinfo node functions 230 */ 231 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 232 233 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 234 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 235 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 236 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 237 static void i_mdi_phci_unlock(mdi_phci_t *); 238 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 239 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 240 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 241 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 242 mdi_client_t *); 243 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 244 static void 
i_mdi_client_remove_path(mdi_client_t *, 245 mdi_pathinfo_t *); 246 247 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 248 mdi_pathinfo_state_t, int); 249 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 250 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 251 char **, int); 252 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 253 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 254 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 255 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 256 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 257 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 258 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 259 static void i_mdi_client_update_state(mdi_client_t *); 260 static int i_mdi_client_compute_state(mdi_client_t *, 261 mdi_phci_t *); 262 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 263 static void i_mdi_client_unlock(mdi_client_t *); 264 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 265 static mdi_client_t *i_devi_get_client(dev_info_t *); 266 /* 267 * NOTE: this will be removed once the NWS files are changed to use the new 268 * mdi_{enable,disable}_path interfaces 269 */ 270 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 271 int, int); 272 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 273 mdi_vhci_t *vh, int flags, int op); 274 /* 275 * Failover related function prototypes 276 */ 277 static int i_mdi_failover(void *); 278 279 /* 280 * misc internal functions 281 */ 282 static int i_mdi_get_hash_key(char *); 283 static int i_map_nvlist_error_to_mdi(int); 284 static void i_mdi_report_path_state(mdi_client_t *, 285 mdi_pathinfo_t *); 286 287 static void setup_vhci_cache(mdi_vhci_t *); 288 static int destroy_vhci_cache(mdi_vhci_t *); 289 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 
290 static boolean_t stop_vhcache_flush_thread(void *, int); 291 static void free_string_array(char **, int); 292 static void free_vhcache_phci(mdi_vhcache_phci_t *); 293 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 294 static void free_vhcache_client(mdi_vhcache_client_t *); 295 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 296 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 297 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 298 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 299 static void vhcache_pi_add(mdi_vhci_config_t *, 300 struct mdi_pathinfo *); 301 static void vhcache_pi_remove(mdi_vhci_config_t *, 302 struct mdi_pathinfo *); 303 static void free_phclient_path_list(mdi_phys_path_t *); 304 static void sort_vhcache_paths(mdi_vhcache_client_t *); 305 static int flush_vhcache(mdi_vhci_config_t *, int); 306 static void vhcache_dirty(mdi_vhci_config_t *); 307 static void free_async_client_config(mdi_async_client_config_t *); 308 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 309 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 310 static nvlist_t *read_on_disk_vhci_cache(char *); 311 extern int fread_nvlist(char *, nvlist_t **); 312 extern int fwrite_nvlist(char *, nvlist_t *); 313 314 /* called once when first vhci registers with mdi */ 315 static void 316 i_mdi_init() 317 { 318 static int initialized = 0; 319 320 if (initialized) 321 return; 322 initialized = 1; 323 324 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 325 326 /* Create our taskq resources */ 327 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 328 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 329 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 330 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 331 332 /* Allocate ['path_instance' <-> "path"] maps */ 333 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 334 mdi_pathmap_bypath = mod_hash_create_strhash( 
335 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 336 mod_hash_null_valdtor); 337 mdi_pathmap_byinstance = mod_hash_create_idhash( 338 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 339 mod_hash_null_valdtor); 340 mdi_pathmap_sbyinstance = mod_hash_create_idhash( 341 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size, 342 mod_hash_null_valdtor); 343 } 344 345 /* 346 * mdi_get_component_type(): 347 * Return mpxio component type 348 * Return Values: 349 * MDI_COMPONENT_NONE 350 * MDI_COMPONENT_VHCI 351 * MDI_COMPONENT_PHCI 352 * MDI_COMPONENT_CLIENT 353 * XXX This doesn't work under multi-level MPxIO and should be 354 * removed when clients migrate mdi_component_is_*() interfaces. 355 */ 356 int 357 mdi_get_component_type(dev_info_t *dip) 358 { 359 return (DEVI(dip)->devi_mdi_component); 360 } 361 362 /* 363 * mdi_vhci_register(): 364 * Register a vHCI module with the mpxio framework 365 * mdi_vhci_register() is called by vHCI drivers to register the 366 * 'class_driver' vHCI driver and its MDI entrypoints with the 367 * mpxio framework. The vHCI driver must call this interface as 368 * part of its attach(9e) handler. 369 * Competing threads may try to attach mdi_vhci_register() as 370 * the vHCI drivers are loaded and attached as a result of pHCI 371 * driver instance registration (mdi_phci_register()) with the 372 * framework. 373 * Return Values: 374 * MDI_SUCCESS 375 * MDI_FAILURE 376 */ 377 /*ARGSUSED*/ 378 int 379 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 380 int flags) 381 { 382 mdi_vhci_t *vh = NULL; 383 384 /* Registrant can't be older */ 385 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 386 387 #ifdef DEBUG 388 /* 389 * IB nexus driver is loaded only when IB hardware is present. 390 * In order to be able to do this there is a need to drive the loading 391 * and attaching of the IB nexus driver (especially when an IB hardware 392 * is dynamically plugged in) when an IB HCA driver (PHCI) 393 * is being attached. 
Unfortunately this gets into the limitations 394 * of devfs as there seems to be no clean way to drive configuration 395 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 396 * for IB. 397 */ 398 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 399 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 400 #endif 401 402 i_mdi_init(); 403 404 mutex_enter(&mdi_mutex); 405 /* 406 * Scan for already registered vhci 407 */ 408 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 409 if (strcmp(vh->vh_class, class) == 0) { 410 /* 411 * vHCI has already been created. Check for valid 412 * vHCI ops registration. We only support one vHCI 413 * module per class 414 */ 415 if (vh->vh_ops != NULL) { 416 mutex_exit(&mdi_mutex); 417 cmn_err(CE_NOTE, vhci_greeting, class); 418 return (MDI_FAILURE); 419 } 420 break; 421 } 422 } 423 424 /* 425 * if not yet created, create the vHCI component 426 */ 427 if (vh == NULL) { 428 struct client_hash *hash = NULL; 429 char *load_balance; 430 431 /* 432 * Allocate and initialize the mdi extensions 433 */ 434 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 435 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 436 KM_SLEEP); 437 vh->vh_client_table = hash; 438 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 439 (void) strcpy(vh->vh_class, class); 440 vh->vh_lb = LOAD_BALANCE_RR; 441 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 442 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 443 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 444 vh->vh_lb = LOAD_BALANCE_NONE; 445 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 446 == 0) { 447 vh->vh_lb = LOAD_BALANCE_LBA; 448 } 449 ddi_prop_free(load_balance); 450 } 451 452 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 453 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 454 455 /* 456 * Store the vHCI ops vectors 457 */ 458 vh->vh_dip = vdip; 459 vh->vh_ops = vops; 460 461 setup_vhci_cache(vh); 462 463 if (mdi_vhci_head == 
NULL) { 464 mdi_vhci_head = vh; 465 } 466 if (mdi_vhci_tail) { 467 mdi_vhci_tail->vh_next = vh; 468 } 469 mdi_vhci_tail = vh; 470 mdi_vhci_count++; 471 } 472 473 /* 474 * Claim the devfs node as a vhci component 475 */ 476 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 477 478 /* 479 * Initialize our back reference from dev_info node 480 */ 481 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 482 mutex_exit(&mdi_mutex); 483 return (MDI_SUCCESS); 484 } 485 486 /* 487 * mdi_vhci_unregister(): 488 * Unregister a vHCI module from mpxio framework 489 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 490 * of a vhci to unregister it from the framework. 491 * Return Values: 492 * MDI_SUCCESS 493 * MDI_FAILURE 494 */ 495 /*ARGSUSED*/ 496 int 497 mdi_vhci_unregister(dev_info_t *vdip, int flags) 498 { 499 mdi_vhci_t *found, *vh, *prev = NULL; 500 501 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 502 503 /* 504 * Check for invalid VHCI 505 */ 506 if ((vh = i_devi_get_vhci(vdip)) == NULL) 507 return (MDI_FAILURE); 508 509 /* 510 * Scan the list of registered vHCIs for a match 511 */ 512 mutex_enter(&mdi_mutex); 513 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 514 if (found == vh) 515 break; 516 prev = found; 517 } 518 519 if (found == NULL) { 520 mutex_exit(&mdi_mutex); 521 return (MDI_FAILURE); 522 } 523 524 /* 525 * Check the vHCI, pHCI and client count. All the pHCIs and clients 526 * should have been unregistered, before a vHCI can be 527 * unregistered. 
528 */ 529 MDI_VHCI_PHCI_LOCK(vh); 530 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 531 MDI_VHCI_PHCI_UNLOCK(vh); 532 mutex_exit(&mdi_mutex); 533 return (MDI_FAILURE); 534 } 535 MDI_VHCI_PHCI_UNLOCK(vh); 536 537 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 538 mutex_exit(&mdi_mutex); 539 return (MDI_FAILURE); 540 } 541 542 /* 543 * Remove the vHCI from the global list 544 */ 545 if (vh == mdi_vhci_head) { 546 mdi_vhci_head = vh->vh_next; 547 } else { 548 prev->vh_next = vh->vh_next; 549 } 550 if (vh == mdi_vhci_tail) { 551 mdi_vhci_tail = prev; 552 } 553 mdi_vhci_count--; 554 mutex_exit(&mdi_mutex); 555 556 vh->vh_ops = NULL; 557 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 558 DEVI(vdip)->devi_mdi_xhci = NULL; 559 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 560 kmem_free(vh->vh_client_table, 561 mdi_client_table_size * sizeof (struct client_hash)); 562 mutex_destroy(&vh->vh_phci_mutex); 563 mutex_destroy(&vh->vh_client_mutex); 564 565 kmem_free(vh, sizeof (mdi_vhci_t)); 566 return (MDI_SUCCESS); 567 } 568 569 /* 570 * i_mdi_vhci_class2vhci(): 571 * Look for a matching vHCI module given a vHCI class name 572 * Return Values: 573 * Handle to a vHCI component 574 * NULL 575 */ 576 static mdi_vhci_t * 577 i_mdi_vhci_class2vhci(char *class) 578 { 579 mdi_vhci_t *vh = NULL; 580 581 ASSERT(!MUTEX_HELD(&mdi_mutex)); 582 583 mutex_enter(&mdi_mutex); 584 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 585 if (strcmp(vh->vh_class, class) == 0) { 586 break; 587 } 588 } 589 mutex_exit(&mdi_mutex); 590 return (vh); 591 } 592 593 /* 594 * i_devi_get_vhci(): 595 * Utility function to get the handle to a vHCI component 596 * Return Values: 597 * Handle to a vHCI component 598 * NULL 599 */ 600 mdi_vhci_t * 601 i_devi_get_vhci(dev_info_t *vdip) 602 { 603 mdi_vhci_t *vh = NULL; 604 if (MDI_VHCI(vdip)) { 605 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 606 } 607 return (vh); 608 } 609 610 /* 611 * mdi_phci_register(): 612 * Register a pHCI 
module with mpxio framework 613 * mdi_phci_register() is called by pHCI drivers to register with 614 * the mpxio framework and a specific 'class_driver' vHCI. The 615 * pHCI driver must call this interface as part of its attach(9e) 616 * handler. 617 * Return Values: 618 * MDI_SUCCESS 619 * MDI_FAILURE 620 */ 621 /*ARGSUSED*/ 622 int 623 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 624 { 625 mdi_phci_t *ph; 626 mdi_vhci_t *vh; 627 char *data; 628 629 /* 630 * Some subsystems, like fcp, perform pHCI registration from a 631 * different thread than the one doing the pHCI attach(9E) - the 632 * driver attach code is waiting for this other thread to complete. 633 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 634 * (indicating that some thread has done an ndi_devi_enter of parent) 635 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 636 */ 637 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 638 639 /* 640 * Check for mpxio-disable property. Enable mpxio if the property is 641 * missing or not set to "yes". 642 * If the property is set to "yes" then emit a brief message. 
643 */ 644 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 645 &data) == DDI_SUCCESS)) { 646 if (strcmp(data, "yes") == 0) { 647 MDI_DEBUG(1, (MDI_CONT, pdip, 648 "?multipath capabilities disabled via %s.conf.", 649 ddi_driver_name(pdip))); 650 ddi_prop_free(data); 651 return (MDI_FAILURE); 652 } 653 ddi_prop_free(data); 654 } 655 656 /* 657 * Search for a matching vHCI 658 */ 659 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 660 if (vh == NULL) { 661 return (MDI_FAILURE); 662 } 663 664 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 665 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 666 ph->ph_dip = pdip; 667 ph->ph_vhci = vh; 668 ph->ph_next = NULL; 669 ph->ph_unstable = 0; 670 ph->ph_vprivate = 0; 671 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 672 673 MDI_PHCI_LOCK(ph); 674 MDI_PHCI_SET_POWER_UP(ph); 675 MDI_PHCI_UNLOCK(ph); 676 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 677 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 678 679 vhcache_phci_add(vh->vh_config, ph); 680 681 MDI_VHCI_PHCI_LOCK(vh); 682 if (vh->vh_phci_head == NULL) { 683 vh->vh_phci_head = ph; 684 } 685 if (vh->vh_phci_tail) { 686 vh->vh_phci_tail->ph_next = ph; 687 } 688 vh->vh_phci_tail = ph; 689 vh->vh_phci_count++; 690 MDI_VHCI_PHCI_UNLOCK(vh); 691 692 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 693 return (MDI_SUCCESS); 694 } 695 696 /* 697 * mdi_phci_unregister(): 698 * Unregister a pHCI module from mpxio framework 699 * mdi_phci_unregister() is called by the pHCI drivers from their 700 * detach(9E) handler to unregister their instances from the 701 * framework. 
702 * Return Values: 703 * MDI_SUCCESS 704 * MDI_FAILURE 705 */ 706 /*ARGSUSED*/ 707 int 708 mdi_phci_unregister(dev_info_t *pdip, int flags) 709 { 710 mdi_vhci_t *vh; 711 mdi_phci_t *ph; 712 mdi_phci_t *tmp; 713 mdi_phci_t *prev = NULL; 714 mdi_pathinfo_t *pip; 715 716 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 717 718 ph = i_devi_get_phci(pdip); 719 if (ph == NULL) { 720 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI")); 721 return (MDI_FAILURE); 722 } 723 724 vh = ph->ph_vhci; 725 ASSERT(vh != NULL); 726 if (vh == NULL) { 727 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI")); 728 return (MDI_FAILURE); 729 } 730 731 MDI_VHCI_PHCI_LOCK(vh); 732 tmp = vh->vh_phci_head; 733 while (tmp) { 734 if (tmp == ph) { 735 break; 736 } 737 prev = tmp; 738 tmp = tmp->ph_next; 739 } 740 741 if (ph == vh->vh_phci_head) { 742 vh->vh_phci_head = ph->ph_next; 743 } else { 744 prev->ph_next = ph->ph_next; 745 } 746 747 if (ph == vh->vh_phci_tail) { 748 vh->vh_phci_tail = prev; 749 } 750 751 vh->vh_phci_count--; 752 MDI_VHCI_PHCI_UNLOCK(vh); 753 754 /* Walk remaining pathinfo nodes and disassociate them from pHCI */ 755 MDI_PHCI_LOCK(ph); 756 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip; 757 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) 758 MDI_PI(pip)->pi_phci = NULL; 759 MDI_PHCI_UNLOCK(ph); 760 761 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 762 ESC_DDI_INITIATOR_UNREGISTER); 763 vhcache_phci_remove(vh->vh_config, ph); 764 cv_destroy(&ph->ph_unstable_cv); 765 mutex_destroy(&ph->ph_mutex); 766 kmem_free(ph, sizeof (mdi_phci_t)); 767 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 768 DEVI(pdip)->devi_mdi_xhci = NULL; 769 return (MDI_SUCCESS); 770 } 771 772 /* 773 * i_devi_get_phci(): 774 * Utility function to return the phci extensions. 
775 */ 776 static mdi_phci_t * 777 i_devi_get_phci(dev_info_t *pdip) 778 { 779 mdi_phci_t *ph = NULL; 780 781 if (MDI_PHCI(pdip)) { 782 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 783 } 784 return (ph); 785 } 786 787 /* 788 * Single thread mdi entry into devinfo node for modifying its children. 789 * If necessary we perform an ndi_devi_enter of the vHCI before doing 790 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 791 * for the vHCI and one for the pHCI. 792 */ 793 void 794 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 795 { 796 dev_info_t *vdip; 797 int vcircular, pcircular; 798 799 /* Verify calling context */ 800 ASSERT(MDI_PHCI(phci_dip)); 801 vdip = mdi_devi_get_vdip(phci_dip); 802 ASSERT(vdip); /* A pHCI always has a vHCI */ 803 804 /* 805 * If pHCI is detaching then the framework has already entered the 806 * vHCI on a threads that went down the code path leading to 807 * detach_node(). This framework enter of the vHCI during pHCI 808 * detach is done to avoid deadlock with vHCI power management 809 * operations which enter the vHCI and the enter down the path 810 * to the pHCI. If pHCI is detaching then we piggyback this calls 811 * enter of the vHCI on frameworks vHCI enter that has already 812 * occurred - this is OK because we know that the framework thread 813 * doing detach is waiting for our completion. 814 * 815 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 816 * race with detach - but we can't do that because the framework has 817 * already entered the parent, so we have some complexity instead. 
818 */ 819 for (;;) { 820 if (ndi_devi_tryenter(vdip, &vcircular)) { 821 ASSERT(vcircular != -1); 822 if (DEVI_IS_DETACHING(phci_dip)) { 823 ndi_devi_exit(vdip, vcircular); 824 vcircular = -1; 825 } 826 break; 827 } else if (DEVI_IS_DETACHING(phci_dip)) { 828 vcircular = -1; 829 break; 830 } else if (servicing_interrupt()) { 831 /* 832 * Don't delay an interrupt (and ensure adaptive 833 * mutex inversion support). 834 */ 835 ndi_devi_enter(vdip, &vcircular); 836 break; 837 } else { 838 delay_random(2); 839 } 840 } 841 842 ndi_devi_enter(phci_dip, &pcircular); 843 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 844 } 845 846 /* 847 * Attempt to mdi_devi_enter. 848 */ 849 int 850 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 851 { 852 dev_info_t *vdip; 853 int vcircular, pcircular; 854 855 /* Verify calling context */ 856 ASSERT(MDI_PHCI(phci_dip)); 857 vdip = mdi_devi_get_vdip(phci_dip); 858 ASSERT(vdip); /* A pHCI always has a vHCI */ 859 860 if (ndi_devi_tryenter(vdip, &vcircular)) { 861 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 862 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 863 return (1); /* locked */ 864 } 865 ndi_devi_exit(vdip, vcircular); 866 } 867 return (0); /* busy */ 868 } 869 870 /* 871 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
872 */ 873 void 874 mdi_devi_exit(dev_info_t *phci_dip, int circular) 875 { 876 dev_info_t *vdip; 877 int vcircular, pcircular; 878 879 /* Verify calling context */ 880 ASSERT(MDI_PHCI(phci_dip)); 881 vdip = mdi_devi_get_vdip(phci_dip); 882 ASSERT(vdip); /* A pHCI always has a vHCI */ 883 884 /* extract two circular recursion values from single int */ 885 pcircular = (short)(circular & 0xFFFF); 886 vcircular = (short)((circular >> 16) & 0xFFFF); 887 888 ndi_devi_exit(phci_dip, pcircular); 889 if (vcircular != -1) 890 ndi_devi_exit(vdip, vcircular); 891 } 892 893 /* 894 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 895 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 896 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 897 * with vHCI power management code during path online/offline. Each 898 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 899 * occur within the scope of an active mdi_devi_enter that establishes the 900 * circular value. 901 */ 902 void 903 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 904 { 905 int pcircular; 906 907 /* Verify calling context */ 908 ASSERT(MDI_PHCI(phci_dip)); 909 910 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 911 ndi_hold_devi(phci_dip); 912 913 pcircular = (short)(circular & 0xFFFF); 914 ndi_devi_exit(phci_dip, pcircular); 915 } 916 917 void 918 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 919 { 920 int pcircular; 921 922 /* Verify calling context */ 923 ASSERT(MDI_PHCI(phci_dip)); 924 925 ndi_devi_enter(phci_dip, &pcircular); 926 927 /* Drop hold from mdi_devi_exit_phci. 
*/ 928 ndi_rele_devi(phci_dip); 929 930 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 931 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 932 } 933 934 /* 935 * mdi_devi_get_vdip(): 936 * given a pHCI dip return vHCI dip 937 */ 938 dev_info_t * 939 mdi_devi_get_vdip(dev_info_t *pdip) 940 { 941 mdi_phci_t *ph; 942 943 ph = i_devi_get_phci(pdip); 944 if (ph && ph->ph_vhci) 945 return (ph->ph_vhci->vh_dip); 946 return (NULL); 947 } 948 949 /* 950 * mdi_devi_pdip_entered(): 951 * Return 1 if we are vHCI and have done an ndi_devi_enter 952 * of a pHCI 953 */ 954 int 955 mdi_devi_pdip_entered(dev_info_t *vdip) 956 { 957 mdi_vhci_t *vh; 958 mdi_phci_t *ph; 959 960 vh = i_devi_get_vhci(vdip); 961 if (vh == NULL) 962 return (0); 963 964 MDI_VHCI_PHCI_LOCK(vh); 965 ph = vh->vh_phci_head; 966 while (ph) { 967 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 968 MDI_VHCI_PHCI_UNLOCK(vh); 969 return (1); 970 } 971 ph = ph->ph_next; 972 } 973 MDI_VHCI_PHCI_UNLOCK(vh); 974 return (0); 975 } 976 977 /* 978 * mdi_phci_path2devinfo(): 979 * Utility function to search for a valid phci device given 980 * the devfs pathname. 
981 */ 982 dev_info_t * 983 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 984 { 985 char *temp_pathname; 986 mdi_vhci_t *vh; 987 mdi_phci_t *ph; 988 dev_info_t *pdip = NULL; 989 990 vh = i_devi_get_vhci(vdip); 991 ASSERT(vh != NULL); 992 993 if (vh == NULL) { 994 /* 995 * Invalid vHCI component, return failure 996 */ 997 return (NULL); 998 } 999 1000 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1001 MDI_VHCI_PHCI_LOCK(vh); 1002 ph = vh->vh_phci_head; 1003 while (ph != NULL) { 1004 pdip = ph->ph_dip; 1005 ASSERT(pdip != NULL); 1006 *temp_pathname = '\0'; 1007 (void) ddi_pathname(pdip, temp_pathname); 1008 if (strcmp(temp_pathname, pathname) == 0) { 1009 break; 1010 } 1011 ph = ph->ph_next; 1012 } 1013 if (ph == NULL) { 1014 pdip = NULL; 1015 } 1016 MDI_VHCI_PHCI_UNLOCK(vh); 1017 kmem_free(temp_pathname, MAXPATHLEN); 1018 return (pdip); 1019 } 1020 1021 /* 1022 * mdi_phci_get_path_count(): 1023 * get number of path information nodes associated with a given 1024 * pHCI device. 1025 */ 1026 int 1027 mdi_phci_get_path_count(dev_info_t *pdip) 1028 { 1029 mdi_phci_t *ph; 1030 int count = 0; 1031 1032 ph = i_devi_get_phci(pdip); 1033 if (ph != NULL) { 1034 count = ph->ph_path_count; 1035 } 1036 return (count); 1037 } 1038 1039 /* 1040 * i_mdi_phci_lock(): 1041 * Lock a pHCI device 1042 * Return Values: 1043 * None 1044 * Note: 1045 * The default locking order is: 1046 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1047 * But there are number of situations where locks need to be 1048 * grabbed in reverse order. This routine implements try and lock 1049 * mechanism depending on the requested parameter option. 1050 */ 1051 static void 1052 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1053 { 1054 if (pip) { 1055 /* Reverse locking is requested. 
*/ 1056 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1057 if (servicing_interrupt()) { 1058 MDI_PI_HOLD(pip); 1059 MDI_PI_UNLOCK(pip); 1060 MDI_PHCI_LOCK(ph); 1061 MDI_PI_LOCK(pip); 1062 MDI_PI_RELE(pip); 1063 break; 1064 } else { 1065 /* 1066 * tryenter failed. Try to grab again 1067 * after a small delay 1068 */ 1069 MDI_PI_HOLD(pip); 1070 MDI_PI_UNLOCK(pip); 1071 delay_random(2); 1072 MDI_PI_LOCK(pip); 1073 MDI_PI_RELE(pip); 1074 } 1075 } 1076 } else { 1077 MDI_PHCI_LOCK(ph); 1078 } 1079 } 1080 1081 /* 1082 * i_mdi_phci_unlock(): 1083 * Unlock the pHCI component 1084 */ 1085 static void 1086 i_mdi_phci_unlock(mdi_phci_t *ph) 1087 { 1088 MDI_PHCI_UNLOCK(ph); 1089 } 1090 1091 /* 1092 * i_mdi_devinfo_create(): 1093 * create client device's devinfo node 1094 * Return Values: 1095 * dev_info 1096 * NULL 1097 * Notes: 1098 */ 1099 static dev_info_t * 1100 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1101 char **compatible, int ncompatible) 1102 { 1103 dev_info_t *cdip = NULL; 1104 1105 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1106 1107 /* Verify for duplicate entry */ 1108 cdip = i_mdi_devinfo_find(vh, name, guid); 1109 ASSERT(cdip == NULL); 1110 if (cdip) { 1111 cmn_err(CE_WARN, 1112 "i_mdi_devinfo_create: client %s@%s already exists", 1113 name ? name : "", guid ? 
guid : ""); 1114 } 1115 1116 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1117 if (cdip == NULL) 1118 goto fail; 1119 1120 /* 1121 * Create component type and Global unique identifier 1122 * properties 1123 */ 1124 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1125 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1126 goto fail; 1127 } 1128 1129 /* Decorate the node with compatible property */ 1130 if (compatible && 1131 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1132 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1133 goto fail; 1134 } 1135 1136 return (cdip); 1137 1138 fail: 1139 if (cdip) { 1140 (void) ndi_prop_remove_all(cdip); 1141 (void) ndi_devi_free(cdip); 1142 } 1143 return (NULL); 1144 } 1145 1146 /* 1147 * i_mdi_devinfo_find(): 1148 * Find a matching devinfo node for given client node name 1149 * and its guid. 1150 * Return Values: 1151 * Handle to a dev_info node or NULL 1152 */ 1153 static dev_info_t * 1154 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1155 { 1156 char *data; 1157 dev_info_t *cdip = NULL; 1158 dev_info_t *ndip = NULL; 1159 int circular; 1160 1161 ndi_devi_enter(vh->vh_dip, &circular); 1162 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1163 while ((cdip = ndip) != NULL) { 1164 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1165 1166 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1167 continue; 1168 } 1169 1170 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1171 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1172 &data) != DDI_PROP_SUCCESS) { 1173 continue; 1174 } 1175 1176 if (strcmp(data, guid) != 0) { 1177 ddi_prop_free(data); 1178 continue; 1179 } 1180 ddi_prop_free(data); 1181 break; 1182 } 1183 ndi_devi_exit(vh->vh_dip, circular); 1184 return (cdip); 1185 } 1186 1187 /* 1188 * i_mdi_devinfo_remove(): 1189 * Remove a client device node 1190 */ 1191 static int 1192 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1193 { 1194 int rv = MDI_SUCCESS; 1195 
1196 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1197 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1198 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1199 if (rv != NDI_SUCCESS) { 1200 MDI_DEBUG(1, (MDI_NOTE, cdip, 1201 "!failed: cdip %p", (void *)cdip)); 1202 } 1203 /* 1204 * Convert to MDI error code 1205 */ 1206 switch (rv) { 1207 case NDI_SUCCESS: 1208 rv = MDI_SUCCESS; 1209 break; 1210 case NDI_BUSY: 1211 rv = MDI_BUSY; 1212 break; 1213 default: 1214 rv = MDI_FAILURE; 1215 break; 1216 } 1217 } 1218 return (rv); 1219 } 1220 1221 /* 1222 * i_devi_get_client() 1223 * Utility function to get mpxio component extensions 1224 */ 1225 static mdi_client_t * 1226 i_devi_get_client(dev_info_t *cdip) 1227 { 1228 mdi_client_t *ct = NULL; 1229 1230 if (MDI_CLIENT(cdip)) { 1231 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1232 } 1233 return (ct); 1234 } 1235 1236 /* 1237 * i_mdi_is_child_present(): 1238 * Search for the presence of client device dev_info node 1239 */ 1240 static int 1241 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1242 { 1243 int rv = MDI_FAILURE; 1244 struct dev_info *dip; 1245 int circular; 1246 1247 ndi_devi_enter(vdip, &circular); 1248 dip = DEVI(vdip)->devi_child; 1249 while (dip) { 1250 if (dip == DEVI(cdip)) { 1251 rv = MDI_SUCCESS; 1252 break; 1253 } 1254 dip = dip->devi_sibling; 1255 } 1256 ndi_devi_exit(vdip, circular); 1257 return (rv); 1258 } 1259 1260 1261 /* 1262 * i_mdi_client_lock(): 1263 * Grab client component lock 1264 * Return Values: 1265 * None 1266 * Note: 1267 * The default locking order is: 1268 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1269 * But there are number of situations where locks need to be 1270 * grabbed in reverse order. This routine implements try and lock 1271 * mechanism depending on the requested parameter option. 
1272 */ 1273 static void 1274 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1275 { 1276 if (pip) { 1277 /* 1278 * Reverse locking is requested. 1279 */ 1280 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1281 if (servicing_interrupt()) { 1282 MDI_PI_HOLD(pip); 1283 MDI_PI_UNLOCK(pip); 1284 MDI_CLIENT_LOCK(ct); 1285 MDI_PI_LOCK(pip); 1286 MDI_PI_RELE(pip); 1287 break; 1288 } else { 1289 /* 1290 * tryenter failed. Try to grab again 1291 * after a small delay 1292 */ 1293 MDI_PI_HOLD(pip); 1294 MDI_PI_UNLOCK(pip); 1295 delay_random(2); 1296 MDI_PI_LOCK(pip); 1297 MDI_PI_RELE(pip); 1298 } 1299 } 1300 } else { 1301 MDI_CLIENT_LOCK(ct); 1302 } 1303 } 1304 1305 /* 1306 * i_mdi_client_unlock(): 1307 * Unlock a client component 1308 */ 1309 static void 1310 i_mdi_client_unlock(mdi_client_t *ct) 1311 { 1312 MDI_CLIENT_UNLOCK(ct); 1313 } 1314 1315 /* 1316 * i_mdi_client_alloc(): 1317 * Allocate and initialize a client structure. Caller should 1318 * hold the vhci client lock. 1319 * Return Values: 1320 * Handle to a client component 1321 */ 1322 /*ARGSUSED*/ 1323 static mdi_client_t * 1324 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1325 { 1326 mdi_client_t *ct; 1327 1328 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1329 1330 /* 1331 * Allocate and initialize a component structure. 
1332 */ 1333 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1334 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1335 ct->ct_hnext = NULL; 1336 ct->ct_hprev = NULL; 1337 ct->ct_dip = NULL; 1338 ct->ct_vhci = vh; 1339 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1340 (void) strcpy(ct->ct_drvname, name); 1341 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1342 (void) strcpy(ct->ct_guid, lguid); 1343 ct->ct_cprivate = NULL; 1344 ct->ct_vprivate = NULL; 1345 ct->ct_flags = 0; 1346 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1347 MDI_CLIENT_LOCK(ct); 1348 MDI_CLIENT_SET_OFFLINE(ct); 1349 MDI_CLIENT_SET_DETACH(ct); 1350 MDI_CLIENT_SET_POWER_UP(ct); 1351 MDI_CLIENT_UNLOCK(ct); 1352 ct->ct_failover_flags = 0; 1353 ct->ct_failover_status = 0; 1354 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1355 ct->ct_unstable = 0; 1356 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1357 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1358 ct->ct_lb = vh->vh_lb; 1359 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1360 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1361 ct->ct_path_count = 0; 1362 ct->ct_path_head = NULL; 1363 ct->ct_path_tail = NULL; 1364 ct->ct_path_last = NULL; 1365 1366 /* 1367 * Add this client component to our client hash queue 1368 */ 1369 i_mdi_client_enlist_table(vh, ct); 1370 return (ct); 1371 } 1372 1373 /* 1374 * i_mdi_client_enlist_table(): 1375 * Attach the client device to the client hash table. Caller 1376 * should hold the vhci client lock. 
1377 */ 1378 static void 1379 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1380 { 1381 int index; 1382 struct client_hash *head; 1383 1384 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1385 1386 index = i_mdi_get_hash_key(ct->ct_guid); 1387 head = &vh->vh_client_table[index]; 1388 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1389 head->ct_hash_head = ct; 1390 head->ct_hash_count++; 1391 vh->vh_client_count++; 1392 } 1393 1394 /* 1395 * i_mdi_client_delist_table(): 1396 * Attach the client device to the client hash table. 1397 * Caller should hold the vhci client lock. 1398 */ 1399 static void 1400 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1401 { 1402 int index; 1403 char *guid; 1404 struct client_hash *head; 1405 mdi_client_t *next; 1406 mdi_client_t *last; 1407 1408 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1409 1410 guid = ct->ct_guid; 1411 index = i_mdi_get_hash_key(guid); 1412 head = &vh->vh_client_table[index]; 1413 1414 last = NULL; 1415 next = (mdi_client_t *)head->ct_hash_head; 1416 while (next != NULL) { 1417 if (next == ct) { 1418 break; 1419 } 1420 last = next; 1421 next = next->ct_hnext; 1422 } 1423 1424 if (next) { 1425 head->ct_hash_count--; 1426 if (last == NULL) { 1427 head->ct_hash_head = ct->ct_hnext; 1428 } else { 1429 last->ct_hnext = ct->ct_hnext; 1430 } 1431 ct->ct_hnext = NULL; 1432 vh->vh_client_count--; 1433 } 1434 } 1435 1436 1437 /* 1438 * i_mdi_client_free(): 1439 * Free a client component 1440 */ 1441 static int 1442 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1443 { 1444 int rv = MDI_SUCCESS; 1445 int flags = ct->ct_flags; 1446 dev_info_t *cdip; 1447 dev_info_t *vdip; 1448 1449 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1450 1451 vdip = vh->vh_dip; 1452 cdip = ct->ct_dip; 1453 1454 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1455 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1456 DEVI(cdip)->devi_mdi_client = NULL; 1457 1458 /* 1459 * Clear out back ref. 
to dev_info_t node 1460 */ 1461 ct->ct_dip = NULL; 1462 1463 /* 1464 * Remove this client from our hash queue 1465 */ 1466 i_mdi_client_delist_table(vh, ct); 1467 1468 /* 1469 * Uninitialize and free the component 1470 */ 1471 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1472 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1473 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1474 cv_destroy(&ct->ct_failover_cv); 1475 cv_destroy(&ct->ct_unstable_cv); 1476 cv_destroy(&ct->ct_powerchange_cv); 1477 mutex_destroy(&ct->ct_mutex); 1478 kmem_free(ct, sizeof (*ct)); 1479 1480 if (cdip != NULL) { 1481 MDI_VHCI_CLIENT_UNLOCK(vh); 1482 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1483 MDI_VHCI_CLIENT_LOCK(vh); 1484 } 1485 return (rv); 1486 } 1487 1488 /* 1489 * i_mdi_client_find(): 1490 * Find the client structure corresponding to a given guid 1491 * Caller should hold the vhci client lock. 1492 */ 1493 static mdi_client_t * 1494 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1495 { 1496 int index; 1497 struct client_hash *head; 1498 mdi_client_t *ct; 1499 1500 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1501 1502 index = i_mdi_get_hash_key(guid); 1503 head = &vh->vh_client_table[index]; 1504 1505 ct = head->ct_hash_head; 1506 while (ct != NULL) { 1507 if (strcmp(ct->ct_guid, guid) == 0 && 1508 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1509 break; 1510 } 1511 ct = ct->ct_hnext; 1512 } 1513 return (ct); 1514 } 1515 1516 /* 1517 * i_mdi_client_update_state(): 1518 * Compute and update client device state 1519 * Notes: 1520 * A client device can be in any of three possible states: 1521 * 1522 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1523 * one online/standby paths. Can tolerate failures. 1524 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1525 * no alternate paths available as standby. A failure on the online 1526 * would result in loss of access to device data. 
1527 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1528 * no paths available to access the device. 1529 */ 1530 static void 1531 i_mdi_client_update_state(mdi_client_t *ct) 1532 { 1533 int state; 1534 1535 ASSERT(MDI_CLIENT_LOCKED(ct)); 1536 state = i_mdi_client_compute_state(ct, NULL); 1537 MDI_CLIENT_SET_STATE(ct, state); 1538 } 1539 1540 /* 1541 * i_mdi_client_compute_state(): 1542 * Compute client device state 1543 * 1544 * mdi_phci_t * Pointer to pHCI structure which should 1545 * while computing the new value. Used by 1546 * i_mdi_phci_offline() to find the new 1547 * client state after DR of a pHCI. 1548 */ 1549 static int 1550 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1551 { 1552 int state; 1553 int online_count = 0; 1554 int standby_count = 0; 1555 mdi_pathinfo_t *pip, *next; 1556 1557 ASSERT(MDI_CLIENT_LOCKED(ct)); 1558 pip = ct->ct_path_head; 1559 while (pip != NULL) { 1560 MDI_PI_LOCK(pip); 1561 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1562 if (MDI_PI(pip)->pi_phci == ph) { 1563 MDI_PI_UNLOCK(pip); 1564 pip = next; 1565 continue; 1566 } 1567 1568 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1569 == MDI_PATHINFO_STATE_ONLINE) 1570 online_count++; 1571 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1572 == MDI_PATHINFO_STATE_STANDBY) 1573 standby_count++; 1574 MDI_PI_UNLOCK(pip); 1575 pip = next; 1576 } 1577 1578 if (online_count == 0) { 1579 if (standby_count == 0) { 1580 state = MDI_CLIENT_STATE_FAILED; 1581 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1582 "client state failed: ct = %p", (void *)ct)); 1583 } else if (standby_count == 1) { 1584 state = MDI_CLIENT_STATE_DEGRADED; 1585 } else { 1586 state = MDI_CLIENT_STATE_OPTIMAL; 1587 } 1588 } else if (online_count == 1) { 1589 if (standby_count == 0) { 1590 state = MDI_CLIENT_STATE_DEGRADED; 1591 } else { 1592 state = MDI_CLIENT_STATE_OPTIMAL; 1593 } 1594 } else { 1595 state = MDI_CLIENT_STATE_OPTIMAL; 1596 } 1597 return (state); 1598 } 1599 
1600 /* 1601 * i_mdi_client2devinfo(): 1602 * Utility function 1603 */ 1604 dev_info_t * 1605 i_mdi_client2devinfo(mdi_client_t *ct) 1606 { 1607 return (ct->ct_dip); 1608 } 1609 1610 /* 1611 * mdi_client_path2_devinfo(): 1612 * Given the parent devinfo and child devfs pathname, search for 1613 * a valid devfs node handle. 1614 */ 1615 dev_info_t * 1616 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1617 { 1618 dev_info_t *cdip = NULL; 1619 dev_info_t *ndip = NULL; 1620 char *temp_pathname; 1621 int circular; 1622 1623 /* 1624 * Allocate temp buffer 1625 */ 1626 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1627 1628 /* 1629 * Lock parent against changes 1630 */ 1631 ndi_devi_enter(vdip, &circular); 1632 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1633 while ((cdip = ndip) != NULL) { 1634 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1635 1636 *temp_pathname = '\0'; 1637 (void) ddi_pathname(cdip, temp_pathname); 1638 if (strcmp(temp_pathname, pathname) == 0) { 1639 break; 1640 } 1641 } 1642 /* 1643 * Release devinfo lock 1644 */ 1645 ndi_devi_exit(vdip, circular); 1646 1647 /* 1648 * Free the temp buffer 1649 */ 1650 kmem_free(temp_pathname, MAXPATHLEN); 1651 return (cdip); 1652 } 1653 1654 /* 1655 * mdi_client_get_path_count(): 1656 * Utility function to get number of path information nodes 1657 * associated with a given client device. 
1658 */ 1659 int 1660 mdi_client_get_path_count(dev_info_t *cdip) 1661 { 1662 mdi_client_t *ct; 1663 int count = 0; 1664 1665 ct = i_devi_get_client(cdip); 1666 if (ct != NULL) { 1667 count = ct->ct_path_count; 1668 } 1669 return (count); 1670 } 1671 1672 1673 /* 1674 * i_mdi_get_hash_key(): 1675 * Create a hash using strings as keys 1676 * 1677 */ 1678 static int 1679 i_mdi_get_hash_key(char *str) 1680 { 1681 uint32_t g, hash = 0; 1682 char *p; 1683 1684 for (p = str; *p != '\0'; p++) { 1685 g = *p; 1686 hash += g; 1687 } 1688 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1689 } 1690 1691 /* 1692 * mdi_get_lb_policy(): 1693 * Get current load balancing policy for a given client device 1694 */ 1695 client_lb_t 1696 mdi_get_lb_policy(dev_info_t *cdip) 1697 { 1698 client_lb_t lb = LOAD_BALANCE_NONE; 1699 mdi_client_t *ct; 1700 1701 ct = i_devi_get_client(cdip); 1702 if (ct != NULL) { 1703 lb = ct->ct_lb; 1704 } 1705 return (lb); 1706 } 1707 1708 /* 1709 * mdi_set_lb_region_size(): 1710 * Set current region size for the load-balance 1711 */ 1712 int 1713 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1714 { 1715 mdi_client_t *ct; 1716 int rv = MDI_FAILURE; 1717 1718 ct = i_devi_get_client(cdip); 1719 if (ct != NULL && ct->ct_lb_args != NULL) { 1720 ct->ct_lb_args->region_size = region_size; 1721 rv = MDI_SUCCESS; 1722 } 1723 return (rv); 1724 } 1725 1726 /* 1727 * mdi_Set_lb_policy(): 1728 * Set current load balancing policy for a given client device 1729 */ 1730 int 1731 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1732 { 1733 mdi_client_t *ct; 1734 int rv = MDI_FAILURE; 1735 1736 ct = i_devi_get_client(cdip); 1737 if (ct != NULL) { 1738 ct->ct_lb = lb; 1739 rv = MDI_SUCCESS; 1740 } 1741 return (rv); 1742 } 1743 1744 /* 1745 * mdi_failover(): 1746 * failover function called by the vHCI drivers to initiate 1747 * a failover operation. This is typically due to non-availability 1748 * of online paths to route I/O requests. 
Failover can be 1749 * triggered through user application also. 1750 * 1751 * The vHCI driver calls mdi_failover() to initiate a failover 1752 * operation. mdi_failover() calls back into the vHCI driver's 1753 * vo_failover() entry point to perform the actual failover 1754 * operation. The reason for requiring the vHCI driver to 1755 * initiate failover by calling mdi_failover(), instead of directly 1756 * executing vo_failover() itself, is to ensure that the mdi 1757 * framework can keep track of the client state properly. 1758 * Additionally, mdi_failover() provides as a convenience the 1759 * option of performing the failover operation synchronously or 1760 * asynchronously 1761 * 1762 * Upon successful completion of the failover operation, the 1763 * paths that were previously ONLINE will be in the STANDBY state, 1764 * and the newly activated paths will be in the ONLINE state. 1765 * 1766 * The flags modifier determines whether the activation is done 1767 * synchronously: MDI_FAILOVER_SYNC 1768 * Return Values: 1769 * MDI_SUCCESS 1770 * MDI_FAILURE 1771 * MDI_BUSY 1772 */ 1773 /*ARGSUSED*/ 1774 int 1775 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1776 { 1777 int rv; 1778 mdi_client_t *ct; 1779 1780 ct = i_devi_get_client(cdip); 1781 ASSERT(ct != NULL); 1782 if (ct == NULL) { 1783 /* cdip is not a valid client device. Nothing more to do. */ 1784 return (MDI_FAILURE); 1785 } 1786 1787 MDI_CLIENT_LOCK(ct); 1788 1789 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1790 /* A path to the client is being freed */ 1791 MDI_CLIENT_UNLOCK(ct); 1792 return (MDI_BUSY); 1793 } 1794 1795 1796 if (MDI_CLIENT_IS_FAILED(ct)) { 1797 /* 1798 * Client is in failed state. Nothing more to do. 
1799 */ 1800 MDI_CLIENT_UNLOCK(ct); 1801 return (MDI_FAILURE); 1802 } 1803 1804 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1805 /* 1806 * Failover is already in progress; return BUSY 1807 */ 1808 MDI_CLIENT_UNLOCK(ct); 1809 return (MDI_BUSY); 1810 } 1811 /* 1812 * Make sure that mdi_pathinfo node state changes are processed. 1813 * We do not allow failovers to progress while client path state 1814 * changes are in progress 1815 */ 1816 if (ct->ct_unstable) { 1817 if (flags == MDI_FAILOVER_ASYNC) { 1818 MDI_CLIENT_UNLOCK(ct); 1819 return (MDI_BUSY); 1820 } else { 1821 while (ct->ct_unstable) 1822 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1823 } 1824 } 1825 1826 /* 1827 * Client device is in stable state. Before proceeding, perform sanity 1828 * checks again. 1829 */ 1830 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1831 (!i_ddi_devi_attached(ct->ct_dip))) { 1832 /* 1833 * Client is in failed state. Nothing more to do. 1834 */ 1835 MDI_CLIENT_UNLOCK(ct); 1836 return (MDI_FAILURE); 1837 } 1838 1839 /* 1840 * Set the client state as failover in progress. 1841 */ 1842 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1843 ct->ct_failover_flags = flags; 1844 MDI_CLIENT_UNLOCK(ct); 1845 1846 if (flags == MDI_FAILOVER_ASYNC) { 1847 /* 1848 * Submit the initiate failover request via CPR safe 1849 * taskq threads. 1850 */ 1851 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1852 ct, KM_SLEEP); 1853 return (MDI_ACCEPT); 1854 } else { 1855 /* 1856 * Synchronous failover mode. Typically invoked from the user 1857 * land. 1858 */ 1859 rv = i_mdi_failover(ct); 1860 } 1861 return (rv); 1862 } 1863 1864 /* 1865 * i_mdi_failover(): 1866 * internal failover function. Invokes vHCI drivers failover 1867 * callback function and process the failover status 1868 * Return Values: 1869 * None 1870 * 1871 * Note: A client device in failover state can not be detached or freed. 
1872 */ 1873 static int 1874 i_mdi_failover(void *arg) 1875 { 1876 int rv = MDI_SUCCESS; 1877 mdi_client_t *ct = (mdi_client_t *)arg; 1878 mdi_vhci_t *vh = ct->ct_vhci; 1879 1880 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1881 1882 if (vh->vh_ops->vo_failover != NULL) { 1883 /* 1884 * Call vHCI drivers callback routine 1885 */ 1886 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1887 ct->ct_failover_flags); 1888 } 1889 1890 MDI_CLIENT_LOCK(ct); 1891 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1892 1893 /* 1894 * Save the failover return status 1895 */ 1896 ct->ct_failover_status = rv; 1897 1898 /* 1899 * As a result of failover, client status would have been changed. 1900 * Update the client state and wake up anyone waiting on this client 1901 * device. 1902 */ 1903 i_mdi_client_update_state(ct); 1904 1905 cv_broadcast(&ct->ct_failover_cv); 1906 MDI_CLIENT_UNLOCK(ct); 1907 return (rv); 1908 } 1909 1910 /* 1911 * Load balancing is logical block. 1912 * IOs within the range described by region_size 1913 * would go on the same path. This would improve the 1914 * performance by cache-hit on some of the RAID devices. 1915 * Search only for online paths(At some point we 1916 * may want to balance across target ports). 1917 * If no paths are found then default to round-robin. 
1918 */ 1919 static int 1920 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1921 { 1922 int path_index = -1; 1923 int online_path_count = 0; 1924 int online_nonpref_path_count = 0; 1925 int region_size = ct->ct_lb_args->region_size; 1926 mdi_pathinfo_t *pip; 1927 mdi_pathinfo_t *next; 1928 int preferred, path_cnt; 1929 1930 pip = ct->ct_path_head; 1931 while (pip) { 1932 MDI_PI_LOCK(pip); 1933 if (MDI_PI(pip)->pi_state == 1934 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1935 online_path_count++; 1936 } else if (MDI_PI(pip)->pi_state == 1937 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1938 online_nonpref_path_count++; 1939 } 1940 next = (mdi_pathinfo_t *) 1941 MDI_PI(pip)->pi_client_link; 1942 MDI_PI_UNLOCK(pip); 1943 pip = next; 1944 } 1945 /* if found any online/preferred then use this type */ 1946 if (online_path_count > 0) { 1947 path_cnt = online_path_count; 1948 preferred = 1; 1949 } else if (online_nonpref_path_count > 0) { 1950 path_cnt = online_nonpref_path_count; 1951 preferred = 0; 1952 } else { 1953 path_cnt = 0; 1954 } 1955 if (path_cnt) { 1956 path_index = (bp->b_blkno >> region_size) % path_cnt; 1957 pip = ct->ct_path_head; 1958 while (pip && path_index != -1) { 1959 MDI_PI_LOCK(pip); 1960 if (path_index == 0 && 1961 (MDI_PI(pip)->pi_state == 1962 MDI_PATHINFO_STATE_ONLINE) && 1963 MDI_PI(pip)->pi_preferred == preferred) { 1964 MDI_PI_HOLD(pip); 1965 MDI_PI_UNLOCK(pip); 1966 *ret_pip = pip; 1967 return (MDI_SUCCESS); 1968 } 1969 path_index --; 1970 next = (mdi_pathinfo_t *) 1971 MDI_PI(pip)->pi_client_link; 1972 MDI_PI_UNLOCK(pip); 1973 pip = next; 1974 } 1975 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1976 "lba %llx: path %s %p", 1977 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1978 } 1979 return (MDI_FAILURE); 1980 } 1981 1982 /* 1983 * mdi_select_path(): 1984 * select a path to access a client device. 
1985 * 1986 * mdi_select_path() function is called by the vHCI drivers to 1987 * select a path to route the I/O request to. The caller passes 1988 * the block I/O data transfer structure ("buf") as one of the 1989 * parameters. The mpxio framework uses the buf structure 1990 * contents to maintain per path statistics (total I/O size / 1991 * count pending). If more than one online paths are available to 1992 * select, the framework automatically selects a suitable path 1993 * for routing I/O request. If a failover operation is active for 1994 * this client device the call shall be failed with MDI_BUSY error 1995 * code. 1996 * 1997 * By default this function returns a suitable path in online 1998 * state based on the current load balancing policy. Currently 1999 * we support LOAD_BALANCE_NONE (Previously selected online path 2000 * will continue to be used till the path is usable) and 2001 * LOAD_BALANCE_RR (Online paths will be selected in a round 2002 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 2003 * based on the logical block). The load balancing 2004 * through vHCI drivers configuration file (driver.conf). 2005 * 2006 * vHCI drivers may override this default behavior by specifying 2007 * appropriate flags. The meaning of the thrid argument depends 2008 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 2009 * then the argument is the "path instance" of the path to select. 2010 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 2011 * "start_pip". A non NULL "start_pip" is the starting point to 2012 * walk and find the next appropriate path. The following values 2013 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 2014 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 2015 * STANDBY path). 2016 * 2017 * The non-standard behavior is used by the scsi_vhci driver, 2018 * whenever it has to use a STANDBY/FAULTED path. Eg. 
during 2019 * attach of client devices (to avoid an unnecessary failover 2020 * when the STANDBY path comes up first), during failover 2021 * (to activate a STANDBY path as ONLINE). 2022 * 2023 * The selected path is returned in a a mdi_hold_path() state 2024 * (pi_ref_cnt). Caller should release the hold by calling 2025 * mdi_rele_path(). 2026 * 2027 * Return Values: 2028 * MDI_SUCCESS - Completed successfully 2029 * MDI_BUSY - Client device is busy failing over 2030 * MDI_NOPATH - Client device is online, but no valid path are 2031 * available to access this client device 2032 * MDI_FAILURE - Invalid client device or state 2033 * MDI_DEVI_ONLINING 2034 * - Client device (struct dev_info state) is in 2035 * onlining state. 2036 */ 2037 2038 /*ARGSUSED*/ 2039 int 2040 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2041 void *arg, mdi_pathinfo_t **ret_pip) 2042 { 2043 mdi_client_t *ct; 2044 mdi_pathinfo_t *pip; 2045 mdi_pathinfo_t *next; 2046 mdi_pathinfo_t *head; 2047 mdi_pathinfo_t *start; 2048 client_lb_t lbp; /* load balancing policy */ 2049 int sb = 1; /* standard behavior */ 2050 int preferred = 1; /* preferred path */ 2051 int cond, cont = 1; 2052 int retry = 0; 2053 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2054 int path_instance; /* request specific path instance */ 2055 2056 /* determine type of arg based on flags */ 2057 if (flags & MDI_SELECT_PATH_INSTANCE) { 2058 path_instance = (int)(intptr_t)arg; 2059 start_pip = NULL; 2060 } else { 2061 path_instance = 0; 2062 start_pip = (mdi_pathinfo_t *)arg; 2063 } 2064 2065 if (flags != 0) { 2066 /* 2067 * disable default behavior 2068 */ 2069 sb = 0; 2070 } 2071 2072 *ret_pip = NULL; 2073 ct = i_devi_get_client(cdip); 2074 if (ct == NULL) { 2075 /* mdi extensions are NULL, Nothing more to do */ 2076 return (MDI_FAILURE); 2077 } 2078 2079 MDI_CLIENT_LOCK(ct); 2080 2081 if (sb) { 2082 if (MDI_CLIENT_IS_FAILED(ct)) { 2083 /* 2084 * Client is not ready to accept any I/O requests. 
2085 * Fail this request. 2086 */ 2087 MDI_DEBUG(2, (MDI_NOTE, cdip, 2088 "client state offline ct = %p", (void *)ct)); 2089 MDI_CLIENT_UNLOCK(ct); 2090 return (MDI_FAILURE); 2091 } 2092 2093 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2094 /* 2095 * Check for Failover is in progress. If so tell the 2096 * caller that this device is busy. 2097 */ 2098 MDI_DEBUG(2, (MDI_NOTE, cdip, 2099 "client failover in progress ct = %p", 2100 (void *)ct)); 2101 MDI_CLIENT_UNLOCK(ct); 2102 return (MDI_BUSY); 2103 } 2104 2105 /* 2106 * Check to see whether the client device is attached. 2107 * If not so, let the vHCI driver manually select a path 2108 * (standby) and let the probe/attach process to continue. 2109 */ 2110 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2111 MDI_DEBUG(4, (MDI_NOTE, cdip, 2112 "devi is onlining ct = %p", (void *)ct)); 2113 MDI_CLIENT_UNLOCK(ct); 2114 return (MDI_DEVI_ONLINING); 2115 } 2116 } 2117 2118 /* 2119 * Cache in the client list head. If head of the list is NULL 2120 * return MDI_NOPATH 2121 */ 2122 head = ct->ct_path_head; 2123 if (head == NULL) { 2124 MDI_CLIENT_UNLOCK(ct); 2125 return (MDI_NOPATH); 2126 } 2127 2128 /* Caller is specifying a specific pathinfo path by path_instance */ 2129 if (path_instance) { 2130 /* search for pathinfo with correct path_instance */ 2131 for (pip = head; 2132 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2133 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2134 ; 2135 2136 /* If path can't be selected then MDI_NOPATH is returned. */ 2137 if (pip == NULL) { 2138 MDI_CLIENT_UNLOCK(ct); 2139 return (MDI_NOPATH); 2140 } 2141 2142 /* 2143 * Verify state of path. When asked to select a specific 2144 * path_instance, we select the requested path in any 2145 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2146 * We don't however select paths where the pHCI has detached. 
2147 * NOTE: last pathinfo node of an opened client device may 2148 * exist in an OFFLINE state after the pHCI associated with 2149 * that path has detached (but pi_phci will be NULL if that 2150 * has occurred). 2151 */ 2152 MDI_PI_LOCK(pip); 2153 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2154 (MDI_PI(pip)->pi_phci == NULL)) { 2155 MDI_PI_UNLOCK(pip); 2156 MDI_CLIENT_UNLOCK(ct); 2157 return (MDI_FAILURE); 2158 } 2159 2160 /* Return MDI_BUSY if we have a transient condition */ 2161 if (MDI_PI_IS_TRANSIENT(pip)) { 2162 MDI_PI_UNLOCK(pip); 2163 MDI_CLIENT_UNLOCK(ct); 2164 return (MDI_BUSY); 2165 } 2166 2167 /* 2168 * Return the path in hold state. Caller should release the 2169 * lock by calling mdi_rele_path() 2170 */ 2171 MDI_PI_HOLD(pip); 2172 MDI_PI_UNLOCK(pip); 2173 *ret_pip = pip; 2174 MDI_CLIENT_UNLOCK(ct); 2175 return (MDI_SUCCESS); 2176 } 2177 2178 /* 2179 * for non default behavior, bypass current 2180 * load balancing policy and always use LOAD_BALANCE_RR 2181 * except that the start point will be adjusted based 2182 * on the provided start_pip 2183 */ 2184 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2185 2186 switch (lbp) { 2187 case LOAD_BALANCE_NONE: 2188 /* 2189 * Load balancing is None or Alternate path mode 2190 * Start looking for a online mdi_pathinfo node starting from 2191 * last known selected path 2192 */ 2193 preferred = 1; 2194 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2195 if (pip == NULL) { 2196 pip = head; 2197 } 2198 start = pip; 2199 do { 2200 MDI_PI_LOCK(pip); 2201 /* 2202 * No need to explicitly check if the path is disabled. 2203 * Since we are checking for state == ONLINE and the 2204 * same variable is used for DISABLE/ENABLE information. 2205 */ 2206 if ((MDI_PI(pip)->pi_state == 2207 MDI_PATHINFO_STATE_ONLINE) && 2208 preferred == MDI_PI(pip)->pi_preferred) { 2209 /* 2210 * Return the path in hold state. 
Caller should 2211 * release the lock by calling mdi_rele_path() 2212 */ 2213 MDI_PI_HOLD(pip); 2214 MDI_PI_UNLOCK(pip); 2215 ct->ct_path_last = pip; 2216 *ret_pip = pip; 2217 MDI_CLIENT_UNLOCK(ct); 2218 return (MDI_SUCCESS); 2219 } 2220 2221 /* 2222 * Path is busy. 2223 */ 2224 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2225 MDI_PI_IS_TRANSIENT(pip)) 2226 retry = 1; 2227 /* 2228 * Keep looking for a next available online path 2229 */ 2230 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2231 if (next == NULL) { 2232 next = head; 2233 } 2234 MDI_PI_UNLOCK(pip); 2235 pip = next; 2236 if (start == pip && preferred) { 2237 preferred = 0; 2238 } else if (start == pip && !preferred) { 2239 cont = 0; 2240 } 2241 } while (cont); 2242 break; 2243 2244 case LOAD_BALANCE_LBA: 2245 /* 2246 * Make sure we are looking 2247 * for an online path. Otherwise, if it is for a STANDBY 2248 * path request, it will go through and fetch an ONLINE 2249 * path which is not desirable. 2250 */ 2251 if ((ct->ct_lb_args != NULL) && 2252 (ct->ct_lb_args->region_size) && bp && 2253 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2254 if (i_mdi_lba_lb(ct, ret_pip, bp) 2255 == MDI_SUCCESS) { 2256 MDI_CLIENT_UNLOCK(ct); 2257 return (MDI_SUCCESS); 2258 } 2259 } 2260 /* FALLTHROUGH */ 2261 case LOAD_BALANCE_RR: 2262 /* 2263 * Load balancing is Round Robin. Start looking for a online 2264 * mdi_pathinfo node starting from last known selected path 2265 * as the start point. If override flags are specified, 2266 * process accordingly. 2267 * If the search is already in effect(start_pip not null), 2268 * then lets just use the same path preference to continue the 2269 * traversal. 2270 */ 2271 2272 if (start_pip != NULL) { 2273 preferred = MDI_PI(start_pip)->pi_preferred; 2274 } else { 2275 preferred = 1; 2276 } 2277 2278 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2279 if (start == NULL) { 2280 pip = head; 2281 } else { 2282 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2283 if (pip == NULL) { 2284 if ( flags & MDI_SELECT_NO_PREFERRED) { 2285 /* 2286 * Return since we hit the end of list 2287 */ 2288 MDI_CLIENT_UNLOCK(ct); 2289 return (MDI_NOPATH); 2290 } 2291 2292 if (!sb) { 2293 if (preferred == 0) { 2294 /* 2295 * Looks like we have completed 2296 * the traversal as preferred 2297 * value is 0. Time to bail out. 2298 */ 2299 *ret_pip = NULL; 2300 MDI_CLIENT_UNLOCK(ct); 2301 return (MDI_NOPATH); 2302 } else { 2303 /* 2304 * Looks like we reached the 2305 * end of the list. Lets enable 2306 * traversal of non preferred 2307 * paths. 2308 */ 2309 preferred = 0; 2310 } 2311 } 2312 pip = head; 2313 } 2314 } 2315 start = pip; 2316 do { 2317 MDI_PI_LOCK(pip); 2318 if (sb) { 2319 cond = ((MDI_PI(pip)->pi_state == 2320 MDI_PATHINFO_STATE_ONLINE && 2321 MDI_PI(pip)->pi_preferred == 2322 preferred) ? 1 : 0); 2323 } else { 2324 if (flags == MDI_SELECT_ONLINE_PATH) { 2325 cond = ((MDI_PI(pip)->pi_state == 2326 MDI_PATHINFO_STATE_ONLINE && 2327 MDI_PI(pip)->pi_preferred == 2328 preferred) ? 1 : 0); 2329 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2330 cond = ((MDI_PI(pip)->pi_state == 2331 MDI_PATHINFO_STATE_STANDBY && 2332 MDI_PI(pip)->pi_preferred == 2333 preferred) ? 1 : 0); 2334 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2335 MDI_SELECT_STANDBY_PATH)) { 2336 cond = (((MDI_PI(pip)->pi_state == 2337 MDI_PATHINFO_STATE_ONLINE || 2338 (MDI_PI(pip)->pi_state == 2339 MDI_PATHINFO_STATE_STANDBY)) && 2340 MDI_PI(pip)->pi_preferred == 2341 preferred) ? 
1 : 0); 2342 } else if (flags == 2343 (MDI_SELECT_STANDBY_PATH | 2344 MDI_SELECT_ONLINE_PATH | 2345 MDI_SELECT_USER_DISABLE_PATH)) { 2346 cond = (((MDI_PI(pip)->pi_state == 2347 MDI_PATHINFO_STATE_ONLINE || 2348 (MDI_PI(pip)->pi_state == 2349 MDI_PATHINFO_STATE_STANDBY) || 2350 (MDI_PI(pip)->pi_state == 2351 (MDI_PATHINFO_STATE_ONLINE| 2352 MDI_PATHINFO_STATE_USER_DISABLE)) || 2353 (MDI_PI(pip)->pi_state == 2354 (MDI_PATHINFO_STATE_STANDBY | 2355 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2356 MDI_PI(pip)->pi_preferred == 2357 preferred) ? 1 : 0); 2358 } else if (flags == 2359 (MDI_SELECT_STANDBY_PATH | 2360 MDI_SELECT_ONLINE_PATH | 2361 MDI_SELECT_NO_PREFERRED)) { 2362 cond = (((MDI_PI(pip)->pi_state == 2363 MDI_PATHINFO_STATE_ONLINE) || 2364 (MDI_PI(pip)->pi_state == 2365 MDI_PATHINFO_STATE_STANDBY)) 2366 ? 1 : 0); 2367 } else { 2368 cond = 0; 2369 } 2370 } 2371 /* 2372 * No need to explicitly check if the path is disabled. 2373 * Since we are checking for state == ONLINE and the 2374 * same variable is used for DISABLE/ENABLE information. 2375 */ 2376 if (cond) { 2377 /* 2378 * Return the path in hold state. Caller should 2379 * release the lock by calling mdi_rele_path() 2380 */ 2381 MDI_PI_HOLD(pip); 2382 MDI_PI_UNLOCK(pip); 2383 if (sb) 2384 ct->ct_path_last = pip; 2385 *ret_pip = pip; 2386 MDI_CLIENT_UNLOCK(ct); 2387 return (MDI_SUCCESS); 2388 } 2389 /* 2390 * Path is busy. 2391 */ 2392 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2393 MDI_PI_IS_TRANSIENT(pip)) 2394 retry = 1; 2395 2396 /* 2397 * Keep looking for a next available online path 2398 */ 2399 do_again: 2400 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2401 if (next == NULL) { 2402 if ( flags & MDI_SELECT_NO_PREFERRED) { 2403 /* 2404 * Bail out since we hit the end of list 2405 */ 2406 MDI_PI_UNLOCK(pip); 2407 break; 2408 } 2409 2410 if (!sb) { 2411 if (preferred == 1) { 2412 /* 2413 * Looks like we reached the 2414 * end of the list. Lets enable 2415 * traversal of non preferred 2416 * paths. 
2417 */ 2418 preferred = 0; 2419 next = head; 2420 } else { 2421 /* 2422 * We have done both the passes 2423 * Preferred as well as for 2424 * Non-preferred. Bail out now. 2425 */ 2426 cont = 0; 2427 } 2428 } else { 2429 /* 2430 * Standard behavior case. 2431 */ 2432 next = head; 2433 } 2434 } 2435 MDI_PI_UNLOCK(pip); 2436 if (cont == 0) { 2437 break; 2438 } 2439 pip = next; 2440 2441 if (!sb) { 2442 /* 2443 * We need to handle the selection of 2444 * non-preferred path in the following 2445 * case: 2446 * 2447 * +------+ +------+ +------+ +-----+ 2448 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2449 * +------+ +------+ +------+ +-----+ 2450 * 2451 * If we start the search with B, we need to 2452 * skip beyond B to pick C which is non - 2453 * preferred in the second pass. The following 2454 * test, if true, will allow us to skip over 2455 * the 'start'(B in the example) to select 2456 * other non preferred elements. 2457 */ 2458 if ((start_pip != NULL) && (start_pip == pip) && 2459 (MDI_PI(start_pip)->pi_preferred 2460 != preferred)) { 2461 /* 2462 * try again after going past the start 2463 * pip 2464 */ 2465 MDI_PI_LOCK(pip); 2466 goto do_again; 2467 } 2468 } else { 2469 /* 2470 * Standard behavior case 2471 */ 2472 if (start == pip && preferred) { 2473 /* look for nonpreferred paths */ 2474 preferred = 0; 2475 } else if (start == pip && !preferred) { 2476 /* 2477 * Exit condition 2478 */ 2479 cont = 0; 2480 } 2481 } 2482 } while (cont); 2483 break; 2484 } 2485 2486 MDI_CLIENT_UNLOCK(ct); 2487 if (retry == 1) { 2488 return (MDI_BUSY); 2489 } else { 2490 return (MDI_NOPATH); 2491 } 2492 } 2493 2494 /* 2495 * For a client, return the next available path to any phci 2496 * 2497 * Note: 2498 * Caller should hold the branch's devinfo node to get a consistent 2499 * snap shot of the mdi_pathinfo nodes. 2500 * 2501 * Please note that even the list is stable the mdi_pathinfo 2502 * node state and properties are volatile. 
The caller should lock 2503 * and unlock the nodes by calling mdi_pi_lock() and 2504 * mdi_pi_unlock() functions to get a stable properties. 2505 * 2506 * If there is a need to use the nodes beyond the hold of the 2507 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2508 * need to be held against unexpected removal by calling 2509 * mdi_hold_path() and should be released by calling 2510 * mdi_rele_path() on completion. 2511 */ 2512 mdi_pathinfo_t * 2513 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2514 { 2515 mdi_client_t *ct; 2516 2517 if (!MDI_CLIENT(ct_dip)) 2518 return (NULL); 2519 2520 /* 2521 * Walk through client link 2522 */ 2523 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2524 ASSERT(ct != NULL); 2525 2526 if (pip == NULL) 2527 return ((mdi_pathinfo_t *)ct->ct_path_head); 2528 2529 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2530 } 2531 2532 /* 2533 * For a phci, return the next available path to any client 2534 * Note: ditto mdi_get_next_phci_path() 2535 */ 2536 mdi_pathinfo_t * 2537 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2538 { 2539 mdi_phci_t *ph; 2540 2541 if (!MDI_PHCI(ph_dip)) 2542 return (NULL); 2543 2544 /* 2545 * Walk through pHCI link 2546 */ 2547 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2548 ASSERT(ph != NULL); 2549 2550 if (pip == NULL) 2551 return ((mdi_pathinfo_t *)ph->ph_path_head); 2552 2553 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2554 } 2555 2556 /* 2557 * mdi_hold_path(): 2558 * Hold the mdi_pathinfo node against unwanted unexpected free. 2559 * Return Values: 2560 * None 2561 */ 2562 void 2563 mdi_hold_path(mdi_pathinfo_t *pip) 2564 { 2565 if (pip) { 2566 MDI_PI_LOCK(pip); 2567 MDI_PI_HOLD(pip); 2568 MDI_PI_UNLOCK(pip); 2569 } 2570 } 2571 2572 2573 /* 2574 * mdi_rele_path(): 2575 * Release the mdi_pathinfo node which was selected 2576 * through mdi_select_path() mechanism or manually held by 2577 * calling mdi_hold_path(). 
2578 * Return Values: 2579 * None 2580 */ 2581 void 2582 mdi_rele_path(mdi_pathinfo_t *pip) 2583 { 2584 if (pip) { 2585 MDI_PI_LOCK(pip); 2586 MDI_PI_RELE(pip); 2587 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2588 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2589 } 2590 MDI_PI_UNLOCK(pip); 2591 } 2592 } 2593 2594 /* 2595 * mdi_pi_lock(): 2596 * Lock the mdi_pathinfo node. 2597 * Note: 2598 * The caller should release the lock by calling mdi_pi_unlock() 2599 */ 2600 void 2601 mdi_pi_lock(mdi_pathinfo_t *pip) 2602 { 2603 ASSERT(pip != NULL); 2604 if (pip) { 2605 MDI_PI_LOCK(pip); 2606 } 2607 } 2608 2609 2610 /* 2611 * mdi_pi_unlock(): 2612 * Unlock the mdi_pathinfo node. 2613 * Note: 2614 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2615 */ 2616 void 2617 mdi_pi_unlock(mdi_pathinfo_t *pip) 2618 { 2619 ASSERT(pip != NULL); 2620 if (pip) { 2621 MDI_PI_UNLOCK(pip); 2622 } 2623 } 2624 2625 /* 2626 * mdi_pi_find(): 2627 * Search the list of mdi_pathinfo nodes attached to the 2628 * pHCI/Client device node whose path address matches "paddr". 2629 * Returns a pointer to the mdi_pathinfo node if a matching node is 2630 * found. 2631 * Return Values: 2632 * mdi_pathinfo node handle 2633 * NULL 2634 * Notes: 2635 * Caller need not hold any locks to call this function. 2636 */ 2637 mdi_pathinfo_t * 2638 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2639 { 2640 mdi_phci_t *ph; 2641 mdi_vhci_t *vh; 2642 mdi_client_t *ct; 2643 mdi_pathinfo_t *pip = NULL; 2644 2645 MDI_DEBUG(2, (MDI_NOTE, pdip, 2646 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : "")); 2647 if ((pdip == NULL) || (paddr == NULL)) { 2648 return (NULL); 2649 } 2650 ph = i_devi_get_phci(pdip); 2651 if (ph == NULL) { 2652 /* 2653 * Invalid pHCI device, Nothing more to do. 2654 */ 2655 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci")); 2656 return (NULL); 2657 } 2658 2659 vh = ph->ph_vhci; 2660 if (vh == NULL) { 2661 /* 2662 * Invalid vHCI device, Nothing more to do. 
2663 */ 2664 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci")); 2665 return (NULL); 2666 } 2667 2668 /* 2669 * Look for pathinfo node identified by paddr. 2670 */ 2671 if (caddr == NULL) { 2672 /* 2673 * Find a mdi_pathinfo node under pHCI list for a matching 2674 * unit address. 2675 */ 2676 MDI_PHCI_LOCK(ph); 2677 if (MDI_PHCI_IS_OFFLINE(ph)) { 2678 MDI_DEBUG(2, (MDI_WARN, pdip, 2679 "offline phci %p", (void *)ph)); 2680 MDI_PHCI_UNLOCK(ph); 2681 return (NULL); 2682 } 2683 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2684 2685 while (pip != NULL) { 2686 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2687 break; 2688 } 2689 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2690 } 2691 MDI_PHCI_UNLOCK(ph); 2692 MDI_DEBUG(2, (MDI_NOTE, pdip, 2693 "found %s %p", mdi_pi_spathname(pip), (void *)pip)); 2694 return (pip); 2695 } 2696 2697 /* 2698 * XXX - Is the rest of the code in this function really necessary? 2699 * The consumers of mdi_pi_find() can search for the desired pathinfo 2700 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2701 * whether the search is based on the pathinfo nodes attached to 2702 * the pHCI or the client node, the result will be the same. 2703 */ 2704 2705 /* 2706 * Find the client device corresponding to 'caddr' 2707 */ 2708 MDI_VHCI_CLIENT_LOCK(vh); 2709 2710 /* 2711 * XXX - Passing NULL to the following function works as long as the 2712 * the client addresses (caddr) are unique per vhci basis. 2713 */ 2714 ct = i_mdi_client_find(vh, NULL, caddr); 2715 if (ct == NULL) { 2716 /* 2717 * Client not found, Obviously mdi_pathinfo node has not been 2718 * created yet. 2719 */ 2720 MDI_VHCI_CLIENT_UNLOCK(vh); 2721 MDI_DEBUG(2, (MDI_NOTE, pdip, 2722 "client not found for caddr @%s", caddr ? 
caddr : "")); 2723 return (NULL); 2724 } 2725 2726 /* 2727 * Hold the client lock and look for a mdi_pathinfo node with matching 2728 * pHCI and paddr 2729 */ 2730 MDI_CLIENT_LOCK(ct); 2731 2732 /* 2733 * Release the global mutex as it is no more needed. Note: We always 2734 * respect the locking order while acquiring. 2735 */ 2736 MDI_VHCI_CLIENT_UNLOCK(vh); 2737 2738 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2739 while (pip != NULL) { 2740 /* 2741 * Compare the unit address 2742 */ 2743 if ((MDI_PI(pip)->pi_phci == ph) && 2744 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2745 break; 2746 } 2747 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2748 } 2749 MDI_CLIENT_UNLOCK(ct); 2750 MDI_DEBUG(2, (MDI_NOTE, pdip, 2751 "found: %s %p", mdi_pi_spathname(pip), (void *)pip)); 2752 return (pip); 2753 } 2754 2755 /* 2756 * mdi_pi_alloc(): 2757 * Allocate and initialize a new instance of a mdi_pathinfo node. 2758 * The mdi_pathinfo node returned by this function identifies a 2759 * unique device path is capable of having properties attached 2760 * and passed to mdi_pi_online() to fully attach and online the 2761 * path and client device node. 2762 * The mdi_pathinfo node returned by this function must be 2763 * destroyed using mdi_pi_free() if the path is no longer 2764 * operational or if the caller fails to attach a client device 2765 * node when calling mdi_pi_online(). The framework will not free 2766 * the resources allocated. 2767 * This function can be called from both interrupt and kernel 2768 * contexts. DDI_NOSLEEP flag should be used while calling 2769 * from interrupt contexts. 
2770 * Return Values: 2771 * MDI_SUCCESS 2772 * MDI_FAILURE 2773 * MDI_NOMEM 2774 */ 2775 /*ARGSUSED*/ 2776 int 2777 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2778 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2779 { 2780 mdi_vhci_t *vh; 2781 mdi_phci_t *ph; 2782 mdi_client_t *ct; 2783 mdi_pathinfo_t *pip = NULL; 2784 dev_info_t *cdip; 2785 int rv = MDI_NOMEM; 2786 int path_allocated = 0; 2787 2788 MDI_DEBUG(2, (MDI_NOTE, pdip, 2789 "cname %s: caddr@%s paddr@%s", 2790 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : "")); 2791 2792 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2793 ret_pip == NULL) { 2794 /* Nothing more to do */ 2795 return (MDI_FAILURE); 2796 } 2797 2798 *ret_pip = NULL; 2799 2800 /* No allocations on detaching pHCI */ 2801 if (DEVI_IS_DETACHING(pdip)) { 2802 /* Invalid pHCI device, return failure */ 2803 MDI_DEBUG(1, (MDI_WARN, pdip, 2804 "!detaching pHCI=%p", (void *)pdip)); 2805 return (MDI_FAILURE); 2806 } 2807 2808 ph = i_devi_get_phci(pdip); 2809 ASSERT(ph != NULL); 2810 if (ph == NULL) { 2811 /* Invalid pHCI device, return failure */ 2812 MDI_DEBUG(1, (MDI_WARN, pdip, 2813 "!invalid pHCI=%p", (void *)pdip)); 2814 return (MDI_FAILURE); 2815 } 2816 2817 MDI_PHCI_LOCK(ph); 2818 vh = ph->ph_vhci; 2819 if (vh == NULL) { 2820 /* Invalid vHCI device, return failure */ 2821 MDI_DEBUG(1, (MDI_WARN, pdip, 2822 "!invalid vHCI=%p", (void *)pdip)); 2823 MDI_PHCI_UNLOCK(ph); 2824 return (MDI_FAILURE); 2825 } 2826 2827 if (MDI_PHCI_IS_READY(ph) == 0) { 2828 /* 2829 * Do not allow new node creation when pHCI is in 2830 * offline/suspended states 2831 */ 2832 MDI_DEBUG(1, (MDI_WARN, pdip, 2833 "pHCI=%p is not ready", (void *)ph)); 2834 MDI_PHCI_UNLOCK(ph); 2835 return (MDI_BUSY); 2836 } 2837 MDI_PHCI_UNSTABLE(ph); 2838 MDI_PHCI_UNLOCK(ph); 2839 2840 /* look for a matching client, create one if not found */ 2841 MDI_VHCI_CLIENT_LOCK(vh); 2842 ct = 
i_mdi_client_find(vh, cname, caddr); 2843 if (ct == NULL) { 2844 ct = i_mdi_client_alloc(vh, cname, caddr); 2845 ASSERT(ct != NULL); 2846 } 2847 2848 if (ct->ct_dip == NULL) { 2849 /* 2850 * Allocate a devinfo node 2851 */ 2852 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2853 compatible, ncompatible); 2854 if (ct->ct_dip == NULL) { 2855 (void) i_mdi_client_free(vh, ct); 2856 goto fail; 2857 } 2858 } 2859 cdip = ct->ct_dip; 2860 2861 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2862 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2863 2864 MDI_CLIENT_LOCK(ct); 2865 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2866 while (pip != NULL) { 2867 /* 2868 * Compare the unit address 2869 */ 2870 if ((MDI_PI(pip)->pi_phci == ph) && 2871 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2872 break; 2873 } 2874 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2875 } 2876 MDI_CLIENT_UNLOCK(ct); 2877 2878 if (pip == NULL) { 2879 /* 2880 * This is a new path for this client device. Allocate and 2881 * initialize a new pathinfo node 2882 */ 2883 pip = i_mdi_pi_alloc(ph, paddr, ct); 2884 ASSERT(pip != NULL); 2885 path_allocated = 1; 2886 } 2887 rv = MDI_SUCCESS; 2888 2889 fail: 2890 /* 2891 * Release the global mutex. 
2892 */ 2893 MDI_VHCI_CLIENT_UNLOCK(vh); 2894 2895 /* 2896 * Mark the pHCI as stable 2897 */ 2898 MDI_PHCI_LOCK(ph); 2899 MDI_PHCI_STABLE(ph); 2900 MDI_PHCI_UNLOCK(ph); 2901 *ret_pip = pip; 2902 2903 MDI_DEBUG(2, (MDI_NOTE, pdip, 2904 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip)); 2905 2906 if (path_allocated) 2907 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2908 2909 return (rv); 2910 } 2911 2912 /*ARGSUSED*/ 2913 int 2914 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2915 int flags, mdi_pathinfo_t **ret_pip) 2916 { 2917 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2918 flags, ret_pip)); 2919 } 2920 2921 /* 2922 * i_mdi_pi_alloc(): 2923 * Allocate a mdi_pathinfo node and add to the pHCI path list 2924 * Return Values: 2925 * mdi_pathinfo 2926 */ 2927 /*ARGSUSED*/ 2928 static mdi_pathinfo_t * 2929 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2930 { 2931 mdi_pathinfo_t *pip; 2932 int ct_circular; 2933 int ph_circular; 2934 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */ 2935 char *path_persistent; 2936 int path_instance; 2937 mod_hash_val_t hv; 2938 2939 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2940 2941 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2942 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2943 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2944 MDI_PATHINFO_STATE_TRANSIENT; 2945 2946 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2947 MDI_PI_SET_USER_DISABLE(pip); 2948 2949 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2950 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2951 2952 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2953 MDI_PI_SET_DRV_DISABLE(pip); 2954 2955 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2956 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2957 MDI_PI(pip)->pi_client = ct; 2958 MDI_PI(pip)->pi_phci = ph; 2959 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2960 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2961 
2962 /* 2963 * We form the "path" to the pathinfo node, and see if we have 2964 * already allocated a 'path_instance' for that "path". If so, 2965 * we use the already allocated 'path_instance'. If not, we 2966 * allocate a new 'path_instance' and associate it with a copy of 2967 * the "path" string (which is never freed). The association 2968 * between a 'path_instance' this "path" string persists until 2969 * reboot. 2970 */ 2971 mutex_enter(&mdi_pathmap_mutex); 2972 (void) ddi_pathname(ph->ph_dip, path); 2973 (void) sprintf(path + strlen(path), "/%s@%s", 2974 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2975 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2976 path_instance = (uint_t)(intptr_t)hv; 2977 } else { 2978 /* allocate a new 'path_instance' and persistent "path" */ 2979 path_instance = mdi_pathmap_instance++; 2980 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2981 (void) mod_hash_insert(mdi_pathmap_bypath, 2982 (mod_hash_key_t)path_persistent, 2983 (mod_hash_val_t)(intptr_t)path_instance); 2984 (void) mod_hash_insert(mdi_pathmap_byinstance, 2985 (mod_hash_key_t)(intptr_t)path_instance, 2986 (mod_hash_val_t)path_persistent); 2987 2988 /* create shortpath name */ 2989 (void) snprintf(path, sizeof(path), "%s%d/%s@%s", 2990 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip), 2991 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2992 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2993 (void) mod_hash_insert(mdi_pathmap_sbyinstance, 2994 (mod_hash_key_t)(intptr_t)path_instance, 2995 (mod_hash_val_t)path_persistent); 2996 } 2997 mutex_exit(&mdi_pathmap_mutex); 2998 MDI_PI(pip)->pi_path_instance = path_instance; 2999 3000 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 3001 ASSERT(MDI_PI(pip)->pi_prop != NULL); 3002 MDI_PI(pip)->pi_pprivate = NULL; 3003 MDI_PI(pip)->pi_cprivate = NULL; 3004 MDI_PI(pip)->pi_vprivate = NULL; 3005 MDI_PI(pip)->pi_client_link = NULL; 3006 MDI_PI(pip)->pi_phci_link = 
NULL; 3007 MDI_PI(pip)->pi_ref_cnt = 0; 3008 MDI_PI(pip)->pi_kstats = NULL; 3009 MDI_PI(pip)->pi_preferred = 1; 3010 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 3011 3012 /* 3013 * Lock both dev_info nodes against changes in parallel. 3014 * 3015 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 3016 * This atypical operation is done to synchronize pathinfo nodes 3017 * during devinfo snapshot (see di_register_pip) by 'pretending' that 3018 * the pathinfo nodes are children of the Client. 3019 */ 3020 ndi_devi_enter(ct->ct_dip, &ct_circular); 3021 ndi_devi_enter(ph->ph_dip, &ph_circular); 3022 3023 i_mdi_phci_add_path(ph, pip); 3024 i_mdi_client_add_path(ct, pip); 3025 3026 ndi_devi_exit(ph->ph_dip, ph_circular); 3027 ndi_devi_exit(ct->ct_dip, ct_circular); 3028 3029 return (pip); 3030 } 3031 3032 /* 3033 * mdi_pi_pathname_by_instance(): 3034 * Lookup of "path" by 'path_instance'. Return "path". 3035 * NOTE: returned "path" remains valid forever (until reboot). 3036 */ 3037 char * 3038 mdi_pi_pathname_by_instance(int path_instance) 3039 { 3040 char *path; 3041 mod_hash_val_t hv; 3042 3043 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3044 mutex_enter(&mdi_pathmap_mutex); 3045 if (mod_hash_find(mdi_pathmap_byinstance, 3046 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3047 path = (char *)hv; 3048 else 3049 path = NULL; 3050 mutex_exit(&mdi_pathmap_mutex); 3051 return (path); 3052 } 3053 3054 /* 3055 * mdi_pi_spathname_by_instance(): 3056 * Lookup of "shortpath" by 'path_instance'. Return "shortpath". 3057 * NOTE: returned "shortpath" remains valid forever (until reboot). 
3058 */ 3059 char * 3060 mdi_pi_spathname_by_instance(int path_instance) 3061 { 3062 char *path; 3063 mod_hash_val_t hv; 3064 3065 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3066 mutex_enter(&mdi_pathmap_mutex); 3067 if (mod_hash_find(mdi_pathmap_sbyinstance, 3068 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3069 path = (char *)hv; 3070 else 3071 path = NULL; 3072 mutex_exit(&mdi_pathmap_mutex); 3073 return (path); 3074 } 3075 3076 3077 /* 3078 * i_mdi_phci_add_path(): 3079 * Add a mdi_pathinfo node to pHCI list. 3080 * Notes: 3081 * Caller should per-pHCI mutex 3082 */ 3083 static void 3084 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3085 { 3086 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3087 3088 MDI_PHCI_LOCK(ph); 3089 if (ph->ph_path_head == NULL) { 3090 ph->ph_path_head = pip; 3091 } else { 3092 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3093 } 3094 ph->ph_path_tail = pip; 3095 ph->ph_path_count++; 3096 MDI_PHCI_UNLOCK(ph); 3097 } 3098 3099 /* 3100 * i_mdi_client_add_path(): 3101 * Add mdi_pathinfo node to client list 3102 */ 3103 static void 3104 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3105 { 3106 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3107 3108 MDI_CLIENT_LOCK(ct); 3109 if (ct->ct_path_head == NULL) { 3110 ct->ct_path_head = pip; 3111 } else { 3112 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3113 } 3114 ct->ct_path_tail = pip; 3115 ct->ct_path_count++; 3116 MDI_CLIENT_UNLOCK(ct); 3117 } 3118 3119 /* 3120 * mdi_pi_free(): 3121 * Free the mdi_pathinfo node and also client device node if this 3122 * is the last path to the device 3123 * Return Values: 3124 * MDI_SUCCESS 3125 * MDI_FAILURE 3126 * MDI_BUSY 3127 */ 3128 /*ARGSUSED*/ 3129 int 3130 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3131 { 3132 int rv = MDI_FAILURE; 3133 mdi_vhci_t *vh; 3134 mdi_phci_t *ph; 3135 mdi_client_t *ct; 3136 int (*f)(); 3137 int client_held = 0; 3138 3139 MDI_PI_LOCK(pip); 3140 ph = MDI_PI(pip)->pi_phci; 3141 
ASSERT(ph != NULL); 3142 if (ph == NULL) { 3143 /* 3144 * Invalid pHCI device, return failure 3145 */ 3146 MDI_DEBUG(1, (MDI_WARN, NULL, 3147 "!invalid pHCI: pip %s %p", 3148 mdi_pi_spathname(pip), (void *)pip)); 3149 MDI_PI_UNLOCK(pip); 3150 return (MDI_FAILURE); 3151 } 3152 3153 vh = ph->ph_vhci; 3154 ASSERT(vh != NULL); 3155 if (vh == NULL) { 3156 /* Invalid pHCI device, return failure */ 3157 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3158 "!invalid vHCI: pip %s %p", 3159 mdi_pi_spathname(pip), (void *)pip)); 3160 MDI_PI_UNLOCK(pip); 3161 return (MDI_FAILURE); 3162 } 3163 3164 ct = MDI_PI(pip)->pi_client; 3165 ASSERT(ct != NULL); 3166 if (ct == NULL) { 3167 /* 3168 * Invalid Client device, return failure 3169 */ 3170 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3171 "!invalid client: pip %s %p", 3172 mdi_pi_spathname(pip), (void *)pip)); 3173 MDI_PI_UNLOCK(pip); 3174 return (MDI_FAILURE); 3175 } 3176 3177 /* 3178 * Check to see for busy condition. A mdi_pathinfo can only be freed 3179 * if the node state is either offline or init and the reference count 3180 * is zero. 3181 */ 3182 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3183 MDI_PI_IS_INITING(pip))) { 3184 /* 3185 * Node is busy 3186 */ 3187 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3188 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3189 MDI_PI_UNLOCK(pip); 3190 return (MDI_BUSY); 3191 } 3192 3193 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3194 /* 3195 * Give a chance for pending I/Os to complete. 3196 */ 3197 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3198 "!%d cmds still pending on path: %s %p", 3199 MDI_PI(pip)->pi_ref_cnt, 3200 mdi_pi_spathname(pip), (void *)pip)); 3201 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3202 &MDI_PI(pip)->pi_mutex, 3203 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3204 /* 3205 * The timeout time reached without ref_cnt being zero 3206 * being signaled. 
3207 */ 3208 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3209 "!Timeout reached on path %s %p without the cond", 3210 mdi_pi_spathname(pip), (void *)pip)); 3211 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3212 "!%d cmds still pending on path %s %p", 3213 MDI_PI(pip)->pi_ref_cnt, 3214 mdi_pi_spathname(pip), (void *)pip)); 3215 MDI_PI_UNLOCK(pip); 3216 return (MDI_BUSY); 3217 } 3218 } 3219 if (MDI_PI(pip)->pi_pm_held) { 3220 client_held = 1; 3221 } 3222 MDI_PI_UNLOCK(pip); 3223 3224 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3225 3226 MDI_CLIENT_LOCK(ct); 3227 3228 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3229 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3230 3231 /* 3232 * Wait till failover is complete before removing this node. 3233 */ 3234 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3235 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3236 3237 MDI_CLIENT_UNLOCK(ct); 3238 MDI_VHCI_CLIENT_LOCK(vh); 3239 MDI_CLIENT_LOCK(ct); 3240 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3241 3242 if (!MDI_PI_IS_INITING(pip)) { 3243 f = vh->vh_ops->vo_pi_uninit; 3244 if (f != NULL) { 3245 rv = (*f)(vh->vh_dip, pip, 0); 3246 } 3247 } 3248 /* 3249 * If vo_pi_uninit() completed successfully. 3250 */ 3251 if (rv == MDI_SUCCESS) { 3252 if (client_held) { 3253 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3254 "i_mdi_pm_rele_client\n")); 3255 i_mdi_pm_rele_client(ct, 1); 3256 } 3257 i_mdi_pi_free(ph, pip, ct); 3258 if (ct->ct_path_count == 0) { 3259 /* 3260 * Client lost its last path. 
3261 * Clean up the client device 3262 */ 3263 MDI_CLIENT_UNLOCK(ct); 3264 (void) i_mdi_client_free(ct->ct_vhci, ct); 3265 MDI_VHCI_CLIENT_UNLOCK(vh); 3266 return (rv); 3267 } 3268 } 3269 MDI_CLIENT_UNLOCK(ct); 3270 MDI_VHCI_CLIENT_UNLOCK(vh); 3271 3272 if (rv == MDI_FAILURE) 3273 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3274 3275 return (rv); 3276 } 3277 3278 /* 3279 * i_mdi_pi_free(): 3280 * Free the mdi_pathinfo node 3281 */ 3282 static void 3283 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3284 { 3285 int ct_circular; 3286 int ph_circular; 3287 3288 ASSERT(MDI_CLIENT_LOCKED(ct)); 3289 3290 /* 3291 * remove any per-path kstats 3292 */ 3293 i_mdi_pi_kstat_destroy(pip); 3294 3295 /* See comments in i_mdi_pi_alloc() */ 3296 ndi_devi_enter(ct->ct_dip, &ct_circular); 3297 ndi_devi_enter(ph->ph_dip, &ph_circular); 3298 3299 i_mdi_client_remove_path(ct, pip); 3300 i_mdi_phci_remove_path(ph, pip); 3301 3302 ndi_devi_exit(ph->ph_dip, ph_circular); 3303 ndi_devi_exit(ct->ct_dip, ct_circular); 3304 3305 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3306 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3307 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3308 if (MDI_PI(pip)->pi_addr) { 3309 kmem_free(MDI_PI(pip)->pi_addr, 3310 strlen(MDI_PI(pip)->pi_addr) + 1); 3311 MDI_PI(pip)->pi_addr = NULL; 3312 } 3313 3314 if (MDI_PI(pip)->pi_prop) { 3315 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3316 MDI_PI(pip)->pi_prop = NULL; 3317 } 3318 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3319 } 3320 3321 3322 /* 3323 * i_mdi_phci_remove_path(): 3324 * Remove a mdi_pathinfo node from pHCI list. 
3325 * Notes: 3326 * Caller should hold per-pHCI mutex 3327 */ 3328 static void 3329 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3330 { 3331 mdi_pathinfo_t *prev = NULL; 3332 mdi_pathinfo_t *path = NULL; 3333 3334 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3335 3336 MDI_PHCI_LOCK(ph); 3337 path = ph->ph_path_head; 3338 while (path != NULL) { 3339 if (path == pip) { 3340 break; 3341 } 3342 prev = path; 3343 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3344 } 3345 3346 if (path) { 3347 ph->ph_path_count--; 3348 if (prev) { 3349 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3350 } else { 3351 ph->ph_path_head = 3352 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3353 } 3354 if (ph->ph_path_tail == path) { 3355 ph->ph_path_tail = prev; 3356 } 3357 } 3358 3359 /* 3360 * Clear the pHCI link 3361 */ 3362 MDI_PI(pip)->pi_phci_link = NULL; 3363 MDI_PI(pip)->pi_phci = NULL; 3364 MDI_PHCI_UNLOCK(ph); 3365 } 3366 3367 /* 3368 * i_mdi_client_remove_path(): 3369 * Remove a mdi_pathinfo node from client path list. 
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * Walk the client's singly linked path list looking for pip,
	 * remembering the predecessor so that pip can be unlinked.
	 */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			/* pip was the first element: advance the list head */
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* ct_path_last must not keep pointing at the removed path */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}

	/* The back pointers are cleared even when pip was not on the list */
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Common worker that moves a mdi_pathinfo node to the
 *		requested state (online, standby, fault or offline): the
 *		path is marked transient, the vHCI driver's
 *		vo_pi_state_change() entry point is called, and the client
 *		state is re-evaluated afterwards.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY	(pHCI not ready, or NDI reported busy)
 *		any other non-success value returned by the vHCI
 *		vo_pi_state_change() callback is passed back unchanged
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, call back the vHCI
	 * driver's pathinfo node initialize entry point.  The pathinfo
	 * lock is dropped around the callback; on failure we return with
	 * the lock still released.
	 */

	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
				    "!vo_pi_init failed: vHCI %p, pip %s %p",
				    (void *)vh, mdi_pi_spathname(pip),
				    (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!pHCI not ready, pHCI=%p", (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/*
	 * From here on every exit goes through state_change_exit, which
	 * marks the pHCI stable again.
	 */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);

		/*
		 * If this is a user initiated path online->offline operation
		 * whose success would transition a client from DEGRADED to
		 * FAILED then only proceed if we can offline the client first.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_USER_REQ) &&
		    MDI_PI_IS_ONLINE(pip) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the client lock nor the pathinfo
				 * lock is held at this point, so it is safe
				 * to jump straight to the exit label.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI driver with no MDI locks held.  When the vHCI did
	 * not register vo_pi_state_change, rv keeps its previous value.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
		    "vo_pi_state_change failed: rv %x", rv));
	}
	/*
	 * Commit the new state on success, otherwise roll back to the
	 * state the path had before it was marked transient.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * The path state change will impact the client
			 * state.  Update the client and dev_info node
			 * state accordingly.  rv holds an NDI_* code from
			 * here until it is converted back below.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_online failed "
						    "error %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_USER_REQ) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
 *		After a successful state change, a power-management hold
 *		is taken on the path (if not already held) and on the
 *		client.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		any other value returned by i_mdi_pi_state_change() or,
 *		when the client had no power holds, by
 *		i_mdi_power_all_phci()
 */
int
mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
	int		client_held = 0;
	int		rv;

	ASSERT(ct != NULL);
	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
	if (rv != MDI_SUCCESS)
		return (rv);

	/* Take the per-path PM hold only once */
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held == 0) {
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_pip %p", (void *)pip));
		i_mdi_pm_hold_pip(pip);
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	if (client_held) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_power_cnt == 0) {
			rv = i_mdi_power_all_phci(ct);
		}

		/* The client hold is taken regardless of rv above */
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_client %p", (void *)ct));
		i_mdi_pm_hold_client(ct, 1);
		MDI_CLIENT_UNLOCK(ct);
	}

	return (rv);
}

/*
 * mdi_pi_standby():
 *		Place the mdi_pathinfo node in standby state
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		(the result of i_mdi_pi_state_change() is returned as is)
 */
int
mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
}

/*
 * mdi_pi_fault():
 *		Place the mdi_pathinfo node in fault'ed state
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		(the result of i_mdi_pi_state_change() is returned as is)
 */
int
mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
}

/*
 * mdi_pi_offline():
 *		Offline a mdi_pathinfo node.
3810 * Return Values: 3811 * MDI_SUCCESS 3812 * MDI_FAILURE 3813 */ 3814 int 3815 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3816 { 3817 int ret, client_held = 0; 3818 mdi_client_t *ct; 3819 3820 /* 3821 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3822 * used it to mean "user initiated operation" (i.e. devctl). Callers 3823 * should now just use NDI_USER_REQ. 3824 */ 3825 if (flags & NDI_DEVI_REMOVE) { 3826 flags &= ~NDI_DEVI_REMOVE; 3827 flags |= NDI_USER_REQ; 3828 } 3829 3830 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3831 3832 if (ret == MDI_SUCCESS) { 3833 MDI_PI_LOCK(pip); 3834 if (MDI_PI(pip)->pi_pm_held) { 3835 client_held = 1; 3836 } 3837 MDI_PI_UNLOCK(pip); 3838 3839 if (client_held) { 3840 ct = MDI_PI(pip)->pi_client; 3841 MDI_CLIENT_LOCK(ct); 3842 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3843 "i_mdi_pm_rele_client\n")); 3844 i_mdi_pm_rele_client(ct, 1); 3845 MDI_CLIENT_UNLOCK(ct); 3846 } 3847 } 3848 3849 return (ret); 3850 } 3851 3852 /* 3853 * i_mdi_pi_offline(): 3854 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3855 */ 3856 static int 3857 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3858 { 3859 dev_info_t *vdip = NULL; 3860 mdi_vhci_t *vh = NULL; 3861 mdi_client_t *ct = NULL; 3862 int (*f)(); 3863 int rv; 3864 3865 MDI_PI_LOCK(pip); 3866 ct = MDI_PI(pip)->pi_client; 3867 ASSERT(ct != NULL); 3868 3869 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3870 /* 3871 * Give a chance for pending I/Os to complete. 3872 */ 3873 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3874 "!%d cmds still pending on path %s %p", 3875 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3876 (void *)pip)); 3877 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3878 &MDI_PI(pip)->pi_mutex, 3879 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3880 /* 3881 * The timeout time reached without ref_cnt being zero 3882 * being signaled. 
3883 */ 3884 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3885 "!Timeout reached on path %s %p without the cond", 3886 mdi_pi_spathname(pip), (void *)pip)); 3887 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3888 "!%d cmds still pending on path %s %p", 3889 MDI_PI(pip)->pi_ref_cnt, 3890 mdi_pi_spathname(pip), (void *)pip)); 3891 } 3892 } 3893 vh = ct->ct_vhci; 3894 vdip = vh->vh_dip; 3895 3896 /* 3897 * Notify vHCI that has registered this event 3898 */ 3899 ASSERT(vh->vh_ops); 3900 f = vh->vh_ops->vo_pi_state_change; 3901 3902 if (f != NULL) { 3903 MDI_PI_UNLOCK(pip); 3904 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3905 flags)) != MDI_SUCCESS) { 3906 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3907 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3908 ddi_driver_name(vdip), ddi_get_instance(vdip), 3909 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3910 } 3911 MDI_PI_LOCK(pip); 3912 } 3913 3914 /* 3915 * Set the mdi_pathinfo node state and clear the transient condition 3916 */ 3917 MDI_PI_SET_OFFLINE(pip); 3918 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3919 MDI_PI_UNLOCK(pip); 3920 3921 MDI_CLIENT_LOCK(ct); 3922 if (rv == MDI_SUCCESS) { 3923 if (ct->ct_unstable == 0) { 3924 dev_info_t *cdip = ct->ct_dip; 3925 3926 /* 3927 * Onlining the mdi_pathinfo node will impact the 3928 * client state Update the client and dev_info node 3929 * state accordingly 3930 */ 3931 i_mdi_client_update_state(ct); 3932 rv = NDI_SUCCESS; 3933 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3934 if (cdip && 3935 (i_ddi_node_state(cdip) >= 3936 DS_INITIALIZED)) { 3937 MDI_CLIENT_UNLOCK(ct); 3938 rv = ndi_devi_offline(cdip, 3939 NDI_DEVFS_CLEAN); 3940 MDI_CLIENT_LOCK(ct); 3941 if (rv != NDI_SUCCESS) { 3942 /* 3943 * ndi_devi_offline failed. 3944 * Reset client flags to 3945 * online. 
3946 */ 3947 MDI_DEBUG(4, (MDI_WARN, cdip, 3948 "ndi_devi_offline failed: " 3949 "error %x", rv)); 3950 MDI_CLIENT_SET_ONLINE(ct); 3951 } 3952 } 3953 } 3954 /* 3955 * Convert to MDI error code 3956 */ 3957 switch (rv) { 3958 case NDI_SUCCESS: 3959 rv = MDI_SUCCESS; 3960 break; 3961 case NDI_BUSY: 3962 rv = MDI_BUSY; 3963 break; 3964 default: 3965 rv = MDI_FAILURE; 3966 break; 3967 } 3968 } 3969 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3970 i_mdi_report_path_state(ct, pip); 3971 } 3972 3973 MDI_CLIENT_UNLOCK(ct); 3974 3975 /* 3976 * Change in the mdi_pathinfo node state will impact the client state 3977 */ 3978 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3979 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3980 return (rv); 3981 } 3982 3983 /* 3984 * mdi_pi_get_node_name(): 3985 * Get the name associated with a mdi_pathinfo node. 3986 * Since pathinfo nodes are not directly named, we 3987 * return the node_name of the client. 3988 * 3989 * Return Values: 3990 * char * 3991 */ 3992 char * 3993 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 3994 { 3995 mdi_client_t *ct; 3996 3997 if (pip == NULL) 3998 return (NULL); 3999 ct = MDI_PI(pip)->pi_client; 4000 if ((ct == NULL) || (ct->ct_dip == NULL)) 4001 return (NULL); 4002 return (ddi_node_name(ct->ct_dip)); 4003 } 4004 4005 /* 4006 * mdi_pi_get_addr(): 4007 * Get the unit address associated with a mdi_pathinfo node 4008 * 4009 * Return Values: 4010 * char * 4011 */ 4012 char * 4013 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4014 { 4015 if (pip == NULL) 4016 return (NULL); 4017 4018 return (MDI_PI(pip)->pi_addr); 4019 } 4020 4021 /* 4022 * mdi_pi_get_path_instance(): 4023 * Get the 'path_instance' of a mdi_pathinfo node 4024 * 4025 * Return Values: 4026 * path_instance 4027 */ 4028 int 4029 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4030 { 4031 if (pip == NULL) 4032 return (0); 4033 4034 return (MDI_PI(pip)->pi_path_instance); 4035 } 4036 4037 /* 4038 * mdi_pi_pathname(): 4039 * Return pointer to path to pathinfo node. 
4040 */ 4041 char * 4042 mdi_pi_pathname(mdi_pathinfo_t *pip) 4043 { 4044 if (pip == NULL) 4045 return (NULL); 4046 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4047 } 4048 4049 /* 4050 * mdi_pi_spathname(): 4051 * Return pointer to shortpath to pathinfo node. Used for debug 4052 * messages, so return "" instead of NULL when unknown. 4053 */ 4054 char * 4055 mdi_pi_spathname(mdi_pathinfo_t *pip) 4056 { 4057 char *spath = ""; 4058 4059 if (pip) { 4060 spath = mdi_pi_spathname_by_instance( 4061 mdi_pi_get_path_instance(pip)); 4062 if (spath == NULL) 4063 spath = ""; 4064 } 4065 return (spath); 4066 } 4067 4068 char * 4069 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4070 { 4071 char *obp_path = NULL; 4072 if ((pip == NULL) || (path == NULL)) 4073 return (NULL); 4074 4075 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4076 (void) strcpy(path, obp_path); 4077 (void) mdi_prop_free(obp_path); 4078 } else { 4079 path = NULL; 4080 } 4081 return (path); 4082 } 4083 4084 int 4085 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4086 { 4087 dev_info_t *pdip; 4088 char *obp_path = NULL; 4089 int rc = MDI_FAILURE; 4090 4091 if (pip == NULL) 4092 return (MDI_FAILURE); 4093 4094 pdip = mdi_pi_get_phci(pip); 4095 if (pdip == NULL) 4096 return (MDI_FAILURE); 4097 4098 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4099 4100 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4101 (void) ddi_pathname(pdip, obp_path); 4102 } 4103 4104 if (component) { 4105 (void) strncat(obp_path, "/", MAXPATHLEN); 4106 (void) strncat(obp_path, component, MAXPATHLEN); 4107 } 4108 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4109 4110 if (obp_path) 4111 kmem_free(obp_path, MAXPATHLEN); 4112 return (rc); 4113 } 4114 4115 /* 4116 * mdi_pi_get_client(): 4117 * Get the client devinfo associated with a mdi_pathinfo node 4118 * 4119 * Return Values: 4120 * Handle to client device dev_info node 4121 */ 4122 dev_info_t * 4123 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4124 { 4125 dev_info_t *dip = NULL; 4126 if (pip) { 4127 dip = MDI_PI(pip)->pi_client->ct_dip; 4128 } 4129 return (dip); 4130 } 4131 4132 /* 4133 * mdi_pi_get_phci(): 4134 * Get the pHCI devinfo associated with the mdi_pathinfo node 4135 * Return Values: 4136 * Handle to dev_info node 4137 */ 4138 dev_info_t * 4139 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4140 { 4141 dev_info_t *dip = NULL; 4142 mdi_phci_t *ph; 4143 4144 if (pip) { 4145 ph = MDI_PI(pip)->pi_phci; 4146 if (ph) 4147 dip = ph->ph_dip; 4148 } 4149 return (dip); 4150 } 4151 4152 /* 4153 * mdi_pi_get_client_private(): 4154 * Get the client private information associated with the 4155 * mdi_pathinfo node 4156 */ 4157 void * 4158 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4159 { 4160 void *cprivate = NULL; 4161 if (pip) { 4162 cprivate = MDI_PI(pip)->pi_cprivate; 4163 } 4164 return (cprivate); 4165 } 4166 4167 /* 4168 * mdi_pi_set_client_private(): 4169 * Set the client private information in the mdi_pathinfo node 4170 */ 4171 void 4172 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4173 { 4174 if (pip) { 4175 MDI_PI(pip)->pi_cprivate = priv; 4176 } 4177 } 4178 4179 /* 4180 * mdi_pi_get_phci_private(): 4181 * Get the pHCI private information associated with the 4182 * mdi_pathinfo node 4183 */ 4184 caddr_t 4185 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4186 { 4187 caddr_t pprivate = NULL; 4188 4189 if (pip) { 4190 pprivate = MDI_PI(pip)->pi_pprivate; 4191 } 4192 return (pprivate); 4193 } 4194 4195 /* 4196 * mdi_pi_set_phci_private(): 4197 * Set the pHCI private information in the mdi_pathinfo node 4198 */ 4199 void 4200 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4201 { 4202 if (pip) { 4203 MDI_PI(pip)->pi_pprivate = priv; 4204 } 4205 } 4206 4207 /* 4208 * mdi_pi_get_state(): 4209 * Get the mdi_pathinfo node state. 
Transient states are internal 4210 * and not provided to the users 4211 */ 4212 mdi_pathinfo_state_t 4213 mdi_pi_get_state(mdi_pathinfo_t *pip) 4214 { 4215 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4216 4217 if (pip) { 4218 if (MDI_PI_IS_TRANSIENT(pip)) { 4219 /* 4220 * mdi_pathinfo is in state transition. Return the 4221 * last good state. 4222 */ 4223 state = MDI_PI_OLD_STATE(pip); 4224 } else { 4225 state = MDI_PI_STATE(pip); 4226 } 4227 } 4228 return (state); 4229 } 4230 4231 /* 4232 * mdi_pi_get_flags(): 4233 * Get the mdi_pathinfo node flags. 4234 */ 4235 uint_t 4236 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4237 { 4238 return (pip ? MDI_PI(pip)->pi_flags : 0); 4239 } 4240 4241 /* 4242 * Note that the following function needs to be the new interface for 4243 * mdi_pi_get_state when mpxio gets integrated to ON. 4244 */ 4245 int 4246 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4247 uint32_t *ext_state) 4248 { 4249 *state = MDI_PATHINFO_STATE_INIT; 4250 4251 if (pip) { 4252 if (MDI_PI_IS_TRANSIENT(pip)) { 4253 /* 4254 * mdi_pathinfo is in state transition. Return the 4255 * last good state. 
4256 */ 4257 *state = MDI_PI_OLD_STATE(pip); 4258 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4259 } else { 4260 *state = MDI_PI_STATE(pip); 4261 *ext_state = MDI_PI_EXT_STATE(pip); 4262 } 4263 } 4264 return (MDI_SUCCESS); 4265 } 4266 4267 /* 4268 * mdi_pi_get_preferred: 4269 * Get the preferred path flag 4270 */ 4271 int 4272 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4273 { 4274 if (pip) { 4275 return (MDI_PI(pip)->pi_preferred); 4276 } 4277 return (0); 4278 } 4279 4280 /* 4281 * mdi_pi_set_preferred: 4282 * Set the preferred path flag 4283 */ 4284 void 4285 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4286 { 4287 if (pip) { 4288 MDI_PI(pip)->pi_preferred = preferred; 4289 } 4290 } 4291 4292 /* 4293 * mdi_pi_set_state(): 4294 * Set the mdi_pathinfo node state 4295 */ 4296 void 4297 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4298 { 4299 uint32_t ext_state; 4300 4301 if (pip) { 4302 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4303 MDI_PI(pip)->pi_state = state; 4304 MDI_PI(pip)->pi_state |= ext_state; 4305 4306 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4307 i_ddi_di_cache_invalidate(); 4308 } 4309 } 4310 4311 /* 4312 * Property functions: 4313 */ 4314 int 4315 i_map_nvlist_error_to_mdi(int val) 4316 { 4317 int rv; 4318 4319 switch (val) { 4320 case 0: 4321 rv = DDI_PROP_SUCCESS; 4322 break; 4323 case EINVAL: 4324 case ENOTSUP: 4325 rv = DDI_PROP_INVAL_ARG; 4326 break; 4327 case ENOMEM: 4328 rv = DDI_PROP_NO_MEMORY; 4329 break; 4330 default: 4331 rv = DDI_PROP_NOT_FOUND; 4332 break; 4333 } 4334 return (rv); 4335 } 4336 4337 /* 4338 * mdi_pi_get_next_prop(): 4339 * Property walk function. The caller should hold mdi_pi_lock() 4340 * and release by calling mdi_pi_unlock() at the end of walk to 4341 * get a consistent value. 
4342 */ 4343 nvpair_t * 4344 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4345 { 4346 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4347 return (NULL); 4348 } 4349 ASSERT(MDI_PI_LOCKED(pip)); 4350 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4351 } 4352 4353 /* 4354 * mdi_prop_remove(): 4355 * Remove the named property from the named list. 4356 */ 4357 int 4358 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4359 { 4360 if (pip == NULL) { 4361 return (DDI_PROP_NOT_FOUND); 4362 } 4363 ASSERT(!MDI_PI_LOCKED(pip)); 4364 MDI_PI_LOCK(pip); 4365 if (MDI_PI(pip)->pi_prop == NULL) { 4366 MDI_PI_UNLOCK(pip); 4367 return (DDI_PROP_NOT_FOUND); 4368 } 4369 if (name) { 4370 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4371 } else { 4372 char nvp_name[MAXNAMELEN]; 4373 nvpair_t *nvp; 4374 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4375 while (nvp) { 4376 nvpair_t *next; 4377 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4378 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4379 nvpair_name(nvp)); 4380 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4381 nvp_name); 4382 nvp = next; 4383 } 4384 } 4385 MDI_PI_UNLOCK(pip); 4386 return (DDI_PROP_SUCCESS); 4387 } 4388 4389 /* 4390 * mdi_prop_size(): 4391 * Get buffer size needed to pack the property data. 4392 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4393 * buffer size. 4394 */ 4395 int 4396 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4397 { 4398 int rv; 4399 size_t bufsize; 4400 4401 *buflenp = 0; 4402 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4403 return (DDI_PROP_NOT_FOUND); 4404 } 4405 ASSERT(MDI_PI_LOCKED(pip)); 4406 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4407 &bufsize, NV_ENCODE_NATIVE); 4408 *buflenp = bufsize; 4409 return (i_map_nvlist_error_to_mdi(rv)); 4410 } 4411 4412 /* 4413 * mdi_prop_pack(): 4414 * pack the property list. 
The caller should hold the 4415 * mdi_pathinfo_t node to get a consistent data 4416 */ 4417 int 4418 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4419 { 4420 int rv; 4421 size_t bufsize; 4422 4423 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4424 return (DDI_PROP_NOT_FOUND); 4425 } 4426 4427 ASSERT(MDI_PI_LOCKED(pip)); 4428 4429 bufsize = buflen; 4430 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4431 NV_ENCODE_NATIVE, KM_SLEEP); 4432 4433 return (i_map_nvlist_error_to_mdi(rv)); 4434 } 4435 4436 /* 4437 * mdi_prop_update_byte(): 4438 * Create/Update a byte property 4439 */ 4440 int 4441 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4442 { 4443 int rv; 4444 4445 if (pip == NULL) { 4446 return (DDI_PROP_INVAL_ARG); 4447 } 4448 ASSERT(!MDI_PI_LOCKED(pip)); 4449 MDI_PI_LOCK(pip); 4450 if (MDI_PI(pip)->pi_prop == NULL) { 4451 MDI_PI_UNLOCK(pip); 4452 return (DDI_PROP_NOT_FOUND); 4453 } 4454 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4455 MDI_PI_UNLOCK(pip); 4456 return (i_map_nvlist_error_to_mdi(rv)); 4457 } 4458 4459 /* 4460 * mdi_prop_update_byte_array(): 4461 * Create/Update a byte array property 4462 */ 4463 int 4464 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4465 uint_t nelements) 4466 { 4467 int rv; 4468 4469 if (pip == NULL) { 4470 return (DDI_PROP_INVAL_ARG); 4471 } 4472 ASSERT(!MDI_PI_LOCKED(pip)); 4473 MDI_PI_LOCK(pip); 4474 if (MDI_PI(pip)->pi_prop == NULL) { 4475 MDI_PI_UNLOCK(pip); 4476 return (DDI_PROP_NOT_FOUND); 4477 } 4478 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4479 MDI_PI_UNLOCK(pip); 4480 return (i_map_nvlist_error_to_mdi(rv)); 4481 } 4482 4483 /* 4484 * mdi_prop_update_int(): 4485 * Create/Update a 32 bit integer property 4486 */ 4487 int 4488 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4489 { 4490 int rv; 4491 4492 if (pip == NULL) { 4493 return (DDI_PROP_INVAL_ARG); 4494 } 4495 
ASSERT(!MDI_PI_LOCKED(pip)); 4496 MDI_PI_LOCK(pip); 4497 if (MDI_PI(pip)->pi_prop == NULL) { 4498 MDI_PI_UNLOCK(pip); 4499 return (DDI_PROP_NOT_FOUND); 4500 } 4501 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4502 MDI_PI_UNLOCK(pip); 4503 return (i_map_nvlist_error_to_mdi(rv)); 4504 } 4505 4506 /* 4507 * mdi_prop_update_int64(): 4508 * Create/Update a 64 bit integer property 4509 */ 4510 int 4511 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4512 { 4513 int rv; 4514 4515 if (pip == NULL) { 4516 return (DDI_PROP_INVAL_ARG); 4517 } 4518 ASSERT(!MDI_PI_LOCKED(pip)); 4519 MDI_PI_LOCK(pip); 4520 if (MDI_PI(pip)->pi_prop == NULL) { 4521 MDI_PI_UNLOCK(pip); 4522 return (DDI_PROP_NOT_FOUND); 4523 } 4524 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4525 MDI_PI_UNLOCK(pip); 4526 return (i_map_nvlist_error_to_mdi(rv)); 4527 } 4528 4529 /* 4530 * mdi_prop_update_int_array(): 4531 * Create/Update a int array property 4532 */ 4533 int 4534 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4535 uint_t nelements) 4536 { 4537 int rv; 4538 4539 if (pip == NULL) { 4540 return (DDI_PROP_INVAL_ARG); 4541 } 4542 ASSERT(!MDI_PI_LOCKED(pip)); 4543 MDI_PI_LOCK(pip); 4544 if (MDI_PI(pip)->pi_prop == NULL) { 4545 MDI_PI_UNLOCK(pip); 4546 return (DDI_PROP_NOT_FOUND); 4547 } 4548 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4549 nelements); 4550 MDI_PI_UNLOCK(pip); 4551 return (i_map_nvlist_error_to_mdi(rv)); 4552 } 4553 4554 /* 4555 * mdi_prop_update_string(): 4556 * Create/Update a string property 4557 */ 4558 int 4559 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4560 { 4561 int rv; 4562 4563 if (pip == NULL) { 4564 return (DDI_PROP_INVAL_ARG); 4565 } 4566 ASSERT(!MDI_PI_LOCKED(pip)); 4567 MDI_PI_LOCK(pip); 4568 if (MDI_PI(pip)->pi_prop == NULL) { 4569 MDI_PI_UNLOCK(pip); 4570 return (DDI_PROP_NOT_FOUND); 4571 } 4572 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4573 MDI_PI_UNLOCK(pip); 4574 return (i_map_nvlist_error_to_mdi(rv)); 4575 } 4576 4577 /* 4578 * mdi_prop_update_string_array(): 4579 * Create/Update a string array property 4580 */ 4581 int 4582 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4583 uint_t nelements) 4584 { 4585 int rv; 4586 4587 if (pip == NULL) { 4588 return (DDI_PROP_INVAL_ARG); 4589 } 4590 ASSERT(!MDI_PI_LOCKED(pip)); 4591 MDI_PI_LOCK(pip); 4592 if (MDI_PI(pip)->pi_prop == NULL) { 4593 MDI_PI_UNLOCK(pip); 4594 return (DDI_PROP_NOT_FOUND); 4595 } 4596 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4597 nelements); 4598 MDI_PI_UNLOCK(pip); 4599 return (i_map_nvlist_error_to_mdi(rv)); 4600 } 4601 4602 /* 4603 * mdi_prop_lookup_byte(): 4604 * Look for byte property identified by name. The data returned 4605 * is the actual property and valid as long as mdi_pathinfo_t node 4606 * is alive. 4607 */ 4608 int 4609 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4610 { 4611 int rv; 4612 4613 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4614 return (DDI_PROP_NOT_FOUND); 4615 } 4616 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4617 return (i_map_nvlist_error_to_mdi(rv)); 4618 } 4619 4620 4621 /* 4622 * mdi_prop_lookup_byte_array(): 4623 * Look for byte array property identified by name. The data 4624 * returned is the actual property and valid as long as 4625 * mdi_pathinfo_t node is alive. 4626 */ 4627 int 4628 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4629 uint_t *nelements) 4630 { 4631 int rv; 4632 4633 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4634 return (DDI_PROP_NOT_FOUND); 4635 } 4636 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4637 nelements); 4638 return (i_map_nvlist_error_to_mdi(rv)); 4639 } 4640 4641 /* 4642 * mdi_prop_lookup_int(): 4643 * Look for int property identified by name. 
The data returned 4644 * is the actual property and valid as long as mdi_pathinfo_t 4645 * node is alive. 4646 */ 4647 int 4648 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4649 { 4650 int rv; 4651 4652 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4653 return (DDI_PROP_NOT_FOUND); 4654 } 4655 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4656 return (i_map_nvlist_error_to_mdi(rv)); 4657 } 4658 4659 /* 4660 * mdi_prop_lookup_int64(): 4661 * Look for int64 property identified by name. The data returned 4662 * is the actual property and valid as long as mdi_pathinfo_t node 4663 * is alive. 4664 */ 4665 int 4666 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4667 { 4668 int rv; 4669 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4670 return (DDI_PROP_NOT_FOUND); 4671 } 4672 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4673 return (i_map_nvlist_error_to_mdi(rv)); 4674 } 4675 4676 /* 4677 * mdi_prop_lookup_int_array(): 4678 * Look for int array property identified by name. The data 4679 * returned is the actual property and valid as long as 4680 * mdi_pathinfo_t node is alive. 4681 */ 4682 int 4683 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4684 uint_t *nelements) 4685 { 4686 int rv; 4687 4688 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4689 return (DDI_PROP_NOT_FOUND); 4690 } 4691 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4692 (int32_t **)data, nelements); 4693 return (i_map_nvlist_error_to_mdi(rv)); 4694 } 4695 4696 /* 4697 * mdi_prop_lookup_string(): 4698 * Look for string property identified by name. The data 4699 * returned is the actual property and valid as long as 4700 * mdi_pathinfo_t node is alive. 
4701 */ 4702 int 4703 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4704 { 4705 int rv; 4706 4707 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4708 return (DDI_PROP_NOT_FOUND); 4709 } 4710 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4711 return (i_map_nvlist_error_to_mdi(rv)); 4712 } 4713 4714 /* 4715 * mdi_prop_lookup_string_array(): 4716 * Look for string array property identified by name. The data 4717 * returned is the actual property and valid as long as 4718 * mdi_pathinfo_t node is alive. 4719 */ 4720 int 4721 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4722 uint_t *nelements) 4723 { 4724 int rv; 4725 4726 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4727 return (DDI_PROP_NOT_FOUND); 4728 } 4729 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4730 nelements); 4731 return (i_map_nvlist_error_to_mdi(rv)); 4732 } 4733 4734 /* 4735 * mdi_prop_free(): 4736 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4737 * functions return the pointer to actual property data and not a 4738 * copy of it. So the data returned is valid as long as 4739 * mdi_pathinfo_t node is valid. 
4740 */ 4741 /*ARGSUSED*/ 4742 int 4743 mdi_prop_free(void *data) 4744 { 4745 return (DDI_PROP_SUCCESS); 4746 } 4747 4748 /*ARGSUSED*/ 4749 static void 4750 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4751 { 4752 char *ct_path; 4753 char *ct_status; 4754 char *status; 4755 dev_info_t *cdip = ct->ct_dip; 4756 char lb_buf[64]; 4757 int report_lb_c = 0, report_lb_p = 0; 4758 4759 ASSERT(MDI_CLIENT_LOCKED(ct)); 4760 if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) || 4761 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4762 return; 4763 } 4764 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4765 ct_status = "optimal"; 4766 report_lb_c = 1; 4767 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4768 ct_status = "degraded"; 4769 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4770 ct_status = "failed"; 4771 } else { 4772 ct_status = "unknown"; 4773 } 4774 4775 lb_buf[0] = 0; /* not interested in load balancing config */ 4776 4777 if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) { 4778 status = "removed"; 4779 } else if (MDI_PI_IS_OFFLINE(pip)) { 4780 status = "offline"; 4781 } else if (MDI_PI_IS_ONLINE(pip)) { 4782 status = "online"; 4783 report_lb_p = 1; 4784 } else if (MDI_PI_IS_STANDBY(pip)) { 4785 status = "standby"; 4786 } else if (MDI_PI_IS_FAULT(pip)) { 4787 status = "faulted"; 4788 } else { 4789 status = "unknown"; 4790 } 4791 4792 if (cdip) { 4793 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4794 4795 /* 4796 * NOTE: Keeping "multipath status: %s" and 4797 * "Load balancing: %s" format unchanged in case someone 4798 * scrubs /var/adm/messages looking for these messages. 
#ifdef DEBUG
/*
 * i_mdi_log():
 *		Back end of the MDI_DEBUG() macro: formats a single debug
 *		message and hands it to cmn_err().
 *
 *		level	cmn_err level, supplied through the MDI_WARN /
 *			MDI_NOTE / MDI_CONT macros
 *		func	name of the calling function (__func__)
 *		dip	optional devinfo node; when non-NULL the message is
 *			prefixed with "<driver><instance>: "
 *		fmt	printf-like format, optionally starting with one of
 *			the destination characters '!', '?' or '^'
 *
 * NOTE: The trailing \n that CE_CONT output needs is appended here, so
 *	MDI_DEBUG callers should not terminate fmt strings with \n.
 *
 * NOTE: When the formatted message has no leading !?^ and level >= 2, a
 *	leading ! is implied.  mdi_debug_logonly forces '!' regardless of
 *	what the message asked for; mdi_debug_consoleonly takes precedence
 *	over everything and forces '^' at CE_NOTE.  If you are using kmdb on
 *	the console, consider setting mdi_debug_consoleonly to 1 as an aid.
 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
	char	prefix[MAXNAMELEN];
	char	msg[512];
	char	*text = msg;
	char	dest = '\0';	/* '!', '?', '^', or '\0' for "unrouted" */
	int	newline = 0;	/* use the "\n"-terminated formats */
	int	plain = 0;	/* level not recognised: short format */
	va_list	args;

	if (dip != NULL) {
		(void) snprintf(prefix, sizeof (prefix), "%s%d: ",
		    ddi_driver_name(dip), ddi_get_instance(dip));
	} else {
		prefix[0] = '\0';
	}

	va_start(args, fmt);
	(void) vsnprintf(msg, sizeof (msg), fmt, args);
	va_end(args);

	/*
	 * A destination character at the front of the formatted message is
	 * stripped and remembered; otherwise higher levels default to '!'.
	 */
	if (msg[0] == '!' || msg[0] == '?' || msg[0] == '^') {
		dest = msg[0];
		text = &msg[1];
	} else if (level >= 2) {
		dest = '!';		/* ! implied */
	}

	if (mdi_debug_logonly)
		dest = '!';

	if (mdi_debug_consoleonly) {
		/* Console override: CE_NOTE, no explicit newline. */
		dest = '^';
		level = CE_NOTE;
	} else {
		switch (level) {
		case CE_NOTE:
			level = CE_CONT;
			/* FALLTHROUGH */
		case CE_CONT:
			newline = 1;
			break;
		case CE_WARN:
		case CE_PANIC:
			break;
		default:
			plain = 1;
			break;
		}
	}

	/*
	 * cmn_err() looks at the first character of the format string
	 * itself, so every destination needs its own literal format.
	 */
	if (plain) {
		cmn_err(level, "mdi: %s%s", prefix, text);
	} else if (newline) {
		switch (dest) {
		case '?':
			cmn_err(level, "?mdi: %s%s: %s\n", prefix, func, text);
			break;
		case '^':
			cmn_err(level, "^mdi: %s%s: %s\n", prefix, func, text);
			break;
		case '!':
			cmn_err(level, "!mdi: %s%s: %s\n", prefix, func, text);
			break;
		default:
			cmn_err(level, "mdi: %s%s: %s\n", prefix, func, text);
			break;
		}
	} else {
		switch (dest) {
		case '?':
			cmn_err(level, "?mdi: %s%s: %s", prefix, func, text);
			break;
		case '^':
			cmn_err(level, "^mdi: %s%s: %s", prefix, func, text);
			break;
		case '!':
			cmn_err(level, "!mdi: %s%s: %s", prefix, func, text);
			break;
		default:
			cmn_err(level, "mdi: %s%s: %s", prefix, func, text);
			break;
		}
	}
}
#endif	/* DEBUG */
*ct_dip) 4941 { 4942 mdi_client_t *ct; 4943 4944 /* 4945 * Client online notification. Mark client state as online 4946 * restore our binding with dev_info node 4947 */ 4948 ct = i_devi_get_client(ct_dip); 4949 ASSERT(ct != NULL); 4950 MDI_CLIENT_LOCK(ct); 4951 MDI_CLIENT_SET_ONLINE(ct); 4952 /* catch for any memory leaks */ 4953 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4954 ct->ct_dip = ct_dip; 4955 4956 if (ct->ct_power_cnt == 0) 4957 (void) i_mdi_power_all_phci(ct); 4958 4959 MDI_DEBUG(4, (MDI_NOTE, ct_dip, 4960 "i_mdi_pm_hold_client %p", (void *)ct)); 4961 i_mdi_pm_hold_client(ct, 1); 4962 4963 MDI_CLIENT_UNLOCK(ct); 4964 } 4965 4966 void 4967 i_mdi_phci_online(dev_info_t *ph_dip) 4968 { 4969 mdi_phci_t *ph; 4970 4971 /* pHCI online notification. Mark state accordingly */ 4972 ph = i_devi_get_phci(ph_dip); 4973 ASSERT(ph != NULL); 4974 MDI_PHCI_LOCK(ph); 4975 MDI_PHCI_SET_ONLINE(ph); 4976 MDI_PHCI_UNLOCK(ph); 4977 } 4978 4979 /* 4980 * mdi_devi_online(): 4981 * Online notification from NDI framework on pHCI/client 4982 * device online. 4983 * Return Values: 4984 * NDI_SUCCESS 4985 * MDI_FAILURE 4986 */ 4987 /*ARGSUSED*/ 4988 int 4989 mdi_devi_online(dev_info_t *dip, uint_t flags) 4990 { 4991 if (MDI_PHCI(dip)) { 4992 i_mdi_phci_online(dip); 4993 } 4994 4995 if (MDI_CLIENT(dip)) { 4996 i_mdi_client_online(dip); 4997 } 4998 return (NDI_SUCCESS); 4999 } 5000 5001 /* 5002 * mdi_devi_offline(): 5003 * Offline notification from NDI framework on pHCI/Client device 5004 * offline. 
5005 * 5006 * Return Values: 5007 * NDI_SUCCESS 5008 * NDI_FAILURE 5009 */ 5010 /*ARGSUSED*/ 5011 int 5012 mdi_devi_offline(dev_info_t *dip, uint_t flags) 5013 { 5014 int rv = NDI_SUCCESS; 5015 5016 if (MDI_CLIENT(dip)) { 5017 rv = i_mdi_client_offline(dip, flags); 5018 if (rv != NDI_SUCCESS) 5019 return (rv); 5020 } 5021 5022 if (MDI_PHCI(dip)) { 5023 rv = i_mdi_phci_offline(dip, flags); 5024 5025 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 5026 /* set client back online */ 5027 i_mdi_client_online(dip); 5028 } 5029 } 5030 5031 return (rv); 5032 } 5033 5034 /*ARGSUSED*/ 5035 static int 5036 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 5037 { 5038 int rv = NDI_SUCCESS; 5039 mdi_phci_t *ph; 5040 mdi_client_t *ct; 5041 mdi_pathinfo_t *pip; 5042 mdi_pathinfo_t *next; 5043 mdi_pathinfo_t *failed_pip = NULL; 5044 dev_info_t *cdip; 5045 5046 /* 5047 * pHCI component offline notification 5048 * Make sure that this pHCI instance is free to be offlined. 5049 * If it is OK to proceed, Offline and remove all the child 5050 * mdi_pathinfo nodes. This process automatically offlines 5051 * corresponding client devices, for which this pHCI provides 5052 * critical services. 5053 */ 5054 ph = i_devi_get_phci(dip); 5055 MDI_DEBUG(2, (MDI_NOTE, dip, 5056 "called %p %p", (void *)dip, (void *)ph)); 5057 if (ph == NULL) { 5058 return (rv); 5059 } 5060 5061 MDI_PHCI_LOCK(ph); 5062 5063 if (MDI_PHCI_IS_OFFLINE(ph)) { 5064 MDI_DEBUG(1, (MDI_WARN, dip, 5065 "!pHCI already offlined: %p", (void *)dip)); 5066 MDI_PHCI_UNLOCK(ph); 5067 return (NDI_SUCCESS); 5068 } 5069 5070 /* 5071 * Check to see if the pHCI can be offlined 5072 */ 5073 if (ph->ph_unstable) { 5074 MDI_DEBUG(1, (MDI_WARN, dip, 5075 "!One or more target devices are in transient state. " 5076 "This device can not be removed at this moment. 
" 5077 "Please try again later.")); 5078 MDI_PHCI_UNLOCK(ph); 5079 return (NDI_BUSY); 5080 } 5081 5082 pip = ph->ph_path_head; 5083 while (pip != NULL) { 5084 MDI_PI_LOCK(pip); 5085 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5086 5087 /* 5088 * The mdi_pathinfo state is OK. Check the client state. 5089 * If failover in progress fail the pHCI from offlining 5090 */ 5091 ct = MDI_PI(pip)->pi_client; 5092 i_mdi_client_lock(ct, pip); 5093 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5094 (ct->ct_unstable)) { 5095 /* 5096 * Failover is in progress, Fail the DR 5097 */ 5098 MDI_DEBUG(1, (MDI_WARN, dip, 5099 "!pHCI device is busy. " 5100 "This device can not be removed at this moment. " 5101 "Please try again later.")); 5102 MDI_PI_UNLOCK(pip); 5103 i_mdi_client_unlock(ct); 5104 MDI_PHCI_UNLOCK(ph); 5105 return (NDI_BUSY); 5106 } 5107 MDI_PI_UNLOCK(pip); 5108 5109 /* 5110 * Check to see of we are removing the last path of this 5111 * client device... 5112 */ 5113 cdip = ct->ct_dip; 5114 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5115 (i_mdi_client_compute_state(ct, ph) == 5116 MDI_CLIENT_STATE_FAILED)) { 5117 i_mdi_client_unlock(ct); 5118 MDI_PHCI_UNLOCK(ph); 5119 if (ndi_devi_offline(cdip, 5120 NDI_DEVFS_CLEAN) != NDI_SUCCESS) { 5121 /* 5122 * ndi_devi_offline() failed. 5123 * This pHCI provides the critical path 5124 * to one or more client devices. 5125 * Return busy. 5126 */ 5127 MDI_PHCI_LOCK(ph); 5128 MDI_DEBUG(1, (MDI_WARN, dip, 5129 "!pHCI device is busy. " 5130 "This device can not be removed at this " 5131 "moment. 
Please try again later.")); 5132 failed_pip = pip; 5133 break; 5134 } else { 5135 MDI_PHCI_LOCK(ph); 5136 pip = next; 5137 } 5138 } else { 5139 i_mdi_client_unlock(ct); 5140 pip = next; 5141 } 5142 } 5143 5144 if (failed_pip) { 5145 pip = ph->ph_path_head; 5146 while (pip != failed_pip) { 5147 MDI_PI_LOCK(pip); 5148 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5149 ct = MDI_PI(pip)->pi_client; 5150 i_mdi_client_lock(ct, pip); 5151 cdip = ct->ct_dip; 5152 switch (MDI_CLIENT_STATE(ct)) { 5153 case MDI_CLIENT_STATE_OPTIMAL: 5154 case MDI_CLIENT_STATE_DEGRADED: 5155 if (cdip) { 5156 MDI_PI_UNLOCK(pip); 5157 i_mdi_client_unlock(ct); 5158 MDI_PHCI_UNLOCK(ph); 5159 (void) ndi_devi_online(cdip, 0); 5160 MDI_PHCI_LOCK(ph); 5161 pip = next; 5162 continue; 5163 } 5164 break; 5165 5166 case MDI_CLIENT_STATE_FAILED: 5167 if (cdip) { 5168 MDI_PI_UNLOCK(pip); 5169 i_mdi_client_unlock(ct); 5170 MDI_PHCI_UNLOCK(ph); 5171 (void) ndi_devi_offline(cdip, 5172 NDI_DEVFS_CLEAN); 5173 MDI_PHCI_LOCK(ph); 5174 pip = next; 5175 continue; 5176 } 5177 break; 5178 } 5179 MDI_PI_UNLOCK(pip); 5180 i_mdi_client_unlock(ct); 5181 pip = next; 5182 } 5183 MDI_PHCI_UNLOCK(ph); 5184 return (NDI_BUSY); 5185 } 5186 5187 /* 5188 * Mark the pHCI as offline 5189 */ 5190 MDI_PHCI_SET_OFFLINE(ph); 5191 5192 /* 5193 * Mark the child mdi_pathinfo nodes as transient 5194 */ 5195 pip = ph->ph_path_head; 5196 while (pip != NULL) { 5197 MDI_PI_LOCK(pip); 5198 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5199 MDI_PI_SET_OFFLINING(pip); 5200 MDI_PI_UNLOCK(pip); 5201 pip = next; 5202 } 5203 MDI_PHCI_UNLOCK(ph); 5204 /* 5205 * Give a chance for any pending commands to execute 5206 */ 5207 delay_random(5); 5208 MDI_PHCI_LOCK(ph); 5209 pip = ph->ph_path_head; 5210 while (pip != NULL) { 5211 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5212 (void) i_mdi_pi_offline(pip, flags); 5213 MDI_PI_LOCK(pip); 5214 ct = MDI_PI(pip)->pi_client; 5215 if (!MDI_PI_IS_OFFLINE(pip)) { 5216 MDI_DEBUG(1, (MDI_WARN, dip, 
5217 "!pHCI device is busy. " 5218 "This device can not be removed at this moment. " 5219 "Please try again later.")); 5220 MDI_PI_UNLOCK(pip); 5221 MDI_PHCI_SET_ONLINE(ph); 5222 MDI_PHCI_UNLOCK(ph); 5223 return (NDI_BUSY); 5224 } 5225 MDI_PI_UNLOCK(pip); 5226 pip = next; 5227 } 5228 MDI_PHCI_UNLOCK(ph); 5229 5230 return (rv); 5231 } 5232 5233 void 5234 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5235 { 5236 mdi_phci_t *ph; 5237 mdi_client_t *ct; 5238 mdi_pathinfo_t *pip; 5239 mdi_pathinfo_t *next; 5240 dev_info_t *cdip; 5241 5242 if (!MDI_PHCI(dip)) 5243 return; 5244 5245 ph = i_devi_get_phci(dip); 5246 if (ph == NULL) { 5247 return; 5248 } 5249 5250 MDI_PHCI_LOCK(ph); 5251 5252 if (MDI_PHCI_IS_OFFLINE(ph)) { 5253 /* has no last path */ 5254 MDI_PHCI_UNLOCK(ph); 5255 return; 5256 } 5257 5258 pip = ph->ph_path_head; 5259 while (pip != NULL) { 5260 MDI_PI_LOCK(pip); 5261 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5262 5263 ct = MDI_PI(pip)->pi_client; 5264 i_mdi_client_lock(ct, pip); 5265 MDI_PI_UNLOCK(pip); 5266 5267 cdip = ct->ct_dip; 5268 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5269 (i_mdi_client_compute_state(ct, ph) == 5270 MDI_CLIENT_STATE_FAILED)) { 5271 /* Last path. 
Mark client dip as retiring */ 5272 i_mdi_client_unlock(ct); 5273 MDI_PHCI_UNLOCK(ph); 5274 (void) e_ddi_mark_retiring(cdip, cons_array); 5275 MDI_PHCI_LOCK(ph); 5276 pip = next; 5277 } else { 5278 i_mdi_client_unlock(ct); 5279 pip = next; 5280 } 5281 } 5282 5283 MDI_PHCI_UNLOCK(ph); 5284 5285 return; 5286 } 5287 5288 void 5289 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5290 { 5291 mdi_phci_t *ph; 5292 mdi_client_t *ct; 5293 mdi_pathinfo_t *pip; 5294 mdi_pathinfo_t *next; 5295 dev_info_t *cdip; 5296 5297 if (!MDI_PHCI(dip)) 5298 return; 5299 5300 ph = i_devi_get_phci(dip); 5301 if (ph == NULL) 5302 return; 5303 5304 MDI_PHCI_LOCK(ph); 5305 5306 if (MDI_PHCI_IS_OFFLINE(ph)) { 5307 MDI_PHCI_UNLOCK(ph); 5308 /* not last path */ 5309 return; 5310 } 5311 5312 if (ph->ph_unstable) { 5313 MDI_PHCI_UNLOCK(ph); 5314 /* can't check for constraints */ 5315 *constraint = 0; 5316 return; 5317 } 5318 5319 pip = ph->ph_path_head; 5320 while (pip != NULL) { 5321 MDI_PI_LOCK(pip); 5322 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5323 5324 /* 5325 * The mdi_pathinfo state is OK. Check the client state. 5326 * If failover in progress fail the pHCI from offlining 5327 */ 5328 ct = MDI_PI(pip)->pi_client; 5329 i_mdi_client_lock(ct, pip); 5330 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5331 (ct->ct_unstable)) { 5332 /* 5333 * Failover is in progress, can't check for constraints 5334 */ 5335 MDI_PI_UNLOCK(pip); 5336 i_mdi_client_unlock(ct); 5337 MDI_PHCI_UNLOCK(ph); 5338 *constraint = 0; 5339 return; 5340 } 5341 MDI_PI_UNLOCK(pip); 5342 5343 /* 5344 * Check to see of we are retiring the last path of this 5345 * client device... 
5346 */ 5347 cdip = ct->ct_dip; 5348 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5349 (i_mdi_client_compute_state(ct, ph) == 5350 MDI_CLIENT_STATE_FAILED)) { 5351 i_mdi_client_unlock(ct); 5352 MDI_PHCI_UNLOCK(ph); 5353 (void) e_ddi_retire_notify(cdip, constraint); 5354 MDI_PHCI_LOCK(ph); 5355 pip = next; 5356 } else { 5357 i_mdi_client_unlock(ct); 5358 pip = next; 5359 } 5360 } 5361 5362 MDI_PHCI_UNLOCK(ph); 5363 5364 return; 5365 } 5366 5367 /* 5368 * offline the path(s) hanging off the pHCI. If the 5369 * last path to any client, check that constraints 5370 * have been applied. 5371 */ 5372 void 5373 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) 5374 { 5375 mdi_phci_t *ph; 5376 mdi_client_t *ct; 5377 mdi_pathinfo_t *pip; 5378 mdi_pathinfo_t *next; 5379 dev_info_t *cdip; 5380 int unstable = 0; 5381 int constraint; 5382 5383 if (!MDI_PHCI(dip)) 5384 return; 5385 5386 ph = i_devi_get_phci(dip); 5387 if (ph == NULL) { 5388 /* no last path and no pips */ 5389 return; 5390 } 5391 5392 MDI_PHCI_LOCK(ph); 5393 5394 if (MDI_PHCI_IS_OFFLINE(ph)) { 5395 MDI_PHCI_UNLOCK(ph); 5396 /* no last path and no pips */ 5397 return; 5398 } 5399 5400 /* 5401 * Check to see if the pHCI can be offlined 5402 */ 5403 if (ph->ph_unstable) { 5404 unstable = 1; 5405 } 5406 5407 pip = ph->ph_path_head; 5408 while (pip != NULL) { 5409 MDI_PI_LOCK(pip); 5410 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5411 5412 /* 5413 * if failover in progress fail the pHCI from offlining 5414 */ 5415 ct = MDI_PI(pip)->pi_client; 5416 i_mdi_client_lock(ct, pip); 5417 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5418 (ct->ct_unstable)) { 5419 unstable = 1; 5420 } 5421 MDI_PI_UNLOCK(pip); 5422 5423 /* 5424 * Check to see of we are removing the last path of this 5425 * client device... 
5426 */ 5427 cdip = ct->ct_dip; 5428 if (!phci_only && cdip && 5429 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5430 (i_mdi_client_compute_state(ct, ph) == 5431 MDI_CLIENT_STATE_FAILED)) { 5432 i_mdi_client_unlock(ct); 5433 MDI_PHCI_UNLOCK(ph); 5434 /* 5435 * We don't retire clients we just retire the 5436 * path to a client. If it is the last path 5437 * to a client, constraints are checked and 5438 * if we pass the last path is offlined. MPXIO will 5439 * then fail all I/Os to the client. Since we don't 5440 * want to retire the client on a path error 5441 * set constraint = 0 so that the client dip 5442 * is not retired. 5443 */ 5444 constraint = 0; 5445 (void) e_ddi_retire_finalize(cdip, &constraint); 5446 MDI_PHCI_LOCK(ph); 5447 pip = next; 5448 } else { 5449 i_mdi_client_unlock(ct); 5450 pip = next; 5451 } 5452 } 5453 5454 /* 5455 * Cannot offline pip(s) 5456 */ 5457 if (unstable) { 5458 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: " 5459 "pHCI in transient state, cannot retire", 5460 ddi_driver_name(dip), ddi_get_instance(dip)); 5461 MDI_PHCI_UNLOCK(ph); 5462 return; 5463 } 5464 5465 /* 5466 * Mark the pHCI as offline 5467 */ 5468 MDI_PHCI_SET_OFFLINE(ph); 5469 5470 /* 5471 * Mark the child mdi_pathinfo nodes as transient 5472 */ 5473 pip = ph->ph_path_head; 5474 while (pip != NULL) { 5475 MDI_PI_LOCK(pip); 5476 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5477 MDI_PI_SET_OFFLINING(pip); 5478 MDI_PI_UNLOCK(pip); 5479 pip = next; 5480 } 5481 MDI_PHCI_UNLOCK(ph); 5482 /* 5483 * Give a chance for any pending commands to execute 5484 */ 5485 delay_random(5); 5486 MDI_PHCI_LOCK(ph); 5487 pip = ph->ph_path_head; 5488 while (pip != NULL) { 5489 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5490 (void) i_mdi_pi_offline(pip, 0); 5491 MDI_PI_LOCK(pip); 5492 ct = MDI_PI(pip)->pi_client; 5493 if (!MDI_PI_IS_OFFLINE(pip)) { 5494 cmn_err(CE_WARN, "mdi_phci_retire_finalize: " 5495 "path %d %s busy, cannot offline", 5496 mdi_pi_get_path_instance(pip), 
5497 mdi_pi_spathname(pip)); 5498 MDI_PI_UNLOCK(pip); 5499 MDI_PHCI_SET_ONLINE(ph); 5500 MDI_PHCI_UNLOCK(ph); 5501 return; 5502 } 5503 MDI_PI_UNLOCK(pip); 5504 pip = next; 5505 } 5506 MDI_PHCI_UNLOCK(ph); 5507 5508 return; 5509 } 5510 5511 void 5512 mdi_phci_unretire(dev_info_t *dip) 5513 { 5514 ASSERT(MDI_PHCI(dip)); 5515 5516 /* 5517 * Online the phci 5518 */ 5519 i_mdi_phci_online(dip); 5520 } 5521 5522 /*ARGSUSED*/ 5523 static int 5524 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5525 { 5526 int rv = NDI_SUCCESS; 5527 mdi_client_t *ct; 5528 5529 /* 5530 * Client component to go offline. Make sure that we are 5531 * not in failing over state and update client state 5532 * accordingly 5533 */ 5534 ct = i_devi_get_client(dip); 5535 MDI_DEBUG(2, (MDI_NOTE, dip, 5536 "called %p %p", (void *)dip, (void *)ct)); 5537 if (ct != NULL) { 5538 MDI_CLIENT_LOCK(ct); 5539 if (ct->ct_unstable) { 5540 /* 5541 * One or more paths are in transient state, 5542 * Dont allow offline of a client device 5543 */ 5544 MDI_DEBUG(1, (MDI_WARN, dip, 5545 "!One or more paths to " 5546 "this device are in transient state. " 5547 "This device can not be removed at this moment. " 5548 "Please try again later.")); 5549 MDI_CLIENT_UNLOCK(ct); 5550 return (NDI_BUSY); 5551 } 5552 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5553 /* 5554 * Failover is in progress, Dont allow DR of 5555 * a client device 5556 */ 5557 MDI_DEBUG(1, (MDI_WARN, dip, 5558 "!Client device is Busy. " 5559 "This device can not be removed at this moment. 
" 5560 "Please try again later.")); 5561 MDI_CLIENT_UNLOCK(ct); 5562 return (NDI_BUSY); 5563 } 5564 MDI_CLIENT_SET_OFFLINE(ct); 5565 5566 /* 5567 * Unbind our relationship with the dev_info node 5568 */ 5569 if (flags & NDI_DEVI_REMOVE) { 5570 ct->ct_dip = NULL; 5571 } 5572 MDI_CLIENT_UNLOCK(ct); 5573 } 5574 return (rv); 5575 } 5576 5577 /* 5578 * mdi_pre_attach(): 5579 * Pre attach() notification handler 5580 */ 5581 /*ARGSUSED*/ 5582 int 5583 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5584 { 5585 /* don't support old DDI_PM_RESUME */ 5586 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5587 (cmd == DDI_PM_RESUME)) 5588 return (DDI_FAILURE); 5589 5590 return (DDI_SUCCESS); 5591 } 5592 5593 /* 5594 * mdi_post_attach(): 5595 * Post attach() notification handler 5596 */ 5597 /*ARGSUSED*/ 5598 void 5599 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5600 { 5601 mdi_phci_t *ph; 5602 mdi_client_t *ct; 5603 mdi_vhci_t *vh; 5604 5605 if (MDI_PHCI(dip)) { 5606 ph = i_devi_get_phci(dip); 5607 ASSERT(ph != NULL); 5608 5609 MDI_PHCI_LOCK(ph); 5610 switch (cmd) { 5611 case DDI_ATTACH: 5612 MDI_DEBUG(2, (MDI_NOTE, dip, 5613 "phci post_attach called %p", (void *)ph)); 5614 if (error == DDI_SUCCESS) { 5615 MDI_PHCI_SET_ATTACH(ph); 5616 } else { 5617 MDI_DEBUG(1, (MDI_NOTE, dip, 5618 "!pHCI post_attach failed: error %d", 5619 error)); 5620 MDI_PHCI_SET_DETACH(ph); 5621 } 5622 break; 5623 5624 case DDI_RESUME: 5625 MDI_DEBUG(2, (MDI_NOTE, dip, 5626 "pHCI post_resume: called %p", (void *)ph)); 5627 if (error == DDI_SUCCESS) { 5628 MDI_PHCI_SET_RESUME(ph); 5629 } else { 5630 MDI_DEBUG(1, (MDI_NOTE, dip, 5631 "!pHCI post_resume failed: error %d", 5632 error)); 5633 MDI_PHCI_SET_SUSPEND(ph); 5634 } 5635 break; 5636 } 5637 MDI_PHCI_UNLOCK(ph); 5638 } 5639 5640 if (MDI_CLIENT(dip)) { 5641 ct = i_devi_get_client(dip); 5642 ASSERT(ct != NULL); 5643 5644 MDI_CLIENT_LOCK(ct); 5645 switch (cmd) { 5646 case DDI_ATTACH: 5647 MDI_DEBUG(2, (MDI_NOTE, 
dip, 5648 "client post_attach called %p", (void *)ct)); 5649 if (error != DDI_SUCCESS) { 5650 MDI_DEBUG(1, (MDI_NOTE, dip, 5651 "!client post_attach failed: error %d", 5652 error)); 5653 MDI_CLIENT_SET_DETACH(ct); 5654 MDI_DEBUG(4, (MDI_WARN, dip, 5655 "i_mdi_pm_reset_client")); 5656 i_mdi_pm_reset_client(ct); 5657 break; 5658 } 5659 5660 /* 5661 * Client device has successfully attached, inform 5662 * the vhci. 5663 */ 5664 vh = ct->ct_vhci; 5665 if (vh->vh_ops->vo_client_attached) 5666 (*vh->vh_ops->vo_client_attached)(dip); 5667 5668 MDI_CLIENT_SET_ATTACH(ct); 5669 break; 5670 5671 case DDI_RESUME: 5672 MDI_DEBUG(2, (MDI_NOTE, dip, 5673 "client post_attach: called %p", (void *)ct)); 5674 if (error == DDI_SUCCESS) { 5675 MDI_CLIENT_SET_RESUME(ct); 5676 } else { 5677 MDI_DEBUG(1, (MDI_NOTE, dip, 5678 "!client post_resume failed: error %d", 5679 error)); 5680 MDI_CLIENT_SET_SUSPEND(ct); 5681 } 5682 break; 5683 } 5684 MDI_CLIENT_UNLOCK(ct); 5685 } 5686 } 5687 5688 /* 5689 * mdi_pre_detach(): 5690 * Pre detach notification handler 5691 */ 5692 /*ARGSUSED*/ 5693 int 5694 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5695 { 5696 int rv = DDI_SUCCESS; 5697 5698 if (MDI_CLIENT(dip)) { 5699 (void) i_mdi_client_pre_detach(dip, cmd); 5700 } 5701 5702 if (MDI_PHCI(dip)) { 5703 rv = i_mdi_phci_pre_detach(dip, cmd); 5704 } 5705 5706 return (rv); 5707 } 5708 5709 /*ARGSUSED*/ 5710 static int 5711 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5712 { 5713 int rv = DDI_SUCCESS; 5714 mdi_phci_t *ph; 5715 mdi_client_t *ct; 5716 mdi_pathinfo_t *pip; 5717 mdi_pathinfo_t *failed_pip = NULL; 5718 mdi_pathinfo_t *next; 5719 5720 ph = i_devi_get_phci(dip); 5721 if (ph == NULL) { 5722 return (rv); 5723 } 5724 5725 MDI_PHCI_LOCK(ph); 5726 switch (cmd) { 5727 case DDI_DETACH: 5728 MDI_DEBUG(2, (MDI_NOTE, dip, 5729 "pHCI pre_detach: called %p", (void *)ph)); 5730 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5731 /* 5732 * mdi_pathinfo nodes are still attached to 5733 * this pHCI. 
Fail the detach for this pHCI. 5734 */ 5735 MDI_DEBUG(2, (MDI_WARN, dip, 5736 "pHCI pre_detach: paths are still attached %p", 5737 (void *)ph)); 5738 rv = DDI_FAILURE; 5739 break; 5740 } 5741 MDI_PHCI_SET_DETACH(ph); 5742 break; 5743 5744 case DDI_SUSPEND: 5745 /* 5746 * pHCI is getting suspended. Since mpxio client 5747 * devices may not be suspended at this point, to avoid 5748 * a potential stack overflow, it is important to suspend 5749 * client devices before pHCI can be suspended. 5750 */ 5751 5752 MDI_DEBUG(2, (MDI_NOTE, dip, 5753 "pHCI pre_suspend: called %p", (void *)ph)); 5754 /* 5755 * Suspend all the client devices accessible through this pHCI 5756 */ 5757 pip = ph->ph_path_head; 5758 while (pip != NULL && rv == DDI_SUCCESS) { 5759 dev_info_t *cdip; 5760 MDI_PI_LOCK(pip); 5761 next = 5762 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5763 ct = MDI_PI(pip)->pi_client; 5764 i_mdi_client_lock(ct, pip); 5765 cdip = ct->ct_dip; 5766 MDI_PI_UNLOCK(pip); 5767 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5768 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5769 i_mdi_client_unlock(ct); 5770 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5771 DDI_SUCCESS) { 5772 /* 5773 * Suspend of one of the client 5774 * device has failed. 5775 */ 5776 MDI_DEBUG(1, (MDI_WARN, dip, 5777 "!suspend of device (%s%d) failed.", 5778 ddi_driver_name(cdip), 5779 ddi_get_instance(cdip))); 5780 failed_pip = pip; 5781 break; 5782 } 5783 } else { 5784 i_mdi_client_unlock(ct); 5785 } 5786 pip = next; 5787 } 5788 5789 if (rv == DDI_SUCCESS) { 5790 /* 5791 * Suspend of client devices is complete. Proceed 5792 * with pHCI suspend. 5793 */ 5794 MDI_PHCI_SET_SUSPEND(ph); 5795 } else { 5796 /* 5797 * Revert back all the suspended client device states 5798 * to converse. 
5799 */ 5800 pip = ph->ph_path_head; 5801 while (pip != failed_pip) { 5802 dev_info_t *cdip; 5803 MDI_PI_LOCK(pip); 5804 next = 5805 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5806 ct = MDI_PI(pip)->pi_client; 5807 i_mdi_client_lock(ct, pip); 5808 cdip = ct->ct_dip; 5809 MDI_PI_UNLOCK(pip); 5810 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5811 i_mdi_client_unlock(ct); 5812 (void) devi_attach(cdip, DDI_RESUME); 5813 } else { 5814 i_mdi_client_unlock(ct); 5815 } 5816 pip = next; 5817 } 5818 } 5819 break; 5820 5821 default: 5822 rv = DDI_FAILURE; 5823 break; 5824 } 5825 MDI_PHCI_UNLOCK(ph); 5826 return (rv); 5827 } 5828 5829 /*ARGSUSED*/ 5830 static int 5831 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5832 { 5833 int rv = DDI_SUCCESS; 5834 mdi_client_t *ct; 5835 5836 ct = i_devi_get_client(dip); 5837 if (ct == NULL) { 5838 return (rv); 5839 } 5840 5841 MDI_CLIENT_LOCK(ct); 5842 switch (cmd) { 5843 case DDI_DETACH: 5844 MDI_DEBUG(2, (MDI_NOTE, dip, 5845 "client pre_detach: called %p", 5846 (void *)ct)); 5847 MDI_CLIENT_SET_DETACH(ct); 5848 break; 5849 5850 case DDI_SUSPEND: 5851 MDI_DEBUG(2, (MDI_NOTE, dip, 5852 "client pre_suspend: called %p", 5853 (void *)ct)); 5854 MDI_CLIENT_SET_SUSPEND(ct); 5855 break; 5856 5857 default: 5858 rv = DDI_FAILURE; 5859 break; 5860 } 5861 MDI_CLIENT_UNLOCK(ct); 5862 return (rv); 5863 } 5864 5865 /* 5866 * mdi_post_detach(): 5867 * Post detach notification handler 5868 */ 5869 /*ARGSUSED*/ 5870 void 5871 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5872 { 5873 /* 5874 * Detach/Suspend of mpxio component failed. Update our state 5875 * too 5876 */ 5877 if (MDI_PHCI(dip)) 5878 i_mdi_phci_post_detach(dip, cmd, error); 5879 5880 if (MDI_CLIENT(dip)) 5881 i_mdi_client_post_detach(dip, cmd, error); 5882 } 5883 5884 /*ARGSUSED*/ 5885 static void 5886 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5887 { 5888 mdi_phci_t *ph; 5889 5890 /* 5891 * Detach/Suspend of phci component failed. 
Update our state 5892 * too 5893 */ 5894 ph = i_devi_get_phci(dip); 5895 if (ph == NULL) { 5896 return; 5897 } 5898 5899 MDI_PHCI_LOCK(ph); 5900 /* 5901 * Detach of pHCI failed. Restore back converse 5902 * state 5903 */ 5904 switch (cmd) { 5905 case DDI_DETACH: 5906 MDI_DEBUG(2, (MDI_NOTE, dip, 5907 "pHCI post_detach: called %p", 5908 (void *)ph)); 5909 if (error != DDI_SUCCESS) 5910 MDI_PHCI_SET_ATTACH(ph); 5911 break; 5912 5913 case DDI_SUSPEND: 5914 MDI_DEBUG(2, (MDI_NOTE, dip, 5915 "pHCI post_suspend: called %p", 5916 (void *)ph)); 5917 if (error != DDI_SUCCESS) 5918 MDI_PHCI_SET_RESUME(ph); 5919 break; 5920 } 5921 MDI_PHCI_UNLOCK(ph); 5922 } 5923 5924 /*ARGSUSED*/ 5925 static void 5926 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5927 { 5928 mdi_client_t *ct; 5929 5930 ct = i_devi_get_client(dip); 5931 if (ct == NULL) { 5932 return; 5933 } 5934 MDI_CLIENT_LOCK(ct); 5935 /* 5936 * Detach of Client failed. Restore back converse 5937 * state 5938 */ 5939 switch (cmd) { 5940 case DDI_DETACH: 5941 MDI_DEBUG(2, (MDI_NOTE, dip, 5942 "client post_detach: called %p", (void *)ct)); 5943 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5944 MDI_DEBUG(4, (MDI_NOTE, dip, 5945 "i_mdi_pm_rele_client\n")); 5946 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5947 } else { 5948 MDI_DEBUG(4, (MDI_NOTE, dip, 5949 "i_mdi_pm_reset_client\n")); 5950 i_mdi_pm_reset_client(ct); 5951 } 5952 if (error != DDI_SUCCESS) 5953 MDI_CLIENT_SET_ATTACH(ct); 5954 break; 5955 5956 case DDI_SUSPEND: 5957 MDI_DEBUG(2, (MDI_NOTE, dip, 5958 "called %p", (void *)ct)); 5959 if (error != DDI_SUCCESS) 5960 MDI_CLIENT_SET_RESUME(ct); 5961 break; 5962 } 5963 MDI_CLIENT_UNLOCK(ct); 5964 } 5965 5966 int 5967 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5968 { 5969 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5970 } 5971 5972 /* 5973 * create and install per-path (client - pHCI) statistics 5974 * I/O stats supported: nread, nwritten, reads, and writes 5975 * Error stats - hard errors, soft errors, & transport errors 5976 */ 5977 int 5978 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5979 { 5980 kstat_t *kiosp, *kerrsp; 5981 struct pi_errs *nsp; 5982 struct mdi_pi_kstats *mdi_statp; 5983 5984 if (MDI_PI(pip)->pi_kstats != NULL) 5985 return (MDI_SUCCESS); 5986 5987 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5988 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5989 return (MDI_FAILURE); 5990 } 5991 5992 (void) strcat(ksname, ",err"); 5993 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5994 KSTAT_TYPE_NAMED, 5995 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5996 if (kerrsp == NULL) { 5997 kstat_delete(kiosp); 5998 return (MDI_FAILURE); 5999 } 6000 6001 nsp = (struct pi_errs *)kerrsp->ks_data; 6002 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 6003 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 6004 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 6005 KSTAT_DATA_UINT32); 6006 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 6007 KSTAT_DATA_UINT32); 6008 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 6009 KSTAT_DATA_UINT32); 6010 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 6011 KSTAT_DATA_UINT32); 6012 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 6013 KSTAT_DATA_UINT32); 6014 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 6015 KSTAT_DATA_UINT32); 6016 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 6017 KSTAT_DATA_UINT32); 6018 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 6019 6020 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 6021 mdi_statp->pi_kstat_ref = 1; 6022 mdi_statp->pi_kstat_iostats = kiosp; 6023 mdi_statp->pi_kstat_errstats = kerrsp; 6024 
kstat_install(kiosp); 6025 kstat_install(kerrsp); 6026 MDI_PI(pip)->pi_kstats = mdi_statp; 6027 return (MDI_SUCCESS); 6028 } 6029 6030 /* 6031 * destroy per-path properties 6032 */ 6033 static void 6034 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 6035 { 6036 6037 struct mdi_pi_kstats *mdi_statp; 6038 6039 if (MDI_PI(pip)->pi_kstats == NULL) 6040 return; 6041 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 6042 return; 6043 6044 MDI_PI(pip)->pi_kstats = NULL; 6045 6046 /* 6047 * the kstat may be shared between multiple pathinfo nodes 6048 * decrement this pathinfo's usage, removing the kstats 6049 * themselves when the last pathinfo reference is removed. 6050 */ 6051 ASSERT(mdi_statp->pi_kstat_ref > 0); 6052 if (--mdi_statp->pi_kstat_ref != 0) 6053 return; 6054 6055 kstat_delete(mdi_statp->pi_kstat_iostats); 6056 kstat_delete(mdi_statp->pi_kstat_errstats); 6057 kmem_free(mdi_statp, sizeof (*mdi_statp)); 6058 } 6059 6060 /* 6061 * update I/O paths KSTATS 6062 */ 6063 void 6064 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 6065 { 6066 kstat_t *iostatp; 6067 size_t xfer_cnt; 6068 6069 ASSERT(pip != NULL); 6070 6071 /* 6072 * I/O can be driven across a path prior to having path 6073 * statistics available, i.e. probe(9e). 6074 */ 6075 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 6076 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 6077 xfer_cnt = bp->b_bcount - bp->b_resid; 6078 if (bp->b_flags & B_READ) { 6079 KSTAT_IO_PTR(iostatp)->reads++; 6080 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 6081 } else { 6082 KSTAT_IO_PTR(iostatp)->writes++; 6083 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 6084 } 6085 } 6086 } 6087 6088 /* 6089 * Enable the path(specific client/target/initiator) 6090 * Enabling a path means that MPxIO may select the enabled path for routing 6091 * future I/O requests, subject to other path state constraints. 
6092 */ 6093 int 6094 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 6095 { 6096 mdi_phci_t *ph; 6097 6098 ph = MDI_PI(pip)->pi_phci; 6099 if (ph == NULL) { 6100 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6101 "!failed: path %s %p: NULL ph", 6102 mdi_pi_spathname(pip), (void *)pip)); 6103 return (MDI_FAILURE); 6104 } 6105 6106 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 6107 MDI_ENABLE_OP); 6108 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6109 "!returning success pip = %p. ph = %p", 6110 (void *)pip, (void *)ph)); 6111 return (MDI_SUCCESS); 6112 6113 } 6114 6115 /* 6116 * Disable the path (specific client/target/initiator) 6117 * Disabling a path means that MPxIO will not select the disabled path for 6118 * routing any new I/O requests. 6119 */ 6120 int 6121 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 6122 { 6123 mdi_phci_t *ph; 6124 6125 ph = MDI_PI(pip)->pi_phci; 6126 if (ph == NULL) { 6127 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6128 "!failed: path %s %p: NULL ph", 6129 mdi_pi_spathname(pip), (void *)pip)); 6130 return (MDI_FAILURE); 6131 } 6132 6133 (void) i_mdi_enable_disable_path(pip, 6134 ph->ph_vhci, flags, MDI_DISABLE_OP); 6135 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6136 "!returning success pip = %p. ph = %p", 6137 (void *)pip, (void *)ph)); 6138 return (MDI_SUCCESS); 6139 } 6140 6141 /* 6142 * disable the path to a particular pHCI (pHCI specified in the phci_path 6143 * argument) for a particular client (specified in the client_path argument). 6144 * Disabling a path means that MPxIO will not select the disabled path for 6145 * routing any new I/O requests. 
6146 * NOTE: this will be removed once the NWS files are changed to use the new 6147 * mdi_{enable,disable}_path interfaces 6148 */ 6149 int 6150 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6151 { 6152 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 6153 } 6154 6155 /* 6156 * Enable the path to a particular pHCI (pHCI specified in the phci_path 6157 * argument) for a particular client (specified in the client_path argument). 6158 * Enabling a path means that MPxIO may select the enabled path for routing 6159 * future I/O requests, subject to other path state constraints. 6160 * NOTE: this will be removed once the NWS files are changed to use the new 6161 * mdi_{enable,disable}_path interfaces 6162 */ 6163 6164 int 6165 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6166 { 6167 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 6168 } 6169 6170 /* 6171 * Common routine for doing enable/disable. 6172 */ 6173 static mdi_pathinfo_t * 6174 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6175 int op) 6176 { 6177 int sync_flag = 0; 6178 int rv; 6179 mdi_pathinfo_t *next; 6180 int (*f)() = NULL; 6181 6182 /* 6183 * Check to make sure the path is not already in the 6184 * requested state. If it is just return the next path 6185 * as we have nothing to do here. 6186 */ 6187 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6188 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6189 MDI_PI_LOCK(pip); 6190 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6191 MDI_PI_UNLOCK(pip); 6192 return (next); 6193 } 6194 6195 f = vh->vh_ops->vo_pi_state_change; 6196 6197 sync_flag = (flags << 8) & 0xf00; 6198 6199 /* 6200 * Do a callback into the mdi consumer to let it 6201 * know that path is about to get enabled/disabled. 
6202 */ 6203 if (f != NULL) { 6204 rv = (*f)(vh->vh_dip, pip, 0, 6205 MDI_PI_EXT_STATE(pip), 6206 MDI_EXT_STATE_CHANGE | sync_flag | 6207 op | MDI_BEFORE_STATE_CHANGE); 6208 if (rv != MDI_SUCCESS) { 6209 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6210 "vo_pi_state_change: failed rv = %x", rv)); 6211 } 6212 } 6213 MDI_PI_LOCK(pip); 6214 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6215 6216 switch (flags) { 6217 case USER_DISABLE: 6218 if (op == MDI_DISABLE_OP) { 6219 MDI_PI_SET_USER_DISABLE(pip); 6220 } else { 6221 MDI_PI_SET_USER_ENABLE(pip); 6222 } 6223 break; 6224 case DRIVER_DISABLE: 6225 if (op == MDI_DISABLE_OP) { 6226 MDI_PI_SET_DRV_DISABLE(pip); 6227 } else { 6228 MDI_PI_SET_DRV_ENABLE(pip); 6229 } 6230 break; 6231 case DRIVER_DISABLE_TRANSIENT: 6232 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6233 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6234 } else { 6235 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6236 } 6237 break; 6238 } 6239 MDI_PI_UNLOCK(pip); 6240 /* 6241 * Do a callback into the mdi consumer to let it 6242 * know that path is now enabled/disabled. 6243 */ 6244 if (f != NULL) { 6245 rv = (*f)(vh->vh_dip, pip, 0, 6246 MDI_PI_EXT_STATE(pip), 6247 MDI_EXT_STATE_CHANGE | sync_flag | 6248 op | MDI_AFTER_STATE_CHANGE); 6249 if (rv != MDI_SUCCESS) { 6250 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6251 "vo_pi_state_change failed: rv = %x", rv)); 6252 } 6253 } 6254 return (next); 6255 } 6256 6257 /* 6258 * Common routine for doing enable/disable. 6259 * NOTE: this will be removed once the NWS files are changed to use the new 6260 * mdi_{enable,disable}_path has been putback 6261 */ 6262 int 6263 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6264 { 6265 6266 mdi_phci_t *ph; 6267 mdi_vhci_t *vh = NULL; 6268 mdi_client_t *ct; 6269 mdi_pathinfo_t *next, *pip; 6270 int found_it; 6271 6272 ph = i_devi_get_phci(pdip); 6273 MDI_DEBUG(5, (MDI_NOTE, cdip ? 
cdip : pdip, 6274 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6275 (void *)cdip)); 6276 if (ph == NULL) { 6277 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6278 "!failed: operation %d: NULL ph", op)); 6279 return (MDI_FAILURE); 6280 } 6281 6282 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6283 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6284 "!failed: invalid operation %d", op)); 6285 return (MDI_FAILURE); 6286 } 6287 6288 vh = ph->ph_vhci; 6289 6290 if (cdip == NULL) { 6291 /* 6292 * Need to mark the Phci as enabled/disabled. 6293 */ 6294 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip, 6295 "op %d for the phci", op)); 6296 MDI_PHCI_LOCK(ph); 6297 switch (flags) { 6298 case USER_DISABLE: 6299 if (op == MDI_DISABLE_OP) { 6300 MDI_PHCI_SET_USER_DISABLE(ph); 6301 } else { 6302 MDI_PHCI_SET_USER_ENABLE(ph); 6303 } 6304 break; 6305 case DRIVER_DISABLE: 6306 if (op == MDI_DISABLE_OP) { 6307 MDI_PHCI_SET_DRV_DISABLE(ph); 6308 } else { 6309 MDI_PHCI_SET_DRV_ENABLE(ph); 6310 } 6311 break; 6312 case DRIVER_DISABLE_TRANSIENT: 6313 if (op == MDI_DISABLE_OP) { 6314 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6315 } else { 6316 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6317 } 6318 break; 6319 default: 6320 MDI_PHCI_UNLOCK(ph); 6321 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6322 "!invalid flag argument= %d", flags)); 6323 } 6324 6325 /* 6326 * Phci has been disabled. Now try to enable/disable 6327 * path info's to each client. 6328 */ 6329 pip = ph->ph_path_head; 6330 while (pip != NULL) { 6331 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6332 } 6333 MDI_PHCI_UNLOCK(ph); 6334 } else { 6335 6336 /* 6337 * Disable a specific client. 6338 */ 6339 ct = i_devi_get_client(cdip); 6340 if (ct == NULL) { 6341 MDI_DEBUG(1, (MDI_NOTE, cdip ? 
cdip : pdip, 6342 "!failed: operation = %d: NULL ct", op)); 6343 return (MDI_FAILURE); 6344 } 6345 6346 MDI_CLIENT_LOCK(ct); 6347 pip = ct->ct_path_head; 6348 found_it = 0; 6349 while (pip != NULL) { 6350 MDI_PI_LOCK(pip); 6351 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6352 if (MDI_PI(pip)->pi_phci == ph) { 6353 MDI_PI_UNLOCK(pip); 6354 found_it = 1; 6355 break; 6356 } 6357 MDI_PI_UNLOCK(pip); 6358 pip = next; 6359 } 6360 6361 6362 MDI_CLIENT_UNLOCK(ct); 6363 if (found_it == 0) { 6364 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6365 "!failed. Could not find corresponding pip\n")); 6366 return (MDI_FAILURE); 6367 } 6368 6369 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6370 } 6371 6372 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6373 "!op %d returning success pdip = %p cdip = %p", 6374 op, (void *)pdip, (void *)cdip)); 6375 return (MDI_SUCCESS); 6376 } 6377 6378 /* 6379 * Ensure phci powered up 6380 */ 6381 static void 6382 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6383 { 6384 dev_info_t *ph_dip; 6385 6386 ASSERT(pip != NULL); 6387 ASSERT(MDI_PI_LOCKED(pip)); 6388 6389 if (MDI_PI(pip)->pi_pm_held) { 6390 return; 6391 } 6392 6393 ph_dip = mdi_pi_get_phci(pip); 6394 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6395 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6396 if (ph_dip == NULL) { 6397 return; 6398 } 6399 6400 MDI_PI_UNLOCK(pip); 6401 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d", 6402 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6403 pm_hold_power(ph_dip); 6404 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d", 6405 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6406 MDI_PI_LOCK(pip); 6407 6408 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6409 if (DEVI(ph_dip)->devi_pm_info) 6410 MDI_PI(pip)->pi_pm_held = 1; 6411 } 6412 6413 /* 6414 * Allow phci powered down 6415 */ 6416 static void 6417 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6418 { 6419 dev_info_t *ph_dip = NULL; 6420 6421 ASSERT(pip != NULL); 6422 ASSERT(MDI_PI_LOCKED(pip)); 6423 6424 if 
(MDI_PI(pip)->pi_pm_held == 0) { 6425 return; 6426 } 6427 6428 ph_dip = mdi_pi_get_phci(pip); 6429 ASSERT(ph_dip != NULL); 6430 6431 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6432 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6433 6434 MDI_PI_UNLOCK(pip); 6435 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6436 "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6437 pm_rele_power(ph_dip); 6438 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6439 "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6440 MDI_PI_LOCK(pip); 6441 6442 MDI_PI(pip)->pi_pm_held = 0; 6443 } 6444 6445 static void 6446 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6447 { 6448 ASSERT(MDI_CLIENT_LOCKED(ct)); 6449 6450 ct->ct_power_cnt += incr; 6451 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6452 "%p ct_power_cnt = %d incr = %d", 6453 (void *)ct, ct->ct_power_cnt, incr)); 6454 ASSERT(ct->ct_power_cnt >= 0); 6455 } 6456 6457 static void 6458 i_mdi_rele_all_phci(mdi_client_t *ct) 6459 { 6460 mdi_pathinfo_t *pip; 6461 6462 ASSERT(MDI_CLIENT_LOCKED(ct)); 6463 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6464 while (pip != NULL) { 6465 mdi_hold_path(pip); 6466 MDI_PI_LOCK(pip); 6467 i_mdi_pm_rele_pip(pip); 6468 MDI_PI_UNLOCK(pip); 6469 mdi_rele_path(pip); 6470 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6471 } 6472 } 6473 6474 static void 6475 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6476 { 6477 ASSERT(MDI_CLIENT_LOCKED(ct)); 6478 6479 if (i_ddi_devi_attached(ct->ct_dip)) { 6480 ct->ct_power_cnt -= decr; 6481 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6482 "%p ct_power_cnt = %d decr = %d", 6483 (void *)ct, ct->ct_power_cnt, decr)); 6484 } 6485 6486 ASSERT(ct->ct_power_cnt >= 0); 6487 if (ct->ct_power_cnt == 0) { 6488 i_mdi_rele_all_phci(ct); 6489 return; 6490 } 6491 } 6492 6493 static void 6494 i_mdi_pm_reset_client(mdi_client_t *ct) 6495 { 6496 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6497 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt)); 6498 ASSERT(MDI_CLIENT_LOCKED(ct)); 6499 ct->ct_power_cnt = 0; 6500 i_mdi_rele_all_phci(ct); 
6501 ct->ct_powercnt_config = 0; 6502 ct->ct_powercnt_unconfig = 0; 6503 ct->ct_powercnt_reset = 1; 6504 } 6505 6506 static int 6507 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6508 { 6509 int ret; 6510 dev_info_t *ph_dip; 6511 6512 MDI_PI_LOCK(pip); 6513 i_mdi_pm_hold_pip(pip); 6514 6515 ph_dip = mdi_pi_get_phci(pip); 6516 MDI_PI_UNLOCK(pip); 6517 6518 /* bring all components of phci to full power */ 6519 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6520 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip), 6521 ddi_get_instance(ph_dip), (void *)pip)); 6522 6523 ret = pm_powerup(ph_dip); 6524 6525 if (ret == DDI_FAILURE) { 6526 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6527 "pm_powerup FAILED for %s%d %p", 6528 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), 6529 (void *)pip)); 6530 6531 MDI_PI_LOCK(pip); 6532 i_mdi_pm_rele_pip(pip); 6533 MDI_PI_UNLOCK(pip); 6534 return (MDI_FAILURE); 6535 } 6536 6537 return (MDI_SUCCESS); 6538 } 6539 6540 static int 6541 i_mdi_power_all_phci(mdi_client_t *ct) 6542 { 6543 mdi_pathinfo_t *pip; 6544 int succeeded = 0; 6545 6546 ASSERT(MDI_CLIENT_LOCKED(ct)); 6547 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6548 while (pip != NULL) { 6549 /* 6550 * Don't power if MDI_PATHINFO_STATE_FAULT 6551 * or MDI_PATHINFO_STATE_OFFLINE. 6552 */ 6553 if (MDI_PI_IS_INIT(pip) || 6554 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6555 mdi_hold_path(pip); 6556 MDI_CLIENT_UNLOCK(ct); 6557 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6558 succeeded = 1; 6559 6560 ASSERT(ct == MDI_PI(pip)->pi_client); 6561 MDI_CLIENT_LOCK(ct); 6562 mdi_rele_path(pip); 6563 } 6564 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6565 } 6566 6567 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6568 } 6569 6570 /* 6571 * mdi_bus_power(): 6572 * 1. Place the phci(s) into powered up state so that 6573 * client can do power management 6574 * 2. 
Ensure phci powered up as client power managing 6575 * Return Values: 6576 * MDI_SUCCESS 6577 * MDI_FAILURE 6578 */ 6579 int 6580 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6581 void *arg, void *result) 6582 { 6583 int ret = MDI_SUCCESS; 6584 pm_bp_child_pwrchg_t *bpc; 6585 mdi_client_t *ct; 6586 dev_info_t *cdip; 6587 pm_bp_has_changed_t *bphc; 6588 6589 /* 6590 * BUS_POWER_NOINVOL not supported 6591 */ 6592 if (op == BUS_POWER_NOINVOL) 6593 return (MDI_FAILURE); 6594 6595 /* 6596 * ignore other OPs. 6597 * return quickly to save cou cycles on the ct processing 6598 */ 6599 switch (op) { 6600 case BUS_POWER_PRE_NOTIFICATION: 6601 case BUS_POWER_POST_NOTIFICATION: 6602 bpc = (pm_bp_child_pwrchg_t *)arg; 6603 cdip = bpc->bpc_dip; 6604 break; 6605 case BUS_POWER_HAS_CHANGED: 6606 bphc = (pm_bp_has_changed_t *)arg; 6607 cdip = bphc->bphc_dip; 6608 break; 6609 default: 6610 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6611 } 6612 6613 ASSERT(MDI_CLIENT(cdip)); 6614 6615 ct = i_devi_get_client(cdip); 6616 if (ct == NULL) 6617 return (MDI_FAILURE); 6618 6619 /* 6620 * wait till the mdi_pathinfo node state change are processed 6621 */ 6622 MDI_CLIENT_LOCK(ct); 6623 switch (op) { 6624 case BUS_POWER_PRE_NOTIFICATION: 6625 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6626 "BUS_POWER_PRE_NOTIFICATION:" 6627 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6628 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6629 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6630 6631 /* serialize power level change per client */ 6632 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6633 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6634 6635 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6636 6637 if (ct->ct_power_cnt == 0) { 6638 ret = i_mdi_power_all_phci(ct); 6639 } 6640 6641 /* 6642 * if new_level > 0: 6643 * - hold phci(s) 6644 * - power up phci(s) if not already 6645 * ignore power down 6646 */ 6647 if (bpc->bpc_nlevel > 0) { 6648 if 
(!DEVI_IS_ATTACHING(ct->ct_dip)) { 6649 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6650 "i_mdi_pm_hold_client\n")); 6651 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6652 } 6653 } 6654 break; 6655 case BUS_POWER_POST_NOTIFICATION: 6656 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6657 "BUS_POWER_POST_NOTIFICATION:" 6658 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d", 6659 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6660 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6661 *(int *)result)); 6662 6663 if (*(int *)result == DDI_SUCCESS) { 6664 if (bpc->bpc_nlevel > 0) { 6665 MDI_CLIENT_SET_POWER_UP(ct); 6666 } else { 6667 MDI_CLIENT_SET_POWER_DOWN(ct); 6668 } 6669 } 6670 6671 /* release the hold we did in pre-notification */ 6672 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6673 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6674 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6675 "i_mdi_pm_rele_client\n")); 6676 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6677 } 6678 6679 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6680 /* another thread might started attaching */ 6681 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6682 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6683 "i_mdi_pm_rele_client\n")); 6684 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6685 /* detaching has been taken care in pm_post_unconfig */ 6686 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6687 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6688 "i_mdi_pm_reset_client\n")); 6689 i_mdi_pm_reset_client(ct); 6690 } 6691 } 6692 6693 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6694 cv_broadcast(&ct->ct_powerchange_cv); 6695 6696 break; 6697 6698 /* need to do more */ 6699 case BUS_POWER_HAS_CHANGED: 6700 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6701 "BUS_POWER_HAS_CHANGED:" 6702 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6703 ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6704 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6705 6706 if (bphc->bphc_nlevel > 0 && 6707 bphc->bphc_nlevel > bphc->bphc_olevel) { 
6708 if (ct->ct_power_cnt == 0) { 6709 ret = i_mdi_power_all_phci(ct); 6710 } 6711 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6712 "i_mdi_pm_hold_client\n")); 6713 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6714 } 6715 6716 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6717 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6718 "i_mdi_pm_rele_client\n")); 6719 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6720 } 6721 break; 6722 } 6723 6724 MDI_CLIENT_UNLOCK(ct); 6725 return (ret); 6726 } 6727 6728 static int 6729 i_mdi_pm_pre_config_one(dev_info_t *child) 6730 { 6731 int ret = MDI_SUCCESS; 6732 mdi_client_t *ct; 6733 6734 ct = i_devi_get_client(child); 6735 if (ct == NULL) 6736 return (MDI_FAILURE); 6737 6738 MDI_CLIENT_LOCK(ct); 6739 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6740 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6741 6742 if (!MDI_CLIENT_IS_FAILED(ct)) { 6743 MDI_CLIENT_UNLOCK(ct); 6744 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n")); 6745 return (MDI_SUCCESS); 6746 } 6747 6748 if (ct->ct_powercnt_config) { 6749 MDI_CLIENT_UNLOCK(ct); 6750 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n")); 6751 return (MDI_SUCCESS); 6752 } 6753 6754 if (ct->ct_power_cnt == 0) { 6755 ret = i_mdi_power_all_phci(ct); 6756 } 6757 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6758 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6759 ct->ct_powercnt_config = 1; 6760 ct->ct_powercnt_reset = 0; 6761 MDI_CLIENT_UNLOCK(ct); 6762 return (ret); 6763 } 6764 6765 static int 6766 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6767 { 6768 int ret = MDI_SUCCESS; 6769 dev_info_t *cdip; 6770 int circ; 6771 6772 ASSERT(MDI_VHCI(vdip)); 6773 6774 /* ndi_devi_config_one */ 6775 if (child) { 6776 ASSERT(DEVI_BUSY_OWNED(vdip)); 6777 return (i_mdi_pm_pre_config_one(child)); 6778 } 6779 6780 /* devi_config_common */ 6781 ndi_devi_enter(vdip, &circ); 6782 cdip = ddi_get_child(vdip); 6783 while (cdip) { 6784 dev_info_t *next = ddi_get_next_sibling(cdip); 6785 6786 
ret = i_mdi_pm_pre_config_one(cdip); 6787 if (ret != MDI_SUCCESS) 6788 break; 6789 cdip = next; 6790 } 6791 ndi_devi_exit(vdip, circ); 6792 return (ret); 6793 } 6794 6795 static int 6796 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6797 { 6798 int ret = MDI_SUCCESS; 6799 mdi_client_t *ct; 6800 6801 ct = i_devi_get_client(child); 6802 if (ct == NULL) 6803 return (MDI_FAILURE); 6804 6805 MDI_CLIENT_LOCK(ct); 6806 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6807 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6808 6809 if (!i_ddi_devi_attached(ct->ct_dip)) { 6810 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n")); 6811 MDI_CLIENT_UNLOCK(ct); 6812 return (MDI_SUCCESS); 6813 } 6814 6815 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6816 (flags & NDI_AUTODETACH)) { 6817 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n")); 6818 MDI_CLIENT_UNLOCK(ct); 6819 return (MDI_FAILURE); 6820 } 6821 6822 if (ct->ct_powercnt_unconfig) { 6823 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n")); 6824 MDI_CLIENT_UNLOCK(ct); 6825 *held = 1; 6826 return (MDI_SUCCESS); 6827 } 6828 6829 if (ct->ct_power_cnt == 0) { 6830 ret = i_mdi_power_all_phci(ct); 6831 } 6832 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6833 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6834 ct->ct_powercnt_unconfig = 1; 6835 ct->ct_powercnt_reset = 0; 6836 MDI_CLIENT_UNLOCK(ct); 6837 if (ret == MDI_SUCCESS) 6838 *held = 1; 6839 return (ret); 6840 } 6841 6842 static int 6843 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6844 int flags) 6845 { 6846 int ret = MDI_SUCCESS; 6847 dev_info_t *cdip; 6848 int circ; 6849 6850 ASSERT(MDI_VHCI(vdip)); 6851 *held = 0; 6852 6853 /* ndi_devi_unconfig_one */ 6854 if (child) { 6855 ASSERT(DEVI_BUSY_OWNED(vdip)); 6856 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6857 } 6858 6859 /* devi_unconfig_common */ 6860 ndi_devi_enter(vdip, &circ); 6861 cdip = ddi_get_child(vdip); 6862 while (cdip) { 6863 dev_info_t *next = 
ddi_get_next_sibling(cdip); 6864 6865 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6866 cdip = next; 6867 } 6868 ndi_devi_exit(vdip, circ); 6869 6870 if (*held) 6871 ret = MDI_SUCCESS; 6872 6873 return (ret); 6874 } 6875 6876 static void 6877 i_mdi_pm_post_config_one(dev_info_t *child) 6878 { 6879 mdi_client_t *ct; 6880 6881 ct = i_devi_get_client(child); 6882 if (ct == NULL) 6883 return; 6884 6885 MDI_CLIENT_LOCK(ct); 6886 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6887 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6888 6889 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6890 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n")); 6891 MDI_CLIENT_UNLOCK(ct); 6892 return; 6893 } 6894 6895 /* client has not been updated */ 6896 if (MDI_CLIENT_IS_FAILED(ct)) { 6897 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n")); 6898 MDI_CLIENT_UNLOCK(ct); 6899 return; 6900 } 6901 6902 /* another thread might have powered it down or detached it */ 6903 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6904 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6905 (!i_ddi_devi_attached(ct->ct_dip) && 6906 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6907 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 6908 i_mdi_pm_reset_client(ct); 6909 } else { 6910 mdi_pathinfo_t *pip, *next; 6911 int valid_path_count = 0; 6912 6913 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 6914 pip = ct->ct_path_head; 6915 while (pip != NULL) { 6916 MDI_PI_LOCK(pip); 6917 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6918 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6919 valid_path_count ++; 6920 MDI_PI_UNLOCK(pip); 6921 pip = next; 6922 } 6923 i_mdi_pm_rele_client(ct, valid_path_count); 6924 } 6925 ct->ct_powercnt_config = 0; 6926 MDI_CLIENT_UNLOCK(ct); 6927 } 6928 6929 static void 6930 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6931 { 6932 int circ; 6933 dev_info_t *cdip; 6934 6935 ASSERT(MDI_VHCI(vdip)); 6936 6937 /* ndi_devi_config_one */ 6938 if (child) { 6939 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6940 i_mdi_pm_post_config_one(child); 6941 return; 6942 } 6943 6944 /* devi_config_common */ 6945 ndi_devi_enter(vdip, &circ); 6946 cdip = ddi_get_child(vdip); 6947 while (cdip) { 6948 dev_info_t *next = ddi_get_next_sibling(cdip); 6949 6950 i_mdi_pm_post_config_one(cdip); 6951 cdip = next; 6952 } 6953 ndi_devi_exit(vdip, circ); 6954 } 6955 6956 static void 6957 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6958 { 6959 mdi_client_t *ct; 6960 6961 ct = i_devi_get_client(child); 6962 if (ct == NULL) 6963 return; 6964 6965 MDI_CLIENT_LOCK(ct); 6966 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6967 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6968 6969 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6970 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n")); 6971 MDI_CLIENT_UNLOCK(ct); 6972 return; 6973 } 6974 6975 /* failure detaching or another thread just attached it */ 6976 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6977 i_ddi_devi_attached(ct->ct_dip)) || 6978 (!i_ddi_devi_attached(ct->ct_dip) && 6979 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6980 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 6981 i_mdi_pm_reset_client(ct); 6982 } else { 6983 mdi_pathinfo_t *pip, *next; 6984 int valid_path_count = 0; 6985 6986 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 6987 pip = ct->ct_path_head; 6988 while (pip != NULL) { 6989 MDI_PI_LOCK(pip); 6990 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6991 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6992 valid_path_count ++; 6993 MDI_PI_UNLOCK(pip); 6994 pip = next; 6995 } 6996 i_mdi_pm_rele_client(ct, valid_path_count); 6997 ct->ct_powercnt_unconfig = 0; 6998 } 6999 7000 MDI_CLIENT_UNLOCK(ct); 7001 } 7002 7003 static void 7004 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 7005 { 7006 int circ; 7007 dev_info_t *cdip; 7008 7009 ASSERT(MDI_VHCI(vdip)); 7010 7011 if (!held) { 7012 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held)); 7013 return; 
7014 } 7015 7016 if (child) { 7017 ASSERT(DEVI_BUSY_OWNED(vdip)); 7018 i_mdi_pm_post_unconfig_one(child); 7019 return; 7020 } 7021 7022 ndi_devi_enter(vdip, &circ); 7023 cdip = ddi_get_child(vdip); 7024 while (cdip) { 7025 dev_info_t *next = ddi_get_next_sibling(cdip); 7026 7027 i_mdi_pm_post_unconfig_one(cdip); 7028 cdip = next; 7029 } 7030 ndi_devi_exit(vdip, circ); 7031 } 7032 7033 int 7034 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 7035 { 7036 int circ, ret = MDI_SUCCESS; 7037 dev_info_t *client_dip = NULL; 7038 mdi_client_t *ct; 7039 7040 /* 7041 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 7042 * Power up pHCI for the named client device. 7043 * Note: Before the client is enumerated under vhci by phci, 7044 * client_dip can be NULL. Then proceed to power up all the 7045 * pHCIs. 7046 */ 7047 if (devnm != NULL) { 7048 ndi_devi_enter(vdip, &circ); 7049 client_dip = ndi_devi_findchild(vdip, devnm); 7050 } 7051 7052 MDI_DEBUG(4, (MDI_NOTE, vdip, 7053 "op = %d %s %p", op, devnm ? 
devnm : "", (void *)client_dip)); 7054 7055 switch (op) { 7056 case MDI_PM_PRE_CONFIG: 7057 ret = i_mdi_pm_pre_config(vdip, client_dip); 7058 break; 7059 7060 case MDI_PM_PRE_UNCONFIG: 7061 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 7062 flags); 7063 break; 7064 7065 case MDI_PM_POST_CONFIG: 7066 i_mdi_pm_post_config(vdip, client_dip); 7067 break; 7068 7069 case MDI_PM_POST_UNCONFIG: 7070 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 7071 break; 7072 7073 case MDI_PM_HOLD_POWER: 7074 case MDI_PM_RELE_POWER: 7075 ASSERT(args); 7076 7077 client_dip = (dev_info_t *)args; 7078 ASSERT(MDI_CLIENT(client_dip)); 7079 7080 ct = i_devi_get_client(client_dip); 7081 MDI_CLIENT_LOCK(ct); 7082 7083 if (op == MDI_PM_HOLD_POWER) { 7084 if (ct->ct_power_cnt == 0) { 7085 (void) i_mdi_power_all_phci(ct); 7086 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7087 "i_mdi_pm_hold_client\n")); 7088 i_mdi_pm_hold_client(ct, ct->ct_path_count); 7089 } 7090 } else { 7091 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 7092 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7093 "i_mdi_pm_rele_client\n")); 7094 i_mdi_pm_rele_client(ct, ct->ct_path_count); 7095 } else { 7096 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7097 "i_mdi_pm_reset_client\n")); 7098 i_mdi_pm_reset_client(ct); 7099 } 7100 } 7101 7102 MDI_CLIENT_UNLOCK(ct); 7103 break; 7104 7105 default: 7106 break; 7107 } 7108 7109 if (devnm) 7110 ndi_devi_exit(vdip, circ); 7111 7112 return (ret); 7113 } 7114 7115 int 7116 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 7117 { 7118 mdi_vhci_t *vhci; 7119 7120 if (!MDI_VHCI(dip)) 7121 return (MDI_FAILURE); 7122 7123 if (mdi_class) { 7124 vhci = DEVI(dip)->devi_mdi_xhci; 7125 ASSERT(vhci); 7126 *mdi_class = vhci->vh_class; 7127 } 7128 7129 return (MDI_SUCCESS); 7130 } 7131 7132 int 7133 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 7134 { 7135 mdi_phci_t *phci; 7136 7137 if (!MDI_PHCI(dip)) 7138 return (MDI_FAILURE); 7139 7140 if (mdi_class) { 7141 phci = 
DEVI(dip)->devi_mdi_xhci; 7142 ASSERT(phci); 7143 *mdi_class = phci->ph_vhci->vh_class; 7144 } 7145 7146 return (MDI_SUCCESS); 7147 } 7148 7149 int 7150 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 7151 { 7152 mdi_client_t *client; 7153 7154 if (!MDI_CLIENT(dip)) 7155 return (MDI_FAILURE); 7156 7157 if (mdi_class) { 7158 client = DEVI(dip)->devi_mdi_client; 7159 ASSERT(client); 7160 *mdi_class = client->ct_vhci->vh_class; 7161 } 7162 7163 return (MDI_SUCCESS); 7164 } 7165 7166 void * 7167 mdi_client_get_vhci_private(dev_info_t *dip) 7168 { 7169 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7170 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7171 mdi_client_t *ct; 7172 ct = i_devi_get_client(dip); 7173 return (ct->ct_vprivate); 7174 } 7175 return (NULL); 7176 } 7177 7178 void 7179 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7180 { 7181 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7182 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7183 mdi_client_t *ct; 7184 ct = i_devi_get_client(dip); 7185 ct->ct_vprivate = data; 7186 } 7187 } 7188 /* 7189 * mdi_pi_get_vhci_private(): 7190 * Get the vhci private information associated with the 7191 * mdi_pathinfo node 7192 */ 7193 void * 7194 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7195 { 7196 caddr_t vprivate = NULL; 7197 if (pip) { 7198 vprivate = MDI_PI(pip)->pi_vprivate; 7199 } 7200 return (vprivate); 7201 } 7202 7203 /* 7204 * mdi_pi_set_vhci_private(): 7205 * Set the vhci private information in the mdi_pathinfo node 7206 */ 7207 void 7208 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 7209 { 7210 if (pip) { 7211 MDI_PI(pip)->pi_vprivate = priv; 7212 } 7213 } 7214 7215 /* 7216 * mdi_phci_get_vhci_private(): 7217 * Get the vhci private information associated with the 7218 * mdi_phci node 7219 */ 7220 void * 7221 mdi_phci_get_vhci_private(dev_info_t *dip) 7222 { 7223 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7224 if 
(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7225 mdi_phci_t *ph; 7226 ph = i_devi_get_phci(dip); 7227 return (ph->ph_vprivate); 7228 } 7229 return (NULL); 7230 } 7231 7232 /* 7233 * mdi_phci_set_vhci_private(): 7234 * Set the vhci private information in the mdi_phci node 7235 */ 7236 void 7237 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7238 { 7239 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7240 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7241 mdi_phci_t *ph; 7242 ph = i_devi_get_phci(dip); 7243 ph->ph_vprivate = priv; 7244 } 7245 } 7246 7247 int 7248 mdi_pi_ishidden(mdi_pathinfo_t *pip) 7249 { 7250 return (MDI_PI_FLAGS_IS_HIDDEN(pip)); 7251 } 7252 7253 int 7254 mdi_pi_device_isremoved(mdi_pathinfo_t *pip) 7255 { 7256 return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)); 7257 } 7258 7259 /* 7260 * When processing hotplug, if mdi_pi_offline-mdi_pi_free fails then this 7261 * interface is used to represent device removal. 7262 */ 7263 int 7264 mdi_pi_device_remove(mdi_pathinfo_t *pip) 7265 { 7266 MDI_PI_LOCK(pip); 7267 if (mdi_pi_device_isremoved(pip)) { 7268 MDI_PI_UNLOCK(pip); 7269 return (0); 7270 } 7271 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip); 7272 MDI_PI_FLAGS_SET_HIDDEN(pip); 7273 MDI_PI_UNLOCK(pip); 7274 7275 i_ddi_di_cache_invalidate(); 7276 7277 return (1); 7278 } 7279 7280 /* 7281 * When processing hotplug, if a path marked mdi_pi_device_isremoved() 7282 * is now accessible then this interfaces is used to represent device insertion. 
7283 */ 7284 int 7285 mdi_pi_device_insert(mdi_pathinfo_t *pip) 7286 { 7287 MDI_PI_LOCK(pip); 7288 if (!mdi_pi_device_isremoved(pip)) { 7289 MDI_PI_UNLOCK(pip); 7290 return (0); 7291 } 7292 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip); 7293 MDI_PI_FLAGS_CLR_HIDDEN(pip); 7294 MDI_PI_UNLOCK(pip); 7295 7296 i_ddi_di_cache_invalidate(); 7297 7298 return (1); 7299 } 7300 7301 /* 7302 * List of vhci class names: 7303 * A vhci class name must be in this list only if the corresponding vhci 7304 * driver intends to use the mdi provided bus config implementation 7305 * (i.e., mdi_vhci_bus_config()). 7306 */ 7307 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7308 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7309 7310 /* 7311 * During boot time, the on-disk vhci cache for every vhci class is read 7312 * in the form of an nvlist and stored here. 7313 */ 7314 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7315 7316 /* nvpair names in vhci cache nvlist */ 7317 #define MDI_VHCI_CACHE_VERSION 1 7318 #define MDI_NVPNAME_VERSION "version" 7319 #define MDI_NVPNAME_PHCIS "phcis" 7320 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7321 7322 /* 7323 * Given vhci class name, return its on-disk vhci cache filename. 7324 * Memory for the returned filename which includes the full path is allocated 7325 * by this function. 7326 */ 7327 static char * 7328 vhclass2vhcache_filename(char *vhclass) 7329 { 7330 char *filename; 7331 int len; 7332 static char *fmt = "/etc/devices/mdi_%s_cache"; 7333 7334 /* 7335 * fmt contains the on-disk vhci cache file name format; 7336 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
7337 */ 7338 7339 /* the -1 below is to account for "%s" in the format string */ 7340 len = strlen(fmt) + strlen(vhclass) - 1; 7341 filename = kmem_alloc(len, KM_SLEEP); 7342 (void) snprintf(filename, len, fmt, vhclass); 7343 ASSERT(len == (strlen(filename) + 1)); 7344 return (filename); 7345 } 7346 7347 /* 7348 * initialize the vhci cache related data structures and read the on-disk 7349 * vhci cached data into memory. 7350 */ 7351 static void 7352 setup_vhci_cache(mdi_vhci_t *vh) 7353 { 7354 mdi_vhci_config_t *vhc; 7355 mdi_vhci_cache_t *vhcache; 7356 int i; 7357 nvlist_t *nvl = NULL; 7358 7359 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7360 vh->vh_config = vhc; 7361 vhcache = &vhc->vhc_vhcache; 7362 7363 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7364 7365 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7366 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7367 7368 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7369 7370 /* 7371 * Create string hash; same as mod_hash_create_strhash() except that 7372 * we use NULL key destructor. 7373 */ 7374 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7375 mdi_bus_config_cache_hash_size, 7376 mod_hash_null_keydtor, mod_hash_null_valdtor, 7377 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7378 7379 /* 7380 * The on-disk vhci cache is read during booting prior to the 7381 * lights-out period by mdi_read_devices_files(). 7382 */ 7383 for (i = 0; i < N_VHCI_CLASSES; i++) { 7384 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7385 nvl = vhcache_nvl[i]; 7386 vhcache_nvl[i] = NULL; 7387 break; 7388 } 7389 } 7390 7391 /* 7392 * this is to cover the case of some one manually causing unloading 7393 * (or detaching) and reloading (or attaching) of a vhci driver. 
7394 */ 7395 if (nvl == NULL && modrootloaded) 7396 nvl = read_on_disk_vhci_cache(vh->vh_class); 7397 7398 if (nvl != NULL) { 7399 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7400 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7401 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7402 else { 7403 cmn_err(CE_WARN, 7404 "%s: data file corrupted, will recreate", 7405 vhc->vhc_vhcache_filename); 7406 } 7407 rw_exit(&vhcache->vhcache_lock); 7408 nvlist_free(nvl); 7409 } 7410 7411 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7412 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7413 7414 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7415 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7416 } 7417 7418 /* 7419 * free all vhci cache related resources 7420 */ 7421 static int 7422 destroy_vhci_cache(mdi_vhci_t *vh) 7423 { 7424 mdi_vhci_config_t *vhc = vh->vh_config; 7425 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7426 mdi_vhcache_phci_t *cphci, *cphci_next; 7427 mdi_vhcache_client_t *cct, *cct_next; 7428 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7429 7430 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7431 return (MDI_FAILURE); 7432 7433 kmem_free(vhc->vhc_vhcache_filename, 7434 strlen(vhc->vhc_vhcache_filename) + 1); 7435 7436 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7437 7438 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7439 cphci = cphci_next) { 7440 cphci_next = cphci->cphci_next; 7441 free_vhcache_phci(cphci); 7442 } 7443 7444 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7445 cct_next = cct->cct_next; 7446 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7447 cpi_next = cpi->cpi_next; 7448 free_vhcache_pathinfo(cpi); 7449 } 7450 free_vhcache_client(cct); 7451 } 7452 7453 rw_destroy(&vhcache->vhcache_lock); 7454 7455 mutex_destroy(&vhc->vhc_lock); 7456 cv_destroy(&vhc->vhc_cv); 7457 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7458 return (MDI_SUCCESS); 
7459 } 7460 7461 /* 7462 * Stop all vhci cache related async threads and free their resources. 7463 */ 7464 static int 7465 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7466 { 7467 mdi_async_client_config_t *acc, *acc_next; 7468 7469 mutex_enter(&vhc->vhc_lock); 7470 vhc->vhc_flags |= MDI_VHC_EXIT; 7471 ASSERT(vhc->vhc_acc_thrcount >= 0); 7472 cv_broadcast(&vhc->vhc_cv); 7473 7474 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7475 vhc->vhc_acc_thrcount != 0) { 7476 mutex_exit(&vhc->vhc_lock); 7477 delay_random(5); 7478 mutex_enter(&vhc->vhc_lock); 7479 } 7480 7481 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7482 7483 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7484 acc_next = acc->acc_next; 7485 free_async_client_config(acc); 7486 } 7487 vhc->vhc_acc_list_head = NULL; 7488 vhc->vhc_acc_list_tail = NULL; 7489 vhc->vhc_acc_count = 0; 7490 7491 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7492 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7493 mutex_exit(&vhc->vhc_lock); 7494 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7495 vhcache_dirty(vhc); 7496 return (MDI_FAILURE); 7497 } 7498 } else 7499 mutex_exit(&vhc->vhc_lock); 7500 7501 if (callb_delete(vhc->vhc_cbid) != 0) 7502 return (MDI_FAILURE); 7503 7504 return (MDI_SUCCESS); 7505 } 7506 7507 /* 7508 * Stop vhci cache flush thread 7509 */ 7510 /* ARGSUSED */ 7511 static boolean_t 7512 stop_vhcache_flush_thread(void *arg, int code) 7513 { 7514 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7515 7516 mutex_enter(&vhc->vhc_lock); 7517 vhc->vhc_flags |= MDI_VHC_EXIT; 7518 cv_broadcast(&vhc->vhc_cv); 7519 7520 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7521 mutex_exit(&vhc->vhc_lock); 7522 delay_random(5); 7523 mutex_enter(&vhc->vhc_lock); 7524 } 7525 7526 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7527 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7528 mutex_exit(&vhc->vhc_lock); 7529 (void) flush_vhcache(vhc, 1); 7530 } else 7531 mutex_exit(&vhc->vhc_lock); 7532 7533 return 
(B_TRUE); 7534 } 7535 7536 /* 7537 * Enqueue the vhcache phci (cphci) at the tail of the list 7538 */ 7539 static void 7540 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7541 { 7542 cphci->cphci_next = NULL; 7543 if (vhcache->vhcache_phci_head == NULL) 7544 vhcache->vhcache_phci_head = cphci; 7545 else 7546 vhcache->vhcache_phci_tail->cphci_next = cphci; 7547 vhcache->vhcache_phci_tail = cphci; 7548 } 7549 7550 /* 7551 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7552 */ 7553 static void 7554 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7555 mdi_vhcache_pathinfo_t *cpi) 7556 { 7557 cpi->cpi_next = NULL; 7558 if (cct->cct_cpi_head == NULL) 7559 cct->cct_cpi_head = cpi; 7560 else 7561 cct->cct_cpi_tail->cpi_next = cpi; 7562 cct->cct_cpi_tail = cpi; 7563 } 7564 7565 /* 7566 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7567 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7568 * flag set come at the beginning of the list. All cpis which have this 7569 * flag set come at the end of the list. 
7570 */ 7571 static void 7572 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7573 mdi_vhcache_pathinfo_t *newcpi) 7574 { 7575 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7576 7577 if (cct->cct_cpi_head == NULL || 7578 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7579 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7580 else { 7581 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7582 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7583 prev_cpi = cpi, cpi = cpi->cpi_next) 7584 ; 7585 7586 if (prev_cpi == NULL) 7587 cct->cct_cpi_head = newcpi; 7588 else 7589 prev_cpi->cpi_next = newcpi; 7590 7591 newcpi->cpi_next = cpi; 7592 7593 if (cpi == NULL) 7594 cct->cct_cpi_tail = newcpi; 7595 } 7596 } 7597 7598 /* 7599 * Enqueue the vhcache client (cct) at the tail of the list 7600 */ 7601 static void 7602 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7603 mdi_vhcache_client_t *cct) 7604 { 7605 cct->cct_next = NULL; 7606 if (vhcache->vhcache_client_head == NULL) 7607 vhcache->vhcache_client_head = cct; 7608 else 7609 vhcache->vhcache_client_tail->cct_next = cct; 7610 vhcache->vhcache_client_tail = cct; 7611 } 7612 7613 static void 7614 free_string_array(char **str, int nelem) 7615 { 7616 int i; 7617 7618 if (str) { 7619 for (i = 0; i < nelem; i++) { 7620 if (str[i]) 7621 kmem_free(str[i], strlen(str[i]) + 1); 7622 } 7623 kmem_free(str, sizeof (char *) * nelem); 7624 } 7625 } 7626 7627 static void 7628 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7629 { 7630 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7631 kmem_free(cphci, sizeof (*cphci)); 7632 } 7633 7634 static void 7635 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7636 { 7637 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7638 kmem_free(cpi, sizeof (*cpi)); 7639 } 7640 7641 static void 7642 free_vhcache_client(mdi_vhcache_client_t *cct) 7643 { 7644 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7645 kmem_free(cct, sizeof (*cct)); 7646 } 7647 7648 
static char * 7649 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7650 { 7651 char *name_addr; 7652 int len; 7653 7654 len = strlen(ct_name) + strlen(ct_addr) + 2; 7655 name_addr = kmem_alloc(len, KM_SLEEP); 7656 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7657 7658 if (ret_len) 7659 *ret_len = len; 7660 return (name_addr); 7661 } 7662 7663 /* 7664 * Copy the contents of paddrnvl to vhci cache. 7665 * paddrnvl nvlist contains path information for a vhci client. 7666 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7667 */ 7668 static void 7669 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7670 mdi_vhcache_client_t *cct) 7671 { 7672 nvpair_t *nvp = NULL; 7673 mdi_vhcache_pathinfo_t *cpi; 7674 uint_t nelem; 7675 uint32_t *val; 7676 7677 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7678 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7679 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7680 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7681 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7682 ASSERT(nelem == 2); 7683 cpi->cpi_cphci = cphci_list[val[0]]; 7684 cpi->cpi_flags = val[1]; 7685 enqueue_tail_vhcache_pathinfo(cct, cpi); 7686 } 7687 } 7688 7689 /* 7690 * Copy the contents of caddrmapnvl to vhci cache. 7691 * caddrmapnvl nvlist contains vhci client address to phci client address 7692 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7693 * this nvlist. 
7694 */ 7695 static void 7696 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7697 mdi_vhcache_phci_t *cphci_list[]) 7698 { 7699 nvpair_t *nvp = NULL; 7700 nvlist_t *paddrnvl; 7701 mdi_vhcache_client_t *cct; 7702 7703 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7704 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7705 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7706 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7707 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7708 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7709 /* the client must contain at least one path */ 7710 ASSERT(cct->cct_cpi_head != NULL); 7711 7712 enqueue_vhcache_client(vhcache, cct); 7713 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7714 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7715 } 7716 } 7717 7718 /* 7719 * Copy the contents of the main nvlist to vhci cache. 7720 * 7721 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7722 * The nvlist contains the mappings between the vhci client addresses and 7723 * their corresponding phci client addresses. 7724 * 7725 * The structure of the nvlist is as follows: 7726 * 7727 * Main nvlist: 7728 * NAME TYPE DATA 7729 * version int32 version number 7730 * phcis string array array of phci paths 7731 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7732 * 7733 * structure of c2paddrs_nvl: 7734 * NAME TYPE DATA 7735 * caddr1 nvlist_t paddrs_nvl1 7736 * caddr2 nvlist_t paddrs_nvl2 7737 * ... 7738 * where caddr1, caddr2, ... are vhci client name and addresses in the 7739 * form of "<clientname>@<clientaddress>". 7740 * (for example: "ssd@2000002037cd9f72"); 7741 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7742 * 7743 * structure of paddrs_nvl: 7744 * NAME TYPE DATA 7745 * pi_addr1 uint32_array (phci-id, cpi_flags) 7746 * pi_addr2 uint32_array (phci-id, cpi_flags) 7747 * ... 7748 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7749 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7750 * phci-ids are integers that identify pHCIs to which the 7751 * the bus specific address belongs to. These integers are used as an index 7752 * into to the phcis string array in the main nvlist to get the pHCI path. 7753 */ 7754 static int 7755 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7756 { 7757 char **phcis, **phci_namep; 7758 uint_t nphcis; 7759 mdi_vhcache_phci_t *cphci, **cphci_list; 7760 nvlist_t *caddrmapnvl; 7761 int32_t ver; 7762 int i; 7763 size_t cphci_list_size; 7764 7765 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7766 7767 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7768 ver != MDI_VHCI_CACHE_VERSION) 7769 return (MDI_FAILURE); 7770 7771 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7772 &nphcis) != 0) 7773 return (MDI_SUCCESS); 7774 7775 ASSERT(nphcis > 0); 7776 7777 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7778 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7779 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7780 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7781 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7782 enqueue_vhcache_phci(vhcache, cphci); 7783 cphci_list[i] = cphci; 7784 } 7785 7786 ASSERT(vhcache->vhcache_phci_head != NULL); 7787 7788 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7789 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7790 7791 kmem_free(cphci_list, cphci_list_size); 7792 return (MDI_SUCCESS); 7793 } 7794 7795 /* 7796 * Build paddrnvl for the specified client using the information in the 7797 * vhci cache and add it to the caddrmapnnvl. 7798 * Returns 0 on success, errno on failure. 
7799 */ 7800 static int 7801 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7802 nvlist_t *caddrmapnvl) 7803 { 7804 mdi_vhcache_pathinfo_t *cpi; 7805 nvlist_t *nvl; 7806 int err; 7807 uint32_t val[2]; 7808 7809 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7810 7811 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7812 return (err); 7813 7814 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7815 val[0] = cpi->cpi_cphci->cphci_id; 7816 val[1] = cpi->cpi_flags; 7817 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7818 != 0) 7819 goto out; 7820 } 7821 7822 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7823 out: 7824 nvlist_free(nvl); 7825 return (err); 7826 } 7827 7828 /* 7829 * Build caddrmapnvl using the information in the vhci cache 7830 * and add it to the mainnvl. 7831 * Returns 0 on success, errno on failure. 7832 */ 7833 static int 7834 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7835 { 7836 mdi_vhcache_client_t *cct; 7837 nvlist_t *nvl; 7838 int err; 7839 7840 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7841 7842 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7843 return (err); 7844 7845 for (cct = vhcache->vhcache_client_head; cct != NULL; 7846 cct = cct->cct_next) { 7847 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7848 goto out; 7849 } 7850 7851 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7852 out: 7853 nvlist_free(nvl); 7854 return (err); 7855 } 7856 7857 /* 7858 * Build nvlist using the information in the vhci cache. 7859 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7860 * Returns nvl on success, NULL on failure. 
7861 */ 7862 static nvlist_t * 7863 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7864 { 7865 mdi_vhcache_phci_t *cphci; 7866 uint_t phci_count; 7867 char **phcis; 7868 nvlist_t *nvl; 7869 int err, i; 7870 7871 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7872 nvl = NULL; 7873 goto out; 7874 } 7875 7876 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7877 MDI_VHCI_CACHE_VERSION)) != 0) 7878 goto out; 7879 7880 rw_enter(&vhcache->vhcache_lock, RW_READER); 7881 if (vhcache->vhcache_phci_head == NULL) { 7882 rw_exit(&vhcache->vhcache_lock); 7883 return (nvl); 7884 } 7885 7886 phci_count = 0; 7887 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7888 cphci = cphci->cphci_next) 7889 cphci->cphci_id = phci_count++; 7890 7891 /* build phci pathname list */ 7892 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7893 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7894 cphci = cphci->cphci_next, i++) 7895 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7896 7897 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7898 phci_count); 7899 free_string_array(phcis, phci_count); 7900 7901 if (err == 0 && 7902 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7903 rw_exit(&vhcache->vhcache_lock); 7904 return (nvl); 7905 } 7906 7907 rw_exit(&vhcache->vhcache_lock); 7908 out: 7909 if (nvl) 7910 nvlist_free(nvl); 7911 return (NULL); 7912 } 7913 7914 /* 7915 * Lookup vhcache phci structure for the specified phci path. 7916 */ 7917 static mdi_vhcache_phci_t * 7918 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7919 { 7920 mdi_vhcache_phci_t *cphci; 7921 7922 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7923 7924 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7925 cphci = cphci->cphci_next) { 7926 if (strcmp(cphci->cphci_path, phci_path) == 0) 7927 return (cphci); 7928 } 7929 7930 return (NULL); 7931 } 7932 7933 /* 7934 * Lookup vhcache phci structure for the specified phci. 
7935 */ 7936 static mdi_vhcache_phci_t * 7937 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7938 { 7939 mdi_vhcache_phci_t *cphci; 7940 7941 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7942 7943 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7944 cphci = cphci->cphci_next) { 7945 if (cphci->cphci_phci == ph) 7946 return (cphci); 7947 } 7948 7949 return (NULL); 7950 } 7951 7952 /* 7953 * Add the specified phci to the vhci cache if not already present. 7954 */ 7955 static void 7956 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7957 { 7958 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7959 mdi_vhcache_phci_t *cphci; 7960 char *pathname; 7961 int cache_updated; 7962 7963 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7964 7965 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7966 (void) ddi_pathname(ph->ph_dip, pathname); 7967 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7968 != NULL) { 7969 cphci->cphci_phci = ph; 7970 cache_updated = 0; 7971 } else { 7972 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7973 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7974 cphci->cphci_phci = ph; 7975 enqueue_vhcache_phci(vhcache, cphci); 7976 cache_updated = 1; 7977 } 7978 7979 rw_exit(&vhcache->vhcache_lock); 7980 7981 /* 7982 * Since a new phci has been added, reset 7983 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7984 * during next vhcache_discover_paths(). 7985 */ 7986 mutex_enter(&vhc->vhc_lock); 7987 vhc->vhc_path_discovery_cutoff_time = 0; 7988 mutex_exit(&vhc->vhc_lock); 7989 7990 kmem_free(pathname, MAXPATHLEN); 7991 if (cache_updated) 7992 vhcache_dirty(vhc); 7993 } 7994 7995 /* 7996 * Remove the reference to the specified phci from the vhci cache. 
7997 */ 7998 static void 7999 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 8000 { 8001 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8002 mdi_vhcache_phci_t *cphci; 8003 8004 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8005 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 8006 /* do not remove the actual mdi_vhcache_phci structure */ 8007 cphci->cphci_phci = NULL; 8008 } 8009 rw_exit(&vhcache->vhcache_lock); 8010 } 8011 8012 static void 8013 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 8014 mdi_vhcache_lookup_token_t *src) 8015 { 8016 if (src == NULL) { 8017 dst->lt_cct = NULL; 8018 dst->lt_cct_lookup_time = 0; 8019 } else { 8020 dst->lt_cct = src->lt_cct; 8021 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 8022 } 8023 } 8024 8025 /* 8026 * Look up vhcache client for the specified client. 8027 */ 8028 static mdi_vhcache_client_t * 8029 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 8030 mdi_vhcache_lookup_token_t *token) 8031 { 8032 mod_hash_val_t hv; 8033 char *name_addr; 8034 int len; 8035 8036 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8037 8038 /* 8039 * If no vhcache clean occurred since the last lookup, we can 8040 * simply return the cct from the last lookup operation. 8041 * It works because ccts are never freed except during the vhcache 8042 * cleanup operation. 
8043 */ 8044 if (token != NULL && 8045 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 8046 return (token->lt_cct); 8047 8048 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 8049 if (mod_hash_find(vhcache->vhcache_client_hash, 8050 (mod_hash_key_t)name_addr, &hv) == 0) { 8051 if (token) { 8052 token->lt_cct = (mdi_vhcache_client_t *)hv; 8053 token->lt_cct_lookup_time = lbolt64; 8054 } 8055 } else { 8056 if (token) { 8057 token->lt_cct = NULL; 8058 token->lt_cct_lookup_time = 0; 8059 } 8060 hv = NULL; 8061 } 8062 kmem_free(name_addr, len); 8063 return ((mdi_vhcache_client_t *)hv); 8064 } 8065 8066 /* 8067 * Add the specified path to the vhci cache if not already present. 8068 * Also add the vhcache client for the client corresponding to this path 8069 * if it doesn't already exist. 8070 */ 8071 static void 8072 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8073 { 8074 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8075 mdi_vhcache_client_t *cct; 8076 mdi_vhcache_pathinfo_t *cpi; 8077 mdi_phci_t *ph = pip->pi_phci; 8078 mdi_client_t *ct = pip->pi_client; 8079 int cache_updated = 0; 8080 8081 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8082 8083 /* if vhcache client for this pip doesn't already exist, add it */ 8084 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8085 NULL)) == NULL) { 8086 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 8087 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 8088 ct->ct_guid, NULL); 8089 enqueue_vhcache_client(vhcache, cct); 8090 (void) mod_hash_insert(vhcache->vhcache_client_hash, 8091 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 8092 cache_updated = 1; 8093 } 8094 8095 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8096 if (cpi->cpi_cphci->cphci_phci == ph && 8097 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 8098 cpi->cpi_pip = pip; 8099 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 8100 cpi->cpi_flags &= 8101 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8102 sort_vhcache_paths(cct); 8103 cache_updated = 1; 8104 } 8105 break; 8106 } 8107 } 8108 8109 if (cpi == NULL) { 8110 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 8111 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 8112 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 8113 ASSERT(cpi->cpi_cphci != NULL); 8114 cpi->cpi_pip = pip; 8115 enqueue_vhcache_pathinfo(cct, cpi); 8116 cache_updated = 1; 8117 } 8118 8119 rw_exit(&vhcache->vhcache_lock); 8120 8121 if (cache_updated) 8122 vhcache_dirty(vhc); 8123 } 8124 8125 /* 8126 * Remove the reference to the specified path from the vhci cache. 8127 */ 8128 static void 8129 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8130 { 8131 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8132 mdi_client_t *ct = pip->pi_client; 8133 mdi_vhcache_client_t *cct; 8134 mdi_vhcache_pathinfo_t *cpi; 8135 8136 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8137 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8138 NULL)) != NULL) { 8139 for (cpi = cct->cct_cpi_head; cpi != NULL; 8140 cpi = cpi->cpi_next) { 8141 if (cpi->cpi_pip == pip) { 8142 cpi->cpi_pip = NULL; 8143 break; 8144 } 8145 } 8146 } 8147 rw_exit(&vhcache->vhcache_lock); 8148 } 8149 8150 /* 8151 * Flush the vhci cache to disk. 8152 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 8153 */ 8154 static int 8155 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 8156 { 8157 nvlist_t *nvl; 8158 int err; 8159 int rv; 8160 8161 /* 8162 * It is possible that the system may shutdown before 8163 * i_ddi_io_initialized (during stmsboot for example). To allow for 8164 * flushing the cache in this case do not check for 8165 * i_ddi_io_initialized when force flag is set. 
8166 */ 8167 if (force_flag == 0 && !i_ddi_io_initialized()) 8168 return (MDI_FAILURE); 8169 8170 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 8171 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 8172 nvlist_free(nvl); 8173 } else 8174 err = EFAULT; 8175 8176 rv = MDI_SUCCESS; 8177 mutex_enter(&vhc->vhc_lock); 8178 if (err != 0) { 8179 if (err == EROFS) { 8180 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 8181 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 8182 MDI_VHC_VHCACHE_DIRTY); 8183 } else { 8184 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 8185 cmn_err(CE_CONT, "%s: update failed\n", 8186 vhc->vhc_vhcache_filename); 8187 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 8188 } 8189 rv = MDI_FAILURE; 8190 } 8191 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 8192 cmn_err(CE_CONT, 8193 "%s: update now ok\n", vhc->vhc_vhcache_filename); 8194 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 8195 } 8196 mutex_exit(&vhc->vhc_lock); 8197 8198 return (rv); 8199 } 8200 8201 /* 8202 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 8203 * Exits itself if left idle for the idle timeout period. 
8204 */ 8205 static void 8206 vhcache_flush_thread(void *arg) 8207 { 8208 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8209 clock_t idle_time, quit_at_ticks; 8210 callb_cpr_t cprinfo; 8211 8212 /* number of seconds to sleep idle before exiting */ 8213 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8214 8215 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8216 "mdi_vhcache_flush"); 8217 mutex_enter(&vhc->vhc_lock); 8218 for (; ; ) { 8219 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8220 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8221 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8222 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8223 (void) cv_timedwait(&vhc->vhc_cv, 8224 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8225 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8226 } else { 8227 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8228 mutex_exit(&vhc->vhc_lock); 8229 8230 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8231 vhcache_dirty(vhc); 8232 8233 mutex_enter(&vhc->vhc_lock); 8234 } 8235 } 8236 8237 quit_at_ticks = ddi_get_lbolt() + idle_time; 8238 8239 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8240 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8241 ddi_get_lbolt() < quit_at_ticks) { 8242 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8243 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8244 quit_at_ticks); 8245 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8246 } 8247 8248 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8249 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8250 goto out; 8251 } 8252 8253 out: 8254 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8255 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8256 CALLB_CPR_EXIT(&cprinfo); 8257 } 8258 8259 /* 8260 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
8261 */ 8262 static void 8263 vhcache_dirty(mdi_vhci_config_t *vhc) 8264 { 8265 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8266 int create_thread; 8267 8268 rw_enter(&vhcache->vhcache_lock, RW_READER); 8269 /* do not flush cache until the cache is fully built */ 8270 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8271 rw_exit(&vhcache->vhcache_lock); 8272 return; 8273 } 8274 rw_exit(&vhcache->vhcache_lock); 8275 8276 mutex_enter(&vhc->vhc_lock); 8277 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8278 mutex_exit(&vhc->vhc_lock); 8279 return; 8280 } 8281 8282 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8283 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8284 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8285 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8286 cv_broadcast(&vhc->vhc_cv); 8287 create_thread = 0; 8288 } else { 8289 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8290 create_thread = 1; 8291 } 8292 mutex_exit(&vhc->vhc_lock); 8293 8294 if (create_thread) 8295 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8296 0, &p0, TS_RUN, minclsyspri); 8297 } 8298 8299 /* 8300 * phci bus config structure - one for for each phci bus config operation that 8301 * we initiate on behalf of a vhci. 
8302 */ 8303 typedef struct mdi_phci_bus_config_s { 8304 char *phbc_phci_path; 8305 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8306 struct mdi_phci_bus_config_s *phbc_next; 8307 } mdi_phci_bus_config_t; 8308 8309 /* vhci bus config structure - one for each vhci bus config operation */ 8310 typedef struct mdi_vhci_bus_config_s { 8311 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8312 major_t vhbc_op_major; /* bus config op major */ 8313 uint_t vhbc_op_flags; /* bus config op flags */ 8314 kmutex_t vhbc_lock; 8315 kcondvar_t vhbc_cv; 8316 int vhbc_thr_count; 8317 } mdi_vhci_bus_config_t; 8318 8319 /* 8320 * bus config the specified phci 8321 */ 8322 static void 8323 bus_config_phci(void *arg) 8324 { 8325 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8326 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8327 dev_info_t *ph_dip; 8328 8329 /* 8330 * first configure all path components upto phci and then configure 8331 * the phci children. 8332 */ 8333 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8334 != NULL) { 8335 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8336 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8337 (void) ndi_devi_config_driver(ph_dip, 8338 vhbc->vhbc_op_flags, 8339 vhbc->vhbc_op_major); 8340 } else 8341 (void) ndi_devi_config(ph_dip, 8342 vhbc->vhbc_op_flags); 8343 8344 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8345 ndi_rele_devi(ph_dip); 8346 } 8347 8348 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8349 kmem_free(phbc, sizeof (*phbc)); 8350 8351 mutex_enter(&vhbc->vhbc_lock); 8352 vhbc->vhbc_thr_count--; 8353 if (vhbc->vhbc_thr_count == 0) 8354 cv_broadcast(&vhbc->vhbc_cv); 8355 mutex_exit(&vhbc->vhbc_lock); 8356 } 8357 8358 /* 8359 * Bus config all phcis associated with the vhci in parallel. 8360 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8361 */ 8362 static void 8363 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8364 ddi_bus_config_op_t op, major_t maj) 8365 { 8366 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8367 mdi_vhci_bus_config_t *vhbc; 8368 mdi_vhcache_phci_t *cphci; 8369 8370 rw_enter(&vhcache->vhcache_lock, RW_READER); 8371 if (vhcache->vhcache_phci_head == NULL) { 8372 rw_exit(&vhcache->vhcache_lock); 8373 return; 8374 } 8375 8376 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8377 8378 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8379 cphci = cphci->cphci_next) { 8380 /* skip phcis that haven't attached before root is available */ 8381 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8382 continue; 8383 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8384 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8385 KM_SLEEP); 8386 phbc->phbc_vhbusconfig = vhbc; 8387 phbc->phbc_next = phbc_head; 8388 phbc_head = phbc; 8389 vhbc->vhbc_thr_count++; 8390 } 8391 rw_exit(&vhcache->vhcache_lock); 8392 8393 vhbc->vhbc_op = op; 8394 vhbc->vhbc_op_major = maj; 8395 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8396 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8397 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8398 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8399 8400 /* now create threads to initiate bus config on all phcis in parallel */ 8401 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8402 phbc_next = phbc->phbc_next; 8403 if (mdi_mtc_off) 8404 bus_config_phci((void *)phbc); 8405 else 8406 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8407 0, &p0, TS_RUN, minclsyspri); 8408 } 8409 8410 mutex_enter(&vhbc->vhbc_lock); 8411 /* wait until all threads exit */ 8412 while (vhbc->vhbc_thr_count > 0) 8413 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8414 mutex_exit(&vhbc->vhbc_lock); 8415 8416 mutex_destroy(&vhbc->vhbc_lock); 8417 cv_destroy(&vhbc->vhbc_cv); 8418 kmem_free(vhbc, sizeof (*vhbc)); 8419 } 8420 8421 /* 8422 * Single 
threaded version of bus_config_all_phcis() 8423 */ 8424 static void 8425 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8426 ddi_bus_config_op_t op, major_t maj) 8427 { 8428 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8429 8430 single_threaded_vhconfig_enter(vhc); 8431 bus_config_all_phcis(vhcache, flags, op, maj); 8432 single_threaded_vhconfig_exit(vhc); 8433 } 8434 8435 /* 8436 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8437 * The path includes the child component in addition to the phci path. 8438 */ 8439 static int 8440 bus_config_one_phci_child(char *path) 8441 { 8442 dev_info_t *ph_dip, *child; 8443 char *devnm; 8444 int rv = MDI_FAILURE; 8445 8446 /* extract the child component of the phci */ 8447 devnm = strrchr(path, '/'); 8448 *devnm++ = '\0'; 8449 8450 /* 8451 * first configure all path components upto phci and then 8452 * configure the phci child. 8453 */ 8454 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8455 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8456 NDI_SUCCESS) { 8457 /* 8458 * release the hold that ndi_devi_config_one() placed 8459 */ 8460 ndi_rele_devi(child); 8461 rv = MDI_SUCCESS; 8462 } 8463 8464 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8465 ndi_rele_devi(ph_dip); 8466 } 8467 8468 devnm--; 8469 *devnm = '/'; 8470 return (rv); 8471 } 8472 8473 /* 8474 * Build a list of phci client paths for the specified vhci client. 8475 * The list includes only those phci client paths which aren't configured yet. 8476 */ 8477 static mdi_phys_path_t * 8478 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8479 { 8480 mdi_vhcache_pathinfo_t *cpi; 8481 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8482 int config_path, len; 8483 8484 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8485 /* 8486 * include only those paths that aren't configured. 
8487 */ 8488 config_path = 0; 8489 if (cpi->cpi_pip == NULL) 8490 config_path = 1; 8491 else { 8492 MDI_PI_LOCK(cpi->cpi_pip); 8493 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8494 config_path = 1; 8495 MDI_PI_UNLOCK(cpi->cpi_pip); 8496 } 8497 8498 if (config_path) { 8499 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8500 len = strlen(cpi->cpi_cphci->cphci_path) + 8501 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8502 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8503 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8504 cpi->cpi_cphci->cphci_path, ct_name, 8505 cpi->cpi_addr); 8506 pp->phys_path_next = NULL; 8507 8508 if (pp_head == NULL) 8509 pp_head = pp; 8510 else 8511 pp_tail->phys_path_next = pp; 8512 pp_tail = pp; 8513 } 8514 } 8515 8516 return (pp_head); 8517 } 8518 8519 /* 8520 * Free the memory allocated for phci client path list. 8521 */ 8522 static void 8523 free_phclient_path_list(mdi_phys_path_t *pp_head) 8524 { 8525 mdi_phys_path_t *pp, *pp_next; 8526 8527 for (pp = pp_head; pp != NULL; pp = pp_next) { 8528 pp_next = pp->phys_path_next; 8529 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8530 kmem_free(pp, sizeof (*pp)); 8531 } 8532 } 8533 8534 /* 8535 * Allocated async client structure and initialize with the specified values. 8536 */ 8537 static mdi_async_client_config_t * 8538 alloc_async_client_config(char *ct_name, char *ct_addr, 8539 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8540 { 8541 mdi_async_client_config_t *acc; 8542 8543 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8544 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8545 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8546 acc->acc_phclient_path_list_head = pp_head; 8547 init_vhcache_lookup_token(&acc->acc_token, tok); 8548 acc->acc_next = NULL; 8549 return (acc); 8550 } 8551 8552 /* 8553 * Free the memory allocated for the async client structure and their members. 
8554 */ 8555 static void 8556 free_async_client_config(mdi_async_client_config_t *acc) 8557 { 8558 if (acc->acc_phclient_path_list_head) 8559 free_phclient_path_list(acc->acc_phclient_path_list_head); 8560 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8561 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8562 kmem_free(acc, sizeof (*acc)); 8563 } 8564 8565 /* 8566 * Sort vhcache pathinfos (cpis) of the specified client. 8567 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8568 * flag set come at the beginning of the list. All cpis which have this 8569 * flag set come at the end of the list. 8570 */ 8571 static void 8572 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8573 { 8574 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8575 8576 cpi_head = cct->cct_cpi_head; 8577 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8578 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8579 cpi_next = cpi->cpi_next; 8580 enqueue_vhcache_pathinfo(cct, cpi); 8581 } 8582 } 8583 8584 /* 8585 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8586 * every vhcache pathinfo of the specified client. If not adjust the flag 8587 * setting appropriately. 8588 * 8589 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8590 * on-disk vhci cache. So every time this flag is updated the cache must be 8591 * flushed. 8592 */ 8593 static void 8594 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8595 mdi_vhcache_lookup_token_t *tok) 8596 { 8597 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8598 mdi_vhcache_client_t *cct; 8599 mdi_vhcache_pathinfo_t *cpi; 8600 8601 rw_enter(&vhcache->vhcache_lock, RW_READER); 8602 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8603 == NULL) { 8604 rw_exit(&vhcache->vhcache_lock); 8605 return; 8606 } 8607 8608 /* 8609 * to avoid unnecessary on-disk cache updates, first check if an 8610 * update is really needed. 
If no update is needed simply return. 8611 */ 8612 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8613 if ((cpi->cpi_pip != NULL && 8614 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8615 (cpi->cpi_pip == NULL && 8616 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8617 break; 8618 } 8619 } 8620 if (cpi == NULL) { 8621 rw_exit(&vhcache->vhcache_lock); 8622 return; 8623 } 8624 8625 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8626 rw_exit(&vhcache->vhcache_lock); 8627 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8628 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8629 tok)) == NULL) { 8630 rw_exit(&vhcache->vhcache_lock); 8631 return; 8632 } 8633 } 8634 8635 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8636 if (cpi->cpi_pip != NULL) 8637 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8638 else 8639 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8640 } 8641 sort_vhcache_paths(cct); 8642 8643 rw_exit(&vhcache->vhcache_lock); 8644 vhcache_dirty(vhc); 8645 } 8646 8647 /* 8648 * Configure all specified paths of the client. 8649 */ 8650 static void 8651 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8652 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8653 { 8654 mdi_phys_path_t *pp; 8655 8656 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8657 (void) bus_config_one_phci_child(pp->phys_path); 8658 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8659 } 8660 8661 /* 8662 * Dequeue elements from vhci async client config list and bus configure 8663 * their corresponding phci clients. 
8664 */ 8665 static void 8666 config_client_paths_thread(void *arg) 8667 { 8668 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8669 mdi_async_client_config_t *acc; 8670 clock_t quit_at_ticks; 8671 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8672 callb_cpr_t cprinfo; 8673 8674 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8675 "mdi_config_client_paths"); 8676 8677 for (; ; ) { 8678 quit_at_ticks = ddi_get_lbolt() + idle_time; 8679 8680 mutex_enter(&vhc->vhc_lock); 8681 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8682 vhc->vhc_acc_list_head == NULL && 8683 ddi_get_lbolt() < quit_at_ticks) { 8684 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8685 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8686 quit_at_ticks); 8687 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8688 } 8689 8690 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8691 vhc->vhc_acc_list_head == NULL) 8692 goto out; 8693 8694 acc = vhc->vhc_acc_list_head; 8695 vhc->vhc_acc_list_head = acc->acc_next; 8696 if (vhc->vhc_acc_list_head == NULL) 8697 vhc->vhc_acc_list_tail = NULL; 8698 vhc->vhc_acc_count--; 8699 mutex_exit(&vhc->vhc_lock); 8700 8701 config_client_paths_sync(vhc, acc->acc_ct_name, 8702 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8703 &acc->acc_token); 8704 8705 free_async_client_config(acc); 8706 } 8707 8708 out: 8709 vhc->vhc_acc_thrcount--; 8710 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8711 CALLB_CPR_EXIT(&cprinfo); 8712 } 8713 8714 /* 8715 * Arrange for all the phci client paths (pp_head) for the specified client 8716 * to be bus configured asynchronously by a thread. 
8717 */ 8718 static void 8719 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8720 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8721 { 8722 mdi_async_client_config_t *acc, *newacc; 8723 int create_thread; 8724 8725 if (pp_head == NULL) 8726 return; 8727 8728 if (mdi_mtc_off) { 8729 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8730 free_phclient_path_list(pp_head); 8731 return; 8732 } 8733 8734 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8735 ASSERT(newacc); 8736 8737 mutex_enter(&vhc->vhc_lock); 8738 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8739 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8740 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8741 free_async_client_config(newacc); 8742 mutex_exit(&vhc->vhc_lock); 8743 return; 8744 } 8745 } 8746 8747 if (vhc->vhc_acc_list_head == NULL) 8748 vhc->vhc_acc_list_head = newacc; 8749 else 8750 vhc->vhc_acc_list_tail->acc_next = newacc; 8751 vhc->vhc_acc_list_tail = newacc; 8752 vhc->vhc_acc_count++; 8753 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8754 cv_broadcast(&vhc->vhc_cv); 8755 create_thread = 0; 8756 } else { 8757 vhc->vhc_acc_thrcount++; 8758 create_thread = 1; 8759 } 8760 mutex_exit(&vhc->vhc_lock); 8761 8762 if (create_thread) 8763 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8764 0, &p0, TS_RUN, minclsyspri); 8765 } 8766 8767 /* 8768 * Return number of online paths for the specified client. 
8769 */ 8770 static int 8771 nonline_paths(mdi_vhcache_client_t *cct) 8772 { 8773 mdi_vhcache_pathinfo_t *cpi; 8774 int online_count = 0; 8775 8776 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8777 if (cpi->cpi_pip != NULL) { 8778 MDI_PI_LOCK(cpi->cpi_pip); 8779 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8780 online_count++; 8781 MDI_PI_UNLOCK(cpi->cpi_pip); 8782 } 8783 } 8784 8785 return (online_count); 8786 } 8787 8788 /* 8789 * Bus configure all paths for the specified vhci client. 8790 * If at least one path for the client is already online, the remaining paths 8791 * will be configured asynchronously. Otherwise, it synchronously configures 8792 * the paths until at least one path is online and then rest of the paths 8793 * will be configured asynchronously. 8794 */ 8795 static void 8796 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8797 { 8798 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8799 mdi_phys_path_t *pp_head, *pp; 8800 mdi_vhcache_client_t *cct; 8801 mdi_vhcache_lookup_token_t tok; 8802 8803 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8804 8805 init_vhcache_lookup_token(&tok, NULL); 8806 8807 if (ct_name == NULL || ct_addr == NULL || 8808 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8809 == NULL || 8810 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8811 rw_exit(&vhcache->vhcache_lock); 8812 return; 8813 } 8814 8815 /* if at least one path is online, configure the rest asynchronously */ 8816 if (nonline_paths(cct) > 0) { 8817 rw_exit(&vhcache->vhcache_lock); 8818 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8819 return; 8820 } 8821 8822 rw_exit(&vhcache->vhcache_lock); 8823 8824 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8825 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8826 rw_enter(&vhcache->vhcache_lock, RW_READER); 8827 8828 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8829 ct_addr, &tok)) 
== NULL) { 8830 rw_exit(&vhcache->vhcache_lock); 8831 goto out; 8832 } 8833 8834 if (nonline_paths(cct) > 0 && 8835 pp->phys_path_next != NULL) { 8836 rw_exit(&vhcache->vhcache_lock); 8837 config_client_paths_async(vhc, ct_name, ct_addr, 8838 pp->phys_path_next, &tok); 8839 pp->phys_path_next = NULL; 8840 goto out; 8841 } 8842 8843 rw_exit(&vhcache->vhcache_lock); 8844 } 8845 } 8846 8847 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8848 out: 8849 free_phclient_path_list(pp_head); 8850 } 8851 8852 static void 8853 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8854 { 8855 mutex_enter(&vhc->vhc_lock); 8856 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8857 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8858 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8859 mutex_exit(&vhc->vhc_lock); 8860 } 8861 8862 static void 8863 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8864 { 8865 mutex_enter(&vhc->vhc_lock); 8866 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8867 cv_broadcast(&vhc->vhc_cv); 8868 mutex_exit(&vhc->vhc_lock); 8869 } 8870 8871 typedef struct mdi_phci_driver_info { 8872 char *phdriver_name; /* name of the phci driver */ 8873 8874 /* set to non zero if the phci driver supports root device */ 8875 int phdriver_root_support; 8876 } mdi_phci_driver_info_t; 8877 8878 /* 8879 * vhci class and root support capability of a phci driver can be 8880 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8881 * phci driver.conf file. The built-in tables below contain this information 8882 * for those phci drivers whose driver.conf files don't yet contain this info. 8883 * 8884 * All phci drivers expect iscsi have root device support. 
8885 */ 8886 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8887 { "fp", 1 }, 8888 { "iscsi", 0 }, 8889 { "ibsrp", 1 } 8890 }; 8891 8892 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8893 8894 static void * 8895 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8896 { 8897 void *new_ptr; 8898 8899 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8900 if (old_ptr) { 8901 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8902 kmem_free(old_ptr, old_size); 8903 } 8904 return (new_ptr); 8905 } 8906 8907 static void 8908 add_to_phci_list(char ***driver_list, int **root_support_list, 8909 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8910 { 8911 ASSERT(*cur_elements <= *max_elements); 8912 if (*cur_elements == *max_elements) { 8913 *max_elements += 10; 8914 *driver_list = mdi_realloc(*driver_list, 8915 sizeof (char *) * (*cur_elements), 8916 sizeof (char *) * (*max_elements)); 8917 *root_support_list = mdi_realloc(*root_support_list, 8918 sizeof (int) * (*cur_elements), 8919 sizeof (int) * (*max_elements)); 8920 } 8921 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8922 (*root_support_list)[*cur_elements] = root_support; 8923 (*cur_elements)++; 8924 } 8925 8926 static void 8927 get_phci_driver_list(char *vhci_class, char ***driver_list, 8928 int **root_support_list, int *cur_elements, int *max_elements) 8929 { 8930 mdi_phci_driver_info_t *st_driver_list, *p; 8931 int st_ndrivers, root_support, i, j, driver_conf_count; 8932 major_t m; 8933 struct devnames *dnp; 8934 ddi_prop_t *propp; 8935 8936 *driver_list = NULL; 8937 *root_support_list = NULL; 8938 *cur_elements = 0; 8939 *max_elements = 0; 8940 8941 /* add the phci drivers derived from the phci driver.conf files */ 8942 for (m = 0; m < devcnt; m++) { 8943 dnp = &devnamesp[m]; 8944 8945 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8946 LOCK_DEV_OPS(&dnp->dn_lock); 8947 if (dnp->dn_global_prop_ptr != NULL && 8948 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8949 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8950 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8951 strcmp(propp->prop_val, vhci_class) == 0) { 8952 8953 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8954 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8955 &dnp->dn_global_prop_ptr->prop_list) 8956 == NULL) ? 1 : 0; 8957 8958 add_to_phci_list(driver_list, root_support_list, 8959 cur_elements, max_elements, dnp->dn_name, 8960 root_support); 8961 8962 UNLOCK_DEV_OPS(&dnp->dn_lock); 8963 } else 8964 UNLOCK_DEV_OPS(&dnp->dn_lock); 8965 } 8966 } 8967 8968 driver_conf_count = *cur_elements; 8969 8970 /* add the phci drivers specified in the built-in tables */ 8971 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8972 st_driver_list = scsi_phci_driver_list; 8973 st_ndrivers = sizeof (scsi_phci_driver_list) / 8974 sizeof (mdi_phci_driver_info_t); 8975 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8976 st_driver_list = ib_phci_driver_list; 8977 st_ndrivers = sizeof (ib_phci_driver_list) / 8978 sizeof (mdi_phci_driver_info_t); 8979 } else { 8980 st_driver_list = NULL; 8981 st_ndrivers = 0; 8982 } 8983 8984 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8985 /* add this phci driver if not already added before */ 8986 for (j = 0; j < driver_conf_count; j++) { 8987 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8988 break; 8989 } 8990 if (j == driver_conf_count) { 8991 add_to_phci_list(driver_list, root_support_list, 8992 cur_elements, max_elements, p->phdriver_name, 8993 p->phdriver_root_support); 8994 } 8995 } 8996 } 8997 8998 /* 8999 * Attach the phci driver instances associated with the specified vhci class. 9000 * If root is mounted attach all phci driver instances. 9001 * If root is not mounted, attach the instances of only those phci 9002 * drivers that have the root support. 
9003 */ 9004 static void 9005 attach_phci_drivers(char *vhci_class) 9006 { 9007 char **driver_list, **p; 9008 int *root_support_list; 9009 int cur_elements, max_elements, i; 9010 major_t m; 9011 9012 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9013 &cur_elements, &max_elements); 9014 9015 for (i = 0; i < cur_elements; i++) { 9016 if (modrootloaded || root_support_list[i]) { 9017 m = ddi_name_to_major(driver_list[i]); 9018 if (m != DDI_MAJOR_T_NONE && 9019 ddi_hold_installed_driver(m)) 9020 ddi_rele_driver(m); 9021 } 9022 } 9023 9024 if (driver_list) { 9025 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 9026 kmem_free(*p, strlen(*p) + 1); 9027 kmem_free(driver_list, sizeof (char *) * max_elements); 9028 kmem_free(root_support_list, sizeof (int) * max_elements); 9029 } 9030 } 9031 9032 /* 9033 * Build vhci cache: 9034 * 9035 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 9036 * the phci driver instances. During this process the cache gets built. 9037 * 9038 * Cache is built fully if the root is mounted. 9039 * If the root is not mounted, phci drivers that do not have root support 9040 * are not attached. As a result the cache is built partially. The entries 9041 * in the cache reflect only those phci drivers that have root support. 
9042 */ 9043 static int 9044 build_vhci_cache(mdi_vhci_t *vh) 9045 { 9046 mdi_vhci_config_t *vhc = vh->vh_config; 9047 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9048 9049 single_threaded_vhconfig_enter(vhc); 9050 9051 rw_enter(&vhcache->vhcache_lock, RW_READER); 9052 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 9053 rw_exit(&vhcache->vhcache_lock); 9054 single_threaded_vhconfig_exit(vhc); 9055 return (0); 9056 } 9057 rw_exit(&vhcache->vhcache_lock); 9058 9059 attach_phci_drivers(vh->vh_class); 9060 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 9061 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9062 9063 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9064 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 9065 rw_exit(&vhcache->vhcache_lock); 9066 9067 single_threaded_vhconfig_exit(vhc); 9068 vhcache_dirty(vhc); 9069 return (1); 9070 } 9071 9072 /* 9073 * Determine if discovery of paths is needed. 9074 */ 9075 static int 9076 vhcache_do_discovery(mdi_vhci_config_t *vhc) 9077 { 9078 int rv = 1; 9079 9080 mutex_enter(&vhc->vhc_lock); 9081 if (i_ddi_io_initialized() == 0) { 9082 if (vhc->vhc_path_discovery_boot > 0) { 9083 vhc->vhc_path_discovery_boot--; 9084 goto out; 9085 } 9086 } else { 9087 if (vhc->vhc_path_discovery_postboot > 0) { 9088 vhc->vhc_path_discovery_postboot--; 9089 goto out; 9090 } 9091 } 9092 9093 /* 9094 * Do full path discovery at most once per mdi_path_discovery_interval. 9095 * This is to avoid a series of full path discoveries when opening 9096 * stale /dev/[r]dsk links. 9097 */ 9098 if (mdi_path_discovery_interval != -1 && 9099 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 9100 goto out; 9101 9102 rv = 0; 9103 out: 9104 mutex_exit(&vhc->vhc_lock); 9105 return (rv); 9106 } 9107 9108 /* 9109 * Discover all paths: 9110 * 9111 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 9112 * driver instances. During this process all paths will be discovered. 
9113 */ 9114 static int 9115 vhcache_discover_paths(mdi_vhci_t *vh) 9116 { 9117 mdi_vhci_config_t *vhc = vh->vh_config; 9118 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9119 int rv = 0; 9120 9121 single_threaded_vhconfig_enter(vhc); 9122 9123 if (vhcache_do_discovery(vhc)) { 9124 attach_phci_drivers(vh->vh_class); 9125 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 9126 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9127 9128 mutex_enter(&vhc->vhc_lock); 9129 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 9130 mdi_path_discovery_interval * TICKS_PER_SECOND; 9131 mutex_exit(&vhc->vhc_lock); 9132 rv = 1; 9133 } 9134 9135 single_threaded_vhconfig_exit(vhc); 9136 return (rv); 9137 } 9138 9139 /* 9140 * Generic vhci bus config implementation: 9141 * 9142 * Parameters 9143 * vdip vhci dip 9144 * flags bus config flags 9145 * op bus config operation 9146 * The remaining parameters are bus config operation specific 9147 * 9148 * for BUS_CONFIG_ONE 9149 * arg pointer to name@addr 9150 * child upon successful return from this function, *child will be 9151 * set to the configured and held devinfo child node of vdip. 9152 * ct_addr pointer to client address (i.e. GUID) 9153 * 9154 * for BUS_CONFIG_DRIVER 9155 * arg major number of the driver 9156 * child and ct_addr parameters are ignored 9157 * 9158 * for BUS_CONFIG_ALL 9159 * arg, child, and ct_addr parameters are ignored 9160 * 9161 * Note that for the rest of the bus config operations, this function simply 9162 * calls the framework provided default bus config routine. 
9163 */ 9164 int 9165 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 9166 void *arg, dev_info_t **child, char *ct_addr) 9167 { 9168 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9169 mdi_vhci_config_t *vhc = vh->vh_config; 9170 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9171 int rv = 0; 9172 int params_valid = 0; 9173 char *cp; 9174 9175 /* 9176 * To bus config vhcis we relay operation, possibly using another 9177 * thread, to phcis. The phci driver then interacts with MDI to cause 9178 * vhci child nodes to be enumerated under the vhci node. Adding a 9179 * vhci child requires an ndi_devi_enter of the vhci. Since another 9180 * thread may be adding the child, to avoid deadlock we can't wait 9181 * for the relayed operations to complete if we have already entered 9182 * the vhci node. 9183 */ 9184 if (DEVI_BUSY_OWNED(vdip)) { 9185 MDI_DEBUG(2, (MDI_NOTE, vdip, 9186 "vhci dip is busy owned %p", (void *)vdip)); 9187 goto default_bus_config; 9188 } 9189 9190 rw_enter(&vhcache->vhcache_lock, RW_READER); 9191 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 9192 rw_exit(&vhcache->vhcache_lock); 9193 rv = build_vhci_cache(vh); 9194 rw_enter(&vhcache->vhcache_lock, RW_READER); 9195 } 9196 9197 switch (op) { 9198 case BUS_CONFIG_ONE: 9199 if (arg != NULL && ct_addr != NULL) { 9200 /* extract node name */ 9201 cp = (char *)arg; 9202 while (*cp != '\0' && *cp != '@') 9203 cp++; 9204 if (*cp == '@') { 9205 params_valid = 1; 9206 *cp = '\0'; 9207 config_client_paths(vhc, (char *)arg, ct_addr); 9208 /* config_client_paths() releases cache_lock */ 9209 *cp = '@'; 9210 break; 9211 } 9212 } 9213 9214 rw_exit(&vhcache->vhcache_lock); 9215 break; 9216 9217 case BUS_CONFIG_DRIVER: 9218 rw_exit(&vhcache->vhcache_lock); 9219 if (rv == 0) 9220 st_bus_config_all_phcis(vhc, flags, op, 9221 (major_t)(uintptr_t)arg); 9222 break; 9223 9224 case BUS_CONFIG_ALL: 9225 rw_exit(&vhcache->vhcache_lock); 9226 if (rv == 0) 9227 st_bus_config_all_phcis(vhc, 
flags, op, -1); 9228 break; 9229 9230 default: 9231 rw_exit(&vhcache->vhcache_lock); 9232 break; 9233 } 9234 9235 9236 default_bus_config: 9237 /* 9238 * All requested child nodes are enumerated under the vhci. 9239 * Now configure them. 9240 */ 9241 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9242 NDI_SUCCESS) { 9243 return (MDI_SUCCESS); 9244 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 9245 /* discover all paths and try configuring again */ 9246 if (vhcache_discover_paths(vh) && 9247 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9248 NDI_SUCCESS) 9249 return (MDI_SUCCESS); 9250 } 9251 9252 return (MDI_FAILURE); 9253 } 9254 9255 /* 9256 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 9257 */ 9258 static nvlist_t * 9259 read_on_disk_vhci_cache(char *vhci_class) 9260 { 9261 nvlist_t *nvl; 9262 int err; 9263 char *filename; 9264 9265 filename = vhclass2vhcache_filename(vhci_class); 9266 9267 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9268 kmem_free(filename, strlen(filename) + 1); 9269 return (nvl); 9270 } else if (err == EIO) 9271 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename); 9272 else if (err == EINVAL) 9273 cmn_err(CE_WARN, 9274 "%s: data file corrupted, will recreate", filename); 9275 9276 kmem_free(filename, strlen(filename) + 1); 9277 return (NULL); 9278 } 9279 9280 /* 9281 * Read on-disk vhci cache into nvlists for all vhci classes. 9282 * Called during booting by i_ddi_read_devices_files(). 9283 */ 9284 void 9285 mdi_read_devices_files(void) 9286 { 9287 int i; 9288 9289 for (i = 0; i < N_VHCI_CLASSES; i++) 9290 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9291 } 9292 9293 /* 9294 * Remove all stale entries from vhci cache. 
9295 */ 9296 static void 9297 clean_vhcache(mdi_vhci_config_t *vhc) 9298 { 9299 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9300 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 9301 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 9302 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 9303 9304 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9305 9306 cct_head = vhcache->vhcache_client_head; 9307 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9308 for (cct = cct_head; cct != NULL; cct = cct_next) { 9309 cct_next = cct->cct_next; 9310 9311 cpi_head = cct->cct_cpi_head; 9312 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 9313 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 9314 cpi_next = cpi->cpi_next; 9315 if (cpi->cpi_pip != NULL) { 9316 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 9317 enqueue_tail_vhcache_pathinfo(cct, cpi); 9318 } else 9319 free_vhcache_pathinfo(cpi); 9320 } 9321 9322 if (cct->cct_cpi_head != NULL) 9323 enqueue_vhcache_client(vhcache, cct); 9324 else { 9325 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9326 (mod_hash_key_t)cct->cct_name_addr); 9327 free_vhcache_client(cct); 9328 } 9329 } 9330 9331 cphci_head = vhcache->vhcache_phci_head; 9332 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9333 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 9334 cphci_next = cphci->cphci_next; 9335 if (cphci->cphci_phci != NULL) 9336 enqueue_vhcache_phci(vhcache, cphci); 9337 else 9338 free_vhcache_phci(cphci); 9339 } 9340 9341 vhcache->vhcache_clean_time = lbolt64; 9342 rw_exit(&vhcache->vhcache_lock); 9343 vhcache_dirty(vhc); 9344 } 9345 9346 /* 9347 * Remove all stale entries from vhci cache. 
9348 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9349 */ 9350 void 9351 mdi_clean_vhcache(void) 9352 { 9353 mdi_vhci_t *vh; 9354 9355 mutex_enter(&mdi_mutex); 9356 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9357 vh->vh_refcnt++; 9358 mutex_exit(&mdi_mutex); 9359 clean_vhcache(vh->vh_config); 9360 mutex_enter(&mdi_mutex); 9361 vh->vh_refcnt--; 9362 } 9363 mutex_exit(&mdi_mutex); 9364 } 9365 9366 /* 9367 * mdi_vhci_walk_clients(): 9368 * Walker routine to traverse client dev_info nodes 9369 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9370 * below the client, including nexus devices, which we dont want. 9371 * So we just traverse the immediate siblings, starting from 1st client. 9372 */ 9373 void 9374 mdi_vhci_walk_clients(dev_info_t *vdip, 9375 int (*f)(dev_info_t *, void *), void *arg) 9376 { 9377 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9378 dev_info_t *cdip; 9379 mdi_client_t *ct; 9380 9381 MDI_VHCI_CLIENT_LOCK(vh); 9382 cdip = ddi_get_child(vdip); 9383 while (cdip) { 9384 ct = i_devi_get_client(cdip); 9385 MDI_CLIENT_LOCK(ct); 9386 9387 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9388 cdip = ddi_get_next_sibling(cdip); 9389 else 9390 cdip = NULL; 9391 9392 MDI_CLIENT_UNLOCK(ct); 9393 } 9394 MDI_VHCI_CLIENT_UNLOCK(vh); 9395 } 9396 9397 /* 9398 * mdi_vhci_walk_phcis(): 9399 * Walker routine to traverse phci dev_info nodes 9400 */ 9401 void 9402 mdi_vhci_walk_phcis(dev_info_t *vdip, 9403 int (*f)(dev_info_t *, void *), void *arg) 9404 { 9405 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9406 mdi_phci_t *ph, *next; 9407 9408 MDI_VHCI_PHCI_LOCK(vh); 9409 ph = vh->vh_phci_head; 9410 while (ph) { 9411 MDI_PHCI_LOCK(ph); 9412 9413 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9414 next = ph->ph_next; 9415 else 9416 next = NULL; 9417 9418 MDI_PHCI_UNLOCK(ph); 9419 ph = next; 9420 } 9421 MDI_VHCI_PHCI_UNLOCK(vh); 9422 } 9423 9424 9425 /* 9426 * mdi_walk_vhcis(): 9427 * Walker routine to traverse vhci 
dev_info nodes 9428 */ 9429 void 9430 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9431 { 9432 mdi_vhci_t *vh = NULL; 9433 9434 mutex_enter(&mdi_mutex); 9435 /* 9436 * Scan for already registered vhci 9437 */ 9438 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9439 vh->vh_refcnt++; 9440 mutex_exit(&mdi_mutex); 9441 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9442 mutex_enter(&mdi_mutex); 9443 vh->vh_refcnt--; 9444 break; 9445 } else { 9446 mutex_enter(&mdi_mutex); 9447 vh->vh_refcnt--; 9448 } 9449 } 9450 9451 mutex_exit(&mdi_mutex); 9452 } 9453 9454 /* 9455 * i_mdi_log_sysevent(): 9456 * Logs events for pickup by syseventd 9457 */ 9458 static void 9459 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9460 { 9461 char *path_name; 9462 nvlist_t *attr_list; 9463 9464 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9465 KM_SLEEP) != DDI_SUCCESS) { 9466 goto alloc_failed; 9467 } 9468 9469 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9470 (void) ddi_pathname(dip, path_name); 9471 9472 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9473 ddi_driver_name(dip)) != DDI_SUCCESS) { 9474 goto error; 9475 } 9476 9477 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9478 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9479 goto error; 9480 } 9481 9482 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9483 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9484 goto error; 9485 } 9486 9487 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9488 path_name) != DDI_SUCCESS) { 9489 goto error; 9490 } 9491 9492 if (nvlist_add_string(attr_list, DDI_CLASS, 9493 ph_vh_class) != DDI_SUCCESS) { 9494 goto error; 9495 } 9496 9497 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9498 attr_list, NULL, DDI_SLEEP); 9499 9500 error: 9501 kmem_free(path_name, MAXPATHLEN); 9502 nvlist_free(attr_list); 9503 return; 9504 9505 alloc_failed: 9506 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9507 } 9508 9509 char ** 9510 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9511 { 9512 char **driver_list, **ret_driver_list = NULL; 9513 int *root_support_list; 9514 int cur_elements, max_elements; 9515 9516 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9517 &cur_elements, &max_elements); 9518 9519 9520 if (driver_list) { 9521 kmem_free(root_support_list, sizeof (int) * max_elements); 9522 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9523 * max_elements, sizeof (char *) * cur_elements); 9524 } 9525 *ndrivers = cur_elements; 9526 9527 return (ret_driver_list); 9528 9529 } 9530 9531 void 9532 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9533 { 9534 char **p; 9535 int i; 9536 9537 if (driver_list) { 9538 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9539 kmem_free(*p, strlen(*p) + 1); 9540 kmem_free(driver_list, sizeof (char *) * ndrivers); 9541 } 9542 } 9543 9544 /* 9545 * mdi_is_dev_supported(): 9546 * function called by pHCI bus config operation to determine if a 9547 * device should be represented as a child of the vHCI or the 9548 * pHCI. This decision is made by the vHCI, using cinfo idenity 9549 * information passed by the pHCI - specifics of the cinfo 9550 * representation are by agreement between the pHCI and vHCI. 9551 * Return Values: 9552 * MDI_SUCCESS 9553 * MDI_FAILURE 9554 */ 9555 int 9556 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9557 { 9558 mdi_vhci_t *vh; 9559 9560 ASSERT(class && pdip); 9561 9562 /* 9563 * For dev_supported, mdi_phci_register() must have established pdip as 9564 * a pHCI. 9565 * 9566 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9567 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9568 */ 9569 if (!MDI_PHCI(pdip)) 9570 return (MDI_FAILURE); 9571 9572 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9573 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9574 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9575 return (MDI_FAILURE); 9576 } 9577 9578 /* Return vHCI answer */ 9579 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9580 } 9581 9582 int 9583 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9584 { 9585 uint_t devstate = 0; 9586 dev_info_t *cdip; 9587 9588 if ((pip == NULL) || (dcp == NULL)) 9589 return (MDI_FAILURE); 9590 9591 cdip = mdi_pi_get_client(pip); 9592 9593 switch (mdi_pi_get_state(pip)) { 9594 case MDI_PATHINFO_STATE_INIT: 9595 devstate = DEVICE_DOWN; 9596 break; 9597 case MDI_PATHINFO_STATE_ONLINE: 9598 devstate = DEVICE_ONLINE; 9599 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9600 devstate |= DEVICE_BUSY; 9601 break; 9602 case MDI_PATHINFO_STATE_STANDBY: 9603 devstate = DEVICE_ONLINE; 9604 break; 9605 case MDI_PATHINFO_STATE_FAULT: 9606 devstate = DEVICE_DOWN; 9607 break; 9608 case MDI_PATHINFO_STATE_OFFLINE: 9609 devstate = DEVICE_OFFLINE; 9610 break; 9611 default: 9612 ASSERT(MDI_PI(pip)->pi_state); 9613 } 9614 9615 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9616 return (MDI_FAILURE); 9617 9618 return (MDI_SUCCESS); 9619 } 9620