1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 27 * detailed discussion of the overall mpxio architecture. 
28 * 29 * Default locking order: 30 * 31 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 33 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 35 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 38 */ 39 40 #include <sys/note.h> 41 #include <sys/types.h> 42 #include <sys/varargs.h> 43 #include <sys/param.h> 44 #include <sys/errno.h> 45 #include <sys/uio.h> 46 #include <sys/buf.h> 47 #include <sys/modctl.h> 48 #include <sys/open.h> 49 #include <sys/kmem.h> 50 #include <sys/poll.h> 51 #include <sys/conf.h> 52 #include <sys/bootconf.h> 53 #include <sys/cmn_err.h> 54 #include <sys/stat.h> 55 #include <sys/ddi.h> 56 #include <sys/sunddi.h> 57 #include <sys/ddipropdefs.h> 58 #include <sys/sunndi.h> 59 #include <sys/ndi_impldefs.h> 60 #include <sys/promif.h> 61 #include <sys/sunmdi.h> 62 #include <sys/mdi_impldefs.h> 63 #include <sys/taskq.h> 64 #include <sys/epm.h> 65 #include <sys/sunpm.h> 66 #include <sys/modhash.h> 67 #include <sys/disp.h> 68 #include <sys/autoconf.h> 69 #include <sys/sysmacros.h> 70 71 #ifdef DEBUG 72 #include <sys/debug.h> 73 int mdi_debug = 1; 74 int mdi_debug_logonly = 0; 75 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 76 #define MDI_WARN CE_WARN, __func__ 77 #define MDI_NOTE CE_NOTE, __func__ 78 #define MDI_CONT CE_CONT, __func__ 79 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 80 #else /* !DEBUG */ 81 #define MDI_DEBUG(dbglevel, pargs) 82 #endif /* DEBUG */ 83 int mdi_debug_consoleonly = 0; 84 int mdi_delay = 3; 85 86 extern pri_t minclsyspri; 87 extern int modrootloaded; 88 89 /* 90 * Global mutex: 91 * Protects vHCI list and structure members. 
92 */ 93 kmutex_t mdi_mutex; 94 95 /* 96 * Registered vHCI class driver lists 97 */ 98 int mdi_vhci_count; 99 mdi_vhci_t *mdi_vhci_head; 100 mdi_vhci_t *mdi_vhci_tail; 101 102 /* 103 * Client Hash Table size 104 */ 105 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 106 107 /* 108 * taskq interface definitions 109 */ 110 #define MDI_TASKQ_N_THREADS 8 111 #define MDI_TASKQ_PRI minclsyspri 112 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 113 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 114 115 taskq_t *mdi_taskq; 116 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 117 118 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 119 120 /* 121 * The data should be "quiet" for this interval (in seconds) before the 122 * vhci cached data is flushed to the disk. 123 */ 124 static int mdi_vhcache_flush_delay = 10; 125 126 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 127 static int mdi_vhcache_flush_daemon_idle_time = 60; 128 129 /* 130 * MDI falls back to discovery of all paths when a bus_config_one fails. 131 * The following parameters can be used to tune this operation. 132 * 133 * mdi_path_discovery_boot 134 * Number of times path discovery will be attempted during early boot. 135 * Probably there is no reason to ever set this value to greater than one. 136 * 137 * mdi_path_discovery_postboot 138 * Number of times path discovery will be attempted after early boot. 139 * Set it to a minimum of two to allow for discovery of iscsi paths which 140 * may happen very late during booting. 141 * 142 * mdi_path_discovery_interval 143 * Minimum number of seconds MDI will wait between successive discovery 144 * of all paths. Set it to -1 to disable discovery of all paths. 
145 */ 146 static int mdi_path_discovery_boot = 1; 147 static int mdi_path_discovery_postboot = 2; 148 static int mdi_path_discovery_interval = 10; 149 150 /* 151 * number of seconds the asynchronous configuration thread will sleep idle 152 * before exiting. 153 */ 154 static int mdi_async_config_idle_time = 600; 155 156 static int mdi_bus_config_cache_hash_size = 256; 157 158 /* turns off multithreaded configuration for certain operations */ 159 static int mdi_mtc_off = 0; 160 161 /* 162 * The "path" to a pathinfo node is identical to the /devices path to a 163 * devinfo node had the device been enumerated under a pHCI instead of 164 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 165 * This association persists across create/delete of the pathinfo nodes, 166 * but not across reboot. 167 */ 168 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 169 static int mdi_pathmap_hash_size = 256; 170 static kmutex_t mdi_pathmap_mutex; 171 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 172 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 173 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */ 174 175 /* 176 * MDI component property name/value string definitions 177 */ 178 const char *mdi_component_prop = "mpxio-component"; 179 const char *mdi_component_prop_vhci = "vhci"; 180 const char *mdi_component_prop_phci = "phci"; 181 const char *mdi_component_prop_client = "client"; 182 183 /* 184 * MDI client global unique identifier property name 185 */ 186 const char *mdi_client_guid_prop = "client-guid"; 187 188 /* 189 * MDI client load balancing property name/value string definitions 190 */ 191 const char *mdi_load_balance = "load-balance"; 192 const char *mdi_load_balance_none = "none"; 193 const char *mdi_load_balance_rr = "round-robin"; 194 const char *mdi_load_balance_lba = "logical-block"; 195 196 /* 197 * Obsolete vHCI class definition; to be removed after Leadville update 198 */ 199 const char 
*mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 200 201 static char vhci_greeting[] = 202 "\tThere already exists one vHCI driver for class %s\n" 203 "\tOnly one vHCI driver for each class is allowed\n"; 204 205 /* 206 * Static function prototypes 207 */ 208 static int i_mdi_phci_offline(dev_info_t *, uint_t); 209 static int i_mdi_client_offline(dev_info_t *, uint_t); 210 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 211 static void i_mdi_phci_post_detach(dev_info_t *, 212 ddi_detach_cmd_t, int); 213 static int i_mdi_client_pre_detach(dev_info_t *, 214 ddi_detach_cmd_t); 215 static void i_mdi_client_post_detach(dev_info_t *, 216 ddi_detach_cmd_t, int); 217 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 218 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 219 static int i_mdi_lba_lb(mdi_client_t *ct, 220 mdi_pathinfo_t **ret_pip, struct buf *buf); 221 static void i_mdi_pm_hold_client(mdi_client_t *, int); 222 static void i_mdi_pm_rele_client(mdi_client_t *, int); 223 static void i_mdi_pm_reset_client(mdi_client_t *); 224 static int i_mdi_power_all_phci(mdi_client_t *); 225 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 226 227 228 /* 229 * Internal mdi_pathinfo node functions 230 */ 231 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 232 233 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 234 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 235 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 236 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 237 static void i_mdi_phci_unlock(mdi_phci_t *); 238 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 239 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 240 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 241 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 242 mdi_client_t *); 243 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 244 static void 
i_mdi_client_remove_path(mdi_client_t *, 245 mdi_pathinfo_t *); 246 247 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 248 mdi_pathinfo_state_t, int); 249 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 250 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 251 char **, int); 252 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 253 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 254 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 255 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 256 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 257 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 258 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 259 static void i_mdi_client_update_state(mdi_client_t *); 260 static int i_mdi_client_compute_state(mdi_client_t *, 261 mdi_phci_t *); 262 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 263 static void i_mdi_client_unlock(mdi_client_t *); 264 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 265 static mdi_client_t *i_devi_get_client(dev_info_t *); 266 /* 267 * NOTE: this will be removed once the NWS files are changed to use the new 268 * mdi_{enable,disable}_path interfaces 269 */ 270 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 271 int, int); 272 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 273 mdi_vhci_t *vh, int flags, int op); 274 /* 275 * Failover related function prototypes 276 */ 277 static int i_mdi_failover(void *); 278 279 /* 280 * misc internal functions 281 */ 282 static int i_mdi_get_hash_key(char *); 283 static int i_map_nvlist_error_to_mdi(int); 284 static void i_mdi_report_path_state(mdi_client_t *, 285 mdi_pathinfo_t *); 286 287 static void setup_vhci_cache(mdi_vhci_t *); 288 static int destroy_vhci_cache(mdi_vhci_t *); 289 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 
290 static boolean_t stop_vhcache_flush_thread(void *, int); 291 static void free_string_array(char **, int); 292 static void free_vhcache_phci(mdi_vhcache_phci_t *); 293 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 294 static void free_vhcache_client(mdi_vhcache_client_t *); 295 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 296 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 297 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 298 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 299 static void vhcache_pi_add(mdi_vhci_config_t *, 300 struct mdi_pathinfo *); 301 static void vhcache_pi_remove(mdi_vhci_config_t *, 302 struct mdi_pathinfo *); 303 static void free_phclient_path_list(mdi_phys_path_t *); 304 static void sort_vhcache_paths(mdi_vhcache_client_t *); 305 static int flush_vhcache(mdi_vhci_config_t *, int); 306 static void vhcache_dirty(mdi_vhci_config_t *); 307 static void free_async_client_config(mdi_async_client_config_t *); 308 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 309 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 310 static nvlist_t *read_on_disk_vhci_cache(char *); 311 extern int fread_nvlist(char *, nvlist_t **); 312 extern int fwrite_nvlist(char *, nvlist_t *); 313 314 /* called once when first vhci registers with mdi */ 315 static void 316 i_mdi_init() 317 { 318 static int initialized = 0; 319 320 if (initialized) 321 return; 322 initialized = 1; 323 324 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 325 326 /* Create our taskq resources */ 327 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 328 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 329 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 330 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 331 332 /* Allocate ['path_instance' <-> "path"] maps */ 333 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 334 mdi_pathmap_bypath = mod_hash_create_strhash( 
335 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 336 mod_hash_null_valdtor); 337 mdi_pathmap_byinstance = mod_hash_create_idhash( 338 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 339 mod_hash_null_valdtor); 340 mdi_pathmap_sbyinstance = mod_hash_create_idhash( 341 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size, 342 mod_hash_null_valdtor); 343 } 344 345 /* 346 * mdi_get_component_type(): 347 * Return mpxio component type 348 * Return Values: 349 * MDI_COMPONENT_NONE 350 * MDI_COMPONENT_VHCI 351 * MDI_COMPONENT_PHCI 352 * MDI_COMPONENT_CLIENT 353 * XXX This doesn't work under multi-level MPxIO and should be 354 * removed when clients migrate mdi_component_is_*() interfaces. 355 */ 356 int 357 mdi_get_component_type(dev_info_t *dip) 358 { 359 return (DEVI(dip)->devi_mdi_component); 360 } 361 362 /* 363 * mdi_vhci_register(): 364 * Register a vHCI module with the mpxio framework 365 * mdi_vhci_register() is called by vHCI drivers to register the 366 * 'class_driver' vHCI driver and its MDI entrypoints with the 367 * mpxio framework. The vHCI driver must call this interface as 368 * part of its attach(9e) handler. 369 * Competing threads may try to attach mdi_vhci_register() as 370 * the vHCI drivers are loaded and attached as a result of pHCI 371 * driver instance registration (mdi_phci_register()) with the 372 * framework. 373 * Return Values: 374 * MDI_SUCCESS 375 * MDI_FAILURE 376 */ 377 /*ARGSUSED*/ 378 int 379 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 380 int flags) 381 { 382 mdi_vhci_t *vh = NULL; 383 384 /* Registrant can't be older */ 385 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 386 387 #ifdef DEBUG 388 /* 389 * IB nexus driver is loaded only when IB hardware is present. 390 * In order to be able to do this there is a need to drive the loading 391 * and attaching of the IB nexus driver (especially when an IB hardware 392 * is dynamically plugged in) when an IB HCA driver (PHCI) 393 * is being attached. 
Unfortunately this gets into the limitations 394 * of devfs as there seems to be no clean way to drive configuration 395 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 396 * for IB. 397 */ 398 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 399 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 400 #endif 401 402 i_mdi_init(); 403 404 mutex_enter(&mdi_mutex); 405 /* 406 * Scan for already registered vhci 407 */ 408 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 409 if (strcmp(vh->vh_class, class) == 0) { 410 /* 411 * vHCI has already been created. Check for valid 412 * vHCI ops registration. We only support one vHCI 413 * module per class 414 */ 415 if (vh->vh_ops != NULL) { 416 mutex_exit(&mdi_mutex); 417 cmn_err(CE_NOTE, vhci_greeting, class); 418 return (MDI_FAILURE); 419 } 420 break; 421 } 422 } 423 424 /* 425 * if not yet created, create the vHCI component 426 */ 427 if (vh == NULL) { 428 struct client_hash *hash = NULL; 429 char *load_balance; 430 431 /* 432 * Allocate and initialize the mdi extensions 433 */ 434 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 435 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 436 KM_SLEEP); 437 vh->vh_client_table = hash; 438 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 439 (void) strcpy(vh->vh_class, class); 440 vh->vh_lb = LOAD_BALANCE_RR; 441 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 442 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 443 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 444 vh->vh_lb = LOAD_BALANCE_NONE; 445 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 446 == 0) { 447 vh->vh_lb = LOAD_BALANCE_LBA; 448 } 449 ddi_prop_free(load_balance); 450 } 451 452 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 453 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 454 455 /* 456 * Store the vHCI ops vectors 457 */ 458 vh->vh_dip = vdip; 459 vh->vh_ops = vops; 460 461 setup_vhci_cache(vh); 462 463 if (mdi_vhci_head == 
NULL) { 464 mdi_vhci_head = vh; 465 } 466 if (mdi_vhci_tail) { 467 mdi_vhci_tail->vh_next = vh; 468 } 469 mdi_vhci_tail = vh; 470 mdi_vhci_count++; 471 } 472 473 /* 474 * Claim the devfs node as a vhci component 475 */ 476 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 477 478 /* 479 * Initialize our back reference from dev_info node 480 */ 481 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 482 mutex_exit(&mdi_mutex); 483 return (MDI_SUCCESS); 484 } 485 486 /* 487 * mdi_vhci_unregister(): 488 * Unregister a vHCI module from mpxio framework 489 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 490 * of a vhci to unregister it from the framework. 491 * Return Values: 492 * MDI_SUCCESS 493 * MDI_FAILURE 494 */ 495 /*ARGSUSED*/ 496 int 497 mdi_vhci_unregister(dev_info_t *vdip, int flags) 498 { 499 mdi_vhci_t *found, *vh, *prev = NULL; 500 501 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 502 503 /* 504 * Check for invalid VHCI 505 */ 506 if ((vh = i_devi_get_vhci(vdip)) == NULL) 507 return (MDI_FAILURE); 508 509 /* 510 * Scan the list of registered vHCIs for a match 511 */ 512 mutex_enter(&mdi_mutex); 513 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 514 if (found == vh) 515 break; 516 prev = found; 517 } 518 519 if (found == NULL) { 520 mutex_exit(&mdi_mutex); 521 return (MDI_FAILURE); 522 } 523 524 /* 525 * Check the vHCI, pHCI and client count. All the pHCIs and clients 526 * should have been unregistered, before a vHCI can be 527 * unregistered. 
528 */ 529 MDI_VHCI_PHCI_LOCK(vh); 530 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 531 MDI_VHCI_PHCI_UNLOCK(vh); 532 mutex_exit(&mdi_mutex); 533 return (MDI_FAILURE); 534 } 535 MDI_VHCI_PHCI_UNLOCK(vh); 536 537 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 538 mutex_exit(&mdi_mutex); 539 return (MDI_FAILURE); 540 } 541 542 /* 543 * Remove the vHCI from the global list 544 */ 545 if (vh == mdi_vhci_head) { 546 mdi_vhci_head = vh->vh_next; 547 } else { 548 prev->vh_next = vh->vh_next; 549 } 550 if (vh == mdi_vhci_tail) { 551 mdi_vhci_tail = prev; 552 } 553 mdi_vhci_count--; 554 mutex_exit(&mdi_mutex); 555 556 vh->vh_ops = NULL; 557 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 558 DEVI(vdip)->devi_mdi_xhci = NULL; 559 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 560 kmem_free(vh->vh_client_table, 561 mdi_client_table_size * sizeof (struct client_hash)); 562 mutex_destroy(&vh->vh_phci_mutex); 563 mutex_destroy(&vh->vh_client_mutex); 564 565 kmem_free(vh, sizeof (mdi_vhci_t)); 566 return (MDI_SUCCESS); 567 } 568 569 /* 570 * i_mdi_vhci_class2vhci(): 571 * Look for a matching vHCI module given a vHCI class name 572 * Return Values: 573 * Handle to a vHCI component 574 * NULL 575 */ 576 static mdi_vhci_t * 577 i_mdi_vhci_class2vhci(char *class) 578 { 579 mdi_vhci_t *vh = NULL; 580 581 ASSERT(!MUTEX_HELD(&mdi_mutex)); 582 583 mutex_enter(&mdi_mutex); 584 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 585 if (strcmp(vh->vh_class, class) == 0) { 586 break; 587 } 588 } 589 mutex_exit(&mdi_mutex); 590 return (vh); 591 } 592 593 /* 594 * i_devi_get_vhci(): 595 * Utility function to get the handle to a vHCI component 596 * Return Values: 597 * Handle to a vHCI component 598 * NULL 599 */ 600 mdi_vhci_t * 601 i_devi_get_vhci(dev_info_t *vdip) 602 { 603 mdi_vhci_t *vh = NULL; 604 if (MDI_VHCI(vdip)) { 605 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 606 } 607 return (vh); 608 } 609 610 /* 611 * mdi_phci_register(): 612 * Register a pHCI 
module with mpxio framework 613 * mdi_phci_register() is called by pHCI drivers to register with 614 * the mpxio framework and a specific 'class_driver' vHCI. The 615 * pHCI driver must call this interface as part of its attach(9e) 616 * handler. 617 * Return Values: 618 * MDI_SUCCESS 619 * MDI_FAILURE 620 */ 621 /*ARGSUSED*/ 622 int 623 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 624 { 625 mdi_phci_t *ph; 626 mdi_vhci_t *vh; 627 char *data; 628 629 /* 630 * Some subsystems, like fcp, perform pHCI registration from a 631 * different thread than the one doing the pHCI attach(9E) - the 632 * driver attach code is waiting for this other thread to complete. 633 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 634 * (indicating that some thread has done an ndi_devi_enter of parent) 635 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 636 */ 637 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 638 639 /* 640 * Check for mpxio-disable property. Enable mpxio if the property is 641 * missing or not set to "yes". 642 * If the property is set to "yes" then emit a brief message. 
643 */ 644 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 645 &data) == DDI_SUCCESS)) { 646 if (strcmp(data, "yes") == 0) { 647 MDI_DEBUG(1, (MDI_CONT, pdip, 648 "?multipath capabilities disabled via %s.conf.", 649 ddi_driver_name(pdip))); 650 ddi_prop_free(data); 651 return (MDI_FAILURE); 652 } 653 ddi_prop_free(data); 654 } 655 656 /* 657 * Search for a matching vHCI 658 */ 659 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 660 if (vh == NULL) { 661 return (MDI_FAILURE); 662 } 663 664 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 665 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 666 ph->ph_dip = pdip; 667 ph->ph_vhci = vh; 668 ph->ph_next = NULL; 669 ph->ph_unstable = 0; 670 ph->ph_vprivate = 0; 671 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 672 673 MDI_PHCI_LOCK(ph); 674 MDI_PHCI_SET_POWER_UP(ph); 675 MDI_PHCI_UNLOCK(ph); 676 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 677 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 678 679 vhcache_phci_add(vh->vh_config, ph); 680 681 MDI_VHCI_PHCI_LOCK(vh); 682 if (vh->vh_phci_head == NULL) { 683 vh->vh_phci_head = ph; 684 } 685 if (vh->vh_phci_tail) { 686 vh->vh_phci_tail->ph_next = ph; 687 } 688 vh->vh_phci_tail = ph; 689 vh->vh_phci_count++; 690 MDI_VHCI_PHCI_UNLOCK(vh); 691 692 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 693 return (MDI_SUCCESS); 694 } 695 696 /* 697 * mdi_phci_unregister(): 698 * Unregister a pHCI module from mpxio framework 699 * mdi_phci_unregister() is called by the pHCI drivers from their 700 * detach(9E) handler to unregister their instances from the 701 * framework. 
702 * Return Values: 703 * MDI_SUCCESS 704 * MDI_FAILURE 705 */ 706 /*ARGSUSED*/ 707 int 708 mdi_phci_unregister(dev_info_t *pdip, int flags) 709 { 710 mdi_vhci_t *vh; 711 mdi_phci_t *ph; 712 mdi_phci_t *tmp; 713 mdi_phci_t *prev = NULL; 714 mdi_pathinfo_t *pip; 715 716 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 717 718 ph = i_devi_get_phci(pdip); 719 if (ph == NULL) { 720 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI")); 721 return (MDI_FAILURE); 722 } 723 724 vh = ph->ph_vhci; 725 ASSERT(vh != NULL); 726 if (vh == NULL) { 727 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI")); 728 return (MDI_FAILURE); 729 } 730 731 MDI_VHCI_PHCI_LOCK(vh); 732 tmp = vh->vh_phci_head; 733 while (tmp) { 734 if (tmp == ph) { 735 break; 736 } 737 prev = tmp; 738 tmp = tmp->ph_next; 739 } 740 741 if (ph == vh->vh_phci_head) { 742 vh->vh_phci_head = ph->ph_next; 743 } else { 744 prev->ph_next = ph->ph_next; 745 } 746 747 if (ph == vh->vh_phci_tail) { 748 vh->vh_phci_tail = prev; 749 } 750 751 vh->vh_phci_count--; 752 MDI_VHCI_PHCI_UNLOCK(vh); 753 754 /* Walk remaining pathinfo nodes and disassociate them from pHCI */ 755 MDI_PHCI_LOCK(ph); 756 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip; 757 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) 758 MDI_PI(pip)->pi_phci = NULL; 759 MDI_PHCI_UNLOCK(ph); 760 761 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 762 ESC_DDI_INITIATOR_UNREGISTER); 763 vhcache_phci_remove(vh->vh_config, ph); 764 cv_destroy(&ph->ph_unstable_cv); 765 mutex_destroy(&ph->ph_mutex); 766 kmem_free(ph, sizeof (mdi_phci_t)); 767 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 768 DEVI(pdip)->devi_mdi_xhci = NULL; 769 return (MDI_SUCCESS); 770 } 771 772 /* 773 * i_devi_get_phci(): 774 * Utility function to return the phci extensions. 
775 */ 776 static mdi_phci_t * 777 i_devi_get_phci(dev_info_t *pdip) 778 { 779 mdi_phci_t *ph = NULL; 780 781 if (MDI_PHCI(pdip)) { 782 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 783 } 784 return (ph); 785 } 786 787 /* 788 * Single thread mdi entry into devinfo node for modifying its children. 789 * If necessary we perform an ndi_devi_enter of the vHCI before doing 790 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 791 * for the vHCI and one for the pHCI. 792 */ 793 void 794 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 795 { 796 dev_info_t *vdip; 797 int vcircular, pcircular; 798 799 /* Verify calling context */ 800 ASSERT(MDI_PHCI(phci_dip)); 801 vdip = mdi_devi_get_vdip(phci_dip); 802 ASSERT(vdip); /* A pHCI always has a vHCI */ 803 804 /* 805 * If pHCI is detaching then the framework has already entered the 806 * vHCI on a threads that went down the code path leading to 807 * detach_node(). This framework enter of the vHCI during pHCI 808 * detach is done to avoid deadlock with vHCI power management 809 * operations which enter the vHCI and the enter down the path 810 * to the pHCI. If pHCI is detaching then we piggyback this calls 811 * enter of the vHCI on frameworks vHCI enter that has already 812 * occurred - this is OK because we know that the framework thread 813 * doing detach is waiting for our completion. 814 * 815 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 816 * race with detach - but we can't do that because the framework has 817 * already entered the parent, so we have some complexity instead. 
818 */ 819 for (;;) { 820 if (ndi_devi_tryenter(vdip, &vcircular)) { 821 ASSERT(vcircular != -1); 822 if (DEVI_IS_DETACHING(phci_dip)) { 823 ndi_devi_exit(vdip, vcircular); 824 vcircular = -1; 825 } 826 break; 827 } else if (DEVI_IS_DETACHING(phci_dip)) { 828 vcircular = -1; 829 break; 830 } else if (servicing_interrupt()) { 831 /* 832 * Don't delay an interrupt (and ensure adaptive 833 * mutex inversion support). 834 */ 835 ndi_devi_enter(vdip, &vcircular); 836 break; 837 } else { 838 delay_random(mdi_delay); 839 } 840 } 841 842 ndi_devi_enter(phci_dip, &pcircular); 843 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 844 } 845 846 /* 847 * Attempt to mdi_devi_enter. 848 */ 849 int 850 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 851 { 852 dev_info_t *vdip; 853 int vcircular, pcircular; 854 855 /* Verify calling context */ 856 ASSERT(MDI_PHCI(phci_dip)); 857 vdip = mdi_devi_get_vdip(phci_dip); 858 ASSERT(vdip); /* A pHCI always has a vHCI */ 859 860 if (ndi_devi_tryenter(vdip, &vcircular)) { 861 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 862 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 863 return (1); /* locked */ 864 } 865 ndi_devi_exit(vdip, vcircular); 866 } 867 return (0); /* busy */ 868 } 869 870 /* 871 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
872 */ 873 void 874 mdi_devi_exit(dev_info_t *phci_dip, int circular) 875 { 876 dev_info_t *vdip; 877 int vcircular, pcircular; 878 879 /* Verify calling context */ 880 ASSERT(MDI_PHCI(phci_dip)); 881 vdip = mdi_devi_get_vdip(phci_dip); 882 ASSERT(vdip); /* A pHCI always has a vHCI */ 883 884 /* extract two circular recursion values from single int */ 885 pcircular = (short)(circular & 0xFFFF); 886 vcircular = (short)((circular >> 16) & 0xFFFF); 887 888 ndi_devi_exit(phci_dip, pcircular); 889 if (vcircular != -1) 890 ndi_devi_exit(vdip, vcircular); 891 } 892 893 /* 894 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 895 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 896 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 897 * with vHCI power management code during path online/offline. Each 898 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 899 * occur within the scope of an active mdi_devi_enter that establishes the 900 * circular value. 901 */ 902 void 903 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 904 { 905 int pcircular; 906 907 /* Verify calling context */ 908 ASSERT(MDI_PHCI(phci_dip)); 909 910 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 911 ndi_hold_devi(phci_dip); 912 913 pcircular = (short)(circular & 0xFFFF); 914 ndi_devi_exit(phci_dip, pcircular); 915 } 916 917 void 918 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 919 { 920 int pcircular; 921 922 /* Verify calling context */ 923 ASSERT(MDI_PHCI(phci_dip)); 924 925 ndi_devi_enter(phci_dip, &pcircular); 926 927 /* Drop hold from mdi_devi_exit_phci. 
*/ 928 ndi_rele_devi(phci_dip); 929 930 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 931 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 932 } 933 934 /* 935 * mdi_devi_get_vdip(): 936 * given a pHCI dip return vHCI dip 937 */ 938 dev_info_t * 939 mdi_devi_get_vdip(dev_info_t *pdip) 940 { 941 mdi_phci_t *ph; 942 943 ph = i_devi_get_phci(pdip); 944 if (ph && ph->ph_vhci) 945 return (ph->ph_vhci->vh_dip); 946 return (NULL); 947 } 948 949 /* 950 * mdi_devi_pdip_entered(): 951 * Return 1 if we are vHCI and have done an ndi_devi_enter 952 * of a pHCI 953 */ 954 int 955 mdi_devi_pdip_entered(dev_info_t *vdip) 956 { 957 mdi_vhci_t *vh; 958 mdi_phci_t *ph; 959 960 vh = i_devi_get_vhci(vdip); 961 if (vh == NULL) 962 return (0); 963 964 MDI_VHCI_PHCI_LOCK(vh); 965 ph = vh->vh_phci_head; 966 while (ph) { 967 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 968 MDI_VHCI_PHCI_UNLOCK(vh); 969 return (1); 970 } 971 ph = ph->ph_next; 972 } 973 MDI_VHCI_PHCI_UNLOCK(vh); 974 return (0); 975 } 976 977 /* 978 * mdi_phci_path2devinfo(): 979 * Utility function to search for a valid phci device given 980 * the devfs pathname. 
981 */ 982 dev_info_t * 983 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 984 { 985 char *temp_pathname; 986 mdi_vhci_t *vh; 987 mdi_phci_t *ph; 988 dev_info_t *pdip = NULL; 989 990 vh = i_devi_get_vhci(vdip); 991 ASSERT(vh != NULL); 992 993 if (vh == NULL) { 994 /* 995 * Invalid vHCI component, return failure 996 */ 997 return (NULL); 998 } 999 1000 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1001 MDI_VHCI_PHCI_LOCK(vh); 1002 ph = vh->vh_phci_head; 1003 while (ph != NULL) { 1004 pdip = ph->ph_dip; 1005 ASSERT(pdip != NULL); 1006 *temp_pathname = '\0'; 1007 (void) ddi_pathname(pdip, temp_pathname); 1008 if (strcmp(temp_pathname, pathname) == 0) { 1009 break; 1010 } 1011 ph = ph->ph_next; 1012 } 1013 if (ph == NULL) { 1014 pdip = NULL; 1015 } 1016 MDI_VHCI_PHCI_UNLOCK(vh); 1017 kmem_free(temp_pathname, MAXPATHLEN); 1018 return (pdip); 1019 } 1020 1021 /* 1022 * mdi_phci_get_path_count(): 1023 * get number of path information nodes associated with a given 1024 * pHCI device. 1025 */ 1026 int 1027 mdi_phci_get_path_count(dev_info_t *pdip) 1028 { 1029 mdi_phci_t *ph; 1030 int count = 0; 1031 1032 ph = i_devi_get_phci(pdip); 1033 if (ph != NULL) { 1034 count = ph->ph_path_count; 1035 } 1036 return (count); 1037 } 1038 1039 /* 1040 * i_mdi_phci_lock(): 1041 * Lock a pHCI device 1042 * Return Values: 1043 * None 1044 * Note: 1045 * The default locking order is: 1046 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1047 * But there are number of situations where locks need to be 1048 * grabbed in reverse order. This routine implements try and lock 1049 * mechanism depending on the requested parameter option. 1050 */ 1051 static void 1052 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1053 { 1054 if (pip) { 1055 /* Reverse locking is requested. 
*/ 1056 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1057 if (servicing_interrupt()) { 1058 MDI_PI_HOLD(pip); 1059 MDI_PI_UNLOCK(pip); 1060 MDI_PHCI_LOCK(ph); 1061 MDI_PI_LOCK(pip); 1062 MDI_PI_RELE(pip); 1063 break; 1064 } else { 1065 /* 1066 * tryenter failed. Try to grab again 1067 * after a small delay 1068 */ 1069 MDI_PI_HOLD(pip); 1070 MDI_PI_UNLOCK(pip); 1071 delay_random(mdi_delay); 1072 MDI_PI_LOCK(pip); 1073 MDI_PI_RELE(pip); 1074 } 1075 } 1076 } else { 1077 MDI_PHCI_LOCK(ph); 1078 } 1079 } 1080 1081 /* 1082 * i_mdi_phci_unlock(): 1083 * Unlock the pHCI component 1084 */ 1085 static void 1086 i_mdi_phci_unlock(mdi_phci_t *ph) 1087 { 1088 MDI_PHCI_UNLOCK(ph); 1089 } 1090 1091 /* 1092 * i_mdi_devinfo_create(): 1093 * create client device's devinfo node 1094 * Return Values: 1095 * dev_info 1096 * NULL 1097 * Notes: 1098 */ 1099 static dev_info_t * 1100 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1101 char **compatible, int ncompatible) 1102 { 1103 dev_info_t *cdip = NULL; 1104 1105 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1106 1107 /* Verify for duplicate entry */ 1108 cdip = i_mdi_devinfo_find(vh, name, guid); 1109 ASSERT(cdip == NULL); 1110 if (cdip) { 1111 cmn_err(CE_WARN, 1112 "i_mdi_devinfo_create: client %s@%s already exists", 1113 name ? name : "", guid ? 
guid : ""); 1114 } 1115 1116 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1117 if (cdip == NULL) 1118 goto fail; 1119 1120 /* 1121 * Create component type and Global unique identifier 1122 * properties 1123 */ 1124 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1125 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1126 goto fail; 1127 } 1128 1129 /* Decorate the node with compatible property */ 1130 if (compatible && 1131 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1132 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1133 goto fail; 1134 } 1135 1136 return (cdip); 1137 1138 fail: 1139 if (cdip) { 1140 (void) ndi_prop_remove_all(cdip); 1141 (void) ndi_devi_free(cdip); 1142 } 1143 return (NULL); 1144 } 1145 1146 /* 1147 * i_mdi_devinfo_find(): 1148 * Find a matching devinfo node for given client node name 1149 * and its guid. 1150 * Return Values: 1151 * Handle to a dev_info node or NULL 1152 */ 1153 static dev_info_t * 1154 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1155 { 1156 char *data; 1157 dev_info_t *cdip = NULL; 1158 dev_info_t *ndip = NULL; 1159 int circular; 1160 1161 ndi_devi_enter(vh->vh_dip, &circular); 1162 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1163 while ((cdip = ndip) != NULL) { 1164 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1165 1166 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1167 continue; 1168 } 1169 1170 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1171 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1172 &data) != DDI_PROP_SUCCESS) { 1173 continue; 1174 } 1175 1176 if (strcmp(data, guid) != 0) { 1177 ddi_prop_free(data); 1178 continue; 1179 } 1180 ddi_prop_free(data); 1181 break; 1182 } 1183 ndi_devi_exit(vh->vh_dip, circular); 1184 return (cdip); 1185 } 1186 1187 /* 1188 * i_mdi_devinfo_remove(): 1189 * Remove a client device node 1190 */ 1191 static int 1192 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1193 { 1194 int rv = MDI_SUCCESS; 1195 
1196 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1197 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1198 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1199 if (rv != NDI_SUCCESS) { 1200 MDI_DEBUG(1, (MDI_NOTE, cdip, 1201 "!failed: cdip %p", (void *)cdip)); 1202 } 1203 /* 1204 * Convert to MDI error code 1205 */ 1206 switch (rv) { 1207 case NDI_SUCCESS: 1208 rv = MDI_SUCCESS; 1209 break; 1210 case NDI_BUSY: 1211 rv = MDI_BUSY; 1212 break; 1213 default: 1214 rv = MDI_FAILURE; 1215 break; 1216 } 1217 } 1218 return (rv); 1219 } 1220 1221 /* 1222 * i_devi_get_client() 1223 * Utility function to get mpxio component extensions 1224 */ 1225 static mdi_client_t * 1226 i_devi_get_client(dev_info_t *cdip) 1227 { 1228 mdi_client_t *ct = NULL; 1229 1230 if (MDI_CLIENT(cdip)) { 1231 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1232 } 1233 return (ct); 1234 } 1235 1236 /* 1237 * i_mdi_is_child_present(): 1238 * Search for the presence of client device dev_info node 1239 */ 1240 static int 1241 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1242 { 1243 int rv = MDI_FAILURE; 1244 struct dev_info *dip; 1245 int circular; 1246 1247 ndi_devi_enter(vdip, &circular); 1248 dip = DEVI(vdip)->devi_child; 1249 while (dip) { 1250 if (dip == DEVI(cdip)) { 1251 rv = MDI_SUCCESS; 1252 break; 1253 } 1254 dip = dip->devi_sibling; 1255 } 1256 ndi_devi_exit(vdip, circular); 1257 return (rv); 1258 } 1259 1260 1261 /* 1262 * i_mdi_client_lock(): 1263 * Grab client component lock 1264 * Return Values: 1265 * None 1266 * Note: 1267 * The default locking order is: 1268 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1269 * But there are number of situations where locks need to be 1270 * grabbed in reverse order. This routine implements try and lock 1271 * mechanism depending on the requested parameter option. 
1272 */ 1273 static void 1274 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1275 { 1276 if (pip) { 1277 /* 1278 * Reverse locking is requested. 1279 */ 1280 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1281 if (servicing_interrupt()) { 1282 MDI_PI_HOLD(pip); 1283 MDI_PI_UNLOCK(pip); 1284 MDI_CLIENT_LOCK(ct); 1285 MDI_PI_LOCK(pip); 1286 MDI_PI_RELE(pip); 1287 break; 1288 } else { 1289 /* 1290 * tryenter failed. Try to grab again 1291 * after a small delay 1292 */ 1293 MDI_PI_HOLD(pip); 1294 MDI_PI_UNLOCK(pip); 1295 delay_random(mdi_delay); 1296 MDI_PI_LOCK(pip); 1297 MDI_PI_RELE(pip); 1298 } 1299 } 1300 } else { 1301 MDI_CLIENT_LOCK(ct); 1302 } 1303 } 1304 1305 /* 1306 * i_mdi_client_unlock(): 1307 * Unlock a client component 1308 */ 1309 static void 1310 i_mdi_client_unlock(mdi_client_t *ct) 1311 { 1312 MDI_CLIENT_UNLOCK(ct); 1313 } 1314 1315 /* 1316 * i_mdi_client_alloc(): 1317 * Allocate and initialize a client structure. Caller should 1318 * hold the vhci client lock. 1319 * Return Values: 1320 * Handle to a client component 1321 */ 1322 /*ARGSUSED*/ 1323 static mdi_client_t * 1324 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1325 { 1326 mdi_client_t *ct; 1327 1328 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1329 1330 /* 1331 * Allocate and initialize a component structure. 
1332 */ 1333 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1334 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1335 ct->ct_hnext = NULL; 1336 ct->ct_hprev = NULL; 1337 ct->ct_dip = NULL; 1338 ct->ct_vhci = vh; 1339 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1340 (void) strcpy(ct->ct_drvname, name); 1341 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1342 (void) strcpy(ct->ct_guid, lguid); 1343 ct->ct_cprivate = NULL; 1344 ct->ct_vprivate = NULL; 1345 ct->ct_flags = 0; 1346 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1347 MDI_CLIENT_LOCK(ct); 1348 MDI_CLIENT_SET_OFFLINE(ct); 1349 MDI_CLIENT_SET_DETACH(ct); 1350 MDI_CLIENT_SET_POWER_UP(ct); 1351 MDI_CLIENT_UNLOCK(ct); 1352 ct->ct_failover_flags = 0; 1353 ct->ct_failover_status = 0; 1354 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1355 ct->ct_unstable = 0; 1356 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1357 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1358 ct->ct_lb = vh->vh_lb; 1359 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1360 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1361 ct->ct_path_count = 0; 1362 ct->ct_path_head = NULL; 1363 ct->ct_path_tail = NULL; 1364 ct->ct_path_last = NULL; 1365 1366 /* 1367 * Add this client component to our client hash queue 1368 */ 1369 i_mdi_client_enlist_table(vh, ct); 1370 return (ct); 1371 } 1372 1373 /* 1374 * i_mdi_client_enlist_table(): 1375 * Attach the client device to the client hash table. Caller 1376 * should hold the vhci client lock. 
1377 */ 1378 static void 1379 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1380 { 1381 int index; 1382 struct client_hash *head; 1383 1384 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1385 1386 index = i_mdi_get_hash_key(ct->ct_guid); 1387 head = &vh->vh_client_table[index]; 1388 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1389 head->ct_hash_head = ct; 1390 head->ct_hash_count++; 1391 vh->vh_client_count++; 1392 } 1393 1394 /* 1395 * i_mdi_client_delist_table(): 1396 * Attach the client device to the client hash table. 1397 * Caller should hold the vhci client lock. 1398 */ 1399 static void 1400 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1401 { 1402 int index; 1403 char *guid; 1404 struct client_hash *head; 1405 mdi_client_t *next; 1406 mdi_client_t *last; 1407 1408 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1409 1410 guid = ct->ct_guid; 1411 index = i_mdi_get_hash_key(guid); 1412 head = &vh->vh_client_table[index]; 1413 1414 last = NULL; 1415 next = (mdi_client_t *)head->ct_hash_head; 1416 while (next != NULL) { 1417 if (next == ct) { 1418 break; 1419 } 1420 last = next; 1421 next = next->ct_hnext; 1422 } 1423 1424 if (next) { 1425 head->ct_hash_count--; 1426 if (last == NULL) { 1427 head->ct_hash_head = ct->ct_hnext; 1428 } else { 1429 last->ct_hnext = ct->ct_hnext; 1430 } 1431 ct->ct_hnext = NULL; 1432 vh->vh_client_count--; 1433 } 1434 } 1435 1436 1437 /* 1438 * i_mdi_client_free(): 1439 * Free a client component 1440 */ 1441 static int 1442 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1443 { 1444 int rv = MDI_SUCCESS; 1445 int flags = ct->ct_flags; 1446 dev_info_t *cdip; 1447 dev_info_t *vdip; 1448 1449 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1450 1451 vdip = vh->vh_dip; 1452 cdip = ct->ct_dip; 1453 1454 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1455 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1456 DEVI(cdip)->devi_mdi_client = NULL; 1457 1458 /* 1459 * Clear out back ref. 
to dev_info_t node 1460 */ 1461 ct->ct_dip = NULL; 1462 1463 /* 1464 * Remove this client from our hash queue 1465 */ 1466 i_mdi_client_delist_table(vh, ct); 1467 1468 /* 1469 * Uninitialize and free the component 1470 */ 1471 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1472 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1473 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1474 cv_destroy(&ct->ct_failover_cv); 1475 cv_destroy(&ct->ct_unstable_cv); 1476 cv_destroy(&ct->ct_powerchange_cv); 1477 mutex_destroy(&ct->ct_mutex); 1478 kmem_free(ct, sizeof (*ct)); 1479 1480 if (cdip != NULL) { 1481 MDI_VHCI_CLIENT_UNLOCK(vh); 1482 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1483 MDI_VHCI_CLIENT_LOCK(vh); 1484 } 1485 return (rv); 1486 } 1487 1488 /* 1489 * i_mdi_client_find(): 1490 * Find the client structure corresponding to a given guid 1491 * Caller should hold the vhci client lock. 1492 */ 1493 static mdi_client_t * 1494 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1495 { 1496 int index; 1497 struct client_hash *head; 1498 mdi_client_t *ct; 1499 1500 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1501 1502 index = i_mdi_get_hash_key(guid); 1503 head = &vh->vh_client_table[index]; 1504 1505 ct = head->ct_hash_head; 1506 while (ct != NULL) { 1507 if (strcmp(ct->ct_guid, guid) == 0 && 1508 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1509 break; 1510 } 1511 ct = ct->ct_hnext; 1512 } 1513 return (ct); 1514 } 1515 1516 /* 1517 * i_mdi_client_update_state(): 1518 * Compute and update client device state 1519 * Notes: 1520 * A client device can be in any of three possible states: 1521 * 1522 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1523 * one online/standby paths. Can tolerate failures. 1524 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1525 * no alternate paths available as standby. A failure on the online 1526 * would result in loss of access to device data. 
1527 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1528 * no paths available to access the device. 1529 */ 1530 static void 1531 i_mdi_client_update_state(mdi_client_t *ct) 1532 { 1533 int state; 1534 1535 ASSERT(MDI_CLIENT_LOCKED(ct)); 1536 state = i_mdi_client_compute_state(ct, NULL); 1537 MDI_CLIENT_SET_STATE(ct, state); 1538 } 1539 1540 /* 1541 * i_mdi_client_compute_state(): 1542 * Compute client device state 1543 * 1544 * mdi_phci_t * Pointer to pHCI structure which should 1545 * while computing the new value. Used by 1546 * i_mdi_phci_offline() to find the new 1547 * client state after DR of a pHCI. 1548 */ 1549 static int 1550 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1551 { 1552 int state; 1553 int online_count = 0; 1554 int standby_count = 0; 1555 mdi_pathinfo_t *pip, *next; 1556 1557 ASSERT(MDI_CLIENT_LOCKED(ct)); 1558 pip = ct->ct_path_head; 1559 while (pip != NULL) { 1560 MDI_PI_LOCK(pip); 1561 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1562 if (MDI_PI(pip)->pi_phci == ph) { 1563 MDI_PI_UNLOCK(pip); 1564 pip = next; 1565 continue; 1566 } 1567 1568 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1569 == MDI_PATHINFO_STATE_ONLINE) 1570 online_count++; 1571 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1572 == MDI_PATHINFO_STATE_STANDBY) 1573 standby_count++; 1574 MDI_PI_UNLOCK(pip); 1575 pip = next; 1576 } 1577 1578 if (online_count == 0) { 1579 if (standby_count == 0) { 1580 state = MDI_CLIENT_STATE_FAILED; 1581 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1582 "client state failed: ct = %p", (void *)ct)); 1583 } else if (standby_count == 1) { 1584 state = MDI_CLIENT_STATE_DEGRADED; 1585 } else { 1586 state = MDI_CLIENT_STATE_OPTIMAL; 1587 } 1588 } else if (online_count == 1) { 1589 if (standby_count == 0) { 1590 state = MDI_CLIENT_STATE_DEGRADED; 1591 } else { 1592 state = MDI_CLIENT_STATE_OPTIMAL; 1593 } 1594 } else { 1595 state = MDI_CLIENT_STATE_OPTIMAL; 1596 } 1597 return (state); 1598 } 1599 
1600 /* 1601 * i_mdi_client2devinfo(): 1602 * Utility function 1603 */ 1604 dev_info_t * 1605 i_mdi_client2devinfo(mdi_client_t *ct) 1606 { 1607 return (ct->ct_dip); 1608 } 1609 1610 /* 1611 * mdi_client_path2_devinfo(): 1612 * Given the parent devinfo and child devfs pathname, search for 1613 * a valid devfs node handle. 1614 */ 1615 dev_info_t * 1616 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1617 { 1618 dev_info_t *cdip = NULL; 1619 dev_info_t *ndip = NULL; 1620 char *temp_pathname; 1621 int circular; 1622 1623 /* 1624 * Allocate temp buffer 1625 */ 1626 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1627 1628 /* 1629 * Lock parent against changes 1630 */ 1631 ndi_devi_enter(vdip, &circular); 1632 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1633 while ((cdip = ndip) != NULL) { 1634 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1635 1636 *temp_pathname = '\0'; 1637 (void) ddi_pathname(cdip, temp_pathname); 1638 if (strcmp(temp_pathname, pathname) == 0) { 1639 break; 1640 } 1641 } 1642 /* 1643 * Release devinfo lock 1644 */ 1645 ndi_devi_exit(vdip, circular); 1646 1647 /* 1648 * Free the temp buffer 1649 */ 1650 kmem_free(temp_pathname, MAXPATHLEN); 1651 return (cdip); 1652 } 1653 1654 /* 1655 * mdi_client_get_path_count(): 1656 * Utility function to get number of path information nodes 1657 * associated with a given client device. 
1658 */ 1659 int 1660 mdi_client_get_path_count(dev_info_t *cdip) 1661 { 1662 mdi_client_t *ct; 1663 int count = 0; 1664 1665 ct = i_devi_get_client(cdip); 1666 if (ct != NULL) { 1667 count = ct->ct_path_count; 1668 } 1669 return (count); 1670 } 1671 1672 1673 /* 1674 * i_mdi_get_hash_key(): 1675 * Create a hash using strings as keys 1676 * 1677 */ 1678 static int 1679 i_mdi_get_hash_key(char *str) 1680 { 1681 uint32_t g, hash = 0; 1682 char *p; 1683 1684 for (p = str; *p != '\0'; p++) { 1685 g = *p; 1686 hash += g; 1687 } 1688 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1689 } 1690 1691 /* 1692 * mdi_get_lb_policy(): 1693 * Get current load balancing policy for a given client device 1694 */ 1695 client_lb_t 1696 mdi_get_lb_policy(dev_info_t *cdip) 1697 { 1698 client_lb_t lb = LOAD_BALANCE_NONE; 1699 mdi_client_t *ct; 1700 1701 ct = i_devi_get_client(cdip); 1702 if (ct != NULL) { 1703 lb = ct->ct_lb; 1704 } 1705 return (lb); 1706 } 1707 1708 /* 1709 * mdi_set_lb_region_size(): 1710 * Set current region size for the load-balance 1711 */ 1712 int 1713 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1714 { 1715 mdi_client_t *ct; 1716 int rv = MDI_FAILURE; 1717 1718 ct = i_devi_get_client(cdip); 1719 if (ct != NULL && ct->ct_lb_args != NULL) { 1720 ct->ct_lb_args->region_size = region_size; 1721 rv = MDI_SUCCESS; 1722 } 1723 return (rv); 1724 } 1725 1726 /* 1727 * mdi_Set_lb_policy(): 1728 * Set current load balancing policy for a given client device 1729 */ 1730 int 1731 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1732 { 1733 mdi_client_t *ct; 1734 int rv = MDI_FAILURE; 1735 1736 ct = i_devi_get_client(cdip); 1737 if (ct != NULL) { 1738 ct->ct_lb = lb; 1739 rv = MDI_SUCCESS; 1740 } 1741 return (rv); 1742 } 1743 1744 /* 1745 * mdi_failover(): 1746 * failover function called by the vHCI drivers to initiate 1747 * a failover operation. This is typically due to non-availability 1748 * of online paths to route I/O requests. 
Failover can be 1749 * triggered through user application also. 1750 * 1751 * The vHCI driver calls mdi_failover() to initiate a failover 1752 * operation. mdi_failover() calls back into the vHCI driver's 1753 * vo_failover() entry point to perform the actual failover 1754 * operation. The reason for requiring the vHCI driver to 1755 * initiate failover by calling mdi_failover(), instead of directly 1756 * executing vo_failover() itself, is to ensure that the mdi 1757 * framework can keep track of the client state properly. 1758 * Additionally, mdi_failover() provides as a convenience the 1759 * option of performing the failover operation synchronously or 1760 * asynchronously 1761 * 1762 * Upon successful completion of the failover operation, the 1763 * paths that were previously ONLINE will be in the STANDBY state, 1764 * and the newly activated paths will be in the ONLINE state. 1765 * 1766 * The flags modifier determines whether the activation is done 1767 * synchronously: MDI_FAILOVER_SYNC 1768 * Return Values: 1769 * MDI_SUCCESS 1770 * MDI_FAILURE 1771 * MDI_BUSY 1772 */ 1773 /*ARGSUSED*/ 1774 int 1775 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1776 { 1777 int rv; 1778 mdi_client_t *ct; 1779 1780 ct = i_devi_get_client(cdip); 1781 ASSERT(ct != NULL); 1782 if (ct == NULL) { 1783 /* cdip is not a valid client device. Nothing more to do. */ 1784 return (MDI_FAILURE); 1785 } 1786 1787 MDI_CLIENT_LOCK(ct); 1788 1789 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1790 /* A path to the client is being freed */ 1791 MDI_CLIENT_UNLOCK(ct); 1792 return (MDI_BUSY); 1793 } 1794 1795 1796 if (MDI_CLIENT_IS_FAILED(ct)) { 1797 /* 1798 * Client is in failed state. Nothing more to do. 
1799 */ 1800 MDI_CLIENT_UNLOCK(ct); 1801 return (MDI_FAILURE); 1802 } 1803 1804 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1805 /* 1806 * Failover is already in progress; return BUSY 1807 */ 1808 MDI_CLIENT_UNLOCK(ct); 1809 return (MDI_BUSY); 1810 } 1811 /* 1812 * Make sure that mdi_pathinfo node state changes are processed. 1813 * We do not allow failovers to progress while client path state 1814 * changes are in progress 1815 */ 1816 if (ct->ct_unstable) { 1817 if (flags == MDI_FAILOVER_ASYNC) { 1818 MDI_CLIENT_UNLOCK(ct); 1819 return (MDI_BUSY); 1820 } else { 1821 while (ct->ct_unstable) 1822 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1823 } 1824 } 1825 1826 /* 1827 * Client device is in stable state. Before proceeding, perform sanity 1828 * checks again. 1829 */ 1830 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1831 (!i_ddi_devi_attached(ct->ct_dip))) { 1832 /* 1833 * Client is in failed state. Nothing more to do. 1834 */ 1835 MDI_CLIENT_UNLOCK(ct); 1836 return (MDI_FAILURE); 1837 } 1838 1839 /* 1840 * Set the client state as failover in progress. 1841 */ 1842 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1843 ct->ct_failover_flags = flags; 1844 MDI_CLIENT_UNLOCK(ct); 1845 1846 if (flags == MDI_FAILOVER_ASYNC) { 1847 /* 1848 * Submit the initiate failover request via CPR safe 1849 * taskq threads. 1850 */ 1851 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1852 ct, KM_SLEEP); 1853 return (MDI_ACCEPT); 1854 } else { 1855 /* 1856 * Synchronous failover mode. Typically invoked from the user 1857 * land. 1858 */ 1859 rv = i_mdi_failover(ct); 1860 } 1861 return (rv); 1862 } 1863 1864 /* 1865 * i_mdi_failover(): 1866 * internal failover function. Invokes vHCI drivers failover 1867 * callback function and process the failover status 1868 * Return Values: 1869 * None 1870 * 1871 * Note: A client device in failover state can not be detached or freed. 
1872 */ 1873 static int 1874 i_mdi_failover(void *arg) 1875 { 1876 int rv = MDI_SUCCESS; 1877 mdi_client_t *ct = (mdi_client_t *)arg; 1878 mdi_vhci_t *vh = ct->ct_vhci; 1879 1880 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1881 1882 if (vh->vh_ops->vo_failover != NULL) { 1883 /* 1884 * Call vHCI drivers callback routine 1885 */ 1886 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1887 ct->ct_failover_flags); 1888 } 1889 1890 MDI_CLIENT_LOCK(ct); 1891 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1892 1893 /* 1894 * Save the failover return status 1895 */ 1896 ct->ct_failover_status = rv; 1897 1898 /* 1899 * As a result of failover, client status would have been changed. 1900 * Update the client state and wake up anyone waiting on this client 1901 * device. 1902 */ 1903 i_mdi_client_update_state(ct); 1904 1905 cv_broadcast(&ct->ct_failover_cv); 1906 MDI_CLIENT_UNLOCK(ct); 1907 return (rv); 1908 } 1909 1910 /* 1911 * Load balancing is logical block. 1912 * IOs within the range described by region_size 1913 * would go on the same path. This would improve the 1914 * performance by cache-hit on some of the RAID devices. 1915 * Search only for online paths(At some point we 1916 * may want to balance across target ports). 1917 * If no paths are found then default to round-robin. 
1918 */ 1919 static int 1920 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1921 { 1922 int path_index = -1; 1923 int online_path_count = 0; 1924 int online_nonpref_path_count = 0; 1925 int region_size = ct->ct_lb_args->region_size; 1926 mdi_pathinfo_t *pip; 1927 mdi_pathinfo_t *next; 1928 int preferred, path_cnt; 1929 1930 pip = ct->ct_path_head; 1931 while (pip) { 1932 MDI_PI_LOCK(pip); 1933 if (MDI_PI(pip)->pi_state == 1934 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1935 online_path_count++; 1936 } else if (MDI_PI(pip)->pi_state == 1937 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1938 online_nonpref_path_count++; 1939 } 1940 next = (mdi_pathinfo_t *) 1941 MDI_PI(pip)->pi_client_link; 1942 MDI_PI_UNLOCK(pip); 1943 pip = next; 1944 } 1945 /* if found any online/preferred then use this type */ 1946 if (online_path_count > 0) { 1947 path_cnt = online_path_count; 1948 preferred = 1; 1949 } else if (online_nonpref_path_count > 0) { 1950 path_cnt = online_nonpref_path_count; 1951 preferred = 0; 1952 } else { 1953 path_cnt = 0; 1954 } 1955 if (path_cnt) { 1956 path_index = (bp->b_blkno >> region_size) % path_cnt; 1957 pip = ct->ct_path_head; 1958 while (pip && path_index != -1) { 1959 MDI_PI_LOCK(pip); 1960 if (path_index == 0 && 1961 (MDI_PI(pip)->pi_state == 1962 MDI_PATHINFO_STATE_ONLINE) && 1963 MDI_PI(pip)->pi_preferred == preferred) { 1964 MDI_PI_HOLD(pip); 1965 MDI_PI_UNLOCK(pip); 1966 *ret_pip = pip; 1967 return (MDI_SUCCESS); 1968 } 1969 path_index --; 1970 next = (mdi_pathinfo_t *) 1971 MDI_PI(pip)->pi_client_link; 1972 MDI_PI_UNLOCK(pip); 1973 pip = next; 1974 } 1975 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1976 "lba %llx: path %s %p", 1977 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1978 } 1979 return (MDI_FAILURE); 1980 } 1981 1982 /* 1983 * mdi_select_path(): 1984 * select a path to access a client device. 
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online path is available to
 *		select, the framework automatically selects a suitable path
 *		for routing the I/O request.  If a failover operation is active
 *		for this client device the call shall be failed with MDI_BUSY
 *		error code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used as long as the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is set through the vHCI driver's configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the "arg" argument depends
 *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
 *		then the argument is the "path instance" of the path to select.
 *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *		"start_pip". A non NULL "start_pip" is the starting point to
 *		walk and find the next appropriate path.  The following values
 *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select a
 *		STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg.
during 2019 * attach of client devices (to avoid an unnecessary failover 2020 * when the STANDBY path comes up first), during failover 2021 * (to activate a STANDBY path as ONLINE). 2022 * 2023 * The selected path is returned in a a mdi_hold_path() state 2024 * (pi_ref_cnt). Caller should release the hold by calling 2025 * mdi_rele_path(). 2026 * 2027 * Return Values: 2028 * MDI_SUCCESS - Completed successfully 2029 * MDI_BUSY - Client device is busy failing over 2030 * MDI_NOPATH - Client device is online, but no valid path are 2031 * available to access this client device 2032 * MDI_FAILURE - Invalid client device or state 2033 * MDI_DEVI_ONLINING 2034 * - Client device (struct dev_info state) is in 2035 * onlining state. 2036 */ 2037 2038 /*ARGSUSED*/ 2039 int 2040 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2041 void *arg, mdi_pathinfo_t **ret_pip) 2042 { 2043 mdi_client_t *ct; 2044 mdi_pathinfo_t *pip; 2045 mdi_pathinfo_t *next; 2046 mdi_pathinfo_t *head; 2047 mdi_pathinfo_t *start; 2048 client_lb_t lbp; /* load balancing policy */ 2049 int sb = 1; /* standard behavior */ 2050 int preferred = 1; /* preferred path */ 2051 int cond, cont = 1; 2052 int retry = 0; 2053 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2054 int path_instance; /* request specific path instance */ 2055 2056 /* determine type of arg based on flags */ 2057 if (flags & MDI_SELECT_PATH_INSTANCE) { 2058 path_instance = (int)(intptr_t)arg; 2059 start_pip = NULL; 2060 } else { 2061 path_instance = 0; 2062 start_pip = (mdi_pathinfo_t *)arg; 2063 } 2064 2065 if (flags != 0) { 2066 /* 2067 * disable default behavior 2068 */ 2069 sb = 0; 2070 } 2071 2072 *ret_pip = NULL; 2073 ct = i_devi_get_client(cdip); 2074 if (ct == NULL) { 2075 /* mdi extensions are NULL, Nothing more to do */ 2076 return (MDI_FAILURE); 2077 } 2078 2079 MDI_CLIENT_LOCK(ct); 2080 2081 if (sb) { 2082 if (MDI_CLIENT_IS_FAILED(ct)) { 2083 /* 2084 * Client is not ready to accept any I/O requests. 
2085 * Fail this request. 2086 */ 2087 MDI_DEBUG(2, (MDI_NOTE, cdip, 2088 "client state offline ct = %p", (void *)ct)); 2089 MDI_CLIENT_UNLOCK(ct); 2090 return (MDI_FAILURE); 2091 } 2092 2093 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2094 /* 2095 * Check for Failover is in progress. If so tell the 2096 * caller that this device is busy. 2097 */ 2098 MDI_DEBUG(2, (MDI_NOTE, cdip, 2099 "client failover in progress ct = %p", 2100 (void *)ct)); 2101 MDI_CLIENT_UNLOCK(ct); 2102 return (MDI_BUSY); 2103 } 2104 2105 /* 2106 * Check to see whether the client device is attached. 2107 * If not so, let the vHCI driver manually select a path 2108 * (standby) and let the probe/attach process to continue. 2109 */ 2110 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2111 MDI_DEBUG(4, (MDI_NOTE, cdip, 2112 "devi is onlining ct = %p", (void *)ct)); 2113 MDI_CLIENT_UNLOCK(ct); 2114 return (MDI_DEVI_ONLINING); 2115 } 2116 } 2117 2118 /* 2119 * Cache in the client list head. If head of the list is NULL 2120 * return MDI_NOPATH 2121 */ 2122 head = ct->ct_path_head; 2123 if (head == NULL) { 2124 MDI_CLIENT_UNLOCK(ct); 2125 return (MDI_NOPATH); 2126 } 2127 2128 /* Caller is specifying a specific pathinfo path by path_instance */ 2129 if (path_instance) { 2130 /* search for pathinfo with correct path_instance */ 2131 for (pip = head; 2132 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2133 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2134 ; 2135 2136 /* If path can't be selected then MDI_NOPATH is returned. */ 2137 if (pip == NULL) { 2138 MDI_CLIENT_UNLOCK(ct); 2139 return (MDI_NOPATH); 2140 } 2141 2142 /* 2143 * Verify state of path. When asked to select a specific 2144 * path_instance, we select the requested path in any 2145 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2146 * We don't however select paths where the pHCI has detached. 
2147 * NOTE: last pathinfo node of an opened client device may 2148 * exist in an OFFLINE state after the pHCI associated with 2149 * that path has detached (but pi_phci will be NULL if that 2150 * has occurred). 2151 */ 2152 MDI_PI_LOCK(pip); 2153 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2154 (MDI_PI(pip)->pi_phci == NULL)) { 2155 MDI_PI_UNLOCK(pip); 2156 MDI_CLIENT_UNLOCK(ct); 2157 return (MDI_FAILURE); 2158 } 2159 2160 /* Return MDI_BUSY if we have a transient condition */ 2161 if (MDI_PI_IS_TRANSIENT(pip)) { 2162 MDI_PI_UNLOCK(pip); 2163 MDI_CLIENT_UNLOCK(ct); 2164 return (MDI_BUSY); 2165 } 2166 2167 /* 2168 * Return the path in hold state. Caller should release the 2169 * lock by calling mdi_rele_path() 2170 */ 2171 MDI_PI_HOLD(pip); 2172 MDI_PI_UNLOCK(pip); 2173 *ret_pip = pip; 2174 MDI_CLIENT_UNLOCK(ct); 2175 return (MDI_SUCCESS); 2176 } 2177 2178 /* 2179 * for non default behavior, bypass current 2180 * load balancing policy and always use LOAD_BALANCE_RR 2181 * except that the start point will be adjusted based 2182 * on the provided start_pip 2183 */ 2184 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2185 2186 switch (lbp) { 2187 case LOAD_BALANCE_NONE: 2188 /* 2189 * Load balancing is None or Alternate path mode 2190 * Start looking for a online mdi_pathinfo node starting from 2191 * last known selected path 2192 */ 2193 preferred = 1; 2194 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2195 if (pip == NULL) { 2196 pip = head; 2197 } 2198 start = pip; 2199 do { 2200 MDI_PI_LOCK(pip); 2201 /* 2202 * No need to explicitly check if the path is disabled. 2203 * Since we are checking for state == ONLINE and the 2204 * same variable is used for DISABLE/ENABLE information. 2205 */ 2206 if ((MDI_PI(pip)->pi_state == 2207 MDI_PATHINFO_STATE_ONLINE) && 2208 preferred == MDI_PI(pip)->pi_preferred) { 2209 /* 2210 * Return the path in hold state. 
Caller should 2211 * release the lock by calling mdi_rele_path() 2212 */ 2213 MDI_PI_HOLD(pip); 2214 MDI_PI_UNLOCK(pip); 2215 ct->ct_path_last = pip; 2216 *ret_pip = pip; 2217 MDI_CLIENT_UNLOCK(ct); 2218 return (MDI_SUCCESS); 2219 } 2220 2221 /* 2222 * Path is busy. 2223 */ 2224 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2225 MDI_PI_IS_TRANSIENT(pip)) 2226 retry = 1; 2227 /* 2228 * Keep looking for a next available online path 2229 */ 2230 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2231 if (next == NULL) { 2232 next = head; 2233 } 2234 MDI_PI_UNLOCK(pip); 2235 pip = next; 2236 if (start == pip && preferred) { 2237 preferred = 0; 2238 } else if (start == pip && !preferred) { 2239 cont = 0; 2240 } 2241 } while (cont); 2242 break; 2243 2244 case LOAD_BALANCE_LBA: 2245 /* 2246 * Make sure we are looking 2247 * for an online path. Otherwise, if it is for a STANDBY 2248 * path request, it will go through and fetch an ONLINE 2249 * path which is not desirable. 2250 */ 2251 if ((ct->ct_lb_args != NULL) && 2252 (ct->ct_lb_args->region_size) && bp && 2253 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2254 if (i_mdi_lba_lb(ct, ret_pip, bp) 2255 == MDI_SUCCESS) { 2256 MDI_CLIENT_UNLOCK(ct); 2257 return (MDI_SUCCESS); 2258 } 2259 } 2260 /* FALLTHROUGH */ 2261 case LOAD_BALANCE_RR: 2262 /* 2263 * Load balancing is Round Robin. Start looking for a online 2264 * mdi_pathinfo node starting from last known selected path 2265 * as the start point. If override flags are specified, 2266 * process accordingly. 2267 * If the search is already in effect(start_pip not null), 2268 * then lets just use the same path preference to continue the 2269 * traversal. 2270 */ 2271 2272 if (start_pip != NULL) { 2273 preferred = MDI_PI(start_pip)->pi_preferred; 2274 } else { 2275 preferred = 1; 2276 } 2277 2278 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2279 if (start == NULL) { 2280 pip = head; 2281 } else { 2282 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2283 if (pip == NULL) { 2284 if ( flags & MDI_SELECT_NO_PREFERRED) { 2285 /* 2286 * Return since we hit the end of list 2287 */ 2288 MDI_CLIENT_UNLOCK(ct); 2289 return (MDI_NOPATH); 2290 } 2291 2292 if (!sb) { 2293 if (preferred == 0) { 2294 /* 2295 * Looks like we have completed 2296 * the traversal as preferred 2297 * value is 0. Time to bail out. 2298 */ 2299 *ret_pip = NULL; 2300 MDI_CLIENT_UNLOCK(ct); 2301 return (MDI_NOPATH); 2302 } else { 2303 /* 2304 * Looks like we reached the 2305 * end of the list. Lets enable 2306 * traversal of non preferred 2307 * paths. 2308 */ 2309 preferred = 0; 2310 } 2311 } 2312 pip = head; 2313 } 2314 } 2315 start = pip; 2316 do { 2317 MDI_PI_LOCK(pip); 2318 if (sb) { 2319 cond = ((MDI_PI(pip)->pi_state == 2320 MDI_PATHINFO_STATE_ONLINE && 2321 MDI_PI(pip)->pi_preferred == 2322 preferred) ? 1 : 0); 2323 } else { 2324 if (flags == MDI_SELECT_ONLINE_PATH) { 2325 cond = ((MDI_PI(pip)->pi_state == 2326 MDI_PATHINFO_STATE_ONLINE && 2327 MDI_PI(pip)->pi_preferred == 2328 preferred) ? 1 : 0); 2329 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2330 cond = ((MDI_PI(pip)->pi_state == 2331 MDI_PATHINFO_STATE_STANDBY && 2332 MDI_PI(pip)->pi_preferred == 2333 preferred) ? 1 : 0); 2334 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2335 MDI_SELECT_STANDBY_PATH)) { 2336 cond = (((MDI_PI(pip)->pi_state == 2337 MDI_PATHINFO_STATE_ONLINE || 2338 (MDI_PI(pip)->pi_state == 2339 MDI_PATHINFO_STATE_STANDBY)) && 2340 MDI_PI(pip)->pi_preferred == 2341 preferred) ? 
1 : 0); 2342 } else if (flags == 2343 (MDI_SELECT_STANDBY_PATH | 2344 MDI_SELECT_ONLINE_PATH | 2345 MDI_SELECT_USER_DISABLE_PATH)) { 2346 cond = (((MDI_PI(pip)->pi_state == 2347 MDI_PATHINFO_STATE_ONLINE || 2348 (MDI_PI(pip)->pi_state == 2349 MDI_PATHINFO_STATE_STANDBY) || 2350 (MDI_PI(pip)->pi_state == 2351 (MDI_PATHINFO_STATE_ONLINE| 2352 MDI_PATHINFO_STATE_USER_DISABLE)) || 2353 (MDI_PI(pip)->pi_state == 2354 (MDI_PATHINFO_STATE_STANDBY | 2355 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2356 MDI_PI(pip)->pi_preferred == 2357 preferred) ? 1 : 0); 2358 } else if (flags == 2359 (MDI_SELECT_STANDBY_PATH | 2360 MDI_SELECT_ONLINE_PATH | 2361 MDI_SELECT_NO_PREFERRED)) { 2362 cond = (((MDI_PI(pip)->pi_state == 2363 MDI_PATHINFO_STATE_ONLINE) || 2364 (MDI_PI(pip)->pi_state == 2365 MDI_PATHINFO_STATE_STANDBY)) 2366 ? 1 : 0); 2367 } else { 2368 cond = 0; 2369 } 2370 } 2371 /* 2372 * No need to explicitly check if the path is disabled. 2373 * Since we are checking for state == ONLINE and the 2374 * same variable is used for DISABLE/ENABLE information. 2375 */ 2376 if (cond) { 2377 /* 2378 * Return the path in hold state. Caller should 2379 * release the lock by calling mdi_rele_path() 2380 */ 2381 MDI_PI_HOLD(pip); 2382 MDI_PI_UNLOCK(pip); 2383 if (sb) 2384 ct->ct_path_last = pip; 2385 *ret_pip = pip; 2386 MDI_CLIENT_UNLOCK(ct); 2387 return (MDI_SUCCESS); 2388 } 2389 /* 2390 * Path is busy. 2391 */ 2392 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2393 MDI_PI_IS_TRANSIENT(pip)) 2394 retry = 1; 2395 2396 /* 2397 * Keep looking for a next available online path 2398 */ 2399 do_again: 2400 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2401 if (next == NULL) { 2402 if ( flags & MDI_SELECT_NO_PREFERRED) { 2403 /* 2404 * Bail out since we hit the end of list 2405 */ 2406 MDI_PI_UNLOCK(pip); 2407 break; 2408 } 2409 2410 if (!sb) { 2411 if (preferred == 1) { 2412 /* 2413 * Looks like we reached the 2414 * end of the list. Lets enable 2415 * traversal of non preferred 2416 * paths. 
2417 */ 2418 preferred = 0; 2419 next = head; 2420 } else { 2421 /* 2422 * We have done both the passes 2423 * Preferred as well as for 2424 * Non-preferred. Bail out now. 2425 */ 2426 cont = 0; 2427 } 2428 } else { 2429 /* 2430 * Standard behavior case. 2431 */ 2432 next = head; 2433 } 2434 } 2435 MDI_PI_UNLOCK(pip); 2436 if (cont == 0) { 2437 break; 2438 } 2439 pip = next; 2440 2441 if (!sb) { 2442 /* 2443 * We need to handle the selection of 2444 * non-preferred path in the following 2445 * case: 2446 * 2447 * +------+ +------+ +------+ +-----+ 2448 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2449 * +------+ +------+ +------+ +-----+ 2450 * 2451 * If we start the search with B, we need to 2452 * skip beyond B to pick C which is non - 2453 * preferred in the second pass. The following 2454 * test, if true, will allow us to skip over 2455 * the 'start'(B in the example) to select 2456 * other non preferred elements. 2457 */ 2458 if ((start_pip != NULL) && (start_pip == pip) && 2459 (MDI_PI(start_pip)->pi_preferred 2460 != preferred)) { 2461 /* 2462 * try again after going past the start 2463 * pip 2464 */ 2465 MDI_PI_LOCK(pip); 2466 goto do_again; 2467 } 2468 } else { 2469 /* 2470 * Standard behavior case 2471 */ 2472 if (start == pip && preferred) { 2473 /* look for nonpreferred paths */ 2474 preferred = 0; 2475 } else if (start == pip && !preferred) { 2476 /* 2477 * Exit condition 2478 */ 2479 cont = 0; 2480 } 2481 } 2482 } while (cont); 2483 break; 2484 } 2485 2486 MDI_CLIENT_UNLOCK(ct); 2487 if (retry == 1) { 2488 return (MDI_BUSY); 2489 } else { 2490 return (MDI_NOPATH); 2491 } 2492 } 2493 2494 /* 2495 * For a client, return the next available path to any phci 2496 * 2497 * Note: 2498 * Caller should hold the branch's devinfo node to get a consistent 2499 * snap shot of the mdi_pathinfo nodes. 2500 * 2501 * Please note that even the list is stable the mdi_pathinfo 2502 * node state and properties are volatile. 
The caller should lock 2503 * and unlock the nodes by calling mdi_pi_lock() and 2504 * mdi_pi_unlock() functions to get a stable properties. 2505 * 2506 * If there is a need to use the nodes beyond the hold of the 2507 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2508 * need to be held against unexpected removal by calling 2509 * mdi_hold_path() and should be released by calling 2510 * mdi_rele_path() on completion. 2511 */ 2512 mdi_pathinfo_t * 2513 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2514 { 2515 mdi_client_t *ct; 2516 2517 if (!MDI_CLIENT(ct_dip)) 2518 return (NULL); 2519 2520 /* 2521 * Walk through client link 2522 */ 2523 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2524 ASSERT(ct != NULL); 2525 2526 if (pip == NULL) 2527 return ((mdi_pathinfo_t *)ct->ct_path_head); 2528 2529 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2530 } 2531 2532 /* 2533 * For a phci, return the next available path to any client 2534 * Note: ditto mdi_get_next_phci_path() 2535 */ 2536 mdi_pathinfo_t * 2537 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2538 { 2539 mdi_phci_t *ph; 2540 2541 if (!MDI_PHCI(ph_dip)) 2542 return (NULL); 2543 2544 /* 2545 * Walk through pHCI link 2546 */ 2547 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2548 ASSERT(ph != NULL); 2549 2550 if (pip == NULL) 2551 return ((mdi_pathinfo_t *)ph->ph_path_head); 2552 2553 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2554 } 2555 2556 /* 2557 * mdi_hold_path(): 2558 * Hold the mdi_pathinfo node against unwanted unexpected free. 2559 * Return Values: 2560 * None 2561 */ 2562 void 2563 mdi_hold_path(mdi_pathinfo_t *pip) 2564 { 2565 if (pip) { 2566 MDI_PI_LOCK(pip); 2567 MDI_PI_HOLD(pip); 2568 MDI_PI_UNLOCK(pip); 2569 } 2570 } 2571 2572 2573 /* 2574 * mdi_rele_path(): 2575 * Release the mdi_pathinfo node which was selected 2576 * through mdi_select_path() mechanism or manually held by 2577 * calling mdi_hold_path(). 
2578 * Return Values: 2579 * None 2580 */ 2581 void 2582 mdi_rele_path(mdi_pathinfo_t *pip) 2583 { 2584 if (pip) { 2585 MDI_PI_LOCK(pip); 2586 MDI_PI_RELE(pip); 2587 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2588 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2589 } 2590 MDI_PI_UNLOCK(pip); 2591 } 2592 } 2593 2594 /* 2595 * mdi_pi_lock(): 2596 * Lock the mdi_pathinfo node. 2597 * Note: 2598 * The caller should release the lock by calling mdi_pi_unlock() 2599 */ 2600 void 2601 mdi_pi_lock(mdi_pathinfo_t *pip) 2602 { 2603 ASSERT(pip != NULL); 2604 if (pip) { 2605 MDI_PI_LOCK(pip); 2606 } 2607 } 2608 2609 2610 /* 2611 * mdi_pi_unlock(): 2612 * Unlock the mdi_pathinfo node. 2613 * Note: 2614 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2615 */ 2616 void 2617 mdi_pi_unlock(mdi_pathinfo_t *pip) 2618 { 2619 ASSERT(pip != NULL); 2620 if (pip) { 2621 MDI_PI_UNLOCK(pip); 2622 } 2623 } 2624 2625 /* 2626 * mdi_pi_find(): 2627 * Search the list of mdi_pathinfo nodes attached to the 2628 * pHCI/Client device node whose path address matches "paddr". 2629 * Returns a pointer to the mdi_pathinfo node if a matching node is 2630 * found. 2631 * Return Values: 2632 * mdi_pathinfo node handle 2633 * NULL 2634 * Notes: 2635 * Caller need not hold any locks to call this function. 2636 */ 2637 mdi_pathinfo_t * 2638 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2639 { 2640 mdi_phci_t *ph; 2641 mdi_vhci_t *vh; 2642 mdi_client_t *ct; 2643 mdi_pathinfo_t *pip = NULL; 2644 2645 MDI_DEBUG(2, (MDI_NOTE, pdip, 2646 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : "")); 2647 if ((pdip == NULL) || (paddr == NULL)) { 2648 return (NULL); 2649 } 2650 ph = i_devi_get_phci(pdip); 2651 if (ph == NULL) { 2652 /* 2653 * Invalid pHCI device, Nothing more to do. 2654 */ 2655 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci")); 2656 return (NULL); 2657 } 2658 2659 vh = ph->ph_vhci; 2660 if (vh == NULL) { 2661 /* 2662 * Invalid vHCI device, Nothing more to do. 
2663 */ 2664 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci")); 2665 return (NULL); 2666 } 2667 2668 /* 2669 * Look for pathinfo node identified by paddr. 2670 */ 2671 if (caddr == NULL) { 2672 /* 2673 * Find a mdi_pathinfo node under pHCI list for a matching 2674 * unit address. 2675 */ 2676 MDI_PHCI_LOCK(ph); 2677 if (MDI_PHCI_IS_OFFLINE(ph)) { 2678 MDI_DEBUG(2, (MDI_WARN, pdip, 2679 "offline phci %p", (void *)ph)); 2680 MDI_PHCI_UNLOCK(ph); 2681 return (NULL); 2682 } 2683 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2684 2685 while (pip != NULL) { 2686 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2687 break; 2688 } 2689 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2690 } 2691 MDI_PHCI_UNLOCK(ph); 2692 MDI_DEBUG(2, (MDI_NOTE, pdip, 2693 "found %s %p", mdi_pi_spathname(pip), (void *)pip)); 2694 return (pip); 2695 } 2696 2697 /* 2698 * XXX - Is the rest of the code in this function really necessary? 2699 * The consumers of mdi_pi_find() can search for the desired pathinfo 2700 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2701 * whether the search is based on the pathinfo nodes attached to 2702 * the pHCI or the client node, the result will be the same. 2703 */ 2704 2705 /* 2706 * Find the client device corresponding to 'caddr' 2707 */ 2708 MDI_VHCI_CLIENT_LOCK(vh); 2709 2710 /* 2711 * XXX - Passing NULL to the following function works as long as the 2712 * the client addresses (caddr) are unique per vhci basis. 2713 */ 2714 ct = i_mdi_client_find(vh, NULL, caddr); 2715 if (ct == NULL) { 2716 /* 2717 * Client not found, Obviously mdi_pathinfo node has not been 2718 * created yet. 2719 */ 2720 MDI_VHCI_CLIENT_UNLOCK(vh); 2721 MDI_DEBUG(2, (MDI_NOTE, pdip, 2722 "client not found for caddr @%s", caddr ? 
caddr : "")); 2723 return (NULL); 2724 } 2725 2726 /* 2727 * Hold the client lock and look for a mdi_pathinfo node with matching 2728 * pHCI and paddr 2729 */ 2730 MDI_CLIENT_LOCK(ct); 2731 2732 /* 2733 * Release the global mutex as it is no more needed. Note: We always 2734 * respect the locking order while acquiring. 2735 */ 2736 MDI_VHCI_CLIENT_UNLOCK(vh); 2737 2738 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2739 while (pip != NULL) { 2740 /* 2741 * Compare the unit address 2742 */ 2743 if ((MDI_PI(pip)->pi_phci == ph) && 2744 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2745 break; 2746 } 2747 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2748 } 2749 MDI_CLIENT_UNLOCK(ct); 2750 MDI_DEBUG(2, (MDI_NOTE, pdip, 2751 "found: %s %p", mdi_pi_spathname(pip), (void *)pip)); 2752 return (pip); 2753 } 2754 2755 /* 2756 * mdi_pi_alloc(): 2757 * Allocate and initialize a new instance of a mdi_pathinfo node. 2758 * The mdi_pathinfo node returned by this function identifies a 2759 * unique device path is capable of having properties attached 2760 * and passed to mdi_pi_online() to fully attach and online the 2761 * path and client device node. 2762 * The mdi_pathinfo node returned by this function must be 2763 * destroyed using mdi_pi_free() if the path is no longer 2764 * operational or if the caller fails to attach a client device 2765 * node when calling mdi_pi_online(). The framework will not free 2766 * the resources allocated. 2767 * This function can be called from both interrupt and kernel 2768 * contexts. DDI_NOSLEEP flag should be used while calling 2769 * from interrupt contexts. 
2770 * Return Values: 2771 * MDI_SUCCESS 2772 * MDI_FAILURE 2773 * MDI_NOMEM 2774 */ 2775 /*ARGSUSED*/ 2776 int 2777 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2778 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2779 { 2780 mdi_vhci_t *vh; 2781 mdi_phci_t *ph; 2782 mdi_client_t *ct; 2783 mdi_pathinfo_t *pip = NULL; 2784 dev_info_t *cdip; 2785 int rv = MDI_NOMEM; 2786 int path_allocated = 0; 2787 2788 MDI_DEBUG(2, (MDI_NOTE, pdip, 2789 "cname %s: caddr@%s paddr@%s", 2790 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : "")); 2791 2792 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2793 ret_pip == NULL) { 2794 /* Nothing more to do */ 2795 return (MDI_FAILURE); 2796 } 2797 2798 *ret_pip = NULL; 2799 2800 /* No allocations on detaching pHCI */ 2801 if (DEVI_IS_DETACHING(pdip)) { 2802 /* Invalid pHCI device, return failure */ 2803 MDI_DEBUG(1, (MDI_WARN, pdip, 2804 "!detaching pHCI=%p", (void *)pdip)); 2805 return (MDI_FAILURE); 2806 } 2807 2808 ph = i_devi_get_phci(pdip); 2809 ASSERT(ph != NULL); 2810 if (ph == NULL) { 2811 /* Invalid pHCI device, return failure */ 2812 MDI_DEBUG(1, (MDI_WARN, pdip, 2813 "!invalid pHCI=%p", (void *)pdip)); 2814 return (MDI_FAILURE); 2815 } 2816 2817 MDI_PHCI_LOCK(ph); 2818 vh = ph->ph_vhci; 2819 if (vh == NULL) { 2820 /* Invalid vHCI device, return failure */ 2821 MDI_DEBUG(1, (MDI_WARN, pdip, 2822 "!invalid vHCI=%p", (void *)pdip)); 2823 MDI_PHCI_UNLOCK(ph); 2824 return (MDI_FAILURE); 2825 } 2826 2827 if (MDI_PHCI_IS_READY(ph) == 0) { 2828 /* 2829 * Do not allow new node creation when pHCI is in 2830 * offline/suspended states 2831 */ 2832 MDI_DEBUG(1, (MDI_WARN, pdip, 2833 "pHCI=%p is not ready", (void *)ph)); 2834 MDI_PHCI_UNLOCK(ph); 2835 return (MDI_BUSY); 2836 } 2837 MDI_PHCI_UNSTABLE(ph); 2838 MDI_PHCI_UNLOCK(ph); 2839 2840 /* look for a matching client, create one if not found */ 2841 MDI_VHCI_CLIENT_LOCK(vh); 2842 ct = 
i_mdi_client_find(vh, cname, caddr); 2843 if (ct == NULL) { 2844 ct = i_mdi_client_alloc(vh, cname, caddr); 2845 ASSERT(ct != NULL); 2846 } 2847 2848 if (ct->ct_dip == NULL) { 2849 /* 2850 * Allocate a devinfo node 2851 */ 2852 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2853 compatible, ncompatible); 2854 if (ct->ct_dip == NULL) { 2855 (void) i_mdi_client_free(vh, ct); 2856 goto fail; 2857 } 2858 } 2859 cdip = ct->ct_dip; 2860 2861 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2862 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2863 2864 MDI_CLIENT_LOCK(ct); 2865 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2866 while (pip != NULL) { 2867 /* 2868 * Compare the unit address 2869 */ 2870 if ((MDI_PI(pip)->pi_phci == ph) && 2871 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2872 break; 2873 } 2874 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2875 } 2876 MDI_CLIENT_UNLOCK(ct); 2877 2878 if (pip == NULL) { 2879 /* 2880 * This is a new path for this client device. Allocate and 2881 * initialize a new pathinfo node 2882 */ 2883 pip = i_mdi_pi_alloc(ph, paddr, ct); 2884 ASSERT(pip != NULL); 2885 path_allocated = 1; 2886 } 2887 rv = MDI_SUCCESS; 2888 2889 fail: 2890 /* 2891 * Release the global mutex. 
2892 */ 2893 MDI_VHCI_CLIENT_UNLOCK(vh); 2894 2895 /* 2896 * Mark the pHCI as stable 2897 */ 2898 MDI_PHCI_LOCK(ph); 2899 MDI_PHCI_STABLE(ph); 2900 MDI_PHCI_UNLOCK(ph); 2901 *ret_pip = pip; 2902 2903 MDI_DEBUG(2, (MDI_NOTE, pdip, 2904 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip)); 2905 2906 if (path_allocated) 2907 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2908 2909 return (rv); 2910 } 2911 2912 /*ARGSUSED*/ 2913 int 2914 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2915 int flags, mdi_pathinfo_t **ret_pip) 2916 { 2917 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2918 flags, ret_pip)); 2919 } 2920 2921 /* 2922 * i_mdi_pi_alloc(): 2923 * Allocate a mdi_pathinfo node and add to the pHCI path list 2924 * Return Values: 2925 * mdi_pathinfo 2926 */ 2927 /*ARGSUSED*/ 2928 static mdi_pathinfo_t * 2929 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2930 { 2931 mdi_pathinfo_t *pip; 2932 int ct_circular; 2933 int ph_circular; 2934 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */ 2935 char *path_persistent; 2936 int path_instance; 2937 mod_hash_val_t hv; 2938 2939 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2940 2941 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2942 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2943 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2944 MDI_PATHINFO_STATE_TRANSIENT; 2945 2946 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2947 MDI_PI_SET_USER_DISABLE(pip); 2948 2949 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2950 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2951 2952 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2953 MDI_PI_SET_DRV_DISABLE(pip); 2954 2955 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2956 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2957 MDI_PI(pip)->pi_client = ct; 2958 MDI_PI(pip)->pi_phci = ph; 2959 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2960 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2961 
2962 /* 2963 * We form the "path" to the pathinfo node, and see if we have 2964 * already allocated a 'path_instance' for that "path". If so, 2965 * we use the already allocated 'path_instance'. If not, we 2966 * allocate a new 'path_instance' and associate it with a copy of 2967 * the "path" string (which is never freed). The association 2968 * between a 'path_instance' this "path" string persists until 2969 * reboot. 2970 */ 2971 mutex_enter(&mdi_pathmap_mutex); 2972 (void) ddi_pathname(ph->ph_dip, path); 2973 (void) sprintf(path + strlen(path), "/%s@%s", 2974 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2975 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2976 path_instance = (uint_t)(intptr_t)hv; 2977 } else { 2978 /* allocate a new 'path_instance' and persistent "path" */ 2979 path_instance = mdi_pathmap_instance++; 2980 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2981 (void) mod_hash_insert(mdi_pathmap_bypath, 2982 (mod_hash_key_t)path_persistent, 2983 (mod_hash_val_t)(intptr_t)path_instance); 2984 (void) mod_hash_insert(mdi_pathmap_byinstance, 2985 (mod_hash_key_t)(intptr_t)path_instance, 2986 (mod_hash_val_t)path_persistent); 2987 2988 /* create shortpath name */ 2989 (void) snprintf(path, sizeof(path), "%s%d/%s@%s", 2990 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip), 2991 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2992 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2993 (void) mod_hash_insert(mdi_pathmap_sbyinstance, 2994 (mod_hash_key_t)(intptr_t)path_instance, 2995 (mod_hash_val_t)path_persistent); 2996 } 2997 mutex_exit(&mdi_pathmap_mutex); 2998 MDI_PI(pip)->pi_path_instance = path_instance; 2999 3000 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 3001 ASSERT(MDI_PI(pip)->pi_prop != NULL); 3002 MDI_PI(pip)->pi_pprivate = NULL; 3003 MDI_PI(pip)->pi_cprivate = NULL; 3004 MDI_PI(pip)->pi_vprivate = NULL; 3005 MDI_PI(pip)->pi_client_link = NULL; 3006 MDI_PI(pip)->pi_phci_link = 
NULL; 3007 MDI_PI(pip)->pi_ref_cnt = 0; 3008 MDI_PI(pip)->pi_kstats = NULL; 3009 MDI_PI(pip)->pi_preferred = 1; 3010 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 3011 3012 /* 3013 * Lock both dev_info nodes against changes in parallel. 3014 * 3015 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 3016 * This atypical operation is done to synchronize pathinfo nodes 3017 * during devinfo snapshot (see di_register_pip) by 'pretending' that 3018 * the pathinfo nodes are children of the Client. 3019 */ 3020 ndi_devi_enter(ct->ct_dip, &ct_circular); 3021 ndi_devi_enter(ph->ph_dip, &ph_circular); 3022 3023 i_mdi_phci_add_path(ph, pip); 3024 i_mdi_client_add_path(ct, pip); 3025 3026 ndi_devi_exit(ph->ph_dip, ph_circular); 3027 ndi_devi_exit(ct->ct_dip, ct_circular); 3028 3029 return (pip); 3030 } 3031 3032 /* 3033 * mdi_pi_pathname_by_instance(): 3034 * Lookup of "path" by 'path_instance'. Return "path". 3035 * NOTE: returned "path" remains valid forever (until reboot). 3036 */ 3037 char * 3038 mdi_pi_pathname_by_instance(int path_instance) 3039 { 3040 char *path; 3041 mod_hash_val_t hv; 3042 3043 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3044 mutex_enter(&mdi_pathmap_mutex); 3045 if (mod_hash_find(mdi_pathmap_byinstance, 3046 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3047 path = (char *)hv; 3048 else 3049 path = NULL; 3050 mutex_exit(&mdi_pathmap_mutex); 3051 return (path); 3052 } 3053 3054 /* 3055 * mdi_pi_spathname_by_instance(): 3056 * Lookup of "shortpath" by 'path_instance'. Return "shortpath". 3057 * NOTE: returned "shortpath" remains valid forever (until reboot). 
3058 */ 3059 char * 3060 mdi_pi_spathname_by_instance(int path_instance) 3061 { 3062 char *path; 3063 mod_hash_val_t hv; 3064 3065 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3066 mutex_enter(&mdi_pathmap_mutex); 3067 if (mod_hash_find(mdi_pathmap_sbyinstance, 3068 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3069 path = (char *)hv; 3070 else 3071 path = NULL; 3072 mutex_exit(&mdi_pathmap_mutex); 3073 return (path); 3074 } 3075 3076 3077 /* 3078 * i_mdi_phci_add_path(): 3079 * Add a mdi_pathinfo node to pHCI list. 3080 * Notes: 3081 * Caller should per-pHCI mutex 3082 */ 3083 static void 3084 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3085 { 3086 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3087 3088 MDI_PHCI_LOCK(ph); 3089 if (ph->ph_path_head == NULL) { 3090 ph->ph_path_head = pip; 3091 } else { 3092 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3093 } 3094 ph->ph_path_tail = pip; 3095 ph->ph_path_count++; 3096 MDI_PHCI_UNLOCK(ph); 3097 } 3098 3099 /* 3100 * i_mdi_client_add_path(): 3101 * Add mdi_pathinfo node to client list 3102 */ 3103 static void 3104 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3105 { 3106 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3107 3108 MDI_CLIENT_LOCK(ct); 3109 if (ct->ct_path_head == NULL) { 3110 ct->ct_path_head = pip; 3111 } else { 3112 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3113 } 3114 ct->ct_path_tail = pip; 3115 ct->ct_path_count++; 3116 MDI_CLIENT_UNLOCK(ct); 3117 } 3118 3119 /* 3120 * mdi_pi_free(): 3121 * Free the mdi_pathinfo node and also client device node if this 3122 * is the last path to the device 3123 * Return Values: 3124 * MDI_SUCCESS 3125 * MDI_FAILURE 3126 * MDI_BUSY 3127 */ 3128 /*ARGSUSED*/ 3129 int 3130 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3131 { 3132 int rv; 3133 mdi_vhci_t *vh; 3134 mdi_phci_t *ph; 3135 mdi_client_t *ct; 3136 int (*f)(); 3137 int client_held = 0; 3138 3139 MDI_PI_LOCK(pip); 3140 ph = MDI_PI(pip)->pi_phci; 3141 ASSERT(ph != NULL); 
3142 if (ph == NULL) { 3143 /* 3144 * Invalid pHCI device, return failure 3145 */ 3146 MDI_DEBUG(1, (MDI_WARN, NULL, 3147 "!invalid pHCI: pip %s %p", 3148 mdi_pi_spathname(pip), (void *)pip)); 3149 MDI_PI_UNLOCK(pip); 3150 return (MDI_FAILURE); 3151 } 3152 3153 vh = ph->ph_vhci; 3154 ASSERT(vh != NULL); 3155 if (vh == NULL) { 3156 /* Invalid pHCI device, return failure */ 3157 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3158 "!invalid vHCI: pip %s %p", 3159 mdi_pi_spathname(pip), (void *)pip)); 3160 MDI_PI_UNLOCK(pip); 3161 return (MDI_FAILURE); 3162 } 3163 3164 ct = MDI_PI(pip)->pi_client; 3165 ASSERT(ct != NULL); 3166 if (ct == NULL) { 3167 /* 3168 * Invalid Client device, return failure 3169 */ 3170 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3171 "!invalid client: pip %s %p", 3172 mdi_pi_spathname(pip), (void *)pip)); 3173 MDI_PI_UNLOCK(pip); 3174 return (MDI_FAILURE); 3175 } 3176 3177 /* 3178 * Check to see for busy condition. A mdi_pathinfo can only be freed 3179 * if the node state is either offline or init and the reference count 3180 * is zero. 3181 */ 3182 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3183 MDI_PI_IS_INITING(pip))) { 3184 /* 3185 * Node is busy 3186 */ 3187 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3188 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3189 MDI_PI_UNLOCK(pip); 3190 return (MDI_BUSY); 3191 } 3192 3193 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3194 /* 3195 * Give a chance for pending I/Os to complete. 3196 */ 3197 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3198 "!%d cmds still pending on path: %s %p", 3199 MDI_PI(pip)->pi_ref_cnt, 3200 mdi_pi_spathname(pip), (void *)pip)); 3201 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3202 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3203 TR_CLOCK_TICK) == -1) { 3204 /* 3205 * The timeout time reached without ref_cnt being zero 3206 * being signaled. 
3207 */ 3208 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3209 "!Timeout reached on path %s %p without the cond", 3210 mdi_pi_spathname(pip), (void *)pip)); 3211 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3212 "!%d cmds still pending on path %s %p", 3213 MDI_PI(pip)->pi_ref_cnt, 3214 mdi_pi_spathname(pip), (void *)pip)); 3215 MDI_PI_UNLOCK(pip); 3216 return (MDI_BUSY); 3217 } 3218 } 3219 if (MDI_PI(pip)->pi_pm_held) { 3220 client_held = 1; 3221 } 3222 MDI_PI_UNLOCK(pip); 3223 3224 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3225 3226 MDI_CLIENT_LOCK(ct); 3227 3228 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3229 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3230 3231 /* 3232 * Wait till failover is complete before removing this node. 3233 */ 3234 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3235 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3236 3237 MDI_CLIENT_UNLOCK(ct); 3238 MDI_VHCI_CLIENT_LOCK(vh); 3239 MDI_CLIENT_LOCK(ct); 3240 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3241 3242 if (!MDI_PI_IS_INITING(pip)) { 3243 f = vh->vh_ops->vo_pi_uninit; 3244 if (f != NULL) { 3245 rv = (*f)(vh->vh_dip, pip, 0); 3246 } 3247 } else 3248 rv = MDI_SUCCESS; 3249 3250 /* 3251 * If vo_pi_uninit() completed successfully. 3252 */ 3253 if (rv == MDI_SUCCESS) { 3254 if (client_held) { 3255 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3256 "i_mdi_pm_rele_client\n")); 3257 i_mdi_pm_rele_client(ct, 1); 3258 } 3259 i_mdi_pi_free(ph, pip, ct); 3260 if (ct->ct_path_count == 0) { 3261 /* 3262 * Client lost its last path. 
3263 * Clean up the client device 3264 */ 3265 MDI_CLIENT_UNLOCK(ct); 3266 (void) i_mdi_client_free(ct->ct_vhci, ct); 3267 MDI_VHCI_CLIENT_UNLOCK(vh); 3268 return (rv); 3269 } 3270 } 3271 MDI_CLIENT_UNLOCK(ct); 3272 MDI_VHCI_CLIENT_UNLOCK(vh); 3273 3274 if (rv == MDI_FAILURE) 3275 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3276 3277 return (rv); 3278 } 3279 3280 /* 3281 * i_mdi_pi_free(): 3282 * Free the mdi_pathinfo node 3283 */ 3284 static void 3285 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3286 { 3287 int ct_circular; 3288 int ph_circular; 3289 3290 ASSERT(MDI_CLIENT_LOCKED(ct)); 3291 3292 /* 3293 * remove any per-path kstats 3294 */ 3295 i_mdi_pi_kstat_destroy(pip); 3296 3297 /* See comments in i_mdi_pi_alloc() */ 3298 ndi_devi_enter(ct->ct_dip, &ct_circular); 3299 ndi_devi_enter(ph->ph_dip, &ph_circular); 3300 3301 i_mdi_client_remove_path(ct, pip); 3302 i_mdi_phci_remove_path(ph, pip); 3303 3304 ndi_devi_exit(ph->ph_dip, ph_circular); 3305 ndi_devi_exit(ct->ct_dip, ct_circular); 3306 3307 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3308 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3309 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3310 if (MDI_PI(pip)->pi_addr) { 3311 kmem_free(MDI_PI(pip)->pi_addr, 3312 strlen(MDI_PI(pip)->pi_addr) + 1); 3313 MDI_PI(pip)->pi_addr = NULL; 3314 } 3315 3316 if (MDI_PI(pip)->pi_prop) { 3317 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3318 MDI_PI(pip)->pi_prop = NULL; 3319 } 3320 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3321 } 3322 3323 3324 /* 3325 * i_mdi_phci_remove_path(): 3326 * Remove a mdi_pathinfo node from pHCI list. 
3327 * Notes: 3328 * Caller should hold per-pHCI mutex 3329 */ 3330 static void 3331 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3332 { 3333 mdi_pathinfo_t *prev = NULL; 3334 mdi_pathinfo_t *path = NULL; 3335 3336 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3337 3338 MDI_PHCI_LOCK(ph); 3339 path = ph->ph_path_head; 3340 while (path != NULL) { 3341 if (path == pip) { 3342 break; 3343 } 3344 prev = path; 3345 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3346 } 3347 3348 if (path) { 3349 ph->ph_path_count--; 3350 if (prev) { 3351 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3352 } else { 3353 ph->ph_path_head = 3354 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3355 } 3356 if (ph->ph_path_tail == path) { 3357 ph->ph_path_tail = prev; 3358 } 3359 } 3360 3361 /* 3362 * Clear the pHCI link 3363 */ 3364 MDI_PI(pip)->pi_phci_link = NULL; 3365 MDI_PI(pip)->pi_phci = NULL; 3366 MDI_PHCI_UNLOCK(ph); 3367 } 3368 3369 /* 3370 * i_mdi_client_remove_path(): 3371 * Remove a mdi_pathinfo node from client path list. 
3372 */ 3373 static void 3374 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3375 { 3376 mdi_pathinfo_t *prev = NULL; 3377 mdi_pathinfo_t *path; 3378 3379 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3380 3381 ASSERT(MDI_CLIENT_LOCKED(ct)); 3382 path = ct->ct_path_head; 3383 while (path != NULL) { 3384 if (path == pip) { 3385 break; 3386 } 3387 prev = path; 3388 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3389 } 3390 3391 if (path) { 3392 ct->ct_path_count--; 3393 if (prev) { 3394 MDI_PI(prev)->pi_client_link = 3395 MDI_PI(path)->pi_client_link; 3396 } else { 3397 ct->ct_path_head = 3398 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3399 } 3400 if (ct->ct_path_tail == path) { 3401 ct->ct_path_tail = prev; 3402 } 3403 if (ct->ct_path_last == path) { 3404 ct->ct_path_last = ct->ct_path_head; 3405 } 3406 } 3407 MDI_PI(pip)->pi_client_link = NULL; 3408 MDI_PI(pip)->pi_client = NULL; 3409 } 3410 3411 /* 3412 * i_mdi_pi_state_change(): 3413 * online a mdi_pathinfo node 3414 * 3415 * Return Values: 3416 * MDI_SUCCESS 3417 * MDI_FAILURE 3418 */ 3419 /*ARGSUSED*/ 3420 static int 3421 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3422 { 3423 int rv = MDI_SUCCESS; 3424 mdi_vhci_t *vh; 3425 mdi_phci_t *ph; 3426 mdi_client_t *ct; 3427 int (*f)(); 3428 dev_info_t *cdip; 3429 3430 MDI_PI_LOCK(pip); 3431 3432 ph = MDI_PI(pip)->pi_phci; 3433 ASSERT(ph); 3434 if (ph == NULL) { 3435 /* 3436 * Invalid pHCI device, fail the request 3437 */ 3438 MDI_PI_UNLOCK(pip); 3439 MDI_DEBUG(1, (MDI_WARN, NULL, 3440 "!invalid phci: pip %s %p", 3441 mdi_pi_spathname(pip), (void *)pip)); 3442 return (MDI_FAILURE); 3443 } 3444 3445 vh = ph->ph_vhci; 3446 ASSERT(vh); 3447 if (vh == NULL) { 3448 /* 3449 * Invalid vHCI device, fail the request 3450 */ 3451 MDI_PI_UNLOCK(pip); 3452 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3453 "!invalid vhci: pip %s %p", 3454 mdi_pi_spathname(pip), (void *)pip)); 3455 return (MDI_FAILURE); 3456 } 3457 3458 ct = 
MDI_PI(pip)->pi_client; 3459 ASSERT(ct != NULL); 3460 if (ct == NULL) { 3461 /* 3462 * Invalid client device, fail the request 3463 */ 3464 MDI_PI_UNLOCK(pip); 3465 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3466 "!invalid client: pip %s %p", 3467 mdi_pi_spathname(pip), (void *)pip)); 3468 return (MDI_FAILURE); 3469 } 3470 3471 /* 3472 * If this path has not been initialized yet, Callback vHCI driver's 3473 * pathinfo node initialize entry point 3474 */ 3475 3476 if (MDI_PI_IS_INITING(pip)) { 3477 MDI_PI_UNLOCK(pip); 3478 f = vh->vh_ops->vo_pi_init; 3479 if (f != NULL) { 3480 rv = (*f)(vh->vh_dip, pip, 0); 3481 if (rv != MDI_SUCCESS) { 3482 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3483 "!vo_pi_init failed: vHCI %p, pip %s %p", 3484 (void *)vh, mdi_pi_spathname(pip), 3485 (void *)pip)); 3486 return (MDI_FAILURE); 3487 } 3488 } 3489 MDI_PI_LOCK(pip); 3490 MDI_PI_CLEAR_TRANSIENT(pip); 3491 } 3492 3493 /* 3494 * Do not allow state transition when pHCI is in offline/suspended 3495 * states 3496 */ 3497 i_mdi_phci_lock(ph, pip); 3498 if (MDI_PHCI_IS_READY(ph) == 0) { 3499 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3500 "!pHCI not ready, pHCI=%p", (void *)ph)); 3501 MDI_PI_UNLOCK(pip); 3502 i_mdi_phci_unlock(ph); 3503 return (MDI_BUSY); 3504 } 3505 MDI_PHCI_UNSTABLE(ph); 3506 i_mdi_phci_unlock(ph); 3507 3508 /* 3509 * Check if mdi_pathinfo state is in transient state. 3510 * If yes, offlining is in progress and wait till transient state is 3511 * cleared. 3512 */ 3513 if (MDI_PI_IS_TRANSIENT(pip)) { 3514 while (MDI_PI_IS_TRANSIENT(pip)) { 3515 cv_wait(&MDI_PI(pip)->pi_state_cv, 3516 &MDI_PI(pip)->pi_mutex); 3517 } 3518 } 3519 3520 /* 3521 * Grab the client lock in reverse order sequence and release the 3522 * mdi_pathinfo mutex. 
3523 */ 3524 i_mdi_client_lock(ct, pip); 3525 MDI_PI_UNLOCK(pip); 3526 3527 /* 3528 * Wait till failover state is cleared 3529 */ 3530 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3531 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3532 3533 /* 3534 * Mark the mdi_pathinfo node state as transient 3535 */ 3536 MDI_PI_LOCK(pip); 3537 switch (state) { 3538 case MDI_PATHINFO_STATE_ONLINE: 3539 MDI_PI_SET_ONLINING(pip); 3540 break; 3541 3542 case MDI_PATHINFO_STATE_STANDBY: 3543 MDI_PI_SET_STANDBYING(pip); 3544 break; 3545 3546 case MDI_PATHINFO_STATE_FAULT: 3547 /* 3548 * Mark the pathinfo state as FAULTED 3549 */ 3550 MDI_PI_SET_FAULTING(pip); 3551 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3552 break; 3553 3554 case MDI_PATHINFO_STATE_OFFLINE: 3555 /* 3556 * ndi_devi_offline() cannot hold pip or ct locks. 3557 */ 3558 MDI_PI_UNLOCK(pip); 3559 3560 /* 3561 * If this is a user initiated path online->offline operation 3562 * who's success would transition a client from DEGRADED to 3563 * FAILED then only proceed if we can offline the client first. 
3564 */ 3565 cdip = ct->ct_dip; 3566 if ((flag & NDI_USER_REQ) && 3567 MDI_PI_IS_ONLINE(pip) && 3568 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3569 i_mdi_client_unlock(ct); 3570 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN); 3571 if (rv != NDI_SUCCESS) { 3572 /* 3573 * Convert to MDI error code 3574 */ 3575 switch (rv) { 3576 case NDI_BUSY: 3577 rv = MDI_BUSY; 3578 break; 3579 default: 3580 rv = MDI_FAILURE; 3581 break; 3582 } 3583 goto state_change_exit; 3584 } else { 3585 i_mdi_client_lock(ct, NULL); 3586 } 3587 } 3588 /* 3589 * Mark the mdi_pathinfo node state as transient 3590 */ 3591 MDI_PI_LOCK(pip); 3592 MDI_PI_SET_OFFLINING(pip); 3593 break; 3594 } 3595 MDI_PI_UNLOCK(pip); 3596 MDI_CLIENT_UNSTABLE(ct); 3597 i_mdi_client_unlock(ct); 3598 3599 f = vh->vh_ops->vo_pi_state_change; 3600 if (f != NULL) 3601 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3602 3603 MDI_CLIENT_LOCK(ct); 3604 MDI_PI_LOCK(pip); 3605 if (rv == MDI_NOT_SUPPORTED) { 3606 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3607 } 3608 if (rv != MDI_SUCCESS) { 3609 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip, 3610 "vo_pi_state_change failed: rv %x", rv)); 3611 } 3612 if (MDI_PI_IS_TRANSIENT(pip)) { 3613 if (rv == MDI_SUCCESS) { 3614 MDI_PI_CLEAR_TRANSIENT(pip); 3615 } else { 3616 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3617 } 3618 } 3619 3620 /* 3621 * Wake anyone waiting for this mdi_pathinfo node 3622 */ 3623 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3624 MDI_PI_UNLOCK(pip); 3625 3626 /* 3627 * Mark the client device as stable 3628 */ 3629 MDI_CLIENT_STABLE(ct); 3630 if (rv == MDI_SUCCESS) { 3631 if (ct->ct_unstable == 0) { 3632 cdip = ct->ct_dip; 3633 3634 /* 3635 * Onlining the mdi_pathinfo node will impact the 3636 * client state Update the client and dev_info node 3637 * state accordingly 3638 */ 3639 rv = NDI_SUCCESS; 3640 i_mdi_client_update_state(ct); 3641 switch (MDI_CLIENT_STATE(ct)) { 3642 case MDI_CLIENT_STATE_OPTIMAL: 3643 case MDI_CLIENT_STATE_DEGRADED: 3644 if (cdip && 
!i_ddi_devi_attached(cdip) && 3645 ((state == MDI_PATHINFO_STATE_ONLINE) || 3646 (state == MDI_PATHINFO_STATE_STANDBY))) { 3647 3648 /* 3649 * Must do ndi_devi_online() through 3650 * hotplug thread for deferred 3651 * attach mechanism to work 3652 */ 3653 MDI_CLIENT_UNLOCK(ct); 3654 rv = ndi_devi_online(cdip, 0); 3655 MDI_CLIENT_LOCK(ct); 3656 if ((rv != NDI_SUCCESS) && 3657 (MDI_CLIENT_STATE(ct) == 3658 MDI_CLIENT_STATE_DEGRADED)) { 3659 /* 3660 * ndi_devi_online failed. 3661 * Reset client flags to 3662 * offline. 3663 */ 3664 MDI_DEBUG(1, (MDI_WARN, cdip, 3665 "!ndi_devi_online failed " 3666 "error %x", rv)); 3667 MDI_CLIENT_SET_OFFLINE(ct); 3668 } 3669 if (rv != NDI_SUCCESS) { 3670 /* Reset the path state */ 3671 MDI_PI_LOCK(pip); 3672 MDI_PI(pip)->pi_state = 3673 MDI_PI_OLD_STATE(pip); 3674 MDI_PI_UNLOCK(pip); 3675 } 3676 } 3677 break; 3678 3679 case MDI_CLIENT_STATE_FAILED: 3680 /* 3681 * This is the last path case for 3682 * non-user initiated events. 3683 */ 3684 if (((flag & NDI_USER_REQ) == 0) && 3685 cdip && (i_ddi_node_state(cdip) >= 3686 DS_INITIALIZED)) { 3687 MDI_CLIENT_UNLOCK(ct); 3688 rv = ndi_devi_offline(cdip, 3689 NDI_DEVFS_CLEAN); 3690 MDI_CLIENT_LOCK(ct); 3691 3692 if (rv != NDI_SUCCESS) { 3693 /* 3694 * ndi_devi_offline failed. 3695 * Reset client flags to 3696 * online as the path could not 3697 * be offlined. 3698 */ 3699 MDI_DEBUG(1, (MDI_WARN, cdip, 3700 "!ndi_devi_offline failed: " 3701 "error %x", rv)); 3702 MDI_CLIENT_SET_ONLINE(ct); 3703 } 3704 } 3705 break; 3706 } 3707 /* 3708 * Convert to MDI error code 3709 */ 3710 switch (rv) { 3711 case NDI_SUCCESS: 3712 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3713 i_mdi_report_path_state(ct, pip); 3714 rv = MDI_SUCCESS; 3715 break; 3716 case NDI_BUSY: 3717 rv = MDI_BUSY; 3718 break; 3719 default: 3720 rv = MDI_FAILURE; 3721 break; 3722 } 3723 } 3724 } 3725 MDI_CLIENT_UNLOCK(ct); 3726 3727 state_change_exit: 3728 /* 3729 * Mark the pHCI as stable again. 
3730 */ 3731 MDI_PHCI_LOCK(ph); 3732 MDI_PHCI_STABLE(ph); 3733 MDI_PHCI_UNLOCK(ph); 3734 return (rv); 3735 } 3736 3737 /* 3738 * mdi_pi_online(): 3739 * Place the path_info node in the online state. The path is 3740 * now available to be selected by mdi_select_path() for 3741 * transporting I/O requests to client devices. 3742 * Return Values: 3743 * MDI_SUCCESS 3744 * MDI_FAILURE 3745 */ 3746 int 3747 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3748 { 3749 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3750 int client_held = 0; 3751 int rv; 3752 3753 ASSERT(ct != NULL); 3754 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3755 if (rv != MDI_SUCCESS) 3756 return (rv); 3757 3758 MDI_PI_LOCK(pip); 3759 if (MDI_PI(pip)->pi_pm_held == 0) { 3760 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3761 "i_mdi_pm_hold_pip %p", (void *)pip)); 3762 i_mdi_pm_hold_pip(pip); 3763 client_held = 1; 3764 } 3765 MDI_PI_UNLOCK(pip); 3766 3767 if (client_held) { 3768 MDI_CLIENT_LOCK(ct); 3769 if (ct->ct_power_cnt == 0) { 3770 rv = i_mdi_power_all_phci(ct); 3771 } 3772 3773 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3774 "i_mdi_pm_hold_client %p", (void *)ct)); 3775 i_mdi_pm_hold_client(ct, 1); 3776 MDI_CLIENT_UNLOCK(ct); 3777 } 3778 3779 return (rv); 3780 } 3781 3782 /* 3783 * mdi_pi_standby(): 3784 * Place the mdi_pathinfo node in standby state 3785 * 3786 * Return Values: 3787 * MDI_SUCCESS 3788 * MDI_FAILURE 3789 */ 3790 int 3791 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3792 { 3793 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3794 } 3795 3796 /* 3797 * mdi_pi_fault(): 3798 * Place the mdi_pathinfo node in fault'ed state 3799 * Return Values: 3800 * MDI_SUCCESS 3801 * MDI_FAILURE 3802 */ 3803 int 3804 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3805 { 3806 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3807 } 3808 3809 /* 3810 * mdi_pi_offline(): 3811 * Offline a mdi_pathinfo node. 
3812 * Return Values: 3813 * MDI_SUCCESS 3814 * MDI_FAILURE 3815 */ 3816 int 3817 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3818 { 3819 int ret, client_held = 0; 3820 mdi_client_t *ct; 3821 3822 /* 3823 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3824 * used it to mean "user initiated operation" (i.e. devctl). Callers 3825 * should now just use NDI_USER_REQ. 3826 */ 3827 if (flags & NDI_DEVI_REMOVE) { 3828 flags &= ~NDI_DEVI_REMOVE; 3829 flags |= NDI_USER_REQ; 3830 } 3831 3832 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3833 3834 if (ret == MDI_SUCCESS) { 3835 MDI_PI_LOCK(pip); 3836 if (MDI_PI(pip)->pi_pm_held) { 3837 client_held = 1; 3838 } 3839 MDI_PI_UNLOCK(pip); 3840 3841 if (client_held) { 3842 ct = MDI_PI(pip)->pi_client; 3843 MDI_CLIENT_LOCK(ct); 3844 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3845 "i_mdi_pm_rele_client\n")); 3846 i_mdi_pm_rele_client(ct, 1); 3847 MDI_CLIENT_UNLOCK(ct); 3848 } 3849 } 3850 3851 return (ret); 3852 } 3853 3854 /* 3855 * i_mdi_pi_offline(): 3856 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3857 */ 3858 static int 3859 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3860 { 3861 dev_info_t *vdip = NULL; 3862 mdi_vhci_t *vh = NULL; 3863 mdi_client_t *ct = NULL; 3864 int (*f)(); 3865 int rv; 3866 3867 MDI_PI_LOCK(pip); 3868 ct = MDI_PI(pip)->pi_client; 3869 ASSERT(ct != NULL); 3870 3871 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3872 /* 3873 * Give a chance for pending I/Os to complete. 3874 */ 3875 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3876 "!%d cmds still pending on path %s %p", 3877 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3878 (void *)pip)); 3879 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3880 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3881 TR_CLOCK_TICK) == -1) { 3882 /* 3883 * The timeout time reached without ref_cnt being zero 3884 * being signaled. 
3885 */ 3886 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3887 "!Timeout reached on path %s %p without the cond", 3888 mdi_pi_spathname(pip), (void *)pip)); 3889 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3890 "!%d cmds still pending on path %s %p", 3891 MDI_PI(pip)->pi_ref_cnt, 3892 mdi_pi_spathname(pip), (void *)pip)); 3893 } 3894 } 3895 vh = ct->ct_vhci; 3896 vdip = vh->vh_dip; 3897 3898 /* 3899 * Notify vHCI that has registered this event 3900 */ 3901 ASSERT(vh->vh_ops); 3902 f = vh->vh_ops->vo_pi_state_change; 3903 3904 if (f != NULL) { 3905 MDI_PI_UNLOCK(pip); 3906 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3907 flags)) != MDI_SUCCESS) { 3908 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3909 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3910 ddi_driver_name(vdip), ddi_get_instance(vdip), 3911 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3912 } 3913 MDI_PI_LOCK(pip); 3914 } 3915 3916 /* 3917 * Set the mdi_pathinfo node state and clear the transient condition 3918 */ 3919 MDI_PI_SET_OFFLINE(pip); 3920 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3921 MDI_PI_UNLOCK(pip); 3922 3923 MDI_CLIENT_LOCK(ct); 3924 if (rv == MDI_SUCCESS) { 3925 if (ct->ct_unstable == 0) { 3926 dev_info_t *cdip = ct->ct_dip; 3927 3928 /* 3929 * Onlining the mdi_pathinfo node will impact the 3930 * client state Update the client and dev_info node 3931 * state accordingly 3932 */ 3933 i_mdi_client_update_state(ct); 3934 rv = NDI_SUCCESS; 3935 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3936 if (cdip && 3937 (i_ddi_node_state(cdip) >= 3938 DS_INITIALIZED)) { 3939 MDI_CLIENT_UNLOCK(ct); 3940 rv = ndi_devi_offline(cdip, 3941 NDI_DEVFS_CLEAN); 3942 MDI_CLIENT_LOCK(ct); 3943 if (rv != NDI_SUCCESS) { 3944 /* 3945 * ndi_devi_offline failed. 3946 * Reset client flags to 3947 * online. 
3948 */ 3949 MDI_DEBUG(4, (MDI_WARN, cdip, 3950 "ndi_devi_offline failed: " 3951 "error %x", rv)); 3952 MDI_CLIENT_SET_ONLINE(ct); 3953 } 3954 } 3955 } 3956 /* 3957 * Convert to MDI error code 3958 */ 3959 switch (rv) { 3960 case NDI_SUCCESS: 3961 rv = MDI_SUCCESS; 3962 break; 3963 case NDI_BUSY: 3964 rv = MDI_BUSY; 3965 break; 3966 default: 3967 rv = MDI_FAILURE; 3968 break; 3969 } 3970 } 3971 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3972 i_mdi_report_path_state(ct, pip); 3973 } 3974 3975 MDI_CLIENT_UNLOCK(ct); 3976 3977 /* 3978 * Change in the mdi_pathinfo node state will impact the client state 3979 */ 3980 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3981 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3982 return (rv); 3983 } 3984 3985 /* 3986 * i_mdi_pi_online(): 3987 * Online a mdi_pathinfo node and call the vHCI driver's callback 3988 */ 3989 static int 3990 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3991 { 3992 mdi_vhci_t *vh = NULL; 3993 mdi_client_t *ct = NULL; 3994 mdi_phci_t *ph; 3995 int (*f)(); 3996 int rv; 3997 3998 MDI_PI_LOCK(pip); 3999 ph = MDI_PI(pip)->pi_phci; 4000 vh = ph->ph_vhci; 4001 ct = MDI_PI(pip)->pi_client; 4002 MDI_PI_SET_ONLINING(pip) 4003 MDI_PI_UNLOCK(pip); 4004 f = vh->vh_ops->vo_pi_state_change; 4005 if (f != NULL) 4006 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0, 4007 flags); 4008 MDI_CLIENT_LOCK(ct); 4009 MDI_PI_LOCK(pip); 4010 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 4011 MDI_PI_UNLOCK(pip); 4012 if (rv == MDI_SUCCESS) { 4013 dev_info_t *cdip = ct->ct_dip; 4014 4015 rv = MDI_SUCCESS; 4016 i_mdi_client_update_state(ct); 4017 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL || 4018 MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4019 if (cdip && !i_ddi_devi_attached(cdip)) { 4020 MDI_CLIENT_UNLOCK(ct); 4021 rv = ndi_devi_online(cdip, 0); 4022 MDI_CLIENT_LOCK(ct); 4023 if ((rv != NDI_SUCCESS) && 4024 (MDI_CLIENT_STATE(ct) == 4025 MDI_CLIENT_STATE_DEGRADED)) { 4026 MDI_CLIENT_SET_OFFLINE(ct); 4027 } 4028 if (rv != 
NDI_SUCCESS) { 4029 /* Reset the path state */ 4030 MDI_PI_LOCK(pip); 4031 MDI_PI(pip)->pi_state = 4032 MDI_PI_OLD_STATE(pip); 4033 MDI_PI_UNLOCK(pip); 4034 } 4035 } 4036 } 4037 switch (rv) { 4038 case NDI_SUCCESS: 4039 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 4040 i_mdi_report_path_state(ct, pip); 4041 rv = MDI_SUCCESS; 4042 break; 4043 case NDI_BUSY: 4044 rv = MDI_BUSY; 4045 break; 4046 default: 4047 rv = MDI_FAILURE; 4048 break; 4049 } 4050 } else { 4051 /* Reset the path state */ 4052 MDI_PI_LOCK(pip); 4053 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 4054 MDI_PI_UNLOCK(pip); 4055 } 4056 MDI_CLIENT_UNLOCK(ct); 4057 return (rv); 4058 } 4059 4060 /* 4061 * mdi_pi_get_node_name(): 4062 * Get the name associated with a mdi_pathinfo node. 4063 * Since pathinfo nodes are not directly named, we 4064 * return the node_name of the client. 4065 * 4066 * Return Values: 4067 * char * 4068 */ 4069 char * 4070 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 4071 { 4072 mdi_client_t *ct; 4073 4074 if (pip == NULL) 4075 return (NULL); 4076 ct = MDI_PI(pip)->pi_client; 4077 if ((ct == NULL) || (ct->ct_dip == NULL)) 4078 return (NULL); 4079 return (ddi_node_name(ct->ct_dip)); 4080 } 4081 4082 /* 4083 * mdi_pi_get_addr(): 4084 * Get the unit address associated with a mdi_pathinfo node 4085 * 4086 * Return Values: 4087 * char * 4088 */ 4089 char * 4090 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4091 { 4092 if (pip == NULL) 4093 return (NULL); 4094 4095 return (MDI_PI(pip)->pi_addr); 4096 } 4097 4098 /* 4099 * mdi_pi_get_path_instance(): 4100 * Get the 'path_instance' of a mdi_pathinfo node 4101 * 4102 * Return Values: 4103 * path_instance 4104 */ 4105 int 4106 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4107 { 4108 if (pip == NULL) 4109 return (0); 4110 4111 return (MDI_PI(pip)->pi_path_instance); 4112 } 4113 4114 /* 4115 * mdi_pi_pathname(): 4116 * Return pointer to path to pathinfo node. 
4117 */ 4118 char * 4119 mdi_pi_pathname(mdi_pathinfo_t *pip) 4120 { 4121 if (pip == NULL) 4122 return (NULL); 4123 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4124 } 4125 4126 /* 4127 * mdi_pi_spathname(): 4128 * Return pointer to shortpath to pathinfo node. Used for debug 4129 * messages, so return "" instead of NULL when unknown. 4130 */ 4131 char * 4132 mdi_pi_spathname(mdi_pathinfo_t *pip) 4133 { 4134 char *spath = ""; 4135 4136 if (pip) { 4137 spath = mdi_pi_spathname_by_instance( 4138 mdi_pi_get_path_instance(pip)); 4139 if (spath == NULL) 4140 spath = ""; 4141 } 4142 return (spath); 4143 } 4144 4145 char * 4146 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4147 { 4148 char *obp_path = NULL; 4149 if ((pip == NULL) || (path == NULL)) 4150 return (NULL); 4151 4152 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4153 (void) strcpy(path, obp_path); 4154 (void) mdi_prop_free(obp_path); 4155 } else { 4156 path = NULL; 4157 } 4158 return (path); 4159 } 4160 4161 int 4162 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4163 { 4164 dev_info_t *pdip; 4165 char *obp_path = NULL; 4166 int rc = MDI_FAILURE; 4167 4168 if (pip == NULL) 4169 return (MDI_FAILURE); 4170 4171 pdip = mdi_pi_get_phci(pip); 4172 if (pdip == NULL) 4173 return (MDI_FAILURE); 4174 4175 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4176 4177 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4178 (void) ddi_pathname(pdip, obp_path); 4179 } 4180 4181 if (component) { 4182 (void) strncat(obp_path, "/", MAXPATHLEN); 4183 (void) strncat(obp_path, component, MAXPATHLEN); 4184 } 4185 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4186 4187 if (obp_path) 4188 kmem_free(obp_path, MAXPATHLEN); 4189 return (rc); 4190 } 4191 4192 /* 4193 * mdi_pi_get_client(): 4194 * Get the client devinfo associated with a mdi_pathinfo node 4195 * 4196 * Return Values: 4197 * Handle to client device dev_info node 4198 */ 4199 dev_info_t * 4200 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4201 { 4202 dev_info_t *dip = NULL; 4203 if (pip) { 4204 dip = MDI_PI(pip)->pi_client->ct_dip; 4205 } 4206 return (dip); 4207 } 4208 4209 /* 4210 * mdi_pi_get_phci(): 4211 * Get the pHCI devinfo associated with the mdi_pathinfo node 4212 * Return Values: 4213 * Handle to dev_info node 4214 */ 4215 dev_info_t * 4216 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4217 { 4218 dev_info_t *dip = NULL; 4219 mdi_phci_t *ph; 4220 4221 if (pip) { 4222 ph = MDI_PI(pip)->pi_phci; 4223 if (ph) 4224 dip = ph->ph_dip; 4225 } 4226 return (dip); 4227 } 4228 4229 /* 4230 * mdi_pi_get_client_private(): 4231 * Get the client private information associated with the 4232 * mdi_pathinfo node 4233 */ 4234 void * 4235 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4236 { 4237 void *cprivate = NULL; 4238 if (pip) { 4239 cprivate = MDI_PI(pip)->pi_cprivate; 4240 } 4241 return (cprivate); 4242 } 4243 4244 /* 4245 * mdi_pi_set_client_private(): 4246 * Set the client private information in the mdi_pathinfo node 4247 */ 4248 void 4249 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4250 { 4251 if (pip) { 4252 MDI_PI(pip)->pi_cprivate = priv; 4253 } 4254 } 4255 4256 /* 4257 * mdi_pi_get_phci_private(): 4258 * Get the pHCI private information associated with the 4259 * mdi_pathinfo node 4260 */ 4261 caddr_t 4262 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4263 { 4264 caddr_t pprivate = NULL; 4265 4266 if (pip) { 4267 pprivate = MDI_PI(pip)->pi_pprivate; 4268 } 4269 return (pprivate); 4270 } 4271 4272 /* 4273 * mdi_pi_set_phci_private(): 4274 * Set the pHCI private information in the mdi_pathinfo node 4275 */ 4276 void 4277 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4278 { 4279 if (pip) { 4280 MDI_PI(pip)->pi_pprivate = priv; 4281 } 4282 } 4283 4284 /* 4285 * mdi_pi_get_state(): 4286 * Get the mdi_pathinfo node state. 
Transient states are internal 4287 * and not provided to the users 4288 */ 4289 mdi_pathinfo_state_t 4290 mdi_pi_get_state(mdi_pathinfo_t *pip) 4291 { 4292 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4293 4294 if (pip) { 4295 if (MDI_PI_IS_TRANSIENT(pip)) { 4296 /* 4297 * mdi_pathinfo is in state transition. Return the 4298 * last good state. 4299 */ 4300 state = MDI_PI_OLD_STATE(pip); 4301 } else { 4302 state = MDI_PI_STATE(pip); 4303 } 4304 } 4305 return (state); 4306 } 4307 4308 /* 4309 * mdi_pi_get_flags(): 4310 * Get the mdi_pathinfo node flags. 4311 */ 4312 uint_t 4313 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4314 { 4315 return (pip ? MDI_PI(pip)->pi_flags : 0); 4316 } 4317 4318 /* 4319 * Note that the following function needs to be the new interface for 4320 * mdi_pi_get_state when mpxio gets integrated to ON. 4321 */ 4322 int 4323 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4324 uint32_t *ext_state) 4325 { 4326 *state = MDI_PATHINFO_STATE_INIT; 4327 4328 if (pip) { 4329 if (MDI_PI_IS_TRANSIENT(pip)) { 4330 /* 4331 * mdi_pathinfo is in state transition. Return the 4332 * last good state. 
4333 */ 4334 *state = MDI_PI_OLD_STATE(pip); 4335 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4336 } else { 4337 *state = MDI_PI_STATE(pip); 4338 *ext_state = MDI_PI_EXT_STATE(pip); 4339 } 4340 } 4341 return (MDI_SUCCESS); 4342 } 4343 4344 /* 4345 * mdi_pi_get_preferred: 4346 * Get the preferred path flag 4347 */ 4348 int 4349 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4350 { 4351 if (pip) { 4352 return (MDI_PI(pip)->pi_preferred); 4353 } 4354 return (0); 4355 } 4356 4357 /* 4358 * mdi_pi_set_preferred: 4359 * Set the preferred path flag 4360 */ 4361 void 4362 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4363 { 4364 if (pip) { 4365 MDI_PI(pip)->pi_preferred = preferred; 4366 } 4367 } 4368 4369 /* 4370 * mdi_pi_set_state(): 4371 * Set the mdi_pathinfo node state 4372 */ 4373 void 4374 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4375 { 4376 uint32_t ext_state; 4377 4378 if (pip) { 4379 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4380 MDI_PI(pip)->pi_state = state; 4381 MDI_PI(pip)->pi_state |= ext_state; 4382 4383 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4384 i_ddi_di_cache_invalidate(); 4385 } 4386 } 4387 4388 /* 4389 * Property functions: 4390 */ 4391 int 4392 i_map_nvlist_error_to_mdi(int val) 4393 { 4394 int rv; 4395 4396 switch (val) { 4397 case 0: 4398 rv = DDI_PROP_SUCCESS; 4399 break; 4400 case EINVAL: 4401 case ENOTSUP: 4402 rv = DDI_PROP_INVAL_ARG; 4403 break; 4404 case ENOMEM: 4405 rv = DDI_PROP_NO_MEMORY; 4406 break; 4407 default: 4408 rv = DDI_PROP_NOT_FOUND; 4409 break; 4410 } 4411 return (rv); 4412 } 4413 4414 /* 4415 * mdi_pi_get_next_prop(): 4416 * Property walk function. The caller should hold mdi_pi_lock() 4417 * and release by calling mdi_pi_unlock() at the end of walk to 4418 * get a consistent value. 
4419 */ 4420 nvpair_t * 4421 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4422 { 4423 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4424 return (NULL); 4425 } 4426 ASSERT(MDI_PI_LOCKED(pip)); 4427 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4428 } 4429 4430 /* 4431 * mdi_prop_remove(): 4432 * Remove the named property from the named list. 4433 */ 4434 int 4435 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4436 { 4437 if (pip == NULL) { 4438 return (DDI_PROP_NOT_FOUND); 4439 } 4440 ASSERT(!MDI_PI_LOCKED(pip)); 4441 MDI_PI_LOCK(pip); 4442 if (MDI_PI(pip)->pi_prop == NULL) { 4443 MDI_PI_UNLOCK(pip); 4444 return (DDI_PROP_NOT_FOUND); 4445 } 4446 if (name) { 4447 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4448 } else { 4449 char nvp_name[MAXNAMELEN]; 4450 nvpair_t *nvp; 4451 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4452 while (nvp) { 4453 nvpair_t *next; 4454 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4455 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4456 nvpair_name(nvp)); 4457 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4458 nvp_name); 4459 nvp = next; 4460 } 4461 } 4462 MDI_PI_UNLOCK(pip); 4463 return (DDI_PROP_SUCCESS); 4464 } 4465 4466 /* 4467 * mdi_prop_size(): 4468 * Get buffer size needed to pack the property data. 4469 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4470 * buffer size. 4471 */ 4472 int 4473 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4474 { 4475 int rv; 4476 size_t bufsize; 4477 4478 *buflenp = 0; 4479 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4480 return (DDI_PROP_NOT_FOUND); 4481 } 4482 ASSERT(MDI_PI_LOCKED(pip)); 4483 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4484 &bufsize, NV_ENCODE_NATIVE); 4485 *buflenp = bufsize; 4486 return (i_map_nvlist_error_to_mdi(rv)); 4487 } 4488 4489 /* 4490 * mdi_prop_pack(): 4491 * pack the property list. 
The caller should hold the 4492 * mdi_pathinfo_t node to get a consistent data 4493 */ 4494 int 4495 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4496 { 4497 int rv; 4498 size_t bufsize; 4499 4500 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4501 return (DDI_PROP_NOT_FOUND); 4502 } 4503 4504 ASSERT(MDI_PI_LOCKED(pip)); 4505 4506 bufsize = buflen; 4507 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4508 NV_ENCODE_NATIVE, KM_SLEEP); 4509 4510 return (i_map_nvlist_error_to_mdi(rv)); 4511 } 4512 4513 /* 4514 * mdi_prop_update_byte(): 4515 * Create/Update a byte property 4516 */ 4517 int 4518 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4519 { 4520 int rv; 4521 4522 if (pip == NULL) { 4523 return (DDI_PROP_INVAL_ARG); 4524 } 4525 ASSERT(!MDI_PI_LOCKED(pip)); 4526 MDI_PI_LOCK(pip); 4527 if (MDI_PI(pip)->pi_prop == NULL) { 4528 MDI_PI_UNLOCK(pip); 4529 return (DDI_PROP_NOT_FOUND); 4530 } 4531 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4532 MDI_PI_UNLOCK(pip); 4533 return (i_map_nvlist_error_to_mdi(rv)); 4534 } 4535 4536 /* 4537 * mdi_prop_update_byte_array(): 4538 * Create/Update a byte array property 4539 */ 4540 int 4541 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4542 uint_t nelements) 4543 { 4544 int rv; 4545 4546 if (pip == NULL) { 4547 return (DDI_PROP_INVAL_ARG); 4548 } 4549 ASSERT(!MDI_PI_LOCKED(pip)); 4550 MDI_PI_LOCK(pip); 4551 if (MDI_PI(pip)->pi_prop == NULL) { 4552 MDI_PI_UNLOCK(pip); 4553 return (DDI_PROP_NOT_FOUND); 4554 } 4555 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4556 MDI_PI_UNLOCK(pip); 4557 return (i_map_nvlist_error_to_mdi(rv)); 4558 } 4559 4560 /* 4561 * mdi_prop_update_int(): 4562 * Create/Update a 32 bit integer property 4563 */ 4564 int 4565 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4566 { 4567 int rv; 4568 4569 if (pip == NULL) { 4570 return (DDI_PROP_INVAL_ARG); 4571 } 4572 
ASSERT(!MDI_PI_LOCKED(pip)); 4573 MDI_PI_LOCK(pip); 4574 if (MDI_PI(pip)->pi_prop == NULL) { 4575 MDI_PI_UNLOCK(pip); 4576 return (DDI_PROP_NOT_FOUND); 4577 } 4578 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4579 MDI_PI_UNLOCK(pip); 4580 return (i_map_nvlist_error_to_mdi(rv)); 4581 } 4582 4583 /* 4584 * mdi_prop_update_int64(): 4585 * Create/Update a 64 bit integer property 4586 */ 4587 int 4588 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4589 { 4590 int rv; 4591 4592 if (pip == NULL) { 4593 return (DDI_PROP_INVAL_ARG); 4594 } 4595 ASSERT(!MDI_PI_LOCKED(pip)); 4596 MDI_PI_LOCK(pip); 4597 if (MDI_PI(pip)->pi_prop == NULL) { 4598 MDI_PI_UNLOCK(pip); 4599 return (DDI_PROP_NOT_FOUND); 4600 } 4601 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4602 MDI_PI_UNLOCK(pip); 4603 return (i_map_nvlist_error_to_mdi(rv)); 4604 } 4605 4606 /* 4607 * mdi_prop_update_int_array(): 4608 * Create/Update a int array property 4609 */ 4610 int 4611 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4612 uint_t nelements) 4613 { 4614 int rv; 4615 4616 if (pip == NULL) { 4617 return (DDI_PROP_INVAL_ARG); 4618 } 4619 ASSERT(!MDI_PI_LOCKED(pip)); 4620 MDI_PI_LOCK(pip); 4621 if (MDI_PI(pip)->pi_prop == NULL) { 4622 MDI_PI_UNLOCK(pip); 4623 return (DDI_PROP_NOT_FOUND); 4624 } 4625 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4626 nelements); 4627 MDI_PI_UNLOCK(pip); 4628 return (i_map_nvlist_error_to_mdi(rv)); 4629 } 4630 4631 /* 4632 * mdi_prop_update_string(): 4633 * Create/Update a string property 4634 */ 4635 int 4636 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4637 { 4638 int rv; 4639 4640 if (pip == NULL) { 4641 return (DDI_PROP_INVAL_ARG); 4642 } 4643 ASSERT(!MDI_PI_LOCKED(pip)); 4644 MDI_PI_LOCK(pip); 4645 if (MDI_PI(pip)->pi_prop == NULL) { 4646 MDI_PI_UNLOCK(pip); 4647 return (DDI_PROP_NOT_FOUND); 4648 } 4649 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4650 MDI_PI_UNLOCK(pip); 4651 return (i_map_nvlist_error_to_mdi(rv)); 4652 } 4653 4654 /* 4655 * mdi_prop_update_string_array(): 4656 * Create/Update a string array property 4657 */ 4658 int 4659 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4660 uint_t nelements) 4661 { 4662 int rv; 4663 4664 if (pip == NULL) { 4665 return (DDI_PROP_INVAL_ARG); 4666 } 4667 ASSERT(!MDI_PI_LOCKED(pip)); 4668 MDI_PI_LOCK(pip); 4669 if (MDI_PI(pip)->pi_prop == NULL) { 4670 MDI_PI_UNLOCK(pip); 4671 return (DDI_PROP_NOT_FOUND); 4672 } 4673 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4674 nelements); 4675 MDI_PI_UNLOCK(pip); 4676 return (i_map_nvlist_error_to_mdi(rv)); 4677 } 4678 4679 /* 4680 * mdi_prop_lookup_byte(): 4681 * Look for byte property identified by name. The data returned 4682 * is the actual property and valid as long as mdi_pathinfo_t node 4683 * is alive. 4684 */ 4685 int 4686 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4687 { 4688 int rv; 4689 4690 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4691 return (DDI_PROP_NOT_FOUND); 4692 } 4693 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4694 return (i_map_nvlist_error_to_mdi(rv)); 4695 } 4696 4697 4698 /* 4699 * mdi_prop_lookup_byte_array(): 4700 * Look for byte array property identified by name. The data 4701 * returned is the actual property and valid as long as 4702 * mdi_pathinfo_t node is alive. 4703 */ 4704 int 4705 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4706 uint_t *nelements) 4707 { 4708 int rv; 4709 4710 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4711 return (DDI_PROP_NOT_FOUND); 4712 } 4713 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4714 nelements); 4715 return (i_map_nvlist_error_to_mdi(rv)); 4716 } 4717 4718 /* 4719 * mdi_prop_lookup_int(): 4720 * Look for int property identified by name. 
The data returned 4721 * is the actual property and valid as long as mdi_pathinfo_t 4722 * node is alive. 4723 */ 4724 int 4725 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4726 { 4727 int rv; 4728 4729 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4730 return (DDI_PROP_NOT_FOUND); 4731 } 4732 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4733 return (i_map_nvlist_error_to_mdi(rv)); 4734 } 4735 4736 /* 4737 * mdi_prop_lookup_int64(): 4738 * Look for int64 property identified by name. The data returned 4739 * is the actual property and valid as long as mdi_pathinfo_t node 4740 * is alive. 4741 */ 4742 int 4743 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4744 { 4745 int rv; 4746 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4747 return (DDI_PROP_NOT_FOUND); 4748 } 4749 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4750 return (i_map_nvlist_error_to_mdi(rv)); 4751 } 4752 4753 /* 4754 * mdi_prop_lookup_int_array(): 4755 * Look for int array property identified by name. The data 4756 * returned is the actual property and valid as long as 4757 * mdi_pathinfo_t node is alive. 4758 */ 4759 int 4760 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4761 uint_t *nelements) 4762 { 4763 int rv; 4764 4765 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4766 return (DDI_PROP_NOT_FOUND); 4767 } 4768 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4769 (int32_t **)data, nelements); 4770 return (i_map_nvlist_error_to_mdi(rv)); 4771 } 4772 4773 /* 4774 * mdi_prop_lookup_string(): 4775 * Look for string property identified by name. The data 4776 * returned is the actual property and valid as long as 4777 * mdi_pathinfo_t node is alive. 
4778 */ 4779 int 4780 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4781 { 4782 int rv; 4783 4784 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4785 return (DDI_PROP_NOT_FOUND); 4786 } 4787 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4788 return (i_map_nvlist_error_to_mdi(rv)); 4789 } 4790 4791 /* 4792 * mdi_prop_lookup_string_array(): 4793 * Look for string array property identified by name. The data 4794 * returned is the actual property and valid as long as 4795 * mdi_pathinfo_t node is alive. 4796 */ 4797 int 4798 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4799 uint_t *nelements) 4800 { 4801 int rv; 4802 4803 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4804 return (DDI_PROP_NOT_FOUND); 4805 } 4806 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4807 nelements); 4808 return (i_map_nvlist_error_to_mdi(rv)); 4809 } 4810 4811 /* 4812 * mdi_prop_free(): 4813 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4814 * functions return the pointer to actual property data and not a 4815 * copy of it. So the data returned is valid as long as 4816 * mdi_pathinfo_t node is valid. 
4817 */ 4818 /*ARGSUSED*/ 4819 int 4820 mdi_prop_free(void *data) 4821 { 4822 return (DDI_PROP_SUCCESS); 4823 } 4824 4825 /*ARGSUSED*/ 4826 static void 4827 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4828 { 4829 char *ct_path; 4830 char *ct_status; 4831 char *status; 4832 dev_info_t *cdip = ct->ct_dip; 4833 char lb_buf[64]; 4834 int report_lb_c = 0, report_lb_p = 0; 4835 4836 ASSERT(MDI_CLIENT_LOCKED(ct)); 4837 if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) || 4838 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4839 return; 4840 } 4841 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4842 ct_status = "optimal"; 4843 report_lb_c = 1; 4844 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4845 ct_status = "degraded"; 4846 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4847 ct_status = "failed"; 4848 } else { 4849 ct_status = "unknown"; 4850 } 4851 4852 lb_buf[0] = 0; /* not interested in load balancing config */ 4853 4854 if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) { 4855 status = "removed"; 4856 } else if (MDI_PI_IS_OFFLINE(pip)) { 4857 status = "offline"; 4858 } else if (MDI_PI_IS_ONLINE(pip)) { 4859 status = "online"; 4860 report_lb_p = 1; 4861 } else if (MDI_PI_IS_STANDBY(pip)) { 4862 status = "standby"; 4863 } else if (MDI_PI_IS_FAULT(pip)) { 4864 status = "faulted"; 4865 } else { 4866 status = "unknown"; 4867 } 4868 4869 if (cdip) { 4870 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4871 4872 /* 4873 * NOTE: Keeping "multipath status: %s" and 4874 * "Load balancing: %s" format unchanged in case someone 4875 * scrubs /var/adm/messages looking for these messages. 
4876 */ 4877 if (report_lb_c && report_lb_p) { 4878 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4879 (void) snprintf(lb_buf, sizeof (lb_buf), 4880 "%s, region-size: %d", mdi_load_balance_lba, 4881 ct->ct_lb_args->region_size); 4882 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4883 (void) snprintf(lb_buf, sizeof (lb_buf), 4884 "%s", mdi_load_balance_none); 4885 } else { 4886 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4887 mdi_load_balance_rr); 4888 } 4889 4890 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4891 "?%s (%s%d) multipath status: %s: " 4892 "path %d %s is %s: Load balancing: %s\n", 4893 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4894 ddi_get_instance(cdip), ct_status, 4895 mdi_pi_get_path_instance(pip), 4896 mdi_pi_spathname(pip), status, lb_buf); 4897 } else { 4898 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4899 "?%s (%s%d) multipath status: %s: " 4900 "path %d %s is %s\n", 4901 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4902 ddi_get_instance(cdip), ct_status, 4903 mdi_pi_get_path_instance(pip), 4904 mdi_pi_spathname(pip), status); 4905 } 4906 4907 kmem_free(ct_path, MAXPATHLEN); 4908 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4909 } 4910 } 4911 4912 #ifdef DEBUG 4913 /* 4914 * i_mdi_log(): 4915 * Utility function for error message management 4916 * 4917 * NOTE: Implementation takes care of trailing \n for cmn_err, 4918 * MDI_DEBUG should not terminate fmt strings with \n. 4919 * 4920 * NOTE: If the level is >= 2, and there is no leading !?^ 4921 * then a leading ! is implied (but can be overriden via 4922 * mdi_debug_consoleonly). If you are using kmdb on the console, 4923 * consider setting mdi_debug_consoleonly to 1 as an aid. 4924 */ 4925 /*PRINTFLIKE4*/ 4926 static void 4927 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...) 
4928 { 4929 char name[MAXNAMELEN]; 4930 char buf[512]; 4931 char *bp; 4932 va_list ap; 4933 int log_only = 0; 4934 int boot_only = 0; 4935 int console_only = 0; 4936 4937 if (dip) { 4938 (void) snprintf(name, sizeof(name), "%s%d: ", 4939 ddi_driver_name(dip), ddi_get_instance(dip)); 4940 } else { 4941 name[0] = 0; 4942 } 4943 4944 va_start(ap, fmt); 4945 (void) vsnprintf(buf, sizeof(buf), fmt, ap); 4946 va_end(ap); 4947 4948 switch (buf[0]) { 4949 case '!': 4950 bp = &buf[1]; 4951 log_only = 1; 4952 break; 4953 case '?': 4954 bp = &buf[1]; 4955 boot_only = 1; 4956 break; 4957 case '^': 4958 bp = &buf[1]; 4959 console_only = 1; 4960 break; 4961 default: 4962 if (level >= 2) 4963 log_only = 1; /* ! implied */ 4964 bp = buf; 4965 break; 4966 } 4967 if (mdi_debug_logonly) { 4968 log_only = 1; 4969 boot_only = 0; 4970 console_only = 0; 4971 } 4972 if (mdi_debug_consoleonly) { 4973 log_only = 0; 4974 boot_only = 0; 4975 console_only = 1; 4976 level = CE_NOTE; 4977 goto console; 4978 } 4979 4980 switch (level) { 4981 case CE_NOTE: 4982 level = CE_CONT; 4983 /* FALLTHROUGH */ 4984 case CE_CONT: 4985 if (boot_only) { 4986 cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp); 4987 } else if (console_only) { 4988 cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp); 4989 } else if (log_only) { 4990 cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp); 4991 } else { 4992 cmn_err(level, "mdi: %s%s: %s\n", name, func, bp); 4993 } 4994 break; 4995 4996 case CE_WARN: 4997 case CE_PANIC: 4998 console: 4999 if (boot_only) { 5000 cmn_err(level, "?mdi: %s%s: %s", name, func, bp); 5001 } else if (console_only) { 5002 cmn_err(level, "^mdi: %s%s: %s", name, func, bp); 5003 } else if (log_only) { 5004 cmn_err(level, "!mdi: %s%s: %s", name, func, bp); 5005 } else { 5006 cmn_err(level, "mdi: %s%s: %s", name, func, bp); 5007 } 5008 break; 5009 default: 5010 cmn_err(level, "mdi: %s%s", name, bp); 5011 break; 5012 } 5013 } 5014 #endif /* DEBUG */ 5015 5016 void 5017 i_mdi_client_online(dev_info_t 
*ct_dip) 5018 { 5019 mdi_client_t *ct; 5020 5021 /* 5022 * Client online notification. Mark client state as online 5023 * restore our binding with dev_info node 5024 */ 5025 ct = i_devi_get_client(ct_dip); 5026 ASSERT(ct != NULL); 5027 MDI_CLIENT_LOCK(ct); 5028 MDI_CLIENT_SET_ONLINE(ct); 5029 /* catch for any memory leaks */ 5030 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 5031 ct->ct_dip = ct_dip; 5032 5033 if (ct->ct_power_cnt == 0) 5034 (void) i_mdi_power_all_phci(ct); 5035 5036 MDI_DEBUG(4, (MDI_NOTE, ct_dip, 5037 "i_mdi_pm_hold_client %p", (void *)ct)); 5038 i_mdi_pm_hold_client(ct, 1); 5039 5040 MDI_CLIENT_UNLOCK(ct); 5041 } 5042 5043 void 5044 i_mdi_phci_online(dev_info_t *ph_dip) 5045 { 5046 mdi_phci_t *ph; 5047 5048 /* pHCI online notification. Mark state accordingly */ 5049 ph = i_devi_get_phci(ph_dip); 5050 ASSERT(ph != NULL); 5051 MDI_PHCI_LOCK(ph); 5052 MDI_PHCI_SET_ONLINE(ph); 5053 MDI_PHCI_UNLOCK(ph); 5054 } 5055 5056 /* 5057 * mdi_devi_online(): 5058 * Online notification from NDI framework on pHCI/client 5059 * device online. 5060 * Return Values: 5061 * NDI_SUCCESS 5062 * MDI_FAILURE 5063 */ 5064 /*ARGSUSED*/ 5065 int 5066 mdi_devi_online(dev_info_t *dip, uint_t flags) 5067 { 5068 if (MDI_PHCI(dip)) { 5069 i_mdi_phci_online(dip); 5070 } 5071 5072 if (MDI_CLIENT(dip)) { 5073 i_mdi_client_online(dip); 5074 } 5075 return (NDI_SUCCESS); 5076 } 5077 5078 /* 5079 * mdi_devi_offline(): 5080 * Offline notification from NDI framework on pHCI/Client device 5081 * offline. 
5082 * 5083 * Return Values: 5084 * NDI_SUCCESS 5085 * NDI_FAILURE 5086 */ 5087 /*ARGSUSED*/ 5088 int 5089 mdi_devi_offline(dev_info_t *dip, uint_t flags) 5090 { 5091 int rv = NDI_SUCCESS; 5092 5093 if (MDI_CLIENT(dip)) { 5094 rv = i_mdi_client_offline(dip, flags); 5095 if (rv != NDI_SUCCESS) 5096 return (rv); 5097 } 5098 5099 if (MDI_PHCI(dip)) { 5100 rv = i_mdi_phci_offline(dip, flags); 5101 5102 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 5103 /* set client back online */ 5104 i_mdi_client_online(dip); 5105 } 5106 } 5107 5108 return (rv); 5109 } 5110 5111 /*ARGSUSED*/ 5112 static int 5113 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 5114 { 5115 int rv = NDI_SUCCESS; 5116 mdi_phci_t *ph; 5117 mdi_client_t *ct; 5118 mdi_pathinfo_t *pip; 5119 mdi_pathinfo_t *next; 5120 mdi_pathinfo_t *failed_pip = NULL; 5121 dev_info_t *cdip; 5122 5123 /* 5124 * pHCI component offline notification 5125 * Make sure that this pHCI instance is free to be offlined. 5126 * If it is OK to proceed, Offline and remove all the child 5127 * mdi_pathinfo nodes. This process automatically offlines 5128 * corresponding client devices, for which this pHCI provides 5129 * critical services. 5130 */ 5131 ph = i_devi_get_phci(dip); 5132 MDI_DEBUG(2, (MDI_NOTE, dip, 5133 "called %p %p", (void *)dip, (void *)ph)); 5134 if (ph == NULL) { 5135 return (rv); 5136 } 5137 5138 MDI_PHCI_LOCK(ph); 5139 5140 if (MDI_PHCI_IS_OFFLINE(ph)) { 5141 MDI_DEBUG(1, (MDI_WARN, dip, 5142 "!pHCI already offlined: %p", (void *)dip)); 5143 MDI_PHCI_UNLOCK(ph); 5144 return (NDI_SUCCESS); 5145 } 5146 5147 /* 5148 * Check to see if the pHCI can be offlined 5149 */ 5150 if (ph->ph_unstable) { 5151 MDI_DEBUG(1, (MDI_WARN, dip, 5152 "!One or more target devices are in transient state. " 5153 "This device can not be removed at this moment. 
" 5154 "Please try again later.")); 5155 MDI_PHCI_UNLOCK(ph); 5156 return (NDI_BUSY); 5157 } 5158 5159 pip = ph->ph_path_head; 5160 while (pip != NULL) { 5161 MDI_PI_LOCK(pip); 5162 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5163 5164 /* 5165 * The mdi_pathinfo state is OK. Check the client state. 5166 * If failover in progress fail the pHCI from offlining 5167 */ 5168 ct = MDI_PI(pip)->pi_client; 5169 i_mdi_client_lock(ct, pip); 5170 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5171 (ct->ct_unstable)) { 5172 /* 5173 * Failover is in progress, Fail the DR 5174 */ 5175 MDI_DEBUG(1, (MDI_WARN, dip, 5176 "!pHCI device is busy. " 5177 "This device can not be removed at this moment. " 5178 "Please try again later.")); 5179 MDI_PI_UNLOCK(pip); 5180 i_mdi_client_unlock(ct); 5181 MDI_PHCI_UNLOCK(ph); 5182 return (NDI_BUSY); 5183 } 5184 MDI_PI_UNLOCK(pip); 5185 5186 /* 5187 * Check to see of we are removing the last path of this 5188 * client device... 5189 */ 5190 cdip = ct->ct_dip; 5191 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5192 (i_mdi_client_compute_state(ct, ph) == 5193 MDI_CLIENT_STATE_FAILED)) { 5194 i_mdi_client_unlock(ct); 5195 MDI_PHCI_UNLOCK(ph); 5196 if (ndi_devi_offline(cdip, 5197 NDI_DEVFS_CLEAN) != NDI_SUCCESS) { 5198 /* 5199 * ndi_devi_offline() failed. 5200 * This pHCI provides the critical path 5201 * to one or more client devices. 5202 * Return busy. 5203 */ 5204 MDI_PHCI_LOCK(ph); 5205 MDI_DEBUG(1, (MDI_WARN, dip, 5206 "!pHCI device is busy. " 5207 "This device can not be removed at this " 5208 "moment. 
Please try again later.")); 5209 failed_pip = pip; 5210 break; 5211 } else { 5212 MDI_PHCI_LOCK(ph); 5213 pip = next; 5214 } 5215 } else { 5216 i_mdi_client_unlock(ct); 5217 pip = next; 5218 } 5219 } 5220 5221 if (failed_pip) { 5222 pip = ph->ph_path_head; 5223 while (pip != failed_pip) { 5224 MDI_PI_LOCK(pip); 5225 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5226 ct = MDI_PI(pip)->pi_client; 5227 i_mdi_client_lock(ct, pip); 5228 cdip = ct->ct_dip; 5229 switch (MDI_CLIENT_STATE(ct)) { 5230 case MDI_CLIENT_STATE_OPTIMAL: 5231 case MDI_CLIENT_STATE_DEGRADED: 5232 if (cdip) { 5233 MDI_PI_UNLOCK(pip); 5234 i_mdi_client_unlock(ct); 5235 MDI_PHCI_UNLOCK(ph); 5236 (void) ndi_devi_online(cdip, 0); 5237 MDI_PHCI_LOCK(ph); 5238 pip = next; 5239 continue; 5240 } 5241 break; 5242 5243 case MDI_CLIENT_STATE_FAILED: 5244 if (cdip) { 5245 MDI_PI_UNLOCK(pip); 5246 i_mdi_client_unlock(ct); 5247 MDI_PHCI_UNLOCK(ph); 5248 (void) ndi_devi_offline(cdip, 5249 NDI_DEVFS_CLEAN); 5250 MDI_PHCI_LOCK(ph); 5251 pip = next; 5252 continue; 5253 } 5254 break; 5255 } 5256 MDI_PI_UNLOCK(pip); 5257 i_mdi_client_unlock(ct); 5258 pip = next; 5259 } 5260 MDI_PHCI_UNLOCK(ph); 5261 return (NDI_BUSY); 5262 } 5263 5264 /* 5265 * Mark the pHCI as offline 5266 */ 5267 MDI_PHCI_SET_OFFLINE(ph); 5268 5269 /* 5270 * Mark the child mdi_pathinfo nodes as transient 5271 */ 5272 pip = ph->ph_path_head; 5273 while (pip != NULL) { 5274 MDI_PI_LOCK(pip); 5275 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5276 MDI_PI_SET_OFFLINING(pip); 5277 MDI_PI_UNLOCK(pip); 5278 pip = next; 5279 } 5280 MDI_PHCI_UNLOCK(ph); 5281 /* 5282 * Give a chance for any pending commands to execute 5283 */ 5284 delay_random(mdi_delay); 5285 MDI_PHCI_LOCK(ph); 5286 pip = ph->ph_path_head; 5287 while (pip != NULL) { 5288 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5289 (void) i_mdi_pi_offline(pip, flags); 5290 MDI_PI_LOCK(pip); 5291 ct = MDI_PI(pip)->pi_client; 5292 if (!MDI_PI_IS_OFFLINE(pip)) { 5293 MDI_DEBUG(1, 
(MDI_WARN, dip, 5294 "!pHCI device is busy. " 5295 "This device can not be removed at this moment. " 5296 "Please try again later.")); 5297 MDI_PI_UNLOCK(pip); 5298 MDI_PHCI_SET_ONLINE(ph); 5299 MDI_PHCI_UNLOCK(ph); 5300 return (NDI_BUSY); 5301 } 5302 MDI_PI_UNLOCK(pip); 5303 pip = next; 5304 } 5305 MDI_PHCI_UNLOCK(ph); 5306 5307 return (rv); 5308 } 5309 5310 void 5311 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5312 { 5313 mdi_phci_t *ph; 5314 mdi_client_t *ct; 5315 mdi_pathinfo_t *pip; 5316 mdi_pathinfo_t *next; 5317 dev_info_t *cdip; 5318 5319 if (!MDI_PHCI(dip)) 5320 return; 5321 5322 ph = i_devi_get_phci(dip); 5323 if (ph == NULL) { 5324 return; 5325 } 5326 5327 MDI_PHCI_LOCK(ph); 5328 5329 if (MDI_PHCI_IS_OFFLINE(ph)) { 5330 /* has no last path */ 5331 MDI_PHCI_UNLOCK(ph); 5332 return; 5333 } 5334 5335 pip = ph->ph_path_head; 5336 while (pip != NULL) { 5337 MDI_PI_LOCK(pip); 5338 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5339 5340 ct = MDI_PI(pip)->pi_client; 5341 i_mdi_client_lock(ct, pip); 5342 MDI_PI_UNLOCK(pip); 5343 5344 cdip = ct->ct_dip; 5345 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5346 (i_mdi_client_compute_state(ct, ph) == 5347 MDI_CLIENT_STATE_FAILED)) { 5348 /* Last path. 
Mark client dip as retiring */ 5349 i_mdi_client_unlock(ct); 5350 MDI_PHCI_UNLOCK(ph); 5351 (void) e_ddi_mark_retiring(cdip, cons_array); 5352 MDI_PHCI_LOCK(ph); 5353 pip = next; 5354 } else { 5355 i_mdi_client_unlock(ct); 5356 pip = next; 5357 } 5358 } 5359 5360 MDI_PHCI_UNLOCK(ph); 5361 5362 return; 5363 } 5364 5365 void 5366 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5367 { 5368 mdi_phci_t *ph; 5369 mdi_client_t *ct; 5370 mdi_pathinfo_t *pip; 5371 mdi_pathinfo_t *next; 5372 dev_info_t *cdip; 5373 5374 if (!MDI_PHCI(dip)) 5375 return; 5376 5377 ph = i_devi_get_phci(dip); 5378 if (ph == NULL) 5379 return; 5380 5381 MDI_PHCI_LOCK(ph); 5382 5383 if (MDI_PHCI_IS_OFFLINE(ph)) { 5384 MDI_PHCI_UNLOCK(ph); 5385 /* not last path */ 5386 return; 5387 } 5388 5389 if (ph->ph_unstable) { 5390 MDI_PHCI_UNLOCK(ph); 5391 /* can't check for constraints */ 5392 *constraint = 0; 5393 return; 5394 } 5395 5396 pip = ph->ph_path_head; 5397 while (pip != NULL) { 5398 MDI_PI_LOCK(pip); 5399 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5400 5401 /* 5402 * The mdi_pathinfo state is OK. Check the client state. 5403 * If failover in progress fail the pHCI from offlining 5404 */ 5405 ct = MDI_PI(pip)->pi_client; 5406 i_mdi_client_lock(ct, pip); 5407 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5408 (ct->ct_unstable)) { 5409 /* 5410 * Failover is in progress, can't check for constraints 5411 */ 5412 MDI_PI_UNLOCK(pip); 5413 i_mdi_client_unlock(ct); 5414 MDI_PHCI_UNLOCK(ph); 5415 *constraint = 0; 5416 return; 5417 } 5418 MDI_PI_UNLOCK(pip); 5419 5420 /* 5421 * Check to see of we are retiring the last path of this 5422 * client device... 
5423 */ 5424 cdip = ct->ct_dip; 5425 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5426 (i_mdi_client_compute_state(ct, ph) == 5427 MDI_CLIENT_STATE_FAILED)) { 5428 i_mdi_client_unlock(ct); 5429 MDI_PHCI_UNLOCK(ph); 5430 (void) e_ddi_retire_notify(cdip, constraint); 5431 MDI_PHCI_LOCK(ph); 5432 pip = next; 5433 } else { 5434 i_mdi_client_unlock(ct); 5435 pip = next; 5436 } 5437 } 5438 5439 MDI_PHCI_UNLOCK(ph); 5440 5441 return; 5442 } 5443 5444 /* 5445 * offline the path(s) hanging off the pHCI. If the 5446 * last path to any client, check that constraints 5447 * have been applied. 5448 * 5449 * If constraint is 0, we aren't going to retire the 5450 * pHCI. However we still need to go through the paths 5451 * calling e_ddi_retire_finalize() to clear their 5452 * contract barriers. 5453 */ 5454 void 5455 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint) 5456 { 5457 mdi_phci_t *ph; 5458 mdi_client_t *ct; 5459 mdi_pathinfo_t *pip; 5460 mdi_pathinfo_t *next; 5461 dev_info_t *cdip; 5462 int unstable = 0; 5463 int tmp_constraint; 5464 5465 if (!MDI_PHCI(dip)) 5466 return; 5467 5468 ph = i_devi_get_phci(dip); 5469 if (ph == NULL) { 5470 /* no last path and no pips */ 5471 return; 5472 } 5473 5474 MDI_PHCI_LOCK(ph); 5475 5476 if (MDI_PHCI_IS_OFFLINE(ph)) { 5477 MDI_PHCI_UNLOCK(ph); 5478 /* no last path and no pips */ 5479 return; 5480 } 5481 5482 /* 5483 * Check to see if the pHCI can be offlined 5484 */ 5485 if (ph->ph_unstable) { 5486 unstable = 1; 5487 } 5488 5489 pip = ph->ph_path_head; 5490 while (pip != NULL) { 5491 MDI_PI_LOCK(pip); 5492 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5493 5494 /* 5495 * if failover in progress fail the pHCI from offlining 5496 */ 5497 ct = MDI_PI(pip)->pi_client; 5498 i_mdi_client_lock(ct, pip); 5499 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5500 (ct->ct_unstable)) { 5501 unstable = 1; 5502 } 5503 MDI_PI_UNLOCK(pip); 5504 5505 /* 5506 * Check to see of we are removing the last path of 
this 5507 * client device... 5508 */ 5509 cdip = ct->ct_dip; 5510 if (!phci_only && cdip && 5511 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5512 (i_mdi_client_compute_state(ct, ph) == 5513 MDI_CLIENT_STATE_FAILED)) { 5514 i_mdi_client_unlock(ct); 5515 MDI_PHCI_UNLOCK(ph); 5516 /* 5517 * This is the last path to this client. 5518 * 5519 * Constraint will only be set to 1 if this client can 5520 * be retired (as already determined by 5521 * mdi_phci_retire_notify). However we don't actually 5522 * need to retire the client (we just retire the last 5523 * path - MPXIO will then fail all I/Os to the client). 5524 * But we still need to call e_ddi_retire_finalize so 5525 * the contract barriers can be cleared. Therefore we 5526 * temporarily set constraint = 0 so that the client 5527 * dip is not retired. 5528 */ 5529 tmp_constraint = 0; 5530 (void) e_ddi_retire_finalize(cdip, &tmp_constraint); 5531 MDI_PHCI_LOCK(ph); 5532 pip = next; 5533 } else { 5534 i_mdi_client_unlock(ct); 5535 pip = next; 5536 } 5537 } 5538 5539 if (!phci_only && *((int *)constraint) == 0) { 5540 MDI_PHCI_UNLOCK(ph); 5541 return; 5542 } 5543 5544 /* 5545 * Cannot offline pip(s) 5546 */ 5547 if (unstable) { 5548 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: " 5549 "pHCI in transient state, cannot retire", 5550 ddi_driver_name(dip), ddi_get_instance(dip)); 5551 MDI_PHCI_UNLOCK(ph); 5552 return; 5553 } 5554 5555 /* 5556 * Mark the pHCI as offline 5557 */ 5558 MDI_PHCI_SET_OFFLINE(ph); 5559 5560 /* 5561 * Mark the child mdi_pathinfo nodes as transient 5562 */ 5563 pip = ph->ph_path_head; 5564 while (pip != NULL) { 5565 MDI_PI_LOCK(pip); 5566 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5567 MDI_PI_SET_OFFLINING(pip); 5568 MDI_PI_UNLOCK(pip); 5569 pip = next; 5570 } 5571 MDI_PHCI_UNLOCK(ph); 5572 /* 5573 * Give a chance for any pending commands to execute 5574 */ 5575 delay_random(mdi_delay); 5576 MDI_PHCI_LOCK(ph); 5577 pip = ph->ph_path_head; 5578 while (pip != NULL) { 5579 next = 
(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5580 (void) i_mdi_pi_offline(pip, 0); 5581 MDI_PI_LOCK(pip); 5582 ct = MDI_PI(pip)->pi_client; 5583 if (!MDI_PI_IS_OFFLINE(pip)) { 5584 cmn_err(CE_WARN, "mdi_phci_retire_finalize: " 5585 "path %d %s busy, cannot offline", 5586 mdi_pi_get_path_instance(pip), 5587 mdi_pi_spathname(pip)); 5588 MDI_PI_UNLOCK(pip); 5589 MDI_PHCI_SET_ONLINE(ph); 5590 MDI_PHCI_UNLOCK(ph); 5591 return; 5592 } 5593 MDI_PI_UNLOCK(pip); 5594 pip = next; 5595 } 5596 MDI_PHCI_UNLOCK(ph); 5597 5598 return; 5599 } 5600 5601 void 5602 mdi_phci_unretire(dev_info_t *dip) 5603 { 5604 mdi_phci_t *ph; 5605 mdi_pathinfo_t *pip; 5606 mdi_pathinfo_t *next; 5607 5608 ASSERT(MDI_PHCI(dip)); 5609 5610 /* 5611 * Online the phci 5612 */ 5613 i_mdi_phci_online(dip); 5614 5615 ph = i_devi_get_phci(dip); 5616 MDI_PHCI_LOCK(ph); 5617 pip = ph->ph_path_head; 5618 while (pip != NULL) { 5619 MDI_PI_LOCK(pip); 5620 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5621 MDI_PI_UNLOCK(pip); 5622 (void) i_mdi_pi_online(pip, 0); 5623 pip = next; 5624 } 5625 MDI_PHCI_UNLOCK(ph); 5626 } 5627 5628 /*ARGSUSED*/ 5629 static int 5630 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5631 { 5632 int rv = NDI_SUCCESS; 5633 mdi_client_t *ct; 5634 5635 /* 5636 * Client component to go offline. Make sure that we are 5637 * not in failing over state and update client state 5638 * accordingly 5639 */ 5640 ct = i_devi_get_client(dip); 5641 MDI_DEBUG(2, (MDI_NOTE, dip, 5642 "called %p %p", (void *)dip, (void *)ct)); 5643 if (ct != NULL) { 5644 MDI_CLIENT_LOCK(ct); 5645 if (ct->ct_unstable) { 5646 /* 5647 * One or more paths are in transient state, 5648 * Dont allow offline of a client device 5649 */ 5650 MDI_DEBUG(1, (MDI_WARN, dip, 5651 "!One or more paths to " 5652 "this device are in transient state. " 5653 "This device can not be removed at this moment. 
" 5654 "Please try again later.")); 5655 MDI_CLIENT_UNLOCK(ct); 5656 return (NDI_BUSY); 5657 } 5658 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5659 /* 5660 * Failover is in progress, Dont allow DR of 5661 * a client device 5662 */ 5663 MDI_DEBUG(1, (MDI_WARN, dip, 5664 "!Client device is Busy. " 5665 "This device can not be removed at this moment. " 5666 "Please try again later.")); 5667 MDI_CLIENT_UNLOCK(ct); 5668 return (NDI_BUSY); 5669 } 5670 MDI_CLIENT_SET_OFFLINE(ct); 5671 5672 /* 5673 * Unbind our relationship with the dev_info node 5674 */ 5675 if (flags & NDI_DEVI_REMOVE) { 5676 ct->ct_dip = NULL; 5677 } 5678 MDI_CLIENT_UNLOCK(ct); 5679 } 5680 return (rv); 5681 } 5682 5683 /* 5684 * mdi_pre_attach(): 5685 * Pre attach() notification handler 5686 */ 5687 /*ARGSUSED*/ 5688 int 5689 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5690 { 5691 /* don't support old DDI_PM_RESUME */ 5692 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5693 (cmd == DDI_PM_RESUME)) 5694 return (DDI_FAILURE); 5695 5696 return (DDI_SUCCESS); 5697 } 5698 5699 /* 5700 * mdi_post_attach(): 5701 * Post attach() notification handler 5702 */ 5703 /*ARGSUSED*/ 5704 void 5705 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5706 { 5707 mdi_phci_t *ph; 5708 mdi_client_t *ct; 5709 mdi_vhci_t *vh; 5710 5711 if (MDI_PHCI(dip)) { 5712 ph = i_devi_get_phci(dip); 5713 ASSERT(ph != NULL); 5714 5715 MDI_PHCI_LOCK(ph); 5716 switch (cmd) { 5717 case DDI_ATTACH: 5718 MDI_DEBUG(2, (MDI_NOTE, dip, 5719 "phci post_attach called %p", (void *)ph)); 5720 if (error == DDI_SUCCESS) { 5721 MDI_PHCI_SET_ATTACH(ph); 5722 } else { 5723 MDI_DEBUG(1, (MDI_NOTE, dip, 5724 "!pHCI post_attach failed: error %d", 5725 error)); 5726 MDI_PHCI_SET_DETACH(ph); 5727 } 5728 break; 5729 5730 case DDI_RESUME: 5731 MDI_DEBUG(2, (MDI_NOTE, dip, 5732 "pHCI post_resume: called %p", (void *)ph)); 5733 if (error == DDI_SUCCESS) { 5734 MDI_PHCI_SET_RESUME(ph); 5735 } else { 5736 MDI_DEBUG(1, 
(MDI_NOTE, dip, 5737 "!pHCI post_resume failed: error %d", 5738 error)); 5739 MDI_PHCI_SET_SUSPEND(ph); 5740 } 5741 break; 5742 } 5743 MDI_PHCI_UNLOCK(ph); 5744 } 5745 5746 if (MDI_CLIENT(dip)) { 5747 ct = i_devi_get_client(dip); 5748 ASSERT(ct != NULL); 5749 5750 MDI_CLIENT_LOCK(ct); 5751 switch (cmd) { 5752 case DDI_ATTACH: 5753 MDI_DEBUG(2, (MDI_NOTE, dip, 5754 "client post_attach called %p", (void *)ct)); 5755 if (error != DDI_SUCCESS) { 5756 MDI_DEBUG(1, (MDI_NOTE, dip, 5757 "!client post_attach failed: error %d", 5758 error)); 5759 MDI_CLIENT_SET_DETACH(ct); 5760 MDI_DEBUG(4, (MDI_WARN, dip, 5761 "i_mdi_pm_reset_client")); 5762 i_mdi_pm_reset_client(ct); 5763 break; 5764 } 5765 5766 /* 5767 * Client device has successfully attached, inform 5768 * the vhci. 5769 */ 5770 vh = ct->ct_vhci; 5771 if (vh->vh_ops->vo_client_attached) 5772 (*vh->vh_ops->vo_client_attached)(dip); 5773 5774 MDI_CLIENT_SET_ATTACH(ct); 5775 break; 5776 5777 case DDI_RESUME: 5778 MDI_DEBUG(2, (MDI_NOTE, dip, 5779 "client post_attach: called %p", (void *)ct)); 5780 if (error == DDI_SUCCESS) { 5781 MDI_CLIENT_SET_RESUME(ct); 5782 } else { 5783 MDI_DEBUG(1, (MDI_NOTE, dip, 5784 "!client post_resume failed: error %d", 5785 error)); 5786 MDI_CLIENT_SET_SUSPEND(ct); 5787 } 5788 break; 5789 } 5790 MDI_CLIENT_UNLOCK(ct); 5791 } 5792 } 5793 5794 /* 5795 * mdi_pre_detach(): 5796 * Pre detach notification handler 5797 */ 5798 /*ARGSUSED*/ 5799 int 5800 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5801 { 5802 int rv = DDI_SUCCESS; 5803 5804 if (MDI_CLIENT(dip)) { 5805 (void) i_mdi_client_pre_detach(dip, cmd); 5806 } 5807 5808 if (MDI_PHCI(dip)) { 5809 rv = i_mdi_phci_pre_detach(dip, cmd); 5810 } 5811 5812 return (rv); 5813 } 5814 5815 /*ARGSUSED*/ 5816 static int 5817 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5818 { 5819 int rv = DDI_SUCCESS; 5820 mdi_phci_t *ph; 5821 mdi_client_t *ct; 5822 mdi_pathinfo_t *pip; 5823 mdi_pathinfo_t *failed_pip = NULL; 5824 mdi_pathinfo_t 
*next; 5825 5826 ph = i_devi_get_phci(dip); 5827 if (ph == NULL) { 5828 return (rv); 5829 } 5830 5831 MDI_PHCI_LOCK(ph); 5832 switch (cmd) { 5833 case DDI_DETACH: 5834 MDI_DEBUG(2, (MDI_NOTE, dip, 5835 "pHCI pre_detach: called %p", (void *)ph)); 5836 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5837 /* 5838 * mdi_pathinfo nodes are still attached to 5839 * this pHCI. Fail the detach for this pHCI. 5840 */ 5841 MDI_DEBUG(2, (MDI_WARN, dip, 5842 "pHCI pre_detach: paths are still attached %p", 5843 (void *)ph)); 5844 rv = DDI_FAILURE; 5845 break; 5846 } 5847 MDI_PHCI_SET_DETACH(ph); 5848 break; 5849 5850 case DDI_SUSPEND: 5851 /* 5852 * pHCI is getting suspended. Since mpxio client 5853 * devices may not be suspended at this point, to avoid 5854 * a potential stack overflow, it is important to suspend 5855 * client devices before pHCI can be suspended. 5856 */ 5857 5858 MDI_DEBUG(2, (MDI_NOTE, dip, 5859 "pHCI pre_suspend: called %p", (void *)ph)); 5860 /* 5861 * Suspend all the client devices accessible through this pHCI 5862 */ 5863 pip = ph->ph_path_head; 5864 while (pip != NULL && rv == DDI_SUCCESS) { 5865 dev_info_t *cdip; 5866 MDI_PI_LOCK(pip); 5867 next = 5868 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5869 ct = MDI_PI(pip)->pi_client; 5870 i_mdi_client_lock(ct, pip); 5871 cdip = ct->ct_dip; 5872 MDI_PI_UNLOCK(pip); 5873 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5874 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5875 i_mdi_client_unlock(ct); 5876 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5877 DDI_SUCCESS) { 5878 /* 5879 * Suspend of one of the client 5880 * device has failed. 5881 */ 5882 MDI_DEBUG(1, (MDI_WARN, dip, 5883 "!suspend of device (%s%d) failed.", 5884 ddi_driver_name(cdip), 5885 ddi_get_instance(cdip))); 5886 failed_pip = pip; 5887 break; 5888 } 5889 } else { 5890 i_mdi_client_unlock(ct); 5891 } 5892 pip = next; 5893 } 5894 5895 if (rv == DDI_SUCCESS) { 5896 /* 5897 * Suspend of client devices is complete. Proceed 5898 * with pHCI suspend. 
5899 */ 5900 MDI_PHCI_SET_SUSPEND(ph); 5901 } else { 5902 /* 5903 * Revert back all the suspended client device states 5904 * to converse. 5905 */ 5906 pip = ph->ph_path_head; 5907 while (pip != failed_pip) { 5908 dev_info_t *cdip; 5909 MDI_PI_LOCK(pip); 5910 next = 5911 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5912 ct = MDI_PI(pip)->pi_client; 5913 i_mdi_client_lock(ct, pip); 5914 cdip = ct->ct_dip; 5915 MDI_PI_UNLOCK(pip); 5916 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5917 i_mdi_client_unlock(ct); 5918 (void) devi_attach(cdip, DDI_RESUME); 5919 } else { 5920 i_mdi_client_unlock(ct); 5921 } 5922 pip = next; 5923 } 5924 } 5925 break; 5926 5927 default: 5928 rv = DDI_FAILURE; 5929 break; 5930 } 5931 MDI_PHCI_UNLOCK(ph); 5932 return (rv); 5933 } 5934 5935 /*ARGSUSED*/ 5936 static int 5937 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5938 { 5939 int rv = DDI_SUCCESS; 5940 mdi_client_t *ct; 5941 5942 ct = i_devi_get_client(dip); 5943 if (ct == NULL) { 5944 return (rv); 5945 } 5946 5947 MDI_CLIENT_LOCK(ct); 5948 switch (cmd) { 5949 case DDI_DETACH: 5950 MDI_DEBUG(2, (MDI_NOTE, dip, 5951 "client pre_detach: called %p", 5952 (void *)ct)); 5953 MDI_CLIENT_SET_DETACH(ct); 5954 break; 5955 5956 case DDI_SUSPEND: 5957 MDI_DEBUG(2, (MDI_NOTE, dip, 5958 "client pre_suspend: called %p", 5959 (void *)ct)); 5960 MDI_CLIENT_SET_SUSPEND(ct); 5961 break; 5962 5963 default: 5964 rv = DDI_FAILURE; 5965 break; 5966 } 5967 MDI_CLIENT_UNLOCK(ct); 5968 return (rv); 5969 } 5970 5971 /* 5972 * mdi_post_detach(): 5973 * Post detach notification handler 5974 */ 5975 /*ARGSUSED*/ 5976 void 5977 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5978 { 5979 /* 5980 * Detach/Suspend of mpxio component failed. 
Update our state 5981 * too 5982 */ 5983 if (MDI_PHCI(dip)) 5984 i_mdi_phci_post_detach(dip, cmd, error); 5985 5986 if (MDI_CLIENT(dip)) 5987 i_mdi_client_post_detach(dip, cmd, error); 5988 } 5989 5990 /*ARGSUSED*/ 5991 static void 5992 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5993 { 5994 mdi_phci_t *ph; 5995 5996 /* 5997 * Detach/Suspend of phci component failed. Update our state 5998 * too 5999 */ 6000 ph = i_devi_get_phci(dip); 6001 if (ph == NULL) { 6002 return; 6003 } 6004 6005 MDI_PHCI_LOCK(ph); 6006 /* 6007 * Detach of pHCI failed. Restore back converse 6008 * state 6009 */ 6010 switch (cmd) { 6011 case DDI_DETACH: 6012 MDI_DEBUG(2, (MDI_NOTE, dip, 6013 "pHCI post_detach: called %p", 6014 (void *)ph)); 6015 if (error != DDI_SUCCESS) 6016 MDI_PHCI_SET_ATTACH(ph); 6017 break; 6018 6019 case DDI_SUSPEND: 6020 MDI_DEBUG(2, (MDI_NOTE, dip, 6021 "pHCI post_suspend: called %p", 6022 (void *)ph)); 6023 if (error != DDI_SUCCESS) 6024 MDI_PHCI_SET_RESUME(ph); 6025 break; 6026 } 6027 MDI_PHCI_UNLOCK(ph); 6028 } 6029 6030 /*ARGSUSED*/ 6031 static void 6032 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 6033 { 6034 mdi_client_t *ct; 6035 6036 ct = i_devi_get_client(dip); 6037 if (ct == NULL) { 6038 return; 6039 } 6040 MDI_CLIENT_LOCK(ct); 6041 /* 6042 * Detach of Client failed. 
Restore back converse 6043 * state 6044 */ 6045 switch (cmd) { 6046 case DDI_DETACH: 6047 MDI_DEBUG(2, (MDI_NOTE, dip, 6048 "client post_detach: called %p", (void *)ct)); 6049 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6050 MDI_DEBUG(4, (MDI_NOTE, dip, 6051 "i_mdi_pm_rele_client\n")); 6052 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6053 } else { 6054 MDI_DEBUG(4, (MDI_NOTE, dip, 6055 "i_mdi_pm_reset_client\n")); 6056 i_mdi_pm_reset_client(ct); 6057 } 6058 if (error != DDI_SUCCESS) 6059 MDI_CLIENT_SET_ATTACH(ct); 6060 break; 6061 6062 case DDI_SUSPEND: 6063 MDI_DEBUG(2, (MDI_NOTE, dip, 6064 "called %p", (void *)ct)); 6065 if (error != DDI_SUCCESS) 6066 MDI_CLIENT_SET_RESUME(ct); 6067 break; 6068 } 6069 MDI_CLIENT_UNLOCK(ct); 6070 } 6071 6072 int 6073 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 6074 { 6075 return (MDI_PI(pip)->pi_kstats ? 1 : 0); 6076 } 6077 6078 /* 6079 * create and install per-path (client - pHCI) statistics 6080 * I/O stats supported: nread, nwritten, reads, and writes 6081 * Error stats - hard errors, soft errors, & transport errors 6082 */ 6083 int 6084 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 6085 { 6086 kstat_t *kiosp, *kerrsp; 6087 struct pi_errs *nsp; 6088 struct mdi_pi_kstats *mdi_statp; 6089 6090 if (MDI_PI(pip)->pi_kstats != NULL) 6091 return (MDI_SUCCESS); 6092 6093 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 6094 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 6095 return (MDI_FAILURE); 6096 } 6097 6098 (void) strcat(ksname, ",err"); 6099 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 6100 KSTAT_TYPE_NAMED, 6101 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 6102 if (kerrsp == NULL) { 6103 kstat_delete(kiosp); 6104 return (MDI_FAILURE); 6105 } 6106 6107 nsp = (struct pi_errs *)kerrsp->ks_data; 6108 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 6109 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 6110 kstat_named_init(&nsp->pi_transerrs, "Transport 
Errors", 6111 KSTAT_DATA_UINT32); 6112 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 6113 KSTAT_DATA_UINT32); 6114 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 6115 KSTAT_DATA_UINT32); 6116 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 6117 KSTAT_DATA_UINT32); 6118 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 6119 KSTAT_DATA_UINT32); 6120 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 6121 KSTAT_DATA_UINT32); 6122 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 6123 KSTAT_DATA_UINT32); 6124 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 6125 6126 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 6127 mdi_statp->pi_kstat_ref = 1; 6128 mdi_statp->pi_kstat_iostats = kiosp; 6129 mdi_statp->pi_kstat_errstats = kerrsp; 6130 kstat_install(kiosp); 6131 kstat_install(kerrsp); 6132 MDI_PI(pip)->pi_kstats = mdi_statp; 6133 return (MDI_SUCCESS); 6134 } 6135 6136 /* 6137 * destroy per-path properties 6138 */ 6139 static void 6140 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 6141 { 6142 6143 struct mdi_pi_kstats *mdi_statp; 6144 6145 if (MDI_PI(pip)->pi_kstats == NULL) 6146 return; 6147 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 6148 return; 6149 6150 MDI_PI(pip)->pi_kstats = NULL; 6151 6152 /* 6153 * the kstat may be shared between multiple pathinfo nodes 6154 * decrement this pathinfo's usage, removing the kstats 6155 * themselves when the last pathinfo reference is removed. 
6156 */ 6157 ASSERT(mdi_statp->pi_kstat_ref > 0); 6158 if (--mdi_statp->pi_kstat_ref != 0) 6159 return; 6160 6161 kstat_delete(mdi_statp->pi_kstat_iostats); 6162 kstat_delete(mdi_statp->pi_kstat_errstats); 6163 kmem_free(mdi_statp, sizeof (*mdi_statp)); 6164 } 6165 6166 /* 6167 * update I/O paths KSTATS 6168 */ 6169 void 6170 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 6171 { 6172 kstat_t *iostatp; 6173 size_t xfer_cnt; 6174 6175 ASSERT(pip != NULL); 6176 6177 /* 6178 * I/O can be driven across a path prior to having path 6179 * statistics available, i.e. probe(9e). 6180 */ 6181 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 6182 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 6183 xfer_cnt = bp->b_bcount - bp->b_resid; 6184 if (bp->b_flags & B_READ) { 6185 KSTAT_IO_PTR(iostatp)->reads++; 6186 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 6187 } else { 6188 KSTAT_IO_PTR(iostatp)->writes++; 6189 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 6190 } 6191 } 6192 } 6193 6194 /* 6195 * Enable the path(specific client/target/initiator) 6196 * Enabling a path means that MPxIO may select the enabled path for routing 6197 * future I/O requests, subject to other path state constraints. 6198 */ 6199 int 6200 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 6201 { 6202 mdi_phci_t *ph; 6203 6204 ph = MDI_PI(pip)->pi_phci; 6205 if (ph == NULL) { 6206 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6207 "!failed: path %s %p: NULL ph", 6208 mdi_pi_spathname(pip), (void *)pip)); 6209 return (MDI_FAILURE); 6210 } 6211 6212 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 6213 MDI_ENABLE_OP); 6214 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6215 "!returning success pip = %p. ph = %p", 6216 (void *)pip, (void *)ph)); 6217 return (MDI_SUCCESS); 6218 6219 } 6220 6221 /* 6222 * Disable the path (specific client/target/initiator) 6223 * Disabling a path means that MPxIO will not select the disabled path for 6224 * routing any new I/O requests. 
6225 */ 6226 int 6227 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 6228 { 6229 mdi_phci_t *ph; 6230 6231 ph = MDI_PI(pip)->pi_phci; 6232 if (ph == NULL) { 6233 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6234 "!failed: path %s %p: NULL ph", 6235 mdi_pi_spathname(pip), (void *)pip)); 6236 return (MDI_FAILURE); 6237 } 6238 6239 (void) i_mdi_enable_disable_path(pip, 6240 ph->ph_vhci, flags, MDI_DISABLE_OP); 6241 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6242 "!returning success pip = %p. ph = %p", 6243 (void *)pip, (void *)ph)); 6244 return (MDI_SUCCESS); 6245 } 6246 6247 /* 6248 * disable the path to a particular pHCI (pHCI specified in the phci_path 6249 * argument) for a particular client (specified in the client_path argument). 6250 * Disabling a path means that MPxIO will not select the disabled path for 6251 * routing any new I/O requests. 6252 * NOTE: this will be removed once the NWS files are changed to use the new 6253 * mdi_{enable,disable}_path interfaces 6254 */ 6255 int 6256 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6257 { 6258 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 6259 } 6260 6261 /* 6262 * Enable the path to a particular pHCI (pHCI specified in the phci_path 6263 * argument) for a particular client (specified in the client_path argument). 6264 * Enabling a path means that MPxIO may select the enabled path for routing 6265 * future I/O requests, subject to other path state constraints. 6266 * NOTE: this will be removed once the NWS files are changed to use the new 6267 * mdi_{enable,disable}_path interfaces 6268 */ 6269 6270 int 6271 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6272 { 6273 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 6274 } 6275 6276 /* 6277 * Common routine for doing enable/disable. 
6278 */ 6279 static mdi_pathinfo_t * 6280 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6281 int op) 6282 { 6283 int sync_flag = 0; 6284 int rv; 6285 mdi_pathinfo_t *next; 6286 int (*f)() = NULL; 6287 6288 /* 6289 * Check to make sure the path is not already in the 6290 * requested state. If it is just return the next path 6291 * as we have nothing to do here. 6292 */ 6293 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6294 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6295 MDI_PI_LOCK(pip); 6296 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6297 MDI_PI_UNLOCK(pip); 6298 return (next); 6299 } 6300 6301 f = vh->vh_ops->vo_pi_state_change; 6302 6303 sync_flag = (flags << 8) & 0xf00; 6304 6305 /* 6306 * Do a callback into the mdi consumer to let it 6307 * know that path is about to get enabled/disabled. 6308 */ 6309 if (f != NULL) { 6310 rv = (*f)(vh->vh_dip, pip, 0, 6311 MDI_PI_EXT_STATE(pip), 6312 MDI_EXT_STATE_CHANGE | sync_flag | 6313 op | MDI_BEFORE_STATE_CHANGE); 6314 if (rv != MDI_SUCCESS) { 6315 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6316 "vo_pi_state_change: failed rv = %x", rv)); 6317 } 6318 } 6319 MDI_PI_LOCK(pip); 6320 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6321 6322 switch (flags) { 6323 case USER_DISABLE: 6324 if (op == MDI_DISABLE_OP) { 6325 MDI_PI_SET_USER_DISABLE(pip); 6326 } else { 6327 MDI_PI_SET_USER_ENABLE(pip); 6328 } 6329 break; 6330 case DRIVER_DISABLE: 6331 if (op == MDI_DISABLE_OP) { 6332 MDI_PI_SET_DRV_DISABLE(pip); 6333 } else { 6334 MDI_PI_SET_DRV_ENABLE(pip); 6335 } 6336 break; 6337 case DRIVER_DISABLE_TRANSIENT: 6338 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6339 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6340 } else { 6341 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6342 } 6343 break; 6344 } 6345 MDI_PI_UNLOCK(pip); 6346 /* 6347 * Do a callback into the mdi consumer to let it 6348 * know that path is now enabled/disabled. 
6349 */ 6350 if (f != NULL) { 6351 rv = (*f)(vh->vh_dip, pip, 0, 6352 MDI_PI_EXT_STATE(pip), 6353 MDI_EXT_STATE_CHANGE | sync_flag | 6354 op | MDI_AFTER_STATE_CHANGE); 6355 if (rv != MDI_SUCCESS) { 6356 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6357 "vo_pi_state_change failed: rv = %x", rv)); 6358 } 6359 } 6360 return (next); 6361 } 6362 6363 /* 6364 * Common routine for doing enable/disable. 6365 * NOTE: this will be removed once the NWS files are changed to use the new 6366 * mdi_{enable,disable}_path has been putback 6367 */ 6368 int 6369 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6370 { 6371 6372 mdi_phci_t *ph; 6373 mdi_vhci_t *vh = NULL; 6374 mdi_client_t *ct; 6375 mdi_pathinfo_t *next, *pip; 6376 int found_it; 6377 6378 ph = i_devi_get_phci(pdip); 6379 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6380 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6381 (void *)cdip)); 6382 if (ph == NULL) { 6383 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6384 "!failed: operation %d: NULL ph", op)); 6385 return (MDI_FAILURE); 6386 } 6387 6388 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6389 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6390 "!failed: invalid operation %d", op)); 6391 return (MDI_FAILURE); 6392 } 6393 6394 vh = ph->ph_vhci; 6395 6396 if (cdip == NULL) { 6397 /* 6398 * Need to mark the Phci as enabled/disabled. 6399 */ 6400 MDI_DEBUG(4, (MDI_NOTE, cdip ? 
cdip : pdip, 6401 "op %d for the phci", op)); 6402 MDI_PHCI_LOCK(ph); 6403 switch (flags) { 6404 case USER_DISABLE: 6405 if (op == MDI_DISABLE_OP) { 6406 MDI_PHCI_SET_USER_DISABLE(ph); 6407 } else { 6408 MDI_PHCI_SET_USER_ENABLE(ph); 6409 } 6410 break; 6411 case DRIVER_DISABLE: 6412 if (op == MDI_DISABLE_OP) { 6413 MDI_PHCI_SET_DRV_DISABLE(ph); 6414 } else { 6415 MDI_PHCI_SET_DRV_ENABLE(ph); 6416 } 6417 break; 6418 case DRIVER_DISABLE_TRANSIENT: 6419 if (op == MDI_DISABLE_OP) { 6420 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6421 } else { 6422 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6423 } 6424 break; 6425 default: 6426 MDI_PHCI_UNLOCK(ph); 6427 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6428 "!invalid flag argument= %d", flags)); 6429 } 6430 6431 /* 6432 * Phci has been disabled. Now try to enable/disable 6433 * path info's to each client. 6434 */ 6435 pip = ph->ph_path_head; 6436 while (pip != NULL) { 6437 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6438 } 6439 MDI_PHCI_UNLOCK(ph); 6440 } else { 6441 6442 /* 6443 * Disable a specific client. 6444 */ 6445 ct = i_devi_get_client(cdip); 6446 if (ct == NULL) { 6447 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6448 "!failed: operation = %d: NULL ct", op)); 6449 return (MDI_FAILURE); 6450 } 6451 6452 MDI_CLIENT_LOCK(ct); 6453 pip = ct->ct_path_head; 6454 found_it = 0; 6455 while (pip != NULL) { 6456 MDI_PI_LOCK(pip); 6457 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6458 if (MDI_PI(pip)->pi_phci == ph) { 6459 MDI_PI_UNLOCK(pip); 6460 found_it = 1; 6461 break; 6462 } 6463 MDI_PI_UNLOCK(pip); 6464 pip = next; 6465 } 6466 6467 6468 MDI_CLIENT_UNLOCK(ct); 6469 if (found_it == 0) { 6470 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6471 "!failed. Could not find corresponding pip\n")); 6472 return (MDI_FAILURE); 6473 } 6474 6475 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6476 } 6477 6478 MDI_DEBUG(5, (MDI_NOTE, cdip ? 
cdip : pdip, 6479 "!op %d returning success pdip = %p cdip = %p", 6480 op, (void *)pdip, (void *)cdip)); 6481 return (MDI_SUCCESS); 6482 } 6483 6484 /* 6485 * Ensure phci powered up 6486 */ 6487 static void 6488 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6489 { 6490 dev_info_t *ph_dip; 6491 6492 ASSERT(pip != NULL); 6493 ASSERT(MDI_PI_LOCKED(pip)); 6494 6495 if (MDI_PI(pip)->pi_pm_held) { 6496 return; 6497 } 6498 6499 ph_dip = mdi_pi_get_phci(pip); 6500 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6501 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6502 if (ph_dip == NULL) { 6503 return; 6504 } 6505 6506 MDI_PI_UNLOCK(pip); 6507 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d", 6508 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6509 pm_hold_power(ph_dip); 6510 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d", 6511 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6512 MDI_PI_LOCK(pip); 6513 6514 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6515 if (DEVI(ph_dip)->devi_pm_info) 6516 MDI_PI(pip)->pi_pm_held = 1; 6517 } 6518 6519 /* 6520 * Allow phci powered down 6521 */ 6522 static void 6523 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6524 { 6525 dev_info_t *ph_dip = NULL; 6526 6527 ASSERT(pip != NULL); 6528 ASSERT(MDI_PI_LOCKED(pip)); 6529 6530 if (MDI_PI(pip)->pi_pm_held == 0) { 6531 return; 6532 } 6533 6534 ph_dip = mdi_pi_get_phci(pip); 6535 ASSERT(ph_dip != NULL); 6536 6537 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6538 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6539 6540 MDI_PI_UNLOCK(pip); 6541 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6542 "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6543 pm_rele_power(ph_dip); 6544 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6545 "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6546 MDI_PI_LOCK(pip); 6547 6548 MDI_PI(pip)->pi_pm_held = 0; 6549 } 6550 6551 static void 6552 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6553 { 6554 ASSERT(MDI_CLIENT_LOCKED(ct)); 6555 6556 ct->ct_power_cnt += incr; 6557 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6558 "%p ct_power_cnt = 
%d incr = %d", 6559 (void *)ct, ct->ct_power_cnt, incr)); 6560 ASSERT(ct->ct_power_cnt >= 0); 6561 } 6562 6563 static void 6564 i_mdi_rele_all_phci(mdi_client_t *ct) 6565 { 6566 mdi_pathinfo_t *pip; 6567 6568 ASSERT(MDI_CLIENT_LOCKED(ct)); 6569 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6570 while (pip != NULL) { 6571 mdi_hold_path(pip); 6572 MDI_PI_LOCK(pip); 6573 i_mdi_pm_rele_pip(pip); 6574 MDI_PI_UNLOCK(pip); 6575 mdi_rele_path(pip); 6576 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6577 } 6578 } 6579 6580 static void 6581 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6582 { 6583 ASSERT(MDI_CLIENT_LOCKED(ct)); 6584 6585 if (i_ddi_devi_attached(ct->ct_dip)) { 6586 ct->ct_power_cnt -= decr; 6587 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6588 "%p ct_power_cnt = %d decr = %d", 6589 (void *)ct, ct->ct_power_cnt, decr)); 6590 } 6591 6592 ASSERT(ct->ct_power_cnt >= 0); 6593 if (ct->ct_power_cnt == 0) { 6594 i_mdi_rele_all_phci(ct); 6595 return; 6596 } 6597 } 6598 6599 static void 6600 i_mdi_pm_reset_client(mdi_client_t *ct) 6601 { 6602 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6603 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt)); 6604 ASSERT(MDI_CLIENT_LOCKED(ct)); 6605 ct->ct_power_cnt = 0; 6606 i_mdi_rele_all_phci(ct); 6607 ct->ct_powercnt_config = 0; 6608 ct->ct_powercnt_unconfig = 0; 6609 ct->ct_powercnt_reset = 1; 6610 } 6611 6612 static int 6613 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6614 { 6615 int ret; 6616 dev_info_t *ph_dip; 6617 6618 MDI_PI_LOCK(pip); 6619 i_mdi_pm_hold_pip(pip); 6620 6621 ph_dip = mdi_pi_get_phci(pip); 6622 MDI_PI_UNLOCK(pip); 6623 6624 /* bring all components of phci to full power */ 6625 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6626 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip), 6627 ddi_get_instance(ph_dip), (void *)pip)); 6628 6629 ret = pm_powerup(ph_dip); 6630 6631 if (ret == DDI_FAILURE) { 6632 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6633 "pm_powerup FAILED for %s%d %p", 6634 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), 6635 (void 
*)pip)); 6636 6637 MDI_PI_LOCK(pip); 6638 i_mdi_pm_rele_pip(pip); 6639 MDI_PI_UNLOCK(pip); 6640 return (MDI_FAILURE); 6641 } 6642 6643 return (MDI_SUCCESS); 6644 } 6645 6646 static int 6647 i_mdi_power_all_phci(mdi_client_t *ct) 6648 { 6649 mdi_pathinfo_t *pip; 6650 int succeeded = 0; 6651 6652 ASSERT(MDI_CLIENT_LOCKED(ct)); 6653 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6654 while (pip != NULL) { 6655 /* 6656 * Don't power if MDI_PATHINFO_STATE_FAULT 6657 * or MDI_PATHINFO_STATE_OFFLINE. 6658 */ 6659 if (MDI_PI_IS_INIT(pip) || 6660 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6661 mdi_hold_path(pip); 6662 MDI_CLIENT_UNLOCK(ct); 6663 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6664 succeeded = 1; 6665 6666 ASSERT(ct == MDI_PI(pip)->pi_client); 6667 MDI_CLIENT_LOCK(ct); 6668 mdi_rele_path(pip); 6669 } 6670 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6671 } 6672 6673 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6674 } 6675 6676 /* 6677 * mdi_bus_power(): 6678 * 1. Place the phci(s) into powered up state so that 6679 * client can do power management 6680 * 2. Ensure phci powered up as client power managing 6681 * Return Values: 6682 * MDI_SUCCESS 6683 * MDI_FAILURE 6684 */ 6685 int 6686 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6687 void *arg, void *result) 6688 { 6689 int ret = MDI_SUCCESS; 6690 pm_bp_child_pwrchg_t *bpc; 6691 mdi_client_t *ct; 6692 dev_info_t *cdip; 6693 pm_bp_has_changed_t *bphc; 6694 6695 /* 6696 * BUS_POWER_NOINVOL not supported 6697 */ 6698 if (op == BUS_POWER_NOINVOL) 6699 return (MDI_FAILURE); 6700 6701 /* 6702 * ignore other OPs. 
6703 * return quickly to save cou cycles on the ct processing 6704 */ 6705 switch (op) { 6706 case BUS_POWER_PRE_NOTIFICATION: 6707 case BUS_POWER_POST_NOTIFICATION: 6708 bpc = (pm_bp_child_pwrchg_t *)arg; 6709 cdip = bpc->bpc_dip; 6710 break; 6711 case BUS_POWER_HAS_CHANGED: 6712 bphc = (pm_bp_has_changed_t *)arg; 6713 cdip = bphc->bphc_dip; 6714 break; 6715 default: 6716 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6717 } 6718 6719 ASSERT(MDI_CLIENT(cdip)); 6720 6721 ct = i_devi_get_client(cdip); 6722 if (ct == NULL) 6723 return (MDI_FAILURE); 6724 6725 /* 6726 * wait till the mdi_pathinfo node state change are processed 6727 */ 6728 MDI_CLIENT_LOCK(ct); 6729 switch (op) { 6730 case BUS_POWER_PRE_NOTIFICATION: 6731 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6732 "BUS_POWER_PRE_NOTIFICATION:" 6733 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6734 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6735 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6736 6737 /* serialize power level change per client */ 6738 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6739 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6740 6741 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6742 6743 if (ct->ct_power_cnt == 0) { 6744 ret = i_mdi_power_all_phci(ct); 6745 } 6746 6747 /* 6748 * if new_level > 0: 6749 * - hold phci(s) 6750 * - power up phci(s) if not already 6751 * ignore power down 6752 */ 6753 if (bpc->bpc_nlevel > 0) { 6754 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 6755 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6756 "i_mdi_pm_hold_client\n")); 6757 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6758 } 6759 } 6760 break; 6761 case BUS_POWER_POST_NOTIFICATION: 6762 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6763 "BUS_POWER_POST_NOTIFICATION:" 6764 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d", 6765 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6766 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6767 *(int *)result)); 6768 6769 if (*(int *)result == DDI_SUCCESS) { 6770 if 
(bpc->bpc_nlevel > 0) { 6771 MDI_CLIENT_SET_POWER_UP(ct); 6772 } else { 6773 MDI_CLIENT_SET_POWER_DOWN(ct); 6774 } 6775 } 6776 6777 /* release the hold we did in pre-notification */ 6778 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6779 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6780 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6781 "i_mdi_pm_rele_client\n")); 6782 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6783 } 6784 6785 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6786 /* another thread might started attaching */ 6787 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6788 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6789 "i_mdi_pm_rele_client\n")); 6790 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6791 /* detaching has been taken care in pm_post_unconfig */ 6792 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6793 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6794 "i_mdi_pm_reset_client\n")); 6795 i_mdi_pm_reset_client(ct); 6796 } 6797 } 6798 6799 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6800 cv_broadcast(&ct->ct_powerchange_cv); 6801 6802 break; 6803 6804 /* need to do more */ 6805 case BUS_POWER_HAS_CHANGED: 6806 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6807 "BUS_POWER_HAS_CHANGED:" 6808 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6809 ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6810 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6811 6812 if (bphc->bphc_nlevel > 0 && 6813 bphc->bphc_nlevel > bphc->bphc_olevel) { 6814 if (ct->ct_power_cnt == 0) { 6815 ret = i_mdi_power_all_phci(ct); 6816 } 6817 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6818 "i_mdi_pm_hold_client\n")); 6819 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6820 } 6821 6822 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6823 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6824 "i_mdi_pm_rele_client\n")); 6825 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6826 } 6827 break; 6828 } 6829 6830 MDI_CLIENT_UNLOCK(ct); 6831 return (ret); 6832 } 6833 6834 static int 6835 
i_mdi_pm_pre_config_one(dev_info_t *child) 6836 { 6837 int ret = MDI_SUCCESS; 6838 mdi_client_t *ct; 6839 6840 ct = i_devi_get_client(child); 6841 if (ct == NULL) 6842 return (MDI_FAILURE); 6843 6844 MDI_CLIENT_LOCK(ct); 6845 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6846 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6847 6848 if (!MDI_CLIENT_IS_FAILED(ct)) { 6849 MDI_CLIENT_UNLOCK(ct); 6850 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n")); 6851 return (MDI_SUCCESS); 6852 } 6853 6854 if (ct->ct_powercnt_config) { 6855 MDI_CLIENT_UNLOCK(ct); 6856 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n")); 6857 return (MDI_SUCCESS); 6858 } 6859 6860 if (ct->ct_power_cnt == 0) { 6861 ret = i_mdi_power_all_phci(ct); 6862 } 6863 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6864 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6865 ct->ct_powercnt_config = 1; 6866 ct->ct_powercnt_reset = 0; 6867 MDI_CLIENT_UNLOCK(ct); 6868 return (ret); 6869 } 6870 6871 static int 6872 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6873 { 6874 int ret = MDI_SUCCESS; 6875 dev_info_t *cdip; 6876 int circ; 6877 6878 ASSERT(MDI_VHCI(vdip)); 6879 6880 /* ndi_devi_config_one */ 6881 if (child) { 6882 ASSERT(DEVI_BUSY_OWNED(vdip)); 6883 return (i_mdi_pm_pre_config_one(child)); 6884 } 6885 6886 /* devi_config_common */ 6887 ndi_devi_enter(vdip, &circ); 6888 cdip = ddi_get_child(vdip); 6889 while (cdip) { 6890 dev_info_t *next = ddi_get_next_sibling(cdip); 6891 6892 ret = i_mdi_pm_pre_config_one(cdip); 6893 if (ret != MDI_SUCCESS) 6894 break; 6895 cdip = next; 6896 } 6897 ndi_devi_exit(vdip, circ); 6898 return (ret); 6899 } 6900 6901 static int 6902 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6903 { 6904 int ret = MDI_SUCCESS; 6905 mdi_client_t *ct; 6906 6907 ct = i_devi_get_client(child); 6908 if (ct == NULL) 6909 return (MDI_FAILURE); 6910 6911 MDI_CLIENT_LOCK(ct); 6912 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6913 cv_wait(&ct->ct_powerchange_cv, 
&ct->ct_mutex); 6914 6915 if (!i_ddi_devi_attached(ct->ct_dip)) { 6916 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n")); 6917 MDI_CLIENT_UNLOCK(ct); 6918 return (MDI_SUCCESS); 6919 } 6920 6921 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6922 (flags & NDI_AUTODETACH)) { 6923 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n")); 6924 MDI_CLIENT_UNLOCK(ct); 6925 return (MDI_FAILURE); 6926 } 6927 6928 if (ct->ct_powercnt_unconfig) { 6929 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n")); 6930 MDI_CLIENT_UNLOCK(ct); 6931 *held = 1; 6932 return (MDI_SUCCESS); 6933 } 6934 6935 if (ct->ct_power_cnt == 0) { 6936 ret = i_mdi_power_all_phci(ct); 6937 } 6938 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6939 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6940 ct->ct_powercnt_unconfig = 1; 6941 ct->ct_powercnt_reset = 0; 6942 MDI_CLIENT_UNLOCK(ct); 6943 if (ret == MDI_SUCCESS) 6944 *held = 1; 6945 return (ret); 6946 } 6947 6948 static int 6949 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6950 int flags) 6951 { 6952 int ret = MDI_SUCCESS; 6953 dev_info_t *cdip; 6954 int circ; 6955 6956 ASSERT(MDI_VHCI(vdip)); 6957 *held = 0; 6958 6959 /* ndi_devi_unconfig_one */ 6960 if (child) { 6961 ASSERT(DEVI_BUSY_OWNED(vdip)); 6962 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6963 } 6964 6965 /* devi_unconfig_common */ 6966 ndi_devi_enter(vdip, &circ); 6967 cdip = ddi_get_child(vdip); 6968 while (cdip) { 6969 dev_info_t *next = ddi_get_next_sibling(cdip); 6970 6971 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6972 cdip = next; 6973 } 6974 ndi_devi_exit(vdip, circ); 6975 6976 if (*held) 6977 ret = MDI_SUCCESS; 6978 6979 return (ret); 6980 } 6981 6982 static void 6983 i_mdi_pm_post_config_one(dev_info_t *child) 6984 { 6985 mdi_client_t *ct; 6986 6987 ct = i_devi_get_client(child); 6988 if (ct == NULL) 6989 return; 6990 6991 MDI_CLIENT_LOCK(ct); 6992 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6993 cv_wait(&ct->ct_powerchange_cv, 
&ct->ct_mutex); 6994 6995 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6996 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n")); 6997 MDI_CLIENT_UNLOCK(ct); 6998 return; 6999 } 7000 7001 /* client has not been updated */ 7002 if (MDI_CLIENT_IS_FAILED(ct)) { 7003 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n")); 7004 MDI_CLIENT_UNLOCK(ct); 7005 return; 7006 } 7007 7008 /* another thread might have powered it down or detached it */ 7009 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 7010 !DEVI_IS_ATTACHING(ct->ct_dip)) || 7011 (!i_ddi_devi_attached(ct->ct_dip) && 7012 !DEVI_IS_ATTACHING(ct->ct_dip))) { 7013 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 7014 i_mdi_pm_reset_client(ct); 7015 } else { 7016 mdi_pathinfo_t *pip, *next; 7017 int valid_path_count = 0; 7018 7019 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 7020 pip = ct->ct_path_head; 7021 while (pip != NULL) { 7022 MDI_PI_LOCK(pip); 7023 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 7024 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 7025 valid_path_count ++; 7026 MDI_PI_UNLOCK(pip); 7027 pip = next; 7028 } 7029 i_mdi_pm_rele_client(ct, valid_path_count); 7030 } 7031 ct->ct_powercnt_config = 0; 7032 MDI_CLIENT_UNLOCK(ct); 7033 } 7034 7035 static void 7036 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 7037 { 7038 int circ; 7039 dev_info_t *cdip; 7040 7041 ASSERT(MDI_VHCI(vdip)); 7042 7043 /* ndi_devi_config_one */ 7044 if (child) { 7045 ASSERT(DEVI_BUSY_OWNED(vdip)); 7046 i_mdi_pm_post_config_one(child); 7047 return; 7048 } 7049 7050 /* devi_config_common */ 7051 ndi_devi_enter(vdip, &circ); 7052 cdip = ddi_get_child(vdip); 7053 while (cdip) { 7054 dev_info_t *next = ddi_get_next_sibling(cdip); 7055 7056 i_mdi_pm_post_config_one(cdip); 7057 cdip = next; 7058 } 7059 ndi_devi_exit(vdip, circ); 7060 } 7061 7062 static void 7063 i_mdi_pm_post_unconfig_one(dev_info_t *child) 7064 { 7065 mdi_client_t *ct; 7066 7067 ct = i_devi_get_client(child); 7068 if (ct 
== NULL) 7069 return; 7070 7071 MDI_CLIENT_LOCK(ct); 7072 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 7073 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 7074 7075 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 7076 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n")); 7077 MDI_CLIENT_UNLOCK(ct); 7078 return; 7079 } 7080 7081 /* failure detaching or another thread just attached it */ 7082 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 7083 i_ddi_devi_attached(ct->ct_dip)) || 7084 (!i_ddi_devi_attached(ct->ct_dip) && 7085 !DEVI_IS_ATTACHING(ct->ct_dip))) { 7086 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 7087 i_mdi_pm_reset_client(ct); 7088 } else { 7089 mdi_pathinfo_t *pip, *next; 7090 int valid_path_count = 0; 7091 7092 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 7093 pip = ct->ct_path_head; 7094 while (pip != NULL) { 7095 MDI_PI_LOCK(pip); 7096 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 7097 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 7098 valid_path_count ++; 7099 MDI_PI_UNLOCK(pip); 7100 pip = next; 7101 } 7102 i_mdi_pm_rele_client(ct, valid_path_count); 7103 ct->ct_powercnt_unconfig = 0; 7104 } 7105 7106 MDI_CLIENT_UNLOCK(ct); 7107 } 7108 7109 static void 7110 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 7111 { 7112 int circ; 7113 dev_info_t *cdip; 7114 7115 ASSERT(MDI_VHCI(vdip)); 7116 7117 if (!held) { 7118 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held)); 7119 return; 7120 } 7121 7122 if (child) { 7123 ASSERT(DEVI_BUSY_OWNED(vdip)); 7124 i_mdi_pm_post_unconfig_one(child); 7125 return; 7126 } 7127 7128 ndi_devi_enter(vdip, &circ); 7129 cdip = ddi_get_child(vdip); 7130 while (cdip) { 7131 dev_info_t *next = ddi_get_next_sibling(cdip); 7132 7133 i_mdi_pm_post_unconfig_one(cdip); 7134 cdip = next; 7135 } 7136 ndi_devi_exit(vdip, circ); 7137 } 7138 7139 int 7140 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 7141 { 7142 int circ, ret = MDI_SUCCESS; 7143 
dev_info_t *client_dip = NULL; 7144 mdi_client_t *ct; 7145 7146 /* 7147 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 7148 * Power up pHCI for the named client device. 7149 * Note: Before the client is enumerated under vhci by phci, 7150 * client_dip can be NULL. Then proceed to power up all the 7151 * pHCIs. 7152 */ 7153 if (devnm != NULL) { 7154 ndi_devi_enter(vdip, &circ); 7155 client_dip = ndi_devi_findchild(vdip, devnm); 7156 } 7157 7158 MDI_DEBUG(4, (MDI_NOTE, vdip, 7159 "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip)); 7160 7161 switch (op) { 7162 case MDI_PM_PRE_CONFIG: 7163 ret = i_mdi_pm_pre_config(vdip, client_dip); 7164 break; 7165 7166 case MDI_PM_PRE_UNCONFIG: 7167 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 7168 flags); 7169 break; 7170 7171 case MDI_PM_POST_CONFIG: 7172 i_mdi_pm_post_config(vdip, client_dip); 7173 break; 7174 7175 case MDI_PM_POST_UNCONFIG: 7176 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 7177 break; 7178 7179 case MDI_PM_HOLD_POWER: 7180 case MDI_PM_RELE_POWER: 7181 ASSERT(args); 7182 7183 client_dip = (dev_info_t *)args; 7184 ASSERT(MDI_CLIENT(client_dip)); 7185 7186 ct = i_devi_get_client(client_dip); 7187 MDI_CLIENT_LOCK(ct); 7188 7189 if (op == MDI_PM_HOLD_POWER) { 7190 if (ct->ct_power_cnt == 0) { 7191 (void) i_mdi_power_all_phci(ct); 7192 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7193 "i_mdi_pm_hold_client\n")); 7194 i_mdi_pm_hold_client(ct, ct->ct_path_count); 7195 } 7196 } else { 7197 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 7198 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7199 "i_mdi_pm_rele_client\n")); 7200 i_mdi_pm_rele_client(ct, ct->ct_path_count); 7201 } else { 7202 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7203 "i_mdi_pm_reset_client\n")); 7204 i_mdi_pm_reset_client(ct); 7205 } 7206 } 7207 7208 MDI_CLIENT_UNLOCK(ct); 7209 break; 7210 7211 default: 7212 break; 7213 } 7214 7215 if (devnm) 7216 ndi_devi_exit(vdip, circ); 7217 7218 return (ret); 7219 } 7220 7221 int 7222 
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 7223 { 7224 mdi_vhci_t *vhci; 7225 7226 if (!MDI_VHCI(dip)) 7227 return (MDI_FAILURE); 7228 7229 if (mdi_class) { 7230 vhci = DEVI(dip)->devi_mdi_xhci; 7231 ASSERT(vhci); 7232 *mdi_class = vhci->vh_class; 7233 } 7234 7235 return (MDI_SUCCESS); 7236 } 7237 7238 int 7239 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 7240 { 7241 mdi_phci_t *phci; 7242 7243 if (!MDI_PHCI(dip)) 7244 return (MDI_FAILURE); 7245 7246 if (mdi_class) { 7247 phci = DEVI(dip)->devi_mdi_xhci; 7248 ASSERT(phci); 7249 *mdi_class = phci->ph_vhci->vh_class; 7250 } 7251 7252 return (MDI_SUCCESS); 7253 } 7254 7255 int 7256 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 7257 { 7258 mdi_client_t *client; 7259 7260 if (!MDI_CLIENT(dip)) 7261 return (MDI_FAILURE); 7262 7263 if (mdi_class) { 7264 client = DEVI(dip)->devi_mdi_client; 7265 ASSERT(client); 7266 *mdi_class = client->ct_vhci->vh_class; 7267 } 7268 7269 return (MDI_SUCCESS); 7270 } 7271 7272 void * 7273 mdi_client_get_vhci_private(dev_info_t *dip) 7274 { 7275 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7276 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7277 mdi_client_t *ct; 7278 ct = i_devi_get_client(dip); 7279 return (ct->ct_vprivate); 7280 } 7281 return (NULL); 7282 } 7283 7284 void 7285 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7286 { 7287 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7288 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7289 mdi_client_t *ct; 7290 ct = i_devi_get_client(dip); 7291 ct->ct_vprivate = data; 7292 } 7293 } 7294 /* 7295 * mdi_pi_get_vhci_private(): 7296 * Get the vhci private information associated with the 7297 * mdi_pathinfo node 7298 */ 7299 void * 7300 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7301 { 7302 caddr_t vprivate = NULL; 7303 if (pip) { 7304 vprivate = MDI_PI(pip)->pi_vprivate; 7305 } 7306 return (vprivate); 7307 } 7308 7309 /* 7310 * 
mdi_pi_set_vhci_private(): 7311 * Set the vhci private information in the mdi_pathinfo node 7312 */ 7313 void 7314 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 7315 { 7316 if (pip) { 7317 MDI_PI(pip)->pi_vprivate = priv; 7318 } 7319 } 7320 7321 /* 7322 * mdi_phci_get_vhci_private(): 7323 * Get the vhci private information associated with the 7324 * mdi_phci node 7325 */ 7326 void * 7327 mdi_phci_get_vhci_private(dev_info_t *dip) 7328 { 7329 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7330 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7331 mdi_phci_t *ph; 7332 ph = i_devi_get_phci(dip); 7333 return (ph->ph_vprivate); 7334 } 7335 return (NULL); 7336 } 7337 7338 /* 7339 * mdi_phci_set_vhci_private(): 7340 * Set the vhci private information in the mdi_phci node 7341 */ 7342 void 7343 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7344 { 7345 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7346 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7347 mdi_phci_t *ph; 7348 ph = i_devi_get_phci(dip); 7349 ph->ph_vprivate = priv; 7350 } 7351 } 7352 7353 int 7354 mdi_pi_ishidden(mdi_pathinfo_t *pip) 7355 { 7356 return (MDI_PI_FLAGS_IS_HIDDEN(pip)); 7357 } 7358 7359 int 7360 mdi_pi_device_isremoved(mdi_pathinfo_t *pip) 7361 { 7362 return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)); 7363 } 7364 7365 /* Return 1 if all client paths are device_removed */ 7366 static int 7367 i_mdi_client_all_devices_removed(mdi_client_t *ct) 7368 { 7369 mdi_pathinfo_t *pip; 7370 int all_devices_removed = 1; 7371 7372 MDI_CLIENT_LOCK(ct); 7373 for (pip = ct->ct_path_head; pip; 7374 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) { 7375 if (!mdi_pi_device_isremoved(pip)) { 7376 all_devices_removed = 0; 7377 break; 7378 } 7379 } 7380 MDI_CLIENT_UNLOCK(ct); 7381 return (all_devices_removed); 7382 } 7383 7384 /* 7385 * When processing path hotunplug, represent device removal. 
7386 */ 7387 int 7388 mdi_pi_device_remove(mdi_pathinfo_t *pip) 7389 { 7390 mdi_client_t *ct; 7391 7392 MDI_PI_LOCK(pip); 7393 if (mdi_pi_device_isremoved(pip)) { 7394 MDI_PI_UNLOCK(pip); 7395 return (0); 7396 } 7397 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip); 7398 MDI_PI_FLAGS_SET_HIDDEN(pip); 7399 MDI_PI_UNLOCK(pip); 7400 7401 /* 7402 * If all paths associated with the client are now DEVICE_REMOVED, 7403 * reflect DEVICE_REMOVED in the client. 7404 */ 7405 ct = MDI_PI(pip)->pi_client; 7406 if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct)) 7407 (void) ndi_devi_device_remove(ct->ct_dip); 7408 else 7409 i_ddi_di_cache_invalidate(); 7410 7411 return (1); 7412 } 7413 7414 /* 7415 * When processing hotplug, if a path marked mdi_pi_device_isremoved() 7416 * is now accessible then this interfaces is used to represent device insertion. 7417 */ 7418 int 7419 mdi_pi_device_insert(mdi_pathinfo_t *pip) 7420 { 7421 MDI_PI_LOCK(pip); 7422 if (!mdi_pi_device_isremoved(pip)) { 7423 MDI_PI_UNLOCK(pip); 7424 return (0); 7425 } 7426 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip); 7427 MDI_PI_FLAGS_CLR_HIDDEN(pip); 7428 MDI_PI_UNLOCK(pip); 7429 7430 i_ddi_di_cache_invalidate(); 7431 7432 return (1); 7433 } 7434 7435 /* 7436 * List of vhci class names: 7437 * A vhci class name must be in this list only if the corresponding vhci 7438 * driver intends to use the mdi provided bus config implementation 7439 * (i.e., mdi_vhci_bus_config()). 7440 */ 7441 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7442 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7443 7444 /* 7445 * During boot time, the on-disk vhci cache for every vhci class is read 7446 * in the form of an nvlist and stored here. 
7447 */ 7448 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7449 7450 /* nvpair names in vhci cache nvlist */ 7451 #define MDI_VHCI_CACHE_VERSION 1 7452 #define MDI_NVPNAME_VERSION "version" 7453 #define MDI_NVPNAME_PHCIS "phcis" 7454 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7455 7456 /* 7457 * Given vhci class name, return its on-disk vhci cache filename. 7458 * Memory for the returned filename which includes the full path is allocated 7459 * by this function. 7460 */ 7461 static char * 7462 vhclass2vhcache_filename(char *vhclass) 7463 { 7464 char *filename; 7465 int len; 7466 static char *fmt = "/etc/devices/mdi_%s_cache"; 7467 7468 /* 7469 * fmt contains the on-disk vhci cache file name format; 7470 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 7471 */ 7472 7473 /* the -1 below is to account for "%s" in the format string */ 7474 len = strlen(fmt) + strlen(vhclass) - 1; 7475 filename = kmem_alloc(len, KM_SLEEP); 7476 (void) snprintf(filename, len, fmt, vhclass); 7477 ASSERT(len == (strlen(filename) + 1)); 7478 return (filename); 7479 } 7480 7481 /* 7482 * initialize the vhci cache related data structures and read the on-disk 7483 * vhci cached data into memory. 7484 */ 7485 static void 7486 setup_vhci_cache(mdi_vhci_t *vh) 7487 { 7488 mdi_vhci_config_t *vhc; 7489 mdi_vhci_cache_t *vhcache; 7490 int i; 7491 nvlist_t *nvl = NULL; 7492 7493 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7494 vh->vh_config = vhc; 7495 vhcache = &vhc->vhc_vhcache; 7496 7497 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7498 7499 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7500 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7501 7502 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7503 7504 /* 7505 * Create string hash; same as mod_hash_create_strhash() except that 7506 * we use NULL key destructor. 
7507 */ 7508 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7509 mdi_bus_config_cache_hash_size, 7510 mod_hash_null_keydtor, mod_hash_null_valdtor, 7511 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7512 7513 /* 7514 * The on-disk vhci cache is read during booting prior to the 7515 * lights-out period by mdi_read_devices_files(). 7516 */ 7517 for (i = 0; i < N_VHCI_CLASSES; i++) { 7518 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7519 nvl = vhcache_nvl[i]; 7520 vhcache_nvl[i] = NULL; 7521 break; 7522 } 7523 } 7524 7525 /* 7526 * this is to cover the case of some one manually causing unloading 7527 * (or detaching) and reloading (or attaching) of a vhci driver. 7528 */ 7529 if (nvl == NULL && modrootloaded) 7530 nvl = read_on_disk_vhci_cache(vh->vh_class); 7531 7532 if (nvl != NULL) { 7533 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7534 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7535 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7536 else { 7537 cmn_err(CE_WARN, 7538 "%s: data file corrupted, will recreate", 7539 vhc->vhc_vhcache_filename); 7540 } 7541 rw_exit(&vhcache->vhcache_lock); 7542 nvlist_free(nvl); 7543 } 7544 7545 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7546 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7547 7548 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7549 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7550 } 7551 7552 /* 7553 * free all vhci cache related resources 7554 */ 7555 static int 7556 destroy_vhci_cache(mdi_vhci_t *vh) 7557 { 7558 mdi_vhci_config_t *vhc = vh->vh_config; 7559 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7560 mdi_vhcache_phci_t *cphci, *cphci_next; 7561 mdi_vhcache_client_t *cct, *cct_next; 7562 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7563 7564 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7565 return (MDI_FAILURE); 7566 7567 kmem_free(vhc->vhc_vhcache_filename, 7568 strlen(vhc->vhc_vhcache_filename) + 1); 7569 7570 
mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7571 7572 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7573 cphci = cphci_next) { 7574 cphci_next = cphci->cphci_next; 7575 free_vhcache_phci(cphci); 7576 } 7577 7578 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7579 cct_next = cct->cct_next; 7580 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7581 cpi_next = cpi->cpi_next; 7582 free_vhcache_pathinfo(cpi); 7583 } 7584 free_vhcache_client(cct); 7585 } 7586 7587 rw_destroy(&vhcache->vhcache_lock); 7588 7589 mutex_destroy(&vhc->vhc_lock); 7590 cv_destroy(&vhc->vhc_cv); 7591 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7592 return (MDI_SUCCESS); 7593 } 7594 7595 /* 7596 * Stop all vhci cache related async threads and free their resources. 7597 */ 7598 static int 7599 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7600 { 7601 mdi_async_client_config_t *acc, *acc_next; 7602 7603 mutex_enter(&vhc->vhc_lock); 7604 vhc->vhc_flags |= MDI_VHC_EXIT; 7605 ASSERT(vhc->vhc_acc_thrcount >= 0); 7606 cv_broadcast(&vhc->vhc_cv); 7607 7608 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7609 vhc->vhc_acc_thrcount != 0) { 7610 mutex_exit(&vhc->vhc_lock); 7611 delay_random(mdi_delay); 7612 mutex_enter(&vhc->vhc_lock); 7613 } 7614 7615 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7616 7617 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7618 acc_next = acc->acc_next; 7619 free_async_client_config(acc); 7620 } 7621 vhc->vhc_acc_list_head = NULL; 7622 vhc->vhc_acc_list_tail = NULL; 7623 vhc->vhc_acc_count = 0; 7624 7625 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7626 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7627 mutex_exit(&vhc->vhc_lock); 7628 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7629 vhcache_dirty(vhc); 7630 return (MDI_FAILURE); 7631 } 7632 } else 7633 mutex_exit(&vhc->vhc_lock); 7634 7635 if (callb_delete(vhc->vhc_cbid) != 0) 7636 return (MDI_FAILURE); 7637 7638 return (MDI_SUCCESS); 7639 } 7640 7641 
/* 7642 * Stop vhci cache flush thread 7643 */ 7644 /* ARGSUSED */ 7645 static boolean_t 7646 stop_vhcache_flush_thread(void *arg, int code) 7647 { 7648 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7649 7650 mutex_enter(&vhc->vhc_lock); 7651 vhc->vhc_flags |= MDI_VHC_EXIT; 7652 cv_broadcast(&vhc->vhc_cv); 7653 7654 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7655 mutex_exit(&vhc->vhc_lock); 7656 delay_random(mdi_delay); 7657 mutex_enter(&vhc->vhc_lock); 7658 } 7659 7660 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7661 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7662 mutex_exit(&vhc->vhc_lock); 7663 (void) flush_vhcache(vhc, 1); 7664 } else 7665 mutex_exit(&vhc->vhc_lock); 7666 7667 return (B_TRUE); 7668 } 7669 7670 /* 7671 * Enqueue the vhcache phci (cphci) at the tail of the list 7672 */ 7673 static void 7674 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7675 { 7676 cphci->cphci_next = NULL; 7677 if (vhcache->vhcache_phci_head == NULL) 7678 vhcache->vhcache_phci_head = cphci; 7679 else 7680 vhcache->vhcache_phci_tail->cphci_next = cphci; 7681 vhcache->vhcache_phci_tail = cphci; 7682 } 7683 7684 /* 7685 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7686 */ 7687 static void 7688 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7689 mdi_vhcache_pathinfo_t *cpi) 7690 { 7691 cpi->cpi_next = NULL; 7692 if (cct->cct_cpi_head == NULL) 7693 cct->cct_cpi_head = cpi; 7694 else 7695 cct->cct_cpi_tail->cpi_next = cpi; 7696 cct->cct_cpi_tail = cpi; 7697 } 7698 7699 /* 7700 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7701 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7702 * flag set come at the beginning of the list. All cpis which have this 7703 * flag set come at the end of the list. 
7704 */ 7705 static void 7706 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7707 mdi_vhcache_pathinfo_t *newcpi) 7708 { 7709 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7710 7711 if (cct->cct_cpi_head == NULL || 7712 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7713 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7714 else { 7715 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7716 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7717 prev_cpi = cpi, cpi = cpi->cpi_next) 7718 ; 7719 7720 if (prev_cpi == NULL) 7721 cct->cct_cpi_head = newcpi; 7722 else 7723 prev_cpi->cpi_next = newcpi; 7724 7725 newcpi->cpi_next = cpi; 7726 7727 if (cpi == NULL) 7728 cct->cct_cpi_tail = newcpi; 7729 } 7730 } 7731 7732 /* 7733 * Enqueue the vhcache client (cct) at the tail of the list 7734 */ 7735 static void 7736 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7737 mdi_vhcache_client_t *cct) 7738 { 7739 cct->cct_next = NULL; 7740 if (vhcache->vhcache_client_head == NULL) 7741 vhcache->vhcache_client_head = cct; 7742 else 7743 vhcache->vhcache_client_tail->cct_next = cct; 7744 vhcache->vhcache_client_tail = cct; 7745 } 7746 7747 static void 7748 free_string_array(char **str, int nelem) 7749 { 7750 int i; 7751 7752 if (str) { 7753 for (i = 0; i < nelem; i++) { 7754 if (str[i]) 7755 kmem_free(str[i], strlen(str[i]) + 1); 7756 } 7757 kmem_free(str, sizeof (char *) * nelem); 7758 } 7759 } 7760 7761 static void 7762 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7763 { 7764 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7765 kmem_free(cphci, sizeof (*cphci)); 7766 } 7767 7768 static void 7769 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7770 { 7771 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7772 kmem_free(cpi, sizeof (*cpi)); 7773 } 7774 7775 static void 7776 free_vhcache_client(mdi_vhcache_client_t *cct) 7777 { 7778 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7779 kmem_free(cct, sizeof (*cct)); 7780 } 7781 7782 
static char * 7783 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7784 { 7785 char *name_addr; 7786 int len; 7787 7788 len = strlen(ct_name) + strlen(ct_addr) + 2; 7789 name_addr = kmem_alloc(len, KM_SLEEP); 7790 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7791 7792 if (ret_len) 7793 *ret_len = len; 7794 return (name_addr); 7795 } 7796 7797 /* 7798 * Copy the contents of paddrnvl to vhci cache. 7799 * paddrnvl nvlist contains path information for a vhci client. 7800 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7801 */ 7802 static void 7803 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7804 mdi_vhcache_client_t *cct) 7805 { 7806 nvpair_t *nvp = NULL; 7807 mdi_vhcache_pathinfo_t *cpi; 7808 uint_t nelem; 7809 uint32_t *val; 7810 7811 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7812 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7813 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7814 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7815 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7816 ASSERT(nelem == 2); 7817 cpi->cpi_cphci = cphci_list[val[0]]; 7818 cpi->cpi_flags = val[1]; 7819 enqueue_tail_vhcache_pathinfo(cct, cpi); 7820 } 7821 } 7822 7823 /* 7824 * Copy the contents of caddrmapnvl to vhci cache. 7825 * caddrmapnvl nvlist contains vhci client address to phci client address 7826 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7827 * this nvlist. 
7828 */ 7829 static void 7830 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7831 mdi_vhcache_phci_t *cphci_list[]) 7832 { 7833 nvpair_t *nvp = NULL; 7834 nvlist_t *paddrnvl; 7835 mdi_vhcache_client_t *cct; 7836 7837 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7838 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7839 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7840 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7841 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7842 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7843 /* the client must contain at least one path */ 7844 ASSERT(cct->cct_cpi_head != NULL); 7845 7846 enqueue_vhcache_client(vhcache, cct); 7847 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7848 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7849 } 7850 } 7851 7852 /* 7853 * Copy the contents of the main nvlist to vhci cache. 7854 * 7855 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7856 * The nvlist contains the mappings between the vhci client addresses and 7857 * their corresponding phci client addresses. 7858 * 7859 * The structure of the nvlist is as follows: 7860 * 7861 * Main nvlist: 7862 * NAME TYPE DATA 7863 * version int32 version number 7864 * phcis string array array of phci paths 7865 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7866 * 7867 * structure of c2paddrs_nvl: 7868 * NAME TYPE DATA 7869 * caddr1 nvlist_t paddrs_nvl1 7870 * caddr2 nvlist_t paddrs_nvl2 7871 * ... 7872 * where caddr1, caddr2, ... are vhci client name and addresses in the 7873 * form of "<clientname>@<clientaddress>". 7874 * (for example: "ssd@2000002037cd9f72"); 7875 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7876 * 7877 * structure of paddrs_nvl: 7878 * NAME TYPE DATA 7879 * pi_addr1 uint32_array (phci-id, cpi_flags) 7880 * pi_addr2 uint32_array (phci-id, cpi_flags) 7881 * ... 7882 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7883 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7884 * phci-ids are integers that identify pHCIs to which the 7885 * the bus specific address belongs to. These integers are used as an index 7886 * into to the phcis string array in the main nvlist to get the pHCI path. 7887 */ 7888 static int 7889 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7890 { 7891 char **phcis, **phci_namep; 7892 uint_t nphcis; 7893 mdi_vhcache_phci_t *cphci, **cphci_list; 7894 nvlist_t *caddrmapnvl; 7895 int32_t ver; 7896 int i; 7897 size_t cphci_list_size; 7898 7899 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7900 7901 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7902 ver != MDI_VHCI_CACHE_VERSION) 7903 return (MDI_FAILURE); 7904 7905 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7906 &nphcis) != 0) 7907 return (MDI_SUCCESS); 7908 7909 ASSERT(nphcis > 0); 7910 7911 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7912 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7913 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7914 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7915 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7916 enqueue_vhcache_phci(vhcache, cphci); 7917 cphci_list[i] = cphci; 7918 } 7919 7920 ASSERT(vhcache->vhcache_phci_head != NULL); 7921 7922 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7923 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7924 7925 kmem_free(cphci_list, cphci_list_size); 7926 return (MDI_SUCCESS); 7927 } 7928 7929 /* 7930 * Build paddrnvl for the specified client using the information in the 7931 * vhci cache and add it to the caddrmapnnvl. 7932 * Returns 0 on success, errno on failure. 
7933 */ 7934 static int 7935 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7936 nvlist_t *caddrmapnvl) 7937 { 7938 mdi_vhcache_pathinfo_t *cpi; 7939 nvlist_t *nvl; 7940 int err; 7941 uint32_t val[2]; 7942 7943 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7944 7945 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7946 return (err); 7947 7948 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7949 val[0] = cpi->cpi_cphci->cphci_id; 7950 val[1] = cpi->cpi_flags; 7951 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7952 != 0) 7953 goto out; 7954 } 7955 7956 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7957 out: 7958 nvlist_free(nvl); 7959 return (err); 7960 } 7961 7962 /* 7963 * Build caddrmapnvl using the information in the vhci cache 7964 * and add it to the mainnvl. 7965 * Returns 0 on success, errno on failure. 7966 */ 7967 static int 7968 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7969 { 7970 mdi_vhcache_client_t *cct; 7971 nvlist_t *nvl; 7972 int err; 7973 7974 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7975 7976 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7977 return (err); 7978 7979 for (cct = vhcache->vhcache_client_head; cct != NULL; 7980 cct = cct->cct_next) { 7981 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7982 goto out; 7983 } 7984 7985 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7986 out: 7987 nvlist_free(nvl); 7988 return (err); 7989 } 7990 7991 /* 7992 * Build nvlist using the information in the vhci cache. 7993 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7994 * Returns nvl on success, NULL on failure. 
7995 */ 7996 static nvlist_t * 7997 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7998 { 7999 mdi_vhcache_phci_t *cphci; 8000 uint_t phci_count; 8001 char **phcis; 8002 nvlist_t *nvl; 8003 int err, i; 8004 8005 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 8006 nvl = NULL; 8007 goto out; 8008 } 8009 8010 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 8011 MDI_VHCI_CACHE_VERSION)) != 0) 8012 goto out; 8013 8014 rw_enter(&vhcache->vhcache_lock, RW_READER); 8015 if (vhcache->vhcache_phci_head == NULL) { 8016 rw_exit(&vhcache->vhcache_lock); 8017 return (nvl); 8018 } 8019 8020 phci_count = 0; 8021 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8022 cphci = cphci->cphci_next) 8023 cphci->cphci_id = phci_count++; 8024 8025 /* build phci pathname list */ 8026 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 8027 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 8028 cphci = cphci->cphci_next, i++) 8029 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 8030 8031 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 8032 phci_count); 8033 free_string_array(phcis, phci_count); 8034 8035 if (err == 0 && 8036 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 8037 rw_exit(&vhcache->vhcache_lock); 8038 return (nvl); 8039 } 8040 8041 rw_exit(&vhcache->vhcache_lock); 8042 out: 8043 if (nvl) 8044 nvlist_free(nvl); 8045 return (NULL); 8046 } 8047 8048 /* 8049 * Lookup vhcache phci structure for the specified phci path. 8050 */ 8051 static mdi_vhcache_phci_t * 8052 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 8053 { 8054 mdi_vhcache_phci_t *cphci; 8055 8056 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8057 8058 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8059 cphci = cphci->cphci_next) { 8060 if (strcmp(cphci->cphci_path, phci_path) == 0) 8061 return (cphci); 8062 } 8063 8064 return (NULL); 8065 } 8066 8067 /* 8068 * Lookup vhcache phci structure for the specified phci. 
8069 */ 8070 static mdi_vhcache_phci_t * 8071 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 8072 { 8073 mdi_vhcache_phci_t *cphci; 8074 8075 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8076 8077 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8078 cphci = cphci->cphci_next) { 8079 if (cphci->cphci_phci == ph) 8080 return (cphci); 8081 } 8082 8083 return (NULL); 8084 } 8085 8086 /* 8087 * Add the specified phci to the vhci cache if not already present. 8088 */ 8089 static void 8090 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 8091 { 8092 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8093 mdi_vhcache_phci_t *cphci; 8094 char *pathname; 8095 int cache_updated; 8096 8097 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8098 8099 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 8100 (void) ddi_pathname(ph->ph_dip, pathname); 8101 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 8102 != NULL) { 8103 cphci->cphci_phci = ph; 8104 cache_updated = 0; 8105 } else { 8106 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 8107 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 8108 cphci->cphci_phci = ph; 8109 enqueue_vhcache_phci(vhcache, cphci); 8110 cache_updated = 1; 8111 } 8112 8113 rw_exit(&vhcache->vhcache_lock); 8114 8115 /* 8116 * Since a new phci has been added, reset 8117 * vhc_path_discovery_cutoff_time to allow for discovery of paths 8118 * during next vhcache_discover_paths(). 8119 */ 8120 mutex_enter(&vhc->vhc_lock); 8121 vhc->vhc_path_discovery_cutoff_time = 0; 8122 mutex_exit(&vhc->vhc_lock); 8123 8124 kmem_free(pathname, MAXPATHLEN); 8125 if (cache_updated) 8126 vhcache_dirty(vhc); 8127 } 8128 8129 /* 8130 * Remove the reference to the specified phci from the vhci cache. 
8131 */ 8132 static void 8133 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 8134 { 8135 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8136 mdi_vhcache_phci_t *cphci; 8137 8138 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8139 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 8140 /* do not remove the actual mdi_vhcache_phci structure */ 8141 cphci->cphci_phci = NULL; 8142 } 8143 rw_exit(&vhcache->vhcache_lock); 8144 } 8145 8146 static void 8147 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 8148 mdi_vhcache_lookup_token_t *src) 8149 { 8150 if (src == NULL) { 8151 dst->lt_cct = NULL; 8152 dst->lt_cct_lookup_time = 0; 8153 } else { 8154 dst->lt_cct = src->lt_cct; 8155 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 8156 } 8157 } 8158 8159 /* 8160 * Look up vhcache client for the specified client. 8161 */ 8162 static mdi_vhcache_client_t * 8163 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 8164 mdi_vhcache_lookup_token_t *token) 8165 { 8166 mod_hash_val_t hv; 8167 char *name_addr; 8168 int len; 8169 8170 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8171 8172 /* 8173 * If no vhcache clean occurred since the last lookup, we can 8174 * simply return the cct from the last lookup operation. 8175 * It works because ccts are never freed except during the vhcache 8176 * cleanup operation. 
8177 */ 8178 if (token != NULL && 8179 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 8180 return (token->lt_cct); 8181 8182 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 8183 if (mod_hash_find(vhcache->vhcache_client_hash, 8184 (mod_hash_key_t)name_addr, &hv) == 0) { 8185 if (token) { 8186 token->lt_cct = (mdi_vhcache_client_t *)hv; 8187 token->lt_cct_lookup_time = ddi_get_lbolt64(); 8188 } 8189 } else { 8190 if (token) { 8191 token->lt_cct = NULL; 8192 token->lt_cct_lookup_time = 0; 8193 } 8194 hv = NULL; 8195 } 8196 kmem_free(name_addr, len); 8197 return ((mdi_vhcache_client_t *)hv); 8198 } 8199 8200 /* 8201 * Add the specified path to the vhci cache if not already present. 8202 * Also add the vhcache client for the client corresponding to this path 8203 * if it doesn't already exist. 8204 */ 8205 static void 8206 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8207 { 8208 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8209 mdi_vhcache_client_t *cct; 8210 mdi_vhcache_pathinfo_t *cpi; 8211 mdi_phci_t *ph = pip->pi_phci; 8212 mdi_client_t *ct = pip->pi_client; 8213 int cache_updated = 0; 8214 8215 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8216 8217 /* if vhcache client for this pip doesn't already exist, add it */ 8218 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8219 NULL)) == NULL) { 8220 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 8221 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 8222 ct->ct_guid, NULL); 8223 enqueue_vhcache_client(vhcache, cct); 8224 (void) mod_hash_insert(vhcache->vhcache_client_hash, 8225 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 8226 cache_updated = 1; 8227 } 8228 8229 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8230 if (cpi->cpi_cphci->cphci_phci == ph && 8231 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 8232 cpi->cpi_pip = pip; 8233 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 8234 cpi->cpi_flags &= 8235 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8236 sort_vhcache_paths(cct); 8237 cache_updated = 1; 8238 } 8239 break; 8240 } 8241 } 8242 8243 if (cpi == NULL) { 8244 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 8245 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 8246 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 8247 ASSERT(cpi->cpi_cphci != NULL); 8248 cpi->cpi_pip = pip; 8249 enqueue_vhcache_pathinfo(cct, cpi); 8250 cache_updated = 1; 8251 } 8252 8253 rw_exit(&vhcache->vhcache_lock); 8254 8255 if (cache_updated) 8256 vhcache_dirty(vhc); 8257 } 8258 8259 /* 8260 * Remove the reference to the specified path from the vhci cache. 8261 */ 8262 static void 8263 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8264 { 8265 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8266 mdi_client_t *ct = pip->pi_client; 8267 mdi_vhcache_client_t *cct; 8268 mdi_vhcache_pathinfo_t *cpi; 8269 8270 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8271 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8272 NULL)) != NULL) { 8273 for (cpi = cct->cct_cpi_head; cpi != NULL; 8274 cpi = cpi->cpi_next) { 8275 if (cpi->cpi_pip == pip) { 8276 cpi->cpi_pip = NULL; 8277 break; 8278 } 8279 } 8280 } 8281 rw_exit(&vhcache->vhcache_lock); 8282 } 8283 8284 /* 8285 * Flush the vhci cache to disk. 8286 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 8287 */ 8288 static int 8289 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 8290 { 8291 nvlist_t *nvl; 8292 int err; 8293 int rv; 8294 8295 /* 8296 * It is possible that the system may shutdown before 8297 * i_ddi_io_initialized (during stmsboot for example). To allow for 8298 * flushing the cache in this case do not check for 8299 * i_ddi_io_initialized when force flag is set. 
8300 */ 8301 if (force_flag == 0 && !i_ddi_io_initialized()) 8302 return (MDI_FAILURE); 8303 8304 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 8305 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 8306 nvlist_free(nvl); 8307 } else 8308 err = EFAULT; 8309 8310 rv = MDI_SUCCESS; 8311 mutex_enter(&vhc->vhc_lock); 8312 if (err != 0) { 8313 if (err == EROFS) { 8314 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 8315 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 8316 MDI_VHC_VHCACHE_DIRTY); 8317 } else { 8318 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 8319 cmn_err(CE_CONT, "%s: update failed\n", 8320 vhc->vhc_vhcache_filename); 8321 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 8322 } 8323 rv = MDI_FAILURE; 8324 } 8325 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 8326 cmn_err(CE_CONT, 8327 "%s: update now ok\n", vhc->vhc_vhcache_filename); 8328 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 8329 } 8330 mutex_exit(&vhc->vhc_lock); 8331 8332 return (rv); 8333 } 8334 8335 /* 8336 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 8337 * Exits itself if left idle for the idle timeout period. 
8338 */ 8339 static void 8340 vhcache_flush_thread(void *arg) 8341 { 8342 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8343 clock_t idle_time, quit_at_ticks; 8344 callb_cpr_t cprinfo; 8345 8346 /* number of seconds to sleep idle before exiting */ 8347 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8348 8349 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8350 "mdi_vhcache_flush"); 8351 mutex_enter(&vhc->vhc_lock); 8352 for (; ; ) { 8353 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8354 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8355 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8356 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8357 (void) cv_timedwait(&vhc->vhc_cv, 8358 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8359 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8360 } else { 8361 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8362 mutex_exit(&vhc->vhc_lock); 8363 8364 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8365 vhcache_dirty(vhc); 8366 8367 mutex_enter(&vhc->vhc_lock); 8368 } 8369 } 8370 8371 quit_at_ticks = ddi_get_lbolt() + idle_time; 8372 8373 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8374 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8375 ddi_get_lbolt() < quit_at_ticks) { 8376 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8377 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8378 quit_at_ticks); 8379 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8380 } 8381 8382 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8383 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8384 goto out; 8385 } 8386 8387 out: 8388 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8389 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8390 CALLB_CPR_EXIT(&cprinfo); 8391 } 8392 8393 /* 8394 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
8395 */ 8396 static void 8397 vhcache_dirty(mdi_vhci_config_t *vhc) 8398 { 8399 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8400 int create_thread; 8401 8402 rw_enter(&vhcache->vhcache_lock, RW_READER); 8403 /* do not flush cache until the cache is fully built */ 8404 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8405 rw_exit(&vhcache->vhcache_lock); 8406 return; 8407 } 8408 rw_exit(&vhcache->vhcache_lock); 8409 8410 mutex_enter(&vhc->vhc_lock); 8411 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8412 mutex_exit(&vhc->vhc_lock); 8413 return; 8414 } 8415 8416 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8417 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8418 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8419 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8420 cv_broadcast(&vhc->vhc_cv); 8421 create_thread = 0; 8422 } else { 8423 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8424 create_thread = 1; 8425 } 8426 mutex_exit(&vhc->vhc_lock); 8427 8428 if (create_thread) 8429 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8430 0, &p0, TS_RUN, minclsyspri); 8431 } 8432 8433 /* 8434 * phci bus config structure - one for for each phci bus config operation that 8435 * we initiate on behalf of a vhci. 
8436 */ 8437 typedef struct mdi_phci_bus_config_s { 8438 char *phbc_phci_path; 8439 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8440 struct mdi_phci_bus_config_s *phbc_next; 8441 } mdi_phci_bus_config_t; 8442 8443 /* vhci bus config structure - one for each vhci bus config operation */ 8444 typedef struct mdi_vhci_bus_config_s { 8445 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8446 major_t vhbc_op_major; /* bus config op major */ 8447 uint_t vhbc_op_flags; /* bus config op flags */ 8448 kmutex_t vhbc_lock; 8449 kcondvar_t vhbc_cv; 8450 int vhbc_thr_count; 8451 } mdi_vhci_bus_config_t; 8452 8453 /* 8454 * bus config the specified phci 8455 */ 8456 static void 8457 bus_config_phci(void *arg) 8458 { 8459 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8460 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8461 dev_info_t *ph_dip; 8462 8463 /* 8464 * first configure all path components upto phci and then configure 8465 * the phci children. 8466 */ 8467 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8468 != NULL) { 8469 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8470 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8471 (void) ndi_devi_config_driver(ph_dip, 8472 vhbc->vhbc_op_flags, 8473 vhbc->vhbc_op_major); 8474 } else 8475 (void) ndi_devi_config(ph_dip, 8476 vhbc->vhbc_op_flags); 8477 8478 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8479 ndi_rele_devi(ph_dip); 8480 } 8481 8482 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8483 kmem_free(phbc, sizeof (*phbc)); 8484 8485 mutex_enter(&vhbc->vhbc_lock); 8486 vhbc->vhbc_thr_count--; 8487 if (vhbc->vhbc_thr_count == 0) 8488 cv_broadcast(&vhbc->vhbc_cv); 8489 mutex_exit(&vhbc->vhbc_lock); 8490 } 8491 8492 /* 8493 * Bus config all phcis associated with the vhci in parallel. 8494 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8495 */ 8496 static void 8497 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8498 ddi_bus_config_op_t op, major_t maj) 8499 { 8500 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8501 mdi_vhci_bus_config_t *vhbc; 8502 mdi_vhcache_phci_t *cphci; 8503 8504 rw_enter(&vhcache->vhcache_lock, RW_READER); 8505 if (vhcache->vhcache_phci_head == NULL) { 8506 rw_exit(&vhcache->vhcache_lock); 8507 return; 8508 } 8509 8510 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8511 8512 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8513 cphci = cphci->cphci_next) { 8514 /* skip phcis that haven't attached before root is available */ 8515 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8516 continue; 8517 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8518 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8519 KM_SLEEP); 8520 phbc->phbc_vhbusconfig = vhbc; 8521 phbc->phbc_next = phbc_head; 8522 phbc_head = phbc; 8523 vhbc->vhbc_thr_count++; 8524 } 8525 rw_exit(&vhcache->vhcache_lock); 8526 8527 vhbc->vhbc_op = op; 8528 vhbc->vhbc_op_major = maj; 8529 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8530 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8531 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8532 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8533 8534 /* now create threads to initiate bus config on all phcis in parallel */ 8535 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8536 phbc_next = phbc->phbc_next; 8537 if (mdi_mtc_off) 8538 bus_config_phci((void *)phbc); 8539 else 8540 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8541 0, &p0, TS_RUN, minclsyspri); 8542 } 8543 8544 mutex_enter(&vhbc->vhbc_lock); 8545 /* wait until all threads exit */ 8546 while (vhbc->vhbc_thr_count > 0) 8547 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8548 mutex_exit(&vhbc->vhbc_lock); 8549 8550 mutex_destroy(&vhbc->vhbc_lock); 8551 cv_destroy(&vhbc->vhbc_cv); 8552 kmem_free(vhbc, sizeof (*vhbc)); 8553 } 8554 8555 /* 8556 * Single 
threaded version of bus_config_all_phcis() 8557 */ 8558 static void 8559 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8560 ddi_bus_config_op_t op, major_t maj) 8561 { 8562 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8563 8564 single_threaded_vhconfig_enter(vhc); 8565 bus_config_all_phcis(vhcache, flags, op, maj); 8566 single_threaded_vhconfig_exit(vhc); 8567 } 8568 8569 /* 8570 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8571 * The path includes the child component in addition to the phci path. 8572 */ 8573 static int 8574 bus_config_one_phci_child(char *path) 8575 { 8576 dev_info_t *ph_dip, *child; 8577 char *devnm; 8578 int rv = MDI_FAILURE; 8579 8580 /* extract the child component of the phci */ 8581 devnm = strrchr(path, '/'); 8582 *devnm++ = '\0'; 8583 8584 /* 8585 * first configure all path components upto phci and then 8586 * configure the phci child. 8587 */ 8588 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8589 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8590 NDI_SUCCESS) { 8591 /* 8592 * release the hold that ndi_devi_config_one() placed 8593 */ 8594 ndi_rele_devi(child); 8595 rv = MDI_SUCCESS; 8596 } 8597 8598 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8599 ndi_rele_devi(ph_dip); 8600 } 8601 8602 devnm--; 8603 *devnm = '/'; 8604 return (rv); 8605 } 8606 8607 /* 8608 * Build a list of phci client paths for the specified vhci client. 8609 * The list includes only those phci client paths which aren't configured yet. 8610 */ 8611 static mdi_phys_path_t * 8612 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8613 { 8614 mdi_vhcache_pathinfo_t *cpi; 8615 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8616 int config_path, len; 8617 8618 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8619 /* 8620 * include only those paths that aren't configured. 
8621 */ 8622 config_path = 0; 8623 if (cpi->cpi_pip == NULL) 8624 config_path = 1; 8625 else { 8626 MDI_PI_LOCK(cpi->cpi_pip); 8627 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8628 config_path = 1; 8629 MDI_PI_UNLOCK(cpi->cpi_pip); 8630 } 8631 8632 if (config_path) { 8633 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8634 len = strlen(cpi->cpi_cphci->cphci_path) + 8635 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8636 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8637 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8638 cpi->cpi_cphci->cphci_path, ct_name, 8639 cpi->cpi_addr); 8640 pp->phys_path_next = NULL; 8641 8642 if (pp_head == NULL) 8643 pp_head = pp; 8644 else 8645 pp_tail->phys_path_next = pp; 8646 pp_tail = pp; 8647 } 8648 } 8649 8650 return (pp_head); 8651 } 8652 8653 /* 8654 * Free the memory allocated for phci client path list. 8655 */ 8656 static void 8657 free_phclient_path_list(mdi_phys_path_t *pp_head) 8658 { 8659 mdi_phys_path_t *pp, *pp_next; 8660 8661 for (pp = pp_head; pp != NULL; pp = pp_next) { 8662 pp_next = pp->phys_path_next; 8663 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8664 kmem_free(pp, sizeof (*pp)); 8665 } 8666 } 8667 8668 /* 8669 * Allocated async client structure and initialize with the specified values. 8670 */ 8671 static mdi_async_client_config_t * 8672 alloc_async_client_config(char *ct_name, char *ct_addr, 8673 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8674 { 8675 mdi_async_client_config_t *acc; 8676 8677 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8678 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8679 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8680 acc->acc_phclient_path_list_head = pp_head; 8681 init_vhcache_lookup_token(&acc->acc_token, tok); 8682 acc->acc_next = NULL; 8683 return (acc); 8684 } 8685 8686 /* 8687 * Free the memory allocated for the async client structure and their members. 
8688 */ 8689 static void 8690 free_async_client_config(mdi_async_client_config_t *acc) 8691 { 8692 if (acc->acc_phclient_path_list_head) 8693 free_phclient_path_list(acc->acc_phclient_path_list_head); 8694 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8695 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8696 kmem_free(acc, sizeof (*acc)); 8697 } 8698 8699 /* 8700 * Sort vhcache pathinfos (cpis) of the specified client. 8701 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8702 * flag set come at the beginning of the list. All cpis which have this 8703 * flag set come at the end of the list. 8704 */ 8705 static void 8706 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8707 { 8708 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8709 8710 cpi_head = cct->cct_cpi_head; 8711 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8712 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8713 cpi_next = cpi->cpi_next; 8714 enqueue_vhcache_pathinfo(cct, cpi); 8715 } 8716 } 8717 8718 /* 8719 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8720 * every vhcache pathinfo of the specified client. If not adjust the flag 8721 * setting appropriately. 8722 * 8723 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8724 * on-disk vhci cache. So every time this flag is updated the cache must be 8725 * flushed. 8726 */ 8727 static void 8728 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8729 mdi_vhcache_lookup_token_t *tok) 8730 { 8731 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8732 mdi_vhcache_client_t *cct; 8733 mdi_vhcache_pathinfo_t *cpi; 8734 8735 rw_enter(&vhcache->vhcache_lock, RW_READER); 8736 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8737 == NULL) { 8738 rw_exit(&vhcache->vhcache_lock); 8739 return; 8740 } 8741 8742 /* 8743 * to avoid unnecessary on-disk cache updates, first check if an 8744 * update is really needed. 
If no update is needed simply return. 8745 */ 8746 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8747 if ((cpi->cpi_pip != NULL && 8748 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8749 (cpi->cpi_pip == NULL && 8750 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8751 break; 8752 } 8753 } 8754 if (cpi == NULL) { 8755 rw_exit(&vhcache->vhcache_lock); 8756 return; 8757 } 8758 8759 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8760 rw_exit(&vhcache->vhcache_lock); 8761 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8762 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8763 tok)) == NULL) { 8764 rw_exit(&vhcache->vhcache_lock); 8765 return; 8766 } 8767 } 8768 8769 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8770 if (cpi->cpi_pip != NULL) 8771 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8772 else 8773 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8774 } 8775 sort_vhcache_paths(cct); 8776 8777 rw_exit(&vhcache->vhcache_lock); 8778 vhcache_dirty(vhc); 8779 } 8780 8781 /* 8782 * Configure all specified paths of the client. 8783 */ 8784 static void 8785 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8786 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8787 { 8788 mdi_phys_path_t *pp; 8789 8790 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8791 (void) bus_config_one_phci_child(pp->phys_path); 8792 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8793 } 8794 8795 /* 8796 * Dequeue elements from vhci async client config list and bus configure 8797 * their corresponding phci clients. 
8798 */ 8799 static void 8800 config_client_paths_thread(void *arg) 8801 { 8802 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8803 mdi_async_client_config_t *acc; 8804 clock_t quit_at_ticks; 8805 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8806 callb_cpr_t cprinfo; 8807 8808 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8809 "mdi_config_client_paths"); 8810 8811 for (; ; ) { 8812 quit_at_ticks = ddi_get_lbolt() + idle_time; 8813 8814 mutex_enter(&vhc->vhc_lock); 8815 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8816 vhc->vhc_acc_list_head == NULL && 8817 ddi_get_lbolt() < quit_at_ticks) { 8818 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8819 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8820 quit_at_ticks); 8821 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8822 } 8823 8824 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8825 vhc->vhc_acc_list_head == NULL) 8826 goto out; 8827 8828 acc = vhc->vhc_acc_list_head; 8829 vhc->vhc_acc_list_head = acc->acc_next; 8830 if (vhc->vhc_acc_list_head == NULL) 8831 vhc->vhc_acc_list_tail = NULL; 8832 vhc->vhc_acc_count--; 8833 mutex_exit(&vhc->vhc_lock); 8834 8835 config_client_paths_sync(vhc, acc->acc_ct_name, 8836 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8837 &acc->acc_token); 8838 8839 free_async_client_config(acc); 8840 } 8841 8842 out: 8843 vhc->vhc_acc_thrcount--; 8844 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8845 CALLB_CPR_EXIT(&cprinfo); 8846 } 8847 8848 /* 8849 * Arrange for all the phci client paths (pp_head) for the specified client 8850 * to be bus configured asynchronously by a thread. 
8851 */ 8852 static void 8853 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8854 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8855 { 8856 mdi_async_client_config_t *acc, *newacc; 8857 int create_thread; 8858 8859 if (pp_head == NULL) 8860 return; 8861 8862 if (mdi_mtc_off) { 8863 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8864 free_phclient_path_list(pp_head); 8865 return; 8866 } 8867 8868 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8869 ASSERT(newacc); 8870 8871 mutex_enter(&vhc->vhc_lock); 8872 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8873 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8874 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8875 free_async_client_config(newacc); 8876 mutex_exit(&vhc->vhc_lock); 8877 return; 8878 } 8879 } 8880 8881 if (vhc->vhc_acc_list_head == NULL) 8882 vhc->vhc_acc_list_head = newacc; 8883 else 8884 vhc->vhc_acc_list_tail->acc_next = newacc; 8885 vhc->vhc_acc_list_tail = newacc; 8886 vhc->vhc_acc_count++; 8887 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8888 cv_broadcast(&vhc->vhc_cv); 8889 create_thread = 0; 8890 } else { 8891 vhc->vhc_acc_thrcount++; 8892 create_thread = 1; 8893 } 8894 mutex_exit(&vhc->vhc_lock); 8895 8896 if (create_thread) 8897 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8898 0, &p0, TS_RUN, minclsyspri); 8899 } 8900 8901 /* 8902 * Return number of online paths for the specified client. 
8903 */ 8904 static int 8905 nonline_paths(mdi_vhcache_client_t *cct) 8906 { 8907 mdi_vhcache_pathinfo_t *cpi; 8908 int online_count = 0; 8909 8910 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8911 if (cpi->cpi_pip != NULL) { 8912 MDI_PI_LOCK(cpi->cpi_pip); 8913 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8914 online_count++; 8915 MDI_PI_UNLOCK(cpi->cpi_pip); 8916 } 8917 } 8918 8919 return (online_count); 8920 } 8921 8922 /* 8923 * Bus configure all paths for the specified vhci client. 8924 * If at least one path for the client is already online, the remaining paths 8925 * will be configured asynchronously. Otherwise, it synchronously configures 8926 * the paths until at least one path is online and then rest of the paths 8927 * will be configured asynchronously. 8928 */ 8929 static void 8930 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8931 { 8932 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8933 mdi_phys_path_t *pp_head, *pp; 8934 mdi_vhcache_client_t *cct; 8935 mdi_vhcache_lookup_token_t tok; 8936 8937 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8938 8939 init_vhcache_lookup_token(&tok, NULL); 8940 8941 if (ct_name == NULL || ct_addr == NULL || 8942 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8943 == NULL || 8944 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8945 rw_exit(&vhcache->vhcache_lock); 8946 return; 8947 } 8948 8949 /* if at least one path is online, configure the rest asynchronously */ 8950 if (nonline_paths(cct) > 0) { 8951 rw_exit(&vhcache->vhcache_lock); 8952 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8953 return; 8954 } 8955 8956 rw_exit(&vhcache->vhcache_lock); 8957 8958 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8959 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8960 rw_enter(&vhcache->vhcache_lock, RW_READER); 8961 8962 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8963 ct_addr, &tok)) 
== NULL) { 8964 rw_exit(&vhcache->vhcache_lock); 8965 goto out; 8966 } 8967 8968 if (nonline_paths(cct) > 0 && 8969 pp->phys_path_next != NULL) { 8970 rw_exit(&vhcache->vhcache_lock); 8971 config_client_paths_async(vhc, ct_name, ct_addr, 8972 pp->phys_path_next, &tok); 8973 pp->phys_path_next = NULL; 8974 goto out; 8975 } 8976 8977 rw_exit(&vhcache->vhcache_lock); 8978 } 8979 } 8980 8981 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8982 out: 8983 free_phclient_path_list(pp_head); 8984 } 8985 8986 static void 8987 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8988 { 8989 mutex_enter(&vhc->vhc_lock); 8990 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8991 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8992 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8993 mutex_exit(&vhc->vhc_lock); 8994 } 8995 8996 static void 8997 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8998 { 8999 mutex_enter(&vhc->vhc_lock); 9000 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 9001 cv_broadcast(&vhc->vhc_cv); 9002 mutex_exit(&vhc->vhc_lock); 9003 } 9004 9005 typedef struct mdi_phci_driver_info { 9006 char *phdriver_name; /* name of the phci driver */ 9007 9008 /* set to non zero if the phci driver supports root device */ 9009 int phdriver_root_support; 9010 } mdi_phci_driver_info_t; 9011 9012 /* 9013 * vhci class and root support capability of a phci driver can be 9014 * specified using ddi-vhci-class and ddi-no-root-support properties in the 9015 * phci driver.conf file. The built-in tables below contain this information 9016 * for those phci drivers whose driver.conf files don't yet contain this info. 9017 * 9018 * All phci drivers expect iscsi have root device support. 
9019 */ 9020 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 9021 { "fp", 1 }, 9022 { "iscsi", 0 }, 9023 { "ibsrp", 1 } 9024 }; 9025 9026 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 9027 9028 static void * 9029 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 9030 { 9031 void *new_ptr; 9032 9033 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 9034 if (old_ptr) { 9035 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 9036 kmem_free(old_ptr, old_size); 9037 } 9038 return (new_ptr); 9039 } 9040 9041 static void 9042 add_to_phci_list(char ***driver_list, int **root_support_list, 9043 int *cur_elements, int *max_elements, char *driver_name, int root_support) 9044 { 9045 ASSERT(*cur_elements <= *max_elements); 9046 if (*cur_elements == *max_elements) { 9047 *max_elements += 10; 9048 *driver_list = mdi_realloc(*driver_list, 9049 sizeof (char *) * (*cur_elements), 9050 sizeof (char *) * (*max_elements)); 9051 *root_support_list = mdi_realloc(*root_support_list, 9052 sizeof (int) * (*cur_elements), 9053 sizeof (int) * (*max_elements)); 9054 } 9055 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 9056 (*root_support_list)[*cur_elements] = root_support; 9057 (*cur_elements)++; 9058 } 9059 9060 static void 9061 get_phci_driver_list(char *vhci_class, char ***driver_list, 9062 int **root_support_list, int *cur_elements, int *max_elements) 9063 { 9064 mdi_phci_driver_info_t *st_driver_list, *p; 9065 int st_ndrivers, root_support, i, j, driver_conf_count; 9066 major_t m; 9067 struct devnames *dnp; 9068 ddi_prop_t *propp; 9069 9070 *driver_list = NULL; 9071 *root_support_list = NULL; 9072 *cur_elements = 0; 9073 *max_elements = 0; 9074 9075 /* add the phci drivers derived from the phci driver.conf files */ 9076 for (m = 0; m < devcnt; m++) { 9077 dnp = &devnamesp[m]; 9078 9079 if (dnp->dn_flags & DN_PHCI_DRIVER) { 9080 LOCK_DEV_OPS(&dnp->dn_lock); 9081 if (dnp->dn_global_prop_ptr != NULL && 9082 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 9083 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 9084 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 9085 strcmp(propp->prop_val, vhci_class) == 0) { 9086 9087 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 9088 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 9089 &dnp->dn_global_prop_ptr->prop_list) 9090 == NULL) ? 1 : 0; 9091 9092 add_to_phci_list(driver_list, root_support_list, 9093 cur_elements, max_elements, dnp->dn_name, 9094 root_support); 9095 9096 UNLOCK_DEV_OPS(&dnp->dn_lock); 9097 } else 9098 UNLOCK_DEV_OPS(&dnp->dn_lock); 9099 } 9100 } 9101 9102 driver_conf_count = *cur_elements; 9103 9104 /* add the phci drivers specified in the built-in tables */ 9105 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 9106 st_driver_list = scsi_phci_driver_list; 9107 st_ndrivers = sizeof (scsi_phci_driver_list) / 9108 sizeof (mdi_phci_driver_info_t); 9109 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 9110 st_driver_list = ib_phci_driver_list; 9111 st_ndrivers = sizeof (ib_phci_driver_list) / 9112 sizeof (mdi_phci_driver_info_t); 9113 } else { 9114 st_driver_list = NULL; 9115 st_ndrivers = 0; 9116 } 9117 9118 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 9119 /* add this phci driver if not already added before */ 9120 for (j = 0; j < driver_conf_count; j++) { 9121 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 9122 break; 9123 } 9124 if (j == driver_conf_count) { 9125 add_to_phci_list(driver_list, root_support_list, 9126 cur_elements, max_elements, p->phdriver_name, 9127 p->phdriver_root_support); 9128 } 9129 } 9130 } 9131 9132 /* 9133 * Attach the phci driver instances associated with the specified vhci class. 9134 * If root is mounted attach all phci driver instances. 9135 * If root is not mounted, attach the instances of only those phci 9136 * drivers that have the root support. 
9137 */ 9138 static void 9139 attach_phci_drivers(char *vhci_class) 9140 { 9141 char **driver_list, **p; 9142 int *root_support_list; 9143 int cur_elements, max_elements, i; 9144 major_t m; 9145 9146 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9147 &cur_elements, &max_elements); 9148 9149 for (i = 0; i < cur_elements; i++) { 9150 if (modrootloaded || root_support_list[i]) { 9151 m = ddi_name_to_major(driver_list[i]); 9152 if (m != DDI_MAJOR_T_NONE && 9153 ddi_hold_installed_driver(m)) 9154 ddi_rele_driver(m); 9155 } 9156 } 9157 9158 if (driver_list) { 9159 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 9160 kmem_free(*p, strlen(*p) + 1); 9161 kmem_free(driver_list, sizeof (char *) * max_elements); 9162 kmem_free(root_support_list, sizeof (int) * max_elements); 9163 } 9164 } 9165 9166 /* 9167 * Build vhci cache: 9168 * 9169 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 9170 * the phci driver instances. During this process the cache gets built. 9171 * 9172 * Cache is built fully if the root is mounted. 9173 * If the root is not mounted, phci drivers that do not have root support 9174 * are not attached. As a result the cache is built partially. The entries 9175 * in the cache reflect only those phci drivers that have root support. 
9176 */ 9177 static int 9178 build_vhci_cache(mdi_vhci_t *vh) 9179 { 9180 mdi_vhci_config_t *vhc = vh->vh_config; 9181 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9182 9183 single_threaded_vhconfig_enter(vhc); 9184 9185 rw_enter(&vhcache->vhcache_lock, RW_READER); 9186 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 9187 rw_exit(&vhcache->vhcache_lock); 9188 single_threaded_vhconfig_exit(vhc); 9189 return (0); 9190 } 9191 rw_exit(&vhcache->vhcache_lock); 9192 9193 attach_phci_drivers(vh->vh_class); 9194 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 9195 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9196 9197 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9198 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 9199 rw_exit(&vhcache->vhcache_lock); 9200 9201 single_threaded_vhconfig_exit(vhc); 9202 vhcache_dirty(vhc); 9203 return (1); 9204 } 9205 9206 /* 9207 * Determine if discovery of paths is needed. 9208 */ 9209 static int 9210 vhcache_do_discovery(mdi_vhci_config_t *vhc) 9211 { 9212 int rv = 1; 9213 9214 mutex_enter(&vhc->vhc_lock); 9215 if (i_ddi_io_initialized() == 0) { 9216 if (vhc->vhc_path_discovery_boot > 0) { 9217 vhc->vhc_path_discovery_boot--; 9218 goto out; 9219 } 9220 } else { 9221 if (vhc->vhc_path_discovery_postboot > 0) { 9222 vhc->vhc_path_discovery_postboot--; 9223 goto out; 9224 } 9225 } 9226 9227 /* 9228 * Do full path discovery at most once per mdi_path_discovery_interval. 9229 * This is to avoid a series of full path discoveries when opening 9230 * stale /dev/[r]dsk links. 9231 */ 9232 if (mdi_path_discovery_interval != -1 && 9233 ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time) 9234 goto out; 9235 9236 rv = 0; 9237 out: 9238 mutex_exit(&vhc->vhc_lock); 9239 return (rv); 9240 } 9241 9242 /* 9243 * Discover all paths: 9244 * 9245 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 9246 * driver instances. During this process all paths will be discovered. 
9247 */ 9248 static int 9249 vhcache_discover_paths(mdi_vhci_t *vh) 9250 { 9251 mdi_vhci_config_t *vhc = vh->vh_config; 9252 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9253 int rv = 0; 9254 9255 single_threaded_vhconfig_enter(vhc); 9256 9257 if (vhcache_do_discovery(vhc)) { 9258 attach_phci_drivers(vh->vh_class); 9259 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 9260 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9261 9262 mutex_enter(&vhc->vhc_lock); 9263 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() + 9264 mdi_path_discovery_interval * TICKS_PER_SECOND; 9265 mutex_exit(&vhc->vhc_lock); 9266 rv = 1; 9267 } 9268 9269 single_threaded_vhconfig_exit(vhc); 9270 return (rv); 9271 } 9272 9273 /* 9274 * Generic vhci bus config implementation: 9275 * 9276 * Parameters 9277 * vdip vhci dip 9278 * flags bus config flags 9279 * op bus config operation 9280 * The remaining parameters are bus config operation specific 9281 * 9282 * for BUS_CONFIG_ONE 9283 * arg pointer to name@addr 9284 * child upon successful return from this function, *child will be 9285 * set to the configured and held devinfo child node of vdip. 9286 * ct_addr pointer to client address (i.e. GUID) 9287 * 9288 * for BUS_CONFIG_DRIVER 9289 * arg major number of the driver 9290 * child and ct_addr parameters are ignored 9291 * 9292 * for BUS_CONFIG_ALL 9293 * arg, child, and ct_addr parameters are ignored 9294 * 9295 * Note that for the rest of the bus config operations, this function simply 9296 * calls the framework provided default bus config routine. 
9297 */ 9298 int 9299 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 9300 void *arg, dev_info_t **child, char *ct_addr) 9301 { 9302 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9303 mdi_vhci_config_t *vhc = vh->vh_config; 9304 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9305 int rv = 0; 9306 int params_valid = 0; 9307 char *cp; 9308 9309 /* 9310 * To bus config vhcis we relay operation, possibly using another 9311 * thread, to phcis. The phci driver then interacts with MDI to cause 9312 * vhci child nodes to be enumerated under the vhci node. Adding a 9313 * vhci child requires an ndi_devi_enter of the vhci. Since another 9314 * thread may be adding the child, to avoid deadlock we can't wait 9315 * for the relayed operations to complete if we have already entered 9316 * the vhci node. 9317 */ 9318 if (DEVI_BUSY_OWNED(vdip)) { 9319 MDI_DEBUG(2, (MDI_NOTE, vdip, 9320 "vhci dip is busy owned %p", (void *)vdip)); 9321 goto default_bus_config; 9322 } 9323 9324 rw_enter(&vhcache->vhcache_lock, RW_READER); 9325 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 9326 rw_exit(&vhcache->vhcache_lock); 9327 rv = build_vhci_cache(vh); 9328 rw_enter(&vhcache->vhcache_lock, RW_READER); 9329 } 9330 9331 switch (op) { 9332 case BUS_CONFIG_ONE: 9333 if (arg != NULL && ct_addr != NULL) { 9334 /* extract node name */ 9335 cp = (char *)arg; 9336 while (*cp != '\0' && *cp != '@') 9337 cp++; 9338 if (*cp == '@') { 9339 params_valid = 1; 9340 *cp = '\0'; 9341 config_client_paths(vhc, (char *)arg, ct_addr); 9342 /* config_client_paths() releases cache_lock */ 9343 *cp = '@'; 9344 break; 9345 } 9346 } 9347 9348 rw_exit(&vhcache->vhcache_lock); 9349 break; 9350 9351 case BUS_CONFIG_DRIVER: 9352 rw_exit(&vhcache->vhcache_lock); 9353 if (rv == 0) 9354 st_bus_config_all_phcis(vhc, flags, op, 9355 (major_t)(uintptr_t)arg); 9356 break; 9357 9358 case BUS_CONFIG_ALL: 9359 rw_exit(&vhcache->vhcache_lock); 9360 if (rv == 0) 9361 st_bus_config_all_phcis(vhc, 
flags, op, -1); 9362 break; 9363 9364 default: 9365 rw_exit(&vhcache->vhcache_lock); 9366 break; 9367 } 9368 9369 9370 default_bus_config: 9371 /* 9372 * All requested child nodes are enumerated under the vhci. 9373 * Now configure them. 9374 */ 9375 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9376 NDI_SUCCESS) { 9377 return (MDI_SUCCESS); 9378 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 9379 /* discover all paths and try configuring again */ 9380 if (vhcache_discover_paths(vh) && 9381 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9382 NDI_SUCCESS) 9383 return (MDI_SUCCESS); 9384 } 9385 9386 return (MDI_FAILURE); 9387 } 9388 9389 /* 9390 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 9391 */ 9392 static nvlist_t * 9393 read_on_disk_vhci_cache(char *vhci_class) 9394 { 9395 nvlist_t *nvl; 9396 int err; 9397 char *filename; 9398 9399 filename = vhclass2vhcache_filename(vhci_class); 9400 9401 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9402 kmem_free(filename, strlen(filename) + 1); 9403 return (nvl); 9404 } else if (err == EIO) 9405 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename); 9406 else if (err == EINVAL) 9407 cmn_err(CE_WARN, 9408 "%s: data file corrupted, will recreate", filename); 9409 9410 kmem_free(filename, strlen(filename) + 1); 9411 return (NULL); 9412 } 9413 9414 /* 9415 * Read on-disk vhci cache into nvlists for all vhci classes. 9416 * Called during booting by i_ddi_read_devices_files(). 9417 */ 9418 void 9419 mdi_read_devices_files(void) 9420 { 9421 int i; 9422 9423 for (i = 0; i < N_VHCI_CLASSES; i++) 9424 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9425 } 9426 9427 /* 9428 * Remove all stale entries from vhci cache. 
9429 */ 9430 static void 9431 clean_vhcache(mdi_vhci_config_t *vhc) 9432 { 9433 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9434 mdi_vhcache_phci_t *phci, *nxt_phci; 9435 mdi_vhcache_client_t *client, *nxt_client; 9436 mdi_vhcache_pathinfo_t *path, *nxt_path; 9437 9438 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9439 9440 client = vhcache->vhcache_client_head; 9441 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9442 for ( ; client != NULL; client = nxt_client) { 9443 nxt_client = client->cct_next; 9444 9445 path = client->cct_cpi_head; 9446 client->cct_cpi_head = client->cct_cpi_tail = NULL; 9447 for ( ; path != NULL; path = nxt_path) { 9448 nxt_path = path->cpi_next; 9449 if ((path->cpi_cphci->cphci_phci != NULL) && 9450 (path->cpi_pip != NULL)) { 9451 enqueue_tail_vhcache_pathinfo(client, path); 9452 } else if (path->cpi_pip != NULL) { 9453 /* Not valid to have a path without a phci. */ 9454 free_vhcache_pathinfo(path); 9455 } 9456 } 9457 9458 if (client->cct_cpi_head != NULL) 9459 enqueue_vhcache_client(vhcache, client); 9460 else { 9461 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9462 (mod_hash_key_t)client->cct_name_addr); 9463 free_vhcache_client(client); 9464 } 9465 } 9466 9467 phci = vhcache->vhcache_phci_head; 9468 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9469 for ( ; phci != NULL; phci = nxt_phci) { 9470 9471 nxt_phci = phci->cphci_next; 9472 if (phci->cphci_phci != NULL) 9473 enqueue_vhcache_phci(vhcache, phci); 9474 else 9475 free_vhcache_phci(phci); 9476 } 9477 9478 vhcache->vhcache_clean_time = ddi_get_lbolt64(); 9479 rw_exit(&vhcache->vhcache_lock); 9480 vhcache_dirty(vhc); 9481 } 9482 9483 /* 9484 * Remove all stale entries from vhci cache. 
9485 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9486 */ 9487 void 9488 mdi_clean_vhcache(void) 9489 { 9490 mdi_vhci_t *vh; 9491 9492 mutex_enter(&mdi_mutex); 9493 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9494 vh->vh_refcnt++; 9495 mutex_exit(&mdi_mutex); 9496 clean_vhcache(vh->vh_config); 9497 mutex_enter(&mdi_mutex); 9498 vh->vh_refcnt--; 9499 } 9500 mutex_exit(&mdi_mutex); 9501 } 9502 9503 /* 9504 * mdi_vhci_walk_clients(): 9505 * Walker routine to traverse client dev_info nodes 9506 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9507 * below the client, including nexus devices, which we dont want. 9508 * So we just traverse the immediate siblings, starting from 1st client. 9509 */ 9510 void 9511 mdi_vhci_walk_clients(dev_info_t *vdip, 9512 int (*f)(dev_info_t *, void *), void *arg) 9513 { 9514 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9515 dev_info_t *cdip; 9516 mdi_client_t *ct; 9517 9518 MDI_VHCI_CLIENT_LOCK(vh); 9519 cdip = ddi_get_child(vdip); 9520 while (cdip) { 9521 ct = i_devi_get_client(cdip); 9522 MDI_CLIENT_LOCK(ct); 9523 9524 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9525 cdip = ddi_get_next_sibling(cdip); 9526 else 9527 cdip = NULL; 9528 9529 MDI_CLIENT_UNLOCK(ct); 9530 } 9531 MDI_VHCI_CLIENT_UNLOCK(vh); 9532 } 9533 9534 /* 9535 * mdi_vhci_walk_phcis(): 9536 * Walker routine to traverse phci dev_info nodes 9537 */ 9538 void 9539 mdi_vhci_walk_phcis(dev_info_t *vdip, 9540 int (*f)(dev_info_t *, void *), void *arg) 9541 { 9542 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9543 mdi_phci_t *ph, *next; 9544 9545 MDI_VHCI_PHCI_LOCK(vh); 9546 ph = vh->vh_phci_head; 9547 while (ph) { 9548 MDI_PHCI_LOCK(ph); 9549 9550 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9551 next = ph->ph_next; 9552 else 9553 next = NULL; 9554 9555 MDI_PHCI_UNLOCK(ph); 9556 ph = next; 9557 } 9558 MDI_VHCI_PHCI_UNLOCK(vh); 9559 } 9560 9561 9562 /* 9563 * mdi_walk_vhcis(): 9564 * Walker routine to traverse vhci 
dev_info nodes 9565 */ 9566 void 9567 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9568 { 9569 mdi_vhci_t *vh = NULL; 9570 9571 mutex_enter(&mdi_mutex); 9572 /* 9573 * Scan for already registered vhci 9574 */ 9575 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9576 vh->vh_refcnt++; 9577 mutex_exit(&mdi_mutex); 9578 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9579 mutex_enter(&mdi_mutex); 9580 vh->vh_refcnt--; 9581 break; 9582 } else { 9583 mutex_enter(&mdi_mutex); 9584 vh->vh_refcnt--; 9585 } 9586 } 9587 9588 mutex_exit(&mdi_mutex); 9589 } 9590 9591 /* 9592 * i_mdi_log_sysevent(): 9593 * Logs events for pickup by syseventd 9594 */ 9595 static void 9596 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9597 { 9598 char *path_name; 9599 nvlist_t *attr_list; 9600 9601 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9602 KM_SLEEP) != DDI_SUCCESS) { 9603 goto alloc_failed; 9604 } 9605 9606 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9607 (void) ddi_pathname(dip, path_name); 9608 9609 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9610 ddi_driver_name(dip)) != DDI_SUCCESS) { 9611 goto error; 9612 } 9613 9614 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9615 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9616 goto error; 9617 } 9618 9619 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9620 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9621 goto error; 9622 } 9623 9624 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9625 path_name) != DDI_SUCCESS) { 9626 goto error; 9627 } 9628 9629 if (nvlist_add_string(attr_list, DDI_CLASS, 9630 ph_vh_class) != DDI_SUCCESS) { 9631 goto error; 9632 } 9633 9634 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9635 attr_list, NULL, DDI_SLEEP); 9636 9637 error: 9638 kmem_free(path_name, MAXPATHLEN); 9639 nvlist_free(attr_list); 9640 return; 9641 9642 alloc_failed: 9643 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9644 } 9645 9646 char ** 9647 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9648 { 9649 char **driver_list, **ret_driver_list = NULL; 9650 int *root_support_list; 9651 int cur_elements, max_elements; 9652 9653 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9654 &cur_elements, &max_elements); 9655 9656 9657 if (driver_list) { 9658 kmem_free(root_support_list, sizeof (int) * max_elements); 9659 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9660 * max_elements, sizeof (char *) * cur_elements); 9661 } 9662 *ndrivers = cur_elements; 9663 9664 return (ret_driver_list); 9665 9666 } 9667 9668 void 9669 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9670 { 9671 char **p; 9672 int i; 9673 9674 if (driver_list) { 9675 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9676 kmem_free(*p, strlen(*p) + 1); 9677 kmem_free(driver_list, sizeof (char *) * ndrivers); 9678 } 9679 } 9680 9681 /* 9682 * mdi_is_dev_supported(): 9683 * function called by pHCI bus config operation to determine if a 9684 * device should be represented as a child of the vHCI or the 9685 * pHCI. This decision is made by the vHCI, using cinfo idenity 9686 * information passed by the pHCI - specifics of the cinfo 9687 * representation are by agreement between the pHCI and vHCI. 9688 * Return Values: 9689 * MDI_SUCCESS 9690 * MDI_FAILURE 9691 */ 9692 int 9693 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9694 { 9695 mdi_vhci_t *vh; 9696 9697 ASSERT(class && pdip); 9698 9699 /* 9700 * For dev_supported, mdi_phci_register() must have established pdip as 9701 * a pHCI. 9702 * 9703 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9704 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9705 */ 9706 if (!MDI_PHCI(pdip)) 9707 return (MDI_FAILURE); 9708 9709 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9710 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9711 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9712 return (MDI_FAILURE); 9713 } 9714 9715 /* Return vHCI answer */ 9716 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9717 } 9718 9719 int 9720 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9721 { 9722 uint_t devstate = 0; 9723 dev_info_t *cdip; 9724 9725 if ((pip == NULL) || (dcp == NULL)) 9726 return (MDI_FAILURE); 9727 9728 cdip = mdi_pi_get_client(pip); 9729 9730 switch (mdi_pi_get_state(pip)) { 9731 case MDI_PATHINFO_STATE_INIT: 9732 devstate = DEVICE_DOWN; 9733 break; 9734 case MDI_PATHINFO_STATE_ONLINE: 9735 devstate = DEVICE_ONLINE; 9736 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9737 devstate |= DEVICE_BUSY; 9738 break; 9739 case MDI_PATHINFO_STATE_STANDBY: 9740 devstate = DEVICE_ONLINE; 9741 break; 9742 case MDI_PATHINFO_STATE_FAULT: 9743 devstate = DEVICE_DOWN; 9744 break; 9745 case MDI_PATHINFO_STATE_OFFLINE: 9746 devstate = DEVICE_OFFLINE; 9747 break; 9748 default: 9749 ASSERT(MDI_PI(pip)->pi_state); 9750 } 9751 9752 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9753 return (MDI_FAILURE); 9754 9755 return (MDI_SUCCESS); 9756 } 9757