1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
29 * 30 * Default locking order: 31 * 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 #include <sys/sysmacros.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 77 #define MDI_WARN CE_WARN, __func__ 78 #define MDI_NOTE CE_NOTE, __func__ 79 #define MDI_CONT CE_CONT, __func__ 80 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 81 #else /* !DEBUG */ 82 #define MDI_DEBUG(dbglevel, pargs) 83 #endif /* DEBUG */ 84 int mdi_debug_consoleonly = 0; 85 int mdi_delay = 3; 86 87 extern pri_t minclsyspri; 88 extern int modrootloaded; 89 90 /* 91 * Global mutex: 92 * Protects vHCI list and structure members. 
93 */ 94 kmutex_t mdi_mutex; 95 96 /* 97 * Registered vHCI class driver lists 98 */ 99 int mdi_vhci_count; 100 mdi_vhci_t *mdi_vhci_head; 101 mdi_vhci_t *mdi_vhci_tail; 102 103 /* 104 * Client Hash Table size 105 */ 106 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 107 108 /* 109 * taskq interface definitions 110 */ 111 #define MDI_TASKQ_N_THREADS 8 112 #define MDI_TASKQ_PRI minclsyspri 113 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 114 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 115 116 taskq_t *mdi_taskq; 117 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 118 119 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 120 121 /* 122 * The data should be "quiet" for this interval (in seconds) before the 123 * vhci cached data is flushed to the disk. 124 */ 125 static int mdi_vhcache_flush_delay = 10; 126 127 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 128 static int mdi_vhcache_flush_daemon_idle_time = 60; 129 130 /* 131 * MDI falls back to discovery of all paths when a bus_config_one fails. 132 * The following parameters can be used to tune this operation. 133 * 134 * mdi_path_discovery_boot 135 * Number of times path discovery will be attempted during early boot. 136 * Probably there is no reason to ever set this value to greater than one. 137 * 138 * mdi_path_discovery_postboot 139 * Number of times path discovery will be attempted after early boot. 140 * Set it to a minimum of two to allow for discovery of iscsi paths which 141 * may happen very late during booting. 142 * 143 * mdi_path_discovery_interval 144 * Minimum number of seconds MDI will wait between successive discovery 145 * of all paths. Set it to -1 to disable discovery of all paths. 
146 */ 147 static int mdi_path_discovery_boot = 1; 148 static int mdi_path_discovery_postboot = 2; 149 static int mdi_path_discovery_interval = 10; 150 151 /* 152 * number of seconds the asynchronous configuration thread will sleep idle 153 * before exiting. 154 */ 155 static int mdi_async_config_idle_time = 600; 156 157 static int mdi_bus_config_cache_hash_size = 256; 158 159 /* turns off multithreaded configuration for certain operations */ 160 static int mdi_mtc_off = 0; 161 162 /* 163 * The "path" to a pathinfo node is identical to the /devices path to a 164 * devinfo node had the device been enumerated under a pHCI instead of 165 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 166 * This association persists across create/delete of the pathinfo nodes, 167 * but not across reboot. 168 */ 169 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 170 static int mdi_pathmap_hash_size = 256; 171 static kmutex_t mdi_pathmap_mutex; 172 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 173 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 174 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */ 175 176 /* 177 * MDI component property name/value string definitions 178 */ 179 const char *mdi_component_prop = "mpxio-component"; 180 const char *mdi_component_prop_vhci = "vhci"; 181 const char *mdi_component_prop_phci = "phci"; 182 const char *mdi_component_prop_client = "client"; 183 184 /* 185 * MDI client global unique identifier property name 186 */ 187 const char *mdi_client_guid_prop = "client-guid"; 188 189 /* 190 * MDI client load balancing property name/value string definitions 191 */ 192 const char *mdi_load_balance = "load-balance"; 193 const char *mdi_load_balance_none = "none"; 194 const char *mdi_load_balance_rr = "round-robin"; 195 const char *mdi_load_balance_lba = "logical-block"; 196 197 /* 198 * Obsolete vHCI class definition; to be removed after Leadville update 199 */ 200 const char 
*mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 201 202 static char vhci_greeting[] = 203 "\tThere already exists one vHCI driver for class %s\n" 204 "\tOnly one vHCI driver for each class is allowed\n"; 205 206 /* 207 * Static function prototypes 208 */ 209 static int i_mdi_phci_offline(dev_info_t *, uint_t); 210 static int i_mdi_client_offline(dev_info_t *, uint_t); 211 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 212 static void i_mdi_phci_post_detach(dev_info_t *, 213 ddi_detach_cmd_t, int); 214 static int i_mdi_client_pre_detach(dev_info_t *, 215 ddi_detach_cmd_t); 216 static void i_mdi_client_post_detach(dev_info_t *, 217 ddi_detach_cmd_t, int); 218 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 219 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 220 static int i_mdi_lba_lb(mdi_client_t *ct, 221 mdi_pathinfo_t **ret_pip, struct buf *buf); 222 static void i_mdi_pm_hold_client(mdi_client_t *, int); 223 static void i_mdi_pm_rele_client(mdi_client_t *, int); 224 static void i_mdi_pm_reset_client(mdi_client_t *); 225 static int i_mdi_power_all_phci(mdi_client_t *); 226 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 227 228 229 /* 230 * Internal mdi_pathinfo node functions 231 */ 232 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 233 234 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 235 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 236 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 237 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 238 static void i_mdi_phci_unlock(mdi_phci_t *); 239 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 240 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 241 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 242 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 243 mdi_client_t *); 244 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 245 static void 
i_mdi_client_remove_path(mdi_client_t *, 246 mdi_pathinfo_t *); 247 248 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 249 mdi_pathinfo_state_t, int); 250 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 251 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 252 char **, int); 253 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 254 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 255 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 256 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 257 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 258 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 259 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 260 static void i_mdi_client_update_state(mdi_client_t *); 261 static int i_mdi_client_compute_state(mdi_client_t *, 262 mdi_phci_t *); 263 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 264 static void i_mdi_client_unlock(mdi_client_t *); 265 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 266 static mdi_client_t *i_devi_get_client(dev_info_t *); 267 /* 268 * NOTE: this will be removed once the NWS files are changed to use the new 269 * mdi_{enable,disable}_path interfaces 270 */ 271 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 272 int, int); 273 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 274 mdi_vhci_t *vh, int flags, int op); 275 /* 276 * Failover related function prototypes 277 */ 278 static int i_mdi_failover(void *); 279 280 /* 281 * misc internal functions 282 */ 283 static int i_mdi_get_hash_key(char *); 284 static int i_map_nvlist_error_to_mdi(int); 285 static void i_mdi_report_path_state(mdi_client_t *, 286 mdi_pathinfo_t *); 287 288 static void setup_vhci_cache(mdi_vhci_t *); 289 static int destroy_vhci_cache(mdi_vhci_t *); 290 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 
291 static boolean_t stop_vhcache_flush_thread(void *, int); 292 static void free_string_array(char **, int); 293 static void free_vhcache_phci(mdi_vhcache_phci_t *); 294 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 295 static void free_vhcache_client(mdi_vhcache_client_t *); 296 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 297 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 298 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 299 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 300 static void vhcache_pi_add(mdi_vhci_config_t *, 301 struct mdi_pathinfo *); 302 static void vhcache_pi_remove(mdi_vhci_config_t *, 303 struct mdi_pathinfo *); 304 static void free_phclient_path_list(mdi_phys_path_t *); 305 static void sort_vhcache_paths(mdi_vhcache_client_t *); 306 static int flush_vhcache(mdi_vhci_config_t *, int); 307 static void vhcache_dirty(mdi_vhci_config_t *); 308 static void free_async_client_config(mdi_async_client_config_t *); 309 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 310 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 311 static nvlist_t *read_on_disk_vhci_cache(char *); 312 extern int fread_nvlist(char *, nvlist_t **); 313 extern int fwrite_nvlist(char *, nvlist_t *); 314 315 /* called once when first vhci registers with mdi */ 316 static void 317 i_mdi_init() 318 { 319 static int initialized = 0; 320 321 if (initialized) 322 return; 323 initialized = 1; 324 325 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 326 327 /* Create our taskq resources */ 328 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 329 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 330 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 331 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 332 333 /* Allocate ['path_instance' <-> "path"] maps */ 334 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 335 mdi_pathmap_bypath = mod_hash_create_strhash( 
336 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 337 mod_hash_null_valdtor); 338 mdi_pathmap_byinstance = mod_hash_create_idhash( 339 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 340 mod_hash_null_valdtor); 341 mdi_pathmap_sbyinstance = mod_hash_create_idhash( 342 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size, 343 mod_hash_null_valdtor); 344 } 345 346 /* 347 * mdi_get_component_type(): 348 * Return mpxio component type 349 * Return Values: 350 * MDI_COMPONENT_NONE 351 * MDI_COMPONENT_VHCI 352 * MDI_COMPONENT_PHCI 353 * MDI_COMPONENT_CLIENT 354 * XXX This doesn't work under multi-level MPxIO and should be 355 * removed when clients migrate mdi_component_is_*() interfaces. 356 */ 357 int 358 mdi_get_component_type(dev_info_t *dip) 359 { 360 return (DEVI(dip)->devi_mdi_component); 361 } 362 363 /* 364 * mdi_vhci_register(): 365 * Register a vHCI module with the mpxio framework 366 * mdi_vhci_register() is called by vHCI drivers to register the 367 * 'class_driver' vHCI driver and its MDI entrypoints with the 368 * mpxio framework. The vHCI driver must call this interface as 369 * part of its attach(9e) handler. 370 * Competing threads may try to attach mdi_vhci_register() as 371 * the vHCI drivers are loaded and attached as a result of pHCI 372 * driver instance registration (mdi_phci_register()) with the 373 * framework. 374 * Return Values: 375 * MDI_SUCCESS 376 * MDI_FAILURE 377 */ 378 /*ARGSUSED*/ 379 int 380 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 381 int flags) 382 { 383 mdi_vhci_t *vh = NULL; 384 385 /* Registrant can't be older */ 386 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 387 388 #ifdef DEBUG 389 /* 390 * IB nexus driver is loaded only when IB hardware is present. 391 * In order to be able to do this there is a need to drive the loading 392 * and attaching of the IB nexus driver (especially when an IB hardware 393 * is dynamically plugged in) when an IB HCA driver (PHCI) 394 * is being attached. 
Unfortunately this gets into the limitations 395 * of devfs as there seems to be no clean way to drive configuration 396 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 397 * for IB. 398 */ 399 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 400 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 401 #endif 402 403 i_mdi_init(); 404 405 mutex_enter(&mdi_mutex); 406 /* 407 * Scan for already registered vhci 408 */ 409 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 410 if (strcmp(vh->vh_class, class) == 0) { 411 /* 412 * vHCI has already been created. Check for valid 413 * vHCI ops registration. We only support one vHCI 414 * module per class 415 */ 416 if (vh->vh_ops != NULL) { 417 mutex_exit(&mdi_mutex); 418 cmn_err(CE_NOTE, vhci_greeting, class); 419 return (MDI_FAILURE); 420 } 421 break; 422 } 423 } 424 425 /* 426 * if not yet created, create the vHCI component 427 */ 428 if (vh == NULL) { 429 struct client_hash *hash = NULL; 430 char *load_balance; 431 432 /* 433 * Allocate and initialize the mdi extensions 434 */ 435 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 436 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 437 KM_SLEEP); 438 vh->vh_client_table = hash; 439 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 440 (void) strcpy(vh->vh_class, class); 441 vh->vh_lb = LOAD_BALANCE_RR; 442 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 443 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 444 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 445 vh->vh_lb = LOAD_BALANCE_NONE; 446 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 447 == 0) { 448 vh->vh_lb = LOAD_BALANCE_LBA; 449 } 450 ddi_prop_free(load_balance); 451 } 452 453 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 454 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 455 456 /* 457 * Store the vHCI ops vectors 458 */ 459 vh->vh_dip = vdip; 460 vh->vh_ops = vops; 461 462 setup_vhci_cache(vh); 463 464 if (mdi_vhci_head == 
NULL) { 465 mdi_vhci_head = vh; 466 } 467 if (mdi_vhci_tail) { 468 mdi_vhci_tail->vh_next = vh; 469 } 470 mdi_vhci_tail = vh; 471 mdi_vhci_count++; 472 } 473 474 /* 475 * Claim the devfs node as a vhci component 476 */ 477 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 478 479 /* 480 * Initialize our back reference from dev_info node 481 */ 482 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 483 mutex_exit(&mdi_mutex); 484 return (MDI_SUCCESS); 485 } 486 487 /* 488 * mdi_vhci_unregister(): 489 * Unregister a vHCI module from mpxio framework 490 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 491 * of a vhci to unregister it from the framework. 492 * Return Values: 493 * MDI_SUCCESS 494 * MDI_FAILURE 495 */ 496 /*ARGSUSED*/ 497 int 498 mdi_vhci_unregister(dev_info_t *vdip, int flags) 499 { 500 mdi_vhci_t *found, *vh, *prev = NULL; 501 502 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 503 504 /* 505 * Check for invalid VHCI 506 */ 507 if ((vh = i_devi_get_vhci(vdip)) == NULL) 508 return (MDI_FAILURE); 509 510 /* 511 * Scan the list of registered vHCIs for a match 512 */ 513 mutex_enter(&mdi_mutex); 514 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 515 if (found == vh) 516 break; 517 prev = found; 518 } 519 520 if (found == NULL) { 521 mutex_exit(&mdi_mutex); 522 return (MDI_FAILURE); 523 } 524 525 /* 526 * Check the vHCI, pHCI and client count. All the pHCIs and clients 527 * should have been unregistered, before a vHCI can be 528 * unregistered. 
529 */ 530 MDI_VHCI_PHCI_LOCK(vh); 531 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 532 MDI_VHCI_PHCI_UNLOCK(vh); 533 mutex_exit(&mdi_mutex); 534 return (MDI_FAILURE); 535 } 536 MDI_VHCI_PHCI_UNLOCK(vh); 537 538 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 539 mutex_exit(&mdi_mutex); 540 return (MDI_FAILURE); 541 } 542 543 /* 544 * Remove the vHCI from the global list 545 */ 546 if (vh == mdi_vhci_head) { 547 mdi_vhci_head = vh->vh_next; 548 } else { 549 prev->vh_next = vh->vh_next; 550 } 551 if (vh == mdi_vhci_tail) { 552 mdi_vhci_tail = prev; 553 } 554 mdi_vhci_count--; 555 mutex_exit(&mdi_mutex); 556 557 vh->vh_ops = NULL; 558 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 559 DEVI(vdip)->devi_mdi_xhci = NULL; 560 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 561 kmem_free(vh->vh_client_table, 562 mdi_client_table_size * sizeof (struct client_hash)); 563 mutex_destroy(&vh->vh_phci_mutex); 564 mutex_destroy(&vh->vh_client_mutex); 565 566 kmem_free(vh, sizeof (mdi_vhci_t)); 567 return (MDI_SUCCESS); 568 } 569 570 /* 571 * i_mdi_vhci_class2vhci(): 572 * Look for a matching vHCI module given a vHCI class name 573 * Return Values: 574 * Handle to a vHCI component 575 * NULL 576 */ 577 static mdi_vhci_t * 578 i_mdi_vhci_class2vhci(char *class) 579 { 580 mdi_vhci_t *vh = NULL; 581 582 ASSERT(!MUTEX_HELD(&mdi_mutex)); 583 584 mutex_enter(&mdi_mutex); 585 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 586 if (strcmp(vh->vh_class, class) == 0) { 587 break; 588 } 589 } 590 mutex_exit(&mdi_mutex); 591 return (vh); 592 } 593 594 /* 595 * i_devi_get_vhci(): 596 * Utility function to get the handle to a vHCI component 597 * Return Values: 598 * Handle to a vHCI component 599 * NULL 600 */ 601 mdi_vhci_t * 602 i_devi_get_vhci(dev_info_t *vdip) 603 { 604 mdi_vhci_t *vh = NULL; 605 if (MDI_VHCI(vdip)) { 606 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 607 } 608 return (vh); 609 } 610 611 /* 612 * mdi_phci_register(): 613 * Register a pHCI 
module with mpxio framework 614 * mdi_phci_register() is called by pHCI drivers to register with 615 * the mpxio framework and a specific 'class_driver' vHCI. The 616 * pHCI driver must call this interface as part of its attach(9e) 617 * handler. 618 * Return Values: 619 * MDI_SUCCESS 620 * MDI_FAILURE 621 */ 622 /*ARGSUSED*/ 623 int 624 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 625 { 626 mdi_phci_t *ph; 627 mdi_vhci_t *vh; 628 char *data; 629 630 /* 631 * Some subsystems, like fcp, perform pHCI registration from a 632 * different thread than the one doing the pHCI attach(9E) - the 633 * driver attach code is waiting for this other thread to complete. 634 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 635 * (indicating that some thread has done an ndi_devi_enter of parent) 636 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 637 */ 638 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 639 640 /* 641 * Check for mpxio-disable property. Enable mpxio if the property is 642 * missing or not set to "yes". 643 * If the property is set to "yes" then emit a brief message. 
644 */ 645 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 646 &data) == DDI_SUCCESS)) { 647 if (strcmp(data, "yes") == 0) { 648 MDI_DEBUG(1, (MDI_CONT, pdip, 649 "?multipath capabilities disabled via %s.conf.", 650 ddi_driver_name(pdip))); 651 ddi_prop_free(data); 652 return (MDI_FAILURE); 653 } 654 ddi_prop_free(data); 655 } 656 657 /* 658 * Search for a matching vHCI 659 */ 660 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 661 if (vh == NULL) { 662 return (MDI_FAILURE); 663 } 664 665 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 666 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 667 ph->ph_dip = pdip; 668 ph->ph_vhci = vh; 669 ph->ph_next = NULL; 670 ph->ph_unstable = 0; 671 ph->ph_vprivate = 0; 672 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 673 674 MDI_PHCI_LOCK(ph); 675 MDI_PHCI_SET_POWER_UP(ph); 676 MDI_PHCI_UNLOCK(ph); 677 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 678 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 679 680 vhcache_phci_add(vh->vh_config, ph); 681 682 MDI_VHCI_PHCI_LOCK(vh); 683 if (vh->vh_phci_head == NULL) { 684 vh->vh_phci_head = ph; 685 } 686 if (vh->vh_phci_tail) { 687 vh->vh_phci_tail->ph_next = ph; 688 } 689 vh->vh_phci_tail = ph; 690 vh->vh_phci_count++; 691 MDI_VHCI_PHCI_UNLOCK(vh); 692 693 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 694 return (MDI_SUCCESS); 695 } 696 697 /* 698 * mdi_phci_unregister(): 699 * Unregister a pHCI module from mpxio framework 700 * mdi_phci_unregister() is called by the pHCI drivers from their 701 * detach(9E) handler to unregister their instances from the 702 * framework. 
703 * Return Values: 704 * MDI_SUCCESS 705 * MDI_FAILURE 706 */ 707 /*ARGSUSED*/ 708 int 709 mdi_phci_unregister(dev_info_t *pdip, int flags) 710 { 711 mdi_vhci_t *vh; 712 mdi_phci_t *ph; 713 mdi_phci_t *tmp; 714 mdi_phci_t *prev = NULL; 715 mdi_pathinfo_t *pip; 716 717 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 718 719 ph = i_devi_get_phci(pdip); 720 if (ph == NULL) { 721 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI")); 722 return (MDI_FAILURE); 723 } 724 725 vh = ph->ph_vhci; 726 ASSERT(vh != NULL); 727 if (vh == NULL) { 728 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI")); 729 return (MDI_FAILURE); 730 } 731 732 MDI_VHCI_PHCI_LOCK(vh); 733 tmp = vh->vh_phci_head; 734 while (tmp) { 735 if (tmp == ph) { 736 break; 737 } 738 prev = tmp; 739 tmp = tmp->ph_next; 740 } 741 742 if (ph == vh->vh_phci_head) { 743 vh->vh_phci_head = ph->ph_next; 744 } else { 745 prev->ph_next = ph->ph_next; 746 } 747 748 if (ph == vh->vh_phci_tail) { 749 vh->vh_phci_tail = prev; 750 } 751 752 vh->vh_phci_count--; 753 MDI_VHCI_PHCI_UNLOCK(vh); 754 755 /* Walk remaining pathinfo nodes and disassociate them from pHCI */ 756 MDI_PHCI_LOCK(ph); 757 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip; 758 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) 759 MDI_PI(pip)->pi_phci = NULL; 760 MDI_PHCI_UNLOCK(ph); 761 762 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 763 ESC_DDI_INITIATOR_UNREGISTER); 764 vhcache_phci_remove(vh->vh_config, ph); 765 cv_destroy(&ph->ph_unstable_cv); 766 mutex_destroy(&ph->ph_mutex); 767 kmem_free(ph, sizeof (mdi_phci_t)); 768 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 769 DEVI(pdip)->devi_mdi_xhci = NULL; 770 return (MDI_SUCCESS); 771 } 772 773 /* 774 * i_devi_get_phci(): 775 * Utility function to return the phci extensions. 
776 */ 777 static mdi_phci_t * 778 i_devi_get_phci(dev_info_t *pdip) 779 { 780 mdi_phci_t *ph = NULL; 781 782 if (MDI_PHCI(pdip)) { 783 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 784 } 785 return (ph); 786 } 787 788 /* 789 * Single thread mdi entry into devinfo node for modifying its children. 790 * If necessary we perform an ndi_devi_enter of the vHCI before doing 791 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 792 * for the vHCI and one for the pHCI. 793 */ 794 void 795 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 796 { 797 dev_info_t *vdip; 798 int vcircular, pcircular; 799 800 /* Verify calling context */ 801 ASSERT(MDI_PHCI(phci_dip)); 802 vdip = mdi_devi_get_vdip(phci_dip); 803 ASSERT(vdip); /* A pHCI always has a vHCI */ 804 805 /* 806 * If pHCI is detaching then the framework has already entered the 807 * vHCI on a threads that went down the code path leading to 808 * detach_node(). This framework enter of the vHCI during pHCI 809 * detach is done to avoid deadlock with vHCI power management 810 * operations which enter the vHCI and the enter down the path 811 * to the pHCI. If pHCI is detaching then we piggyback this calls 812 * enter of the vHCI on frameworks vHCI enter that has already 813 * occurred - this is OK because we know that the framework thread 814 * doing detach is waiting for our completion. 815 * 816 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 817 * race with detach - but we can't do that because the framework has 818 * already entered the parent, so we have some complexity instead. 
819 */ 820 for (;;) { 821 if (ndi_devi_tryenter(vdip, &vcircular)) { 822 ASSERT(vcircular != -1); 823 if (DEVI_IS_DETACHING(phci_dip)) { 824 ndi_devi_exit(vdip, vcircular); 825 vcircular = -1; 826 } 827 break; 828 } else if (DEVI_IS_DETACHING(phci_dip)) { 829 vcircular = -1; 830 break; 831 } else if (servicing_interrupt()) { 832 /* 833 * Don't delay an interrupt (and ensure adaptive 834 * mutex inversion support). 835 */ 836 ndi_devi_enter(vdip, &vcircular); 837 break; 838 } else { 839 delay_random(mdi_delay); 840 } 841 } 842 843 ndi_devi_enter(phci_dip, &pcircular); 844 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 845 } 846 847 /* 848 * Attempt to mdi_devi_enter. 849 */ 850 int 851 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 852 { 853 dev_info_t *vdip; 854 int vcircular, pcircular; 855 856 /* Verify calling context */ 857 ASSERT(MDI_PHCI(phci_dip)); 858 vdip = mdi_devi_get_vdip(phci_dip); 859 ASSERT(vdip); /* A pHCI always has a vHCI */ 860 861 if (ndi_devi_tryenter(vdip, &vcircular)) { 862 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 863 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 864 return (1); /* locked */ 865 } 866 ndi_devi_exit(vdip, vcircular); 867 } 868 return (0); /* busy */ 869 } 870 871 /* 872 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
873 */ 874 void 875 mdi_devi_exit(dev_info_t *phci_dip, int circular) 876 { 877 dev_info_t *vdip; 878 int vcircular, pcircular; 879 880 /* Verify calling context */ 881 ASSERT(MDI_PHCI(phci_dip)); 882 vdip = mdi_devi_get_vdip(phci_dip); 883 ASSERT(vdip); /* A pHCI always has a vHCI */ 884 885 /* extract two circular recursion values from single int */ 886 pcircular = (short)(circular & 0xFFFF); 887 vcircular = (short)((circular >> 16) & 0xFFFF); 888 889 ndi_devi_exit(phci_dip, pcircular); 890 if (vcircular != -1) 891 ndi_devi_exit(vdip, vcircular); 892 } 893 894 /* 895 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 896 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 897 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 898 * with vHCI power management code during path online/offline. Each 899 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 900 * occur within the scope of an active mdi_devi_enter that establishes the 901 * circular value. 902 */ 903 void 904 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 905 { 906 int pcircular; 907 908 /* Verify calling context */ 909 ASSERT(MDI_PHCI(phci_dip)); 910 911 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 912 ndi_hold_devi(phci_dip); 913 914 pcircular = (short)(circular & 0xFFFF); 915 ndi_devi_exit(phci_dip, pcircular); 916 } 917 918 void 919 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 920 { 921 int pcircular; 922 923 /* Verify calling context */ 924 ASSERT(MDI_PHCI(phci_dip)); 925 926 ndi_devi_enter(phci_dip, &pcircular); 927 928 /* Drop hold from mdi_devi_exit_phci. 
*/ 929 ndi_rele_devi(phci_dip); 930 931 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 932 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 933 } 934 935 /* 936 * mdi_devi_get_vdip(): 937 * given a pHCI dip return vHCI dip 938 */ 939 dev_info_t * 940 mdi_devi_get_vdip(dev_info_t *pdip) 941 { 942 mdi_phci_t *ph; 943 944 ph = i_devi_get_phci(pdip); 945 if (ph && ph->ph_vhci) 946 return (ph->ph_vhci->vh_dip); 947 return (NULL); 948 } 949 950 /* 951 * mdi_devi_pdip_entered(): 952 * Return 1 if we are vHCI and have done an ndi_devi_enter 953 * of a pHCI 954 */ 955 int 956 mdi_devi_pdip_entered(dev_info_t *vdip) 957 { 958 mdi_vhci_t *vh; 959 mdi_phci_t *ph; 960 961 vh = i_devi_get_vhci(vdip); 962 if (vh == NULL) 963 return (0); 964 965 MDI_VHCI_PHCI_LOCK(vh); 966 ph = vh->vh_phci_head; 967 while (ph) { 968 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 969 MDI_VHCI_PHCI_UNLOCK(vh); 970 return (1); 971 } 972 ph = ph->ph_next; 973 } 974 MDI_VHCI_PHCI_UNLOCK(vh); 975 return (0); 976 } 977 978 /* 979 * mdi_phci_path2devinfo(): 980 * Utility function to search for a valid phci device given 981 * the devfs pathname. 
982 */ 983 dev_info_t * 984 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 985 { 986 char *temp_pathname; 987 mdi_vhci_t *vh; 988 mdi_phci_t *ph; 989 dev_info_t *pdip = NULL; 990 991 vh = i_devi_get_vhci(vdip); 992 ASSERT(vh != NULL); 993 994 if (vh == NULL) { 995 /* 996 * Invalid vHCI component, return failure 997 */ 998 return (NULL); 999 } 1000 1001 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1002 MDI_VHCI_PHCI_LOCK(vh); 1003 ph = vh->vh_phci_head; 1004 while (ph != NULL) { 1005 pdip = ph->ph_dip; 1006 ASSERT(pdip != NULL); 1007 *temp_pathname = '\0'; 1008 (void) ddi_pathname(pdip, temp_pathname); 1009 if (strcmp(temp_pathname, pathname) == 0) { 1010 break; 1011 } 1012 ph = ph->ph_next; 1013 } 1014 if (ph == NULL) { 1015 pdip = NULL; 1016 } 1017 MDI_VHCI_PHCI_UNLOCK(vh); 1018 kmem_free(temp_pathname, MAXPATHLEN); 1019 return (pdip); 1020 } 1021 1022 /* 1023 * mdi_phci_get_path_count(): 1024 * get number of path information nodes associated with a given 1025 * pHCI device. 1026 */ 1027 int 1028 mdi_phci_get_path_count(dev_info_t *pdip) 1029 { 1030 mdi_phci_t *ph; 1031 int count = 0; 1032 1033 ph = i_devi_get_phci(pdip); 1034 if (ph != NULL) { 1035 count = ph->ph_path_count; 1036 } 1037 return (count); 1038 } 1039 1040 /* 1041 * i_mdi_phci_lock(): 1042 * Lock a pHCI device 1043 * Return Values: 1044 * None 1045 * Note: 1046 * The default locking order is: 1047 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1048 * But there are number of situations where locks need to be 1049 * grabbed in reverse order. This routine implements try and lock 1050 * mechanism depending on the requested parameter option. 1051 */ 1052 static void 1053 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1054 { 1055 if (pip) { 1056 /* Reverse locking is requested. 
*/ 1057 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1058 if (servicing_interrupt()) { 1059 MDI_PI_HOLD(pip); 1060 MDI_PI_UNLOCK(pip); 1061 MDI_PHCI_LOCK(ph); 1062 MDI_PI_LOCK(pip); 1063 MDI_PI_RELE(pip); 1064 break; 1065 } else { 1066 /* 1067 * tryenter failed. Try to grab again 1068 * after a small delay 1069 */ 1070 MDI_PI_HOLD(pip); 1071 MDI_PI_UNLOCK(pip); 1072 delay_random(mdi_delay); 1073 MDI_PI_LOCK(pip); 1074 MDI_PI_RELE(pip); 1075 } 1076 } 1077 } else { 1078 MDI_PHCI_LOCK(ph); 1079 } 1080 } 1081 1082 /* 1083 * i_mdi_phci_unlock(): 1084 * Unlock the pHCI component 1085 */ 1086 static void 1087 i_mdi_phci_unlock(mdi_phci_t *ph) 1088 { 1089 MDI_PHCI_UNLOCK(ph); 1090 } 1091 1092 /* 1093 * i_mdi_devinfo_create(): 1094 * create client device's devinfo node 1095 * Return Values: 1096 * dev_info 1097 * NULL 1098 * Notes: 1099 */ 1100 static dev_info_t * 1101 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1102 char **compatible, int ncompatible) 1103 { 1104 dev_info_t *cdip = NULL; 1105 1106 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1107 1108 /* Verify for duplicate entry */ 1109 cdip = i_mdi_devinfo_find(vh, name, guid); 1110 ASSERT(cdip == NULL); 1111 if (cdip) { 1112 cmn_err(CE_WARN, 1113 "i_mdi_devinfo_create: client %s@%s already exists", 1114 name ? name : "", guid ? 
guid : ""); 1115 } 1116 1117 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1118 if (cdip == NULL) 1119 goto fail; 1120 1121 /* 1122 * Create component type and Global unique identifier 1123 * properties 1124 */ 1125 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1126 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1127 goto fail; 1128 } 1129 1130 /* Decorate the node with compatible property */ 1131 if (compatible && 1132 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1133 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1134 goto fail; 1135 } 1136 1137 return (cdip); 1138 1139 fail: 1140 if (cdip) { 1141 (void) ndi_prop_remove_all(cdip); 1142 (void) ndi_devi_free(cdip); 1143 } 1144 return (NULL); 1145 } 1146 1147 /* 1148 * i_mdi_devinfo_find(): 1149 * Find a matching devinfo node for given client node name 1150 * and its guid. 1151 * Return Values: 1152 * Handle to a dev_info node or NULL 1153 */ 1154 static dev_info_t * 1155 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1156 { 1157 char *data; 1158 dev_info_t *cdip = NULL; 1159 dev_info_t *ndip = NULL; 1160 int circular; 1161 1162 ndi_devi_enter(vh->vh_dip, &circular); 1163 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1164 while ((cdip = ndip) != NULL) { 1165 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1166 1167 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1168 continue; 1169 } 1170 1171 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1172 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1173 &data) != DDI_PROP_SUCCESS) { 1174 continue; 1175 } 1176 1177 if (strcmp(data, guid) != 0) { 1178 ddi_prop_free(data); 1179 continue; 1180 } 1181 ddi_prop_free(data); 1182 break; 1183 } 1184 ndi_devi_exit(vh->vh_dip, circular); 1185 return (cdip); 1186 } 1187 1188 /* 1189 * i_mdi_devinfo_remove(): 1190 * Remove a client device node 1191 */ 1192 static int 1193 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1194 { 1195 int rv = MDI_SUCCESS; 1196 
1197 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1198 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1199 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1200 if (rv != NDI_SUCCESS) { 1201 MDI_DEBUG(1, (MDI_NOTE, cdip, 1202 "!failed: cdip %p", (void *)cdip)); 1203 } 1204 /* 1205 * Convert to MDI error code 1206 */ 1207 switch (rv) { 1208 case NDI_SUCCESS: 1209 rv = MDI_SUCCESS; 1210 break; 1211 case NDI_BUSY: 1212 rv = MDI_BUSY; 1213 break; 1214 default: 1215 rv = MDI_FAILURE; 1216 break; 1217 } 1218 } 1219 return (rv); 1220 } 1221 1222 /* 1223 * i_devi_get_client() 1224 * Utility function to get mpxio component extensions 1225 */ 1226 static mdi_client_t * 1227 i_devi_get_client(dev_info_t *cdip) 1228 { 1229 mdi_client_t *ct = NULL; 1230 1231 if (MDI_CLIENT(cdip)) { 1232 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1233 } 1234 return (ct); 1235 } 1236 1237 /* 1238 * i_mdi_is_child_present(): 1239 * Search for the presence of client device dev_info node 1240 */ 1241 static int 1242 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1243 { 1244 int rv = MDI_FAILURE; 1245 struct dev_info *dip; 1246 int circular; 1247 1248 ndi_devi_enter(vdip, &circular); 1249 dip = DEVI(vdip)->devi_child; 1250 while (dip) { 1251 if (dip == DEVI(cdip)) { 1252 rv = MDI_SUCCESS; 1253 break; 1254 } 1255 dip = dip->devi_sibling; 1256 } 1257 ndi_devi_exit(vdip, circular); 1258 return (rv); 1259 } 1260 1261 1262 /* 1263 * i_mdi_client_lock(): 1264 * Grab client component lock 1265 * Return Values: 1266 * None 1267 * Note: 1268 * The default locking order is: 1269 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1270 * But there are number of situations where locks need to be 1271 * grabbed in reverse order. This routine implements try and lock 1272 * mechanism depending on the requested parameter option. 
1273 */ 1274 static void 1275 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1276 { 1277 if (pip) { 1278 /* 1279 * Reverse locking is requested. 1280 */ 1281 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1282 if (servicing_interrupt()) { 1283 MDI_PI_HOLD(pip); 1284 MDI_PI_UNLOCK(pip); 1285 MDI_CLIENT_LOCK(ct); 1286 MDI_PI_LOCK(pip); 1287 MDI_PI_RELE(pip); 1288 break; 1289 } else { 1290 /* 1291 * tryenter failed. Try to grab again 1292 * after a small delay 1293 */ 1294 MDI_PI_HOLD(pip); 1295 MDI_PI_UNLOCK(pip); 1296 delay_random(mdi_delay); 1297 MDI_PI_LOCK(pip); 1298 MDI_PI_RELE(pip); 1299 } 1300 } 1301 } else { 1302 MDI_CLIENT_LOCK(ct); 1303 } 1304 } 1305 1306 /* 1307 * i_mdi_client_unlock(): 1308 * Unlock a client component 1309 */ 1310 static void 1311 i_mdi_client_unlock(mdi_client_t *ct) 1312 { 1313 MDI_CLIENT_UNLOCK(ct); 1314 } 1315 1316 /* 1317 * i_mdi_client_alloc(): 1318 * Allocate and initialize a client structure. Caller should 1319 * hold the vhci client lock. 1320 * Return Values: 1321 * Handle to a client component 1322 */ 1323 /*ARGSUSED*/ 1324 static mdi_client_t * 1325 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1326 { 1327 mdi_client_t *ct; 1328 1329 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1330 1331 /* 1332 * Allocate and initialize a component structure. 
1333 */ 1334 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1335 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1336 ct->ct_hnext = NULL; 1337 ct->ct_hprev = NULL; 1338 ct->ct_dip = NULL; 1339 ct->ct_vhci = vh; 1340 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1341 (void) strcpy(ct->ct_drvname, name); 1342 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1343 (void) strcpy(ct->ct_guid, lguid); 1344 ct->ct_cprivate = NULL; 1345 ct->ct_vprivate = NULL; 1346 ct->ct_flags = 0; 1347 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1348 MDI_CLIENT_LOCK(ct); 1349 MDI_CLIENT_SET_OFFLINE(ct); 1350 MDI_CLIENT_SET_DETACH(ct); 1351 MDI_CLIENT_SET_POWER_UP(ct); 1352 MDI_CLIENT_UNLOCK(ct); 1353 ct->ct_failover_flags = 0; 1354 ct->ct_failover_status = 0; 1355 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1356 ct->ct_unstable = 0; 1357 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1358 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1359 ct->ct_lb = vh->vh_lb; 1360 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1361 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1362 ct->ct_path_count = 0; 1363 ct->ct_path_head = NULL; 1364 ct->ct_path_tail = NULL; 1365 ct->ct_path_last = NULL; 1366 1367 /* 1368 * Add this client component to our client hash queue 1369 */ 1370 i_mdi_client_enlist_table(vh, ct); 1371 return (ct); 1372 } 1373 1374 /* 1375 * i_mdi_client_enlist_table(): 1376 * Attach the client device to the client hash table. Caller 1377 * should hold the vhci client lock. 
1378 */ 1379 static void 1380 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1381 { 1382 int index; 1383 struct client_hash *head; 1384 1385 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1386 1387 index = i_mdi_get_hash_key(ct->ct_guid); 1388 head = &vh->vh_client_table[index]; 1389 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1390 head->ct_hash_head = ct; 1391 head->ct_hash_count++; 1392 vh->vh_client_count++; 1393 } 1394 1395 /* 1396 * i_mdi_client_delist_table(): 1397 * Attach the client device to the client hash table. 1398 * Caller should hold the vhci client lock. 1399 */ 1400 static void 1401 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1402 { 1403 int index; 1404 char *guid; 1405 struct client_hash *head; 1406 mdi_client_t *next; 1407 mdi_client_t *last; 1408 1409 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1410 1411 guid = ct->ct_guid; 1412 index = i_mdi_get_hash_key(guid); 1413 head = &vh->vh_client_table[index]; 1414 1415 last = NULL; 1416 next = (mdi_client_t *)head->ct_hash_head; 1417 while (next != NULL) { 1418 if (next == ct) { 1419 break; 1420 } 1421 last = next; 1422 next = next->ct_hnext; 1423 } 1424 1425 if (next) { 1426 head->ct_hash_count--; 1427 if (last == NULL) { 1428 head->ct_hash_head = ct->ct_hnext; 1429 } else { 1430 last->ct_hnext = ct->ct_hnext; 1431 } 1432 ct->ct_hnext = NULL; 1433 vh->vh_client_count--; 1434 } 1435 } 1436 1437 1438 /* 1439 * i_mdi_client_free(): 1440 * Free a client component 1441 */ 1442 static int 1443 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1444 { 1445 int rv = MDI_SUCCESS; 1446 int flags = ct->ct_flags; 1447 dev_info_t *cdip; 1448 dev_info_t *vdip; 1449 1450 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1451 1452 vdip = vh->vh_dip; 1453 cdip = ct->ct_dip; 1454 1455 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1456 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1457 DEVI(cdip)->devi_mdi_client = NULL; 1458 1459 /* 1460 * Clear out back ref. 
to dev_info_t node 1461 */ 1462 ct->ct_dip = NULL; 1463 1464 /* 1465 * Remove this client from our hash queue 1466 */ 1467 i_mdi_client_delist_table(vh, ct); 1468 1469 /* 1470 * Uninitialize and free the component 1471 */ 1472 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1473 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1474 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1475 cv_destroy(&ct->ct_failover_cv); 1476 cv_destroy(&ct->ct_unstable_cv); 1477 cv_destroy(&ct->ct_powerchange_cv); 1478 mutex_destroy(&ct->ct_mutex); 1479 kmem_free(ct, sizeof (*ct)); 1480 1481 if (cdip != NULL) { 1482 MDI_VHCI_CLIENT_UNLOCK(vh); 1483 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1484 MDI_VHCI_CLIENT_LOCK(vh); 1485 } 1486 return (rv); 1487 } 1488 1489 /* 1490 * i_mdi_client_find(): 1491 * Find the client structure corresponding to a given guid 1492 * Caller should hold the vhci client lock. 1493 */ 1494 static mdi_client_t * 1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1496 { 1497 int index; 1498 struct client_hash *head; 1499 mdi_client_t *ct; 1500 1501 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1502 1503 index = i_mdi_get_hash_key(guid); 1504 head = &vh->vh_client_table[index]; 1505 1506 ct = head->ct_hash_head; 1507 while (ct != NULL) { 1508 if (strcmp(ct->ct_guid, guid) == 0 && 1509 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1510 break; 1511 } 1512 ct = ct->ct_hnext; 1513 } 1514 return (ct); 1515 } 1516 1517 /* 1518 * i_mdi_client_update_state(): 1519 * Compute and update client device state 1520 * Notes: 1521 * A client device can be in any of three possible states: 1522 * 1523 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1524 * one online/standby paths. Can tolerate failures. 1525 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1526 * no alternate paths available as standby. A failure on the online 1527 * would result in loss of access to device data. 
1528 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1529 * no paths available to access the device. 1530 */ 1531 static void 1532 i_mdi_client_update_state(mdi_client_t *ct) 1533 { 1534 int state; 1535 1536 ASSERT(MDI_CLIENT_LOCKED(ct)); 1537 state = i_mdi_client_compute_state(ct, NULL); 1538 MDI_CLIENT_SET_STATE(ct, state); 1539 } 1540 1541 /* 1542 * i_mdi_client_compute_state(): 1543 * Compute client device state 1544 * 1545 * mdi_phci_t * Pointer to pHCI structure which should 1546 * while computing the new value. Used by 1547 * i_mdi_phci_offline() to find the new 1548 * client state after DR of a pHCI. 1549 */ 1550 static int 1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1552 { 1553 int state; 1554 int online_count = 0; 1555 int standby_count = 0; 1556 mdi_pathinfo_t *pip, *next; 1557 1558 ASSERT(MDI_CLIENT_LOCKED(ct)); 1559 pip = ct->ct_path_head; 1560 while (pip != NULL) { 1561 MDI_PI_LOCK(pip); 1562 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1563 if (MDI_PI(pip)->pi_phci == ph) { 1564 MDI_PI_UNLOCK(pip); 1565 pip = next; 1566 continue; 1567 } 1568 1569 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1570 == MDI_PATHINFO_STATE_ONLINE) 1571 online_count++; 1572 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1573 == MDI_PATHINFO_STATE_STANDBY) 1574 standby_count++; 1575 MDI_PI_UNLOCK(pip); 1576 pip = next; 1577 } 1578 1579 if (online_count == 0) { 1580 if (standby_count == 0) { 1581 state = MDI_CLIENT_STATE_FAILED; 1582 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1583 "client state failed: ct = %p", (void *)ct)); 1584 } else if (standby_count == 1) { 1585 state = MDI_CLIENT_STATE_DEGRADED; 1586 } else { 1587 state = MDI_CLIENT_STATE_OPTIMAL; 1588 } 1589 } else if (online_count == 1) { 1590 if (standby_count == 0) { 1591 state = MDI_CLIENT_STATE_DEGRADED; 1592 } else { 1593 state = MDI_CLIENT_STATE_OPTIMAL; 1594 } 1595 } else { 1596 state = MDI_CLIENT_STATE_OPTIMAL; 1597 } 1598 return (state); 1599 } 1600 
1601 /* 1602 * i_mdi_client2devinfo(): 1603 * Utility function 1604 */ 1605 dev_info_t * 1606 i_mdi_client2devinfo(mdi_client_t *ct) 1607 { 1608 return (ct->ct_dip); 1609 } 1610 1611 /* 1612 * mdi_client_path2_devinfo(): 1613 * Given the parent devinfo and child devfs pathname, search for 1614 * a valid devfs node handle. 1615 */ 1616 dev_info_t * 1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1618 { 1619 dev_info_t *cdip = NULL; 1620 dev_info_t *ndip = NULL; 1621 char *temp_pathname; 1622 int circular; 1623 1624 /* 1625 * Allocate temp buffer 1626 */ 1627 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1628 1629 /* 1630 * Lock parent against changes 1631 */ 1632 ndi_devi_enter(vdip, &circular); 1633 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1634 while ((cdip = ndip) != NULL) { 1635 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1636 1637 *temp_pathname = '\0'; 1638 (void) ddi_pathname(cdip, temp_pathname); 1639 if (strcmp(temp_pathname, pathname) == 0) { 1640 break; 1641 } 1642 } 1643 /* 1644 * Release devinfo lock 1645 */ 1646 ndi_devi_exit(vdip, circular); 1647 1648 /* 1649 * Free the temp buffer 1650 */ 1651 kmem_free(temp_pathname, MAXPATHLEN); 1652 return (cdip); 1653 } 1654 1655 /* 1656 * mdi_client_get_path_count(): 1657 * Utility function to get number of path information nodes 1658 * associated with a given client device. 
1659 */ 1660 int 1661 mdi_client_get_path_count(dev_info_t *cdip) 1662 { 1663 mdi_client_t *ct; 1664 int count = 0; 1665 1666 ct = i_devi_get_client(cdip); 1667 if (ct != NULL) { 1668 count = ct->ct_path_count; 1669 } 1670 return (count); 1671 } 1672 1673 1674 /* 1675 * i_mdi_get_hash_key(): 1676 * Create a hash using strings as keys 1677 * 1678 */ 1679 static int 1680 i_mdi_get_hash_key(char *str) 1681 { 1682 uint32_t g, hash = 0; 1683 char *p; 1684 1685 for (p = str; *p != '\0'; p++) { 1686 g = *p; 1687 hash += g; 1688 } 1689 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1690 } 1691 1692 /* 1693 * mdi_get_lb_policy(): 1694 * Get current load balancing policy for a given client device 1695 */ 1696 client_lb_t 1697 mdi_get_lb_policy(dev_info_t *cdip) 1698 { 1699 client_lb_t lb = LOAD_BALANCE_NONE; 1700 mdi_client_t *ct; 1701 1702 ct = i_devi_get_client(cdip); 1703 if (ct != NULL) { 1704 lb = ct->ct_lb; 1705 } 1706 return (lb); 1707 } 1708 1709 /* 1710 * mdi_set_lb_region_size(): 1711 * Set current region size for the load-balance 1712 */ 1713 int 1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1715 { 1716 mdi_client_t *ct; 1717 int rv = MDI_FAILURE; 1718 1719 ct = i_devi_get_client(cdip); 1720 if (ct != NULL && ct->ct_lb_args != NULL) { 1721 ct->ct_lb_args->region_size = region_size; 1722 rv = MDI_SUCCESS; 1723 } 1724 return (rv); 1725 } 1726 1727 /* 1728 * mdi_Set_lb_policy(): 1729 * Set current load balancing policy for a given client device 1730 */ 1731 int 1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1733 { 1734 mdi_client_t *ct; 1735 int rv = MDI_FAILURE; 1736 1737 ct = i_devi_get_client(cdip); 1738 if (ct != NULL) { 1739 ct->ct_lb = lb; 1740 rv = MDI_SUCCESS; 1741 } 1742 return (rv); 1743 } 1744 1745 /* 1746 * mdi_failover(): 1747 * failover function called by the vHCI drivers to initiate 1748 * a failover operation. This is typically due to non-availability 1749 * of online paths to route I/O requests. 
Failover can be 1750 * triggered through user application also. 1751 * 1752 * The vHCI driver calls mdi_failover() to initiate a failover 1753 * operation. mdi_failover() calls back into the vHCI driver's 1754 * vo_failover() entry point to perform the actual failover 1755 * operation. The reason for requiring the vHCI driver to 1756 * initiate failover by calling mdi_failover(), instead of directly 1757 * executing vo_failover() itself, is to ensure that the mdi 1758 * framework can keep track of the client state properly. 1759 * Additionally, mdi_failover() provides as a convenience the 1760 * option of performing the failover operation synchronously or 1761 * asynchronously 1762 * 1763 * Upon successful completion of the failover operation, the 1764 * paths that were previously ONLINE will be in the STANDBY state, 1765 * and the newly activated paths will be in the ONLINE state. 1766 * 1767 * The flags modifier determines whether the activation is done 1768 * synchronously: MDI_FAILOVER_SYNC 1769 * Return Values: 1770 * MDI_SUCCESS 1771 * MDI_FAILURE 1772 * MDI_BUSY 1773 */ 1774 /*ARGSUSED*/ 1775 int 1776 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1777 { 1778 int rv; 1779 mdi_client_t *ct; 1780 1781 ct = i_devi_get_client(cdip); 1782 ASSERT(ct != NULL); 1783 if (ct == NULL) { 1784 /* cdip is not a valid client device. Nothing more to do. */ 1785 return (MDI_FAILURE); 1786 } 1787 1788 MDI_CLIENT_LOCK(ct); 1789 1790 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1791 /* A path to the client is being freed */ 1792 MDI_CLIENT_UNLOCK(ct); 1793 return (MDI_BUSY); 1794 } 1795 1796 1797 if (MDI_CLIENT_IS_FAILED(ct)) { 1798 /* 1799 * Client is in failed state. Nothing more to do. 
1800 */ 1801 MDI_CLIENT_UNLOCK(ct); 1802 return (MDI_FAILURE); 1803 } 1804 1805 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1806 /* 1807 * Failover is already in progress; return BUSY 1808 */ 1809 MDI_CLIENT_UNLOCK(ct); 1810 return (MDI_BUSY); 1811 } 1812 /* 1813 * Make sure that mdi_pathinfo node state changes are processed. 1814 * We do not allow failovers to progress while client path state 1815 * changes are in progress 1816 */ 1817 if (ct->ct_unstable) { 1818 if (flags == MDI_FAILOVER_ASYNC) { 1819 MDI_CLIENT_UNLOCK(ct); 1820 return (MDI_BUSY); 1821 } else { 1822 while (ct->ct_unstable) 1823 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1824 } 1825 } 1826 1827 /* 1828 * Client device is in stable state. Before proceeding, perform sanity 1829 * checks again. 1830 */ 1831 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1832 (!i_ddi_devi_attached(ct->ct_dip))) { 1833 /* 1834 * Client is in failed state. Nothing more to do. 1835 */ 1836 MDI_CLIENT_UNLOCK(ct); 1837 return (MDI_FAILURE); 1838 } 1839 1840 /* 1841 * Set the client state as failover in progress. 1842 */ 1843 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1844 ct->ct_failover_flags = flags; 1845 MDI_CLIENT_UNLOCK(ct); 1846 1847 if (flags == MDI_FAILOVER_ASYNC) { 1848 /* 1849 * Submit the initiate failover request via CPR safe 1850 * taskq threads. 1851 */ 1852 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1853 ct, KM_SLEEP); 1854 return (MDI_ACCEPT); 1855 } else { 1856 /* 1857 * Synchronous failover mode. Typically invoked from the user 1858 * land. 1859 */ 1860 rv = i_mdi_failover(ct); 1861 } 1862 return (rv); 1863 } 1864 1865 /* 1866 * i_mdi_failover(): 1867 * internal failover function. Invokes vHCI drivers failover 1868 * callback function and process the failover status 1869 * Return Values: 1870 * None 1871 * 1872 * Note: A client device in failover state can not be detached or freed. 
1873 */ 1874 static int 1875 i_mdi_failover(void *arg) 1876 { 1877 int rv = MDI_SUCCESS; 1878 mdi_client_t *ct = (mdi_client_t *)arg; 1879 mdi_vhci_t *vh = ct->ct_vhci; 1880 1881 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1882 1883 if (vh->vh_ops->vo_failover != NULL) { 1884 /* 1885 * Call vHCI drivers callback routine 1886 */ 1887 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1888 ct->ct_failover_flags); 1889 } 1890 1891 MDI_CLIENT_LOCK(ct); 1892 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1893 1894 /* 1895 * Save the failover return status 1896 */ 1897 ct->ct_failover_status = rv; 1898 1899 /* 1900 * As a result of failover, client status would have been changed. 1901 * Update the client state and wake up anyone waiting on this client 1902 * device. 1903 */ 1904 i_mdi_client_update_state(ct); 1905 1906 cv_broadcast(&ct->ct_failover_cv); 1907 MDI_CLIENT_UNLOCK(ct); 1908 return (rv); 1909 } 1910 1911 /* 1912 * Load balancing is logical block. 1913 * IOs within the range described by region_size 1914 * would go on the same path. This would improve the 1915 * performance by cache-hit on some of the RAID devices. 1916 * Search only for online paths(At some point we 1917 * may want to balance across target ports). 1918 * If no paths are found then default to round-robin. 
1919 */ 1920 static int 1921 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1922 { 1923 int path_index = -1; 1924 int online_path_count = 0; 1925 int online_nonpref_path_count = 0; 1926 int region_size = ct->ct_lb_args->region_size; 1927 mdi_pathinfo_t *pip; 1928 mdi_pathinfo_t *next; 1929 int preferred, path_cnt; 1930 1931 pip = ct->ct_path_head; 1932 while (pip) { 1933 MDI_PI_LOCK(pip); 1934 if (MDI_PI(pip)->pi_state == 1935 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1936 online_path_count++; 1937 } else if (MDI_PI(pip)->pi_state == 1938 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1939 online_nonpref_path_count++; 1940 } 1941 next = (mdi_pathinfo_t *) 1942 MDI_PI(pip)->pi_client_link; 1943 MDI_PI_UNLOCK(pip); 1944 pip = next; 1945 } 1946 /* if found any online/preferred then use this type */ 1947 if (online_path_count > 0) { 1948 path_cnt = online_path_count; 1949 preferred = 1; 1950 } else if (online_nonpref_path_count > 0) { 1951 path_cnt = online_nonpref_path_count; 1952 preferred = 0; 1953 } else { 1954 path_cnt = 0; 1955 } 1956 if (path_cnt) { 1957 path_index = (bp->b_blkno >> region_size) % path_cnt; 1958 pip = ct->ct_path_head; 1959 while (pip && path_index != -1) { 1960 MDI_PI_LOCK(pip); 1961 if (path_index == 0 && 1962 (MDI_PI(pip)->pi_state == 1963 MDI_PATHINFO_STATE_ONLINE) && 1964 MDI_PI(pip)->pi_preferred == preferred) { 1965 MDI_PI_HOLD(pip); 1966 MDI_PI_UNLOCK(pip); 1967 *ret_pip = pip; 1968 return (MDI_SUCCESS); 1969 } 1970 path_index --; 1971 next = (mdi_pathinfo_t *) 1972 MDI_PI(pip)->pi_client_link; 1973 MDI_PI_UNLOCK(pip); 1974 pip = next; 1975 } 1976 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1977 "lba %llx: path %s %p", 1978 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1979 } 1980 return (MDI_FAILURE); 1981 } 1982 1983 /* 1984 * mdi_select_path(): 1985 * select a path to access a client device. 
*
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online path is available to
 *		select, the framework automatically selects a suitable path
 *		for routing the I/O request.  If a failover operation is active
 *		for this client device the call shall be failed with MDI_BUSY
 *		error code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is configured through the vHCI driver's configuration
 *		file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the "arg" argument depends
 *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
 *		then the argument is the "path instance" of the path to select.
 *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *		"start_pip". A non NULL "start_pip" is the starting point to
 *		walk and find the next appropriate path.  The following values
 *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select a
 *		STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg.
during 2020 * attach of client devices (to avoid an unnecessary failover 2021 * when the STANDBY path comes up first), during failover 2022 * (to activate a STANDBY path as ONLINE). 2023 * 2024 * The selected path is returned in a a mdi_hold_path() state 2025 * (pi_ref_cnt). Caller should release the hold by calling 2026 * mdi_rele_path(). 2027 * 2028 * Return Values: 2029 * MDI_SUCCESS - Completed successfully 2030 * MDI_BUSY - Client device is busy failing over 2031 * MDI_NOPATH - Client device is online, but no valid path are 2032 * available to access this client device 2033 * MDI_FAILURE - Invalid client device or state 2034 * MDI_DEVI_ONLINING 2035 * - Client device (struct dev_info state) is in 2036 * onlining state. 2037 */ 2038 2039 /*ARGSUSED*/ 2040 int 2041 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2042 void *arg, mdi_pathinfo_t **ret_pip) 2043 { 2044 mdi_client_t *ct; 2045 mdi_pathinfo_t *pip; 2046 mdi_pathinfo_t *next; 2047 mdi_pathinfo_t *head; 2048 mdi_pathinfo_t *start; 2049 client_lb_t lbp; /* load balancing policy */ 2050 int sb = 1; /* standard behavior */ 2051 int preferred = 1; /* preferred path */ 2052 int cond, cont = 1; 2053 int retry = 0; 2054 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2055 int path_instance; /* request specific path instance */ 2056 2057 /* determine type of arg based on flags */ 2058 if (flags & MDI_SELECT_PATH_INSTANCE) { 2059 path_instance = (int)(intptr_t)arg; 2060 start_pip = NULL; 2061 } else { 2062 path_instance = 0; 2063 start_pip = (mdi_pathinfo_t *)arg; 2064 } 2065 2066 if (flags != 0) { 2067 /* 2068 * disable default behavior 2069 */ 2070 sb = 0; 2071 } 2072 2073 *ret_pip = NULL; 2074 ct = i_devi_get_client(cdip); 2075 if (ct == NULL) { 2076 /* mdi extensions are NULL, Nothing more to do */ 2077 return (MDI_FAILURE); 2078 } 2079 2080 MDI_CLIENT_LOCK(ct); 2081 2082 if (sb) { 2083 if (MDI_CLIENT_IS_FAILED(ct)) { 2084 /* 2085 * Client is not ready to accept any I/O requests. 
2086 * Fail this request. 2087 */ 2088 MDI_DEBUG(2, (MDI_NOTE, cdip, 2089 "client state offline ct = %p", (void *)ct)); 2090 MDI_CLIENT_UNLOCK(ct); 2091 return (MDI_FAILURE); 2092 } 2093 2094 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2095 /* 2096 * Check for Failover is in progress. If so tell the 2097 * caller that this device is busy. 2098 */ 2099 MDI_DEBUG(2, (MDI_NOTE, cdip, 2100 "client failover in progress ct = %p", 2101 (void *)ct)); 2102 MDI_CLIENT_UNLOCK(ct); 2103 return (MDI_BUSY); 2104 } 2105 2106 /* 2107 * Check to see whether the client device is attached. 2108 * If not so, let the vHCI driver manually select a path 2109 * (standby) and let the probe/attach process to continue. 2110 */ 2111 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2112 MDI_DEBUG(4, (MDI_NOTE, cdip, 2113 "devi is onlining ct = %p", (void *)ct)); 2114 MDI_CLIENT_UNLOCK(ct); 2115 return (MDI_DEVI_ONLINING); 2116 } 2117 } 2118 2119 /* 2120 * Cache in the client list head. If head of the list is NULL 2121 * return MDI_NOPATH 2122 */ 2123 head = ct->ct_path_head; 2124 if (head == NULL) { 2125 MDI_CLIENT_UNLOCK(ct); 2126 return (MDI_NOPATH); 2127 } 2128 2129 /* Caller is specifying a specific pathinfo path by path_instance */ 2130 if (path_instance) { 2131 /* search for pathinfo with correct path_instance */ 2132 for (pip = head; 2133 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2134 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2135 ; 2136 2137 /* If path can't be selected then MDI_NOPATH is returned. */ 2138 if (pip == NULL) { 2139 MDI_CLIENT_UNLOCK(ct); 2140 return (MDI_NOPATH); 2141 } 2142 2143 /* 2144 * Verify state of path. When asked to select a specific 2145 * path_instance, we select the requested path in any 2146 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2147 * We don't however select paths where the pHCI has detached. 
2148 * NOTE: last pathinfo node of an opened client device may 2149 * exist in an OFFLINE state after the pHCI associated with 2150 * that path has detached (but pi_phci will be NULL if that 2151 * has occurred). 2152 */ 2153 MDI_PI_LOCK(pip); 2154 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2155 (MDI_PI(pip)->pi_phci == NULL)) { 2156 MDI_PI_UNLOCK(pip); 2157 MDI_CLIENT_UNLOCK(ct); 2158 return (MDI_FAILURE); 2159 } 2160 2161 /* Return MDI_BUSY if we have a transient condition */ 2162 if (MDI_PI_IS_TRANSIENT(pip)) { 2163 MDI_PI_UNLOCK(pip); 2164 MDI_CLIENT_UNLOCK(ct); 2165 return (MDI_BUSY); 2166 } 2167 2168 /* 2169 * Return the path in hold state. Caller should release the 2170 * lock by calling mdi_rele_path() 2171 */ 2172 MDI_PI_HOLD(pip); 2173 MDI_PI_UNLOCK(pip); 2174 *ret_pip = pip; 2175 MDI_CLIENT_UNLOCK(ct); 2176 return (MDI_SUCCESS); 2177 } 2178 2179 /* 2180 * for non default behavior, bypass current 2181 * load balancing policy and always use LOAD_BALANCE_RR 2182 * except that the start point will be adjusted based 2183 * on the provided start_pip 2184 */ 2185 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2186 2187 switch (lbp) { 2188 case LOAD_BALANCE_NONE: 2189 /* 2190 * Load balancing is None or Alternate path mode 2191 * Start looking for a online mdi_pathinfo node starting from 2192 * last known selected path 2193 */ 2194 preferred = 1; 2195 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2196 if (pip == NULL) { 2197 pip = head; 2198 } 2199 start = pip; 2200 do { 2201 MDI_PI_LOCK(pip); 2202 /* 2203 * No need to explicitly check if the path is disabled. 2204 * Since we are checking for state == ONLINE and the 2205 * same variable is used for DISABLE/ENABLE information. 2206 */ 2207 if ((MDI_PI(pip)->pi_state == 2208 MDI_PATHINFO_STATE_ONLINE) && 2209 preferred == MDI_PI(pip)->pi_preferred) { 2210 /* 2211 * Return the path in hold state. 
Caller should 2212 * release the lock by calling mdi_rele_path() 2213 */ 2214 MDI_PI_HOLD(pip); 2215 MDI_PI_UNLOCK(pip); 2216 ct->ct_path_last = pip; 2217 *ret_pip = pip; 2218 MDI_CLIENT_UNLOCK(ct); 2219 return (MDI_SUCCESS); 2220 } 2221 2222 /* 2223 * Path is busy. 2224 */ 2225 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2226 MDI_PI_IS_TRANSIENT(pip)) 2227 retry = 1; 2228 /* 2229 * Keep looking for a next available online path 2230 */ 2231 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2232 if (next == NULL) { 2233 next = head; 2234 } 2235 MDI_PI_UNLOCK(pip); 2236 pip = next; 2237 if (start == pip && preferred) { 2238 preferred = 0; 2239 } else if (start == pip && !preferred) { 2240 cont = 0; 2241 } 2242 } while (cont); 2243 break; 2244 2245 case LOAD_BALANCE_LBA: 2246 /* 2247 * Make sure we are looking 2248 * for an online path. Otherwise, if it is for a STANDBY 2249 * path request, it will go through and fetch an ONLINE 2250 * path which is not desirable. 2251 */ 2252 if ((ct->ct_lb_args != NULL) && 2253 (ct->ct_lb_args->region_size) && bp && 2254 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2255 if (i_mdi_lba_lb(ct, ret_pip, bp) 2256 == MDI_SUCCESS) { 2257 MDI_CLIENT_UNLOCK(ct); 2258 return (MDI_SUCCESS); 2259 } 2260 } 2261 /* FALLTHROUGH */ 2262 case LOAD_BALANCE_RR: 2263 /* 2264 * Load balancing is Round Robin. Start looking for a online 2265 * mdi_pathinfo node starting from last known selected path 2266 * as the start point. If override flags are specified, 2267 * process accordingly. 2268 * If the search is already in effect(start_pip not null), 2269 * then lets just use the same path preference to continue the 2270 * traversal. 2271 */ 2272 2273 if (start_pip != NULL) { 2274 preferred = MDI_PI(start_pip)->pi_preferred; 2275 } else { 2276 preferred = 1; 2277 } 2278 2279 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2280 if (start == NULL) { 2281 pip = head; 2282 } else { 2283 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2284 if (pip == NULL) { 2285 if ( flags & MDI_SELECT_NO_PREFERRED) { 2286 /* 2287 * Return since we hit the end of list 2288 */ 2289 MDI_CLIENT_UNLOCK(ct); 2290 return (MDI_NOPATH); 2291 } 2292 2293 if (!sb) { 2294 if (preferred == 0) { 2295 /* 2296 * Looks like we have completed 2297 * the traversal as preferred 2298 * value is 0. Time to bail out. 2299 */ 2300 *ret_pip = NULL; 2301 MDI_CLIENT_UNLOCK(ct); 2302 return (MDI_NOPATH); 2303 } else { 2304 /* 2305 * Looks like we reached the 2306 * end of the list. Lets enable 2307 * traversal of non preferred 2308 * paths. 2309 */ 2310 preferred = 0; 2311 } 2312 } 2313 pip = head; 2314 } 2315 } 2316 start = pip; 2317 do { 2318 MDI_PI_LOCK(pip); 2319 if (sb) { 2320 cond = ((MDI_PI(pip)->pi_state == 2321 MDI_PATHINFO_STATE_ONLINE && 2322 MDI_PI(pip)->pi_preferred == 2323 preferred) ? 1 : 0); 2324 } else { 2325 if (flags == MDI_SELECT_ONLINE_PATH) { 2326 cond = ((MDI_PI(pip)->pi_state == 2327 MDI_PATHINFO_STATE_ONLINE && 2328 MDI_PI(pip)->pi_preferred == 2329 preferred) ? 1 : 0); 2330 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2331 cond = ((MDI_PI(pip)->pi_state == 2332 MDI_PATHINFO_STATE_STANDBY && 2333 MDI_PI(pip)->pi_preferred == 2334 preferred) ? 1 : 0); 2335 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2336 MDI_SELECT_STANDBY_PATH)) { 2337 cond = (((MDI_PI(pip)->pi_state == 2338 MDI_PATHINFO_STATE_ONLINE || 2339 (MDI_PI(pip)->pi_state == 2340 MDI_PATHINFO_STATE_STANDBY)) && 2341 MDI_PI(pip)->pi_preferred == 2342 preferred) ? 
1 : 0); 2343 } else if (flags == 2344 (MDI_SELECT_STANDBY_PATH | 2345 MDI_SELECT_ONLINE_PATH | 2346 MDI_SELECT_USER_DISABLE_PATH)) { 2347 cond = (((MDI_PI(pip)->pi_state == 2348 MDI_PATHINFO_STATE_ONLINE || 2349 (MDI_PI(pip)->pi_state == 2350 MDI_PATHINFO_STATE_STANDBY) || 2351 (MDI_PI(pip)->pi_state == 2352 (MDI_PATHINFO_STATE_ONLINE| 2353 MDI_PATHINFO_STATE_USER_DISABLE)) || 2354 (MDI_PI(pip)->pi_state == 2355 (MDI_PATHINFO_STATE_STANDBY | 2356 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2357 MDI_PI(pip)->pi_preferred == 2358 preferred) ? 1 : 0); 2359 } else if (flags == 2360 (MDI_SELECT_STANDBY_PATH | 2361 MDI_SELECT_ONLINE_PATH | 2362 MDI_SELECT_NO_PREFERRED)) { 2363 cond = (((MDI_PI(pip)->pi_state == 2364 MDI_PATHINFO_STATE_ONLINE) || 2365 (MDI_PI(pip)->pi_state == 2366 MDI_PATHINFO_STATE_STANDBY)) 2367 ? 1 : 0); 2368 } else { 2369 cond = 0; 2370 } 2371 } 2372 /* 2373 * No need to explicitly check if the path is disabled. 2374 * Since we are checking for state == ONLINE and the 2375 * same variable is used for DISABLE/ENABLE information. 2376 */ 2377 if (cond) { 2378 /* 2379 * Return the path in hold state. Caller should 2380 * release the lock by calling mdi_rele_path() 2381 */ 2382 MDI_PI_HOLD(pip); 2383 MDI_PI_UNLOCK(pip); 2384 if (sb) 2385 ct->ct_path_last = pip; 2386 *ret_pip = pip; 2387 MDI_CLIENT_UNLOCK(ct); 2388 return (MDI_SUCCESS); 2389 } 2390 /* 2391 * Path is busy. 2392 */ 2393 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2394 MDI_PI_IS_TRANSIENT(pip)) 2395 retry = 1; 2396 2397 /* 2398 * Keep looking for a next available online path 2399 */ 2400 do_again: 2401 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2402 if (next == NULL) { 2403 if ( flags & MDI_SELECT_NO_PREFERRED) { 2404 /* 2405 * Bail out since we hit the end of list 2406 */ 2407 MDI_PI_UNLOCK(pip); 2408 break; 2409 } 2410 2411 if (!sb) { 2412 if (preferred == 1) { 2413 /* 2414 * Looks like we reached the 2415 * end of the list. Lets enable 2416 * traversal of non preferred 2417 * paths. 
2418 */ 2419 preferred = 0; 2420 next = head; 2421 } else { 2422 /* 2423 * We have done both the passes 2424 * Preferred as well as for 2425 * Non-preferred. Bail out now. 2426 */ 2427 cont = 0; 2428 } 2429 } else { 2430 /* 2431 * Standard behavior case. 2432 */ 2433 next = head; 2434 } 2435 } 2436 MDI_PI_UNLOCK(pip); 2437 if (cont == 0) { 2438 break; 2439 } 2440 pip = next; 2441 2442 if (!sb) { 2443 /* 2444 * We need to handle the selection of 2445 * non-preferred path in the following 2446 * case: 2447 * 2448 * +------+ +------+ +------+ +-----+ 2449 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2450 * +------+ +------+ +------+ +-----+ 2451 * 2452 * If we start the search with B, we need to 2453 * skip beyond B to pick C which is non - 2454 * preferred in the second pass. The following 2455 * test, if true, will allow us to skip over 2456 * the 'start'(B in the example) to select 2457 * other non preferred elements. 2458 */ 2459 if ((start_pip != NULL) && (start_pip == pip) && 2460 (MDI_PI(start_pip)->pi_preferred 2461 != preferred)) { 2462 /* 2463 * try again after going past the start 2464 * pip 2465 */ 2466 MDI_PI_LOCK(pip); 2467 goto do_again; 2468 } 2469 } else { 2470 /* 2471 * Standard behavior case 2472 */ 2473 if (start == pip && preferred) { 2474 /* look for nonpreferred paths */ 2475 preferred = 0; 2476 } else if (start == pip && !preferred) { 2477 /* 2478 * Exit condition 2479 */ 2480 cont = 0; 2481 } 2482 } 2483 } while (cont); 2484 break; 2485 } 2486 2487 MDI_CLIENT_UNLOCK(ct); 2488 if (retry == 1) { 2489 return (MDI_BUSY); 2490 } else { 2491 return (MDI_NOPATH); 2492 } 2493 } 2494 2495 /* 2496 * For a client, return the next available path to any phci 2497 * 2498 * Note: 2499 * Caller should hold the branch's devinfo node to get a consistent 2500 * snap shot of the mdi_pathinfo nodes. 2501 * 2502 * Please note that even the list is stable the mdi_pathinfo 2503 * node state and properties are volatile. 
The caller should lock 2504 * and unlock the nodes by calling mdi_pi_lock() and 2505 * mdi_pi_unlock() functions to get a stable properties. 2506 * 2507 * If there is a need to use the nodes beyond the hold of the 2508 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2509 * need to be held against unexpected removal by calling 2510 * mdi_hold_path() and should be released by calling 2511 * mdi_rele_path() on completion. 2512 */ 2513 mdi_pathinfo_t * 2514 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2515 { 2516 mdi_client_t *ct; 2517 2518 if (!MDI_CLIENT(ct_dip)) 2519 return (NULL); 2520 2521 /* 2522 * Walk through client link 2523 */ 2524 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2525 ASSERT(ct != NULL); 2526 2527 if (pip == NULL) 2528 return ((mdi_pathinfo_t *)ct->ct_path_head); 2529 2530 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2531 } 2532 2533 /* 2534 * For a phci, return the next available path to any client 2535 * Note: ditto mdi_get_next_phci_path() 2536 */ 2537 mdi_pathinfo_t * 2538 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2539 { 2540 mdi_phci_t *ph; 2541 2542 if (!MDI_PHCI(ph_dip)) 2543 return (NULL); 2544 2545 /* 2546 * Walk through pHCI link 2547 */ 2548 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2549 ASSERT(ph != NULL); 2550 2551 if (pip == NULL) 2552 return ((mdi_pathinfo_t *)ph->ph_path_head); 2553 2554 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2555 } 2556 2557 /* 2558 * mdi_hold_path(): 2559 * Hold the mdi_pathinfo node against unwanted unexpected free. 2560 * Return Values: 2561 * None 2562 */ 2563 void 2564 mdi_hold_path(mdi_pathinfo_t *pip) 2565 { 2566 if (pip) { 2567 MDI_PI_LOCK(pip); 2568 MDI_PI_HOLD(pip); 2569 MDI_PI_UNLOCK(pip); 2570 } 2571 } 2572 2573 2574 /* 2575 * mdi_rele_path(): 2576 * Release the mdi_pathinfo node which was selected 2577 * through mdi_select_path() mechanism or manually held by 2578 * calling mdi_hold_path(). 
2579 * Return Values: 2580 * None 2581 */ 2582 void 2583 mdi_rele_path(mdi_pathinfo_t *pip) 2584 { 2585 if (pip) { 2586 MDI_PI_LOCK(pip); 2587 MDI_PI_RELE(pip); 2588 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2589 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2590 } 2591 MDI_PI_UNLOCK(pip); 2592 } 2593 } 2594 2595 /* 2596 * mdi_pi_lock(): 2597 * Lock the mdi_pathinfo node. 2598 * Note: 2599 * The caller should release the lock by calling mdi_pi_unlock() 2600 */ 2601 void 2602 mdi_pi_lock(mdi_pathinfo_t *pip) 2603 { 2604 ASSERT(pip != NULL); 2605 if (pip) { 2606 MDI_PI_LOCK(pip); 2607 } 2608 } 2609 2610 2611 /* 2612 * mdi_pi_unlock(): 2613 * Unlock the mdi_pathinfo node. 2614 * Note: 2615 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2616 */ 2617 void 2618 mdi_pi_unlock(mdi_pathinfo_t *pip) 2619 { 2620 ASSERT(pip != NULL); 2621 if (pip) { 2622 MDI_PI_UNLOCK(pip); 2623 } 2624 } 2625 2626 /* 2627 * mdi_pi_find(): 2628 * Search the list of mdi_pathinfo nodes attached to the 2629 * pHCI/Client device node whose path address matches "paddr". 2630 * Returns a pointer to the mdi_pathinfo node if a matching node is 2631 * found. 2632 * Return Values: 2633 * mdi_pathinfo node handle 2634 * NULL 2635 * Notes: 2636 * Caller need not hold any locks to call this function. 2637 */ 2638 mdi_pathinfo_t * 2639 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2640 { 2641 mdi_phci_t *ph; 2642 mdi_vhci_t *vh; 2643 mdi_client_t *ct; 2644 mdi_pathinfo_t *pip = NULL; 2645 2646 MDI_DEBUG(2, (MDI_NOTE, pdip, 2647 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : "")); 2648 if ((pdip == NULL) || (paddr == NULL)) { 2649 return (NULL); 2650 } 2651 ph = i_devi_get_phci(pdip); 2652 if (ph == NULL) { 2653 /* 2654 * Invalid pHCI device, Nothing more to do. 2655 */ 2656 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci")); 2657 return (NULL); 2658 } 2659 2660 vh = ph->ph_vhci; 2661 if (vh == NULL) { 2662 /* 2663 * Invalid vHCI device, Nothing more to do. 
2664 */ 2665 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci")); 2666 return (NULL); 2667 } 2668 2669 /* 2670 * Look for pathinfo node identified by paddr. 2671 */ 2672 if (caddr == NULL) { 2673 /* 2674 * Find a mdi_pathinfo node under pHCI list for a matching 2675 * unit address. 2676 */ 2677 MDI_PHCI_LOCK(ph); 2678 if (MDI_PHCI_IS_OFFLINE(ph)) { 2679 MDI_DEBUG(2, (MDI_WARN, pdip, 2680 "offline phci %p", (void *)ph)); 2681 MDI_PHCI_UNLOCK(ph); 2682 return (NULL); 2683 } 2684 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2685 2686 while (pip != NULL) { 2687 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2688 break; 2689 } 2690 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2691 } 2692 MDI_PHCI_UNLOCK(ph); 2693 MDI_DEBUG(2, (MDI_NOTE, pdip, 2694 "found %s %p", mdi_pi_spathname(pip), (void *)pip)); 2695 return (pip); 2696 } 2697 2698 /* 2699 * XXX - Is the rest of the code in this function really necessary? 2700 * The consumers of mdi_pi_find() can search for the desired pathinfo 2701 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2702 * whether the search is based on the pathinfo nodes attached to 2703 * the pHCI or the client node, the result will be the same. 2704 */ 2705 2706 /* 2707 * Find the client device corresponding to 'caddr' 2708 */ 2709 MDI_VHCI_CLIENT_LOCK(vh); 2710 2711 /* 2712 * XXX - Passing NULL to the following function works as long as the 2713 * the client addresses (caddr) are unique per vhci basis. 2714 */ 2715 ct = i_mdi_client_find(vh, NULL, caddr); 2716 if (ct == NULL) { 2717 /* 2718 * Client not found, Obviously mdi_pathinfo node has not been 2719 * created yet. 2720 */ 2721 MDI_VHCI_CLIENT_UNLOCK(vh); 2722 MDI_DEBUG(2, (MDI_NOTE, pdip, 2723 "client not found for caddr @%s", caddr ? 
caddr : "")); 2724 return (NULL); 2725 } 2726 2727 /* 2728 * Hold the client lock and look for a mdi_pathinfo node with matching 2729 * pHCI and paddr 2730 */ 2731 MDI_CLIENT_LOCK(ct); 2732 2733 /* 2734 * Release the global mutex as it is no more needed. Note: We always 2735 * respect the locking order while acquiring. 2736 */ 2737 MDI_VHCI_CLIENT_UNLOCK(vh); 2738 2739 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2740 while (pip != NULL) { 2741 /* 2742 * Compare the unit address 2743 */ 2744 if ((MDI_PI(pip)->pi_phci == ph) && 2745 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2746 break; 2747 } 2748 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2749 } 2750 MDI_CLIENT_UNLOCK(ct); 2751 MDI_DEBUG(2, (MDI_NOTE, pdip, 2752 "found: %s %p", mdi_pi_spathname(pip), (void *)pip)); 2753 return (pip); 2754 } 2755 2756 /* 2757 * mdi_pi_alloc(): 2758 * Allocate and initialize a new instance of a mdi_pathinfo node. 2759 * The mdi_pathinfo node returned by this function identifies a 2760 * unique device path is capable of having properties attached 2761 * and passed to mdi_pi_online() to fully attach and online the 2762 * path and client device node. 2763 * The mdi_pathinfo node returned by this function must be 2764 * destroyed using mdi_pi_free() if the path is no longer 2765 * operational or if the caller fails to attach a client device 2766 * node when calling mdi_pi_online(). The framework will not free 2767 * the resources allocated. 2768 * This function can be called from both interrupt and kernel 2769 * contexts. DDI_NOSLEEP flag should be used while calling 2770 * from interrupt contexts. 
2771 * Return Values: 2772 * MDI_SUCCESS 2773 * MDI_FAILURE 2774 * MDI_NOMEM 2775 */ 2776 /*ARGSUSED*/ 2777 int 2778 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2779 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2780 { 2781 mdi_vhci_t *vh; 2782 mdi_phci_t *ph; 2783 mdi_client_t *ct; 2784 mdi_pathinfo_t *pip = NULL; 2785 dev_info_t *cdip; 2786 int rv = MDI_NOMEM; 2787 int path_allocated = 0; 2788 2789 MDI_DEBUG(2, (MDI_NOTE, pdip, 2790 "cname %s: caddr@%s paddr@%s", 2791 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : "")); 2792 2793 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2794 ret_pip == NULL) { 2795 /* Nothing more to do */ 2796 return (MDI_FAILURE); 2797 } 2798 2799 *ret_pip = NULL; 2800 2801 /* No allocations on detaching pHCI */ 2802 if (DEVI_IS_DETACHING(pdip)) { 2803 /* Invalid pHCI device, return failure */ 2804 MDI_DEBUG(1, (MDI_WARN, pdip, 2805 "!detaching pHCI=%p", (void *)pdip)); 2806 return (MDI_FAILURE); 2807 } 2808 2809 ph = i_devi_get_phci(pdip); 2810 ASSERT(ph != NULL); 2811 if (ph == NULL) { 2812 /* Invalid pHCI device, return failure */ 2813 MDI_DEBUG(1, (MDI_WARN, pdip, 2814 "!invalid pHCI=%p", (void *)pdip)); 2815 return (MDI_FAILURE); 2816 } 2817 2818 MDI_PHCI_LOCK(ph); 2819 vh = ph->ph_vhci; 2820 if (vh == NULL) { 2821 /* Invalid vHCI device, return failure */ 2822 MDI_DEBUG(1, (MDI_WARN, pdip, 2823 "!invalid vHCI=%p", (void *)pdip)); 2824 MDI_PHCI_UNLOCK(ph); 2825 return (MDI_FAILURE); 2826 } 2827 2828 if (MDI_PHCI_IS_READY(ph) == 0) { 2829 /* 2830 * Do not allow new node creation when pHCI is in 2831 * offline/suspended states 2832 */ 2833 MDI_DEBUG(1, (MDI_WARN, pdip, 2834 "pHCI=%p is not ready", (void *)ph)); 2835 MDI_PHCI_UNLOCK(ph); 2836 return (MDI_BUSY); 2837 } 2838 MDI_PHCI_UNSTABLE(ph); 2839 MDI_PHCI_UNLOCK(ph); 2840 2841 /* look for a matching client, create one if not found */ 2842 MDI_VHCI_CLIENT_LOCK(vh); 2843 ct = 
i_mdi_client_find(vh, cname, caddr); 2844 if (ct == NULL) { 2845 ct = i_mdi_client_alloc(vh, cname, caddr); 2846 ASSERT(ct != NULL); 2847 } 2848 2849 if (ct->ct_dip == NULL) { 2850 /* 2851 * Allocate a devinfo node 2852 */ 2853 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2854 compatible, ncompatible); 2855 if (ct->ct_dip == NULL) { 2856 (void) i_mdi_client_free(vh, ct); 2857 goto fail; 2858 } 2859 } 2860 cdip = ct->ct_dip; 2861 2862 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2863 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2864 2865 MDI_CLIENT_LOCK(ct); 2866 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2867 while (pip != NULL) { 2868 /* 2869 * Compare the unit address 2870 */ 2871 if ((MDI_PI(pip)->pi_phci == ph) && 2872 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2873 break; 2874 } 2875 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2876 } 2877 MDI_CLIENT_UNLOCK(ct); 2878 2879 if (pip == NULL) { 2880 /* 2881 * This is a new path for this client device. Allocate and 2882 * initialize a new pathinfo node 2883 */ 2884 pip = i_mdi_pi_alloc(ph, paddr, ct); 2885 ASSERT(pip != NULL); 2886 path_allocated = 1; 2887 } 2888 rv = MDI_SUCCESS; 2889 2890 fail: 2891 /* 2892 * Release the global mutex. 
2893 */ 2894 MDI_VHCI_CLIENT_UNLOCK(vh); 2895 2896 /* 2897 * Mark the pHCI as stable 2898 */ 2899 MDI_PHCI_LOCK(ph); 2900 MDI_PHCI_STABLE(ph); 2901 MDI_PHCI_UNLOCK(ph); 2902 *ret_pip = pip; 2903 2904 MDI_DEBUG(2, (MDI_NOTE, pdip, 2905 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip)); 2906 2907 if (path_allocated) 2908 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2909 2910 return (rv); 2911 } 2912 2913 /*ARGSUSED*/ 2914 int 2915 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2916 int flags, mdi_pathinfo_t **ret_pip) 2917 { 2918 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2919 flags, ret_pip)); 2920 } 2921 2922 /* 2923 * i_mdi_pi_alloc(): 2924 * Allocate a mdi_pathinfo node and add to the pHCI path list 2925 * Return Values: 2926 * mdi_pathinfo 2927 */ 2928 /*ARGSUSED*/ 2929 static mdi_pathinfo_t * 2930 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2931 { 2932 mdi_pathinfo_t *pip; 2933 int ct_circular; 2934 int ph_circular; 2935 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */ 2936 char *path_persistent; 2937 int path_instance; 2938 mod_hash_val_t hv; 2939 2940 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2941 2942 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2943 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2944 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2945 MDI_PATHINFO_STATE_TRANSIENT; 2946 2947 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2948 MDI_PI_SET_USER_DISABLE(pip); 2949 2950 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2951 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2952 2953 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2954 MDI_PI_SET_DRV_DISABLE(pip); 2955 2956 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2957 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2958 MDI_PI(pip)->pi_client = ct; 2959 MDI_PI(pip)->pi_phci = ph; 2960 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2961 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2962 
2963 /* 2964 * We form the "path" to the pathinfo node, and see if we have 2965 * already allocated a 'path_instance' for that "path". If so, 2966 * we use the already allocated 'path_instance'. If not, we 2967 * allocate a new 'path_instance' and associate it with a copy of 2968 * the "path" string (which is never freed). The association 2969 * between a 'path_instance' this "path" string persists until 2970 * reboot. 2971 */ 2972 mutex_enter(&mdi_pathmap_mutex); 2973 (void) ddi_pathname(ph->ph_dip, path); 2974 (void) sprintf(path + strlen(path), "/%s@%s", 2975 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2976 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2977 path_instance = (uint_t)(intptr_t)hv; 2978 } else { 2979 /* allocate a new 'path_instance' and persistent "path" */ 2980 path_instance = mdi_pathmap_instance++; 2981 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2982 (void) mod_hash_insert(mdi_pathmap_bypath, 2983 (mod_hash_key_t)path_persistent, 2984 (mod_hash_val_t)(intptr_t)path_instance); 2985 (void) mod_hash_insert(mdi_pathmap_byinstance, 2986 (mod_hash_key_t)(intptr_t)path_instance, 2987 (mod_hash_val_t)path_persistent); 2988 2989 /* create shortpath name */ 2990 (void) snprintf(path, sizeof(path), "%s%d/%s@%s", 2991 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip), 2992 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2993 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2994 (void) mod_hash_insert(mdi_pathmap_sbyinstance, 2995 (mod_hash_key_t)(intptr_t)path_instance, 2996 (mod_hash_val_t)path_persistent); 2997 } 2998 mutex_exit(&mdi_pathmap_mutex); 2999 MDI_PI(pip)->pi_path_instance = path_instance; 3000 3001 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 3002 ASSERT(MDI_PI(pip)->pi_prop != NULL); 3003 MDI_PI(pip)->pi_pprivate = NULL; 3004 MDI_PI(pip)->pi_cprivate = NULL; 3005 MDI_PI(pip)->pi_vprivate = NULL; 3006 MDI_PI(pip)->pi_client_link = NULL; 3007 MDI_PI(pip)->pi_phci_link = 
NULL; 3008 MDI_PI(pip)->pi_ref_cnt = 0; 3009 MDI_PI(pip)->pi_kstats = NULL; 3010 MDI_PI(pip)->pi_preferred = 1; 3011 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 3012 3013 /* 3014 * Lock both dev_info nodes against changes in parallel. 3015 * 3016 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 3017 * This atypical operation is done to synchronize pathinfo nodes 3018 * during devinfo snapshot (see di_register_pip) by 'pretending' that 3019 * the pathinfo nodes are children of the Client. 3020 */ 3021 ndi_devi_enter(ct->ct_dip, &ct_circular); 3022 ndi_devi_enter(ph->ph_dip, &ph_circular); 3023 3024 i_mdi_phci_add_path(ph, pip); 3025 i_mdi_client_add_path(ct, pip); 3026 3027 ndi_devi_exit(ph->ph_dip, ph_circular); 3028 ndi_devi_exit(ct->ct_dip, ct_circular); 3029 3030 return (pip); 3031 } 3032 3033 /* 3034 * mdi_pi_pathname_by_instance(): 3035 * Lookup of "path" by 'path_instance'. Return "path". 3036 * NOTE: returned "path" remains valid forever (until reboot). 3037 */ 3038 char * 3039 mdi_pi_pathname_by_instance(int path_instance) 3040 { 3041 char *path; 3042 mod_hash_val_t hv; 3043 3044 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3045 mutex_enter(&mdi_pathmap_mutex); 3046 if (mod_hash_find(mdi_pathmap_byinstance, 3047 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3048 path = (char *)hv; 3049 else 3050 path = NULL; 3051 mutex_exit(&mdi_pathmap_mutex); 3052 return (path); 3053 } 3054 3055 /* 3056 * mdi_pi_spathname_by_instance(): 3057 * Lookup of "shortpath" by 'path_instance'. Return "shortpath". 3058 * NOTE: returned "shortpath" remains valid forever (until reboot). 
3059 */ 3060 char * 3061 mdi_pi_spathname_by_instance(int path_instance) 3062 { 3063 char *path; 3064 mod_hash_val_t hv; 3065 3066 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3067 mutex_enter(&mdi_pathmap_mutex); 3068 if (mod_hash_find(mdi_pathmap_sbyinstance, 3069 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3070 path = (char *)hv; 3071 else 3072 path = NULL; 3073 mutex_exit(&mdi_pathmap_mutex); 3074 return (path); 3075 } 3076 3077 3078 /* 3079 * i_mdi_phci_add_path(): 3080 * Add a mdi_pathinfo node to pHCI list. 3081 * Notes: 3082 * Caller should per-pHCI mutex 3083 */ 3084 static void 3085 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3086 { 3087 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3088 3089 MDI_PHCI_LOCK(ph); 3090 if (ph->ph_path_head == NULL) { 3091 ph->ph_path_head = pip; 3092 } else { 3093 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3094 } 3095 ph->ph_path_tail = pip; 3096 ph->ph_path_count++; 3097 MDI_PHCI_UNLOCK(ph); 3098 } 3099 3100 /* 3101 * i_mdi_client_add_path(): 3102 * Add mdi_pathinfo node to client list 3103 */ 3104 static void 3105 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3106 { 3107 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3108 3109 MDI_CLIENT_LOCK(ct); 3110 if (ct->ct_path_head == NULL) { 3111 ct->ct_path_head = pip; 3112 } else { 3113 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3114 } 3115 ct->ct_path_tail = pip; 3116 ct->ct_path_count++; 3117 MDI_CLIENT_UNLOCK(ct); 3118 } 3119 3120 /* 3121 * mdi_pi_free(): 3122 * Free the mdi_pathinfo node and also client device node if this 3123 * is the last path to the device 3124 * Return Values: 3125 * MDI_SUCCESS 3126 * MDI_FAILURE 3127 * MDI_BUSY 3128 */ 3129 /*ARGSUSED*/ 3130 int 3131 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3132 { 3133 int rv; 3134 mdi_vhci_t *vh; 3135 mdi_phci_t *ph; 3136 mdi_client_t *ct; 3137 int (*f)(); 3138 int client_held = 0; 3139 3140 MDI_PI_LOCK(pip); 3141 ph = MDI_PI(pip)->pi_phci; 3142 ASSERT(ph != NULL); 
3143 if (ph == NULL) { 3144 /* 3145 * Invalid pHCI device, return failure 3146 */ 3147 MDI_DEBUG(1, (MDI_WARN, NULL, 3148 "!invalid pHCI: pip %s %p", 3149 mdi_pi_spathname(pip), (void *)pip)); 3150 MDI_PI_UNLOCK(pip); 3151 return (MDI_FAILURE); 3152 } 3153 3154 vh = ph->ph_vhci; 3155 ASSERT(vh != NULL); 3156 if (vh == NULL) { 3157 /* Invalid pHCI device, return failure */ 3158 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3159 "!invalid vHCI: pip %s %p", 3160 mdi_pi_spathname(pip), (void *)pip)); 3161 MDI_PI_UNLOCK(pip); 3162 return (MDI_FAILURE); 3163 } 3164 3165 ct = MDI_PI(pip)->pi_client; 3166 ASSERT(ct != NULL); 3167 if (ct == NULL) { 3168 /* 3169 * Invalid Client device, return failure 3170 */ 3171 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3172 "!invalid client: pip %s %p", 3173 mdi_pi_spathname(pip), (void *)pip)); 3174 MDI_PI_UNLOCK(pip); 3175 return (MDI_FAILURE); 3176 } 3177 3178 /* 3179 * Check to see for busy condition. A mdi_pathinfo can only be freed 3180 * if the node state is either offline or init and the reference count 3181 * is zero. 3182 */ 3183 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3184 MDI_PI_IS_INITING(pip))) { 3185 /* 3186 * Node is busy 3187 */ 3188 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3189 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3190 MDI_PI_UNLOCK(pip); 3191 return (MDI_BUSY); 3192 } 3193 3194 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3195 /* 3196 * Give a chance for pending I/Os to complete. 3197 */ 3198 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3199 "!%d cmds still pending on path: %s %p", 3200 MDI_PI(pip)->pi_ref_cnt, 3201 mdi_pi_spathname(pip), (void *)pip)); 3202 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3203 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3204 TR_CLOCK_TICK) == -1) { 3205 /* 3206 * The timeout time reached without ref_cnt being zero 3207 * being signaled. 
3208 */ 3209 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3210 "!Timeout reached on path %s %p without the cond", 3211 mdi_pi_spathname(pip), (void *)pip)); 3212 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3213 "!%d cmds still pending on path %s %p", 3214 MDI_PI(pip)->pi_ref_cnt, 3215 mdi_pi_spathname(pip), (void *)pip)); 3216 MDI_PI_UNLOCK(pip); 3217 return (MDI_BUSY); 3218 } 3219 } 3220 if (MDI_PI(pip)->pi_pm_held) { 3221 client_held = 1; 3222 } 3223 MDI_PI_UNLOCK(pip); 3224 3225 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3226 3227 MDI_CLIENT_LOCK(ct); 3228 3229 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3230 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3231 3232 /* 3233 * Wait till failover is complete before removing this node. 3234 */ 3235 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3236 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3237 3238 MDI_CLIENT_UNLOCK(ct); 3239 MDI_VHCI_CLIENT_LOCK(vh); 3240 MDI_CLIENT_LOCK(ct); 3241 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3242 3243 if (!MDI_PI_IS_INITING(pip)) { 3244 f = vh->vh_ops->vo_pi_uninit; 3245 if (f != NULL) { 3246 rv = (*f)(vh->vh_dip, pip, 0); 3247 } 3248 } else 3249 rv = MDI_SUCCESS; 3250 3251 /* 3252 * If vo_pi_uninit() completed successfully. 3253 */ 3254 if (rv == MDI_SUCCESS) { 3255 if (client_held) { 3256 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3257 "i_mdi_pm_rele_client\n")); 3258 i_mdi_pm_rele_client(ct, 1); 3259 } 3260 i_mdi_pi_free(ph, pip, ct); 3261 if (ct->ct_path_count == 0) { 3262 /* 3263 * Client lost its last path. 
3264 * Clean up the client device 3265 */ 3266 MDI_CLIENT_UNLOCK(ct); 3267 (void) i_mdi_client_free(ct->ct_vhci, ct); 3268 MDI_VHCI_CLIENT_UNLOCK(vh); 3269 return (rv); 3270 } 3271 } 3272 MDI_CLIENT_UNLOCK(ct); 3273 MDI_VHCI_CLIENT_UNLOCK(vh); 3274 3275 if (rv == MDI_FAILURE) 3276 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3277 3278 return (rv); 3279 } 3280 3281 /* 3282 * i_mdi_pi_free(): 3283 * Free the mdi_pathinfo node 3284 */ 3285 static void 3286 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3287 { 3288 int ct_circular; 3289 int ph_circular; 3290 3291 ASSERT(MDI_CLIENT_LOCKED(ct)); 3292 3293 /* 3294 * remove any per-path kstats 3295 */ 3296 i_mdi_pi_kstat_destroy(pip); 3297 3298 /* See comments in i_mdi_pi_alloc() */ 3299 ndi_devi_enter(ct->ct_dip, &ct_circular); 3300 ndi_devi_enter(ph->ph_dip, &ph_circular); 3301 3302 i_mdi_client_remove_path(ct, pip); 3303 i_mdi_phci_remove_path(ph, pip); 3304 3305 ndi_devi_exit(ph->ph_dip, ph_circular); 3306 ndi_devi_exit(ct->ct_dip, ct_circular); 3307 3308 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3309 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3310 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3311 if (MDI_PI(pip)->pi_addr) { 3312 kmem_free(MDI_PI(pip)->pi_addr, 3313 strlen(MDI_PI(pip)->pi_addr) + 1); 3314 MDI_PI(pip)->pi_addr = NULL; 3315 } 3316 3317 if (MDI_PI(pip)->pi_prop) { 3318 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3319 MDI_PI(pip)->pi_prop = NULL; 3320 } 3321 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3322 } 3323 3324 3325 /* 3326 * i_mdi_phci_remove_path(): 3327 * Remove a mdi_pathinfo node from pHCI list. 
3328 * Notes: 3329 * Caller should hold per-pHCI mutex 3330 */ 3331 static void 3332 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3333 { 3334 mdi_pathinfo_t *prev = NULL; 3335 mdi_pathinfo_t *path = NULL; 3336 3337 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3338 3339 MDI_PHCI_LOCK(ph); 3340 path = ph->ph_path_head; 3341 while (path != NULL) { 3342 if (path == pip) { 3343 break; 3344 } 3345 prev = path; 3346 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3347 } 3348 3349 if (path) { 3350 ph->ph_path_count--; 3351 if (prev) { 3352 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3353 } else { 3354 ph->ph_path_head = 3355 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3356 } 3357 if (ph->ph_path_tail == path) { 3358 ph->ph_path_tail = prev; 3359 } 3360 } 3361 3362 /* 3363 * Clear the pHCI link 3364 */ 3365 MDI_PI(pip)->pi_phci_link = NULL; 3366 MDI_PI(pip)->pi_phci = NULL; 3367 MDI_PHCI_UNLOCK(ph); 3368 } 3369 3370 /* 3371 * i_mdi_client_remove_path(): 3372 * Remove a mdi_pathinfo node from client path list. 
3373 */ 3374 static void 3375 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3376 { 3377 mdi_pathinfo_t *prev = NULL; 3378 mdi_pathinfo_t *path; 3379 3380 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3381 3382 ASSERT(MDI_CLIENT_LOCKED(ct)); 3383 path = ct->ct_path_head; 3384 while (path != NULL) { 3385 if (path == pip) { 3386 break; 3387 } 3388 prev = path; 3389 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3390 } 3391 3392 if (path) { 3393 ct->ct_path_count--; 3394 if (prev) { 3395 MDI_PI(prev)->pi_client_link = 3396 MDI_PI(path)->pi_client_link; 3397 } else { 3398 ct->ct_path_head = 3399 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3400 } 3401 if (ct->ct_path_tail == path) { 3402 ct->ct_path_tail = prev; 3403 } 3404 if (ct->ct_path_last == path) { 3405 ct->ct_path_last = ct->ct_path_head; 3406 } 3407 } 3408 MDI_PI(pip)->pi_client_link = NULL; 3409 MDI_PI(pip)->pi_client = NULL; 3410 } 3411 3412 /* 3413 * i_mdi_pi_state_change(): 3414 * online a mdi_pathinfo node 3415 * 3416 * Return Values: 3417 * MDI_SUCCESS 3418 * MDI_FAILURE 3419 */ 3420 /*ARGSUSED*/ 3421 static int 3422 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3423 { 3424 int rv = MDI_SUCCESS; 3425 mdi_vhci_t *vh; 3426 mdi_phci_t *ph; 3427 mdi_client_t *ct; 3428 int (*f)(); 3429 dev_info_t *cdip; 3430 3431 MDI_PI_LOCK(pip); 3432 3433 ph = MDI_PI(pip)->pi_phci; 3434 ASSERT(ph); 3435 if (ph == NULL) { 3436 /* 3437 * Invalid pHCI device, fail the request 3438 */ 3439 MDI_PI_UNLOCK(pip); 3440 MDI_DEBUG(1, (MDI_WARN, NULL, 3441 "!invalid phci: pip %s %p", 3442 mdi_pi_spathname(pip), (void *)pip)); 3443 return (MDI_FAILURE); 3444 } 3445 3446 vh = ph->ph_vhci; 3447 ASSERT(vh); 3448 if (vh == NULL) { 3449 /* 3450 * Invalid vHCI device, fail the request 3451 */ 3452 MDI_PI_UNLOCK(pip); 3453 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3454 "!invalid vhci: pip %s %p", 3455 mdi_pi_spathname(pip), (void *)pip)); 3456 return (MDI_FAILURE); 3457 } 3458 3459 ct = 
MDI_PI(pip)->pi_client; 3460 ASSERT(ct != NULL); 3461 if (ct == NULL) { 3462 /* 3463 * Invalid client device, fail the request 3464 */ 3465 MDI_PI_UNLOCK(pip); 3466 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3467 "!invalid client: pip %s %p", 3468 mdi_pi_spathname(pip), (void *)pip)); 3469 return (MDI_FAILURE); 3470 } 3471 3472 /* 3473 * If this path has not been initialized yet, Callback vHCI driver's 3474 * pathinfo node initialize entry point 3475 */ 3476 3477 if (MDI_PI_IS_INITING(pip)) { 3478 MDI_PI_UNLOCK(pip); 3479 f = vh->vh_ops->vo_pi_init; 3480 if (f != NULL) { 3481 rv = (*f)(vh->vh_dip, pip, 0); 3482 if (rv != MDI_SUCCESS) { 3483 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3484 "!vo_pi_init failed: vHCI %p, pip %s %p", 3485 (void *)vh, mdi_pi_spathname(pip), 3486 (void *)pip)); 3487 return (MDI_FAILURE); 3488 } 3489 } 3490 MDI_PI_LOCK(pip); 3491 MDI_PI_CLEAR_TRANSIENT(pip); 3492 } 3493 3494 /* 3495 * Do not allow state transition when pHCI is in offline/suspended 3496 * states 3497 */ 3498 i_mdi_phci_lock(ph, pip); 3499 if (MDI_PHCI_IS_READY(ph) == 0) { 3500 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3501 "!pHCI not ready, pHCI=%p", (void *)ph)); 3502 MDI_PI_UNLOCK(pip); 3503 i_mdi_phci_unlock(ph); 3504 return (MDI_BUSY); 3505 } 3506 MDI_PHCI_UNSTABLE(ph); 3507 i_mdi_phci_unlock(ph); 3508 3509 /* 3510 * Check if mdi_pathinfo state is in transient state. 3511 * If yes, offlining is in progress and wait till transient state is 3512 * cleared. 3513 */ 3514 if (MDI_PI_IS_TRANSIENT(pip)) { 3515 while (MDI_PI_IS_TRANSIENT(pip)) { 3516 cv_wait(&MDI_PI(pip)->pi_state_cv, 3517 &MDI_PI(pip)->pi_mutex); 3518 } 3519 } 3520 3521 /* 3522 * Grab the client lock in reverse order sequence and release the 3523 * mdi_pathinfo mutex. 
3524 */ 3525 i_mdi_client_lock(ct, pip); 3526 MDI_PI_UNLOCK(pip); 3527 3528 /* 3529 * Wait till failover state is cleared 3530 */ 3531 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3532 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3533 3534 /* 3535 * Mark the mdi_pathinfo node state as transient 3536 */ 3537 MDI_PI_LOCK(pip); 3538 switch (state) { 3539 case MDI_PATHINFO_STATE_ONLINE: 3540 MDI_PI_SET_ONLINING(pip); 3541 break; 3542 3543 case MDI_PATHINFO_STATE_STANDBY: 3544 MDI_PI_SET_STANDBYING(pip); 3545 break; 3546 3547 case MDI_PATHINFO_STATE_FAULT: 3548 /* 3549 * Mark the pathinfo state as FAULTED 3550 */ 3551 MDI_PI_SET_FAULTING(pip); 3552 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3553 break; 3554 3555 case MDI_PATHINFO_STATE_OFFLINE: 3556 /* 3557 * ndi_devi_offline() cannot hold pip or ct locks. 3558 */ 3559 MDI_PI_UNLOCK(pip); 3560 3561 /* 3562 * If this is a user initiated path online->offline operation 3563 * who's success would transition a client from DEGRADED to 3564 * FAILED then only proceed if we can offline the client first. 
3565 */ 3566 cdip = ct->ct_dip; 3567 if ((flag & NDI_USER_REQ) && 3568 MDI_PI_IS_ONLINE(pip) && 3569 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3570 i_mdi_client_unlock(ct); 3571 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN); 3572 if (rv != NDI_SUCCESS) { 3573 /* 3574 * Convert to MDI error code 3575 */ 3576 switch (rv) { 3577 case NDI_BUSY: 3578 rv = MDI_BUSY; 3579 break; 3580 default: 3581 rv = MDI_FAILURE; 3582 break; 3583 } 3584 goto state_change_exit; 3585 } else { 3586 i_mdi_client_lock(ct, NULL); 3587 } 3588 } 3589 /* 3590 * Mark the mdi_pathinfo node state as transient 3591 */ 3592 MDI_PI_LOCK(pip); 3593 MDI_PI_SET_OFFLINING(pip); 3594 break; 3595 } 3596 MDI_PI_UNLOCK(pip); 3597 MDI_CLIENT_UNSTABLE(ct); 3598 i_mdi_client_unlock(ct); 3599 3600 f = vh->vh_ops->vo_pi_state_change; 3601 if (f != NULL) 3602 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3603 3604 MDI_CLIENT_LOCK(ct); 3605 MDI_PI_LOCK(pip); 3606 if (rv == MDI_NOT_SUPPORTED) { 3607 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3608 } 3609 if (rv != MDI_SUCCESS) { 3610 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip, 3611 "vo_pi_state_change failed: rv %x", rv)); 3612 } 3613 if (MDI_PI_IS_TRANSIENT(pip)) { 3614 if (rv == MDI_SUCCESS) { 3615 MDI_PI_CLEAR_TRANSIENT(pip); 3616 } else { 3617 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3618 } 3619 } 3620 3621 /* 3622 * Wake anyone waiting for this mdi_pathinfo node 3623 */ 3624 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3625 MDI_PI_UNLOCK(pip); 3626 3627 /* 3628 * Mark the client device as stable 3629 */ 3630 MDI_CLIENT_STABLE(ct); 3631 if (rv == MDI_SUCCESS) { 3632 if (ct->ct_unstable == 0) { 3633 cdip = ct->ct_dip; 3634 3635 /* 3636 * Onlining the mdi_pathinfo node will impact the 3637 * client state Update the client and dev_info node 3638 * state accordingly 3639 */ 3640 rv = NDI_SUCCESS; 3641 i_mdi_client_update_state(ct); 3642 switch (MDI_CLIENT_STATE(ct)) { 3643 case MDI_CLIENT_STATE_OPTIMAL: 3644 case MDI_CLIENT_STATE_DEGRADED: 3645 if (cdip && 
!i_ddi_devi_attached(cdip) && 3646 ((state == MDI_PATHINFO_STATE_ONLINE) || 3647 (state == MDI_PATHINFO_STATE_STANDBY))) { 3648 3649 /* 3650 * Must do ndi_devi_online() through 3651 * hotplug thread for deferred 3652 * attach mechanism to work 3653 */ 3654 MDI_CLIENT_UNLOCK(ct); 3655 rv = ndi_devi_online(cdip, 0); 3656 MDI_CLIENT_LOCK(ct); 3657 if ((rv != NDI_SUCCESS) && 3658 (MDI_CLIENT_STATE(ct) == 3659 MDI_CLIENT_STATE_DEGRADED)) { 3660 /* 3661 * ndi_devi_online failed. 3662 * Reset client flags to 3663 * offline. 3664 */ 3665 MDI_DEBUG(1, (MDI_WARN, cdip, 3666 "!ndi_devi_online failed " 3667 "error %x", rv)); 3668 MDI_CLIENT_SET_OFFLINE(ct); 3669 } 3670 if (rv != NDI_SUCCESS) { 3671 /* Reset the path state */ 3672 MDI_PI_LOCK(pip); 3673 MDI_PI(pip)->pi_state = 3674 MDI_PI_OLD_STATE(pip); 3675 MDI_PI_UNLOCK(pip); 3676 } 3677 } 3678 break; 3679 3680 case MDI_CLIENT_STATE_FAILED: 3681 /* 3682 * This is the last path case for 3683 * non-user initiated events. 3684 */ 3685 if (((flag & NDI_USER_REQ) == 0) && 3686 cdip && (i_ddi_node_state(cdip) >= 3687 DS_INITIALIZED)) { 3688 MDI_CLIENT_UNLOCK(ct); 3689 rv = ndi_devi_offline(cdip, 3690 NDI_DEVFS_CLEAN); 3691 MDI_CLIENT_LOCK(ct); 3692 3693 if (rv != NDI_SUCCESS) { 3694 /* 3695 * ndi_devi_offline failed. 3696 * Reset client flags to 3697 * online as the path could not 3698 * be offlined. 3699 */ 3700 MDI_DEBUG(1, (MDI_WARN, cdip, 3701 "!ndi_devi_offline failed: " 3702 "error %x", rv)); 3703 MDI_CLIENT_SET_ONLINE(ct); 3704 } 3705 } 3706 break; 3707 } 3708 /* 3709 * Convert to MDI error code 3710 */ 3711 switch (rv) { 3712 case NDI_SUCCESS: 3713 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3714 i_mdi_report_path_state(ct, pip); 3715 rv = MDI_SUCCESS; 3716 break; 3717 case NDI_BUSY: 3718 rv = MDI_BUSY; 3719 break; 3720 default: 3721 rv = MDI_FAILURE; 3722 break; 3723 } 3724 } 3725 } 3726 MDI_CLIENT_UNLOCK(ct); 3727 3728 state_change_exit: 3729 /* 3730 * Mark the pHCI as stable again. 
3731 */ 3732 MDI_PHCI_LOCK(ph); 3733 MDI_PHCI_STABLE(ph); 3734 MDI_PHCI_UNLOCK(ph); 3735 return (rv); 3736 } 3737 3738 /* 3739 * mdi_pi_online(): 3740 * Place the path_info node in the online state. The path is 3741 * now available to be selected by mdi_select_path() for 3742 * transporting I/O requests to client devices. 3743 * Return Values: 3744 * MDI_SUCCESS 3745 * MDI_FAILURE 3746 */ 3747 int 3748 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3749 { 3750 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3751 int client_held = 0; 3752 int rv; 3753 3754 ASSERT(ct != NULL); 3755 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3756 if (rv != MDI_SUCCESS) 3757 return (rv); 3758 3759 MDI_PI_LOCK(pip); 3760 if (MDI_PI(pip)->pi_pm_held == 0) { 3761 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3762 "i_mdi_pm_hold_pip %p", (void *)pip)); 3763 i_mdi_pm_hold_pip(pip); 3764 client_held = 1; 3765 } 3766 MDI_PI_UNLOCK(pip); 3767 3768 if (client_held) { 3769 MDI_CLIENT_LOCK(ct); 3770 if (ct->ct_power_cnt == 0) { 3771 rv = i_mdi_power_all_phci(ct); 3772 } 3773 3774 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3775 "i_mdi_pm_hold_client %p", (void *)ct)); 3776 i_mdi_pm_hold_client(ct, 1); 3777 MDI_CLIENT_UNLOCK(ct); 3778 } 3779 3780 return (rv); 3781 } 3782 3783 /* 3784 * mdi_pi_standby(): 3785 * Place the mdi_pathinfo node in standby state 3786 * 3787 * Return Values: 3788 * MDI_SUCCESS 3789 * MDI_FAILURE 3790 */ 3791 int 3792 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3793 { 3794 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3795 } 3796 3797 /* 3798 * mdi_pi_fault(): 3799 * Place the mdi_pathinfo node in fault'ed state 3800 * Return Values: 3801 * MDI_SUCCESS 3802 * MDI_FAILURE 3803 */ 3804 int 3805 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3806 { 3807 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3808 } 3809 3810 /* 3811 * mdi_pi_offline(): 3812 * Offline a mdi_pathinfo node. 
3813 * Return Values: 3814 * MDI_SUCCESS 3815 * MDI_FAILURE 3816 */ 3817 int 3818 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3819 { 3820 int ret, client_held = 0; 3821 mdi_client_t *ct; 3822 3823 /* 3824 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3825 * used it to mean "user initiated operation" (i.e. devctl). Callers 3826 * should now just use NDI_USER_REQ. 3827 */ 3828 if (flags & NDI_DEVI_REMOVE) { 3829 flags &= ~NDI_DEVI_REMOVE; 3830 flags |= NDI_USER_REQ; 3831 } 3832 3833 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3834 3835 if (ret == MDI_SUCCESS) { 3836 MDI_PI_LOCK(pip); 3837 if (MDI_PI(pip)->pi_pm_held) { 3838 client_held = 1; 3839 } 3840 MDI_PI_UNLOCK(pip); 3841 3842 if (client_held) { 3843 ct = MDI_PI(pip)->pi_client; 3844 MDI_CLIENT_LOCK(ct); 3845 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3846 "i_mdi_pm_rele_client\n")); 3847 i_mdi_pm_rele_client(ct, 1); 3848 MDI_CLIENT_UNLOCK(ct); 3849 } 3850 } 3851 3852 return (ret); 3853 } 3854 3855 /* 3856 * i_mdi_pi_offline(): 3857 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3858 */ 3859 static int 3860 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3861 { 3862 dev_info_t *vdip = NULL; 3863 mdi_vhci_t *vh = NULL; 3864 mdi_client_t *ct = NULL; 3865 int (*f)(); 3866 int rv; 3867 3868 MDI_PI_LOCK(pip); 3869 ct = MDI_PI(pip)->pi_client; 3870 ASSERT(ct != NULL); 3871 3872 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3873 /* 3874 * Give a chance for pending I/Os to complete. 3875 */ 3876 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3877 "!%d cmds still pending on path %s %p", 3878 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3879 (void *)pip)); 3880 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3881 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3882 TR_CLOCK_TICK) == -1) { 3883 /* 3884 * The timeout time reached without ref_cnt being zero 3885 * being signaled. 
3886 */ 3887 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3888 "!Timeout reached on path %s %p without the cond", 3889 mdi_pi_spathname(pip), (void *)pip)); 3890 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3891 "!%d cmds still pending on path %s %p", 3892 MDI_PI(pip)->pi_ref_cnt, 3893 mdi_pi_spathname(pip), (void *)pip)); 3894 } 3895 } 3896 vh = ct->ct_vhci; 3897 vdip = vh->vh_dip; 3898 3899 /* 3900 * Notify vHCI that has registered this event 3901 */ 3902 ASSERT(vh->vh_ops); 3903 f = vh->vh_ops->vo_pi_state_change; 3904 3905 if (f != NULL) { 3906 MDI_PI_UNLOCK(pip); 3907 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3908 flags)) != MDI_SUCCESS) { 3909 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3910 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3911 ddi_driver_name(vdip), ddi_get_instance(vdip), 3912 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3913 } 3914 MDI_PI_LOCK(pip); 3915 } 3916 3917 /* 3918 * Set the mdi_pathinfo node state and clear the transient condition 3919 */ 3920 MDI_PI_SET_OFFLINE(pip); 3921 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3922 MDI_PI_UNLOCK(pip); 3923 3924 MDI_CLIENT_LOCK(ct); 3925 if (rv == MDI_SUCCESS) { 3926 if (ct->ct_unstable == 0) { 3927 dev_info_t *cdip = ct->ct_dip; 3928 3929 /* 3930 * Onlining the mdi_pathinfo node will impact the 3931 * client state Update the client and dev_info node 3932 * state accordingly 3933 */ 3934 i_mdi_client_update_state(ct); 3935 rv = NDI_SUCCESS; 3936 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3937 if (cdip && 3938 (i_ddi_node_state(cdip) >= 3939 DS_INITIALIZED)) { 3940 MDI_CLIENT_UNLOCK(ct); 3941 rv = ndi_devi_offline(cdip, 3942 NDI_DEVFS_CLEAN); 3943 MDI_CLIENT_LOCK(ct); 3944 if (rv != NDI_SUCCESS) { 3945 /* 3946 * ndi_devi_offline failed. 3947 * Reset client flags to 3948 * online. 
3949 */ 3950 MDI_DEBUG(4, (MDI_WARN, cdip, 3951 "ndi_devi_offline failed: " 3952 "error %x", rv)); 3953 MDI_CLIENT_SET_ONLINE(ct); 3954 } 3955 } 3956 } 3957 /* 3958 * Convert to MDI error code 3959 */ 3960 switch (rv) { 3961 case NDI_SUCCESS: 3962 rv = MDI_SUCCESS; 3963 break; 3964 case NDI_BUSY: 3965 rv = MDI_BUSY; 3966 break; 3967 default: 3968 rv = MDI_FAILURE; 3969 break; 3970 } 3971 } 3972 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3973 i_mdi_report_path_state(ct, pip); 3974 } 3975 3976 MDI_CLIENT_UNLOCK(ct); 3977 3978 /* 3979 * Change in the mdi_pathinfo node state will impact the client state 3980 */ 3981 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3982 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3983 return (rv); 3984 } 3985 3986 /* 3987 * mdi_pi_get_node_name(): 3988 * Get the name associated with a mdi_pathinfo node. 3989 * Since pathinfo nodes are not directly named, we 3990 * return the node_name of the client. 3991 * 3992 * Return Values: 3993 * char * 3994 */ 3995 char * 3996 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 3997 { 3998 mdi_client_t *ct; 3999 4000 if (pip == NULL) 4001 return (NULL); 4002 ct = MDI_PI(pip)->pi_client; 4003 if ((ct == NULL) || (ct->ct_dip == NULL)) 4004 return (NULL); 4005 return (ddi_node_name(ct->ct_dip)); 4006 } 4007 4008 /* 4009 * mdi_pi_get_addr(): 4010 * Get the unit address associated with a mdi_pathinfo node 4011 * 4012 * Return Values: 4013 * char * 4014 */ 4015 char * 4016 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4017 { 4018 if (pip == NULL) 4019 return (NULL); 4020 4021 return (MDI_PI(pip)->pi_addr); 4022 } 4023 4024 /* 4025 * mdi_pi_get_path_instance(): 4026 * Get the 'path_instance' of a mdi_pathinfo node 4027 * 4028 * Return Values: 4029 * path_instance 4030 */ 4031 int 4032 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4033 { 4034 if (pip == NULL) 4035 return (0); 4036 4037 return (MDI_PI(pip)->pi_path_instance); 4038 } 4039 4040 /* 4041 * mdi_pi_pathname(): 4042 * Return pointer to path to pathinfo node. 
4043 */ 4044 char * 4045 mdi_pi_pathname(mdi_pathinfo_t *pip) 4046 { 4047 if (pip == NULL) 4048 return (NULL); 4049 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4050 } 4051 4052 /* 4053 * mdi_pi_spathname(): 4054 * Return pointer to shortpath to pathinfo node. Used for debug 4055 * messages, so return "" instead of NULL when unknown. 4056 */ 4057 char * 4058 mdi_pi_spathname(mdi_pathinfo_t *pip) 4059 { 4060 char *spath = ""; 4061 4062 if (pip) { 4063 spath = mdi_pi_spathname_by_instance( 4064 mdi_pi_get_path_instance(pip)); 4065 if (spath == NULL) 4066 spath = ""; 4067 } 4068 return (spath); 4069 } 4070 4071 char * 4072 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4073 { 4074 char *obp_path = NULL; 4075 if ((pip == NULL) || (path == NULL)) 4076 return (NULL); 4077 4078 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4079 (void) strcpy(path, obp_path); 4080 (void) mdi_prop_free(obp_path); 4081 } else { 4082 path = NULL; 4083 } 4084 return (path); 4085 } 4086 4087 int 4088 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4089 { 4090 dev_info_t *pdip; 4091 char *obp_path = NULL; 4092 int rc = MDI_FAILURE; 4093 4094 if (pip == NULL) 4095 return (MDI_FAILURE); 4096 4097 pdip = mdi_pi_get_phci(pip); 4098 if (pdip == NULL) 4099 return (MDI_FAILURE); 4100 4101 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4102 4103 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4104 (void) ddi_pathname(pdip, obp_path); 4105 } 4106 4107 if (component) { 4108 (void) strncat(obp_path, "/", MAXPATHLEN); 4109 (void) strncat(obp_path, component, MAXPATHLEN); 4110 } 4111 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4112 4113 if (obp_path) 4114 kmem_free(obp_path, MAXPATHLEN); 4115 return (rc); 4116 } 4117 4118 /* 4119 * mdi_pi_get_client(): 4120 * Get the client devinfo associated with a mdi_pathinfo node 4121 * 4122 * Return Values: 4123 * Handle to client device dev_info node 4124 */ 4125 dev_info_t * 4126 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4127 { 4128 dev_info_t *dip = NULL; 4129 if (pip) { 4130 dip = MDI_PI(pip)->pi_client->ct_dip; 4131 } 4132 return (dip); 4133 } 4134 4135 /* 4136 * mdi_pi_get_phci(): 4137 * Get the pHCI devinfo associated with the mdi_pathinfo node 4138 * Return Values: 4139 * Handle to dev_info node 4140 */ 4141 dev_info_t * 4142 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4143 { 4144 dev_info_t *dip = NULL; 4145 mdi_phci_t *ph; 4146 4147 if (pip) { 4148 ph = MDI_PI(pip)->pi_phci; 4149 if (ph) 4150 dip = ph->ph_dip; 4151 } 4152 return (dip); 4153 } 4154 4155 /* 4156 * mdi_pi_get_client_private(): 4157 * Get the client private information associated with the 4158 * mdi_pathinfo node 4159 */ 4160 void * 4161 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4162 { 4163 void *cprivate = NULL; 4164 if (pip) { 4165 cprivate = MDI_PI(pip)->pi_cprivate; 4166 } 4167 return (cprivate); 4168 } 4169 4170 /* 4171 * mdi_pi_set_client_private(): 4172 * Set the client private information in the mdi_pathinfo node 4173 */ 4174 void 4175 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4176 { 4177 if (pip) { 4178 MDI_PI(pip)->pi_cprivate = priv; 4179 } 4180 } 4181 4182 /* 4183 * mdi_pi_get_phci_private(): 4184 * Get the pHCI private information associated with the 4185 * mdi_pathinfo node 4186 */ 4187 caddr_t 4188 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4189 { 4190 caddr_t pprivate = NULL; 4191 4192 if (pip) { 4193 pprivate = MDI_PI(pip)->pi_pprivate; 4194 } 4195 return (pprivate); 4196 } 4197 4198 /* 4199 * mdi_pi_set_phci_private(): 4200 * Set the pHCI private information in the mdi_pathinfo node 4201 */ 4202 void 4203 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4204 { 4205 if (pip) { 4206 MDI_PI(pip)->pi_pprivate = priv; 4207 } 4208 } 4209 4210 /* 4211 * mdi_pi_get_state(): 4212 * Get the mdi_pathinfo node state. 
Transient states are internal 4213 * and not provided to the users 4214 */ 4215 mdi_pathinfo_state_t 4216 mdi_pi_get_state(mdi_pathinfo_t *pip) 4217 { 4218 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4219 4220 if (pip) { 4221 if (MDI_PI_IS_TRANSIENT(pip)) { 4222 /* 4223 * mdi_pathinfo is in state transition. Return the 4224 * last good state. 4225 */ 4226 state = MDI_PI_OLD_STATE(pip); 4227 } else { 4228 state = MDI_PI_STATE(pip); 4229 } 4230 } 4231 return (state); 4232 } 4233 4234 /* 4235 * mdi_pi_get_flags(): 4236 * Get the mdi_pathinfo node flags. 4237 */ 4238 uint_t 4239 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4240 { 4241 return (pip ? MDI_PI(pip)->pi_flags : 0); 4242 } 4243 4244 /* 4245 * Note that the following function needs to be the new interface for 4246 * mdi_pi_get_state when mpxio gets integrated to ON. 4247 */ 4248 int 4249 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4250 uint32_t *ext_state) 4251 { 4252 *state = MDI_PATHINFO_STATE_INIT; 4253 4254 if (pip) { 4255 if (MDI_PI_IS_TRANSIENT(pip)) { 4256 /* 4257 * mdi_pathinfo is in state transition. Return the 4258 * last good state. 
4259 */ 4260 *state = MDI_PI_OLD_STATE(pip); 4261 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4262 } else { 4263 *state = MDI_PI_STATE(pip); 4264 *ext_state = MDI_PI_EXT_STATE(pip); 4265 } 4266 } 4267 return (MDI_SUCCESS); 4268 } 4269 4270 /* 4271 * mdi_pi_get_preferred: 4272 * Get the preferred path flag 4273 */ 4274 int 4275 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4276 { 4277 if (pip) { 4278 return (MDI_PI(pip)->pi_preferred); 4279 } 4280 return (0); 4281 } 4282 4283 /* 4284 * mdi_pi_set_preferred: 4285 * Set the preferred path flag 4286 */ 4287 void 4288 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4289 { 4290 if (pip) { 4291 MDI_PI(pip)->pi_preferred = preferred; 4292 } 4293 } 4294 4295 /* 4296 * mdi_pi_set_state(): 4297 * Set the mdi_pathinfo node state 4298 */ 4299 void 4300 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4301 { 4302 uint32_t ext_state; 4303 4304 if (pip) { 4305 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4306 MDI_PI(pip)->pi_state = state; 4307 MDI_PI(pip)->pi_state |= ext_state; 4308 4309 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4310 i_ddi_di_cache_invalidate(); 4311 } 4312 } 4313 4314 /* 4315 * Property functions: 4316 */ 4317 int 4318 i_map_nvlist_error_to_mdi(int val) 4319 { 4320 int rv; 4321 4322 switch (val) { 4323 case 0: 4324 rv = DDI_PROP_SUCCESS; 4325 break; 4326 case EINVAL: 4327 case ENOTSUP: 4328 rv = DDI_PROP_INVAL_ARG; 4329 break; 4330 case ENOMEM: 4331 rv = DDI_PROP_NO_MEMORY; 4332 break; 4333 default: 4334 rv = DDI_PROP_NOT_FOUND; 4335 break; 4336 } 4337 return (rv); 4338 } 4339 4340 /* 4341 * mdi_pi_get_next_prop(): 4342 * Property walk function. The caller should hold mdi_pi_lock() 4343 * and release by calling mdi_pi_unlock() at the end of walk to 4344 * get a consistent value. 
4345 */ 4346 nvpair_t * 4347 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4348 { 4349 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4350 return (NULL); 4351 } 4352 ASSERT(MDI_PI_LOCKED(pip)); 4353 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4354 } 4355 4356 /* 4357 * mdi_prop_remove(): 4358 * Remove the named property from the named list. 4359 */ 4360 int 4361 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4362 { 4363 if (pip == NULL) { 4364 return (DDI_PROP_NOT_FOUND); 4365 } 4366 ASSERT(!MDI_PI_LOCKED(pip)); 4367 MDI_PI_LOCK(pip); 4368 if (MDI_PI(pip)->pi_prop == NULL) { 4369 MDI_PI_UNLOCK(pip); 4370 return (DDI_PROP_NOT_FOUND); 4371 } 4372 if (name) { 4373 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4374 } else { 4375 char nvp_name[MAXNAMELEN]; 4376 nvpair_t *nvp; 4377 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4378 while (nvp) { 4379 nvpair_t *next; 4380 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4381 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4382 nvpair_name(nvp)); 4383 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4384 nvp_name); 4385 nvp = next; 4386 } 4387 } 4388 MDI_PI_UNLOCK(pip); 4389 return (DDI_PROP_SUCCESS); 4390 } 4391 4392 /* 4393 * mdi_prop_size(): 4394 * Get buffer size needed to pack the property data. 4395 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4396 * buffer size. 4397 */ 4398 int 4399 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4400 { 4401 int rv; 4402 size_t bufsize; 4403 4404 *buflenp = 0; 4405 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4406 return (DDI_PROP_NOT_FOUND); 4407 } 4408 ASSERT(MDI_PI_LOCKED(pip)); 4409 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4410 &bufsize, NV_ENCODE_NATIVE); 4411 *buflenp = bufsize; 4412 return (i_map_nvlist_error_to_mdi(rv)); 4413 } 4414 4415 /* 4416 * mdi_prop_pack(): 4417 * pack the property list. 
The caller should hold the 4418 * mdi_pathinfo_t node to get a consistent data 4419 */ 4420 int 4421 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4422 { 4423 int rv; 4424 size_t bufsize; 4425 4426 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4427 return (DDI_PROP_NOT_FOUND); 4428 } 4429 4430 ASSERT(MDI_PI_LOCKED(pip)); 4431 4432 bufsize = buflen; 4433 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4434 NV_ENCODE_NATIVE, KM_SLEEP); 4435 4436 return (i_map_nvlist_error_to_mdi(rv)); 4437 } 4438 4439 /* 4440 * mdi_prop_update_byte(): 4441 * Create/Update a byte property 4442 */ 4443 int 4444 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4445 { 4446 int rv; 4447 4448 if (pip == NULL) { 4449 return (DDI_PROP_INVAL_ARG); 4450 } 4451 ASSERT(!MDI_PI_LOCKED(pip)); 4452 MDI_PI_LOCK(pip); 4453 if (MDI_PI(pip)->pi_prop == NULL) { 4454 MDI_PI_UNLOCK(pip); 4455 return (DDI_PROP_NOT_FOUND); 4456 } 4457 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4458 MDI_PI_UNLOCK(pip); 4459 return (i_map_nvlist_error_to_mdi(rv)); 4460 } 4461 4462 /* 4463 * mdi_prop_update_byte_array(): 4464 * Create/Update a byte array property 4465 */ 4466 int 4467 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4468 uint_t nelements) 4469 { 4470 int rv; 4471 4472 if (pip == NULL) { 4473 return (DDI_PROP_INVAL_ARG); 4474 } 4475 ASSERT(!MDI_PI_LOCKED(pip)); 4476 MDI_PI_LOCK(pip); 4477 if (MDI_PI(pip)->pi_prop == NULL) { 4478 MDI_PI_UNLOCK(pip); 4479 return (DDI_PROP_NOT_FOUND); 4480 } 4481 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4482 MDI_PI_UNLOCK(pip); 4483 return (i_map_nvlist_error_to_mdi(rv)); 4484 } 4485 4486 /* 4487 * mdi_prop_update_int(): 4488 * Create/Update a 32 bit integer property 4489 */ 4490 int 4491 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4492 { 4493 int rv; 4494 4495 if (pip == NULL) { 4496 return (DDI_PROP_INVAL_ARG); 4497 } 4498 
ASSERT(!MDI_PI_LOCKED(pip)); 4499 MDI_PI_LOCK(pip); 4500 if (MDI_PI(pip)->pi_prop == NULL) { 4501 MDI_PI_UNLOCK(pip); 4502 return (DDI_PROP_NOT_FOUND); 4503 } 4504 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4505 MDI_PI_UNLOCK(pip); 4506 return (i_map_nvlist_error_to_mdi(rv)); 4507 } 4508 4509 /* 4510 * mdi_prop_update_int64(): 4511 * Create/Update a 64 bit integer property 4512 */ 4513 int 4514 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4515 { 4516 int rv; 4517 4518 if (pip == NULL) { 4519 return (DDI_PROP_INVAL_ARG); 4520 } 4521 ASSERT(!MDI_PI_LOCKED(pip)); 4522 MDI_PI_LOCK(pip); 4523 if (MDI_PI(pip)->pi_prop == NULL) { 4524 MDI_PI_UNLOCK(pip); 4525 return (DDI_PROP_NOT_FOUND); 4526 } 4527 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4528 MDI_PI_UNLOCK(pip); 4529 return (i_map_nvlist_error_to_mdi(rv)); 4530 } 4531 4532 /* 4533 * mdi_prop_update_int_array(): 4534 * Create/Update a int array property 4535 */ 4536 int 4537 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4538 uint_t nelements) 4539 { 4540 int rv; 4541 4542 if (pip == NULL) { 4543 return (DDI_PROP_INVAL_ARG); 4544 } 4545 ASSERT(!MDI_PI_LOCKED(pip)); 4546 MDI_PI_LOCK(pip); 4547 if (MDI_PI(pip)->pi_prop == NULL) { 4548 MDI_PI_UNLOCK(pip); 4549 return (DDI_PROP_NOT_FOUND); 4550 } 4551 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4552 nelements); 4553 MDI_PI_UNLOCK(pip); 4554 return (i_map_nvlist_error_to_mdi(rv)); 4555 } 4556 4557 /* 4558 * mdi_prop_update_string(): 4559 * Create/Update a string property 4560 */ 4561 int 4562 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4563 { 4564 int rv; 4565 4566 if (pip == NULL) { 4567 return (DDI_PROP_INVAL_ARG); 4568 } 4569 ASSERT(!MDI_PI_LOCKED(pip)); 4570 MDI_PI_LOCK(pip); 4571 if (MDI_PI(pip)->pi_prop == NULL) { 4572 MDI_PI_UNLOCK(pip); 4573 return (DDI_PROP_NOT_FOUND); 4574 } 4575 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4576 MDI_PI_UNLOCK(pip); 4577 return (i_map_nvlist_error_to_mdi(rv)); 4578 } 4579 4580 /* 4581 * mdi_prop_update_string_array(): 4582 * Create/Update a string array property 4583 */ 4584 int 4585 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4586 uint_t nelements) 4587 { 4588 int rv; 4589 4590 if (pip == NULL) { 4591 return (DDI_PROP_INVAL_ARG); 4592 } 4593 ASSERT(!MDI_PI_LOCKED(pip)); 4594 MDI_PI_LOCK(pip); 4595 if (MDI_PI(pip)->pi_prop == NULL) { 4596 MDI_PI_UNLOCK(pip); 4597 return (DDI_PROP_NOT_FOUND); 4598 } 4599 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4600 nelements); 4601 MDI_PI_UNLOCK(pip); 4602 return (i_map_nvlist_error_to_mdi(rv)); 4603 } 4604 4605 /* 4606 * mdi_prop_lookup_byte(): 4607 * Look for byte property identified by name. The data returned 4608 * is the actual property and valid as long as mdi_pathinfo_t node 4609 * is alive. 4610 */ 4611 int 4612 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4613 { 4614 int rv; 4615 4616 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4617 return (DDI_PROP_NOT_FOUND); 4618 } 4619 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4620 return (i_map_nvlist_error_to_mdi(rv)); 4621 } 4622 4623 4624 /* 4625 * mdi_prop_lookup_byte_array(): 4626 * Look for byte array property identified by name. The data 4627 * returned is the actual property and valid as long as 4628 * mdi_pathinfo_t node is alive. 4629 */ 4630 int 4631 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4632 uint_t *nelements) 4633 { 4634 int rv; 4635 4636 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4637 return (DDI_PROP_NOT_FOUND); 4638 } 4639 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4640 nelements); 4641 return (i_map_nvlist_error_to_mdi(rv)); 4642 } 4643 4644 /* 4645 * mdi_prop_lookup_int(): 4646 * Look for int property identified by name. 
The data returned 4647 * is the actual property and valid as long as mdi_pathinfo_t 4648 * node is alive. 4649 */ 4650 int 4651 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4652 { 4653 int rv; 4654 4655 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4656 return (DDI_PROP_NOT_FOUND); 4657 } 4658 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4659 return (i_map_nvlist_error_to_mdi(rv)); 4660 } 4661 4662 /* 4663 * mdi_prop_lookup_int64(): 4664 * Look for int64 property identified by name. The data returned 4665 * is the actual property and valid as long as mdi_pathinfo_t node 4666 * is alive. 4667 */ 4668 int 4669 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4670 { 4671 int rv; 4672 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4673 return (DDI_PROP_NOT_FOUND); 4674 } 4675 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4676 return (i_map_nvlist_error_to_mdi(rv)); 4677 } 4678 4679 /* 4680 * mdi_prop_lookup_int_array(): 4681 * Look for int array property identified by name. The data 4682 * returned is the actual property and valid as long as 4683 * mdi_pathinfo_t node is alive. 4684 */ 4685 int 4686 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4687 uint_t *nelements) 4688 { 4689 int rv; 4690 4691 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4692 return (DDI_PROP_NOT_FOUND); 4693 } 4694 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4695 (int32_t **)data, nelements); 4696 return (i_map_nvlist_error_to_mdi(rv)); 4697 } 4698 4699 /* 4700 * mdi_prop_lookup_string(): 4701 * Look for string property identified by name. The data 4702 * returned is the actual property and valid as long as 4703 * mdi_pathinfo_t node is alive. 
4704 */ 4705 int 4706 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4707 { 4708 int rv; 4709 4710 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4711 return (DDI_PROP_NOT_FOUND); 4712 } 4713 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4714 return (i_map_nvlist_error_to_mdi(rv)); 4715 } 4716 4717 /* 4718 * mdi_prop_lookup_string_array(): 4719 * Look for string array property identified by name. The data 4720 * returned is the actual property and valid as long as 4721 * mdi_pathinfo_t node is alive. 4722 */ 4723 int 4724 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4725 uint_t *nelements) 4726 { 4727 int rv; 4728 4729 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4730 return (DDI_PROP_NOT_FOUND); 4731 } 4732 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4733 nelements); 4734 return (i_map_nvlist_error_to_mdi(rv)); 4735 } 4736 4737 /* 4738 * mdi_prop_free(): 4739 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4740 * functions return the pointer to actual property data and not a 4741 * copy of it. So the data returned is valid as long as 4742 * mdi_pathinfo_t node is valid. 
4743 */ 4744 /*ARGSUSED*/ 4745 int 4746 mdi_prop_free(void *data) 4747 { 4748 return (DDI_PROP_SUCCESS); 4749 } 4750 4751 /*ARGSUSED*/ 4752 static void 4753 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4754 { 4755 char *ct_path; 4756 char *ct_status; 4757 char *status; 4758 dev_info_t *cdip = ct->ct_dip; 4759 char lb_buf[64]; 4760 int report_lb_c = 0, report_lb_p = 0; 4761 4762 ASSERT(MDI_CLIENT_LOCKED(ct)); 4763 if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) || 4764 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4765 return; 4766 } 4767 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4768 ct_status = "optimal"; 4769 report_lb_c = 1; 4770 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4771 ct_status = "degraded"; 4772 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4773 ct_status = "failed"; 4774 } else { 4775 ct_status = "unknown"; 4776 } 4777 4778 lb_buf[0] = 0; /* not interested in load balancing config */ 4779 4780 if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) { 4781 status = "removed"; 4782 } else if (MDI_PI_IS_OFFLINE(pip)) { 4783 status = "offline"; 4784 } else if (MDI_PI_IS_ONLINE(pip)) { 4785 status = "online"; 4786 report_lb_p = 1; 4787 } else if (MDI_PI_IS_STANDBY(pip)) { 4788 status = "standby"; 4789 } else if (MDI_PI_IS_FAULT(pip)) { 4790 status = "faulted"; 4791 } else { 4792 status = "unknown"; 4793 } 4794 4795 if (cdip) { 4796 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4797 4798 /* 4799 * NOTE: Keeping "multipath status: %s" and 4800 * "Load balancing: %s" format unchanged in case someone 4801 * scrubs /var/adm/messages looking for these messages. 
4802 */ 4803 if (report_lb_c && report_lb_p) { 4804 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4805 (void) snprintf(lb_buf, sizeof (lb_buf), 4806 "%s, region-size: %d", mdi_load_balance_lba, 4807 ct->ct_lb_args->region_size); 4808 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4809 (void) snprintf(lb_buf, sizeof (lb_buf), 4810 "%s", mdi_load_balance_none); 4811 } else { 4812 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4813 mdi_load_balance_rr); 4814 } 4815 4816 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4817 "?%s (%s%d) multipath status: %s: " 4818 "path %d %s is %s: Load balancing: %s\n", 4819 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4820 ddi_get_instance(cdip), ct_status, 4821 mdi_pi_get_path_instance(pip), 4822 mdi_pi_spathname(pip), status, lb_buf); 4823 } else { 4824 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4825 "?%s (%s%d) multipath status: %s: " 4826 "path %d %s is %s\n", 4827 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4828 ddi_get_instance(cdip), ct_status, 4829 mdi_pi_get_path_instance(pip), 4830 mdi_pi_spathname(pip), status); 4831 } 4832 4833 kmem_free(ct_path, MAXPATHLEN); 4834 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4835 } 4836 } 4837 4838 #ifdef DEBUG 4839 /* 4840 * i_mdi_log(): 4841 * Utility function for error message management 4842 * 4843 * NOTE: Implementation takes care of trailing \n for cmn_err, 4844 * MDI_DEBUG should not terminate fmt strings with \n. 4845 * 4846 * NOTE: If the level is >= 2, and there is no leading !?^ 4847 * then a leading ! is implied (but can be overriden via 4848 * mdi_debug_consoleonly). If you are using kmdb on the console, 4849 * consider setting mdi_debug_consoleonly to 1 as an aid. 4850 */ 4851 /*PRINTFLIKE4*/ 4852 static void 4853 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...) 
4854 { 4855 char name[MAXNAMELEN]; 4856 char buf[512]; 4857 char *bp; 4858 va_list ap; 4859 int log_only = 0; 4860 int boot_only = 0; 4861 int console_only = 0; 4862 4863 if (dip) { 4864 (void) snprintf(name, sizeof(name), "%s%d: ", 4865 ddi_driver_name(dip), ddi_get_instance(dip)); 4866 } else { 4867 name[0] = 0; 4868 } 4869 4870 va_start(ap, fmt); 4871 (void) vsnprintf(buf, sizeof(buf), fmt, ap); 4872 va_end(ap); 4873 4874 switch (buf[0]) { 4875 case '!': 4876 bp = &buf[1]; 4877 log_only = 1; 4878 break; 4879 case '?': 4880 bp = &buf[1]; 4881 boot_only = 1; 4882 break; 4883 case '^': 4884 bp = &buf[1]; 4885 console_only = 1; 4886 break; 4887 default: 4888 if (level >= 2) 4889 log_only = 1; /* ! implied */ 4890 bp = buf; 4891 break; 4892 } 4893 if (mdi_debug_logonly) { 4894 log_only = 1; 4895 boot_only = 0; 4896 console_only = 0; 4897 } 4898 if (mdi_debug_consoleonly) { 4899 log_only = 0; 4900 boot_only = 0; 4901 console_only = 1; 4902 level = CE_NOTE; 4903 goto console; 4904 } 4905 4906 switch (level) { 4907 case CE_NOTE: 4908 level = CE_CONT; 4909 /* FALLTHROUGH */ 4910 case CE_CONT: 4911 if (boot_only) { 4912 cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp); 4913 } else if (console_only) { 4914 cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp); 4915 } else if (log_only) { 4916 cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp); 4917 } else { 4918 cmn_err(level, "mdi: %s%s: %s\n", name, func, bp); 4919 } 4920 break; 4921 4922 case CE_WARN: 4923 case CE_PANIC: 4924 console: 4925 if (boot_only) { 4926 cmn_err(level, "?mdi: %s%s: %s", name, func, bp); 4927 } else if (console_only) { 4928 cmn_err(level, "^mdi: %s%s: %s", name, func, bp); 4929 } else if (log_only) { 4930 cmn_err(level, "!mdi: %s%s: %s", name, func, bp); 4931 } else { 4932 cmn_err(level, "mdi: %s%s: %s", name, func, bp); 4933 } 4934 break; 4935 default: 4936 cmn_err(level, "mdi: %s%s", name, bp); 4937 break; 4938 } 4939 } 4940 #endif /* DEBUG */ 4941 4942 void 4943 i_mdi_client_online(dev_info_t 
*ct_dip) 4944 { 4945 mdi_client_t *ct; 4946 4947 /* 4948 * Client online notification. Mark client state as online 4949 * restore our binding with dev_info node 4950 */ 4951 ct = i_devi_get_client(ct_dip); 4952 ASSERT(ct != NULL); 4953 MDI_CLIENT_LOCK(ct); 4954 MDI_CLIENT_SET_ONLINE(ct); 4955 /* catch for any memory leaks */ 4956 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4957 ct->ct_dip = ct_dip; 4958 4959 if (ct->ct_power_cnt == 0) 4960 (void) i_mdi_power_all_phci(ct); 4961 4962 MDI_DEBUG(4, (MDI_NOTE, ct_dip, 4963 "i_mdi_pm_hold_client %p", (void *)ct)); 4964 i_mdi_pm_hold_client(ct, 1); 4965 4966 MDI_CLIENT_UNLOCK(ct); 4967 } 4968 4969 void 4970 i_mdi_phci_online(dev_info_t *ph_dip) 4971 { 4972 mdi_phci_t *ph; 4973 4974 /* pHCI online notification. Mark state accordingly */ 4975 ph = i_devi_get_phci(ph_dip); 4976 ASSERT(ph != NULL); 4977 MDI_PHCI_LOCK(ph); 4978 MDI_PHCI_SET_ONLINE(ph); 4979 MDI_PHCI_UNLOCK(ph); 4980 } 4981 4982 /* 4983 * mdi_devi_online(): 4984 * Online notification from NDI framework on pHCI/client 4985 * device online. 4986 * Return Values: 4987 * NDI_SUCCESS 4988 * MDI_FAILURE 4989 */ 4990 /*ARGSUSED*/ 4991 int 4992 mdi_devi_online(dev_info_t *dip, uint_t flags) 4993 { 4994 if (MDI_PHCI(dip)) { 4995 i_mdi_phci_online(dip); 4996 } 4997 4998 if (MDI_CLIENT(dip)) { 4999 i_mdi_client_online(dip); 5000 } 5001 return (NDI_SUCCESS); 5002 } 5003 5004 /* 5005 * mdi_devi_offline(): 5006 * Offline notification from NDI framework on pHCI/Client device 5007 * offline. 
5008 * 5009 * Return Values: 5010 * NDI_SUCCESS 5011 * NDI_FAILURE 5012 */ 5013 /*ARGSUSED*/ 5014 int 5015 mdi_devi_offline(dev_info_t *dip, uint_t flags) 5016 { 5017 int rv = NDI_SUCCESS; 5018 5019 if (MDI_CLIENT(dip)) { 5020 rv = i_mdi_client_offline(dip, flags); 5021 if (rv != NDI_SUCCESS) 5022 return (rv); 5023 } 5024 5025 if (MDI_PHCI(dip)) { 5026 rv = i_mdi_phci_offline(dip, flags); 5027 5028 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 5029 /* set client back online */ 5030 i_mdi_client_online(dip); 5031 } 5032 } 5033 5034 return (rv); 5035 } 5036 5037 /*ARGSUSED*/ 5038 static int 5039 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 5040 { 5041 int rv = NDI_SUCCESS; 5042 mdi_phci_t *ph; 5043 mdi_client_t *ct; 5044 mdi_pathinfo_t *pip; 5045 mdi_pathinfo_t *next; 5046 mdi_pathinfo_t *failed_pip = NULL; 5047 dev_info_t *cdip; 5048 5049 /* 5050 * pHCI component offline notification 5051 * Make sure that this pHCI instance is free to be offlined. 5052 * If it is OK to proceed, Offline and remove all the child 5053 * mdi_pathinfo nodes. This process automatically offlines 5054 * corresponding client devices, for which this pHCI provides 5055 * critical services. 5056 */ 5057 ph = i_devi_get_phci(dip); 5058 MDI_DEBUG(2, (MDI_NOTE, dip, 5059 "called %p %p", (void *)dip, (void *)ph)); 5060 if (ph == NULL) { 5061 return (rv); 5062 } 5063 5064 MDI_PHCI_LOCK(ph); 5065 5066 if (MDI_PHCI_IS_OFFLINE(ph)) { 5067 MDI_DEBUG(1, (MDI_WARN, dip, 5068 "!pHCI already offlined: %p", (void *)dip)); 5069 MDI_PHCI_UNLOCK(ph); 5070 return (NDI_SUCCESS); 5071 } 5072 5073 /* 5074 * Check to see if the pHCI can be offlined 5075 */ 5076 if (ph->ph_unstable) { 5077 MDI_DEBUG(1, (MDI_WARN, dip, 5078 "!One or more target devices are in transient state. " 5079 "This device can not be removed at this moment. 
" 5080 "Please try again later.")); 5081 MDI_PHCI_UNLOCK(ph); 5082 return (NDI_BUSY); 5083 } 5084 5085 pip = ph->ph_path_head; 5086 while (pip != NULL) { 5087 MDI_PI_LOCK(pip); 5088 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5089 5090 /* 5091 * The mdi_pathinfo state is OK. Check the client state. 5092 * If failover in progress fail the pHCI from offlining 5093 */ 5094 ct = MDI_PI(pip)->pi_client; 5095 i_mdi_client_lock(ct, pip); 5096 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5097 (ct->ct_unstable)) { 5098 /* 5099 * Failover is in progress, Fail the DR 5100 */ 5101 MDI_DEBUG(1, (MDI_WARN, dip, 5102 "!pHCI device is busy. " 5103 "This device can not be removed at this moment. " 5104 "Please try again later.")); 5105 MDI_PI_UNLOCK(pip); 5106 i_mdi_client_unlock(ct); 5107 MDI_PHCI_UNLOCK(ph); 5108 return (NDI_BUSY); 5109 } 5110 MDI_PI_UNLOCK(pip); 5111 5112 /* 5113 * Check to see of we are removing the last path of this 5114 * client device... 5115 */ 5116 cdip = ct->ct_dip; 5117 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5118 (i_mdi_client_compute_state(ct, ph) == 5119 MDI_CLIENT_STATE_FAILED)) { 5120 i_mdi_client_unlock(ct); 5121 MDI_PHCI_UNLOCK(ph); 5122 if (ndi_devi_offline(cdip, 5123 NDI_DEVFS_CLEAN) != NDI_SUCCESS) { 5124 /* 5125 * ndi_devi_offline() failed. 5126 * This pHCI provides the critical path 5127 * to one or more client devices. 5128 * Return busy. 5129 */ 5130 MDI_PHCI_LOCK(ph); 5131 MDI_DEBUG(1, (MDI_WARN, dip, 5132 "!pHCI device is busy. " 5133 "This device can not be removed at this " 5134 "moment. 
Please try again later.")); 5135 failed_pip = pip; 5136 break; 5137 } else { 5138 MDI_PHCI_LOCK(ph); 5139 pip = next; 5140 } 5141 } else { 5142 i_mdi_client_unlock(ct); 5143 pip = next; 5144 } 5145 } 5146 5147 if (failed_pip) { 5148 pip = ph->ph_path_head; 5149 while (pip != failed_pip) { 5150 MDI_PI_LOCK(pip); 5151 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5152 ct = MDI_PI(pip)->pi_client; 5153 i_mdi_client_lock(ct, pip); 5154 cdip = ct->ct_dip; 5155 switch (MDI_CLIENT_STATE(ct)) { 5156 case MDI_CLIENT_STATE_OPTIMAL: 5157 case MDI_CLIENT_STATE_DEGRADED: 5158 if (cdip) { 5159 MDI_PI_UNLOCK(pip); 5160 i_mdi_client_unlock(ct); 5161 MDI_PHCI_UNLOCK(ph); 5162 (void) ndi_devi_online(cdip, 0); 5163 MDI_PHCI_LOCK(ph); 5164 pip = next; 5165 continue; 5166 } 5167 break; 5168 5169 case MDI_CLIENT_STATE_FAILED: 5170 if (cdip) { 5171 MDI_PI_UNLOCK(pip); 5172 i_mdi_client_unlock(ct); 5173 MDI_PHCI_UNLOCK(ph); 5174 (void) ndi_devi_offline(cdip, 5175 NDI_DEVFS_CLEAN); 5176 MDI_PHCI_LOCK(ph); 5177 pip = next; 5178 continue; 5179 } 5180 break; 5181 } 5182 MDI_PI_UNLOCK(pip); 5183 i_mdi_client_unlock(ct); 5184 pip = next; 5185 } 5186 MDI_PHCI_UNLOCK(ph); 5187 return (NDI_BUSY); 5188 } 5189 5190 /* 5191 * Mark the pHCI as offline 5192 */ 5193 MDI_PHCI_SET_OFFLINE(ph); 5194 5195 /* 5196 * Mark the child mdi_pathinfo nodes as transient 5197 */ 5198 pip = ph->ph_path_head; 5199 while (pip != NULL) { 5200 MDI_PI_LOCK(pip); 5201 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5202 MDI_PI_SET_OFFLINING(pip); 5203 MDI_PI_UNLOCK(pip); 5204 pip = next; 5205 } 5206 MDI_PHCI_UNLOCK(ph); 5207 /* 5208 * Give a chance for any pending commands to execute 5209 */ 5210 delay_random(mdi_delay); 5211 MDI_PHCI_LOCK(ph); 5212 pip = ph->ph_path_head; 5213 while (pip != NULL) { 5214 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5215 (void) i_mdi_pi_offline(pip, flags); 5216 MDI_PI_LOCK(pip); 5217 ct = MDI_PI(pip)->pi_client; 5218 if (!MDI_PI_IS_OFFLINE(pip)) { 5219 MDI_DEBUG(1, 
(MDI_WARN, dip, 5220 "!pHCI device is busy. " 5221 "This device can not be removed at this moment. " 5222 "Please try again later.")); 5223 MDI_PI_UNLOCK(pip); 5224 MDI_PHCI_SET_ONLINE(ph); 5225 MDI_PHCI_UNLOCK(ph); 5226 return (NDI_BUSY); 5227 } 5228 MDI_PI_UNLOCK(pip); 5229 pip = next; 5230 } 5231 MDI_PHCI_UNLOCK(ph); 5232 5233 return (rv); 5234 } 5235 5236 void 5237 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5238 { 5239 mdi_phci_t *ph; 5240 mdi_client_t *ct; 5241 mdi_pathinfo_t *pip; 5242 mdi_pathinfo_t *next; 5243 dev_info_t *cdip; 5244 5245 if (!MDI_PHCI(dip)) 5246 return; 5247 5248 ph = i_devi_get_phci(dip); 5249 if (ph == NULL) { 5250 return; 5251 } 5252 5253 MDI_PHCI_LOCK(ph); 5254 5255 if (MDI_PHCI_IS_OFFLINE(ph)) { 5256 /* has no last path */ 5257 MDI_PHCI_UNLOCK(ph); 5258 return; 5259 } 5260 5261 pip = ph->ph_path_head; 5262 while (pip != NULL) { 5263 MDI_PI_LOCK(pip); 5264 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5265 5266 ct = MDI_PI(pip)->pi_client; 5267 i_mdi_client_lock(ct, pip); 5268 MDI_PI_UNLOCK(pip); 5269 5270 cdip = ct->ct_dip; 5271 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5272 (i_mdi_client_compute_state(ct, ph) == 5273 MDI_CLIENT_STATE_FAILED)) { 5274 /* Last path. 
Mark client dip as retiring */ 5275 i_mdi_client_unlock(ct); 5276 MDI_PHCI_UNLOCK(ph); 5277 (void) e_ddi_mark_retiring(cdip, cons_array); 5278 MDI_PHCI_LOCK(ph); 5279 pip = next; 5280 } else { 5281 i_mdi_client_unlock(ct); 5282 pip = next; 5283 } 5284 } 5285 5286 MDI_PHCI_UNLOCK(ph); 5287 5288 return; 5289 } 5290 5291 void 5292 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5293 { 5294 mdi_phci_t *ph; 5295 mdi_client_t *ct; 5296 mdi_pathinfo_t *pip; 5297 mdi_pathinfo_t *next; 5298 dev_info_t *cdip; 5299 5300 if (!MDI_PHCI(dip)) 5301 return; 5302 5303 ph = i_devi_get_phci(dip); 5304 if (ph == NULL) 5305 return; 5306 5307 MDI_PHCI_LOCK(ph); 5308 5309 if (MDI_PHCI_IS_OFFLINE(ph)) { 5310 MDI_PHCI_UNLOCK(ph); 5311 /* not last path */ 5312 return; 5313 } 5314 5315 if (ph->ph_unstable) { 5316 MDI_PHCI_UNLOCK(ph); 5317 /* can't check for constraints */ 5318 *constraint = 0; 5319 return; 5320 } 5321 5322 pip = ph->ph_path_head; 5323 while (pip != NULL) { 5324 MDI_PI_LOCK(pip); 5325 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5326 5327 /* 5328 * The mdi_pathinfo state is OK. Check the client state. 5329 * If failover in progress fail the pHCI from offlining 5330 */ 5331 ct = MDI_PI(pip)->pi_client; 5332 i_mdi_client_lock(ct, pip); 5333 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5334 (ct->ct_unstable)) { 5335 /* 5336 * Failover is in progress, can't check for constraints 5337 */ 5338 MDI_PI_UNLOCK(pip); 5339 i_mdi_client_unlock(ct); 5340 MDI_PHCI_UNLOCK(ph); 5341 *constraint = 0; 5342 return; 5343 } 5344 MDI_PI_UNLOCK(pip); 5345 5346 /* 5347 * Check to see of we are retiring the last path of this 5348 * client device... 
5349 */ 5350 cdip = ct->ct_dip; 5351 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5352 (i_mdi_client_compute_state(ct, ph) == 5353 MDI_CLIENT_STATE_FAILED)) { 5354 i_mdi_client_unlock(ct); 5355 MDI_PHCI_UNLOCK(ph); 5356 (void) e_ddi_retire_notify(cdip, constraint); 5357 MDI_PHCI_LOCK(ph); 5358 pip = next; 5359 } else { 5360 i_mdi_client_unlock(ct); 5361 pip = next; 5362 } 5363 } 5364 5365 MDI_PHCI_UNLOCK(ph); 5366 5367 return; 5368 } 5369 5370 /* 5371 * offline the path(s) hanging off the pHCI. If the 5372 * last path to any client, check that constraints 5373 * have been applied. 5374 */ 5375 void 5376 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) 5377 { 5378 mdi_phci_t *ph; 5379 mdi_client_t *ct; 5380 mdi_pathinfo_t *pip; 5381 mdi_pathinfo_t *next; 5382 dev_info_t *cdip; 5383 int unstable = 0; 5384 int constraint; 5385 5386 if (!MDI_PHCI(dip)) 5387 return; 5388 5389 ph = i_devi_get_phci(dip); 5390 if (ph == NULL) { 5391 /* no last path and no pips */ 5392 return; 5393 } 5394 5395 MDI_PHCI_LOCK(ph); 5396 5397 if (MDI_PHCI_IS_OFFLINE(ph)) { 5398 MDI_PHCI_UNLOCK(ph); 5399 /* no last path and no pips */ 5400 return; 5401 } 5402 5403 /* 5404 * Check to see if the pHCI can be offlined 5405 */ 5406 if (ph->ph_unstable) { 5407 unstable = 1; 5408 } 5409 5410 pip = ph->ph_path_head; 5411 while (pip != NULL) { 5412 MDI_PI_LOCK(pip); 5413 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5414 5415 /* 5416 * if failover in progress fail the pHCI from offlining 5417 */ 5418 ct = MDI_PI(pip)->pi_client; 5419 i_mdi_client_lock(ct, pip); 5420 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5421 (ct->ct_unstable)) { 5422 unstable = 1; 5423 } 5424 MDI_PI_UNLOCK(pip); 5425 5426 /* 5427 * Check to see of we are removing the last path of this 5428 * client device... 
5429 */ 5430 cdip = ct->ct_dip; 5431 if (!phci_only && cdip && 5432 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5433 (i_mdi_client_compute_state(ct, ph) == 5434 MDI_CLIENT_STATE_FAILED)) { 5435 i_mdi_client_unlock(ct); 5436 MDI_PHCI_UNLOCK(ph); 5437 /* 5438 * We don't retire clients we just retire the 5439 * path to a client. If it is the last path 5440 * to a client, constraints are checked and 5441 * if we pass the last path is offlined. MPXIO will 5442 * then fail all I/Os to the client. Since we don't 5443 * want to retire the client on a path error 5444 * set constraint = 0 so that the client dip 5445 * is not retired. 5446 */ 5447 constraint = 0; 5448 (void) e_ddi_retire_finalize(cdip, &constraint); 5449 MDI_PHCI_LOCK(ph); 5450 pip = next; 5451 } else { 5452 i_mdi_client_unlock(ct); 5453 pip = next; 5454 } 5455 } 5456 5457 /* 5458 * Cannot offline pip(s) 5459 */ 5460 if (unstable) { 5461 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: " 5462 "pHCI in transient state, cannot retire", 5463 ddi_driver_name(dip), ddi_get_instance(dip)); 5464 MDI_PHCI_UNLOCK(ph); 5465 return; 5466 } 5467 5468 /* 5469 * Mark the pHCI as offline 5470 */ 5471 MDI_PHCI_SET_OFFLINE(ph); 5472 5473 /* 5474 * Mark the child mdi_pathinfo nodes as transient 5475 */ 5476 pip = ph->ph_path_head; 5477 while (pip != NULL) { 5478 MDI_PI_LOCK(pip); 5479 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5480 MDI_PI_SET_OFFLINING(pip); 5481 MDI_PI_UNLOCK(pip); 5482 pip = next; 5483 } 5484 MDI_PHCI_UNLOCK(ph); 5485 /* 5486 * Give a chance for any pending commands to execute 5487 */ 5488 delay_random(mdi_delay); 5489 MDI_PHCI_LOCK(ph); 5490 pip = ph->ph_path_head; 5491 while (pip != NULL) { 5492 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5493 (void) i_mdi_pi_offline(pip, 0); 5494 MDI_PI_LOCK(pip); 5495 ct = MDI_PI(pip)->pi_client; 5496 if (!MDI_PI_IS_OFFLINE(pip)) { 5497 cmn_err(CE_WARN, "mdi_phci_retire_finalize: " 5498 "path %d %s busy, cannot offline", 5499 
mdi_pi_get_path_instance(pip), 5500 mdi_pi_spathname(pip)); 5501 MDI_PI_UNLOCK(pip); 5502 MDI_PHCI_SET_ONLINE(ph); 5503 MDI_PHCI_UNLOCK(ph); 5504 return; 5505 } 5506 MDI_PI_UNLOCK(pip); 5507 pip = next; 5508 } 5509 MDI_PHCI_UNLOCK(ph); 5510 5511 return; 5512 } 5513 5514 void 5515 mdi_phci_unretire(dev_info_t *dip) 5516 { 5517 ASSERT(MDI_PHCI(dip)); 5518 5519 /* 5520 * Online the phci 5521 */ 5522 i_mdi_phci_online(dip); 5523 } 5524 5525 /*ARGSUSED*/ 5526 static int 5527 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5528 { 5529 int rv = NDI_SUCCESS; 5530 mdi_client_t *ct; 5531 5532 /* 5533 * Client component to go offline. Make sure that we are 5534 * not in failing over state and update client state 5535 * accordingly 5536 */ 5537 ct = i_devi_get_client(dip); 5538 MDI_DEBUG(2, (MDI_NOTE, dip, 5539 "called %p %p", (void *)dip, (void *)ct)); 5540 if (ct != NULL) { 5541 MDI_CLIENT_LOCK(ct); 5542 if (ct->ct_unstable) { 5543 /* 5544 * One or more paths are in transient state, 5545 * Dont allow offline of a client device 5546 */ 5547 MDI_DEBUG(1, (MDI_WARN, dip, 5548 "!One or more paths to " 5549 "this device are in transient state. " 5550 "This device can not be removed at this moment. " 5551 "Please try again later.")); 5552 MDI_CLIENT_UNLOCK(ct); 5553 return (NDI_BUSY); 5554 } 5555 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5556 /* 5557 * Failover is in progress, Dont allow DR of 5558 * a client device 5559 */ 5560 MDI_DEBUG(1, (MDI_WARN, dip, 5561 "!Client device is Busy. " 5562 "This device can not be removed at this moment. 
" 5563 "Please try again later.")); 5564 MDI_CLIENT_UNLOCK(ct); 5565 return (NDI_BUSY); 5566 } 5567 MDI_CLIENT_SET_OFFLINE(ct); 5568 5569 /* 5570 * Unbind our relationship with the dev_info node 5571 */ 5572 if (flags & NDI_DEVI_REMOVE) { 5573 ct->ct_dip = NULL; 5574 } 5575 MDI_CLIENT_UNLOCK(ct); 5576 } 5577 return (rv); 5578 } 5579 5580 /* 5581 * mdi_pre_attach(): 5582 * Pre attach() notification handler 5583 */ 5584 /*ARGSUSED*/ 5585 int 5586 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5587 { 5588 /* don't support old DDI_PM_RESUME */ 5589 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5590 (cmd == DDI_PM_RESUME)) 5591 return (DDI_FAILURE); 5592 5593 return (DDI_SUCCESS); 5594 } 5595 5596 /* 5597 * mdi_post_attach(): 5598 * Post attach() notification handler 5599 */ 5600 /*ARGSUSED*/ 5601 void 5602 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5603 { 5604 mdi_phci_t *ph; 5605 mdi_client_t *ct; 5606 mdi_vhci_t *vh; 5607 5608 if (MDI_PHCI(dip)) { 5609 ph = i_devi_get_phci(dip); 5610 ASSERT(ph != NULL); 5611 5612 MDI_PHCI_LOCK(ph); 5613 switch (cmd) { 5614 case DDI_ATTACH: 5615 MDI_DEBUG(2, (MDI_NOTE, dip, 5616 "phci post_attach called %p", (void *)ph)); 5617 if (error == DDI_SUCCESS) { 5618 MDI_PHCI_SET_ATTACH(ph); 5619 } else { 5620 MDI_DEBUG(1, (MDI_NOTE, dip, 5621 "!pHCI post_attach failed: error %d", 5622 error)); 5623 MDI_PHCI_SET_DETACH(ph); 5624 } 5625 break; 5626 5627 case DDI_RESUME: 5628 MDI_DEBUG(2, (MDI_NOTE, dip, 5629 "pHCI post_resume: called %p", (void *)ph)); 5630 if (error == DDI_SUCCESS) { 5631 MDI_PHCI_SET_RESUME(ph); 5632 } else { 5633 MDI_DEBUG(1, (MDI_NOTE, dip, 5634 "!pHCI post_resume failed: error %d", 5635 error)); 5636 MDI_PHCI_SET_SUSPEND(ph); 5637 } 5638 break; 5639 } 5640 MDI_PHCI_UNLOCK(ph); 5641 } 5642 5643 if (MDI_CLIENT(dip)) { 5644 ct = i_devi_get_client(dip); 5645 ASSERT(ct != NULL); 5646 5647 MDI_CLIENT_LOCK(ct); 5648 switch (cmd) { 5649 case DDI_ATTACH: 5650 MDI_DEBUG(2, (MDI_NOTE, 
dip, 5651 "client post_attach called %p", (void *)ct)); 5652 if (error != DDI_SUCCESS) { 5653 MDI_DEBUG(1, (MDI_NOTE, dip, 5654 "!client post_attach failed: error %d", 5655 error)); 5656 MDI_CLIENT_SET_DETACH(ct); 5657 MDI_DEBUG(4, (MDI_WARN, dip, 5658 "i_mdi_pm_reset_client")); 5659 i_mdi_pm_reset_client(ct); 5660 break; 5661 } 5662 5663 /* 5664 * Client device has successfully attached, inform 5665 * the vhci. 5666 */ 5667 vh = ct->ct_vhci; 5668 if (vh->vh_ops->vo_client_attached) 5669 (*vh->vh_ops->vo_client_attached)(dip); 5670 5671 MDI_CLIENT_SET_ATTACH(ct); 5672 break; 5673 5674 case DDI_RESUME: 5675 MDI_DEBUG(2, (MDI_NOTE, dip, 5676 "client post_attach: called %p", (void *)ct)); 5677 if (error == DDI_SUCCESS) { 5678 MDI_CLIENT_SET_RESUME(ct); 5679 } else { 5680 MDI_DEBUG(1, (MDI_NOTE, dip, 5681 "!client post_resume failed: error %d", 5682 error)); 5683 MDI_CLIENT_SET_SUSPEND(ct); 5684 } 5685 break; 5686 } 5687 MDI_CLIENT_UNLOCK(ct); 5688 } 5689 } 5690 5691 /* 5692 * mdi_pre_detach(): 5693 * Pre detach notification handler 5694 */ 5695 /*ARGSUSED*/ 5696 int 5697 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5698 { 5699 int rv = DDI_SUCCESS; 5700 5701 if (MDI_CLIENT(dip)) { 5702 (void) i_mdi_client_pre_detach(dip, cmd); 5703 } 5704 5705 if (MDI_PHCI(dip)) { 5706 rv = i_mdi_phci_pre_detach(dip, cmd); 5707 } 5708 5709 return (rv); 5710 } 5711 5712 /*ARGSUSED*/ 5713 static int 5714 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5715 { 5716 int rv = DDI_SUCCESS; 5717 mdi_phci_t *ph; 5718 mdi_client_t *ct; 5719 mdi_pathinfo_t *pip; 5720 mdi_pathinfo_t *failed_pip = NULL; 5721 mdi_pathinfo_t *next; 5722 5723 ph = i_devi_get_phci(dip); 5724 if (ph == NULL) { 5725 return (rv); 5726 } 5727 5728 MDI_PHCI_LOCK(ph); 5729 switch (cmd) { 5730 case DDI_DETACH: 5731 MDI_DEBUG(2, (MDI_NOTE, dip, 5732 "pHCI pre_detach: called %p", (void *)ph)); 5733 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5734 /* 5735 * mdi_pathinfo nodes are still attached to 5736 * this pHCI. 
Fail the detach for this pHCI. 5737 */ 5738 MDI_DEBUG(2, (MDI_WARN, dip, 5739 "pHCI pre_detach: paths are still attached %p", 5740 (void *)ph)); 5741 rv = DDI_FAILURE; 5742 break; 5743 } 5744 MDI_PHCI_SET_DETACH(ph); 5745 break; 5746 5747 case DDI_SUSPEND: 5748 /* 5749 * pHCI is getting suspended. Since mpxio client 5750 * devices may not be suspended at this point, to avoid 5751 * a potential stack overflow, it is important to suspend 5752 * client devices before pHCI can be suspended. 5753 */ 5754 5755 MDI_DEBUG(2, (MDI_NOTE, dip, 5756 "pHCI pre_suspend: called %p", (void *)ph)); 5757 /* 5758 * Suspend all the client devices accessible through this pHCI 5759 */ 5760 pip = ph->ph_path_head; 5761 while (pip != NULL && rv == DDI_SUCCESS) { 5762 dev_info_t *cdip; 5763 MDI_PI_LOCK(pip); 5764 next = 5765 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5766 ct = MDI_PI(pip)->pi_client; 5767 i_mdi_client_lock(ct, pip); 5768 cdip = ct->ct_dip; 5769 MDI_PI_UNLOCK(pip); 5770 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5771 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5772 i_mdi_client_unlock(ct); 5773 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5774 DDI_SUCCESS) { 5775 /* 5776 * Suspend of one of the client 5777 * device has failed. 5778 */ 5779 MDI_DEBUG(1, (MDI_WARN, dip, 5780 "!suspend of device (%s%d) failed.", 5781 ddi_driver_name(cdip), 5782 ddi_get_instance(cdip))); 5783 failed_pip = pip; 5784 break; 5785 } 5786 } else { 5787 i_mdi_client_unlock(ct); 5788 } 5789 pip = next; 5790 } 5791 5792 if (rv == DDI_SUCCESS) { 5793 /* 5794 * Suspend of client devices is complete. Proceed 5795 * with pHCI suspend. 5796 */ 5797 MDI_PHCI_SET_SUSPEND(ph); 5798 } else { 5799 /* 5800 * Revert back all the suspended client device states 5801 * to converse. 
5802 */ 5803 pip = ph->ph_path_head; 5804 while (pip != failed_pip) { 5805 dev_info_t *cdip; 5806 MDI_PI_LOCK(pip); 5807 next = 5808 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5809 ct = MDI_PI(pip)->pi_client; 5810 i_mdi_client_lock(ct, pip); 5811 cdip = ct->ct_dip; 5812 MDI_PI_UNLOCK(pip); 5813 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5814 i_mdi_client_unlock(ct); 5815 (void) devi_attach(cdip, DDI_RESUME); 5816 } else { 5817 i_mdi_client_unlock(ct); 5818 } 5819 pip = next; 5820 } 5821 } 5822 break; 5823 5824 default: 5825 rv = DDI_FAILURE; 5826 break; 5827 } 5828 MDI_PHCI_UNLOCK(ph); 5829 return (rv); 5830 } 5831 5832 /*ARGSUSED*/ 5833 static int 5834 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5835 { 5836 int rv = DDI_SUCCESS; 5837 mdi_client_t *ct; 5838 5839 ct = i_devi_get_client(dip); 5840 if (ct == NULL) { 5841 return (rv); 5842 } 5843 5844 MDI_CLIENT_LOCK(ct); 5845 switch (cmd) { 5846 case DDI_DETACH: 5847 MDI_DEBUG(2, (MDI_NOTE, dip, 5848 "client pre_detach: called %p", 5849 (void *)ct)); 5850 MDI_CLIENT_SET_DETACH(ct); 5851 break; 5852 5853 case DDI_SUSPEND: 5854 MDI_DEBUG(2, (MDI_NOTE, dip, 5855 "client pre_suspend: called %p", 5856 (void *)ct)); 5857 MDI_CLIENT_SET_SUSPEND(ct); 5858 break; 5859 5860 default: 5861 rv = DDI_FAILURE; 5862 break; 5863 } 5864 MDI_CLIENT_UNLOCK(ct); 5865 return (rv); 5866 } 5867 5868 /* 5869 * mdi_post_detach(): 5870 * Post detach notification handler 5871 */ 5872 /*ARGSUSED*/ 5873 void 5874 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5875 { 5876 /* 5877 * Detach/Suspend of mpxio component failed. Update our state 5878 * too 5879 */ 5880 if (MDI_PHCI(dip)) 5881 i_mdi_phci_post_detach(dip, cmd, error); 5882 5883 if (MDI_CLIENT(dip)) 5884 i_mdi_client_post_detach(dip, cmd, error); 5885 } 5886 5887 /*ARGSUSED*/ 5888 static void 5889 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5890 { 5891 mdi_phci_t *ph; 5892 5893 /* 5894 * Detach/Suspend of phci component failed. 
Update our state 5895 * too 5896 */ 5897 ph = i_devi_get_phci(dip); 5898 if (ph == NULL) { 5899 return; 5900 } 5901 5902 MDI_PHCI_LOCK(ph); 5903 /* 5904 * Detach of pHCI failed. Restore back converse 5905 * state 5906 */ 5907 switch (cmd) { 5908 case DDI_DETACH: 5909 MDI_DEBUG(2, (MDI_NOTE, dip, 5910 "pHCI post_detach: called %p", 5911 (void *)ph)); 5912 if (error != DDI_SUCCESS) 5913 MDI_PHCI_SET_ATTACH(ph); 5914 break; 5915 5916 case DDI_SUSPEND: 5917 MDI_DEBUG(2, (MDI_NOTE, dip, 5918 "pHCI post_suspend: called %p", 5919 (void *)ph)); 5920 if (error != DDI_SUCCESS) 5921 MDI_PHCI_SET_RESUME(ph); 5922 break; 5923 } 5924 MDI_PHCI_UNLOCK(ph); 5925 } 5926 5927 /*ARGSUSED*/ 5928 static void 5929 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5930 { 5931 mdi_client_t *ct; 5932 5933 ct = i_devi_get_client(dip); 5934 if (ct == NULL) { 5935 return; 5936 } 5937 MDI_CLIENT_LOCK(ct); 5938 /* 5939 * Detach of Client failed. Restore back converse 5940 * state 5941 */ 5942 switch (cmd) { 5943 case DDI_DETACH: 5944 MDI_DEBUG(2, (MDI_NOTE, dip, 5945 "client post_detach: called %p", (void *)ct)); 5946 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5947 MDI_DEBUG(4, (MDI_NOTE, dip, 5948 "i_mdi_pm_rele_client\n")); 5949 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5950 } else { 5951 MDI_DEBUG(4, (MDI_NOTE, dip, 5952 "i_mdi_pm_reset_client\n")); 5953 i_mdi_pm_reset_client(ct); 5954 } 5955 if (error != DDI_SUCCESS) 5956 MDI_CLIENT_SET_ATTACH(ct); 5957 break; 5958 5959 case DDI_SUSPEND: 5960 MDI_DEBUG(2, (MDI_NOTE, dip, 5961 "called %p", (void *)ct)); 5962 if (error != DDI_SUCCESS) 5963 MDI_CLIENT_SET_RESUME(ct); 5964 break; 5965 } 5966 MDI_CLIENT_UNLOCK(ct); 5967 } 5968 5969 int 5970 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5971 { 5972 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5973 } 5974 5975 /* 5976 * create and install per-path (client - pHCI) statistics 5977 * I/O stats supported: nread, nwritten, reads, and writes 5978 * Error stats - hard errors, soft errors, & transport errors 5979 */ 5980 int 5981 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5982 { 5983 kstat_t *kiosp, *kerrsp; 5984 struct pi_errs *nsp; 5985 struct mdi_pi_kstats *mdi_statp; 5986 5987 if (MDI_PI(pip)->pi_kstats != NULL) 5988 return (MDI_SUCCESS); 5989 5990 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5991 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5992 return (MDI_FAILURE); 5993 } 5994 5995 (void) strcat(ksname, ",err"); 5996 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5997 KSTAT_TYPE_NAMED, 5998 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5999 if (kerrsp == NULL) { 6000 kstat_delete(kiosp); 6001 return (MDI_FAILURE); 6002 } 6003 6004 nsp = (struct pi_errs *)kerrsp->ks_data; 6005 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 6006 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 6007 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 6008 KSTAT_DATA_UINT32); 6009 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 6010 KSTAT_DATA_UINT32); 6011 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 6012 KSTAT_DATA_UINT32); 6013 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 6014 KSTAT_DATA_UINT32); 6015 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 6016 KSTAT_DATA_UINT32); 6017 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 6018 KSTAT_DATA_UINT32); 6019 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 6020 KSTAT_DATA_UINT32); 6021 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 6022 6023 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 6024 mdi_statp->pi_kstat_ref = 1; 6025 mdi_statp->pi_kstat_iostats = kiosp; 6026 mdi_statp->pi_kstat_errstats = kerrsp; 6027 
kstat_install(kiosp); 6028 kstat_install(kerrsp); 6029 MDI_PI(pip)->pi_kstats = mdi_statp; 6030 return (MDI_SUCCESS); 6031 } 6032 6033 /* 6034 * destroy per-path properties 6035 */ 6036 static void 6037 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 6038 { 6039 6040 struct mdi_pi_kstats *mdi_statp; 6041 6042 if (MDI_PI(pip)->pi_kstats == NULL) 6043 return; 6044 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 6045 return; 6046 6047 MDI_PI(pip)->pi_kstats = NULL; 6048 6049 /* 6050 * the kstat may be shared between multiple pathinfo nodes 6051 * decrement this pathinfo's usage, removing the kstats 6052 * themselves when the last pathinfo reference is removed. 6053 */ 6054 ASSERT(mdi_statp->pi_kstat_ref > 0); 6055 if (--mdi_statp->pi_kstat_ref != 0) 6056 return; 6057 6058 kstat_delete(mdi_statp->pi_kstat_iostats); 6059 kstat_delete(mdi_statp->pi_kstat_errstats); 6060 kmem_free(mdi_statp, sizeof (*mdi_statp)); 6061 } 6062 6063 /* 6064 * update I/O paths KSTATS 6065 */ 6066 void 6067 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 6068 { 6069 kstat_t *iostatp; 6070 size_t xfer_cnt; 6071 6072 ASSERT(pip != NULL); 6073 6074 /* 6075 * I/O can be driven across a path prior to having path 6076 * statistics available, i.e. probe(9e). 6077 */ 6078 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 6079 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 6080 xfer_cnt = bp->b_bcount - bp->b_resid; 6081 if (bp->b_flags & B_READ) { 6082 KSTAT_IO_PTR(iostatp)->reads++; 6083 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 6084 } else { 6085 KSTAT_IO_PTR(iostatp)->writes++; 6086 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 6087 } 6088 } 6089 } 6090 6091 /* 6092 * Enable the path(specific client/target/initiator) 6093 * Enabling a path means that MPxIO may select the enabled path for routing 6094 * future I/O requests, subject to other path state constraints. 
6095 */ 6096 int 6097 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 6098 { 6099 mdi_phci_t *ph; 6100 6101 ph = MDI_PI(pip)->pi_phci; 6102 if (ph == NULL) { 6103 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6104 "!failed: path %s %p: NULL ph", 6105 mdi_pi_spathname(pip), (void *)pip)); 6106 return (MDI_FAILURE); 6107 } 6108 6109 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 6110 MDI_ENABLE_OP); 6111 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6112 "!returning success pip = %p. ph = %p", 6113 (void *)pip, (void *)ph)); 6114 return (MDI_SUCCESS); 6115 6116 } 6117 6118 /* 6119 * Disable the path (specific client/target/initiator) 6120 * Disabling a path means that MPxIO will not select the disabled path for 6121 * routing any new I/O requests. 6122 */ 6123 int 6124 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 6125 { 6126 mdi_phci_t *ph; 6127 6128 ph = MDI_PI(pip)->pi_phci; 6129 if (ph == NULL) { 6130 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6131 "!failed: path %s %p: NULL ph", 6132 mdi_pi_spathname(pip), (void *)pip)); 6133 return (MDI_FAILURE); 6134 } 6135 6136 (void) i_mdi_enable_disable_path(pip, 6137 ph->ph_vhci, flags, MDI_DISABLE_OP); 6138 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6139 "!returning success pip = %p. ph = %p", 6140 (void *)pip, (void *)ph)); 6141 return (MDI_SUCCESS); 6142 } 6143 6144 /* 6145 * disable the path to a particular pHCI (pHCI specified in the phci_path 6146 * argument) for a particular client (specified in the client_path argument). 6147 * Disabling a path means that MPxIO will not select the disabled path for 6148 * routing any new I/O requests. 
6149 * NOTE: this will be removed once the NWS files are changed to use the new 6150 * mdi_{enable,disable}_path interfaces 6151 */ 6152 int 6153 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6154 { 6155 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 6156 } 6157 6158 /* 6159 * Enable the path to a particular pHCI (pHCI specified in the phci_path 6160 * argument) for a particular client (specified in the client_path argument). 6161 * Enabling a path means that MPxIO may select the enabled path for routing 6162 * future I/O requests, subject to other path state constraints. 6163 * NOTE: this will be removed once the NWS files are changed to use the new 6164 * mdi_{enable,disable}_path interfaces 6165 */ 6166 6167 int 6168 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6169 { 6170 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 6171 } 6172 6173 /* 6174 * Common routine for doing enable/disable. 6175 */ 6176 static mdi_pathinfo_t * 6177 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6178 int op) 6179 { 6180 int sync_flag = 0; 6181 int rv; 6182 mdi_pathinfo_t *next; 6183 int (*f)() = NULL; 6184 6185 /* 6186 * Check to make sure the path is not already in the 6187 * requested state. If it is just return the next path 6188 * as we have nothing to do here. 6189 */ 6190 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6191 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6192 MDI_PI_LOCK(pip); 6193 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6194 MDI_PI_UNLOCK(pip); 6195 return (next); 6196 } 6197 6198 f = vh->vh_ops->vo_pi_state_change; 6199 6200 sync_flag = (flags << 8) & 0xf00; 6201 6202 /* 6203 * Do a callback into the mdi consumer to let it 6204 * know that path is about to get enabled/disabled. 
6205 */ 6206 if (f != NULL) { 6207 rv = (*f)(vh->vh_dip, pip, 0, 6208 MDI_PI_EXT_STATE(pip), 6209 MDI_EXT_STATE_CHANGE | sync_flag | 6210 op | MDI_BEFORE_STATE_CHANGE); 6211 if (rv != MDI_SUCCESS) { 6212 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6213 "vo_pi_state_change: failed rv = %x", rv)); 6214 } 6215 } 6216 MDI_PI_LOCK(pip); 6217 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6218 6219 switch (flags) { 6220 case USER_DISABLE: 6221 if (op == MDI_DISABLE_OP) { 6222 MDI_PI_SET_USER_DISABLE(pip); 6223 } else { 6224 MDI_PI_SET_USER_ENABLE(pip); 6225 } 6226 break; 6227 case DRIVER_DISABLE: 6228 if (op == MDI_DISABLE_OP) { 6229 MDI_PI_SET_DRV_DISABLE(pip); 6230 } else { 6231 MDI_PI_SET_DRV_ENABLE(pip); 6232 } 6233 break; 6234 case DRIVER_DISABLE_TRANSIENT: 6235 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6236 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6237 } else { 6238 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6239 } 6240 break; 6241 } 6242 MDI_PI_UNLOCK(pip); 6243 /* 6244 * Do a callback into the mdi consumer to let it 6245 * know that path is now enabled/disabled. 6246 */ 6247 if (f != NULL) { 6248 rv = (*f)(vh->vh_dip, pip, 0, 6249 MDI_PI_EXT_STATE(pip), 6250 MDI_EXT_STATE_CHANGE | sync_flag | 6251 op | MDI_AFTER_STATE_CHANGE); 6252 if (rv != MDI_SUCCESS) { 6253 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6254 "vo_pi_state_change failed: rv = %x", rv)); 6255 } 6256 } 6257 return (next); 6258 } 6259 6260 /* 6261 * Common routine for doing enable/disable. 6262 * NOTE: this will be removed once the NWS files are changed to use the new 6263 * mdi_{enable,disable}_path has been putback 6264 */ 6265 int 6266 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6267 { 6268 6269 mdi_phci_t *ph; 6270 mdi_vhci_t *vh = NULL; 6271 mdi_client_t *ct; 6272 mdi_pathinfo_t *next, *pip; 6273 int found_it; 6274 6275 ph = i_devi_get_phci(pdip); 6276 MDI_DEBUG(5, (MDI_NOTE, cdip ? 
cdip : pdip, 6277 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6278 (void *)cdip)); 6279 if (ph == NULL) { 6280 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6281 "!failed: operation %d: NULL ph", op)); 6282 return (MDI_FAILURE); 6283 } 6284 6285 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6286 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6287 "!failed: invalid operation %d", op)); 6288 return (MDI_FAILURE); 6289 } 6290 6291 vh = ph->ph_vhci; 6292 6293 if (cdip == NULL) { 6294 /* 6295 * Need to mark the Phci as enabled/disabled. 6296 */ 6297 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip, 6298 "op %d for the phci", op)); 6299 MDI_PHCI_LOCK(ph); 6300 switch (flags) { 6301 case USER_DISABLE: 6302 if (op == MDI_DISABLE_OP) { 6303 MDI_PHCI_SET_USER_DISABLE(ph); 6304 } else { 6305 MDI_PHCI_SET_USER_ENABLE(ph); 6306 } 6307 break; 6308 case DRIVER_DISABLE: 6309 if (op == MDI_DISABLE_OP) { 6310 MDI_PHCI_SET_DRV_DISABLE(ph); 6311 } else { 6312 MDI_PHCI_SET_DRV_ENABLE(ph); 6313 } 6314 break; 6315 case DRIVER_DISABLE_TRANSIENT: 6316 if (op == MDI_DISABLE_OP) { 6317 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6318 } else { 6319 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6320 } 6321 break; 6322 default: 6323 MDI_PHCI_UNLOCK(ph); 6324 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6325 "!invalid flag argument= %d", flags)); 6326 } 6327 6328 /* 6329 * Phci has been disabled. Now try to enable/disable 6330 * path info's to each client. 6331 */ 6332 pip = ph->ph_path_head; 6333 while (pip != NULL) { 6334 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6335 } 6336 MDI_PHCI_UNLOCK(ph); 6337 } else { 6338 6339 /* 6340 * Disable a specific client. 6341 */ 6342 ct = i_devi_get_client(cdip); 6343 if (ct == NULL) { 6344 MDI_DEBUG(1, (MDI_NOTE, cdip ? 
cdip : pdip, 6345 "!failed: operation = %d: NULL ct", op)); 6346 return (MDI_FAILURE); 6347 } 6348 6349 MDI_CLIENT_LOCK(ct); 6350 pip = ct->ct_path_head; 6351 found_it = 0; 6352 while (pip != NULL) { 6353 MDI_PI_LOCK(pip); 6354 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6355 if (MDI_PI(pip)->pi_phci == ph) { 6356 MDI_PI_UNLOCK(pip); 6357 found_it = 1; 6358 break; 6359 } 6360 MDI_PI_UNLOCK(pip); 6361 pip = next; 6362 } 6363 6364 6365 MDI_CLIENT_UNLOCK(ct); 6366 if (found_it == 0) { 6367 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6368 "!failed. Could not find corresponding pip\n")); 6369 return (MDI_FAILURE); 6370 } 6371 6372 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6373 } 6374 6375 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6376 "!op %d returning success pdip = %p cdip = %p", 6377 op, (void *)pdip, (void *)cdip)); 6378 return (MDI_SUCCESS); 6379 } 6380 6381 /* 6382 * Ensure phci powered up 6383 */ 6384 static void 6385 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6386 { 6387 dev_info_t *ph_dip; 6388 6389 ASSERT(pip != NULL); 6390 ASSERT(MDI_PI_LOCKED(pip)); 6391 6392 if (MDI_PI(pip)->pi_pm_held) { 6393 return; 6394 } 6395 6396 ph_dip = mdi_pi_get_phci(pip); 6397 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6398 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6399 if (ph_dip == NULL) { 6400 return; 6401 } 6402 6403 MDI_PI_UNLOCK(pip); 6404 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d", 6405 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6406 pm_hold_power(ph_dip); 6407 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d", 6408 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6409 MDI_PI_LOCK(pip); 6410 6411 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6412 if (DEVI(ph_dip)->devi_pm_info) 6413 MDI_PI(pip)->pi_pm_held = 1; 6414 } 6415 6416 /* 6417 * Allow phci powered down 6418 */ 6419 static void 6420 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6421 { 6422 dev_info_t *ph_dip = NULL; 6423 6424 ASSERT(pip != NULL); 6425 ASSERT(MDI_PI_LOCKED(pip)); 6426 6427 if 
(MDI_PI(pip)->pi_pm_held == 0) { 6428 return; 6429 } 6430 6431 ph_dip = mdi_pi_get_phci(pip); 6432 ASSERT(ph_dip != NULL); 6433 6434 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6435 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6436 6437 MDI_PI_UNLOCK(pip); 6438 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6439 "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6440 pm_rele_power(ph_dip); 6441 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6442 "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6443 MDI_PI_LOCK(pip); 6444 6445 MDI_PI(pip)->pi_pm_held = 0; 6446 } 6447 6448 static void 6449 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6450 { 6451 ASSERT(MDI_CLIENT_LOCKED(ct)); 6452 6453 ct->ct_power_cnt += incr; 6454 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6455 "%p ct_power_cnt = %d incr = %d", 6456 (void *)ct, ct->ct_power_cnt, incr)); 6457 ASSERT(ct->ct_power_cnt >= 0); 6458 } 6459 6460 static void 6461 i_mdi_rele_all_phci(mdi_client_t *ct) 6462 { 6463 mdi_pathinfo_t *pip; 6464 6465 ASSERT(MDI_CLIENT_LOCKED(ct)); 6466 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6467 while (pip != NULL) { 6468 mdi_hold_path(pip); 6469 MDI_PI_LOCK(pip); 6470 i_mdi_pm_rele_pip(pip); 6471 MDI_PI_UNLOCK(pip); 6472 mdi_rele_path(pip); 6473 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6474 } 6475 } 6476 6477 static void 6478 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6479 { 6480 ASSERT(MDI_CLIENT_LOCKED(ct)); 6481 6482 if (i_ddi_devi_attached(ct->ct_dip)) { 6483 ct->ct_power_cnt -= decr; 6484 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6485 "%p ct_power_cnt = %d decr = %d", 6486 (void *)ct, ct->ct_power_cnt, decr)); 6487 } 6488 6489 ASSERT(ct->ct_power_cnt >= 0); 6490 if (ct->ct_power_cnt == 0) { 6491 i_mdi_rele_all_phci(ct); 6492 return; 6493 } 6494 } 6495 6496 static void 6497 i_mdi_pm_reset_client(mdi_client_t *ct) 6498 { 6499 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6500 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt)); 6501 ASSERT(MDI_CLIENT_LOCKED(ct)); 6502 ct->ct_power_cnt = 0; 6503 i_mdi_rele_all_phci(ct); 
6504 ct->ct_powercnt_config = 0; 6505 ct->ct_powercnt_unconfig = 0; 6506 ct->ct_powercnt_reset = 1; 6507 } 6508 6509 static int 6510 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6511 { 6512 int ret; 6513 dev_info_t *ph_dip; 6514 6515 MDI_PI_LOCK(pip); 6516 i_mdi_pm_hold_pip(pip); 6517 6518 ph_dip = mdi_pi_get_phci(pip); 6519 MDI_PI_UNLOCK(pip); 6520 6521 /* bring all components of phci to full power */ 6522 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6523 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip), 6524 ddi_get_instance(ph_dip), (void *)pip)); 6525 6526 ret = pm_powerup(ph_dip); 6527 6528 if (ret == DDI_FAILURE) { 6529 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6530 "pm_powerup FAILED for %s%d %p", 6531 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), 6532 (void *)pip)); 6533 6534 MDI_PI_LOCK(pip); 6535 i_mdi_pm_rele_pip(pip); 6536 MDI_PI_UNLOCK(pip); 6537 return (MDI_FAILURE); 6538 } 6539 6540 return (MDI_SUCCESS); 6541 } 6542 6543 static int 6544 i_mdi_power_all_phci(mdi_client_t *ct) 6545 { 6546 mdi_pathinfo_t *pip; 6547 int succeeded = 0; 6548 6549 ASSERT(MDI_CLIENT_LOCKED(ct)); 6550 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6551 while (pip != NULL) { 6552 /* 6553 * Don't power if MDI_PATHINFO_STATE_FAULT 6554 * or MDI_PATHINFO_STATE_OFFLINE. 6555 */ 6556 if (MDI_PI_IS_INIT(pip) || 6557 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6558 mdi_hold_path(pip); 6559 MDI_CLIENT_UNLOCK(ct); 6560 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6561 succeeded = 1; 6562 6563 ASSERT(ct == MDI_PI(pip)->pi_client); 6564 MDI_CLIENT_LOCK(ct); 6565 mdi_rele_path(pip); 6566 } 6567 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6568 } 6569 6570 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6571 } 6572 6573 /* 6574 * mdi_bus_power(): 6575 * 1. Place the phci(s) into powered up state so that 6576 * client can do power management 6577 * 2. 
Ensure phci powered up as client power managing 6578 * Return Values: 6579 * MDI_SUCCESS 6580 * MDI_FAILURE 6581 */ 6582 int 6583 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6584 void *arg, void *result) 6585 { 6586 int ret = MDI_SUCCESS; 6587 pm_bp_child_pwrchg_t *bpc; 6588 mdi_client_t *ct; 6589 dev_info_t *cdip; 6590 pm_bp_has_changed_t *bphc; 6591 6592 /* 6593 * BUS_POWER_NOINVOL not supported 6594 */ 6595 if (op == BUS_POWER_NOINVOL) 6596 return (MDI_FAILURE); 6597 6598 /* 6599 * ignore other OPs. 6600 * return quickly to save cou cycles on the ct processing 6601 */ 6602 switch (op) { 6603 case BUS_POWER_PRE_NOTIFICATION: 6604 case BUS_POWER_POST_NOTIFICATION: 6605 bpc = (pm_bp_child_pwrchg_t *)arg; 6606 cdip = bpc->bpc_dip; 6607 break; 6608 case BUS_POWER_HAS_CHANGED: 6609 bphc = (pm_bp_has_changed_t *)arg; 6610 cdip = bphc->bphc_dip; 6611 break; 6612 default: 6613 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6614 } 6615 6616 ASSERT(MDI_CLIENT(cdip)); 6617 6618 ct = i_devi_get_client(cdip); 6619 if (ct == NULL) 6620 return (MDI_FAILURE); 6621 6622 /* 6623 * wait till the mdi_pathinfo node state change are processed 6624 */ 6625 MDI_CLIENT_LOCK(ct); 6626 switch (op) { 6627 case BUS_POWER_PRE_NOTIFICATION: 6628 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6629 "BUS_POWER_PRE_NOTIFICATION:" 6630 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6631 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6632 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6633 6634 /* serialize power level change per client */ 6635 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6636 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6637 6638 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6639 6640 if (ct->ct_power_cnt == 0) { 6641 ret = i_mdi_power_all_phci(ct); 6642 } 6643 6644 /* 6645 * if new_level > 0: 6646 * - hold phci(s) 6647 * - power up phci(s) if not already 6648 * ignore power down 6649 */ 6650 if (bpc->bpc_nlevel > 0) { 6651 if 
(!DEVI_IS_ATTACHING(ct->ct_dip)) { 6652 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6653 "i_mdi_pm_hold_client\n")); 6654 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6655 } 6656 } 6657 break; 6658 case BUS_POWER_POST_NOTIFICATION: 6659 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6660 "BUS_POWER_POST_NOTIFICATION:" 6661 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d", 6662 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6663 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6664 *(int *)result)); 6665 6666 if (*(int *)result == DDI_SUCCESS) { 6667 if (bpc->bpc_nlevel > 0) { 6668 MDI_CLIENT_SET_POWER_UP(ct); 6669 } else { 6670 MDI_CLIENT_SET_POWER_DOWN(ct); 6671 } 6672 } 6673 6674 /* release the hold we did in pre-notification */ 6675 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6676 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6677 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6678 "i_mdi_pm_rele_client\n")); 6679 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6680 } 6681 6682 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6683 /* another thread might started attaching */ 6684 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6685 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6686 "i_mdi_pm_rele_client\n")); 6687 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6688 /* detaching has been taken care in pm_post_unconfig */ 6689 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6690 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6691 "i_mdi_pm_reset_client\n")); 6692 i_mdi_pm_reset_client(ct); 6693 } 6694 } 6695 6696 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6697 cv_broadcast(&ct->ct_powerchange_cv); 6698 6699 break; 6700 6701 /* need to do more */ 6702 case BUS_POWER_HAS_CHANGED: 6703 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6704 "BUS_POWER_HAS_CHANGED:" 6705 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6706 ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6707 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6708 6709 if (bphc->bphc_nlevel > 0 && 6710 bphc->bphc_nlevel > bphc->bphc_olevel) { 
6711 if (ct->ct_power_cnt == 0) { 6712 ret = i_mdi_power_all_phci(ct); 6713 } 6714 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6715 "i_mdi_pm_hold_client\n")); 6716 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6717 } 6718 6719 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6720 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6721 "i_mdi_pm_rele_client\n")); 6722 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6723 } 6724 break; 6725 } 6726 6727 MDI_CLIENT_UNLOCK(ct); 6728 return (ret); 6729 } 6730 6731 static int 6732 i_mdi_pm_pre_config_one(dev_info_t *child) 6733 { 6734 int ret = MDI_SUCCESS; 6735 mdi_client_t *ct; 6736 6737 ct = i_devi_get_client(child); 6738 if (ct == NULL) 6739 return (MDI_FAILURE); 6740 6741 MDI_CLIENT_LOCK(ct); 6742 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6743 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6744 6745 if (!MDI_CLIENT_IS_FAILED(ct)) { 6746 MDI_CLIENT_UNLOCK(ct); 6747 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n")); 6748 return (MDI_SUCCESS); 6749 } 6750 6751 if (ct->ct_powercnt_config) { 6752 MDI_CLIENT_UNLOCK(ct); 6753 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n")); 6754 return (MDI_SUCCESS); 6755 } 6756 6757 if (ct->ct_power_cnt == 0) { 6758 ret = i_mdi_power_all_phci(ct); 6759 } 6760 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6761 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6762 ct->ct_powercnt_config = 1; 6763 ct->ct_powercnt_reset = 0; 6764 MDI_CLIENT_UNLOCK(ct); 6765 return (ret); 6766 } 6767 6768 static int 6769 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6770 { 6771 int ret = MDI_SUCCESS; 6772 dev_info_t *cdip; 6773 int circ; 6774 6775 ASSERT(MDI_VHCI(vdip)); 6776 6777 /* ndi_devi_config_one */ 6778 if (child) { 6779 ASSERT(DEVI_BUSY_OWNED(vdip)); 6780 return (i_mdi_pm_pre_config_one(child)); 6781 } 6782 6783 /* devi_config_common */ 6784 ndi_devi_enter(vdip, &circ); 6785 cdip = ddi_get_child(vdip); 6786 while (cdip) { 6787 dev_info_t *next = ddi_get_next_sibling(cdip); 6788 6789 
ret = i_mdi_pm_pre_config_one(cdip); 6790 if (ret != MDI_SUCCESS) 6791 break; 6792 cdip = next; 6793 } 6794 ndi_devi_exit(vdip, circ); 6795 return (ret); 6796 } 6797 6798 static int 6799 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6800 { 6801 int ret = MDI_SUCCESS; 6802 mdi_client_t *ct; 6803 6804 ct = i_devi_get_client(child); 6805 if (ct == NULL) 6806 return (MDI_FAILURE); 6807 6808 MDI_CLIENT_LOCK(ct); 6809 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6810 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6811 6812 if (!i_ddi_devi_attached(ct->ct_dip)) { 6813 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n")); 6814 MDI_CLIENT_UNLOCK(ct); 6815 return (MDI_SUCCESS); 6816 } 6817 6818 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6819 (flags & NDI_AUTODETACH)) { 6820 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n")); 6821 MDI_CLIENT_UNLOCK(ct); 6822 return (MDI_FAILURE); 6823 } 6824 6825 if (ct->ct_powercnt_unconfig) { 6826 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n")); 6827 MDI_CLIENT_UNLOCK(ct); 6828 *held = 1; 6829 return (MDI_SUCCESS); 6830 } 6831 6832 if (ct->ct_power_cnt == 0) { 6833 ret = i_mdi_power_all_phci(ct); 6834 } 6835 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6836 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6837 ct->ct_powercnt_unconfig = 1; 6838 ct->ct_powercnt_reset = 0; 6839 MDI_CLIENT_UNLOCK(ct); 6840 if (ret == MDI_SUCCESS) 6841 *held = 1; 6842 return (ret); 6843 } 6844 6845 static int 6846 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6847 int flags) 6848 { 6849 int ret = MDI_SUCCESS; 6850 dev_info_t *cdip; 6851 int circ; 6852 6853 ASSERT(MDI_VHCI(vdip)); 6854 *held = 0; 6855 6856 /* ndi_devi_unconfig_one */ 6857 if (child) { 6858 ASSERT(DEVI_BUSY_OWNED(vdip)); 6859 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6860 } 6861 6862 /* devi_unconfig_common */ 6863 ndi_devi_enter(vdip, &circ); 6864 cdip = ddi_get_child(vdip); 6865 while (cdip) { 6866 dev_info_t *next = 
ddi_get_next_sibling(cdip); 6867 6868 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6869 cdip = next; 6870 } 6871 ndi_devi_exit(vdip, circ); 6872 6873 if (*held) 6874 ret = MDI_SUCCESS; 6875 6876 return (ret); 6877 } 6878 6879 static void 6880 i_mdi_pm_post_config_one(dev_info_t *child) 6881 { 6882 mdi_client_t *ct; 6883 6884 ct = i_devi_get_client(child); 6885 if (ct == NULL) 6886 return; 6887 6888 MDI_CLIENT_LOCK(ct); 6889 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6890 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6891 6892 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6893 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n")); 6894 MDI_CLIENT_UNLOCK(ct); 6895 return; 6896 } 6897 6898 /* client has not been updated */ 6899 if (MDI_CLIENT_IS_FAILED(ct)) { 6900 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n")); 6901 MDI_CLIENT_UNLOCK(ct); 6902 return; 6903 } 6904 6905 /* another thread might have powered it down or detached it */ 6906 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6907 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6908 (!i_ddi_devi_attached(ct->ct_dip) && 6909 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6910 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 6911 i_mdi_pm_reset_client(ct); 6912 } else { 6913 mdi_pathinfo_t *pip, *next; 6914 int valid_path_count = 0; 6915 6916 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 6917 pip = ct->ct_path_head; 6918 while (pip != NULL) { 6919 MDI_PI_LOCK(pip); 6920 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6921 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6922 valid_path_count ++; 6923 MDI_PI_UNLOCK(pip); 6924 pip = next; 6925 } 6926 i_mdi_pm_rele_client(ct, valid_path_count); 6927 } 6928 ct->ct_powercnt_config = 0; 6929 MDI_CLIENT_UNLOCK(ct); 6930 } 6931 6932 static void 6933 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6934 { 6935 int circ; 6936 dev_info_t *cdip; 6937 6938 ASSERT(MDI_VHCI(vdip)); 6939 6940 /* ndi_devi_config_one */ 6941 if (child) { 6942 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6943 i_mdi_pm_post_config_one(child); 6944 return; 6945 } 6946 6947 /* devi_config_common */ 6948 ndi_devi_enter(vdip, &circ); 6949 cdip = ddi_get_child(vdip); 6950 while (cdip) { 6951 dev_info_t *next = ddi_get_next_sibling(cdip); 6952 6953 i_mdi_pm_post_config_one(cdip); 6954 cdip = next; 6955 } 6956 ndi_devi_exit(vdip, circ); 6957 } 6958 6959 static void 6960 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6961 { 6962 mdi_client_t *ct; 6963 6964 ct = i_devi_get_client(child); 6965 if (ct == NULL) 6966 return; 6967 6968 MDI_CLIENT_LOCK(ct); 6969 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6970 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6971 6972 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6973 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n")); 6974 MDI_CLIENT_UNLOCK(ct); 6975 return; 6976 } 6977 6978 /* failure detaching or another thread just attached it */ 6979 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6980 i_ddi_devi_attached(ct->ct_dip)) || 6981 (!i_ddi_devi_attached(ct->ct_dip) && 6982 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6983 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 6984 i_mdi_pm_reset_client(ct); 6985 } else { 6986 mdi_pathinfo_t *pip, *next; 6987 int valid_path_count = 0; 6988 6989 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 6990 pip = ct->ct_path_head; 6991 while (pip != NULL) { 6992 MDI_PI_LOCK(pip); 6993 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6994 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6995 valid_path_count ++; 6996 MDI_PI_UNLOCK(pip); 6997 pip = next; 6998 } 6999 i_mdi_pm_rele_client(ct, valid_path_count); 7000 ct->ct_powercnt_unconfig = 0; 7001 } 7002 7003 MDI_CLIENT_UNLOCK(ct); 7004 } 7005 7006 static void 7007 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 7008 { 7009 int circ; 7010 dev_info_t *cdip; 7011 7012 ASSERT(MDI_VHCI(vdip)); 7013 7014 if (!held) { 7015 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held)); 7016 return; 
7017 } 7018 7019 if (child) { 7020 ASSERT(DEVI_BUSY_OWNED(vdip)); 7021 i_mdi_pm_post_unconfig_one(child); 7022 return; 7023 } 7024 7025 ndi_devi_enter(vdip, &circ); 7026 cdip = ddi_get_child(vdip); 7027 while (cdip) { 7028 dev_info_t *next = ddi_get_next_sibling(cdip); 7029 7030 i_mdi_pm_post_unconfig_one(cdip); 7031 cdip = next; 7032 } 7033 ndi_devi_exit(vdip, circ); 7034 } 7035 7036 int 7037 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 7038 { 7039 int circ, ret = MDI_SUCCESS; 7040 dev_info_t *client_dip = NULL; 7041 mdi_client_t *ct; 7042 7043 /* 7044 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 7045 * Power up pHCI for the named client device. 7046 * Note: Before the client is enumerated under vhci by phci, 7047 * client_dip can be NULL. Then proceed to power up all the 7048 * pHCIs. 7049 */ 7050 if (devnm != NULL) { 7051 ndi_devi_enter(vdip, &circ); 7052 client_dip = ndi_devi_findchild(vdip, devnm); 7053 } 7054 7055 MDI_DEBUG(4, (MDI_NOTE, vdip, 7056 "op = %d %s %p", op, devnm ? 
devnm : "", (void *)client_dip)); 7057 7058 switch (op) { 7059 case MDI_PM_PRE_CONFIG: 7060 ret = i_mdi_pm_pre_config(vdip, client_dip); 7061 break; 7062 7063 case MDI_PM_PRE_UNCONFIG: 7064 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 7065 flags); 7066 break; 7067 7068 case MDI_PM_POST_CONFIG: 7069 i_mdi_pm_post_config(vdip, client_dip); 7070 break; 7071 7072 case MDI_PM_POST_UNCONFIG: 7073 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 7074 break; 7075 7076 case MDI_PM_HOLD_POWER: 7077 case MDI_PM_RELE_POWER: 7078 ASSERT(args); 7079 7080 client_dip = (dev_info_t *)args; 7081 ASSERT(MDI_CLIENT(client_dip)); 7082 7083 ct = i_devi_get_client(client_dip); 7084 MDI_CLIENT_LOCK(ct); 7085 7086 if (op == MDI_PM_HOLD_POWER) { 7087 if (ct->ct_power_cnt == 0) { 7088 (void) i_mdi_power_all_phci(ct); 7089 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7090 "i_mdi_pm_hold_client\n")); 7091 i_mdi_pm_hold_client(ct, ct->ct_path_count); 7092 } 7093 } else { 7094 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 7095 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7096 "i_mdi_pm_rele_client\n")); 7097 i_mdi_pm_rele_client(ct, ct->ct_path_count); 7098 } else { 7099 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7100 "i_mdi_pm_reset_client\n")); 7101 i_mdi_pm_reset_client(ct); 7102 } 7103 } 7104 7105 MDI_CLIENT_UNLOCK(ct); 7106 break; 7107 7108 default: 7109 break; 7110 } 7111 7112 if (devnm) 7113 ndi_devi_exit(vdip, circ); 7114 7115 return (ret); 7116 } 7117 7118 int 7119 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 7120 { 7121 mdi_vhci_t *vhci; 7122 7123 if (!MDI_VHCI(dip)) 7124 return (MDI_FAILURE); 7125 7126 if (mdi_class) { 7127 vhci = DEVI(dip)->devi_mdi_xhci; 7128 ASSERT(vhci); 7129 *mdi_class = vhci->vh_class; 7130 } 7131 7132 return (MDI_SUCCESS); 7133 } 7134 7135 int 7136 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 7137 { 7138 mdi_phci_t *phci; 7139 7140 if (!MDI_PHCI(dip)) 7141 return (MDI_FAILURE); 7142 7143 if (mdi_class) { 7144 phci = 
DEVI(dip)->devi_mdi_xhci; 7145 ASSERT(phci); 7146 *mdi_class = phci->ph_vhci->vh_class; 7147 } 7148 7149 return (MDI_SUCCESS); 7150 } 7151 7152 int 7153 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 7154 { 7155 mdi_client_t *client; 7156 7157 if (!MDI_CLIENT(dip)) 7158 return (MDI_FAILURE); 7159 7160 if (mdi_class) { 7161 client = DEVI(dip)->devi_mdi_client; 7162 ASSERT(client); 7163 *mdi_class = client->ct_vhci->vh_class; 7164 } 7165 7166 return (MDI_SUCCESS); 7167 } 7168 7169 void * 7170 mdi_client_get_vhci_private(dev_info_t *dip) 7171 { 7172 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7173 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7174 mdi_client_t *ct; 7175 ct = i_devi_get_client(dip); 7176 return (ct->ct_vprivate); 7177 } 7178 return (NULL); 7179 } 7180 7181 void 7182 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7183 { 7184 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7185 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7186 mdi_client_t *ct; 7187 ct = i_devi_get_client(dip); 7188 ct->ct_vprivate = data; 7189 } 7190 } 7191 /* 7192 * mdi_pi_get_vhci_private(): 7193 * Get the vhci private information associated with the 7194 * mdi_pathinfo node 7195 */ 7196 void * 7197 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7198 { 7199 caddr_t vprivate = NULL; 7200 if (pip) { 7201 vprivate = MDI_PI(pip)->pi_vprivate; 7202 } 7203 return (vprivate); 7204 } 7205 7206 /* 7207 * mdi_pi_set_vhci_private(): 7208 * Set the vhci private information in the mdi_pathinfo node 7209 */ 7210 void 7211 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 7212 { 7213 if (pip) { 7214 MDI_PI(pip)->pi_vprivate = priv; 7215 } 7216 } 7217 7218 /* 7219 * mdi_phci_get_vhci_private(): 7220 * Get the vhci private information associated with the 7221 * mdi_phci node 7222 */ 7223 void * 7224 mdi_phci_get_vhci_private(dev_info_t *dip) 7225 { 7226 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7227 if 
(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7228 mdi_phci_t *ph; 7229 ph = i_devi_get_phci(dip); 7230 return (ph->ph_vprivate); 7231 } 7232 return (NULL); 7233 } 7234 7235 /* 7236 * mdi_phci_set_vhci_private(): 7237 * Set the vhci private information in the mdi_phci node 7238 */ 7239 void 7240 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7241 { 7242 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7243 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7244 mdi_phci_t *ph; 7245 ph = i_devi_get_phci(dip); 7246 ph->ph_vprivate = priv; 7247 } 7248 } 7249 7250 int 7251 mdi_pi_ishidden(mdi_pathinfo_t *pip) 7252 { 7253 return (MDI_PI_FLAGS_IS_HIDDEN(pip)); 7254 } 7255 7256 int 7257 mdi_pi_device_isremoved(mdi_pathinfo_t *pip) 7258 { 7259 return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)); 7260 } 7261 7262 /* 7263 * When processing hotplug, if mdi_pi_offline-mdi_pi_free fails then this 7264 * interface is used to represent device removal. 7265 */ 7266 int 7267 mdi_pi_device_remove(mdi_pathinfo_t *pip) 7268 { 7269 MDI_PI_LOCK(pip); 7270 if (mdi_pi_device_isremoved(pip)) { 7271 MDI_PI_UNLOCK(pip); 7272 return (0); 7273 } 7274 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip); 7275 MDI_PI_FLAGS_SET_HIDDEN(pip); 7276 MDI_PI_UNLOCK(pip); 7277 7278 i_ddi_di_cache_invalidate(); 7279 7280 return (1); 7281 } 7282 7283 /* 7284 * When processing hotplug, if a path marked mdi_pi_device_isremoved() 7285 * is now accessible then this interfaces is used to represent device insertion. 
7286 */ 7287 int 7288 mdi_pi_device_insert(mdi_pathinfo_t *pip) 7289 { 7290 MDI_PI_LOCK(pip); 7291 if (!mdi_pi_device_isremoved(pip)) { 7292 MDI_PI_UNLOCK(pip); 7293 return (0); 7294 } 7295 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip); 7296 MDI_PI_FLAGS_CLR_HIDDEN(pip); 7297 MDI_PI_UNLOCK(pip); 7298 7299 i_ddi_di_cache_invalidate(); 7300 7301 return (1); 7302 } 7303 7304 /* 7305 * List of vhci class names: 7306 * A vhci class name must be in this list only if the corresponding vhci 7307 * driver intends to use the mdi provided bus config implementation 7308 * (i.e., mdi_vhci_bus_config()). 7309 */ 7310 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7311 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7312 7313 /* 7314 * During boot time, the on-disk vhci cache for every vhci class is read 7315 * in the form of an nvlist and stored here. 7316 */ 7317 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7318 7319 /* nvpair names in vhci cache nvlist */ 7320 #define MDI_VHCI_CACHE_VERSION 1 7321 #define MDI_NVPNAME_VERSION "version" 7322 #define MDI_NVPNAME_PHCIS "phcis" 7323 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7324 7325 /* 7326 * Given vhci class name, return its on-disk vhci cache filename. 7327 * Memory for the returned filename which includes the full path is allocated 7328 * by this function. 7329 */ 7330 static char * 7331 vhclass2vhcache_filename(char *vhclass) 7332 { 7333 char *filename; 7334 int len; 7335 static char *fmt = "/etc/devices/mdi_%s_cache"; 7336 7337 /* 7338 * fmt contains the on-disk vhci cache file name format; 7339 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
7340 */ 7341 7342 /* the -1 below is to account for "%s" in the format string */ 7343 len = strlen(fmt) + strlen(vhclass) - 1; 7344 filename = kmem_alloc(len, KM_SLEEP); 7345 (void) snprintf(filename, len, fmt, vhclass); 7346 ASSERT(len == (strlen(filename) + 1)); 7347 return (filename); 7348 } 7349 7350 /* 7351 * initialize the vhci cache related data structures and read the on-disk 7352 * vhci cached data into memory. 7353 */ 7354 static void 7355 setup_vhci_cache(mdi_vhci_t *vh) 7356 { 7357 mdi_vhci_config_t *vhc; 7358 mdi_vhci_cache_t *vhcache; 7359 int i; 7360 nvlist_t *nvl = NULL; 7361 7362 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7363 vh->vh_config = vhc; 7364 vhcache = &vhc->vhc_vhcache; 7365 7366 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7367 7368 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7369 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7370 7371 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7372 7373 /* 7374 * Create string hash; same as mod_hash_create_strhash() except that 7375 * we use NULL key destructor. 7376 */ 7377 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7378 mdi_bus_config_cache_hash_size, 7379 mod_hash_null_keydtor, mod_hash_null_valdtor, 7380 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7381 7382 /* 7383 * The on-disk vhci cache is read during booting prior to the 7384 * lights-out period by mdi_read_devices_files(). 7385 */ 7386 for (i = 0; i < N_VHCI_CLASSES; i++) { 7387 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7388 nvl = vhcache_nvl[i]; 7389 vhcache_nvl[i] = NULL; 7390 break; 7391 } 7392 } 7393 7394 /* 7395 * this is to cover the case of some one manually causing unloading 7396 * (or detaching) and reloading (or attaching) of a vhci driver. 
7397 */ 7398 if (nvl == NULL && modrootloaded) 7399 nvl = read_on_disk_vhci_cache(vh->vh_class); 7400 7401 if (nvl != NULL) { 7402 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7403 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7404 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7405 else { 7406 cmn_err(CE_WARN, 7407 "%s: data file corrupted, will recreate", 7408 vhc->vhc_vhcache_filename); 7409 } 7410 rw_exit(&vhcache->vhcache_lock); 7411 nvlist_free(nvl); 7412 } 7413 7414 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7415 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7416 7417 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7418 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7419 } 7420 7421 /* 7422 * free all vhci cache related resources 7423 */ 7424 static int 7425 destroy_vhci_cache(mdi_vhci_t *vh) 7426 { 7427 mdi_vhci_config_t *vhc = vh->vh_config; 7428 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7429 mdi_vhcache_phci_t *cphci, *cphci_next; 7430 mdi_vhcache_client_t *cct, *cct_next; 7431 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7432 7433 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7434 return (MDI_FAILURE); 7435 7436 kmem_free(vhc->vhc_vhcache_filename, 7437 strlen(vhc->vhc_vhcache_filename) + 1); 7438 7439 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7440 7441 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7442 cphci = cphci_next) { 7443 cphci_next = cphci->cphci_next; 7444 free_vhcache_phci(cphci); 7445 } 7446 7447 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7448 cct_next = cct->cct_next; 7449 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7450 cpi_next = cpi->cpi_next; 7451 free_vhcache_pathinfo(cpi); 7452 } 7453 free_vhcache_client(cct); 7454 } 7455 7456 rw_destroy(&vhcache->vhcache_lock); 7457 7458 mutex_destroy(&vhc->vhc_lock); 7459 cv_destroy(&vhc->vhc_cv); 7460 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7461 return (MDI_SUCCESS); 
7462 } 7463 7464 /* 7465 * Stop all vhci cache related async threads and free their resources. 7466 */ 7467 static int 7468 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7469 { 7470 mdi_async_client_config_t *acc, *acc_next; 7471 7472 mutex_enter(&vhc->vhc_lock); 7473 vhc->vhc_flags |= MDI_VHC_EXIT; 7474 ASSERT(vhc->vhc_acc_thrcount >= 0); 7475 cv_broadcast(&vhc->vhc_cv); 7476 7477 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7478 vhc->vhc_acc_thrcount != 0) { 7479 mutex_exit(&vhc->vhc_lock); 7480 delay_random(mdi_delay); 7481 mutex_enter(&vhc->vhc_lock); 7482 } 7483 7484 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7485 7486 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7487 acc_next = acc->acc_next; 7488 free_async_client_config(acc); 7489 } 7490 vhc->vhc_acc_list_head = NULL; 7491 vhc->vhc_acc_list_tail = NULL; 7492 vhc->vhc_acc_count = 0; 7493 7494 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7495 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7496 mutex_exit(&vhc->vhc_lock); 7497 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7498 vhcache_dirty(vhc); 7499 return (MDI_FAILURE); 7500 } 7501 } else 7502 mutex_exit(&vhc->vhc_lock); 7503 7504 if (callb_delete(vhc->vhc_cbid) != 0) 7505 return (MDI_FAILURE); 7506 7507 return (MDI_SUCCESS); 7508 } 7509 7510 /* 7511 * Stop vhci cache flush thread 7512 */ 7513 /* ARGSUSED */ 7514 static boolean_t 7515 stop_vhcache_flush_thread(void *arg, int code) 7516 { 7517 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7518 7519 mutex_enter(&vhc->vhc_lock); 7520 vhc->vhc_flags |= MDI_VHC_EXIT; 7521 cv_broadcast(&vhc->vhc_cv); 7522 7523 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7524 mutex_exit(&vhc->vhc_lock); 7525 delay_random(mdi_delay); 7526 mutex_enter(&vhc->vhc_lock); 7527 } 7528 7529 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7530 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7531 mutex_exit(&vhc->vhc_lock); 7532 (void) flush_vhcache(vhc, 1); 7533 } else 7534 mutex_exit(&vhc->vhc_lock); 7535 
7536 return (B_TRUE); 7537 } 7538 7539 /* 7540 * Enqueue the vhcache phci (cphci) at the tail of the list 7541 */ 7542 static void 7543 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7544 { 7545 cphci->cphci_next = NULL; 7546 if (vhcache->vhcache_phci_head == NULL) 7547 vhcache->vhcache_phci_head = cphci; 7548 else 7549 vhcache->vhcache_phci_tail->cphci_next = cphci; 7550 vhcache->vhcache_phci_tail = cphci; 7551 } 7552 7553 /* 7554 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7555 */ 7556 static void 7557 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7558 mdi_vhcache_pathinfo_t *cpi) 7559 { 7560 cpi->cpi_next = NULL; 7561 if (cct->cct_cpi_head == NULL) 7562 cct->cct_cpi_head = cpi; 7563 else 7564 cct->cct_cpi_tail->cpi_next = cpi; 7565 cct->cct_cpi_tail = cpi; 7566 } 7567 7568 /* 7569 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7570 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7571 * flag set come at the beginning of the list. All cpis which have this 7572 * flag set come at the end of the list. 
7573 */ 7574 static void 7575 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7576 mdi_vhcache_pathinfo_t *newcpi) 7577 { 7578 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7579 7580 if (cct->cct_cpi_head == NULL || 7581 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7582 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7583 else { 7584 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7585 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7586 prev_cpi = cpi, cpi = cpi->cpi_next) 7587 ; 7588 7589 if (prev_cpi == NULL) 7590 cct->cct_cpi_head = newcpi; 7591 else 7592 prev_cpi->cpi_next = newcpi; 7593 7594 newcpi->cpi_next = cpi; 7595 7596 if (cpi == NULL) 7597 cct->cct_cpi_tail = newcpi; 7598 } 7599 } 7600 7601 /* 7602 * Enqueue the vhcache client (cct) at the tail of the list 7603 */ 7604 static void 7605 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7606 mdi_vhcache_client_t *cct) 7607 { 7608 cct->cct_next = NULL; 7609 if (vhcache->vhcache_client_head == NULL) 7610 vhcache->vhcache_client_head = cct; 7611 else 7612 vhcache->vhcache_client_tail->cct_next = cct; 7613 vhcache->vhcache_client_tail = cct; 7614 } 7615 7616 static void 7617 free_string_array(char **str, int nelem) 7618 { 7619 int i; 7620 7621 if (str) { 7622 for (i = 0; i < nelem; i++) { 7623 if (str[i]) 7624 kmem_free(str[i], strlen(str[i]) + 1); 7625 } 7626 kmem_free(str, sizeof (char *) * nelem); 7627 } 7628 } 7629 7630 static void 7631 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7632 { 7633 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7634 kmem_free(cphci, sizeof (*cphci)); 7635 } 7636 7637 static void 7638 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7639 { 7640 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7641 kmem_free(cpi, sizeof (*cpi)); 7642 } 7643 7644 static void 7645 free_vhcache_client(mdi_vhcache_client_t *cct) 7646 { 7647 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7648 kmem_free(cct, sizeof (*cct)); 7649 } 7650 7651 
static char * 7652 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7653 { 7654 char *name_addr; 7655 int len; 7656 7657 len = strlen(ct_name) + strlen(ct_addr) + 2; 7658 name_addr = kmem_alloc(len, KM_SLEEP); 7659 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7660 7661 if (ret_len) 7662 *ret_len = len; 7663 return (name_addr); 7664 } 7665 7666 /* 7667 * Copy the contents of paddrnvl to vhci cache. 7668 * paddrnvl nvlist contains path information for a vhci client. 7669 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7670 */ 7671 static void 7672 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7673 mdi_vhcache_client_t *cct) 7674 { 7675 nvpair_t *nvp = NULL; 7676 mdi_vhcache_pathinfo_t *cpi; 7677 uint_t nelem; 7678 uint32_t *val; 7679 7680 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7681 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7682 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7683 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7684 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7685 ASSERT(nelem == 2); 7686 cpi->cpi_cphci = cphci_list[val[0]]; 7687 cpi->cpi_flags = val[1]; 7688 enqueue_tail_vhcache_pathinfo(cct, cpi); 7689 } 7690 } 7691 7692 /* 7693 * Copy the contents of caddrmapnvl to vhci cache. 7694 * caddrmapnvl nvlist contains vhci client address to phci client address 7695 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7696 * this nvlist. 
7697 */ 7698 static void 7699 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7700 mdi_vhcache_phci_t *cphci_list[]) 7701 { 7702 nvpair_t *nvp = NULL; 7703 nvlist_t *paddrnvl; 7704 mdi_vhcache_client_t *cct; 7705 7706 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7707 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7708 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7709 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7710 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7711 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7712 /* the client must contain at least one path */ 7713 ASSERT(cct->cct_cpi_head != NULL); 7714 7715 enqueue_vhcache_client(vhcache, cct); 7716 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7717 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7718 } 7719 } 7720 7721 /* 7722 * Copy the contents of the main nvlist to vhci cache. 7723 * 7724 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7725 * The nvlist contains the mappings between the vhci client addresses and 7726 * their corresponding phci client addresses. 7727 * 7728 * The structure of the nvlist is as follows: 7729 * 7730 * Main nvlist: 7731 * NAME TYPE DATA 7732 * version int32 version number 7733 * phcis string array array of phci paths 7734 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7735 * 7736 * structure of c2paddrs_nvl: 7737 * NAME TYPE DATA 7738 * caddr1 nvlist_t paddrs_nvl1 7739 * caddr2 nvlist_t paddrs_nvl2 7740 * ... 7741 * where caddr1, caddr2, ... are vhci client name and addresses in the 7742 * form of "<clientname>@<clientaddress>". 7743 * (for example: "ssd@2000002037cd9f72"); 7744 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7745 * 7746 * structure of paddrs_nvl: 7747 * NAME TYPE DATA 7748 * pi_addr1 uint32_array (phci-id, cpi_flags) 7749 * pi_addr2 uint32_array (phci-id, cpi_flags) 7750 * ... 7751 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7752 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7753 * phci-ids are integers that identify pHCIs to which the 7754 * the bus specific address belongs to. These integers are used as an index 7755 * into to the phcis string array in the main nvlist to get the pHCI path. 7756 */ 7757 static int 7758 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7759 { 7760 char **phcis, **phci_namep; 7761 uint_t nphcis; 7762 mdi_vhcache_phci_t *cphci, **cphci_list; 7763 nvlist_t *caddrmapnvl; 7764 int32_t ver; 7765 int i; 7766 size_t cphci_list_size; 7767 7768 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7769 7770 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7771 ver != MDI_VHCI_CACHE_VERSION) 7772 return (MDI_FAILURE); 7773 7774 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7775 &nphcis) != 0) 7776 return (MDI_SUCCESS); 7777 7778 ASSERT(nphcis > 0); 7779 7780 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7781 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7782 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7783 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7784 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7785 enqueue_vhcache_phci(vhcache, cphci); 7786 cphci_list[i] = cphci; 7787 } 7788 7789 ASSERT(vhcache->vhcache_phci_head != NULL); 7790 7791 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7792 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7793 7794 kmem_free(cphci_list, cphci_list_size); 7795 return (MDI_SUCCESS); 7796 } 7797 7798 /* 7799 * Build paddrnvl for the specified client using the information in the 7800 * vhci cache and add it to the caddrmapnnvl. 7801 * Returns 0 on success, errno on failure. 
7802 */ 7803 static int 7804 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7805 nvlist_t *caddrmapnvl) 7806 { 7807 mdi_vhcache_pathinfo_t *cpi; 7808 nvlist_t *nvl; 7809 int err; 7810 uint32_t val[2]; 7811 7812 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7813 7814 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7815 return (err); 7816 7817 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7818 val[0] = cpi->cpi_cphci->cphci_id; 7819 val[1] = cpi->cpi_flags; 7820 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7821 != 0) 7822 goto out; 7823 } 7824 7825 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7826 out: 7827 nvlist_free(nvl); 7828 return (err); 7829 } 7830 7831 /* 7832 * Build caddrmapnvl using the information in the vhci cache 7833 * and add it to the mainnvl. 7834 * Returns 0 on success, errno on failure. 7835 */ 7836 static int 7837 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7838 { 7839 mdi_vhcache_client_t *cct; 7840 nvlist_t *nvl; 7841 int err; 7842 7843 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7844 7845 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7846 return (err); 7847 7848 for (cct = vhcache->vhcache_client_head; cct != NULL; 7849 cct = cct->cct_next) { 7850 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7851 goto out; 7852 } 7853 7854 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7855 out: 7856 nvlist_free(nvl); 7857 return (err); 7858 } 7859 7860 /* 7861 * Build nvlist using the information in the vhci cache. 7862 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7863 * Returns nvl on success, NULL on failure. 
7864 */ 7865 static nvlist_t * 7866 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7867 { 7868 mdi_vhcache_phci_t *cphci; 7869 uint_t phci_count; 7870 char **phcis; 7871 nvlist_t *nvl; 7872 int err, i; 7873 7874 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7875 nvl = NULL; 7876 goto out; 7877 } 7878 7879 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7880 MDI_VHCI_CACHE_VERSION)) != 0) 7881 goto out; 7882 7883 rw_enter(&vhcache->vhcache_lock, RW_READER); 7884 if (vhcache->vhcache_phci_head == NULL) { 7885 rw_exit(&vhcache->vhcache_lock); 7886 return (nvl); 7887 } 7888 7889 phci_count = 0; 7890 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7891 cphci = cphci->cphci_next) 7892 cphci->cphci_id = phci_count++; 7893 7894 /* build phci pathname list */ 7895 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7896 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7897 cphci = cphci->cphci_next, i++) 7898 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7899 7900 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7901 phci_count); 7902 free_string_array(phcis, phci_count); 7903 7904 if (err == 0 && 7905 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7906 rw_exit(&vhcache->vhcache_lock); 7907 return (nvl); 7908 } 7909 7910 rw_exit(&vhcache->vhcache_lock); 7911 out: 7912 if (nvl) 7913 nvlist_free(nvl); 7914 return (NULL); 7915 } 7916 7917 /* 7918 * Lookup vhcache phci structure for the specified phci path. 7919 */ 7920 static mdi_vhcache_phci_t * 7921 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7922 { 7923 mdi_vhcache_phci_t *cphci; 7924 7925 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7926 7927 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7928 cphci = cphci->cphci_next) { 7929 if (strcmp(cphci->cphci_path, phci_path) == 0) 7930 return (cphci); 7931 } 7932 7933 return (NULL); 7934 } 7935 7936 /* 7937 * Lookup vhcache phci structure for the specified phci. 
7938 */ 7939 static mdi_vhcache_phci_t * 7940 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7941 { 7942 mdi_vhcache_phci_t *cphci; 7943 7944 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7945 7946 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7947 cphci = cphci->cphci_next) { 7948 if (cphci->cphci_phci == ph) 7949 return (cphci); 7950 } 7951 7952 return (NULL); 7953 } 7954 7955 /* 7956 * Add the specified phci to the vhci cache if not already present. 7957 */ 7958 static void 7959 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7960 { 7961 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7962 mdi_vhcache_phci_t *cphci; 7963 char *pathname; 7964 int cache_updated; 7965 7966 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7967 7968 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7969 (void) ddi_pathname(ph->ph_dip, pathname); 7970 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7971 != NULL) { 7972 cphci->cphci_phci = ph; 7973 cache_updated = 0; 7974 } else { 7975 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7976 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7977 cphci->cphci_phci = ph; 7978 enqueue_vhcache_phci(vhcache, cphci); 7979 cache_updated = 1; 7980 } 7981 7982 rw_exit(&vhcache->vhcache_lock); 7983 7984 /* 7985 * Since a new phci has been added, reset 7986 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7987 * during next vhcache_discover_paths(). 7988 */ 7989 mutex_enter(&vhc->vhc_lock); 7990 vhc->vhc_path_discovery_cutoff_time = 0; 7991 mutex_exit(&vhc->vhc_lock); 7992 7993 kmem_free(pathname, MAXPATHLEN); 7994 if (cache_updated) 7995 vhcache_dirty(vhc); 7996 } 7997 7998 /* 7999 * Remove the reference to the specified phci from the vhci cache. 
8000 */ 8001 static void 8002 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 8003 { 8004 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8005 mdi_vhcache_phci_t *cphci; 8006 8007 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8008 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 8009 /* do not remove the actual mdi_vhcache_phci structure */ 8010 cphci->cphci_phci = NULL; 8011 } 8012 rw_exit(&vhcache->vhcache_lock); 8013 } 8014 8015 static void 8016 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 8017 mdi_vhcache_lookup_token_t *src) 8018 { 8019 if (src == NULL) { 8020 dst->lt_cct = NULL; 8021 dst->lt_cct_lookup_time = 0; 8022 } else { 8023 dst->lt_cct = src->lt_cct; 8024 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 8025 } 8026 } 8027 8028 /* 8029 * Look up vhcache client for the specified client. 8030 */ 8031 static mdi_vhcache_client_t * 8032 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 8033 mdi_vhcache_lookup_token_t *token) 8034 { 8035 mod_hash_val_t hv; 8036 char *name_addr; 8037 int len; 8038 8039 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8040 8041 /* 8042 * If no vhcache clean occurred since the last lookup, we can 8043 * simply return the cct from the last lookup operation. 8044 * It works because ccts are never freed except during the vhcache 8045 * cleanup operation. 
8046 */ 8047 if (token != NULL && 8048 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 8049 return (token->lt_cct); 8050 8051 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 8052 if (mod_hash_find(vhcache->vhcache_client_hash, 8053 (mod_hash_key_t)name_addr, &hv) == 0) { 8054 if (token) { 8055 token->lt_cct = (mdi_vhcache_client_t *)hv; 8056 token->lt_cct_lookup_time = ddi_get_lbolt64(); 8057 } 8058 } else { 8059 if (token) { 8060 token->lt_cct = NULL; 8061 token->lt_cct_lookup_time = 0; 8062 } 8063 hv = NULL; 8064 } 8065 kmem_free(name_addr, len); 8066 return ((mdi_vhcache_client_t *)hv); 8067 } 8068 8069 /* 8070 * Add the specified path to the vhci cache if not already present. 8071 * Also add the vhcache client for the client corresponding to this path 8072 * if it doesn't already exist. 8073 */ 8074 static void 8075 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8076 { 8077 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8078 mdi_vhcache_client_t *cct; 8079 mdi_vhcache_pathinfo_t *cpi; 8080 mdi_phci_t *ph = pip->pi_phci; 8081 mdi_client_t *ct = pip->pi_client; 8082 int cache_updated = 0; 8083 8084 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8085 8086 /* if vhcache client for this pip doesn't already exist, add it */ 8087 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8088 NULL)) == NULL) { 8089 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 8090 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 8091 ct->ct_guid, NULL); 8092 enqueue_vhcache_client(vhcache, cct); 8093 (void) mod_hash_insert(vhcache->vhcache_client_hash, 8094 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 8095 cache_updated = 1; 8096 } 8097 8098 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8099 if (cpi->cpi_cphci->cphci_phci == ph && 8100 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 8101 cpi->cpi_pip = pip; 8102 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 8103 cpi->cpi_flags &= 8104 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8105 sort_vhcache_paths(cct); 8106 cache_updated = 1; 8107 } 8108 break; 8109 } 8110 } 8111 8112 if (cpi == NULL) { 8113 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 8114 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 8115 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 8116 ASSERT(cpi->cpi_cphci != NULL); 8117 cpi->cpi_pip = pip; 8118 enqueue_vhcache_pathinfo(cct, cpi); 8119 cache_updated = 1; 8120 } 8121 8122 rw_exit(&vhcache->vhcache_lock); 8123 8124 if (cache_updated) 8125 vhcache_dirty(vhc); 8126 } 8127 8128 /* 8129 * Remove the reference to the specified path from the vhci cache. 8130 */ 8131 static void 8132 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8133 { 8134 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8135 mdi_client_t *ct = pip->pi_client; 8136 mdi_vhcache_client_t *cct; 8137 mdi_vhcache_pathinfo_t *cpi; 8138 8139 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8140 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8141 NULL)) != NULL) { 8142 for (cpi = cct->cct_cpi_head; cpi != NULL; 8143 cpi = cpi->cpi_next) { 8144 if (cpi->cpi_pip == pip) { 8145 cpi->cpi_pip = NULL; 8146 break; 8147 } 8148 } 8149 } 8150 rw_exit(&vhcache->vhcache_lock); 8151 } 8152 8153 /* 8154 * Flush the vhci cache to disk. 8155 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 8156 */ 8157 static int 8158 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 8159 { 8160 nvlist_t *nvl; 8161 int err; 8162 int rv; 8163 8164 /* 8165 * It is possible that the system may shutdown before 8166 * i_ddi_io_initialized (during stmsboot for example). To allow for 8167 * flushing the cache in this case do not check for 8168 * i_ddi_io_initialized when force flag is set. 
8169 */ 8170 if (force_flag == 0 && !i_ddi_io_initialized()) 8171 return (MDI_FAILURE); 8172 8173 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 8174 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 8175 nvlist_free(nvl); 8176 } else 8177 err = EFAULT; 8178 8179 rv = MDI_SUCCESS; 8180 mutex_enter(&vhc->vhc_lock); 8181 if (err != 0) { 8182 if (err == EROFS) { 8183 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 8184 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 8185 MDI_VHC_VHCACHE_DIRTY); 8186 } else { 8187 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 8188 cmn_err(CE_CONT, "%s: update failed\n", 8189 vhc->vhc_vhcache_filename); 8190 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 8191 } 8192 rv = MDI_FAILURE; 8193 } 8194 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 8195 cmn_err(CE_CONT, 8196 "%s: update now ok\n", vhc->vhc_vhcache_filename); 8197 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 8198 } 8199 mutex_exit(&vhc->vhc_lock); 8200 8201 return (rv); 8202 } 8203 8204 /* 8205 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 8206 * Exits itself if left idle for the idle timeout period. 
8207 */ 8208 static void 8209 vhcache_flush_thread(void *arg) 8210 { 8211 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8212 clock_t idle_time, quit_at_ticks; 8213 callb_cpr_t cprinfo; 8214 8215 /* number of seconds to sleep idle before exiting */ 8216 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8217 8218 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8219 "mdi_vhcache_flush"); 8220 mutex_enter(&vhc->vhc_lock); 8221 for (; ; ) { 8222 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8223 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8224 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8225 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8226 (void) cv_timedwait(&vhc->vhc_cv, 8227 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8228 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8229 } else { 8230 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8231 mutex_exit(&vhc->vhc_lock); 8232 8233 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8234 vhcache_dirty(vhc); 8235 8236 mutex_enter(&vhc->vhc_lock); 8237 } 8238 } 8239 8240 quit_at_ticks = ddi_get_lbolt() + idle_time; 8241 8242 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8243 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8244 ddi_get_lbolt() < quit_at_ticks) { 8245 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8246 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8247 quit_at_ticks); 8248 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8249 } 8250 8251 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8252 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8253 goto out; 8254 } 8255 8256 out: 8257 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8258 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8259 CALLB_CPR_EXIT(&cprinfo); 8260 } 8261 8262 /* 8263 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
8264 */ 8265 static void 8266 vhcache_dirty(mdi_vhci_config_t *vhc) 8267 { 8268 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8269 int create_thread; 8270 8271 rw_enter(&vhcache->vhcache_lock, RW_READER); 8272 /* do not flush cache until the cache is fully built */ 8273 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8274 rw_exit(&vhcache->vhcache_lock); 8275 return; 8276 } 8277 rw_exit(&vhcache->vhcache_lock); 8278 8279 mutex_enter(&vhc->vhc_lock); 8280 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8281 mutex_exit(&vhc->vhc_lock); 8282 return; 8283 } 8284 8285 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8286 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8287 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8288 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8289 cv_broadcast(&vhc->vhc_cv); 8290 create_thread = 0; 8291 } else { 8292 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8293 create_thread = 1; 8294 } 8295 mutex_exit(&vhc->vhc_lock); 8296 8297 if (create_thread) 8298 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8299 0, &p0, TS_RUN, minclsyspri); 8300 } 8301 8302 /* 8303 * phci bus config structure - one for for each phci bus config operation that 8304 * we initiate on behalf of a vhci. 
8305 */ 8306 typedef struct mdi_phci_bus_config_s { 8307 char *phbc_phci_path; 8308 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8309 struct mdi_phci_bus_config_s *phbc_next; 8310 } mdi_phci_bus_config_t; 8311 8312 /* vhci bus config structure - one for each vhci bus config operation */ 8313 typedef struct mdi_vhci_bus_config_s { 8314 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8315 major_t vhbc_op_major; /* bus config op major */ 8316 uint_t vhbc_op_flags; /* bus config op flags */ 8317 kmutex_t vhbc_lock; 8318 kcondvar_t vhbc_cv; 8319 int vhbc_thr_count; 8320 } mdi_vhci_bus_config_t; 8321 8322 /* 8323 * bus config the specified phci 8324 */ 8325 static void 8326 bus_config_phci(void *arg) 8327 { 8328 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8329 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8330 dev_info_t *ph_dip; 8331 8332 /* 8333 * first configure all path components upto phci and then configure 8334 * the phci children. 8335 */ 8336 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8337 != NULL) { 8338 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8339 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8340 (void) ndi_devi_config_driver(ph_dip, 8341 vhbc->vhbc_op_flags, 8342 vhbc->vhbc_op_major); 8343 } else 8344 (void) ndi_devi_config(ph_dip, 8345 vhbc->vhbc_op_flags); 8346 8347 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8348 ndi_rele_devi(ph_dip); 8349 } 8350 8351 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8352 kmem_free(phbc, sizeof (*phbc)); 8353 8354 mutex_enter(&vhbc->vhbc_lock); 8355 vhbc->vhbc_thr_count--; 8356 if (vhbc->vhbc_thr_count == 0) 8357 cv_broadcast(&vhbc->vhbc_cv); 8358 mutex_exit(&vhbc->vhbc_lock); 8359 } 8360 8361 /* 8362 * Bus config all phcis associated with the vhci in parallel. 8363 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8364 */ 8365 static void 8366 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8367 ddi_bus_config_op_t op, major_t maj) 8368 { 8369 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8370 mdi_vhci_bus_config_t *vhbc; 8371 mdi_vhcache_phci_t *cphci; 8372 8373 rw_enter(&vhcache->vhcache_lock, RW_READER); 8374 if (vhcache->vhcache_phci_head == NULL) { 8375 rw_exit(&vhcache->vhcache_lock); 8376 return; 8377 } 8378 8379 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8380 8381 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8382 cphci = cphci->cphci_next) { 8383 /* skip phcis that haven't attached before root is available */ 8384 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8385 continue; 8386 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8387 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8388 KM_SLEEP); 8389 phbc->phbc_vhbusconfig = vhbc; 8390 phbc->phbc_next = phbc_head; 8391 phbc_head = phbc; 8392 vhbc->vhbc_thr_count++; 8393 } 8394 rw_exit(&vhcache->vhcache_lock); 8395 8396 vhbc->vhbc_op = op; 8397 vhbc->vhbc_op_major = maj; 8398 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8399 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8400 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8401 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8402 8403 /* now create threads to initiate bus config on all phcis in parallel */ 8404 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8405 phbc_next = phbc->phbc_next; 8406 if (mdi_mtc_off) 8407 bus_config_phci((void *)phbc); 8408 else 8409 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8410 0, &p0, TS_RUN, minclsyspri); 8411 } 8412 8413 mutex_enter(&vhbc->vhbc_lock); 8414 /* wait until all threads exit */ 8415 while (vhbc->vhbc_thr_count > 0) 8416 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8417 mutex_exit(&vhbc->vhbc_lock); 8418 8419 mutex_destroy(&vhbc->vhbc_lock); 8420 cv_destroy(&vhbc->vhbc_cv); 8421 kmem_free(vhbc, sizeof (*vhbc)); 8422 } 8423 8424 /* 8425 * Single 
threaded version of bus_config_all_phcis() 8426 */ 8427 static void 8428 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8429 ddi_bus_config_op_t op, major_t maj) 8430 { 8431 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8432 8433 single_threaded_vhconfig_enter(vhc); 8434 bus_config_all_phcis(vhcache, flags, op, maj); 8435 single_threaded_vhconfig_exit(vhc); 8436 } 8437 8438 /* 8439 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8440 * The path includes the child component in addition to the phci path. 8441 */ 8442 static int 8443 bus_config_one_phci_child(char *path) 8444 { 8445 dev_info_t *ph_dip, *child; 8446 char *devnm; 8447 int rv = MDI_FAILURE; 8448 8449 /* extract the child component of the phci */ 8450 devnm = strrchr(path, '/'); 8451 *devnm++ = '\0'; 8452 8453 /* 8454 * first configure all path components upto phci and then 8455 * configure the phci child. 8456 */ 8457 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8458 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8459 NDI_SUCCESS) { 8460 /* 8461 * release the hold that ndi_devi_config_one() placed 8462 */ 8463 ndi_rele_devi(child); 8464 rv = MDI_SUCCESS; 8465 } 8466 8467 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8468 ndi_rele_devi(ph_dip); 8469 } 8470 8471 devnm--; 8472 *devnm = '/'; 8473 return (rv); 8474 } 8475 8476 /* 8477 * Build a list of phci client paths for the specified vhci client. 8478 * The list includes only those phci client paths which aren't configured yet. 8479 */ 8480 static mdi_phys_path_t * 8481 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8482 { 8483 mdi_vhcache_pathinfo_t *cpi; 8484 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8485 int config_path, len; 8486 8487 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8488 /* 8489 * include only those paths that aren't configured. 
8490 */ 8491 config_path = 0; 8492 if (cpi->cpi_pip == NULL) 8493 config_path = 1; 8494 else { 8495 MDI_PI_LOCK(cpi->cpi_pip); 8496 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8497 config_path = 1; 8498 MDI_PI_UNLOCK(cpi->cpi_pip); 8499 } 8500 8501 if (config_path) { 8502 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8503 len = strlen(cpi->cpi_cphci->cphci_path) + 8504 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8505 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8506 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8507 cpi->cpi_cphci->cphci_path, ct_name, 8508 cpi->cpi_addr); 8509 pp->phys_path_next = NULL; 8510 8511 if (pp_head == NULL) 8512 pp_head = pp; 8513 else 8514 pp_tail->phys_path_next = pp; 8515 pp_tail = pp; 8516 } 8517 } 8518 8519 return (pp_head); 8520 } 8521 8522 /* 8523 * Free the memory allocated for phci client path list. 8524 */ 8525 static void 8526 free_phclient_path_list(mdi_phys_path_t *pp_head) 8527 { 8528 mdi_phys_path_t *pp, *pp_next; 8529 8530 for (pp = pp_head; pp != NULL; pp = pp_next) { 8531 pp_next = pp->phys_path_next; 8532 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8533 kmem_free(pp, sizeof (*pp)); 8534 } 8535 } 8536 8537 /* 8538 * Allocated async client structure and initialize with the specified values. 8539 */ 8540 static mdi_async_client_config_t * 8541 alloc_async_client_config(char *ct_name, char *ct_addr, 8542 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8543 { 8544 mdi_async_client_config_t *acc; 8545 8546 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8547 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8548 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8549 acc->acc_phclient_path_list_head = pp_head; 8550 init_vhcache_lookup_token(&acc->acc_token, tok); 8551 acc->acc_next = NULL; 8552 return (acc); 8553 } 8554 8555 /* 8556 * Free the memory allocated for the async client structure and their members. 
8557 */ 8558 static void 8559 free_async_client_config(mdi_async_client_config_t *acc) 8560 { 8561 if (acc->acc_phclient_path_list_head) 8562 free_phclient_path_list(acc->acc_phclient_path_list_head); 8563 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8564 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8565 kmem_free(acc, sizeof (*acc)); 8566 } 8567 8568 /* 8569 * Sort vhcache pathinfos (cpis) of the specified client. 8570 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8571 * flag set come at the beginning of the list. All cpis which have this 8572 * flag set come at the end of the list. 8573 */ 8574 static void 8575 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8576 { 8577 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8578 8579 cpi_head = cct->cct_cpi_head; 8580 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8581 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8582 cpi_next = cpi->cpi_next; 8583 enqueue_vhcache_pathinfo(cct, cpi); 8584 } 8585 } 8586 8587 /* 8588 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8589 * every vhcache pathinfo of the specified client. If not adjust the flag 8590 * setting appropriately. 8591 * 8592 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8593 * on-disk vhci cache. So every time this flag is updated the cache must be 8594 * flushed. 8595 */ 8596 static void 8597 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8598 mdi_vhcache_lookup_token_t *tok) 8599 { 8600 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8601 mdi_vhcache_client_t *cct; 8602 mdi_vhcache_pathinfo_t *cpi; 8603 8604 rw_enter(&vhcache->vhcache_lock, RW_READER); 8605 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8606 == NULL) { 8607 rw_exit(&vhcache->vhcache_lock); 8608 return; 8609 } 8610 8611 /* 8612 * to avoid unnecessary on-disk cache updates, first check if an 8613 * update is really needed. 
If no update is needed simply return. 8614 */ 8615 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8616 if ((cpi->cpi_pip != NULL && 8617 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8618 (cpi->cpi_pip == NULL && 8619 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8620 break; 8621 } 8622 } 8623 if (cpi == NULL) { 8624 rw_exit(&vhcache->vhcache_lock); 8625 return; 8626 } 8627 8628 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8629 rw_exit(&vhcache->vhcache_lock); 8630 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8631 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8632 tok)) == NULL) { 8633 rw_exit(&vhcache->vhcache_lock); 8634 return; 8635 } 8636 } 8637 8638 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8639 if (cpi->cpi_pip != NULL) 8640 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8641 else 8642 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8643 } 8644 sort_vhcache_paths(cct); 8645 8646 rw_exit(&vhcache->vhcache_lock); 8647 vhcache_dirty(vhc); 8648 } 8649 8650 /* 8651 * Configure all specified paths of the client. 8652 */ 8653 static void 8654 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8655 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8656 { 8657 mdi_phys_path_t *pp; 8658 8659 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8660 (void) bus_config_one_phci_child(pp->phys_path); 8661 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8662 } 8663 8664 /* 8665 * Dequeue elements from vhci async client config list and bus configure 8666 * their corresponding phci clients. 
8667 */ 8668 static void 8669 config_client_paths_thread(void *arg) 8670 { 8671 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8672 mdi_async_client_config_t *acc; 8673 clock_t quit_at_ticks; 8674 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8675 callb_cpr_t cprinfo; 8676 8677 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8678 "mdi_config_client_paths"); 8679 8680 for (; ; ) { 8681 quit_at_ticks = ddi_get_lbolt() + idle_time; 8682 8683 mutex_enter(&vhc->vhc_lock); 8684 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8685 vhc->vhc_acc_list_head == NULL && 8686 ddi_get_lbolt() < quit_at_ticks) { 8687 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8688 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8689 quit_at_ticks); 8690 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8691 } 8692 8693 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8694 vhc->vhc_acc_list_head == NULL) 8695 goto out; 8696 8697 acc = vhc->vhc_acc_list_head; 8698 vhc->vhc_acc_list_head = acc->acc_next; 8699 if (vhc->vhc_acc_list_head == NULL) 8700 vhc->vhc_acc_list_tail = NULL; 8701 vhc->vhc_acc_count--; 8702 mutex_exit(&vhc->vhc_lock); 8703 8704 config_client_paths_sync(vhc, acc->acc_ct_name, 8705 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8706 &acc->acc_token); 8707 8708 free_async_client_config(acc); 8709 } 8710 8711 out: 8712 vhc->vhc_acc_thrcount--; 8713 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8714 CALLB_CPR_EXIT(&cprinfo); 8715 } 8716 8717 /* 8718 * Arrange for all the phci client paths (pp_head) for the specified client 8719 * to be bus configured asynchronously by a thread. 
8720 */ 8721 static void 8722 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8723 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8724 { 8725 mdi_async_client_config_t *acc, *newacc; 8726 int create_thread; 8727 8728 if (pp_head == NULL) 8729 return; 8730 8731 if (mdi_mtc_off) { 8732 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8733 free_phclient_path_list(pp_head); 8734 return; 8735 } 8736 8737 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8738 ASSERT(newacc); 8739 8740 mutex_enter(&vhc->vhc_lock); 8741 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8742 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8743 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8744 free_async_client_config(newacc); 8745 mutex_exit(&vhc->vhc_lock); 8746 return; 8747 } 8748 } 8749 8750 if (vhc->vhc_acc_list_head == NULL) 8751 vhc->vhc_acc_list_head = newacc; 8752 else 8753 vhc->vhc_acc_list_tail->acc_next = newacc; 8754 vhc->vhc_acc_list_tail = newacc; 8755 vhc->vhc_acc_count++; 8756 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8757 cv_broadcast(&vhc->vhc_cv); 8758 create_thread = 0; 8759 } else { 8760 vhc->vhc_acc_thrcount++; 8761 create_thread = 1; 8762 } 8763 mutex_exit(&vhc->vhc_lock); 8764 8765 if (create_thread) 8766 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8767 0, &p0, TS_RUN, minclsyspri); 8768 } 8769 8770 /* 8771 * Return number of online paths for the specified client. 
8772 */ 8773 static int 8774 nonline_paths(mdi_vhcache_client_t *cct) 8775 { 8776 mdi_vhcache_pathinfo_t *cpi; 8777 int online_count = 0; 8778 8779 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8780 if (cpi->cpi_pip != NULL) { 8781 MDI_PI_LOCK(cpi->cpi_pip); 8782 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8783 online_count++; 8784 MDI_PI_UNLOCK(cpi->cpi_pip); 8785 } 8786 } 8787 8788 return (online_count); 8789 } 8790 8791 /* 8792 * Bus configure all paths for the specified vhci client. 8793 * If at least one path for the client is already online, the remaining paths 8794 * will be configured asynchronously. Otherwise, it synchronously configures 8795 * the paths until at least one path is online and then rest of the paths 8796 * will be configured asynchronously. 8797 */ 8798 static void 8799 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8800 { 8801 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8802 mdi_phys_path_t *pp_head, *pp; 8803 mdi_vhcache_client_t *cct; 8804 mdi_vhcache_lookup_token_t tok; 8805 8806 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8807 8808 init_vhcache_lookup_token(&tok, NULL); 8809 8810 if (ct_name == NULL || ct_addr == NULL || 8811 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8812 == NULL || 8813 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8814 rw_exit(&vhcache->vhcache_lock); 8815 return; 8816 } 8817 8818 /* if at least one path is online, configure the rest asynchronously */ 8819 if (nonline_paths(cct) > 0) { 8820 rw_exit(&vhcache->vhcache_lock); 8821 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8822 return; 8823 } 8824 8825 rw_exit(&vhcache->vhcache_lock); 8826 8827 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8828 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8829 rw_enter(&vhcache->vhcache_lock, RW_READER); 8830 8831 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8832 ct_addr, &tok)) 
== NULL) { 8833 rw_exit(&vhcache->vhcache_lock); 8834 goto out; 8835 } 8836 8837 if (nonline_paths(cct) > 0 && 8838 pp->phys_path_next != NULL) { 8839 rw_exit(&vhcache->vhcache_lock); 8840 config_client_paths_async(vhc, ct_name, ct_addr, 8841 pp->phys_path_next, &tok); 8842 pp->phys_path_next = NULL; 8843 goto out; 8844 } 8845 8846 rw_exit(&vhcache->vhcache_lock); 8847 } 8848 } 8849 8850 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8851 out: 8852 free_phclient_path_list(pp_head); 8853 } 8854 8855 static void 8856 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8857 { 8858 mutex_enter(&vhc->vhc_lock); 8859 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8860 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8861 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8862 mutex_exit(&vhc->vhc_lock); 8863 } 8864 8865 static void 8866 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8867 { 8868 mutex_enter(&vhc->vhc_lock); 8869 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8870 cv_broadcast(&vhc->vhc_cv); 8871 mutex_exit(&vhc->vhc_lock); 8872 } 8873 8874 typedef struct mdi_phci_driver_info { 8875 char *phdriver_name; /* name of the phci driver */ 8876 8877 /* set to non zero if the phci driver supports root device */ 8878 int phdriver_root_support; 8879 } mdi_phci_driver_info_t; 8880 8881 /* 8882 * vhci class and root support capability of a phci driver can be 8883 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8884 * phci driver.conf file. The built-in tables below contain this information 8885 * for those phci drivers whose driver.conf files don't yet contain this info. 8886 * 8887 * All phci drivers expect iscsi have root device support. 
8888 */ 8889 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8890 { "fp", 1 }, 8891 { "iscsi", 0 }, 8892 { "ibsrp", 1 } 8893 }; 8894 8895 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8896 8897 static void * 8898 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8899 { 8900 void *new_ptr; 8901 8902 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8903 if (old_ptr) { 8904 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8905 kmem_free(old_ptr, old_size); 8906 } 8907 return (new_ptr); 8908 } 8909 8910 static void 8911 add_to_phci_list(char ***driver_list, int **root_support_list, 8912 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8913 { 8914 ASSERT(*cur_elements <= *max_elements); 8915 if (*cur_elements == *max_elements) { 8916 *max_elements += 10; 8917 *driver_list = mdi_realloc(*driver_list, 8918 sizeof (char *) * (*cur_elements), 8919 sizeof (char *) * (*max_elements)); 8920 *root_support_list = mdi_realloc(*root_support_list, 8921 sizeof (int) * (*cur_elements), 8922 sizeof (int) * (*max_elements)); 8923 } 8924 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8925 (*root_support_list)[*cur_elements] = root_support; 8926 (*cur_elements)++; 8927 } 8928 8929 static void 8930 get_phci_driver_list(char *vhci_class, char ***driver_list, 8931 int **root_support_list, int *cur_elements, int *max_elements) 8932 { 8933 mdi_phci_driver_info_t *st_driver_list, *p; 8934 int st_ndrivers, root_support, i, j, driver_conf_count; 8935 major_t m; 8936 struct devnames *dnp; 8937 ddi_prop_t *propp; 8938 8939 *driver_list = NULL; 8940 *root_support_list = NULL; 8941 *cur_elements = 0; 8942 *max_elements = 0; 8943 8944 /* add the phci drivers derived from the phci driver.conf files */ 8945 for (m = 0; m < devcnt; m++) { 8946 dnp = &devnamesp[m]; 8947 8948 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8949 LOCK_DEV_OPS(&dnp->dn_lock); 8950 if (dnp->dn_global_prop_ptr != NULL && 8951 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8952 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8953 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8954 strcmp(propp->prop_val, vhci_class) == 0) { 8955 8956 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8957 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8958 &dnp->dn_global_prop_ptr->prop_list) 8959 == NULL) ? 1 : 0; 8960 8961 add_to_phci_list(driver_list, root_support_list, 8962 cur_elements, max_elements, dnp->dn_name, 8963 root_support); 8964 8965 UNLOCK_DEV_OPS(&dnp->dn_lock); 8966 } else 8967 UNLOCK_DEV_OPS(&dnp->dn_lock); 8968 } 8969 } 8970 8971 driver_conf_count = *cur_elements; 8972 8973 /* add the phci drivers specified in the built-in tables */ 8974 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8975 st_driver_list = scsi_phci_driver_list; 8976 st_ndrivers = sizeof (scsi_phci_driver_list) / 8977 sizeof (mdi_phci_driver_info_t); 8978 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8979 st_driver_list = ib_phci_driver_list; 8980 st_ndrivers = sizeof (ib_phci_driver_list) / 8981 sizeof (mdi_phci_driver_info_t); 8982 } else { 8983 st_driver_list = NULL; 8984 st_ndrivers = 0; 8985 } 8986 8987 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8988 /* add this phci driver if not already added before */ 8989 for (j = 0; j < driver_conf_count; j++) { 8990 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8991 break; 8992 } 8993 if (j == driver_conf_count) { 8994 add_to_phci_list(driver_list, root_support_list, 8995 cur_elements, max_elements, p->phdriver_name, 8996 p->phdriver_root_support); 8997 } 8998 } 8999 } 9000 9001 /* 9002 * Attach the phci driver instances associated with the specified vhci class. 9003 * If root is mounted attach all phci driver instances. 9004 * If root is not mounted, attach the instances of only those phci 9005 * drivers that have the root support. 
9006 */ 9007 static void 9008 attach_phci_drivers(char *vhci_class) 9009 { 9010 char **driver_list, **p; 9011 int *root_support_list; 9012 int cur_elements, max_elements, i; 9013 major_t m; 9014 9015 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9016 &cur_elements, &max_elements); 9017 9018 for (i = 0; i < cur_elements; i++) { 9019 if (modrootloaded || root_support_list[i]) { 9020 m = ddi_name_to_major(driver_list[i]); 9021 if (m != DDI_MAJOR_T_NONE && 9022 ddi_hold_installed_driver(m)) 9023 ddi_rele_driver(m); 9024 } 9025 } 9026 9027 if (driver_list) { 9028 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 9029 kmem_free(*p, strlen(*p) + 1); 9030 kmem_free(driver_list, sizeof (char *) * max_elements); 9031 kmem_free(root_support_list, sizeof (int) * max_elements); 9032 } 9033 } 9034 9035 /* 9036 * Build vhci cache: 9037 * 9038 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 9039 * the phci driver instances. During this process the cache gets built. 9040 * 9041 * Cache is built fully if the root is mounted. 9042 * If the root is not mounted, phci drivers that do not have root support 9043 * are not attached. As a result the cache is built partially. The entries 9044 * in the cache reflect only those phci drivers that have root support. 
9045 */ 9046 static int 9047 build_vhci_cache(mdi_vhci_t *vh) 9048 { 9049 mdi_vhci_config_t *vhc = vh->vh_config; 9050 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9051 9052 single_threaded_vhconfig_enter(vhc); 9053 9054 rw_enter(&vhcache->vhcache_lock, RW_READER); 9055 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 9056 rw_exit(&vhcache->vhcache_lock); 9057 single_threaded_vhconfig_exit(vhc); 9058 return (0); 9059 } 9060 rw_exit(&vhcache->vhcache_lock); 9061 9062 attach_phci_drivers(vh->vh_class); 9063 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 9064 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9065 9066 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9067 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 9068 rw_exit(&vhcache->vhcache_lock); 9069 9070 single_threaded_vhconfig_exit(vhc); 9071 vhcache_dirty(vhc); 9072 return (1); 9073 } 9074 9075 /* 9076 * Determine if discovery of paths is needed. 9077 */ 9078 static int 9079 vhcache_do_discovery(mdi_vhci_config_t *vhc) 9080 { 9081 int rv = 1; 9082 9083 mutex_enter(&vhc->vhc_lock); 9084 if (i_ddi_io_initialized() == 0) { 9085 if (vhc->vhc_path_discovery_boot > 0) { 9086 vhc->vhc_path_discovery_boot--; 9087 goto out; 9088 } 9089 } else { 9090 if (vhc->vhc_path_discovery_postboot > 0) { 9091 vhc->vhc_path_discovery_postboot--; 9092 goto out; 9093 } 9094 } 9095 9096 /* 9097 * Do full path discovery at most once per mdi_path_discovery_interval. 9098 * This is to avoid a series of full path discoveries when opening 9099 * stale /dev/[r]dsk links. 9100 */ 9101 if (mdi_path_discovery_interval != -1 && 9102 ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time) 9103 goto out; 9104 9105 rv = 0; 9106 out: 9107 mutex_exit(&vhc->vhc_lock); 9108 return (rv); 9109 } 9110 9111 /* 9112 * Discover all paths: 9113 * 9114 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 9115 * driver instances. During this process all paths will be discovered. 
9116 */ 9117 static int 9118 vhcache_discover_paths(mdi_vhci_t *vh) 9119 { 9120 mdi_vhci_config_t *vhc = vh->vh_config; 9121 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9122 int rv = 0; 9123 9124 single_threaded_vhconfig_enter(vhc); 9125 9126 if (vhcache_do_discovery(vhc)) { 9127 attach_phci_drivers(vh->vh_class); 9128 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 9129 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9130 9131 mutex_enter(&vhc->vhc_lock); 9132 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() + 9133 mdi_path_discovery_interval * TICKS_PER_SECOND; 9134 mutex_exit(&vhc->vhc_lock); 9135 rv = 1; 9136 } 9137 9138 single_threaded_vhconfig_exit(vhc); 9139 return (rv); 9140 } 9141 9142 /* 9143 * Generic vhci bus config implementation: 9144 * 9145 * Parameters 9146 * vdip vhci dip 9147 * flags bus config flags 9148 * op bus config operation 9149 * The remaining parameters are bus config operation specific 9150 * 9151 * for BUS_CONFIG_ONE 9152 * arg pointer to name@addr 9153 * child upon successful return from this function, *child will be 9154 * set to the configured and held devinfo child node of vdip. 9155 * ct_addr pointer to client address (i.e. GUID) 9156 * 9157 * for BUS_CONFIG_DRIVER 9158 * arg major number of the driver 9159 * child and ct_addr parameters are ignored 9160 * 9161 * for BUS_CONFIG_ALL 9162 * arg, child, and ct_addr parameters are ignored 9163 * 9164 * Note that for the rest of the bus config operations, this function simply 9165 * calls the framework provided default bus config routine. 
9166 */ 9167 int 9168 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 9169 void *arg, dev_info_t **child, char *ct_addr) 9170 { 9171 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9172 mdi_vhci_config_t *vhc = vh->vh_config; 9173 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9174 int rv = 0; 9175 int params_valid = 0; 9176 char *cp; 9177 9178 /* 9179 * To bus config vhcis we relay operation, possibly using another 9180 * thread, to phcis. The phci driver then interacts with MDI to cause 9181 * vhci child nodes to be enumerated under the vhci node. Adding a 9182 * vhci child requires an ndi_devi_enter of the vhci. Since another 9183 * thread may be adding the child, to avoid deadlock we can't wait 9184 * for the relayed operations to complete if we have already entered 9185 * the vhci node. 9186 */ 9187 if (DEVI_BUSY_OWNED(vdip)) { 9188 MDI_DEBUG(2, (MDI_NOTE, vdip, 9189 "vhci dip is busy owned %p", (void *)vdip)); 9190 goto default_bus_config; 9191 } 9192 9193 rw_enter(&vhcache->vhcache_lock, RW_READER); 9194 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 9195 rw_exit(&vhcache->vhcache_lock); 9196 rv = build_vhci_cache(vh); 9197 rw_enter(&vhcache->vhcache_lock, RW_READER); 9198 } 9199 9200 switch (op) { 9201 case BUS_CONFIG_ONE: 9202 if (arg != NULL && ct_addr != NULL) { 9203 /* extract node name */ 9204 cp = (char *)arg; 9205 while (*cp != '\0' && *cp != '@') 9206 cp++; 9207 if (*cp == '@') { 9208 params_valid = 1; 9209 *cp = '\0'; 9210 config_client_paths(vhc, (char *)arg, ct_addr); 9211 /* config_client_paths() releases cache_lock */ 9212 *cp = '@'; 9213 break; 9214 } 9215 } 9216 9217 rw_exit(&vhcache->vhcache_lock); 9218 break; 9219 9220 case BUS_CONFIG_DRIVER: 9221 rw_exit(&vhcache->vhcache_lock); 9222 if (rv == 0) 9223 st_bus_config_all_phcis(vhc, flags, op, 9224 (major_t)(uintptr_t)arg); 9225 break; 9226 9227 case BUS_CONFIG_ALL: 9228 rw_exit(&vhcache->vhcache_lock); 9229 if (rv == 0) 9230 st_bus_config_all_phcis(vhc, 
flags, op, -1); 9231 break; 9232 9233 default: 9234 rw_exit(&vhcache->vhcache_lock); 9235 break; 9236 } 9237 9238 9239 default_bus_config: 9240 /* 9241 * All requested child nodes are enumerated under the vhci. 9242 * Now configure them. 9243 */ 9244 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9245 NDI_SUCCESS) { 9246 return (MDI_SUCCESS); 9247 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 9248 /* discover all paths and try configuring again */ 9249 if (vhcache_discover_paths(vh) && 9250 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9251 NDI_SUCCESS) 9252 return (MDI_SUCCESS); 9253 } 9254 9255 return (MDI_FAILURE); 9256 } 9257 9258 /* 9259 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 9260 */ 9261 static nvlist_t * 9262 read_on_disk_vhci_cache(char *vhci_class) 9263 { 9264 nvlist_t *nvl; 9265 int err; 9266 char *filename; 9267 9268 filename = vhclass2vhcache_filename(vhci_class); 9269 9270 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9271 kmem_free(filename, strlen(filename) + 1); 9272 return (nvl); 9273 } else if (err == EIO) 9274 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename); 9275 else if (err == EINVAL) 9276 cmn_err(CE_WARN, 9277 "%s: data file corrupted, will recreate", filename); 9278 9279 kmem_free(filename, strlen(filename) + 1); 9280 return (NULL); 9281 } 9282 9283 /* 9284 * Read on-disk vhci cache into nvlists for all vhci classes. 9285 * Called during booting by i_ddi_read_devices_files(). 9286 */ 9287 void 9288 mdi_read_devices_files(void) 9289 { 9290 int i; 9291 9292 for (i = 0; i < N_VHCI_CLASSES; i++) 9293 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9294 } 9295 9296 /* 9297 * Remove all stale entries from vhci cache. 
9298 */ 9299 static void 9300 clean_vhcache(mdi_vhci_config_t *vhc) 9301 { 9302 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9303 mdi_vhcache_phci_t *phci, *nxt_phci; 9304 mdi_vhcache_client_t *client, *nxt_client; 9305 mdi_vhcache_pathinfo_t *path, *nxt_path; 9306 9307 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9308 9309 client = vhcache->vhcache_client_head; 9310 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9311 for ( ; client != NULL; client = nxt_client) { 9312 nxt_client = client->cct_next; 9313 9314 path = client->cct_cpi_head; 9315 client->cct_cpi_head = client->cct_cpi_tail = NULL; 9316 for ( ; path != NULL; path = nxt_path) { 9317 nxt_path = path->cpi_next; 9318 if ((path->cpi_cphci->cphci_phci != NULL) && 9319 (path->cpi_pip != NULL)) { 9320 enqueue_tail_vhcache_pathinfo(client, path); 9321 } else if (path->cpi_pip != NULL) { 9322 /* Not valid to have a path without a phci. */ 9323 free_vhcache_pathinfo(path); 9324 } 9325 } 9326 9327 if (client->cct_cpi_head != NULL) 9328 enqueue_vhcache_client(vhcache, client); 9329 else { 9330 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9331 (mod_hash_key_t)client->cct_name_addr); 9332 free_vhcache_client(client); 9333 } 9334 } 9335 9336 phci = vhcache->vhcache_phci_head; 9337 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9338 for ( ; phci != NULL; phci = nxt_phci) { 9339 9340 nxt_phci = phci->cphci_next; 9341 if (phci->cphci_phci != NULL) 9342 enqueue_vhcache_phci(vhcache, phci); 9343 else 9344 free_vhcache_phci(phci); 9345 } 9346 9347 vhcache->vhcache_clean_time = ddi_get_lbolt64(); 9348 rw_exit(&vhcache->vhcache_lock); 9349 vhcache_dirty(vhc); 9350 } 9351 9352 /* 9353 * Remove all stale entries from vhci cache. 
9354 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9355 */ 9356 void 9357 mdi_clean_vhcache(void) 9358 { 9359 mdi_vhci_t *vh; 9360 9361 mutex_enter(&mdi_mutex); 9362 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9363 vh->vh_refcnt++; 9364 mutex_exit(&mdi_mutex); 9365 clean_vhcache(vh->vh_config); 9366 mutex_enter(&mdi_mutex); 9367 vh->vh_refcnt--; 9368 } 9369 mutex_exit(&mdi_mutex); 9370 } 9371 9372 /* 9373 * mdi_vhci_walk_clients(): 9374 * Walker routine to traverse client dev_info nodes 9375 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9376 * below the client, including nexus devices, which we dont want. 9377 * So we just traverse the immediate siblings, starting from 1st client. 9378 */ 9379 void 9380 mdi_vhci_walk_clients(dev_info_t *vdip, 9381 int (*f)(dev_info_t *, void *), void *arg) 9382 { 9383 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9384 dev_info_t *cdip; 9385 mdi_client_t *ct; 9386 9387 MDI_VHCI_CLIENT_LOCK(vh); 9388 cdip = ddi_get_child(vdip); 9389 while (cdip) { 9390 ct = i_devi_get_client(cdip); 9391 MDI_CLIENT_LOCK(ct); 9392 9393 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9394 cdip = ddi_get_next_sibling(cdip); 9395 else 9396 cdip = NULL; 9397 9398 MDI_CLIENT_UNLOCK(ct); 9399 } 9400 MDI_VHCI_CLIENT_UNLOCK(vh); 9401 } 9402 9403 /* 9404 * mdi_vhci_walk_phcis(): 9405 * Walker routine to traverse phci dev_info nodes 9406 */ 9407 void 9408 mdi_vhci_walk_phcis(dev_info_t *vdip, 9409 int (*f)(dev_info_t *, void *), void *arg) 9410 { 9411 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9412 mdi_phci_t *ph, *next; 9413 9414 MDI_VHCI_PHCI_LOCK(vh); 9415 ph = vh->vh_phci_head; 9416 while (ph) { 9417 MDI_PHCI_LOCK(ph); 9418 9419 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9420 next = ph->ph_next; 9421 else 9422 next = NULL; 9423 9424 MDI_PHCI_UNLOCK(ph); 9425 ph = next; 9426 } 9427 MDI_VHCI_PHCI_UNLOCK(vh); 9428 } 9429 9430 9431 /* 9432 * mdi_walk_vhcis(): 9433 * Walker routine to traverse vhci 
dev_info nodes 9434 */ 9435 void 9436 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9437 { 9438 mdi_vhci_t *vh = NULL; 9439 9440 mutex_enter(&mdi_mutex); 9441 /* 9442 * Scan for already registered vhci 9443 */ 9444 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9445 vh->vh_refcnt++; 9446 mutex_exit(&mdi_mutex); 9447 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9448 mutex_enter(&mdi_mutex); 9449 vh->vh_refcnt--; 9450 break; 9451 } else { 9452 mutex_enter(&mdi_mutex); 9453 vh->vh_refcnt--; 9454 } 9455 } 9456 9457 mutex_exit(&mdi_mutex); 9458 } 9459 9460 /* 9461 * i_mdi_log_sysevent(): 9462 * Logs events for pickup by syseventd 9463 */ 9464 static void 9465 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9466 { 9467 char *path_name; 9468 nvlist_t *attr_list; 9469 9470 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9471 KM_SLEEP) != DDI_SUCCESS) { 9472 goto alloc_failed; 9473 } 9474 9475 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9476 (void) ddi_pathname(dip, path_name); 9477 9478 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9479 ddi_driver_name(dip)) != DDI_SUCCESS) { 9480 goto error; 9481 } 9482 9483 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9484 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9485 goto error; 9486 } 9487 9488 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9489 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9490 goto error; 9491 } 9492 9493 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9494 path_name) != DDI_SUCCESS) { 9495 goto error; 9496 } 9497 9498 if (nvlist_add_string(attr_list, DDI_CLASS, 9499 ph_vh_class) != DDI_SUCCESS) { 9500 goto error; 9501 } 9502 9503 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9504 attr_list, NULL, DDI_SLEEP); 9505 9506 error: 9507 kmem_free(path_name, MAXPATHLEN); 9508 nvlist_free(attr_list); 9509 return; 9510 9511 alloc_failed: 9512 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9513 } 9514 9515 char ** 9516 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9517 { 9518 char **driver_list, **ret_driver_list = NULL; 9519 int *root_support_list; 9520 int cur_elements, max_elements; 9521 9522 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9523 &cur_elements, &max_elements); 9524 9525 9526 if (driver_list) { 9527 kmem_free(root_support_list, sizeof (int) * max_elements); 9528 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9529 * max_elements, sizeof (char *) * cur_elements); 9530 } 9531 *ndrivers = cur_elements; 9532 9533 return (ret_driver_list); 9534 9535 } 9536 9537 void 9538 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9539 { 9540 char **p; 9541 int i; 9542 9543 if (driver_list) { 9544 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9545 kmem_free(*p, strlen(*p) + 1); 9546 kmem_free(driver_list, sizeof (char *) * ndrivers); 9547 } 9548 } 9549 9550 /* 9551 * mdi_is_dev_supported(): 9552 * function called by pHCI bus config operation to determine if a 9553 * device should be represented as a child of the vHCI or the 9554 * pHCI. This decision is made by the vHCI, using cinfo idenity 9555 * information passed by the pHCI - specifics of the cinfo 9556 * representation are by agreement between the pHCI and vHCI. 9557 * Return Values: 9558 * MDI_SUCCESS 9559 * MDI_FAILURE 9560 */ 9561 int 9562 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9563 { 9564 mdi_vhci_t *vh; 9565 9566 ASSERT(class && pdip); 9567 9568 /* 9569 * For dev_supported, mdi_phci_register() must have established pdip as 9570 * a pHCI. 9571 * 9572 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9573 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9574 */ 9575 if (!MDI_PHCI(pdip)) 9576 return (MDI_FAILURE); 9577 9578 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9579 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9580 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9581 return (MDI_FAILURE); 9582 } 9583 9584 /* Return vHCI answer */ 9585 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9586 } 9587 9588 int 9589 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9590 { 9591 uint_t devstate = 0; 9592 dev_info_t *cdip; 9593 9594 if ((pip == NULL) || (dcp == NULL)) 9595 return (MDI_FAILURE); 9596 9597 cdip = mdi_pi_get_client(pip); 9598 9599 switch (mdi_pi_get_state(pip)) { 9600 case MDI_PATHINFO_STATE_INIT: 9601 devstate = DEVICE_DOWN; 9602 break; 9603 case MDI_PATHINFO_STATE_ONLINE: 9604 devstate = DEVICE_ONLINE; 9605 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9606 devstate |= DEVICE_BUSY; 9607 break; 9608 case MDI_PATHINFO_STATE_STANDBY: 9609 devstate = DEVICE_ONLINE; 9610 break; 9611 case MDI_PATHINFO_STATE_FAULT: 9612 devstate = DEVICE_DOWN; 9613 break; 9614 case MDI_PATHINFO_STATE_OFFLINE: 9615 devstate = DEVICE_OFFLINE; 9616 break; 9617 default: 9618 ASSERT(MDI_PI(pip)->pi_state); 9619 } 9620 9621 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9622 return (MDI_FAILURE); 9623 9624 return (MDI_SUCCESS); 9625 } 9626