1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
29 * 30 * Default locking order: 31 * 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 #include <sys/sysmacros.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 77 #define MDI_WARN CE_WARN, __func__ 78 #define MDI_NOTE CE_NOTE, __func__ 79 #define MDI_CONT CE_CONT, __func__ 80 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 81 #else /* !DEBUG */ 82 #define MDI_DEBUG(dbglevel, pargs) 83 #endif /* DEBUG */ 84 int mdi_debug_consoleonly = 0; 85 int mdi_delay = 3; 86 87 extern pri_t minclsyspri; 88 extern int modrootloaded; 89 90 /* 91 * Global mutex: 92 * Protects vHCI list and structure members. 
93 */ 94 kmutex_t mdi_mutex; 95 96 /* 97 * Registered vHCI class driver lists 98 */ 99 int mdi_vhci_count; 100 mdi_vhci_t *mdi_vhci_head; 101 mdi_vhci_t *mdi_vhci_tail; 102 103 /* 104 * Client Hash Table size 105 */ 106 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 107 108 /* 109 * taskq interface definitions 110 */ 111 #define MDI_TASKQ_N_THREADS 8 112 #define MDI_TASKQ_PRI minclsyspri 113 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 114 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 115 116 taskq_t *mdi_taskq; 117 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 118 119 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 120 121 /* 122 * The data should be "quiet" for this interval (in seconds) before the 123 * vhci cached data is flushed to the disk. 124 */ 125 static int mdi_vhcache_flush_delay = 10; 126 127 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 128 static int mdi_vhcache_flush_daemon_idle_time = 60; 129 130 /* 131 * MDI falls back to discovery of all paths when a bus_config_one fails. 132 * The following parameters can be used to tune this operation. 133 * 134 * mdi_path_discovery_boot 135 * Number of times path discovery will be attempted during early boot. 136 * Probably there is no reason to ever set this value to greater than one. 137 * 138 * mdi_path_discovery_postboot 139 * Number of times path discovery will be attempted after early boot. 140 * Set it to a minimum of two to allow for discovery of iscsi paths which 141 * may happen very late during booting. 142 * 143 * mdi_path_discovery_interval 144 * Minimum number of seconds MDI will wait between successive discovery 145 * of all paths. Set it to -1 to disable discovery of all paths. 
146 */ 147 static int mdi_path_discovery_boot = 1; 148 static int mdi_path_discovery_postboot = 2; 149 static int mdi_path_discovery_interval = 10; 150 151 /* 152 * number of seconds the asynchronous configuration thread will sleep idle 153 * before exiting. 154 */ 155 static int mdi_async_config_idle_time = 600; 156 157 static int mdi_bus_config_cache_hash_size = 256; 158 159 /* turns off multithreaded configuration for certain operations */ 160 static int mdi_mtc_off = 0; 161 162 /* 163 * The "path" to a pathinfo node is identical to the /devices path to a 164 * devinfo node had the device been enumerated under a pHCI instead of 165 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 166 * This association persists across create/delete of the pathinfo nodes, 167 * but not across reboot. 168 */ 169 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 170 static int mdi_pathmap_hash_size = 256; 171 static kmutex_t mdi_pathmap_mutex; 172 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 173 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 174 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */ 175 176 /* 177 * MDI component property name/value string definitions 178 */ 179 const char *mdi_component_prop = "mpxio-component"; 180 const char *mdi_component_prop_vhci = "vhci"; 181 const char *mdi_component_prop_phci = "phci"; 182 const char *mdi_component_prop_client = "client"; 183 184 /* 185 * MDI client global unique identifier property name 186 */ 187 const char *mdi_client_guid_prop = "client-guid"; 188 189 /* 190 * MDI client load balancing property name/value string definitions 191 */ 192 const char *mdi_load_balance = "load-balance"; 193 const char *mdi_load_balance_none = "none"; 194 const char *mdi_load_balance_rr = "round-robin"; 195 const char *mdi_load_balance_lba = "logical-block"; 196 197 /* 198 * Obsolete vHCI class definition; to be removed after Leadville update 199 */ 200 const char 
*mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 201 202 static char vhci_greeting[] = 203 "\tThere already exists one vHCI driver for class %s\n" 204 "\tOnly one vHCI driver for each class is allowed\n"; 205 206 /* 207 * Static function prototypes 208 */ 209 static int i_mdi_phci_offline(dev_info_t *, uint_t); 210 static int i_mdi_client_offline(dev_info_t *, uint_t); 211 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 212 static void i_mdi_phci_post_detach(dev_info_t *, 213 ddi_detach_cmd_t, int); 214 static int i_mdi_client_pre_detach(dev_info_t *, 215 ddi_detach_cmd_t); 216 static void i_mdi_client_post_detach(dev_info_t *, 217 ddi_detach_cmd_t, int); 218 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 219 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 220 static int i_mdi_lba_lb(mdi_client_t *ct, 221 mdi_pathinfo_t **ret_pip, struct buf *buf); 222 static void i_mdi_pm_hold_client(mdi_client_t *, int); 223 static void i_mdi_pm_rele_client(mdi_client_t *, int); 224 static void i_mdi_pm_reset_client(mdi_client_t *); 225 static int i_mdi_power_all_phci(mdi_client_t *); 226 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 227 228 229 /* 230 * Internal mdi_pathinfo node functions 231 */ 232 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 233 234 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 235 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 236 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 237 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 238 static void i_mdi_phci_unlock(mdi_phci_t *); 239 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 240 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 241 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 242 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 243 mdi_client_t *); 244 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 245 static void 
i_mdi_client_remove_path(mdi_client_t *, 246 mdi_pathinfo_t *); 247 248 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 249 mdi_pathinfo_state_t, int); 250 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 251 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 252 char **, int); 253 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 254 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 255 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 256 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 257 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 258 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 259 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 260 static void i_mdi_client_update_state(mdi_client_t *); 261 static int i_mdi_client_compute_state(mdi_client_t *, 262 mdi_phci_t *); 263 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 264 static void i_mdi_client_unlock(mdi_client_t *); 265 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 266 static mdi_client_t *i_devi_get_client(dev_info_t *); 267 /* 268 * NOTE: this will be removed once the NWS files are changed to use the new 269 * mdi_{enable,disable}_path interfaces 270 */ 271 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 272 int, int); 273 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 274 mdi_vhci_t *vh, int flags, int op); 275 /* 276 * Failover related function prototypes 277 */ 278 static int i_mdi_failover(void *); 279 280 /* 281 * misc internal functions 282 */ 283 static int i_mdi_get_hash_key(char *); 284 static int i_map_nvlist_error_to_mdi(int); 285 static void i_mdi_report_path_state(mdi_client_t *, 286 mdi_pathinfo_t *); 287 288 static void setup_vhci_cache(mdi_vhci_t *); 289 static int destroy_vhci_cache(mdi_vhci_t *); 290 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 
291 static boolean_t stop_vhcache_flush_thread(void *, int); 292 static void free_string_array(char **, int); 293 static void free_vhcache_phci(mdi_vhcache_phci_t *); 294 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 295 static void free_vhcache_client(mdi_vhcache_client_t *); 296 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 297 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 298 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 299 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 300 static void vhcache_pi_add(mdi_vhci_config_t *, 301 struct mdi_pathinfo *); 302 static void vhcache_pi_remove(mdi_vhci_config_t *, 303 struct mdi_pathinfo *); 304 static void free_phclient_path_list(mdi_phys_path_t *); 305 static void sort_vhcache_paths(mdi_vhcache_client_t *); 306 static int flush_vhcache(mdi_vhci_config_t *, int); 307 static void vhcache_dirty(mdi_vhci_config_t *); 308 static void free_async_client_config(mdi_async_client_config_t *); 309 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 310 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 311 static nvlist_t *read_on_disk_vhci_cache(char *); 312 extern int fread_nvlist(char *, nvlist_t **); 313 extern int fwrite_nvlist(char *, nvlist_t *); 314 315 /* called once when first vhci registers with mdi */ 316 static void 317 i_mdi_init() 318 { 319 static int initialized = 0; 320 321 if (initialized) 322 return; 323 initialized = 1; 324 325 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 326 327 /* Create our taskq resources */ 328 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 329 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 330 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 331 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 332 333 /* Allocate ['path_instance' <-> "path"] maps */ 334 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 335 mdi_pathmap_bypath = mod_hash_create_strhash( 
336 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 337 mod_hash_null_valdtor); 338 mdi_pathmap_byinstance = mod_hash_create_idhash( 339 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 340 mod_hash_null_valdtor); 341 mdi_pathmap_sbyinstance = mod_hash_create_idhash( 342 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size, 343 mod_hash_null_valdtor); 344 } 345 346 /* 347 * mdi_get_component_type(): 348 * Return mpxio component type 349 * Return Values: 350 * MDI_COMPONENT_NONE 351 * MDI_COMPONENT_VHCI 352 * MDI_COMPONENT_PHCI 353 * MDI_COMPONENT_CLIENT 354 * XXX This doesn't work under multi-level MPxIO and should be 355 * removed when clients migrate mdi_component_is_*() interfaces. 356 */ 357 int 358 mdi_get_component_type(dev_info_t *dip) 359 { 360 return (DEVI(dip)->devi_mdi_component); 361 } 362 363 /* 364 * mdi_vhci_register(): 365 * Register a vHCI module with the mpxio framework 366 * mdi_vhci_register() is called by vHCI drivers to register the 367 * 'class_driver' vHCI driver and its MDI entrypoints with the 368 * mpxio framework. The vHCI driver must call this interface as 369 * part of its attach(9e) handler. 370 * Competing threads may try to attach mdi_vhci_register() as 371 * the vHCI drivers are loaded and attached as a result of pHCI 372 * driver instance registration (mdi_phci_register()) with the 373 * framework. 374 * Return Values: 375 * MDI_SUCCESS 376 * MDI_FAILURE 377 */ 378 /*ARGSUSED*/ 379 int 380 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 381 int flags) 382 { 383 mdi_vhci_t *vh = NULL; 384 385 /* Registrant can't be older */ 386 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 387 388 #ifdef DEBUG 389 /* 390 * IB nexus driver is loaded only when IB hardware is present. 391 * In order to be able to do this there is a need to drive the loading 392 * and attaching of the IB nexus driver (especially when an IB hardware 393 * is dynamically plugged in) when an IB HCA driver (PHCI) 394 * is being attached. 
Unfortunately this gets into the limitations 395 * of devfs as there seems to be no clean way to drive configuration 396 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 397 * for IB. 398 */ 399 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 400 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 401 #endif 402 403 i_mdi_init(); 404 405 mutex_enter(&mdi_mutex); 406 /* 407 * Scan for already registered vhci 408 */ 409 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 410 if (strcmp(vh->vh_class, class) == 0) { 411 /* 412 * vHCI has already been created. Check for valid 413 * vHCI ops registration. We only support one vHCI 414 * module per class 415 */ 416 if (vh->vh_ops != NULL) { 417 mutex_exit(&mdi_mutex); 418 cmn_err(CE_NOTE, vhci_greeting, class); 419 return (MDI_FAILURE); 420 } 421 break; 422 } 423 } 424 425 /* 426 * if not yet created, create the vHCI component 427 */ 428 if (vh == NULL) { 429 struct client_hash *hash = NULL; 430 char *load_balance; 431 432 /* 433 * Allocate and initialize the mdi extensions 434 */ 435 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 436 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 437 KM_SLEEP); 438 vh->vh_client_table = hash; 439 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 440 (void) strcpy(vh->vh_class, class); 441 vh->vh_lb = LOAD_BALANCE_RR; 442 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 443 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 444 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 445 vh->vh_lb = LOAD_BALANCE_NONE; 446 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 447 == 0) { 448 vh->vh_lb = LOAD_BALANCE_LBA; 449 } 450 ddi_prop_free(load_balance); 451 } 452 453 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 454 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 455 456 /* 457 * Store the vHCI ops vectors 458 */ 459 vh->vh_dip = vdip; 460 vh->vh_ops = vops; 461 462 setup_vhci_cache(vh); 463 464 if (mdi_vhci_head == 
NULL) { 465 mdi_vhci_head = vh; 466 } 467 if (mdi_vhci_tail) { 468 mdi_vhci_tail->vh_next = vh; 469 } 470 mdi_vhci_tail = vh; 471 mdi_vhci_count++; 472 } 473 474 /* 475 * Claim the devfs node as a vhci component 476 */ 477 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 478 479 /* 480 * Initialize our back reference from dev_info node 481 */ 482 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 483 mutex_exit(&mdi_mutex); 484 return (MDI_SUCCESS); 485 } 486 487 /* 488 * mdi_vhci_unregister(): 489 * Unregister a vHCI module from mpxio framework 490 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 491 * of a vhci to unregister it from the framework. 492 * Return Values: 493 * MDI_SUCCESS 494 * MDI_FAILURE 495 */ 496 /*ARGSUSED*/ 497 int 498 mdi_vhci_unregister(dev_info_t *vdip, int flags) 499 { 500 mdi_vhci_t *found, *vh, *prev = NULL; 501 502 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 503 504 /* 505 * Check for invalid VHCI 506 */ 507 if ((vh = i_devi_get_vhci(vdip)) == NULL) 508 return (MDI_FAILURE); 509 510 /* 511 * Scan the list of registered vHCIs for a match 512 */ 513 mutex_enter(&mdi_mutex); 514 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 515 if (found == vh) 516 break; 517 prev = found; 518 } 519 520 if (found == NULL) { 521 mutex_exit(&mdi_mutex); 522 return (MDI_FAILURE); 523 } 524 525 /* 526 * Check the vHCI, pHCI and client count. All the pHCIs and clients 527 * should have been unregistered, before a vHCI can be 528 * unregistered. 
529 */ 530 MDI_VHCI_PHCI_LOCK(vh); 531 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 532 MDI_VHCI_PHCI_UNLOCK(vh); 533 mutex_exit(&mdi_mutex); 534 return (MDI_FAILURE); 535 } 536 MDI_VHCI_PHCI_UNLOCK(vh); 537 538 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 539 mutex_exit(&mdi_mutex); 540 return (MDI_FAILURE); 541 } 542 543 /* 544 * Remove the vHCI from the global list 545 */ 546 if (vh == mdi_vhci_head) { 547 mdi_vhci_head = vh->vh_next; 548 } else { 549 prev->vh_next = vh->vh_next; 550 } 551 if (vh == mdi_vhci_tail) { 552 mdi_vhci_tail = prev; 553 } 554 mdi_vhci_count--; 555 mutex_exit(&mdi_mutex); 556 557 vh->vh_ops = NULL; 558 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 559 DEVI(vdip)->devi_mdi_xhci = NULL; 560 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 561 kmem_free(vh->vh_client_table, 562 mdi_client_table_size * sizeof (struct client_hash)); 563 mutex_destroy(&vh->vh_phci_mutex); 564 mutex_destroy(&vh->vh_client_mutex); 565 566 kmem_free(vh, sizeof (mdi_vhci_t)); 567 return (MDI_SUCCESS); 568 } 569 570 /* 571 * i_mdi_vhci_class2vhci(): 572 * Look for a matching vHCI module given a vHCI class name 573 * Return Values: 574 * Handle to a vHCI component 575 * NULL 576 */ 577 static mdi_vhci_t * 578 i_mdi_vhci_class2vhci(char *class) 579 { 580 mdi_vhci_t *vh = NULL; 581 582 ASSERT(!MUTEX_HELD(&mdi_mutex)); 583 584 mutex_enter(&mdi_mutex); 585 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 586 if (strcmp(vh->vh_class, class) == 0) { 587 break; 588 } 589 } 590 mutex_exit(&mdi_mutex); 591 return (vh); 592 } 593 594 /* 595 * i_devi_get_vhci(): 596 * Utility function to get the handle to a vHCI component 597 * Return Values: 598 * Handle to a vHCI component 599 * NULL 600 */ 601 mdi_vhci_t * 602 i_devi_get_vhci(dev_info_t *vdip) 603 { 604 mdi_vhci_t *vh = NULL; 605 if (MDI_VHCI(vdip)) { 606 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 607 } 608 return (vh); 609 } 610 611 /* 612 * mdi_phci_register(): 613 * Register a pHCI 
module with mpxio framework 614 * mdi_phci_register() is called by pHCI drivers to register with 615 * the mpxio framework and a specific 'class_driver' vHCI. The 616 * pHCI driver must call this interface as part of its attach(9e) 617 * handler. 618 * Return Values: 619 * MDI_SUCCESS 620 * MDI_FAILURE 621 */ 622 /*ARGSUSED*/ 623 int 624 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 625 { 626 mdi_phci_t *ph; 627 mdi_vhci_t *vh; 628 char *data; 629 630 /* 631 * Some subsystems, like fcp, perform pHCI registration from a 632 * different thread than the one doing the pHCI attach(9E) - the 633 * driver attach code is waiting for this other thread to complete. 634 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 635 * (indicating that some thread has done an ndi_devi_enter of parent) 636 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 637 */ 638 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 639 640 /* 641 * Check for mpxio-disable property. Enable mpxio if the property is 642 * missing or not set to "yes". 643 * If the property is set to "yes" then emit a brief message. 
644 */ 645 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 646 &data) == DDI_SUCCESS)) { 647 if (strcmp(data, "yes") == 0) { 648 MDI_DEBUG(1, (MDI_CONT, pdip, 649 "?multipath capabilities disabled via %s.conf.", 650 ddi_driver_name(pdip))); 651 ddi_prop_free(data); 652 return (MDI_FAILURE); 653 } 654 ddi_prop_free(data); 655 } 656 657 /* 658 * Search for a matching vHCI 659 */ 660 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 661 if (vh == NULL) { 662 return (MDI_FAILURE); 663 } 664 665 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 666 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 667 ph->ph_dip = pdip; 668 ph->ph_vhci = vh; 669 ph->ph_next = NULL; 670 ph->ph_unstable = 0; 671 ph->ph_vprivate = 0; 672 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 673 674 MDI_PHCI_LOCK(ph); 675 MDI_PHCI_SET_POWER_UP(ph); 676 MDI_PHCI_UNLOCK(ph); 677 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 678 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 679 680 vhcache_phci_add(vh->vh_config, ph); 681 682 MDI_VHCI_PHCI_LOCK(vh); 683 if (vh->vh_phci_head == NULL) { 684 vh->vh_phci_head = ph; 685 } 686 if (vh->vh_phci_tail) { 687 vh->vh_phci_tail->ph_next = ph; 688 } 689 vh->vh_phci_tail = ph; 690 vh->vh_phci_count++; 691 MDI_VHCI_PHCI_UNLOCK(vh); 692 693 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 694 return (MDI_SUCCESS); 695 } 696 697 /* 698 * mdi_phci_unregister(): 699 * Unregister a pHCI module from mpxio framework 700 * mdi_phci_unregister() is called by the pHCI drivers from their 701 * detach(9E) handler to unregister their instances from the 702 * framework. 
703 * Return Values: 704 * MDI_SUCCESS 705 * MDI_FAILURE 706 */ 707 /*ARGSUSED*/ 708 int 709 mdi_phci_unregister(dev_info_t *pdip, int flags) 710 { 711 mdi_vhci_t *vh; 712 mdi_phci_t *ph; 713 mdi_phci_t *tmp; 714 mdi_phci_t *prev = NULL; 715 mdi_pathinfo_t *pip; 716 717 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 718 719 ph = i_devi_get_phci(pdip); 720 if (ph == NULL) { 721 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI")); 722 return (MDI_FAILURE); 723 } 724 725 vh = ph->ph_vhci; 726 ASSERT(vh != NULL); 727 if (vh == NULL) { 728 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI")); 729 return (MDI_FAILURE); 730 } 731 732 MDI_VHCI_PHCI_LOCK(vh); 733 tmp = vh->vh_phci_head; 734 while (tmp) { 735 if (tmp == ph) { 736 break; 737 } 738 prev = tmp; 739 tmp = tmp->ph_next; 740 } 741 742 if (ph == vh->vh_phci_head) { 743 vh->vh_phci_head = ph->ph_next; 744 } else { 745 prev->ph_next = ph->ph_next; 746 } 747 748 if (ph == vh->vh_phci_tail) { 749 vh->vh_phci_tail = prev; 750 } 751 752 vh->vh_phci_count--; 753 MDI_VHCI_PHCI_UNLOCK(vh); 754 755 /* Walk remaining pathinfo nodes and disassociate them from pHCI */ 756 MDI_PHCI_LOCK(ph); 757 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip; 758 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) 759 MDI_PI(pip)->pi_phci = NULL; 760 MDI_PHCI_UNLOCK(ph); 761 762 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 763 ESC_DDI_INITIATOR_UNREGISTER); 764 vhcache_phci_remove(vh->vh_config, ph); 765 cv_destroy(&ph->ph_unstable_cv); 766 mutex_destroy(&ph->ph_mutex); 767 kmem_free(ph, sizeof (mdi_phci_t)); 768 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 769 DEVI(pdip)->devi_mdi_xhci = NULL; 770 return (MDI_SUCCESS); 771 } 772 773 /* 774 * i_devi_get_phci(): 775 * Utility function to return the phci extensions. 
776 */ 777 static mdi_phci_t * 778 i_devi_get_phci(dev_info_t *pdip) 779 { 780 mdi_phci_t *ph = NULL; 781 782 if (MDI_PHCI(pdip)) { 783 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 784 } 785 return (ph); 786 } 787 788 /* 789 * Single thread mdi entry into devinfo node for modifying its children. 790 * If necessary we perform an ndi_devi_enter of the vHCI before doing 791 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 792 * for the vHCI and one for the pHCI. 793 */ 794 void 795 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 796 { 797 dev_info_t *vdip; 798 int vcircular, pcircular; 799 800 /* Verify calling context */ 801 ASSERT(MDI_PHCI(phci_dip)); 802 vdip = mdi_devi_get_vdip(phci_dip); 803 ASSERT(vdip); /* A pHCI always has a vHCI */ 804 805 /* 806 * If pHCI is detaching then the framework has already entered the 807 * vHCI on a threads that went down the code path leading to 808 * detach_node(). This framework enter of the vHCI during pHCI 809 * detach is done to avoid deadlock with vHCI power management 810 * operations which enter the vHCI and the enter down the path 811 * to the pHCI. If pHCI is detaching then we piggyback this calls 812 * enter of the vHCI on frameworks vHCI enter that has already 813 * occurred - this is OK because we know that the framework thread 814 * doing detach is waiting for our completion. 815 * 816 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 817 * race with detach - but we can't do that because the framework has 818 * already entered the parent, so we have some complexity instead. 
819 */ 820 for (;;) { 821 if (ndi_devi_tryenter(vdip, &vcircular)) { 822 ASSERT(vcircular != -1); 823 if (DEVI_IS_DETACHING(phci_dip)) { 824 ndi_devi_exit(vdip, vcircular); 825 vcircular = -1; 826 } 827 break; 828 } else if (DEVI_IS_DETACHING(phci_dip)) { 829 vcircular = -1; 830 break; 831 } else if (servicing_interrupt()) { 832 /* 833 * Don't delay an interrupt (and ensure adaptive 834 * mutex inversion support). 835 */ 836 ndi_devi_enter(vdip, &vcircular); 837 break; 838 } else { 839 delay_random(mdi_delay); 840 } 841 } 842 843 ndi_devi_enter(phci_dip, &pcircular); 844 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 845 } 846 847 /* 848 * Attempt to mdi_devi_enter. 849 */ 850 int 851 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 852 { 853 dev_info_t *vdip; 854 int vcircular, pcircular; 855 856 /* Verify calling context */ 857 ASSERT(MDI_PHCI(phci_dip)); 858 vdip = mdi_devi_get_vdip(phci_dip); 859 ASSERT(vdip); /* A pHCI always has a vHCI */ 860 861 if (ndi_devi_tryenter(vdip, &vcircular)) { 862 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 863 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 864 return (1); /* locked */ 865 } 866 ndi_devi_exit(vdip, vcircular); 867 } 868 return (0); /* busy */ 869 } 870 871 /* 872 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
873 */ 874 void 875 mdi_devi_exit(dev_info_t *phci_dip, int circular) 876 { 877 dev_info_t *vdip; 878 int vcircular, pcircular; 879 880 /* Verify calling context */ 881 ASSERT(MDI_PHCI(phci_dip)); 882 vdip = mdi_devi_get_vdip(phci_dip); 883 ASSERT(vdip); /* A pHCI always has a vHCI */ 884 885 /* extract two circular recursion values from single int */ 886 pcircular = (short)(circular & 0xFFFF); 887 vcircular = (short)((circular >> 16) & 0xFFFF); 888 889 ndi_devi_exit(phci_dip, pcircular); 890 if (vcircular != -1) 891 ndi_devi_exit(vdip, vcircular); 892 } 893 894 /* 895 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 896 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 897 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 898 * with vHCI power management code during path online/offline. Each 899 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 900 * occur within the scope of an active mdi_devi_enter that establishes the 901 * circular value. 902 */ 903 void 904 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 905 { 906 int pcircular; 907 908 /* Verify calling context */ 909 ASSERT(MDI_PHCI(phci_dip)); 910 911 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 912 ndi_hold_devi(phci_dip); 913 914 pcircular = (short)(circular & 0xFFFF); 915 ndi_devi_exit(phci_dip, pcircular); 916 } 917 918 void 919 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 920 { 921 int pcircular; 922 923 /* Verify calling context */ 924 ASSERT(MDI_PHCI(phci_dip)); 925 926 ndi_devi_enter(phci_dip, &pcircular); 927 928 /* Drop hold from mdi_devi_exit_phci. 
*/ 929 ndi_rele_devi(phci_dip); 930 931 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 932 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 933 } 934 935 /* 936 * mdi_devi_get_vdip(): 937 * given a pHCI dip return vHCI dip 938 */ 939 dev_info_t * 940 mdi_devi_get_vdip(dev_info_t *pdip) 941 { 942 mdi_phci_t *ph; 943 944 ph = i_devi_get_phci(pdip); 945 if (ph && ph->ph_vhci) 946 return (ph->ph_vhci->vh_dip); 947 return (NULL); 948 } 949 950 /* 951 * mdi_devi_pdip_entered(): 952 * Return 1 if we are vHCI and have done an ndi_devi_enter 953 * of a pHCI 954 */ 955 int 956 mdi_devi_pdip_entered(dev_info_t *vdip) 957 { 958 mdi_vhci_t *vh; 959 mdi_phci_t *ph; 960 961 vh = i_devi_get_vhci(vdip); 962 if (vh == NULL) 963 return (0); 964 965 MDI_VHCI_PHCI_LOCK(vh); 966 ph = vh->vh_phci_head; 967 while (ph) { 968 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 969 MDI_VHCI_PHCI_UNLOCK(vh); 970 return (1); 971 } 972 ph = ph->ph_next; 973 } 974 MDI_VHCI_PHCI_UNLOCK(vh); 975 return (0); 976 } 977 978 /* 979 * mdi_phci_path2devinfo(): 980 * Utility function to search for a valid phci device given 981 * the devfs pathname. 
982 */ 983 dev_info_t * 984 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 985 { 986 char *temp_pathname; 987 mdi_vhci_t *vh; 988 mdi_phci_t *ph; 989 dev_info_t *pdip = NULL; 990 991 vh = i_devi_get_vhci(vdip); 992 ASSERT(vh != NULL); 993 994 if (vh == NULL) { 995 /* 996 * Invalid vHCI component, return failure 997 */ 998 return (NULL); 999 } 1000 1001 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1002 MDI_VHCI_PHCI_LOCK(vh); 1003 ph = vh->vh_phci_head; 1004 while (ph != NULL) { 1005 pdip = ph->ph_dip; 1006 ASSERT(pdip != NULL); 1007 *temp_pathname = '\0'; 1008 (void) ddi_pathname(pdip, temp_pathname); 1009 if (strcmp(temp_pathname, pathname) == 0) { 1010 break; 1011 } 1012 ph = ph->ph_next; 1013 } 1014 if (ph == NULL) { 1015 pdip = NULL; 1016 } 1017 MDI_VHCI_PHCI_UNLOCK(vh); 1018 kmem_free(temp_pathname, MAXPATHLEN); 1019 return (pdip); 1020 } 1021 1022 /* 1023 * mdi_phci_get_path_count(): 1024 * get number of path information nodes associated with a given 1025 * pHCI device. 1026 */ 1027 int 1028 mdi_phci_get_path_count(dev_info_t *pdip) 1029 { 1030 mdi_phci_t *ph; 1031 int count = 0; 1032 1033 ph = i_devi_get_phci(pdip); 1034 if (ph != NULL) { 1035 count = ph->ph_path_count; 1036 } 1037 return (count); 1038 } 1039 1040 /* 1041 * i_mdi_phci_lock(): 1042 * Lock a pHCI device 1043 * Return Values: 1044 * None 1045 * Note: 1046 * The default locking order is: 1047 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1048 * But there are number of situations where locks need to be 1049 * grabbed in reverse order. This routine implements try and lock 1050 * mechanism depending on the requested parameter option. 1051 */ 1052 static void 1053 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1054 { 1055 if (pip) { 1056 /* Reverse locking is requested. 
*/ 1057 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1058 if (servicing_interrupt()) { 1059 MDI_PI_HOLD(pip); 1060 MDI_PI_UNLOCK(pip); 1061 MDI_PHCI_LOCK(ph); 1062 MDI_PI_LOCK(pip); 1063 MDI_PI_RELE(pip); 1064 break; 1065 } else { 1066 /* 1067 * tryenter failed. Try to grab again 1068 * after a small delay 1069 */ 1070 MDI_PI_HOLD(pip); 1071 MDI_PI_UNLOCK(pip); 1072 delay_random(mdi_delay); 1073 MDI_PI_LOCK(pip); 1074 MDI_PI_RELE(pip); 1075 } 1076 } 1077 } else { 1078 MDI_PHCI_LOCK(ph); 1079 } 1080 } 1081 1082 /* 1083 * i_mdi_phci_unlock(): 1084 * Unlock the pHCI component 1085 */ 1086 static void 1087 i_mdi_phci_unlock(mdi_phci_t *ph) 1088 { 1089 MDI_PHCI_UNLOCK(ph); 1090 } 1091 1092 /* 1093 * i_mdi_devinfo_create(): 1094 * create client device's devinfo node 1095 * Return Values: 1096 * dev_info 1097 * NULL 1098 * Notes: 1099 */ 1100 static dev_info_t * 1101 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1102 char **compatible, int ncompatible) 1103 { 1104 dev_info_t *cdip = NULL; 1105 1106 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1107 1108 /* Verify for duplicate entry */ 1109 cdip = i_mdi_devinfo_find(vh, name, guid); 1110 ASSERT(cdip == NULL); 1111 if (cdip) { 1112 cmn_err(CE_WARN, 1113 "i_mdi_devinfo_create: client %s@%s already exists", 1114 name ? name : "", guid ? 
guid : ""); 1115 } 1116 1117 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1118 if (cdip == NULL) 1119 goto fail; 1120 1121 /* 1122 * Create component type and Global unique identifier 1123 * properties 1124 */ 1125 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1126 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1127 goto fail; 1128 } 1129 1130 /* Decorate the node with compatible property */ 1131 if (compatible && 1132 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1133 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1134 goto fail; 1135 } 1136 1137 return (cdip); 1138 1139 fail: 1140 if (cdip) { 1141 (void) ndi_prop_remove_all(cdip); 1142 (void) ndi_devi_free(cdip); 1143 } 1144 return (NULL); 1145 } 1146 1147 /* 1148 * i_mdi_devinfo_find(): 1149 * Find a matching devinfo node for given client node name 1150 * and its guid. 1151 * Return Values: 1152 * Handle to a dev_info node or NULL 1153 */ 1154 static dev_info_t * 1155 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1156 { 1157 char *data; 1158 dev_info_t *cdip = NULL; 1159 dev_info_t *ndip = NULL; 1160 int circular; 1161 1162 ndi_devi_enter(vh->vh_dip, &circular); 1163 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1164 while ((cdip = ndip) != NULL) { 1165 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1166 1167 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1168 continue; 1169 } 1170 1171 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1172 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1173 &data) != DDI_PROP_SUCCESS) { 1174 continue; 1175 } 1176 1177 if (strcmp(data, guid) != 0) { 1178 ddi_prop_free(data); 1179 continue; 1180 } 1181 ddi_prop_free(data); 1182 break; 1183 } 1184 ndi_devi_exit(vh->vh_dip, circular); 1185 return (cdip); 1186 } 1187 1188 /* 1189 * i_mdi_devinfo_remove(): 1190 * Remove a client device node 1191 */ 1192 static int 1193 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1194 { 1195 int rv = MDI_SUCCESS; 1196 
1197 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1198 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1199 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1200 if (rv != NDI_SUCCESS) { 1201 MDI_DEBUG(1, (MDI_NOTE, cdip, 1202 "!failed: cdip %p", (void *)cdip)); 1203 } 1204 /* 1205 * Convert to MDI error code 1206 */ 1207 switch (rv) { 1208 case NDI_SUCCESS: 1209 rv = MDI_SUCCESS; 1210 break; 1211 case NDI_BUSY: 1212 rv = MDI_BUSY; 1213 break; 1214 default: 1215 rv = MDI_FAILURE; 1216 break; 1217 } 1218 } 1219 return (rv); 1220 } 1221 1222 /* 1223 * i_devi_get_client() 1224 * Utility function to get mpxio component extensions 1225 */ 1226 static mdi_client_t * 1227 i_devi_get_client(dev_info_t *cdip) 1228 { 1229 mdi_client_t *ct = NULL; 1230 1231 if (MDI_CLIENT(cdip)) { 1232 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1233 } 1234 return (ct); 1235 } 1236 1237 /* 1238 * i_mdi_is_child_present(): 1239 * Search for the presence of client device dev_info node 1240 */ 1241 static int 1242 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1243 { 1244 int rv = MDI_FAILURE; 1245 struct dev_info *dip; 1246 int circular; 1247 1248 ndi_devi_enter(vdip, &circular); 1249 dip = DEVI(vdip)->devi_child; 1250 while (dip) { 1251 if (dip == DEVI(cdip)) { 1252 rv = MDI_SUCCESS; 1253 break; 1254 } 1255 dip = dip->devi_sibling; 1256 } 1257 ndi_devi_exit(vdip, circular); 1258 return (rv); 1259 } 1260 1261 1262 /* 1263 * i_mdi_client_lock(): 1264 * Grab client component lock 1265 * Return Values: 1266 * None 1267 * Note: 1268 * The default locking order is: 1269 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1270 * But there are number of situations where locks need to be 1271 * grabbed in reverse order. This routine implements try and lock 1272 * mechanism depending on the requested parameter option. 
1273 */ 1274 static void 1275 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1276 { 1277 if (pip) { 1278 /* 1279 * Reverse locking is requested. 1280 */ 1281 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1282 if (servicing_interrupt()) { 1283 MDI_PI_HOLD(pip); 1284 MDI_PI_UNLOCK(pip); 1285 MDI_CLIENT_LOCK(ct); 1286 MDI_PI_LOCK(pip); 1287 MDI_PI_RELE(pip); 1288 break; 1289 } else { 1290 /* 1291 * tryenter failed. Try to grab again 1292 * after a small delay 1293 */ 1294 MDI_PI_HOLD(pip); 1295 MDI_PI_UNLOCK(pip); 1296 delay_random(mdi_delay); 1297 MDI_PI_LOCK(pip); 1298 MDI_PI_RELE(pip); 1299 } 1300 } 1301 } else { 1302 MDI_CLIENT_LOCK(ct); 1303 } 1304 } 1305 1306 /* 1307 * i_mdi_client_unlock(): 1308 * Unlock a client component 1309 */ 1310 static void 1311 i_mdi_client_unlock(mdi_client_t *ct) 1312 { 1313 MDI_CLIENT_UNLOCK(ct); 1314 } 1315 1316 /* 1317 * i_mdi_client_alloc(): 1318 * Allocate and initialize a client structure. Caller should 1319 * hold the vhci client lock. 1320 * Return Values: 1321 * Handle to a client component 1322 */ 1323 /*ARGSUSED*/ 1324 static mdi_client_t * 1325 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1326 { 1327 mdi_client_t *ct; 1328 1329 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1330 1331 /* 1332 * Allocate and initialize a component structure. 
1333 */ 1334 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1335 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1336 ct->ct_hnext = NULL; 1337 ct->ct_hprev = NULL; 1338 ct->ct_dip = NULL; 1339 ct->ct_vhci = vh; 1340 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1341 (void) strcpy(ct->ct_drvname, name); 1342 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1343 (void) strcpy(ct->ct_guid, lguid); 1344 ct->ct_cprivate = NULL; 1345 ct->ct_vprivate = NULL; 1346 ct->ct_flags = 0; 1347 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1348 MDI_CLIENT_LOCK(ct); 1349 MDI_CLIENT_SET_OFFLINE(ct); 1350 MDI_CLIENT_SET_DETACH(ct); 1351 MDI_CLIENT_SET_POWER_UP(ct); 1352 MDI_CLIENT_UNLOCK(ct); 1353 ct->ct_failover_flags = 0; 1354 ct->ct_failover_status = 0; 1355 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1356 ct->ct_unstable = 0; 1357 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1358 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1359 ct->ct_lb = vh->vh_lb; 1360 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1361 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1362 ct->ct_path_count = 0; 1363 ct->ct_path_head = NULL; 1364 ct->ct_path_tail = NULL; 1365 ct->ct_path_last = NULL; 1366 1367 /* 1368 * Add this client component to our client hash queue 1369 */ 1370 i_mdi_client_enlist_table(vh, ct); 1371 return (ct); 1372 } 1373 1374 /* 1375 * i_mdi_client_enlist_table(): 1376 * Attach the client device to the client hash table. Caller 1377 * should hold the vhci client lock. 
1378 */ 1379 static void 1380 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1381 { 1382 int index; 1383 struct client_hash *head; 1384 1385 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1386 1387 index = i_mdi_get_hash_key(ct->ct_guid); 1388 head = &vh->vh_client_table[index]; 1389 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1390 head->ct_hash_head = ct; 1391 head->ct_hash_count++; 1392 vh->vh_client_count++; 1393 } 1394 1395 /* 1396 * i_mdi_client_delist_table(): 1397 * Attach the client device to the client hash table. 1398 * Caller should hold the vhci client lock. 1399 */ 1400 static void 1401 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1402 { 1403 int index; 1404 char *guid; 1405 struct client_hash *head; 1406 mdi_client_t *next; 1407 mdi_client_t *last; 1408 1409 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1410 1411 guid = ct->ct_guid; 1412 index = i_mdi_get_hash_key(guid); 1413 head = &vh->vh_client_table[index]; 1414 1415 last = NULL; 1416 next = (mdi_client_t *)head->ct_hash_head; 1417 while (next != NULL) { 1418 if (next == ct) { 1419 break; 1420 } 1421 last = next; 1422 next = next->ct_hnext; 1423 } 1424 1425 if (next) { 1426 head->ct_hash_count--; 1427 if (last == NULL) { 1428 head->ct_hash_head = ct->ct_hnext; 1429 } else { 1430 last->ct_hnext = ct->ct_hnext; 1431 } 1432 ct->ct_hnext = NULL; 1433 vh->vh_client_count--; 1434 } 1435 } 1436 1437 1438 /* 1439 * i_mdi_client_free(): 1440 * Free a client component 1441 */ 1442 static int 1443 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1444 { 1445 int rv = MDI_SUCCESS; 1446 int flags = ct->ct_flags; 1447 dev_info_t *cdip; 1448 dev_info_t *vdip; 1449 1450 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1451 1452 vdip = vh->vh_dip; 1453 cdip = ct->ct_dip; 1454 1455 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1456 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1457 DEVI(cdip)->devi_mdi_client = NULL; 1458 1459 /* 1460 * Clear out back ref. 
to dev_info_t node 1461 */ 1462 ct->ct_dip = NULL; 1463 1464 /* 1465 * Remove this client from our hash queue 1466 */ 1467 i_mdi_client_delist_table(vh, ct); 1468 1469 /* 1470 * Uninitialize and free the component 1471 */ 1472 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1473 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1474 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1475 cv_destroy(&ct->ct_failover_cv); 1476 cv_destroy(&ct->ct_unstable_cv); 1477 cv_destroy(&ct->ct_powerchange_cv); 1478 mutex_destroy(&ct->ct_mutex); 1479 kmem_free(ct, sizeof (*ct)); 1480 1481 if (cdip != NULL) { 1482 MDI_VHCI_CLIENT_UNLOCK(vh); 1483 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1484 MDI_VHCI_CLIENT_LOCK(vh); 1485 } 1486 return (rv); 1487 } 1488 1489 /* 1490 * i_mdi_client_find(): 1491 * Find the client structure corresponding to a given guid 1492 * Caller should hold the vhci client lock. 1493 */ 1494 static mdi_client_t * 1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1496 { 1497 int index; 1498 struct client_hash *head; 1499 mdi_client_t *ct; 1500 1501 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1502 1503 index = i_mdi_get_hash_key(guid); 1504 head = &vh->vh_client_table[index]; 1505 1506 ct = head->ct_hash_head; 1507 while (ct != NULL) { 1508 if (strcmp(ct->ct_guid, guid) == 0 && 1509 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1510 break; 1511 } 1512 ct = ct->ct_hnext; 1513 } 1514 return (ct); 1515 } 1516 1517 /* 1518 * i_mdi_client_update_state(): 1519 * Compute and update client device state 1520 * Notes: 1521 * A client device can be in any of three possible states: 1522 * 1523 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1524 * one online/standby paths. Can tolerate failures. 1525 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1526 * no alternate paths available as standby. A failure on the online 1527 * would result in loss of access to device data. 
1528 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1529 * no paths available to access the device. 1530 */ 1531 static void 1532 i_mdi_client_update_state(mdi_client_t *ct) 1533 { 1534 int state; 1535 1536 ASSERT(MDI_CLIENT_LOCKED(ct)); 1537 state = i_mdi_client_compute_state(ct, NULL); 1538 MDI_CLIENT_SET_STATE(ct, state); 1539 } 1540 1541 /* 1542 * i_mdi_client_compute_state(): 1543 * Compute client device state 1544 * 1545 * mdi_phci_t * Pointer to pHCI structure which should 1546 * while computing the new value. Used by 1547 * i_mdi_phci_offline() to find the new 1548 * client state after DR of a pHCI. 1549 */ 1550 static int 1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1552 { 1553 int state; 1554 int online_count = 0; 1555 int standby_count = 0; 1556 mdi_pathinfo_t *pip, *next; 1557 1558 ASSERT(MDI_CLIENT_LOCKED(ct)); 1559 pip = ct->ct_path_head; 1560 while (pip != NULL) { 1561 MDI_PI_LOCK(pip); 1562 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1563 if (MDI_PI(pip)->pi_phci == ph) { 1564 MDI_PI_UNLOCK(pip); 1565 pip = next; 1566 continue; 1567 } 1568 1569 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1570 == MDI_PATHINFO_STATE_ONLINE) 1571 online_count++; 1572 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1573 == MDI_PATHINFO_STATE_STANDBY) 1574 standby_count++; 1575 MDI_PI_UNLOCK(pip); 1576 pip = next; 1577 } 1578 1579 if (online_count == 0) { 1580 if (standby_count == 0) { 1581 state = MDI_CLIENT_STATE_FAILED; 1582 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1583 "client state failed: ct = %p", (void *)ct)); 1584 } else if (standby_count == 1) { 1585 state = MDI_CLIENT_STATE_DEGRADED; 1586 } else { 1587 state = MDI_CLIENT_STATE_OPTIMAL; 1588 } 1589 } else if (online_count == 1) { 1590 if (standby_count == 0) { 1591 state = MDI_CLIENT_STATE_DEGRADED; 1592 } else { 1593 state = MDI_CLIENT_STATE_OPTIMAL; 1594 } 1595 } else { 1596 state = MDI_CLIENT_STATE_OPTIMAL; 1597 } 1598 return (state); 1599 } 1600 
1601 /* 1602 * i_mdi_client2devinfo(): 1603 * Utility function 1604 */ 1605 dev_info_t * 1606 i_mdi_client2devinfo(mdi_client_t *ct) 1607 { 1608 return (ct->ct_dip); 1609 } 1610 1611 /* 1612 * mdi_client_path2_devinfo(): 1613 * Given the parent devinfo and child devfs pathname, search for 1614 * a valid devfs node handle. 1615 */ 1616 dev_info_t * 1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1618 { 1619 dev_info_t *cdip = NULL; 1620 dev_info_t *ndip = NULL; 1621 char *temp_pathname; 1622 int circular; 1623 1624 /* 1625 * Allocate temp buffer 1626 */ 1627 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1628 1629 /* 1630 * Lock parent against changes 1631 */ 1632 ndi_devi_enter(vdip, &circular); 1633 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1634 while ((cdip = ndip) != NULL) { 1635 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1636 1637 *temp_pathname = '\0'; 1638 (void) ddi_pathname(cdip, temp_pathname); 1639 if (strcmp(temp_pathname, pathname) == 0) { 1640 break; 1641 } 1642 } 1643 /* 1644 * Release devinfo lock 1645 */ 1646 ndi_devi_exit(vdip, circular); 1647 1648 /* 1649 * Free the temp buffer 1650 */ 1651 kmem_free(temp_pathname, MAXPATHLEN); 1652 return (cdip); 1653 } 1654 1655 /* 1656 * mdi_client_get_path_count(): 1657 * Utility function to get number of path information nodes 1658 * associated with a given client device. 
1659 */ 1660 int 1661 mdi_client_get_path_count(dev_info_t *cdip) 1662 { 1663 mdi_client_t *ct; 1664 int count = 0; 1665 1666 ct = i_devi_get_client(cdip); 1667 if (ct != NULL) { 1668 count = ct->ct_path_count; 1669 } 1670 return (count); 1671 } 1672 1673 1674 /* 1675 * i_mdi_get_hash_key(): 1676 * Create a hash using strings as keys 1677 * 1678 */ 1679 static int 1680 i_mdi_get_hash_key(char *str) 1681 { 1682 uint32_t g, hash = 0; 1683 char *p; 1684 1685 for (p = str; *p != '\0'; p++) { 1686 g = *p; 1687 hash += g; 1688 } 1689 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1690 } 1691 1692 /* 1693 * mdi_get_lb_policy(): 1694 * Get current load balancing policy for a given client device 1695 */ 1696 client_lb_t 1697 mdi_get_lb_policy(dev_info_t *cdip) 1698 { 1699 client_lb_t lb = LOAD_BALANCE_NONE; 1700 mdi_client_t *ct; 1701 1702 ct = i_devi_get_client(cdip); 1703 if (ct != NULL) { 1704 lb = ct->ct_lb; 1705 } 1706 return (lb); 1707 } 1708 1709 /* 1710 * mdi_set_lb_region_size(): 1711 * Set current region size for the load-balance 1712 */ 1713 int 1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1715 { 1716 mdi_client_t *ct; 1717 int rv = MDI_FAILURE; 1718 1719 ct = i_devi_get_client(cdip); 1720 if (ct != NULL && ct->ct_lb_args != NULL) { 1721 ct->ct_lb_args->region_size = region_size; 1722 rv = MDI_SUCCESS; 1723 } 1724 return (rv); 1725 } 1726 1727 /* 1728 * mdi_Set_lb_policy(): 1729 * Set current load balancing policy for a given client device 1730 */ 1731 int 1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1733 { 1734 mdi_client_t *ct; 1735 int rv = MDI_FAILURE; 1736 1737 ct = i_devi_get_client(cdip); 1738 if (ct != NULL) { 1739 ct->ct_lb = lb; 1740 rv = MDI_SUCCESS; 1741 } 1742 return (rv); 1743 } 1744 1745 /* 1746 * mdi_failover(): 1747 * failover function called by the vHCI drivers to initiate 1748 * a failover operation. This is typically due to non-availability 1749 * of online paths to route I/O requests. 
Failover can be 1750 * triggered through user application also. 1751 * 1752 * The vHCI driver calls mdi_failover() to initiate a failover 1753 * operation. mdi_failover() calls back into the vHCI driver's 1754 * vo_failover() entry point to perform the actual failover 1755 * operation. The reason for requiring the vHCI driver to 1756 * initiate failover by calling mdi_failover(), instead of directly 1757 * executing vo_failover() itself, is to ensure that the mdi 1758 * framework can keep track of the client state properly. 1759 * Additionally, mdi_failover() provides as a convenience the 1760 * option of performing the failover operation synchronously or 1761 * asynchronously 1762 * 1763 * Upon successful completion of the failover operation, the 1764 * paths that were previously ONLINE will be in the STANDBY state, 1765 * and the newly activated paths will be in the ONLINE state. 1766 * 1767 * The flags modifier determines whether the activation is done 1768 * synchronously: MDI_FAILOVER_SYNC 1769 * Return Values: 1770 * MDI_SUCCESS 1771 * MDI_FAILURE 1772 * MDI_BUSY 1773 */ 1774 /*ARGSUSED*/ 1775 int 1776 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1777 { 1778 int rv; 1779 mdi_client_t *ct; 1780 1781 ct = i_devi_get_client(cdip); 1782 ASSERT(ct != NULL); 1783 if (ct == NULL) { 1784 /* cdip is not a valid client device. Nothing more to do. */ 1785 return (MDI_FAILURE); 1786 } 1787 1788 MDI_CLIENT_LOCK(ct); 1789 1790 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1791 /* A path to the client is being freed */ 1792 MDI_CLIENT_UNLOCK(ct); 1793 return (MDI_BUSY); 1794 } 1795 1796 1797 if (MDI_CLIENT_IS_FAILED(ct)) { 1798 /* 1799 * Client is in failed state. Nothing more to do. 
1800 */ 1801 MDI_CLIENT_UNLOCK(ct); 1802 return (MDI_FAILURE); 1803 } 1804 1805 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1806 /* 1807 * Failover is already in progress; return BUSY 1808 */ 1809 MDI_CLIENT_UNLOCK(ct); 1810 return (MDI_BUSY); 1811 } 1812 /* 1813 * Make sure that mdi_pathinfo node state changes are processed. 1814 * We do not allow failovers to progress while client path state 1815 * changes are in progress 1816 */ 1817 if (ct->ct_unstable) { 1818 if (flags == MDI_FAILOVER_ASYNC) { 1819 MDI_CLIENT_UNLOCK(ct); 1820 return (MDI_BUSY); 1821 } else { 1822 while (ct->ct_unstable) 1823 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1824 } 1825 } 1826 1827 /* 1828 * Client device is in stable state. Before proceeding, perform sanity 1829 * checks again. 1830 */ 1831 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1832 (!i_ddi_devi_attached(ct->ct_dip))) { 1833 /* 1834 * Client is in failed state. Nothing more to do. 1835 */ 1836 MDI_CLIENT_UNLOCK(ct); 1837 return (MDI_FAILURE); 1838 } 1839 1840 /* 1841 * Set the client state as failover in progress. 1842 */ 1843 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1844 ct->ct_failover_flags = flags; 1845 MDI_CLIENT_UNLOCK(ct); 1846 1847 if (flags == MDI_FAILOVER_ASYNC) { 1848 /* 1849 * Submit the initiate failover request via CPR safe 1850 * taskq threads. 1851 */ 1852 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1853 ct, KM_SLEEP); 1854 return (MDI_ACCEPT); 1855 } else { 1856 /* 1857 * Synchronous failover mode. Typically invoked from the user 1858 * land. 1859 */ 1860 rv = i_mdi_failover(ct); 1861 } 1862 return (rv); 1863 } 1864 1865 /* 1866 * i_mdi_failover(): 1867 * internal failover function. Invokes vHCI drivers failover 1868 * callback function and process the failover status 1869 * Return Values: 1870 * None 1871 * 1872 * Note: A client device in failover state can not be detached or freed. 
1873 */ 1874 static int 1875 i_mdi_failover(void *arg) 1876 { 1877 int rv = MDI_SUCCESS; 1878 mdi_client_t *ct = (mdi_client_t *)arg; 1879 mdi_vhci_t *vh = ct->ct_vhci; 1880 1881 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1882 1883 if (vh->vh_ops->vo_failover != NULL) { 1884 /* 1885 * Call vHCI drivers callback routine 1886 */ 1887 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1888 ct->ct_failover_flags); 1889 } 1890 1891 MDI_CLIENT_LOCK(ct); 1892 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1893 1894 /* 1895 * Save the failover return status 1896 */ 1897 ct->ct_failover_status = rv; 1898 1899 /* 1900 * As a result of failover, client status would have been changed. 1901 * Update the client state and wake up anyone waiting on this client 1902 * device. 1903 */ 1904 i_mdi_client_update_state(ct); 1905 1906 cv_broadcast(&ct->ct_failover_cv); 1907 MDI_CLIENT_UNLOCK(ct); 1908 return (rv); 1909 } 1910 1911 /* 1912 * Load balancing is logical block. 1913 * IOs within the range described by region_size 1914 * would go on the same path. This would improve the 1915 * performance by cache-hit on some of the RAID devices. 1916 * Search only for online paths(At some point we 1917 * may want to balance across target ports). 1918 * If no paths are found then default to round-robin. 
1919 */ 1920 static int 1921 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1922 { 1923 int path_index = -1; 1924 int online_path_count = 0; 1925 int online_nonpref_path_count = 0; 1926 int region_size = ct->ct_lb_args->region_size; 1927 mdi_pathinfo_t *pip; 1928 mdi_pathinfo_t *next; 1929 int preferred, path_cnt; 1930 1931 pip = ct->ct_path_head; 1932 while (pip) { 1933 MDI_PI_LOCK(pip); 1934 if (MDI_PI(pip)->pi_state == 1935 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1936 online_path_count++; 1937 } else if (MDI_PI(pip)->pi_state == 1938 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1939 online_nonpref_path_count++; 1940 } 1941 next = (mdi_pathinfo_t *) 1942 MDI_PI(pip)->pi_client_link; 1943 MDI_PI_UNLOCK(pip); 1944 pip = next; 1945 } 1946 /* if found any online/preferred then use this type */ 1947 if (online_path_count > 0) { 1948 path_cnt = online_path_count; 1949 preferred = 1; 1950 } else if (online_nonpref_path_count > 0) { 1951 path_cnt = online_nonpref_path_count; 1952 preferred = 0; 1953 } else { 1954 path_cnt = 0; 1955 } 1956 if (path_cnt) { 1957 path_index = (bp->b_blkno >> region_size) % path_cnt; 1958 pip = ct->ct_path_head; 1959 while (pip && path_index != -1) { 1960 MDI_PI_LOCK(pip); 1961 if (path_index == 0 && 1962 (MDI_PI(pip)->pi_state == 1963 MDI_PATHINFO_STATE_ONLINE) && 1964 MDI_PI(pip)->pi_preferred == preferred) { 1965 MDI_PI_HOLD(pip); 1966 MDI_PI_UNLOCK(pip); 1967 *ret_pip = pip; 1968 return (MDI_SUCCESS); 1969 } 1970 path_index --; 1971 next = (mdi_pathinfo_t *) 1972 MDI_PI(pip)->pi_client_link; 1973 MDI_PI_UNLOCK(pip); 1974 pip = next; 1975 } 1976 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1977 "lba %llx: path %s %p", 1978 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1979 } 1980 return (MDI_FAILURE); 1981 } 1982 1983 /* 1984 * mdi_select_path(): 1985 * select a path to access a client device. 
1986 * 1987 * mdi_select_path() function is called by the vHCI drivers to 1988 * select a path to route the I/O request to. The caller passes 1989 * the block I/O data transfer structure ("buf") as one of the 1990 * parameters. The mpxio framework uses the buf structure 1991 * contents to maintain per path statistics (total I/O size / 1992 * count pending). If more than one online path is available to 1993 * select, the framework automatically selects a suitable path 1994 * for routing the I/O request. If a failover operation is active for 1995 * this client device the call shall be failed with MDI_BUSY error 1996 * code. 1997 * 1998 * By default this function returns a suitable path in online 1999 * state based on the current load balancing policy. Currently 2000 * we support LOAD_BALANCE_NONE (Previously selected online path 2001 * will continue to be used as long as the path is usable), 2002 * LOAD_BALANCE_RR (Online paths will be selected in a round 2003 * robin fashion) and LOAD_BALANCE_LBA (Online paths will be selected 2004 * based on the logical block). The load balancing policy is set 2005 * through the vHCI driver's configuration file (driver.conf). 2006 * 2007 * vHCI drivers may override this default behavior by specifying 2008 * appropriate flags. The meaning of the "arg" argument depends 2009 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 2010 * then the argument is the "path instance" of the path to select. 2011 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 2012 * "start_pip". A non NULL "start_pip" is the starting point to 2013 * walk and find the next appropriate path. The following values 2014 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 2015 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 2016 * STANDBY path). 2017 * 2018 * The non-standard behavior is used by the scsi_vhci driver, 2019 * whenever it has to use a STANDBY/FAULTED path. Eg.
during 2020 * attach of client devices (to avoid an unnecessary failover 2021 * when the STANDBY path comes up first), during failover 2022 * (to activate a STANDBY path as ONLINE). 2023 * 2024 * The selected path is returned in a a mdi_hold_path() state 2025 * (pi_ref_cnt). Caller should release the hold by calling 2026 * mdi_rele_path(). 2027 * 2028 * Return Values: 2029 * MDI_SUCCESS - Completed successfully 2030 * MDI_BUSY - Client device is busy failing over 2031 * MDI_NOPATH - Client device is online, but no valid path are 2032 * available to access this client device 2033 * MDI_FAILURE - Invalid client device or state 2034 * MDI_DEVI_ONLINING 2035 * - Client device (struct dev_info state) is in 2036 * onlining state. 2037 */ 2038 2039 /*ARGSUSED*/ 2040 int 2041 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2042 void *arg, mdi_pathinfo_t **ret_pip) 2043 { 2044 mdi_client_t *ct; 2045 mdi_pathinfo_t *pip; 2046 mdi_pathinfo_t *next; 2047 mdi_pathinfo_t *head; 2048 mdi_pathinfo_t *start; 2049 client_lb_t lbp; /* load balancing policy */ 2050 int sb = 1; /* standard behavior */ 2051 int preferred = 1; /* preferred path */ 2052 int cond, cont = 1; 2053 int retry = 0; 2054 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2055 int path_instance; /* request specific path instance */ 2056 2057 /* determine type of arg based on flags */ 2058 if (flags & MDI_SELECT_PATH_INSTANCE) { 2059 path_instance = (int)(intptr_t)arg; 2060 start_pip = NULL; 2061 } else { 2062 path_instance = 0; 2063 start_pip = (mdi_pathinfo_t *)arg; 2064 } 2065 2066 if (flags != 0) { 2067 /* 2068 * disable default behavior 2069 */ 2070 sb = 0; 2071 } 2072 2073 *ret_pip = NULL; 2074 ct = i_devi_get_client(cdip); 2075 if (ct == NULL) { 2076 /* mdi extensions are NULL, Nothing more to do */ 2077 return (MDI_FAILURE); 2078 } 2079 2080 MDI_CLIENT_LOCK(ct); 2081 2082 if (sb) { 2083 if (MDI_CLIENT_IS_FAILED(ct)) { 2084 /* 2085 * Client is not ready to accept any I/O requests. 
2086 * Fail this request. 2087 */ 2088 MDI_DEBUG(2, (MDI_NOTE, cdip, 2089 "client state offline ct = %p", (void *)ct)); 2090 MDI_CLIENT_UNLOCK(ct); 2091 return (MDI_FAILURE); 2092 } 2093 2094 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2095 /* 2096 * Check for Failover is in progress. If so tell the 2097 * caller that this device is busy. 2098 */ 2099 MDI_DEBUG(2, (MDI_NOTE, cdip, 2100 "client failover in progress ct = %p", 2101 (void *)ct)); 2102 MDI_CLIENT_UNLOCK(ct); 2103 return (MDI_BUSY); 2104 } 2105 2106 /* 2107 * Check to see whether the client device is attached. 2108 * If not so, let the vHCI driver manually select a path 2109 * (standby) and let the probe/attach process to continue. 2110 */ 2111 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2112 MDI_DEBUG(4, (MDI_NOTE, cdip, 2113 "devi is onlining ct = %p", (void *)ct)); 2114 MDI_CLIENT_UNLOCK(ct); 2115 return (MDI_DEVI_ONLINING); 2116 } 2117 } 2118 2119 /* 2120 * Cache in the client list head. If head of the list is NULL 2121 * return MDI_NOPATH 2122 */ 2123 head = ct->ct_path_head; 2124 if (head == NULL) { 2125 MDI_CLIENT_UNLOCK(ct); 2126 return (MDI_NOPATH); 2127 } 2128 2129 /* Caller is specifying a specific pathinfo path by path_instance */ 2130 if (path_instance) { 2131 /* search for pathinfo with correct path_instance */ 2132 for (pip = head; 2133 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2134 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2135 ; 2136 2137 /* If path can't be selected then MDI_NOPATH is returned. */ 2138 if (pip == NULL) { 2139 MDI_CLIENT_UNLOCK(ct); 2140 return (MDI_NOPATH); 2141 } 2142 2143 /* 2144 * Verify state of path. When asked to select a specific 2145 * path_instance, we select the requested path in any 2146 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2147 * We don't however select paths where the pHCI has detached. 
2148 * NOTE: last pathinfo node of an opened client device may 2149 * exist in an OFFLINE state after the pHCI associated with 2150 * that path has detached (but pi_phci will be NULL if that 2151 * has occurred). 2152 */ 2153 MDI_PI_LOCK(pip); 2154 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2155 (MDI_PI(pip)->pi_phci == NULL)) { 2156 MDI_PI_UNLOCK(pip); 2157 MDI_CLIENT_UNLOCK(ct); 2158 return (MDI_FAILURE); 2159 } 2160 2161 /* Return MDI_BUSY if we have a transient condition */ 2162 if (MDI_PI_IS_TRANSIENT(pip)) { 2163 MDI_PI_UNLOCK(pip); 2164 MDI_CLIENT_UNLOCK(ct); 2165 return (MDI_BUSY); 2166 } 2167 2168 /* 2169 * Return the path in hold state. Caller should release the 2170 * lock by calling mdi_rele_path() 2171 */ 2172 MDI_PI_HOLD(pip); 2173 MDI_PI_UNLOCK(pip); 2174 *ret_pip = pip; 2175 MDI_CLIENT_UNLOCK(ct); 2176 return (MDI_SUCCESS); 2177 } 2178 2179 /* 2180 * for non default behavior, bypass current 2181 * load balancing policy and always use LOAD_BALANCE_RR 2182 * except that the start point will be adjusted based 2183 * on the provided start_pip 2184 */ 2185 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2186 2187 switch (lbp) { 2188 case LOAD_BALANCE_NONE: 2189 /* 2190 * Load balancing is None or Alternate path mode 2191 * Start looking for a online mdi_pathinfo node starting from 2192 * last known selected path 2193 */ 2194 preferred = 1; 2195 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2196 if (pip == NULL) { 2197 pip = head; 2198 } 2199 start = pip; 2200 do { 2201 MDI_PI_LOCK(pip); 2202 /* 2203 * No need to explicitly check if the path is disabled. 2204 * Since we are checking for state == ONLINE and the 2205 * same variable is used for DISABLE/ENABLE information. 2206 */ 2207 if ((MDI_PI(pip)->pi_state == 2208 MDI_PATHINFO_STATE_ONLINE) && 2209 preferred == MDI_PI(pip)->pi_preferred) { 2210 /* 2211 * Return the path in hold state. 
Caller should 2212 * release the lock by calling mdi_rele_path() 2213 */ 2214 MDI_PI_HOLD(pip); 2215 MDI_PI_UNLOCK(pip); 2216 ct->ct_path_last = pip; 2217 *ret_pip = pip; 2218 MDI_CLIENT_UNLOCK(ct); 2219 return (MDI_SUCCESS); 2220 } 2221 2222 /* 2223 * Path is busy. 2224 */ 2225 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2226 MDI_PI_IS_TRANSIENT(pip)) 2227 retry = 1; 2228 /* 2229 * Keep looking for a next available online path 2230 */ 2231 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2232 if (next == NULL) { 2233 next = head; 2234 } 2235 MDI_PI_UNLOCK(pip); 2236 pip = next; 2237 if (start == pip && preferred) { 2238 preferred = 0; 2239 } else if (start == pip && !preferred) { 2240 cont = 0; 2241 } 2242 } while (cont); 2243 break; 2244 2245 case LOAD_BALANCE_LBA: 2246 /* 2247 * Make sure we are looking 2248 * for an online path. Otherwise, if it is for a STANDBY 2249 * path request, it will go through and fetch an ONLINE 2250 * path which is not desirable. 2251 */ 2252 if ((ct->ct_lb_args != NULL) && 2253 (ct->ct_lb_args->region_size) && bp && 2254 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2255 if (i_mdi_lba_lb(ct, ret_pip, bp) 2256 == MDI_SUCCESS) { 2257 MDI_CLIENT_UNLOCK(ct); 2258 return (MDI_SUCCESS); 2259 } 2260 } 2261 /* FALLTHROUGH */ 2262 case LOAD_BALANCE_RR: 2263 /* 2264 * Load balancing is Round Robin. Start looking for a online 2265 * mdi_pathinfo node starting from last known selected path 2266 * as the start point. If override flags are specified, 2267 * process accordingly. 2268 * If the search is already in effect(start_pip not null), 2269 * then lets just use the same path preference to continue the 2270 * traversal. 2271 */ 2272 2273 if (start_pip != NULL) { 2274 preferred = MDI_PI(start_pip)->pi_preferred; 2275 } else { 2276 preferred = 1; 2277 } 2278 2279 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2280 if (start == NULL) { 2281 pip = head; 2282 } else { 2283 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2284 if (pip == NULL) { 2285 if ( flags & MDI_SELECT_NO_PREFERRED) { 2286 /* 2287 * Return since we hit the end of list 2288 */ 2289 MDI_CLIENT_UNLOCK(ct); 2290 return (MDI_NOPATH); 2291 } 2292 2293 if (!sb) { 2294 if (preferred == 0) { 2295 /* 2296 * Looks like we have completed 2297 * the traversal as preferred 2298 * value is 0. Time to bail out. 2299 */ 2300 *ret_pip = NULL; 2301 MDI_CLIENT_UNLOCK(ct); 2302 return (MDI_NOPATH); 2303 } else { 2304 /* 2305 * Looks like we reached the 2306 * end of the list. Lets enable 2307 * traversal of non preferred 2308 * paths. 2309 */ 2310 preferred = 0; 2311 } 2312 } 2313 pip = head; 2314 } 2315 } 2316 start = pip; 2317 do { 2318 MDI_PI_LOCK(pip); 2319 if (sb) { 2320 cond = ((MDI_PI(pip)->pi_state == 2321 MDI_PATHINFO_STATE_ONLINE && 2322 MDI_PI(pip)->pi_preferred == 2323 preferred) ? 1 : 0); 2324 } else { 2325 if (flags == MDI_SELECT_ONLINE_PATH) { 2326 cond = ((MDI_PI(pip)->pi_state == 2327 MDI_PATHINFO_STATE_ONLINE && 2328 MDI_PI(pip)->pi_preferred == 2329 preferred) ? 1 : 0); 2330 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2331 cond = ((MDI_PI(pip)->pi_state == 2332 MDI_PATHINFO_STATE_STANDBY && 2333 MDI_PI(pip)->pi_preferred == 2334 preferred) ? 1 : 0); 2335 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2336 MDI_SELECT_STANDBY_PATH)) { 2337 cond = (((MDI_PI(pip)->pi_state == 2338 MDI_PATHINFO_STATE_ONLINE || 2339 (MDI_PI(pip)->pi_state == 2340 MDI_PATHINFO_STATE_STANDBY)) && 2341 MDI_PI(pip)->pi_preferred == 2342 preferred) ? 
1 : 0); 2343 } else if (flags == 2344 (MDI_SELECT_STANDBY_PATH | 2345 MDI_SELECT_ONLINE_PATH | 2346 MDI_SELECT_USER_DISABLE_PATH)) { 2347 cond = (((MDI_PI(pip)->pi_state == 2348 MDI_PATHINFO_STATE_ONLINE || 2349 (MDI_PI(pip)->pi_state == 2350 MDI_PATHINFO_STATE_STANDBY) || 2351 (MDI_PI(pip)->pi_state == 2352 (MDI_PATHINFO_STATE_ONLINE| 2353 MDI_PATHINFO_STATE_USER_DISABLE)) || 2354 (MDI_PI(pip)->pi_state == 2355 (MDI_PATHINFO_STATE_STANDBY | 2356 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2357 MDI_PI(pip)->pi_preferred == 2358 preferred) ? 1 : 0); 2359 } else if (flags == 2360 (MDI_SELECT_STANDBY_PATH | 2361 MDI_SELECT_ONLINE_PATH | 2362 MDI_SELECT_NO_PREFERRED)) { 2363 cond = (((MDI_PI(pip)->pi_state == 2364 MDI_PATHINFO_STATE_ONLINE) || 2365 (MDI_PI(pip)->pi_state == 2366 MDI_PATHINFO_STATE_STANDBY)) 2367 ? 1 : 0); 2368 } else { 2369 cond = 0; 2370 } 2371 } 2372 /* 2373 * No need to explicitly check if the path is disabled. 2374 * Since we are checking for state == ONLINE and the 2375 * same variable is used for DISABLE/ENABLE information. 2376 */ 2377 if (cond) { 2378 /* 2379 * Return the path in hold state. Caller should 2380 * release the lock by calling mdi_rele_path() 2381 */ 2382 MDI_PI_HOLD(pip); 2383 MDI_PI_UNLOCK(pip); 2384 if (sb) 2385 ct->ct_path_last = pip; 2386 *ret_pip = pip; 2387 MDI_CLIENT_UNLOCK(ct); 2388 return (MDI_SUCCESS); 2389 } 2390 /* 2391 * Path is busy. 2392 */ 2393 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2394 MDI_PI_IS_TRANSIENT(pip)) 2395 retry = 1; 2396 2397 /* 2398 * Keep looking for a next available online path 2399 */ 2400 do_again: 2401 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2402 if (next == NULL) { 2403 if ( flags & MDI_SELECT_NO_PREFERRED) { 2404 /* 2405 * Bail out since we hit the end of list 2406 */ 2407 MDI_PI_UNLOCK(pip); 2408 break; 2409 } 2410 2411 if (!sb) { 2412 if (preferred == 1) { 2413 /* 2414 * Looks like we reached the 2415 * end of the list. Lets enable 2416 * traversal of non preferred 2417 * paths. 
2418 */ 2419 preferred = 0; 2420 next = head; 2421 } else { 2422 /* 2423 * We have done both the passes 2424 * Preferred as well as for 2425 * Non-preferred. Bail out now. 2426 */ 2427 cont = 0; 2428 } 2429 } else { 2430 /* 2431 * Standard behavior case. 2432 */ 2433 next = head; 2434 } 2435 } 2436 MDI_PI_UNLOCK(pip); 2437 if (cont == 0) { 2438 break; 2439 } 2440 pip = next; 2441 2442 if (!sb) { 2443 /* 2444 * We need to handle the selection of 2445 * non-preferred path in the following 2446 * case: 2447 * 2448 * +------+ +------+ +------+ +-----+ 2449 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2450 * +------+ +------+ +------+ +-----+ 2451 * 2452 * If we start the search with B, we need to 2453 * skip beyond B to pick C which is non - 2454 * preferred in the second pass. The following 2455 * test, if true, will allow us to skip over 2456 * the 'start'(B in the example) to select 2457 * other non preferred elements. 2458 */ 2459 if ((start_pip != NULL) && (start_pip == pip) && 2460 (MDI_PI(start_pip)->pi_preferred 2461 != preferred)) { 2462 /* 2463 * try again after going past the start 2464 * pip 2465 */ 2466 MDI_PI_LOCK(pip); 2467 goto do_again; 2468 } 2469 } else { 2470 /* 2471 * Standard behavior case 2472 */ 2473 if (start == pip && preferred) { 2474 /* look for nonpreferred paths */ 2475 preferred = 0; 2476 } else if (start == pip && !preferred) { 2477 /* 2478 * Exit condition 2479 */ 2480 cont = 0; 2481 } 2482 } 2483 } while (cont); 2484 break; 2485 } 2486 2487 MDI_CLIENT_UNLOCK(ct); 2488 if (retry == 1) { 2489 return (MDI_BUSY); 2490 } else { 2491 return (MDI_NOPATH); 2492 } 2493 } 2494 2495 /* 2496 * For a client, return the next available path to any phci 2497 * 2498 * Note: 2499 * Caller should hold the branch's devinfo node to get a consistent 2500 * snap shot of the mdi_pathinfo nodes. 2501 * 2502 * Please note that even the list is stable the mdi_pathinfo 2503 * node state and properties are volatile. 
The caller should lock 2504 * and unlock the nodes by calling mdi_pi_lock() and 2505 * mdi_pi_unlock() functions to get a stable properties. 2506 * 2507 * If there is a need to use the nodes beyond the hold of the 2508 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2509 * need to be held against unexpected removal by calling 2510 * mdi_hold_path() and should be released by calling 2511 * mdi_rele_path() on completion. 2512 */ 2513 mdi_pathinfo_t * 2514 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2515 { 2516 mdi_client_t *ct; 2517 2518 if (!MDI_CLIENT(ct_dip)) 2519 return (NULL); 2520 2521 /* 2522 * Walk through client link 2523 */ 2524 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2525 ASSERT(ct != NULL); 2526 2527 if (pip == NULL) 2528 return ((mdi_pathinfo_t *)ct->ct_path_head); 2529 2530 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2531 } 2532 2533 /* 2534 * For a phci, return the next available path to any client 2535 * Note: ditto mdi_get_next_phci_path() 2536 */ 2537 mdi_pathinfo_t * 2538 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2539 { 2540 mdi_phci_t *ph; 2541 2542 if (!MDI_PHCI(ph_dip)) 2543 return (NULL); 2544 2545 /* 2546 * Walk through pHCI link 2547 */ 2548 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2549 ASSERT(ph != NULL); 2550 2551 if (pip == NULL) 2552 return ((mdi_pathinfo_t *)ph->ph_path_head); 2553 2554 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2555 } 2556 2557 /* 2558 * mdi_hold_path(): 2559 * Hold the mdi_pathinfo node against unwanted unexpected free. 2560 * Return Values: 2561 * None 2562 */ 2563 void 2564 mdi_hold_path(mdi_pathinfo_t *pip) 2565 { 2566 if (pip) { 2567 MDI_PI_LOCK(pip); 2568 MDI_PI_HOLD(pip); 2569 MDI_PI_UNLOCK(pip); 2570 } 2571 } 2572 2573 2574 /* 2575 * mdi_rele_path(): 2576 * Release the mdi_pathinfo node which was selected 2577 * through mdi_select_path() mechanism or manually held by 2578 * calling mdi_hold_path(). 
2579 * Return Values: 2580 * None 2581 */ 2582 void 2583 mdi_rele_path(mdi_pathinfo_t *pip) 2584 { 2585 if (pip) { 2586 MDI_PI_LOCK(pip); 2587 MDI_PI_RELE(pip); 2588 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2589 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2590 } 2591 MDI_PI_UNLOCK(pip); 2592 } 2593 } 2594 2595 /* 2596 * mdi_pi_lock(): 2597 * Lock the mdi_pathinfo node. 2598 * Note: 2599 * The caller should release the lock by calling mdi_pi_unlock() 2600 */ 2601 void 2602 mdi_pi_lock(mdi_pathinfo_t *pip) 2603 { 2604 ASSERT(pip != NULL); 2605 if (pip) { 2606 MDI_PI_LOCK(pip); 2607 } 2608 } 2609 2610 2611 /* 2612 * mdi_pi_unlock(): 2613 * Unlock the mdi_pathinfo node. 2614 * Note: 2615 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2616 */ 2617 void 2618 mdi_pi_unlock(mdi_pathinfo_t *pip) 2619 { 2620 ASSERT(pip != NULL); 2621 if (pip) { 2622 MDI_PI_UNLOCK(pip); 2623 } 2624 } 2625 2626 /* 2627 * mdi_pi_find(): 2628 * Search the list of mdi_pathinfo nodes attached to the 2629 * pHCI/Client device node whose path address matches "paddr". 2630 * Returns a pointer to the mdi_pathinfo node if a matching node is 2631 * found. 2632 * Return Values: 2633 * mdi_pathinfo node handle 2634 * NULL 2635 * Notes: 2636 * Caller need not hold any locks to call this function. 2637 */ 2638 mdi_pathinfo_t * 2639 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2640 { 2641 mdi_phci_t *ph; 2642 mdi_vhci_t *vh; 2643 mdi_client_t *ct; 2644 mdi_pathinfo_t *pip = NULL; 2645 2646 MDI_DEBUG(2, (MDI_NOTE, pdip, 2647 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : "")); 2648 if ((pdip == NULL) || (paddr == NULL)) { 2649 return (NULL); 2650 } 2651 ph = i_devi_get_phci(pdip); 2652 if (ph == NULL) { 2653 /* 2654 * Invalid pHCI device, Nothing more to do. 2655 */ 2656 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci")); 2657 return (NULL); 2658 } 2659 2660 vh = ph->ph_vhci; 2661 if (vh == NULL) { 2662 /* 2663 * Invalid vHCI device, Nothing more to do. 
2664 */ 2665 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci")); 2666 return (NULL); 2667 } 2668 2669 /* 2670 * Look for pathinfo node identified by paddr. 2671 */ 2672 if (caddr == NULL) { 2673 /* 2674 * Find a mdi_pathinfo node under pHCI list for a matching 2675 * unit address. 2676 */ 2677 MDI_PHCI_LOCK(ph); 2678 if (MDI_PHCI_IS_OFFLINE(ph)) { 2679 MDI_DEBUG(2, (MDI_WARN, pdip, 2680 "offline phci %p", (void *)ph)); 2681 MDI_PHCI_UNLOCK(ph); 2682 return (NULL); 2683 } 2684 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2685 2686 while (pip != NULL) { 2687 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2688 break; 2689 } 2690 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2691 } 2692 MDI_PHCI_UNLOCK(ph); 2693 MDI_DEBUG(2, (MDI_NOTE, pdip, 2694 "found %s %p", mdi_pi_spathname(pip), (void *)pip)); 2695 return (pip); 2696 } 2697 2698 /* 2699 * XXX - Is the rest of the code in this function really necessary? 2700 * The consumers of mdi_pi_find() can search for the desired pathinfo 2701 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2702 * whether the search is based on the pathinfo nodes attached to 2703 * the pHCI or the client node, the result will be the same. 2704 */ 2705 2706 /* 2707 * Find the client device corresponding to 'caddr' 2708 */ 2709 MDI_VHCI_CLIENT_LOCK(vh); 2710 2711 /* 2712 * XXX - Passing NULL to the following function works as long as the 2713 * the client addresses (caddr) are unique per vhci basis. 2714 */ 2715 ct = i_mdi_client_find(vh, NULL, caddr); 2716 if (ct == NULL) { 2717 /* 2718 * Client not found, Obviously mdi_pathinfo node has not been 2719 * created yet. 2720 */ 2721 MDI_VHCI_CLIENT_UNLOCK(vh); 2722 MDI_DEBUG(2, (MDI_NOTE, pdip, 2723 "client not found for caddr @%s", caddr ? 
caddr : "")); 2724 return (NULL); 2725 } 2726 2727 /* 2728 * Hold the client lock and look for a mdi_pathinfo node with matching 2729 * pHCI and paddr 2730 */ 2731 MDI_CLIENT_LOCK(ct); 2732 2733 /* 2734 * Release the global mutex as it is no more needed. Note: We always 2735 * respect the locking order while acquiring. 2736 */ 2737 MDI_VHCI_CLIENT_UNLOCK(vh); 2738 2739 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2740 while (pip != NULL) { 2741 /* 2742 * Compare the unit address 2743 */ 2744 if ((MDI_PI(pip)->pi_phci == ph) && 2745 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2746 break; 2747 } 2748 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2749 } 2750 MDI_CLIENT_UNLOCK(ct); 2751 MDI_DEBUG(2, (MDI_NOTE, pdip, 2752 "found: %s %p", mdi_pi_spathname(pip), (void *)pip)); 2753 return (pip); 2754 } 2755 2756 /* 2757 * mdi_pi_alloc(): 2758 * Allocate and initialize a new instance of a mdi_pathinfo node. 2759 * The mdi_pathinfo node returned by this function identifies a 2760 * unique device path is capable of having properties attached 2761 * and passed to mdi_pi_online() to fully attach and online the 2762 * path and client device node. 2763 * The mdi_pathinfo node returned by this function must be 2764 * destroyed using mdi_pi_free() if the path is no longer 2765 * operational or if the caller fails to attach a client device 2766 * node when calling mdi_pi_online(). The framework will not free 2767 * the resources allocated. 2768 * This function can be called from both interrupt and kernel 2769 * contexts. DDI_NOSLEEP flag should be used while calling 2770 * from interrupt contexts. 
2771 * Return Values: 2772 * MDI_SUCCESS 2773 * MDI_FAILURE 2774 * MDI_NOMEM 2775 */ 2776 /*ARGSUSED*/ 2777 int 2778 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2779 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2780 { 2781 mdi_vhci_t *vh; 2782 mdi_phci_t *ph; 2783 mdi_client_t *ct; 2784 mdi_pathinfo_t *pip = NULL; 2785 dev_info_t *cdip; 2786 int rv = MDI_NOMEM; 2787 int path_allocated = 0; 2788 2789 MDI_DEBUG(2, (MDI_NOTE, pdip, 2790 "cname %s: caddr@%s paddr@%s", 2791 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : "")); 2792 2793 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2794 ret_pip == NULL) { 2795 /* Nothing more to do */ 2796 return (MDI_FAILURE); 2797 } 2798 2799 *ret_pip = NULL; 2800 2801 /* No allocations on detaching pHCI */ 2802 if (DEVI_IS_DETACHING(pdip)) { 2803 /* Invalid pHCI device, return failure */ 2804 MDI_DEBUG(1, (MDI_WARN, pdip, 2805 "!detaching pHCI=%p", (void *)pdip)); 2806 return (MDI_FAILURE); 2807 } 2808 2809 ph = i_devi_get_phci(pdip); 2810 ASSERT(ph != NULL); 2811 if (ph == NULL) { 2812 /* Invalid pHCI device, return failure */ 2813 MDI_DEBUG(1, (MDI_WARN, pdip, 2814 "!invalid pHCI=%p", (void *)pdip)); 2815 return (MDI_FAILURE); 2816 } 2817 2818 MDI_PHCI_LOCK(ph); 2819 vh = ph->ph_vhci; 2820 if (vh == NULL) { 2821 /* Invalid vHCI device, return failure */ 2822 MDI_DEBUG(1, (MDI_WARN, pdip, 2823 "!invalid vHCI=%p", (void *)pdip)); 2824 MDI_PHCI_UNLOCK(ph); 2825 return (MDI_FAILURE); 2826 } 2827 2828 if (MDI_PHCI_IS_READY(ph) == 0) { 2829 /* 2830 * Do not allow new node creation when pHCI is in 2831 * offline/suspended states 2832 */ 2833 MDI_DEBUG(1, (MDI_WARN, pdip, 2834 "pHCI=%p is not ready", (void *)ph)); 2835 MDI_PHCI_UNLOCK(ph); 2836 return (MDI_BUSY); 2837 } 2838 MDI_PHCI_UNSTABLE(ph); 2839 MDI_PHCI_UNLOCK(ph); 2840 2841 /* look for a matching client, create one if not found */ 2842 MDI_VHCI_CLIENT_LOCK(vh); 2843 ct = 
i_mdi_client_find(vh, cname, caddr); 2844 if (ct == NULL) { 2845 ct = i_mdi_client_alloc(vh, cname, caddr); 2846 ASSERT(ct != NULL); 2847 } 2848 2849 if (ct->ct_dip == NULL) { 2850 /* 2851 * Allocate a devinfo node 2852 */ 2853 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2854 compatible, ncompatible); 2855 if (ct->ct_dip == NULL) { 2856 (void) i_mdi_client_free(vh, ct); 2857 goto fail; 2858 } 2859 } 2860 cdip = ct->ct_dip; 2861 2862 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2863 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2864 2865 MDI_CLIENT_LOCK(ct); 2866 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2867 while (pip != NULL) { 2868 /* 2869 * Compare the unit address 2870 */ 2871 if ((MDI_PI(pip)->pi_phci == ph) && 2872 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2873 break; 2874 } 2875 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2876 } 2877 MDI_CLIENT_UNLOCK(ct); 2878 2879 if (pip == NULL) { 2880 /* 2881 * This is a new path for this client device. Allocate and 2882 * initialize a new pathinfo node 2883 */ 2884 pip = i_mdi_pi_alloc(ph, paddr, ct); 2885 ASSERT(pip != NULL); 2886 path_allocated = 1; 2887 } 2888 rv = MDI_SUCCESS; 2889 2890 fail: 2891 /* 2892 * Release the global mutex. 
2893 */ 2894 MDI_VHCI_CLIENT_UNLOCK(vh); 2895 2896 /* 2897 * Mark the pHCI as stable 2898 */ 2899 MDI_PHCI_LOCK(ph); 2900 MDI_PHCI_STABLE(ph); 2901 MDI_PHCI_UNLOCK(ph); 2902 *ret_pip = pip; 2903 2904 MDI_DEBUG(2, (MDI_NOTE, pdip, 2905 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip)); 2906 2907 if (path_allocated) 2908 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2909 2910 return (rv); 2911 } 2912 2913 /*ARGSUSED*/ 2914 int 2915 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2916 int flags, mdi_pathinfo_t **ret_pip) 2917 { 2918 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2919 flags, ret_pip)); 2920 } 2921 2922 /* 2923 * i_mdi_pi_alloc(): 2924 * Allocate a mdi_pathinfo node and add to the pHCI path list 2925 * Return Values: 2926 * mdi_pathinfo 2927 */ 2928 /*ARGSUSED*/ 2929 static mdi_pathinfo_t * 2930 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2931 { 2932 mdi_pathinfo_t *pip; 2933 int ct_circular; 2934 int ph_circular; 2935 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */ 2936 char *path_persistent; 2937 int path_instance; 2938 mod_hash_val_t hv; 2939 2940 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2941 2942 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2943 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2944 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2945 MDI_PATHINFO_STATE_TRANSIENT; 2946 2947 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2948 MDI_PI_SET_USER_DISABLE(pip); 2949 2950 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2951 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2952 2953 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2954 MDI_PI_SET_DRV_DISABLE(pip); 2955 2956 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2957 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2958 MDI_PI(pip)->pi_client = ct; 2959 MDI_PI(pip)->pi_phci = ph; 2960 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2961 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2962 
2963 /* 2964 * We form the "path" to the pathinfo node, and see if we have 2965 * already allocated a 'path_instance' for that "path". If so, 2966 * we use the already allocated 'path_instance'. If not, we 2967 * allocate a new 'path_instance' and associate it with a copy of 2968 * the "path" string (which is never freed). The association 2969 * between a 'path_instance' this "path" string persists until 2970 * reboot. 2971 */ 2972 mutex_enter(&mdi_pathmap_mutex); 2973 (void) ddi_pathname(ph->ph_dip, path); 2974 (void) sprintf(path + strlen(path), "/%s@%s", 2975 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2976 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2977 path_instance = (uint_t)(intptr_t)hv; 2978 } else { 2979 /* allocate a new 'path_instance' and persistent "path" */ 2980 path_instance = mdi_pathmap_instance++; 2981 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2982 (void) mod_hash_insert(mdi_pathmap_bypath, 2983 (mod_hash_key_t)path_persistent, 2984 (mod_hash_val_t)(intptr_t)path_instance); 2985 (void) mod_hash_insert(mdi_pathmap_byinstance, 2986 (mod_hash_key_t)(intptr_t)path_instance, 2987 (mod_hash_val_t)path_persistent); 2988 2989 /* create shortpath name */ 2990 (void) snprintf(path, sizeof(path), "%s%d/%s@%s", 2991 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip), 2992 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2993 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2994 (void) mod_hash_insert(mdi_pathmap_sbyinstance, 2995 (mod_hash_key_t)(intptr_t)path_instance, 2996 (mod_hash_val_t)path_persistent); 2997 } 2998 mutex_exit(&mdi_pathmap_mutex); 2999 MDI_PI(pip)->pi_path_instance = path_instance; 3000 3001 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 3002 ASSERT(MDI_PI(pip)->pi_prop != NULL); 3003 MDI_PI(pip)->pi_pprivate = NULL; 3004 MDI_PI(pip)->pi_cprivate = NULL; 3005 MDI_PI(pip)->pi_vprivate = NULL; 3006 MDI_PI(pip)->pi_client_link = NULL; 3007 MDI_PI(pip)->pi_phci_link = 
NULL; 3008 MDI_PI(pip)->pi_ref_cnt = 0; 3009 MDI_PI(pip)->pi_kstats = NULL; 3010 MDI_PI(pip)->pi_preferred = 1; 3011 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 3012 3013 /* 3014 * Lock both dev_info nodes against changes in parallel. 3015 * 3016 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 3017 * This atypical operation is done to synchronize pathinfo nodes 3018 * during devinfo snapshot (see di_register_pip) by 'pretending' that 3019 * the pathinfo nodes are children of the Client. 3020 */ 3021 ndi_devi_enter(ct->ct_dip, &ct_circular); 3022 ndi_devi_enter(ph->ph_dip, &ph_circular); 3023 3024 i_mdi_phci_add_path(ph, pip); 3025 i_mdi_client_add_path(ct, pip); 3026 3027 ndi_devi_exit(ph->ph_dip, ph_circular); 3028 ndi_devi_exit(ct->ct_dip, ct_circular); 3029 3030 return (pip); 3031 } 3032 3033 /* 3034 * mdi_pi_pathname_by_instance(): 3035 * Lookup of "path" by 'path_instance'. Return "path". 3036 * NOTE: returned "path" remains valid forever (until reboot). 3037 */ 3038 char * 3039 mdi_pi_pathname_by_instance(int path_instance) 3040 { 3041 char *path; 3042 mod_hash_val_t hv; 3043 3044 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3045 mutex_enter(&mdi_pathmap_mutex); 3046 if (mod_hash_find(mdi_pathmap_byinstance, 3047 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3048 path = (char *)hv; 3049 else 3050 path = NULL; 3051 mutex_exit(&mdi_pathmap_mutex); 3052 return (path); 3053 } 3054 3055 /* 3056 * mdi_pi_spathname_by_instance(): 3057 * Lookup of "shortpath" by 'path_instance'. Return "shortpath". 3058 * NOTE: returned "shortpath" remains valid forever (until reboot). 
3059 */ 3060 char * 3061 mdi_pi_spathname_by_instance(int path_instance) 3062 { 3063 char *path; 3064 mod_hash_val_t hv; 3065 3066 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3067 mutex_enter(&mdi_pathmap_mutex); 3068 if (mod_hash_find(mdi_pathmap_sbyinstance, 3069 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3070 path = (char *)hv; 3071 else 3072 path = NULL; 3073 mutex_exit(&mdi_pathmap_mutex); 3074 return (path); 3075 } 3076 3077 3078 /* 3079 * i_mdi_phci_add_path(): 3080 * Add a mdi_pathinfo node to pHCI list. 3081 * Notes: 3082 * Caller should per-pHCI mutex 3083 */ 3084 static void 3085 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3086 { 3087 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3088 3089 MDI_PHCI_LOCK(ph); 3090 if (ph->ph_path_head == NULL) { 3091 ph->ph_path_head = pip; 3092 } else { 3093 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3094 } 3095 ph->ph_path_tail = pip; 3096 ph->ph_path_count++; 3097 MDI_PHCI_UNLOCK(ph); 3098 } 3099 3100 /* 3101 * i_mdi_client_add_path(): 3102 * Add mdi_pathinfo node to client list 3103 */ 3104 static void 3105 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3106 { 3107 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3108 3109 MDI_CLIENT_LOCK(ct); 3110 if (ct->ct_path_head == NULL) { 3111 ct->ct_path_head = pip; 3112 } else { 3113 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3114 } 3115 ct->ct_path_tail = pip; 3116 ct->ct_path_count++; 3117 MDI_CLIENT_UNLOCK(ct); 3118 } 3119 3120 /* 3121 * mdi_pi_free(): 3122 * Free the mdi_pathinfo node and also client device node if this 3123 * is the last path to the device 3124 * Return Values: 3125 * MDI_SUCCESS 3126 * MDI_FAILURE 3127 * MDI_BUSY 3128 */ 3129 /*ARGSUSED*/ 3130 int 3131 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3132 { 3133 int rv = MDI_FAILURE; 3134 mdi_vhci_t *vh; 3135 mdi_phci_t *ph; 3136 mdi_client_t *ct; 3137 int (*f)(); 3138 int client_held = 0; 3139 3140 MDI_PI_LOCK(pip); 3141 ph = MDI_PI(pip)->pi_phci; 3142 
ASSERT(ph != NULL); 3143 if (ph == NULL) { 3144 /* 3145 * Invalid pHCI device, return failure 3146 */ 3147 MDI_DEBUG(1, (MDI_WARN, NULL, 3148 "!invalid pHCI: pip %s %p", 3149 mdi_pi_spathname(pip), (void *)pip)); 3150 MDI_PI_UNLOCK(pip); 3151 return (MDI_FAILURE); 3152 } 3153 3154 vh = ph->ph_vhci; 3155 ASSERT(vh != NULL); 3156 if (vh == NULL) { 3157 /* Invalid pHCI device, return failure */ 3158 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3159 "!invalid vHCI: pip %s %p", 3160 mdi_pi_spathname(pip), (void *)pip)); 3161 MDI_PI_UNLOCK(pip); 3162 return (MDI_FAILURE); 3163 } 3164 3165 ct = MDI_PI(pip)->pi_client; 3166 ASSERT(ct != NULL); 3167 if (ct == NULL) { 3168 /* 3169 * Invalid Client device, return failure 3170 */ 3171 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3172 "!invalid client: pip %s %p", 3173 mdi_pi_spathname(pip), (void *)pip)); 3174 MDI_PI_UNLOCK(pip); 3175 return (MDI_FAILURE); 3176 } 3177 3178 /* 3179 * Check to see for busy condition. A mdi_pathinfo can only be freed 3180 * if the node state is either offline or init and the reference count 3181 * is zero. 3182 */ 3183 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3184 MDI_PI_IS_INITING(pip))) { 3185 /* 3186 * Node is busy 3187 */ 3188 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3189 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3190 MDI_PI_UNLOCK(pip); 3191 return (MDI_BUSY); 3192 } 3193 3194 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3195 /* 3196 * Give a chance for pending I/Os to complete. 3197 */ 3198 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3199 "!%d cmds still pending on path: %s %p", 3200 MDI_PI(pip)->pi_ref_cnt, 3201 mdi_pi_spathname(pip), (void *)pip)); 3202 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3203 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3204 TR_CLOCK_TICK) == -1) { 3205 /* 3206 * The timeout time reached without ref_cnt being zero 3207 * being signaled. 
3208 */ 3209 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3210 "!Timeout reached on path %s %p without the cond", 3211 mdi_pi_spathname(pip), (void *)pip)); 3212 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3213 "!%d cmds still pending on path %s %p", 3214 MDI_PI(pip)->pi_ref_cnt, 3215 mdi_pi_spathname(pip), (void *)pip)); 3216 MDI_PI_UNLOCK(pip); 3217 return (MDI_BUSY); 3218 } 3219 } 3220 if (MDI_PI(pip)->pi_pm_held) { 3221 client_held = 1; 3222 } 3223 MDI_PI_UNLOCK(pip); 3224 3225 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3226 3227 MDI_CLIENT_LOCK(ct); 3228 3229 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3230 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3231 3232 /* 3233 * Wait till failover is complete before removing this node. 3234 */ 3235 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3236 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3237 3238 MDI_CLIENT_UNLOCK(ct); 3239 MDI_VHCI_CLIENT_LOCK(vh); 3240 MDI_CLIENT_LOCK(ct); 3241 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3242 3243 if (!MDI_PI_IS_INITING(pip)) { 3244 f = vh->vh_ops->vo_pi_uninit; 3245 if (f != NULL) { 3246 rv = (*f)(vh->vh_dip, pip, 0); 3247 } 3248 } 3249 /* 3250 * If vo_pi_uninit() completed successfully. 3251 */ 3252 if (rv == MDI_SUCCESS) { 3253 if (client_held) { 3254 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3255 "i_mdi_pm_rele_client\n")); 3256 i_mdi_pm_rele_client(ct, 1); 3257 } 3258 i_mdi_pi_free(ph, pip, ct); 3259 if (ct->ct_path_count == 0) { 3260 /* 3261 * Client lost its last path. 
3262 * Clean up the client device 3263 */ 3264 MDI_CLIENT_UNLOCK(ct); 3265 (void) i_mdi_client_free(ct->ct_vhci, ct); 3266 MDI_VHCI_CLIENT_UNLOCK(vh); 3267 return (rv); 3268 } 3269 } 3270 MDI_CLIENT_UNLOCK(ct); 3271 MDI_VHCI_CLIENT_UNLOCK(vh); 3272 3273 if (rv == MDI_FAILURE) 3274 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3275 3276 return (rv); 3277 } 3278 3279 /* 3280 * i_mdi_pi_free(): 3281 * Free the mdi_pathinfo node 3282 */ 3283 static void 3284 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3285 { 3286 int ct_circular; 3287 int ph_circular; 3288 3289 ASSERT(MDI_CLIENT_LOCKED(ct)); 3290 3291 /* 3292 * remove any per-path kstats 3293 */ 3294 i_mdi_pi_kstat_destroy(pip); 3295 3296 /* See comments in i_mdi_pi_alloc() */ 3297 ndi_devi_enter(ct->ct_dip, &ct_circular); 3298 ndi_devi_enter(ph->ph_dip, &ph_circular); 3299 3300 i_mdi_client_remove_path(ct, pip); 3301 i_mdi_phci_remove_path(ph, pip); 3302 3303 ndi_devi_exit(ph->ph_dip, ph_circular); 3304 ndi_devi_exit(ct->ct_dip, ct_circular); 3305 3306 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3307 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3308 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3309 if (MDI_PI(pip)->pi_addr) { 3310 kmem_free(MDI_PI(pip)->pi_addr, 3311 strlen(MDI_PI(pip)->pi_addr) + 1); 3312 MDI_PI(pip)->pi_addr = NULL; 3313 } 3314 3315 if (MDI_PI(pip)->pi_prop) { 3316 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3317 MDI_PI(pip)->pi_prop = NULL; 3318 } 3319 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3320 } 3321 3322 3323 /* 3324 * i_mdi_phci_remove_path(): 3325 * Remove a mdi_pathinfo node from pHCI list. 
3326 * Notes: 3327 * Caller should hold per-pHCI mutex 3328 */ 3329 static void 3330 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3331 { 3332 mdi_pathinfo_t *prev = NULL; 3333 mdi_pathinfo_t *path = NULL; 3334 3335 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3336 3337 MDI_PHCI_LOCK(ph); 3338 path = ph->ph_path_head; 3339 while (path != NULL) { 3340 if (path == pip) { 3341 break; 3342 } 3343 prev = path; 3344 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3345 } 3346 3347 if (path) { 3348 ph->ph_path_count--; 3349 if (prev) { 3350 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3351 } else { 3352 ph->ph_path_head = 3353 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3354 } 3355 if (ph->ph_path_tail == path) { 3356 ph->ph_path_tail = prev; 3357 } 3358 } 3359 3360 /* 3361 * Clear the pHCI link 3362 */ 3363 MDI_PI(pip)->pi_phci_link = NULL; 3364 MDI_PI(pip)->pi_phci = NULL; 3365 MDI_PHCI_UNLOCK(ph); 3366 } 3367 3368 /* 3369 * i_mdi_client_remove_path(): 3370 * Remove a mdi_pathinfo node from client path list. 
3371 */ 3372 static void 3373 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3374 { 3375 mdi_pathinfo_t *prev = NULL; 3376 mdi_pathinfo_t *path; 3377 3378 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3379 3380 ASSERT(MDI_CLIENT_LOCKED(ct)); 3381 path = ct->ct_path_head; 3382 while (path != NULL) { 3383 if (path == pip) { 3384 break; 3385 } 3386 prev = path; 3387 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3388 } 3389 3390 if (path) { 3391 ct->ct_path_count--; 3392 if (prev) { 3393 MDI_PI(prev)->pi_client_link = 3394 MDI_PI(path)->pi_client_link; 3395 } else { 3396 ct->ct_path_head = 3397 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3398 } 3399 if (ct->ct_path_tail == path) { 3400 ct->ct_path_tail = prev; 3401 } 3402 if (ct->ct_path_last == path) { 3403 ct->ct_path_last = ct->ct_path_head; 3404 } 3405 } 3406 MDI_PI(pip)->pi_client_link = NULL; 3407 MDI_PI(pip)->pi_client = NULL; 3408 } 3409 3410 /* 3411 * i_mdi_pi_state_change(): 3412 * online a mdi_pathinfo node 3413 * 3414 * Return Values: 3415 * MDI_SUCCESS 3416 * MDI_FAILURE 3417 */ 3418 /*ARGSUSED*/ 3419 static int 3420 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3421 { 3422 int rv = MDI_SUCCESS; 3423 mdi_vhci_t *vh; 3424 mdi_phci_t *ph; 3425 mdi_client_t *ct; 3426 int (*f)(); 3427 dev_info_t *cdip; 3428 3429 MDI_PI_LOCK(pip); 3430 3431 ph = MDI_PI(pip)->pi_phci; 3432 ASSERT(ph); 3433 if (ph == NULL) { 3434 /* 3435 * Invalid pHCI device, fail the request 3436 */ 3437 MDI_PI_UNLOCK(pip); 3438 MDI_DEBUG(1, (MDI_WARN, NULL, 3439 "!invalid phci: pip %s %p", 3440 mdi_pi_spathname(pip), (void *)pip)); 3441 return (MDI_FAILURE); 3442 } 3443 3444 vh = ph->ph_vhci; 3445 ASSERT(vh); 3446 if (vh == NULL) { 3447 /* 3448 * Invalid vHCI device, fail the request 3449 */ 3450 MDI_PI_UNLOCK(pip); 3451 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3452 "!invalid vhci: pip %s %p", 3453 mdi_pi_spathname(pip), (void *)pip)); 3454 return (MDI_FAILURE); 3455 } 3456 3457 ct = 
MDI_PI(pip)->pi_client; 3458 ASSERT(ct != NULL); 3459 if (ct == NULL) { 3460 /* 3461 * Invalid client device, fail the request 3462 */ 3463 MDI_PI_UNLOCK(pip); 3464 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3465 "!invalid client: pip %s %p", 3466 mdi_pi_spathname(pip), (void *)pip)); 3467 return (MDI_FAILURE); 3468 } 3469 3470 /* 3471 * If this path has not been initialized yet, Callback vHCI driver's 3472 * pathinfo node initialize entry point 3473 */ 3474 3475 if (MDI_PI_IS_INITING(pip)) { 3476 MDI_PI_UNLOCK(pip); 3477 f = vh->vh_ops->vo_pi_init; 3478 if (f != NULL) { 3479 rv = (*f)(vh->vh_dip, pip, 0); 3480 if (rv != MDI_SUCCESS) { 3481 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3482 "!vo_pi_init failed: vHCI %p, pip %s %p", 3483 (void *)vh, mdi_pi_spathname(pip), 3484 (void *)pip)); 3485 return (MDI_FAILURE); 3486 } 3487 } 3488 MDI_PI_LOCK(pip); 3489 MDI_PI_CLEAR_TRANSIENT(pip); 3490 } 3491 3492 /* 3493 * Do not allow state transition when pHCI is in offline/suspended 3494 * states 3495 */ 3496 i_mdi_phci_lock(ph, pip); 3497 if (MDI_PHCI_IS_READY(ph) == 0) { 3498 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3499 "!pHCI not ready, pHCI=%p", (void *)ph)); 3500 MDI_PI_UNLOCK(pip); 3501 i_mdi_phci_unlock(ph); 3502 return (MDI_BUSY); 3503 } 3504 MDI_PHCI_UNSTABLE(ph); 3505 i_mdi_phci_unlock(ph); 3506 3507 /* 3508 * Check if mdi_pathinfo state is in transient state. 3509 * If yes, offlining is in progress and wait till transient state is 3510 * cleared. 3511 */ 3512 if (MDI_PI_IS_TRANSIENT(pip)) { 3513 while (MDI_PI_IS_TRANSIENT(pip)) { 3514 cv_wait(&MDI_PI(pip)->pi_state_cv, 3515 &MDI_PI(pip)->pi_mutex); 3516 } 3517 } 3518 3519 /* 3520 * Grab the client lock in reverse order sequence and release the 3521 * mdi_pathinfo mutex. 
3522 */ 3523 i_mdi_client_lock(ct, pip); 3524 MDI_PI_UNLOCK(pip); 3525 3526 /* 3527 * Wait till failover state is cleared 3528 */ 3529 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3530 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3531 3532 /* 3533 * Mark the mdi_pathinfo node state as transient 3534 */ 3535 MDI_PI_LOCK(pip); 3536 switch (state) { 3537 case MDI_PATHINFO_STATE_ONLINE: 3538 MDI_PI_SET_ONLINING(pip); 3539 break; 3540 3541 case MDI_PATHINFO_STATE_STANDBY: 3542 MDI_PI_SET_STANDBYING(pip); 3543 break; 3544 3545 case MDI_PATHINFO_STATE_FAULT: 3546 /* 3547 * Mark the pathinfo state as FAULTED 3548 */ 3549 MDI_PI_SET_FAULTING(pip); 3550 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3551 break; 3552 3553 case MDI_PATHINFO_STATE_OFFLINE: 3554 /* 3555 * ndi_devi_offline() cannot hold pip or ct locks. 3556 */ 3557 MDI_PI_UNLOCK(pip); 3558 3559 /* 3560 * If this is a user initiated path online->offline operation 3561 * who's success would transition a client from DEGRADED to 3562 * FAILED then only proceed if we can offline the client first. 
3563 */ 3564 cdip = ct->ct_dip; 3565 if ((flag & NDI_USER_REQ) && 3566 MDI_PI_IS_ONLINE(pip) && 3567 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3568 i_mdi_client_unlock(ct); 3569 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN); 3570 if (rv != NDI_SUCCESS) { 3571 /* 3572 * Convert to MDI error code 3573 */ 3574 switch (rv) { 3575 case NDI_BUSY: 3576 rv = MDI_BUSY; 3577 break; 3578 default: 3579 rv = MDI_FAILURE; 3580 break; 3581 } 3582 goto state_change_exit; 3583 } else { 3584 i_mdi_client_lock(ct, NULL); 3585 } 3586 } 3587 /* 3588 * Mark the mdi_pathinfo node state as transient 3589 */ 3590 MDI_PI_LOCK(pip); 3591 MDI_PI_SET_OFFLINING(pip); 3592 break; 3593 } 3594 MDI_PI_UNLOCK(pip); 3595 MDI_CLIENT_UNSTABLE(ct); 3596 i_mdi_client_unlock(ct); 3597 3598 f = vh->vh_ops->vo_pi_state_change; 3599 if (f != NULL) 3600 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3601 3602 MDI_CLIENT_LOCK(ct); 3603 MDI_PI_LOCK(pip); 3604 if (rv == MDI_NOT_SUPPORTED) { 3605 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3606 } 3607 if (rv != MDI_SUCCESS) { 3608 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip, 3609 "vo_pi_state_change failed: rv %x", rv)); 3610 } 3611 if (MDI_PI_IS_TRANSIENT(pip)) { 3612 if (rv == MDI_SUCCESS) { 3613 MDI_PI_CLEAR_TRANSIENT(pip); 3614 } else { 3615 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3616 } 3617 } 3618 3619 /* 3620 * Wake anyone waiting for this mdi_pathinfo node 3621 */ 3622 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3623 MDI_PI_UNLOCK(pip); 3624 3625 /* 3626 * Mark the client device as stable 3627 */ 3628 MDI_CLIENT_STABLE(ct); 3629 if (rv == MDI_SUCCESS) { 3630 if (ct->ct_unstable == 0) { 3631 cdip = ct->ct_dip; 3632 3633 /* 3634 * Onlining the mdi_pathinfo node will impact the 3635 * client state Update the client and dev_info node 3636 * state accordingly 3637 */ 3638 rv = NDI_SUCCESS; 3639 i_mdi_client_update_state(ct); 3640 switch (MDI_CLIENT_STATE(ct)) { 3641 case MDI_CLIENT_STATE_OPTIMAL: 3642 case MDI_CLIENT_STATE_DEGRADED: 3643 if (cdip && 
!i_ddi_devi_attached(cdip) && 3644 ((state == MDI_PATHINFO_STATE_ONLINE) || 3645 (state == MDI_PATHINFO_STATE_STANDBY))) { 3646 3647 /* 3648 * Must do ndi_devi_online() through 3649 * hotplug thread for deferred 3650 * attach mechanism to work 3651 */ 3652 MDI_CLIENT_UNLOCK(ct); 3653 rv = ndi_devi_online(cdip, 0); 3654 MDI_CLIENT_LOCK(ct); 3655 if ((rv != NDI_SUCCESS) && 3656 (MDI_CLIENT_STATE(ct) == 3657 MDI_CLIENT_STATE_DEGRADED)) { 3658 /* 3659 * ndi_devi_online failed. 3660 * Reset client flags to 3661 * offline. 3662 */ 3663 MDI_DEBUG(1, (MDI_WARN, cdip, 3664 "!ndi_devi_online failed " 3665 "error %x", rv)); 3666 MDI_CLIENT_SET_OFFLINE(ct); 3667 } 3668 if (rv != NDI_SUCCESS) { 3669 /* Reset the path state */ 3670 MDI_PI_LOCK(pip); 3671 MDI_PI(pip)->pi_state = 3672 MDI_PI_OLD_STATE(pip); 3673 MDI_PI_UNLOCK(pip); 3674 } 3675 } 3676 break; 3677 3678 case MDI_CLIENT_STATE_FAILED: 3679 /* 3680 * This is the last path case for 3681 * non-user initiated events. 3682 */ 3683 if (((flag & NDI_USER_REQ) == 0) && 3684 cdip && (i_ddi_node_state(cdip) >= 3685 DS_INITIALIZED)) { 3686 MDI_CLIENT_UNLOCK(ct); 3687 rv = ndi_devi_offline(cdip, 3688 NDI_DEVFS_CLEAN); 3689 MDI_CLIENT_LOCK(ct); 3690 3691 if (rv != NDI_SUCCESS) { 3692 /* 3693 * ndi_devi_offline failed. 3694 * Reset client flags to 3695 * online as the path could not 3696 * be offlined. 3697 */ 3698 MDI_DEBUG(1, (MDI_WARN, cdip, 3699 "!ndi_devi_offline failed: " 3700 "error %x", rv)); 3701 MDI_CLIENT_SET_ONLINE(ct); 3702 } 3703 } 3704 break; 3705 } 3706 /* 3707 * Convert to MDI error code 3708 */ 3709 switch (rv) { 3710 case NDI_SUCCESS: 3711 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3712 i_mdi_report_path_state(ct, pip); 3713 rv = MDI_SUCCESS; 3714 break; 3715 case NDI_BUSY: 3716 rv = MDI_BUSY; 3717 break; 3718 default: 3719 rv = MDI_FAILURE; 3720 break; 3721 } 3722 } 3723 } 3724 MDI_CLIENT_UNLOCK(ct); 3725 3726 state_change_exit: 3727 /* 3728 * Mark the pHCI as stable again. 
3729 */ 3730 MDI_PHCI_LOCK(ph); 3731 MDI_PHCI_STABLE(ph); 3732 MDI_PHCI_UNLOCK(ph); 3733 return (rv); 3734 } 3735 3736 /* 3737 * mdi_pi_online(): 3738 * Place the path_info node in the online state. The path is 3739 * now available to be selected by mdi_select_path() for 3740 * transporting I/O requests to client devices. 3741 * Return Values: 3742 * MDI_SUCCESS 3743 * MDI_FAILURE 3744 */ 3745 int 3746 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3747 { 3748 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3749 int client_held = 0; 3750 int rv; 3751 3752 ASSERT(ct != NULL); 3753 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3754 if (rv != MDI_SUCCESS) 3755 return (rv); 3756 3757 MDI_PI_LOCK(pip); 3758 if (MDI_PI(pip)->pi_pm_held == 0) { 3759 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3760 "i_mdi_pm_hold_pip %p", (void *)pip)); 3761 i_mdi_pm_hold_pip(pip); 3762 client_held = 1; 3763 } 3764 MDI_PI_UNLOCK(pip); 3765 3766 if (client_held) { 3767 MDI_CLIENT_LOCK(ct); 3768 if (ct->ct_power_cnt == 0) { 3769 rv = i_mdi_power_all_phci(ct); 3770 } 3771 3772 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3773 "i_mdi_pm_hold_client %p", (void *)ct)); 3774 i_mdi_pm_hold_client(ct, 1); 3775 MDI_CLIENT_UNLOCK(ct); 3776 } 3777 3778 return (rv); 3779 } 3780 3781 /* 3782 * mdi_pi_standby(): 3783 * Place the mdi_pathinfo node in standby state 3784 * 3785 * Return Values: 3786 * MDI_SUCCESS 3787 * MDI_FAILURE 3788 */ 3789 int 3790 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3791 { 3792 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3793 } 3794 3795 /* 3796 * mdi_pi_fault(): 3797 * Place the mdi_pathinfo node in fault'ed state 3798 * Return Values: 3799 * MDI_SUCCESS 3800 * MDI_FAILURE 3801 */ 3802 int 3803 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3804 { 3805 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3806 } 3807 3808 /* 3809 * mdi_pi_offline(): 3810 * Offline a mdi_pathinfo node. 
3811 * Return Values: 3812 * MDI_SUCCESS 3813 * MDI_FAILURE 3814 */ 3815 int 3816 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3817 { 3818 int ret, client_held = 0; 3819 mdi_client_t *ct; 3820 3821 /* 3822 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3823 * used it to mean "user initiated operation" (i.e. devctl). Callers 3824 * should now just use NDI_USER_REQ. 3825 */ 3826 if (flags & NDI_DEVI_REMOVE) { 3827 flags &= ~NDI_DEVI_REMOVE; 3828 flags |= NDI_USER_REQ; 3829 } 3830 3831 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3832 3833 if (ret == MDI_SUCCESS) { 3834 MDI_PI_LOCK(pip); 3835 if (MDI_PI(pip)->pi_pm_held) { 3836 client_held = 1; 3837 } 3838 MDI_PI_UNLOCK(pip); 3839 3840 if (client_held) { 3841 ct = MDI_PI(pip)->pi_client; 3842 MDI_CLIENT_LOCK(ct); 3843 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3844 "i_mdi_pm_rele_client\n")); 3845 i_mdi_pm_rele_client(ct, 1); 3846 MDI_CLIENT_UNLOCK(ct); 3847 } 3848 } 3849 3850 return (ret); 3851 } 3852 3853 /* 3854 * i_mdi_pi_offline(): 3855 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3856 */ 3857 static int 3858 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3859 { 3860 dev_info_t *vdip = NULL; 3861 mdi_vhci_t *vh = NULL; 3862 mdi_client_t *ct = NULL; 3863 int (*f)(); 3864 int rv; 3865 3866 MDI_PI_LOCK(pip); 3867 ct = MDI_PI(pip)->pi_client; 3868 ASSERT(ct != NULL); 3869 3870 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3871 /* 3872 * Give a chance for pending I/Os to complete. 3873 */ 3874 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3875 "!%d cmds still pending on path %s %p", 3876 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3877 (void *)pip)); 3878 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3879 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3880 TR_CLOCK_TICK) == -1) { 3881 /* 3882 * The timeout time reached without ref_cnt being zero 3883 * being signaled. 
3884 */ 3885 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3886 "!Timeout reached on path %s %p without the cond", 3887 mdi_pi_spathname(pip), (void *)pip)); 3888 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3889 "!%d cmds still pending on path %s %p", 3890 MDI_PI(pip)->pi_ref_cnt, 3891 mdi_pi_spathname(pip), (void *)pip)); 3892 } 3893 } 3894 vh = ct->ct_vhci; 3895 vdip = vh->vh_dip; 3896 3897 /* 3898 * Notify vHCI that has registered this event 3899 */ 3900 ASSERT(vh->vh_ops); 3901 f = vh->vh_ops->vo_pi_state_change; 3902 3903 if (f != NULL) { 3904 MDI_PI_UNLOCK(pip); 3905 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3906 flags)) != MDI_SUCCESS) { 3907 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3908 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3909 ddi_driver_name(vdip), ddi_get_instance(vdip), 3910 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3911 } 3912 MDI_PI_LOCK(pip); 3913 } 3914 3915 /* 3916 * Set the mdi_pathinfo node state and clear the transient condition 3917 */ 3918 MDI_PI_SET_OFFLINE(pip); 3919 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3920 MDI_PI_UNLOCK(pip); 3921 3922 MDI_CLIENT_LOCK(ct); 3923 if (rv == MDI_SUCCESS) { 3924 if (ct->ct_unstable == 0) { 3925 dev_info_t *cdip = ct->ct_dip; 3926 3927 /* 3928 * Onlining the mdi_pathinfo node will impact the 3929 * client state Update the client and dev_info node 3930 * state accordingly 3931 */ 3932 i_mdi_client_update_state(ct); 3933 rv = NDI_SUCCESS; 3934 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3935 if (cdip && 3936 (i_ddi_node_state(cdip) >= 3937 DS_INITIALIZED)) { 3938 MDI_CLIENT_UNLOCK(ct); 3939 rv = ndi_devi_offline(cdip, 3940 NDI_DEVFS_CLEAN); 3941 MDI_CLIENT_LOCK(ct); 3942 if (rv != NDI_SUCCESS) { 3943 /* 3944 * ndi_devi_offline failed. 3945 * Reset client flags to 3946 * online. 
3947 */ 3948 MDI_DEBUG(4, (MDI_WARN, cdip, 3949 "ndi_devi_offline failed: " 3950 "error %x", rv)); 3951 MDI_CLIENT_SET_ONLINE(ct); 3952 } 3953 } 3954 } 3955 /* 3956 * Convert to MDI error code 3957 */ 3958 switch (rv) { 3959 case NDI_SUCCESS: 3960 rv = MDI_SUCCESS; 3961 break; 3962 case NDI_BUSY: 3963 rv = MDI_BUSY; 3964 break; 3965 default: 3966 rv = MDI_FAILURE; 3967 break; 3968 } 3969 } 3970 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3971 i_mdi_report_path_state(ct, pip); 3972 } 3973 3974 MDI_CLIENT_UNLOCK(ct); 3975 3976 /* 3977 * Change in the mdi_pathinfo node state will impact the client state 3978 */ 3979 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3980 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3981 return (rv); 3982 } 3983 3984 /* 3985 * mdi_pi_get_node_name(): 3986 * Get the name associated with a mdi_pathinfo node. 3987 * Since pathinfo nodes are not directly named, we 3988 * return the node_name of the client. 3989 * 3990 * Return Values: 3991 * char * 3992 */ 3993 char * 3994 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 3995 { 3996 mdi_client_t *ct; 3997 3998 if (pip == NULL) 3999 return (NULL); 4000 ct = MDI_PI(pip)->pi_client; 4001 if ((ct == NULL) || (ct->ct_dip == NULL)) 4002 return (NULL); 4003 return (ddi_node_name(ct->ct_dip)); 4004 } 4005 4006 /* 4007 * mdi_pi_get_addr(): 4008 * Get the unit address associated with a mdi_pathinfo node 4009 * 4010 * Return Values: 4011 * char * 4012 */ 4013 char * 4014 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4015 { 4016 if (pip == NULL) 4017 return (NULL); 4018 4019 return (MDI_PI(pip)->pi_addr); 4020 } 4021 4022 /* 4023 * mdi_pi_get_path_instance(): 4024 * Get the 'path_instance' of a mdi_pathinfo node 4025 * 4026 * Return Values: 4027 * path_instance 4028 */ 4029 int 4030 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4031 { 4032 if (pip == NULL) 4033 return (0); 4034 4035 return (MDI_PI(pip)->pi_path_instance); 4036 } 4037 4038 /* 4039 * mdi_pi_pathname(): 4040 * Return pointer to path to pathinfo node. 
4041 */ 4042 char * 4043 mdi_pi_pathname(mdi_pathinfo_t *pip) 4044 { 4045 if (pip == NULL) 4046 return (NULL); 4047 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4048 } 4049 4050 /* 4051 * mdi_pi_spathname(): 4052 * Return pointer to shortpath to pathinfo node. Used for debug 4053 * messages, so return "" instead of NULL when unknown. 4054 */ 4055 char * 4056 mdi_pi_spathname(mdi_pathinfo_t *pip) 4057 { 4058 char *spath = ""; 4059 4060 if (pip) { 4061 spath = mdi_pi_spathname_by_instance( 4062 mdi_pi_get_path_instance(pip)); 4063 if (spath == NULL) 4064 spath = ""; 4065 } 4066 return (spath); 4067 } 4068 4069 char * 4070 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4071 { 4072 char *obp_path = NULL; 4073 if ((pip == NULL) || (path == NULL)) 4074 return (NULL); 4075 4076 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4077 (void) strcpy(path, obp_path); 4078 (void) mdi_prop_free(obp_path); 4079 } else { 4080 path = NULL; 4081 } 4082 return (path); 4083 } 4084 4085 int 4086 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4087 { 4088 dev_info_t *pdip; 4089 char *obp_path = NULL; 4090 int rc = MDI_FAILURE; 4091 4092 if (pip == NULL) 4093 return (MDI_FAILURE); 4094 4095 pdip = mdi_pi_get_phci(pip); 4096 if (pdip == NULL) 4097 return (MDI_FAILURE); 4098 4099 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4100 4101 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4102 (void) ddi_pathname(pdip, obp_path); 4103 } 4104 4105 if (component) { 4106 (void) strncat(obp_path, "/", MAXPATHLEN); 4107 (void) strncat(obp_path, component, MAXPATHLEN); 4108 } 4109 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4110 4111 if (obp_path) 4112 kmem_free(obp_path, MAXPATHLEN); 4113 return (rc); 4114 } 4115 4116 /* 4117 * mdi_pi_get_client(): 4118 * Get the client devinfo associated with a mdi_pathinfo node 4119 * 4120 * Return Values: 4121 * Handle to client device dev_info node 4122 */ 4123 dev_info_t * 4124 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4125 { 4126 dev_info_t *dip = NULL; 4127 if (pip) { 4128 dip = MDI_PI(pip)->pi_client->ct_dip; 4129 } 4130 return (dip); 4131 } 4132 4133 /* 4134 * mdi_pi_get_phci(): 4135 * Get the pHCI devinfo associated with the mdi_pathinfo node 4136 * Return Values: 4137 * Handle to dev_info node 4138 */ 4139 dev_info_t * 4140 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4141 { 4142 dev_info_t *dip = NULL; 4143 mdi_phci_t *ph; 4144 4145 if (pip) { 4146 ph = MDI_PI(pip)->pi_phci; 4147 if (ph) 4148 dip = ph->ph_dip; 4149 } 4150 return (dip); 4151 } 4152 4153 /* 4154 * mdi_pi_get_client_private(): 4155 * Get the client private information associated with the 4156 * mdi_pathinfo node 4157 */ 4158 void * 4159 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4160 { 4161 void *cprivate = NULL; 4162 if (pip) { 4163 cprivate = MDI_PI(pip)->pi_cprivate; 4164 } 4165 return (cprivate); 4166 } 4167 4168 /* 4169 * mdi_pi_set_client_private(): 4170 * Set the client private information in the mdi_pathinfo node 4171 */ 4172 void 4173 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4174 { 4175 if (pip) { 4176 MDI_PI(pip)->pi_cprivate = priv; 4177 } 4178 } 4179 4180 /* 4181 * mdi_pi_get_phci_private(): 4182 * Get the pHCI private information associated with the 4183 * mdi_pathinfo node 4184 */ 4185 caddr_t 4186 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4187 { 4188 caddr_t pprivate = NULL; 4189 4190 if (pip) { 4191 pprivate = MDI_PI(pip)->pi_pprivate; 4192 } 4193 return (pprivate); 4194 } 4195 4196 /* 4197 * mdi_pi_set_phci_private(): 4198 * Set the pHCI private information in the mdi_pathinfo node 4199 */ 4200 void 4201 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4202 { 4203 if (pip) { 4204 MDI_PI(pip)->pi_pprivate = priv; 4205 } 4206 } 4207 4208 /* 4209 * mdi_pi_get_state(): 4210 * Get the mdi_pathinfo node state. 
Transient states are internal 4211 * and not provided to the users 4212 */ 4213 mdi_pathinfo_state_t 4214 mdi_pi_get_state(mdi_pathinfo_t *pip) 4215 { 4216 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4217 4218 if (pip) { 4219 if (MDI_PI_IS_TRANSIENT(pip)) { 4220 /* 4221 * mdi_pathinfo is in state transition. Return the 4222 * last good state. 4223 */ 4224 state = MDI_PI_OLD_STATE(pip); 4225 } else { 4226 state = MDI_PI_STATE(pip); 4227 } 4228 } 4229 return (state); 4230 } 4231 4232 /* 4233 * mdi_pi_get_flags(): 4234 * Get the mdi_pathinfo node flags. 4235 */ 4236 uint_t 4237 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4238 { 4239 return (pip ? MDI_PI(pip)->pi_flags : 0); 4240 } 4241 4242 /* 4243 * Note that the following function needs to be the new interface for 4244 * mdi_pi_get_state when mpxio gets integrated to ON. 4245 */ 4246 int 4247 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4248 uint32_t *ext_state) 4249 { 4250 *state = MDI_PATHINFO_STATE_INIT; 4251 4252 if (pip) { 4253 if (MDI_PI_IS_TRANSIENT(pip)) { 4254 /* 4255 * mdi_pathinfo is in state transition. Return the 4256 * last good state. 
4257 */ 4258 *state = MDI_PI_OLD_STATE(pip); 4259 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4260 } else { 4261 *state = MDI_PI_STATE(pip); 4262 *ext_state = MDI_PI_EXT_STATE(pip); 4263 } 4264 } 4265 return (MDI_SUCCESS); 4266 } 4267 4268 /* 4269 * mdi_pi_get_preferred: 4270 * Get the preferred path flag 4271 */ 4272 int 4273 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4274 { 4275 if (pip) { 4276 return (MDI_PI(pip)->pi_preferred); 4277 } 4278 return (0); 4279 } 4280 4281 /* 4282 * mdi_pi_set_preferred: 4283 * Set the preferred path flag 4284 */ 4285 void 4286 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4287 { 4288 if (pip) { 4289 MDI_PI(pip)->pi_preferred = preferred; 4290 } 4291 } 4292 4293 /* 4294 * mdi_pi_set_state(): 4295 * Set the mdi_pathinfo node state 4296 */ 4297 void 4298 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4299 { 4300 uint32_t ext_state; 4301 4302 if (pip) { 4303 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4304 MDI_PI(pip)->pi_state = state; 4305 MDI_PI(pip)->pi_state |= ext_state; 4306 4307 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4308 i_ddi_di_cache_invalidate(); 4309 } 4310 } 4311 4312 /* 4313 * Property functions: 4314 */ 4315 int 4316 i_map_nvlist_error_to_mdi(int val) 4317 { 4318 int rv; 4319 4320 switch (val) { 4321 case 0: 4322 rv = DDI_PROP_SUCCESS; 4323 break; 4324 case EINVAL: 4325 case ENOTSUP: 4326 rv = DDI_PROP_INVAL_ARG; 4327 break; 4328 case ENOMEM: 4329 rv = DDI_PROP_NO_MEMORY; 4330 break; 4331 default: 4332 rv = DDI_PROP_NOT_FOUND; 4333 break; 4334 } 4335 return (rv); 4336 } 4337 4338 /* 4339 * mdi_pi_get_next_prop(): 4340 * Property walk function. The caller should hold mdi_pi_lock() 4341 * and release by calling mdi_pi_unlock() at the end of walk to 4342 * get a consistent value. 
4343 */ 4344 nvpair_t * 4345 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4346 { 4347 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4348 return (NULL); 4349 } 4350 ASSERT(MDI_PI_LOCKED(pip)); 4351 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4352 } 4353 4354 /* 4355 * mdi_prop_remove(): 4356 * Remove the named property from the named list. 4357 */ 4358 int 4359 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4360 { 4361 if (pip == NULL) { 4362 return (DDI_PROP_NOT_FOUND); 4363 } 4364 ASSERT(!MDI_PI_LOCKED(pip)); 4365 MDI_PI_LOCK(pip); 4366 if (MDI_PI(pip)->pi_prop == NULL) { 4367 MDI_PI_UNLOCK(pip); 4368 return (DDI_PROP_NOT_FOUND); 4369 } 4370 if (name) { 4371 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4372 } else { 4373 char nvp_name[MAXNAMELEN]; 4374 nvpair_t *nvp; 4375 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4376 while (nvp) { 4377 nvpair_t *next; 4378 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4379 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4380 nvpair_name(nvp)); 4381 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4382 nvp_name); 4383 nvp = next; 4384 } 4385 } 4386 MDI_PI_UNLOCK(pip); 4387 return (DDI_PROP_SUCCESS); 4388 } 4389 4390 /* 4391 * mdi_prop_size(): 4392 * Get buffer size needed to pack the property data. 4393 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4394 * buffer size. 4395 */ 4396 int 4397 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4398 { 4399 int rv; 4400 size_t bufsize; 4401 4402 *buflenp = 0; 4403 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4404 return (DDI_PROP_NOT_FOUND); 4405 } 4406 ASSERT(MDI_PI_LOCKED(pip)); 4407 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4408 &bufsize, NV_ENCODE_NATIVE); 4409 *buflenp = bufsize; 4410 return (i_map_nvlist_error_to_mdi(rv)); 4411 } 4412 4413 /* 4414 * mdi_prop_pack(): 4415 * pack the property list. 
The caller should hold the 4416 * mdi_pathinfo_t node to get a consistent data 4417 */ 4418 int 4419 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4420 { 4421 int rv; 4422 size_t bufsize; 4423 4424 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4425 return (DDI_PROP_NOT_FOUND); 4426 } 4427 4428 ASSERT(MDI_PI_LOCKED(pip)); 4429 4430 bufsize = buflen; 4431 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4432 NV_ENCODE_NATIVE, KM_SLEEP); 4433 4434 return (i_map_nvlist_error_to_mdi(rv)); 4435 } 4436 4437 /* 4438 * mdi_prop_update_byte(): 4439 * Create/Update a byte property 4440 */ 4441 int 4442 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4443 { 4444 int rv; 4445 4446 if (pip == NULL) { 4447 return (DDI_PROP_INVAL_ARG); 4448 } 4449 ASSERT(!MDI_PI_LOCKED(pip)); 4450 MDI_PI_LOCK(pip); 4451 if (MDI_PI(pip)->pi_prop == NULL) { 4452 MDI_PI_UNLOCK(pip); 4453 return (DDI_PROP_NOT_FOUND); 4454 } 4455 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4456 MDI_PI_UNLOCK(pip); 4457 return (i_map_nvlist_error_to_mdi(rv)); 4458 } 4459 4460 /* 4461 * mdi_prop_update_byte_array(): 4462 * Create/Update a byte array property 4463 */ 4464 int 4465 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4466 uint_t nelements) 4467 { 4468 int rv; 4469 4470 if (pip == NULL) { 4471 return (DDI_PROP_INVAL_ARG); 4472 } 4473 ASSERT(!MDI_PI_LOCKED(pip)); 4474 MDI_PI_LOCK(pip); 4475 if (MDI_PI(pip)->pi_prop == NULL) { 4476 MDI_PI_UNLOCK(pip); 4477 return (DDI_PROP_NOT_FOUND); 4478 } 4479 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4480 MDI_PI_UNLOCK(pip); 4481 return (i_map_nvlist_error_to_mdi(rv)); 4482 } 4483 4484 /* 4485 * mdi_prop_update_int(): 4486 * Create/Update a 32 bit integer property 4487 */ 4488 int 4489 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4490 { 4491 int rv; 4492 4493 if (pip == NULL) { 4494 return (DDI_PROP_INVAL_ARG); 4495 } 4496 
ASSERT(!MDI_PI_LOCKED(pip)); 4497 MDI_PI_LOCK(pip); 4498 if (MDI_PI(pip)->pi_prop == NULL) { 4499 MDI_PI_UNLOCK(pip); 4500 return (DDI_PROP_NOT_FOUND); 4501 } 4502 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4503 MDI_PI_UNLOCK(pip); 4504 return (i_map_nvlist_error_to_mdi(rv)); 4505 } 4506 4507 /* 4508 * mdi_prop_update_int64(): 4509 * Create/Update a 64 bit integer property 4510 */ 4511 int 4512 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4513 { 4514 int rv; 4515 4516 if (pip == NULL) { 4517 return (DDI_PROP_INVAL_ARG); 4518 } 4519 ASSERT(!MDI_PI_LOCKED(pip)); 4520 MDI_PI_LOCK(pip); 4521 if (MDI_PI(pip)->pi_prop == NULL) { 4522 MDI_PI_UNLOCK(pip); 4523 return (DDI_PROP_NOT_FOUND); 4524 } 4525 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4526 MDI_PI_UNLOCK(pip); 4527 return (i_map_nvlist_error_to_mdi(rv)); 4528 } 4529 4530 /* 4531 * mdi_prop_update_int_array(): 4532 * Create/Update a int array property 4533 */ 4534 int 4535 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4536 uint_t nelements) 4537 { 4538 int rv; 4539 4540 if (pip == NULL) { 4541 return (DDI_PROP_INVAL_ARG); 4542 } 4543 ASSERT(!MDI_PI_LOCKED(pip)); 4544 MDI_PI_LOCK(pip); 4545 if (MDI_PI(pip)->pi_prop == NULL) { 4546 MDI_PI_UNLOCK(pip); 4547 return (DDI_PROP_NOT_FOUND); 4548 } 4549 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4550 nelements); 4551 MDI_PI_UNLOCK(pip); 4552 return (i_map_nvlist_error_to_mdi(rv)); 4553 } 4554 4555 /* 4556 * mdi_prop_update_string(): 4557 * Create/Update a string property 4558 */ 4559 int 4560 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4561 { 4562 int rv; 4563 4564 if (pip == NULL) { 4565 return (DDI_PROP_INVAL_ARG); 4566 } 4567 ASSERT(!MDI_PI_LOCKED(pip)); 4568 MDI_PI_LOCK(pip); 4569 if (MDI_PI(pip)->pi_prop == NULL) { 4570 MDI_PI_UNLOCK(pip); 4571 return (DDI_PROP_NOT_FOUND); 4572 } 4573 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4574 MDI_PI_UNLOCK(pip); 4575 return (i_map_nvlist_error_to_mdi(rv)); 4576 } 4577 4578 /* 4579 * mdi_prop_update_string_array(): 4580 * Create/Update a string array property 4581 */ 4582 int 4583 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4584 uint_t nelements) 4585 { 4586 int rv; 4587 4588 if (pip == NULL) { 4589 return (DDI_PROP_INVAL_ARG); 4590 } 4591 ASSERT(!MDI_PI_LOCKED(pip)); 4592 MDI_PI_LOCK(pip); 4593 if (MDI_PI(pip)->pi_prop == NULL) { 4594 MDI_PI_UNLOCK(pip); 4595 return (DDI_PROP_NOT_FOUND); 4596 } 4597 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4598 nelements); 4599 MDI_PI_UNLOCK(pip); 4600 return (i_map_nvlist_error_to_mdi(rv)); 4601 } 4602 4603 /* 4604 * mdi_prop_lookup_byte(): 4605 * Look for byte property identified by name. The data returned 4606 * is the actual property and valid as long as mdi_pathinfo_t node 4607 * is alive. 4608 */ 4609 int 4610 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4611 { 4612 int rv; 4613 4614 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4615 return (DDI_PROP_NOT_FOUND); 4616 } 4617 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4618 return (i_map_nvlist_error_to_mdi(rv)); 4619 } 4620 4621 4622 /* 4623 * mdi_prop_lookup_byte_array(): 4624 * Look for byte array property identified by name. The data 4625 * returned is the actual property and valid as long as 4626 * mdi_pathinfo_t node is alive. 4627 */ 4628 int 4629 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4630 uint_t *nelements) 4631 { 4632 int rv; 4633 4634 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4635 return (DDI_PROP_NOT_FOUND); 4636 } 4637 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4638 nelements); 4639 return (i_map_nvlist_error_to_mdi(rv)); 4640 } 4641 4642 /* 4643 * mdi_prop_lookup_int(): 4644 * Look for int property identified by name. 
The data returned 4645 * is the actual property and valid as long as mdi_pathinfo_t 4646 * node is alive. 4647 */ 4648 int 4649 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4650 { 4651 int rv; 4652 4653 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4654 return (DDI_PROP_NOT_FOUND); 4655 } 4656 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4657 return (i_map_nvlist_error_to_mdi(rv)); 4658 } 4659 4660 /* 4661 * mdi_prop_lookup_int64(): 4662 * Look for int64 property identified by name. The data returned 4663 * is the actual property and valid as long as mdi_pathinfo_t node 4664 * is alive. 4665 */ 4666 int 4667 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4668 { 4669 int rv; 4670 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4671 return (DDI_PROP_NOT_FOUND); 4672 } 4673 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4674 return (i_map_nvlist_error_to_mdi(rv)); 4675 } 4676 4677 /* 4678 * mdi_prop_lookup_int_array(): 4679 * Look for int array property identified by name. The data 4680 * returned is the actual property and valid as long as 4681 * mdi_pathinfo_t node is alive. 4682 */ 4683 int 4684 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4685 uint_t *nelements) 4686 { 4687 int rv; 4688 4689 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4690 return (DDI_PROP_NOT_FOUND); 4691 } 4692 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4693 (int32_t **)data, nelements); 4694 return (i_map_nvlist_error_to_mdi(rv)); 4695 } 4696 4697 /* 4698 * mdi_prop_lookup_string(): 4699 * Look for string property identified by name. The data 4700 * returned is the actual property and valid as long as 4701 * mdi_pathinfo_t node is alive. 
4702 */ 4703 int 4704 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4705 { 4706 int rv; 4707 4708 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4709 return (DDI_PROP_NOT_FOUND); 4710 } 4711 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4712 return (i_map_nvlist_error_to_mdi(rv)); 4713 } 4714 4715 /* 4716 * mdi_prop_lookup_string_array(): 4717 * Look for string array property identified by name. The data 4718 * returned is the actual property and valid as long as 4719 * mdi_pathinfo_t node is alive. 4720 */ 4721 int 4722 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4723 uint_t *nelements) 4724 { 4725 int rv; 4726 4727 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4728 return (DDI_PROP_NOT_FOUND); 4729 } 4730 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4731 nelements); 4732 return (i_map_nvlist_error_to_mdi(rv)); 4733 } 4734 4735 /* 4736 * mdi_prop_free(): 4737 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4738 * functions return the pointer to actual property data and not a 4739 * copy of it. So the data returned is valid as long as 4740 * mdi_pathinfo_t node is valid. 
4741 */ 4742 /*ARGSUSED*/ 4743 int 4744 mdi_prop_free(void *data) 4745 { 4746 return (DDI_PROP_SUCCESS); 4747 } 4748 4749 /*ARGSUSED*/ 4750 static void 4751 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4752 { 4753 char *ct_path; 4754 char *ct_status; 4755 char *status; 4756 dev_info_t *cdip = ct->ct_dip; 4757 char lb_buf[64]; 4758 int report_lb_c = 0, report_lb_p = 0; 4759 4760 ASSERT(MDI_CLIENT_LOCKED(ct)); 4761 if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) || 4762 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4763 return; 4764 } 4765 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4766 ct_status = "optimal"; 4767 report_lb_c = 1; 4768 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4769 ct_status = "degraded"; 4770 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4771 ct_status = "failed"; 4772 } else { 4773 ct_status = "unknown"; 4774 } 4775 4776 lb_buf[0] = 0; /* not interested in load balancing config */ 4777 4778 if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) { 4779 status = "removed"; 4780 } else if (MDI_PI_IS_OFFLINE(pip)) { 4781 status = "offline"; 4782 } else if (MDI_PI_IS_ONLINE(pip)) { 4783 status = "online"; 4784 report_lb_p = 1; 4785 } else if (MDI_PI_IS_STANDBY(pip)) { 4786 status = "standby"; 4787 } else if (MDI_PI_IS_FAULT(pip)) { 4788 status = "faulted"; 4789 } else { 4790 status = "unknown"; 4791 } 4792 4793 if (cdip) { 4794 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4795 4796 /* 4797 * NOTE: Keeping "multipath status: %s" and 4798 * "Load balancing: %s" format unchanged in case someone 4799 * scrubs /var/adm/messages looking for these messages. 
4800 */ 4801 if (report_lb_c && report_lb_p) { 4802 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4803 (void) snprintf(lb_buf, sizeof (lb_buf), 4804 "%s, region-size: %d", mdi_load_balance_lba, 4805 ct->ct_lb_args->region_size); 4806 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4807 (void) snprintf(lb_buf, sizeof (lb_buf), 4808 "%s", mdi_load_balance_none); 4809 } else { 4810 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4811 mdi_load_balance_rr); 4812 } 4813 4814 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4815 "?%s (%s%d) multipath status: %s: " 4816 "path %d %s is %s: Load balancing: %s\n", 4817 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4818 ddi_get_instance(cdip), ct_status, 4819 mdi_pi_get_path_instance(pip), 4820 mdi_pi_spathname(pip), status, lb_buf); 4821 } else { 4822 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4823 "?%s (%s%d) multipath status: %s: " 4824 "path %d %s is %s\n", 4825 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4826 ddi_get_instance(cdip), ct_status, 4827 mdi_pi_get_path_instance(pip), 4828 mdi_pi_spathname(pip), status); 4829 } 4830 4831 kmem_free(ct_path, MAXPATHLEN); 4832 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4833 } 4834 } 4835 4836 #ifdef DEBUG 4837 /* 4838 * i_mdi_log(): 4839 * Utility function for error message management 4840 * 4841 * NOTE: Implementation takes care of trailing \n for cmn_err, 4842 * MDI_DEBUG should not terminate fmt strings with \n. 4843 * 4844 * NOTE: If the level is >= 2, and there is no leading !?^ 4845 * then a leading ! is implied (but can be overriden via 4846 * mdi_debug_consoleonly). If you are using kmdb on the console, 4847 * consider setting mdi_debug_consoleonly to 1 as an aid. 4848 */ 4849 /*PRINTFLIKE4*/ 4850 static void 4851 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...) 
4852 { 4853 char name[MAXNAMELEN]; 4854 char buf[512]; 4855 char *bp; 4856 va_list ap; 4857 int log_only = 0; 4858 int boot_only = 0; 4859 int console_only = 0; 4860 4861 if (dip) { 4862 (void) snprintf(name, sizeof(name), "%s%d: ", 4863 ddi_driver_name(dip), ddi_get_instance(dip)); 4864 } else { 4865 name[0] = 0; 4866 } 4867 4868 va_start(ap, fmt); 4869 (void) vsnprintf(buf, sizeof(buf), fmt, ap); 4870 va_end(ap); 4871 4872 switch (buf[0]) { 4873 case '!': 4874 bp = &buf[1]; 4875 log_only = 1; 4876 break; 4877 case '?': 4878 bp = &buf[1]; 4879 boot_only = 1; 4880 break; 4881 case '^': 4882 bp = &buf[1]; 4883 console_only = 1; 4884 break; 4885 default: 4886 if (level >= 2) 4887 log_only = 1; /* ! implied */ 4888 bp = buf; 4889 break; 4890 } 4891 if (mdi_debug_logonly) { 4892 log_only = 1; 4893 boot_only = 0; 4894 console_only = 0; 4895 } 4896 if (mdi_debug_consoleonly) { 4897 log_only = 0; 4898 boot_only = 0; 4899 console_only = 1; 4900 level = CE_NOTE; 4901 goto console; 4902 } 4903 4904 switch (level) { 4905 case CE_NOTE: 4906 level = CE_CONT; 4907 /* FALLTHROUGH */ 4908 case CE_CONT: 4909 if (boot_only) { 4910 cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp); 4911 } else if (console_only) { 4912 cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp); 4913 } else if (log_only) { 4914 cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp); 4915 } else { 4916 cmn_err(level, "mdi: %s%s: %s\n", name, func, bp); 4917 } 4918 break; 4919 4920 case CE_WARN: 4921 case CE_PANIC: 4922 console: 4923 if (boot_only) { 4924 cmn_err(level, "?mdi: %s%s: %s", name, func, bp); 4925 } else if (console_only) { 4926 cmn_err(level, "^mdi: %s%s: %s", name, func, bp); 4927 } else if (log_only) { 4928 cmn_err(level, "!mdi: %s%s: %s", name, func, bp); 4929 } else { 4930 cmn_err(level, "mdi: %s%s: %s", name, func, bp); 4931 } 4932 break; 4933 default: 4934 cmn_err(level, "mdi: %s%s", name, bp); 4935 break; 4936 } 4937 } 4938 #endif /* DEBUG */ 4939 4940 void 4941 i_mdi_client_online(dev_info_t 
*ct_dip) 4942 { 4943 mdi_client_t *ct; 4944 4945 /* 4946 * Client online notification. Mark client state as online 4947 * restore our binding with dev_info node 4948 */ 4949 ct = i_devi_get_client(ct_dip); 4950 ASSERT(ct != NULL); 4951 MDI_CLIENT_LOCK(ct); 4952 MDI_CLIENT_SET_ONLINE(ct); 4953 /* catch for any memory leaks */ 4954 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4955 ct->ct_dip = ct_dip; 4956 4957 if (ct->ct_power_cnt == 0) 4958 (void) i_mdi_power_all_phci(ct); 4959 4960 MDI_DEBUG(4, (MDI_NOTE, ct_dip, 4961 "i_mdi_pm_hold_client %p", (void *)ct)); 4962 i_mdi_pm_hold_client(ct, 1); 4963 4964 MDI_CLIENT_UNLOCK(ct); 4965 } 4966 4967 void 4968 i_mdi_phci_online(dev_info_t *ph_dip) 4969 { 4970 mdi_phci_t *ph; 4971 4972 /* pHCI online notification. Mark state accordingly */ 4973 ph = i_devi_get_phci(ph_dip); 4974 ASSERT(ph != NULL); 4975 MDI_PHCI_LOCK(ph); 4976 MDI_PHCI_SET_ONLINE(ph); 4977 MDI_PHCI_UNLOCK(ph); 4978 } 4979 4980 /* 4981 * mdi_devi_online(): 4982 * Online notification from NDI framework on pHCI/client 4983 * device online. 4984 * Return Values: 4985 * NDI_SUCCESS 4986 * MDI_FAILURE 4987 */ 4988 /*ARGSUSED*/ 4989 int 4990 mdi_devi_online(dev_info_t *dip, uint_t flags) 4991 { 4992 if (MDI_PHCI(dip)) { 4993 i_mdi_phci_online(dip); 4994 } 4995 4996 if (MDI_CLIENT(dip)) { 4997 i_mdi_client_online(dip); 4998 } 4999 return (NDI_SUCCESS); 5000 } 5001 5002 /* 5003 * mdi_devi_offline(): 5004 * Offline notification from NDI framework on pHCI/Client device 5005 * offline. 
5006 * 5007 * Return Values: 5008 * NDI_SUCCESS 5009 * NDI_FAILURE 5010 */ 5011 /*ARGSUSED*/ 5012 int 5013 mdi_devi_offline(dev_info_t *dip, uint_t flags) 5014 { 5015 int rv = NDI_SUCCESS; 5016 5017 if (MDI_CLIENT(dip)) { 5018 rv = i_mdi_client_offline(dip, flags); 5019 if (rv != NDI_SUCCESS) 5020 return (rv); 5021 } 5022 5023 if (MDI_PHCI(dip)) { 5024 rv = i_mdi_phci_offline(dip, flags); 5025 5026 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 5027 /* set client back online */ 5028 i_mdi_client_online(dip); 5029 } 5030 } 5031 5032 return (rv); 5033 } 5034 5035 /*ARGSUSED*/ 5036 static int 5037 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 5038 { 5039 int rv = NDI_SUCCESS; 5040 mdi_phci_t *ph; 5041 mdi_client_t *ct; 5042 mdi_pathinfo_t *pip; 5043 mdi_pathinfo_t *next; 5044 mdi_pathinfo_t *failed_pip = NULL; 5045 dev_info_t *cdip; 5046 5047 /* 5048 * pHCI component offline notification 5049 * Make sure that this pHCI instance is free to be offlined. 5050 * If it is OK to proceed, Offline and remove all the child 5051 * mdi_pathinfo nodes. This process automatically offlines 5052 * corresponding client devices, for which this pHCI provides 5053 * critical services. 5054 */ 5055 ph = i_devi_get_phci(dip); 5056 MDI_DEBUG(2, (MDI_NOTE, dip, 5057 "called %p %p", (void *)dip, (void *)ph)); 5058 if (ph == NULL) { 5059 return (rv); 5060 } 5061 5062 MDI_PHCI_LOCK(ph); 5063 5064 if (MDI_PHCI_IS_OFFLINE(ph)) { 5065 MDI_DEBUG(1, (MDI_WARN, dip, 5066 "!pHCI already offlined: %p", (void *)dip)); 5067 MDI_PHCI_UNLOCK(ph); 5068 return (NDI_SUCCESS); 5069 } 5070 5071 /* 5072 * Check to see if the pHCI can be offlined 5073 */ 5074 if (ph->ph_unstable) { 5075 MDI_DEBUG(1, (MDI_WARN, dip, 5076 "!One or more target devices are in transient state. " 5077 "This device can not be removed at this moment. 
" 5078 "Please try again later.")); 5079 MDI_PHCI_UNLOCK(ph); 5080 return (NDI_BUSY); 5081 } 5082 5083 pip = ph->ph_path_head; 5084 while (pip != NULL) { 5085 MDI_PI_LOCK(pip); 5086 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5087 5088 /* 5089 * The mdi_pathinfo state is OK. Check the client state. 5090 * If failover in progress fail the pHCI from offlining 5091 */ 5092 ct = MDI_PI(pip)->pi_client; 5093 i_mdi_client_lock(ct, pip); 5094 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5095 (ct->ct_unstable)) { 5096 /* 5097 * Failover is in progress, Fail the DR 5098 */ 5099 MDI_DEBUG(1, (MDI_WARN, dip, 5100 "!pHCI device is busy. " 5101 "This device can not be removed at this moment. " 5102 "Please try again later.")); 5103 MDI_PI_UNLOCK(pip); 5104 i_mdi_client_unlock(ct); 5105 MDI_PHCI_UNLOCK(ph); 5106 return (NDI_BUSY); 5107 } 5108 MDI_PI_UNLOCK(pip); 5109 5110 /* 5111 * Check to see of we are removing the last path of this 5112 * client device... 5113 */ 5114 cdip = ct->ct_dip; 5115 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5116 (i_mdi_client_compute_state(ct, ph) == 5117 MDI_CLIENT_STATE_FAILED)) { 5118 i_mdi_client_unlock(ct); 5119 MDI_PHCI_UNLOCK(ph); 5120 if (ndi_devi_offline(cdip, 5121 NDI_DEVFS_CLEAN) != NDI_SUCCESS) { 5122 /* 5123 * ndi_devi_offline() failed. 5124 * This pHCI provides the critical path 5125 * to one or more client devices. 5126 * Return busy. 5127 */ 5128 MDI_PHCI_LOCK(ph); 5129 MDI_DEBUG(1, (MDI_WARN, dip, 5130 "!pHCI device is busy. " 5131 "This device can not be removed at this " 5132 "moment. 
Please try again later.")); 5133 failed_pip = pip; 5134 break; 5135 } else { 5136 MDI_PHCI_LOCK(ph); 5137 pip = next; 5138 } 5139 } else { 5140 i_mdi_client_unlock(ct); 5141 pip = next; 5142 } 5143 } 5144 5145 if (failed_pip) { 5146 pip = ph->ph_path_head; 5147 while (pip != failed_pip) { 5148 MDI_PI_LOCK(pip); 5149 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5150 ct = MDI_PI(pip)->pi_client; 5151 i_mdi_client_lock(ct, pip); 5152 cdip = ct->ct_dip; 5153 switch (MDI_CLIENT_STATE(ct)) { 5154 case MDI_CLIENT_STATE_OPTIMAL: 5155 case MDI_CLIENT_STATE_DEGRADED: 5156 if (cdip) { 5157 MDI_PI_UNLOCK(pip); 5158 i_mdi_client_unlock(ct); 5159 MDI_PHCI_UNLOCK(ph); 5160 (void) ndi_devi_online(cdip, 0); 5161 MDI_PHCI_LOCK(ph); 5162 pip = next; 5163 continue; 5164 } 5165 break; 5166 5167 case MDI_CLIENT_STATE_FAILED: 5168 if (cdip) { 5169 MDI_PI_UNLOCK(pip); 5170 i_mdi_client_unlock(ct); 5171 MDI_PHCI_UNLOCK(ph); 5172 (void) ndi_devi_offline(cdip, 5173 NDI_DEVFS_CLEAN); 5174 MDI_PHCI_LOCK(ph); 5175 pip = next; 5176 continue; 5177 } 5178 break; 5179 } 5180 MDI_PI_UNLOCK(pip); 5181 i_mdi_client_unlock(ct); 5182 pip = next; 5183 } 5184 MDI_PHCI_UNLOCK(ph); 5185 return (NDI_BUSY); 5186 } 5187 5188 /* 5189 * Mark the pHCI as offline 5190 */ 5191 MDI_PHCI_SET_OFFLINE(ph); 5192 5193 /* 5194 * Mark the child mdi_pathinfo nodes as transient 5195 */ 5196 pip = ph->ph_path_head; 5197 while (pip != NULL) { 5198 MDI_PI_LOCK(pip); 5199 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5200 MDI_PI_SET_OFFLINING(pip); 5201 MDI_PI_UNLOCK(pip); 5202 pip = next; 5203 } 5204 MDI_PHCI_UNLOCK(ph); 5205 /* 5206 * Give a chance for any pending commands to execute 5207 */ 5208 delay_random(mdi_delay); 5209 MDI_PHCI_LOCK(ph); 5210 pip = ph->ph_path_head; 5211 while (pip != NULL) { 5212 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5213 (void) i_mdi_pi_offline(pip, flags); 5214 MDI_PI_LOCK(pip); 5215 ct = MDI_PI(pip)->pi_client; 5216 if (!MDI_PI_IS_OFFLINE(pip)) { 5217 MDI_DEBUG(1, 
(MDI_WARN, dip, 5218 "!pHCI device is busy. " 5219 "This device can not be removed at this moment. " 5220 "Please try again later.")); 5221 MDI_PI_UNLOCK(pip); 5222 MDI_PHCI_SET_ONLINE(ph); 5223 MDI_PHCI_UNLOCK(ph); 5224 return (NDI_BUSY); 5225 } 5226 MDI_PI_UNLOCK(pip); 5227 pip = next; 5228 } 5229 MDI_PHCI_UNLOCK(ph); 5230 5231 return (rv); 5232 } 5233 5234 void 5235 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5236 { 5237 mdi_phci_t *ph; 5238 mdi_client_t *ct; 5239 mdi_pathinfo_t *pip; 5240 mdi_pathinfo_t *next; 5241 dev_info_t *cdip; 5242 5243 if (!MDI_PHCI(dip)) 5244 return; 5245 5246 ph = i_devi_get_phci(dip); 5247 if (ph == NULL) { 5248 return; 5249 } 5250 5251 MDI_PHCI_LOCK(ph); 5252 5253 if (MDI_PHCI_IS_OFFLINE(ph)) { 5254 /* has no last path */ 5255 MDI_PHCI_UNLOCK(ph); 5256 return; 5257 } 5258 5259 pip = ph->ph_path_head; 5260 while (pip != NULL) { 5261 MDI_PI_LOCK(pip); 5262 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5263 5264 ct = MDI_PI(pip)->pi_client; 5265 i_mdi_client_lock(ct, pip); 5266 MDI_PI_UNLOCK(pip); 5267 5268 cdip = ct->ct_dip; 5269 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5270 (i_mdi_client_compute_state(ct, ph) == 5271 MDI_CLIENT_STATE_FAILED)) { 5272 /* Last path. 
Mark client dip as retiring */ 5273 i_mdi_client_unlock(ct); 5274 MDI_PHCI_UNLOCK(ph); 5275 (void) e_ddi_mark_retiring(cdip, cons_array); 5276 MDI_PHCI_LOCK(ph); 5277 pip = next; 5278 } else { 5279 i_mdi_client_unlock(ct); 5280 pip = next; 5281 } 5282 } 5283 5284 MDI_PHCI_UNLOCK(ph); 5285 5286 return; 5287 } 5288 5289 void 5290 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5291 { 5292 mdi_phci_t *ph; 5293 mdi_client_t *ct; 5294 mdi_pathinfo_t *pip; 5295 mdi_pathinfo_t *next; 5296 dev_info_t *cdip; 5297 5298 if (!MDI_PHCI(dip)) 5299 return; 5300 5301 ph = i_devi_get_phci(dip); 5302 if (ph == NULL) 5303 return; 5304 5305 MDI_PHCI_LOCK(ph); 5306 5307 if (MDI_PHCI_IS_OFFLINE(ph)) { 5308 MDI_PHCI_UNLOCK(ph); 5309 /* not last path */ 5310 return; 5311 } 5312 5313 if (ph->ph_unstable) { 5314 MDI_PHCI_UNLOCK(ph); 5315 /* can't check for constraints */ 5316 *constraint = 0; 5317 return; 5318 } 5319 5320 pip = ph->ph_path_head; 5321 while (pip != NULL) { 5322 MDI_PI_LOCK(pip); 5323 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5324 5325 /* 5326 * The mdi_pathinfo state is OK. Check the client state. 5327 * If failover in progress fail the pHCI from offlining 5328 */ 5329 ct = MDI_PI(pip)->pi_client; 5330 i_mdi_client_lock(ct, pip); 5331 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5332 (ct->ct_unstable)) { 5333 /* 5334 * Failover is in progress, can't check for constraints 5335 */ 5336 MDI_PI_UNLOCK(pip); 5337 i_mdi_client_unlock(ct); 5338 MDI_PHCI_UNLOCK(ph); 5339 *constraint = 0; 5340 return; 5341 } 5342 MDI_PI_UNLOCK(pip); 5343 5344 /* 5345 * Check to see of we are retiring the last path of this 5346 * client device... 
5347 */ 5348 cdip = ct->ct_dip; 5349 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5350 (i_mdi_client_compute_state(ct, ph) == 5351 MDI_CLIENT_STATE_FAILED)) { 5352 i_mdi_client_unlock(ct); 5353 MDI_PHCI_UNLOCK(ph); 5354 (void) e_ddi_retire_notify(cdip, constraint); 5355 MDI_PHCI_LOCK(ph); 5356 pip = next; 5357 } else { 5358 i_mdi_client_unlock(ct); 5359 pip = next; 5360 } 5361 } 5362 5363 MDI_PHCI_UNLOCK(ph); 5364 5365 return; 5366 } 5367 5368 /* 5369 * offline the path(s) hanging off the pHCI. If the 5370 * last path to any client, check that constraints 5371 * have been applied. 5372 */ 5373 void 5374 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) 5375 { 5376 mdi_phci_t *ph; 5377 mdi_client_t *ct; 5378 mdi_pathinfo_t *pip; 5379 mdi_pathinfo_t *next; 5380 dev_info_t *cdip; 5381 int unstable = 0; 5382 int constraint; 5383 5384 if (!MDI_PHCI(dip)) 5385 return; 5386 5387 ph = i_devi_get_phci(dip); 5388 if (ph == NULL) { 5389 /* no last path and no pips */ 5390 return; 5391 } 5392 5393 MDI_PHCI_LOCK(ph); 5394 5395 if (MDI_PHCI_IS_OFFLINE(ph)) { 5396 MDI_PHCI_UNLOCK(ph); 5397 /* no last path and no pips */ 5398 return; 5399 } 5400 5401 /* 5402 * Check to see if the pHCI can be offlined 5403 */ 5404 if (ph->ph_unstable) { 5405 unstable = 1; 5406 } 5407 5408 pip = ph->ph_path_head; 5409 while (pip != NULL) { 5410 MDI_PI_LOCK(pip); 5411 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5412 5413 /* 5414 * if failover in progress fail the pHCI from offlining 5415 */ 5416 ct = MDI_PI(pip)->pi_client; 5417 i_mdi_client_lock(ct, pip); 5418 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5419 (ct->ct_unstable)) { 5420 unstable = 1; 5421 } 5422 MDI_PI_UNLOCK(pip); 5423 5424 /* 5425 * Check to see of we are removing the last path of this 5426 * client device... 
5427 */ 5428 cdip = ct->ct_dip; 5429 if (!phci_only && cdip && 5430 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5431 (i_mdi_client_compute_state(ct, ph) == 5432 MDI_CLIENT_STATE_FAILED)) { 5433 i_mdi_client_unlock(ct); 5434 MDI_PHCI_UNLOCK(ph); 5435 /* 5436 * We don't retire clients we just retire the 5437 * path to a client. If it is the last path 5438 * to a client, constraints are checked and 5439 * if we pass the last path is offlined. MPXIO will 5440 * then fail all I/Os to the client. Since we don't 5441 * want to retire the client on a path error 5442 * set constraint = 0 so that the client dip 5443 * is not retired. 5444 */ 5445 constraint = 0; 5446 (void) e_ddi_retire_finalize(cdip, &constraint); 5447 MDI_PHCI_LOCK(ph); 5448 pip = next; 5449 } else { 5450 i_mdi_client_unlock(ct); 5451 pip = next; 5452 } 5453 } 5454 5455 /* 5456 * Cannot offline pip(s) 5457 */ 5458 if (unstable) { 5459 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: " 5460 "pHCI in transient state, cannot retire", 5461 ddi_driver_name(dip), ddi_get_instance(dip)); 5462 MDI_PHCI_UNLOCK(ph); 5463 return; 5464 } 5465 5466 /* 5467 * Mark the pHCI as offline 5468 */ 5469 MDI_PHCI_SET_OFFLINE(ph); 5470 5471 /* 5472 * Mark the child mdi_pathinfo nodes as transient 5473 */ 5474 pip = ph->ph_path_head; 5475 while (pip != NULL) { 5476 MDI_PI_LOCK(pip); 5477 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5478 MDI_PI_SET_OFFLINING(pip); 5479 MDI_PI_UNLOCK(pip); 5480 pip = next; 5481 } 5482 MDI_PHCI_UNLOCK(ph); 5483 /* 5484 * Give a chance for any pending commands to execute 5485 */ 5486 delay_random(mdi_delay); 5487 MDI_PHCI_LOCK(ph); 5488 pip = ph->ph_path_head; 5489 while (pip != NULL) { 5490 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5491 (void) i_mdi_pi_offline(pip, 0); 5492 MDI_PI_LOCK(pip); 5493 ct = MDI_PI(pip)->pi_client; 5494 if (!MDI_PI_IS_OFFLINE(pip)) { 5495 cmn_err(CE_WARN, "mdi_phci_retire_finalize: " 5496 "path %d %s busy, cannot offline", 5497 
mdi_pi_get_path_instance(pip), 5498 mdi_pi_spathname(pip)); 5499 MDI_PI_UNLOCK(pip); 5500 MDI_PHCI_SET_ONLINE(ph); 5501 MDI_PHCI_UNLOCK(ph); 5502 return; 5503 } 5504 MDI_PI_UNLOCK(pip); 5505 pip = next; 5506 } 5507 MDI_PHCI_UNLOCK(ph); 5508 5509 return; 5510 } 5511 5512 void 5513 mdi_phci_unretire(dev_info_t *dip) 5514 { 5515 ASSERT(MDI_PHCI(dip)); 5516 5517 /* 5518 * Online the phci 5519 */ 5520 i_mdi_phci_online(dip); 5521 } 5522 5523 /*ARGSUSED*/ 5524 static int 5525 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5526 { 5527 int rv = NDI_SUCCESS; 5528 mdi_client_t *ct; 5529 5530 /* 5531 * Client component to go offline. Make sure that we are 5532 * not in failing over state and update client state 5533 * accordingly 5534 */ 5535 ct = i_devi_get_client(dip); 5536 MDI_DEBUG(2, (MDI_NOTE, dip, 5537 "called %p %p", (void *)dip, (void *)ct)); 5538 if (ct != NULL) { 5539 MDI_CLIENT_LOCK(ct); 5540 if (ct->ct_unstable) { 5541 /* 5542 * One or more paths are in transient state, 5543 * Dont allow offline of a client device 5544 */ 5545 MDI_DEBUG(1, (MDI_WARN, dip, 5546 "!One or more paths to " 5547 "this device are in transient state. " 5548 "This device can not be removed at this moment. " 5549 "Please try again later.")); 5550 MDI_CLIENT_UNLOCK(ct); 5551 return (NDI_BUSY); 5552 } 5553 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5554 /* 5555 * Failover is in progress, Dont allow DR of 5556 * a client device 5557 */ 5558 MDI_DEBUG(1, (MDI_WARN, dip, 5559 "!Client device is Busy. " 5560 "This device can not be removed at this moment. 
" 5561 "Please try again later.")); 5562 MDI_CLIENT_UNLOCK(ct); 5563 return (NDI_BUSY); 5564 } 5565 MDI_CLIENT_SET_OFFLINE(ct); 5566 5567 /* 5568 * Unbind our relationship with the dev_info node 5569 */ 5570 if (flags & NDI_DEVI_REMOVE) { 5571 ct->ct_dip = NULL; 5572 } 5573 MDI_CLIENT_UNLOCK(ct); 5574 } 5575 return (rv); 5576 } 5577 5578 /* 5579 * mdi_pre_attach(): 5580 * Pre attach() notification handler 5581 */ 5582 /*ARGSUSED*/ 5583 int 5584 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5585 { 5586 /* don't support old DDI_PM_RESUME */ 5587 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5588 (cmd == DDI_PM_RESUME)) 5589 return (DDI_FAILURE); 5590 5591 return (DDI_SUCCESS); 5592 } 5593 5594 /* 5595 * mdi_post_attach(): 5596 * Post attach() notification handler 5597 */ 5598 /*ARGSUSED*/ 5599 void 5600 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5601 { 5602 mdi_phci_t *ph; 5603 mdi_client_t *ct; 5604 mdi_vhci_t *vh; 5605 5606 if (MDI_PHCI(dip)) { 5607 ph = i_devi_get_phci(dip); 5608 ASSERT(ph != NULL); 5609 5610 MDI_PHCI_LOCK(ph); 5611 switch (cmd) { 5612 case DDI_ATTACH: 5613 MDI_DEBUG(2, (MDI_NOTE, dip, 5614 "phci post_attach called %p", (void *)ph)); 5615 if (error == DDI_SUCCESS) { 5616 MDI_PHCI_SET_ATTACH(ph); 5617 } else { 5618 MDI_DEBUG(1, (MDI_NOTE, dip, 5619 "!pHCI post_attach failed: error %d", 5620 error)); 5621 MDI_PHCI_SET_DETACH(ph); 5622 } 5623 break; 5624 5625 case DDI_RESUME: 5626 MDI_DEBUG(2, (MDI_NOTE, dip, 5627 "pHCI post_resume: called %p", (void *)ph)); 5628 if (error == DDI_SUCCESS) { 5629 MDI_PHCI_SET_RESUME(ph); 5630 } else { 5631 MDI_DEBUG(1, (MDI_NOTE, dip, 5632 "!pHCI post_resume failed: error %d", 5633 error)); 5634 MDI_PHCI_SET_SUSPEND(ph); 5635 } 5636 break; 5637 } 5638 MDI_PHCI_UNLOCK(ph); 5639 } 5640 5641 if (MDI_CLIENT(dip)) { 5642 ct = i_devi_get_client(dip); 5643 ASSERT(ct != NULL); 5644 5645 MDI_CLIENT_LOCK(ct); 5646 switch (cmd) { 5647 case DDI_ATTACH: 5648 MDI_DEBUG(2, (MDI_NOTE, 
dip, 5649 "client post_attach called %p", (void *)ct)); 5650 if (error != DDI_SUCCESS) { 5651 MDI_DEBUG(1, (MDI_NOTE, dip, 5652 "!client post_attach failed: error %d", 5653 error)); 5654 MDI_CLIENT_SET_DETACH(ct); 5655 MDI_DEBUG(4, (MDI_WARN, dip, 5656 "i_mdi_pm_reset_client")); 5657 i_mdi_pm_reset_client(ct); 5658 break; 5659 } 5660 5661 /* 5662 * Client device has successfully attached, inform 5663 * the vhci. 5664 */ 5665 vh = ct->ct_vhci; 5666 if (vh->vh_ops->vo_client_attached) 5667 (*vh->vh_ops->vo_client_attached)(dip); 5668 5669 MDI_CLIENT_SET_ATTACH(ct); 5670 break; 5671 5672 case DDI_RESUME: 5673 MDI_DEBUG(2, (MDI_NOTE, dip, 5674 "client post_attach: called %p", (void *)ct)); 5675 if (error == DDI_SUCCESS) { 5676 MDI_CLIENT_SET_RESUME(ct); 5677 } else { 5678 MDI_DEBUG(1, (MDI_NOTE, dip, 5679 "!client post_resume failed: error %d", 5680 error)); 5681 MDI_CLIENT_SET_SUSPEND(ct); 5682 } 5683 break; 5684 } 5685 MDI_CLIENT_UNLOCK(ct); 5686 } 5687 } 5688 5689 /* 5690 * mdi_pre_detach(): 5691 * Pre detach notification handler 5692 */ 5693 /*ARGSUSED*/ 5694 int 5695 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5696 { 5697 int rv = DDI_SUCCESS; 5698 5699 if (MDI_CLIENT(dip)) { 5700 (void) i_mdi_client_pre_detach(dip, cmd); 5701 } 5702 5703 if (MDI_PHCI(dip)) { 5704 rv = i_mdi_phci_pre_detach(dip, cmd); 5705 } 5706 5707 return (rv); 5708 } 5709 5710 /*ARGSUSED*/ 5711 static int 5712 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5713 { 5714 int rv = DDI_SUCCESS; 5715 mdi_phci_t *ph; 5716 mdi_client_t *ct; 5717 mdi_pathinfo_t *pip; 5718 mdi_pathinfo_t *failed_pip = NULL; 5719 mdi_pathinfo_t *next; 5720 5721 ph = i_devi_get_phci(dip); 5722 if (ph == NULL) { 5723 return (rv); 5724 } 5725 5726 MDI_PHCI_LOCK(ph); 5727 switch (cmd) { 5728 case DDI_DETACH: 5729 MDI_DEBUG(2, (MDI_NOTE, dip, 5730 "pHCI pre_detach: called %p", (void *)ph)); 5731 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5732 /* 5733 * mdi_pathinfo nodes are still attached to 5734 * this pHCI. 
Fail the detach for this pHCI. 5735 */ 5736 MDI_DEBUG(2, (MDI_WARN, dip, 5737 "pHCI pre_detach: paths are still attached %p", 5738 (void *)ph)); 5739 rv = DDI_FAILURE; 5740 break; 5741 } 5742 MDI_PHCI_SET_DETACH(ph); 5743 break; 5744 5745 case DDI_SUSPEND: 5746 /* 5747 * pHCI is getting suspended. Since mpxio client 5748 * devices may not be suspended at this point, to avoid 5749 * a potential stack overflow, it is important to suspend 5750 * client devices before pHCI can be suspended. 5751 */ 5752 5753 MDI_DEBUG(2, (MDI_NOTE, dip, 5754 "pHCI pre_suspend: called %p", (void *)ph)); 5755 /* 5756 * Suspend all the client devices accessible through this pHCI 5757 */ 5758 pip = ph->ph_path_head; 5759 while (pip != NULL && rv == DDI_SUCCESS) { 5760 dev_info_t *cdip; 5761 MDI_PI_LOCK(pip); 5762 next = 5763 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5764 ct = MDI_PI(pip)->pi_client; 5765 i_mdi_client_lock(ct, pip); 5766 cdip = ct->ct_dip; 5767 MDI_PI_UNLOCK(pip); 5768 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5769 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5770 i_mdi_client_unlock(ct); 5771 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5772 DDI_SUCCESS) { 5773 /* 5774 * Suspend of one of the client 5775 * device has failed. 5776 */ 5777 MDI_DEBUG(1, (MDI_WARN, dip, 5778 "!suspend of device (%s%d) failed.", 5779 ddi_driver_name(cdip), 5780 ddi_get_instance(cdip))); 5781 failed_pip = pip; 5782 break; 5783 } 5784 } else { 5785 i_mdi_client_unlock(ct); 5786 } 5787 pip = next; 5788 } 5789 5790 if (rv == DDI_SUCCESS) { 5791 /* 5792 * Suspend of client devices is complete. Proceed 5793 * with pHCI suspend. 5794 */ 5795 MDI_PHCI_SET_SUSPEND(ph); 5796 } else { 5797 /* 5798 * Revert back all the suspended client device states 5799 * to converse. 
5800 */ 5801 pip = ph->ph_path_head; 5802 while (pip != failed_pip) { 5803 dev_info_t *cdip; 5804 MDI_PI_LOCK(pip); 5805 next = 5806 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5807 ct = MDI_PI(pip)->pi_client; 5808 i_mdi_client_lock(ct, pip); 5809 cdip = ct->ct_dip; 5810 MDI_PI_UNLOCK(pip); 5811 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5812 i_mdi_client_unlock(ct); 5813 (void) devi_attach(cdip, DDI_RESUME); 5814 } else { 5815 i_mdi_client_unlock(ct); 5816 } 5817 pip = next; 5818 } 5819 } 5820 break; 5821 5822 default: 5823 rv = DDI_FAILURE; 5824 break; 5825 } 5826 MDI_PHCI_UNLOCK(ph); 5827 return (rv); 5828 } 5829 5830 /*ARGSUSED*/ 5831 static int 5832 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5833 { 5834 int rv = DDI_SUCCESS; 5835 mdi_client_t *ct; 5836 5837 ct = i_devi_get_client(dip); 5838 if (ct == NULL) { 5839 return (rv); 5840 } 5841 5842 MDI_CLIENT_LOCK(ct); 5843 switch (cmd) { 5844 case DDI_DETACH: 5845 MDI_DEBUG(2, (MDI_NOTE, dip, 5846 "client pre_detach: called %p", 5847 (void *)ct)); 5848 MDI_CLIENT_SET_DETACH(ct); 5849 break; 5850 5851 case DDI_SUSPEND: 5852 MDI_DEBUG(2, (MDI_NOTE, dip, 5853 "client pre_suspend: called %p", 5854 (void *)ct)); 5855 MDI_CLIENT_SET_SUSPEND(ct); 5856 break; 5857 5858 default: 5859 rv = DDI_FAILURE; 5860 break; 5861 } 5862 MDI_CLIENT_UNLOCK(ct); 5863 return (rv); 5864 } 5865 5866 /* 5867 * mdi_post_detach(): 5868 * Post detach notification handler 5869 */ 5870 /*ARGSUSED*/ 5871 void 5872 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5873 { 5874 /* 5875 * Detach/Suspend of mpxio component failed. Update our state 5876 * too 5877 */ 5878 if (MDI_PHCI(dip)) 5879 i_mdi_phci_post_detach(dip, cmd, error); 5880 5881 if (MDI_CLIENT(dip)) 5882 i_mdi_client_post_detach(dip, cmd, error); 5883 } 5884 5885 /*ARGSUSED*/ 5886 static void 5887 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5888 { 5889 mdi_phci_t *ph; 5890 5891 /* 5892 * Detach/Suspend of phci component failed. 
Update our state 5893 * too 5894 */ 5895 ph = i_devi_get_phci(dip); 5896 if (ph == NULL) { 5897 return; 5898 } 5899 5900 MDI_PHCI_LOCK(ph); 5901 /* 5902 * Detach of pHCI failed. Restore back converse 5903 * state 5904 */ 5905 switch (cmd) { 5906 case DDI_DETACH: 5907 MDI_DEBUG(2, (MDI_NOTE, dip, 5908 "pHCI post_detach: called %p", 5909 (void *)ph)); 5910 if (error != DDI_SUCCESS) 5911 MDI_PHCI_SET_ATTACH(ph); 5912 break; 5913 5914 case DDI_SUSPEND: 5915 MDI_DEBUG(2, (MDI_NOTE, dip, 5916 "pHCI post_suspend: called %p", 5917 (void *)ph)); 5918 if (error != DDI_SUCCESS) 5919 MDI_PHCI_SET_RESUME(ph); 5920 break; 5921 } 5922 MDI_PHCI_UNLOCK(ph); 5923 } 5924 5925 /*ARGSUSED*/ 5926 static void 5927 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5928 { 5929 mdi_client_t *ct; 5930 5931 ct = i_devi_get_client(dip); 5932 if (ct == NULL) { 5933 return; 5934 } 5935 MDI_CLIENT_LOCK(ct); 5936 /* 5937 * Detach of Client failed. Restore back converse 5938 * state 5939 */ 5940 switch (cmd) { 5941 case DDI_DETACH: 5942 MDI_DEBUG(2, (MDI_NOTE, dip, 5943 "client post_detach: called %p", (void *)ct)); 5944 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5945 MDI_DEBUG(4, (MDI_NOTE, dip, 5946 "i_mdi_pm_rele_client\n")); 5947 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5948 } else { 5949 MDI_DEBUG(4, (MDI_NOTE, dip, 5950 "i_mdi_pm_reset_client\n")); 5951 i_mdi_pm_reset_client(ct); 5952 } 5953 if (error != DDI_SUCCESS) 5954 MDI_CLIENT_SET_ATTACH(ct); 5955 break; 5956 5957 case DDI_SUSPEND: 5958 MDI_DEBUG(2, (MDI_NOTE, dip, 5959 "called %p", (void *)ct)); 5960 if (error != DDI_SUCCESS) 5961 MDI_CLIENT_SET_RESUME(ct); 5962 break; 5963 } 5964 MDI_CLIENT_UNLOCK(ct); 5965 } 5966 5967 int 5968 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5969 { 5970 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5971 } 5972 5973 /* 5974 * create and install per-path (client - pHCI) statistics 5975 * I/O stats supported: nread, nwritten, reads, and writes 5976 * Error stats - hard errors, soft errors, & transport errors 5977 */ 5978 int 5979 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5980 { 5981 kstat_t *kiosp, *kerrsp; 5982 struct pi_errs *nsp; 5983 struct mdi_pi_kstats *mdi_statp; 5984 5985 if (MDI_PI(pip)->pi_kstats != NULL) 5986 return (MDI_SUCCESS); 5987 5988 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5989 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5990 return (MDI_FAILURE); 5991 } 5992 5993 (void) strcat(ksname, ",err"); 5994 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5995 KSTAT_TYPE_NAMED, 5996 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5997 if (kerrsp == NULL) { 5998 kstat_delete(kiosp); 5999 return (MDI_FAILURE); 6000 } 6001 6002 nsp = (struct pi_errs *)kerrsp->ks_data; 6003 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 6004 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 6005 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 6006 KSTAT_DATA_UINT32); 6007 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 6008 KSTAT_DATA_UINT32); 6009 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 6010 KSTAT_DATA_UINT32); 6011 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 6012 KSTAT_DATA_UINT32); 6013 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 6014 KSTAT_DATA_UINT32); 6015 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 6016 KSTAT_DATA_UINT32); 6017 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 6018 KSTAT_DATA_UINT32); 6019 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 6020 6021 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 6022 mdi_statp->pi_kstat_ref = 1; 6023 mdi_statp->pi_kstat_iostats = kiosp; 6024 mdi_statp->pi_kstat_errstats = kerrsp; 6025 
kstat_install(kiosp); 6026 kstat_install(kerrsp); 6027 MDI_PI(pip)->pi_kstats = mdi_statp; 6028 return (MDI_SUCCESS); 6029 } 6030 6031 /* 6032 * destroy per-path properties 6033 */ 6034 static void 6035 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 6036 { 6037 6038 struct mdi_pi_kstats *mdi_statp; 6039 6040 if (MDI_PI(pip)->pi_kstats == NULL) 6041 return; 6042 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 6043 return; 6044 6045 MDI_PI(pip)->pi_kstats = NULL; 6046 6047 /* 6048 * the kstat may be shared between multiple pathinfo nodes 6049 * decrement this pathinfo's usage, removing the kstats 6050 * themselves when the last pathinfo reference is removed. 6051 */ 6052 ASSERT(mdi_statp->pi_kstat_ref > 0); 6053 if (--mdi_statp->pi_kstat_ref != 0) 6054 return; 6055 6056 kstat_delete(mdi_statp->pi_kstat_iostats); 6057 kstat_delete(mdi_statp->pi_kstat_errstats); 6058 kmem_free(mdi_statp, sizeof (*mdi_statp)); 6059 } 6060 6061 /* 6062 * update I/O paths KSTATS 6063 */ 6064 void 6065 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 6066 { 6067 kstat_t *iostatp; 6068 size_t xfer_cnt; 6069 6070 ASSERT(pip != NULL); 6071 6072 /* 6073 * I/O can be driven across a path prior to having path 6074 * statistics available, i.e. probe(9e). 6075 */ 6076 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 6077 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 6078 xfer_cnt = bp->b_bcount - bp->b_resid; 6079 if (bp->b_flags & B_READ) { 6080 KSTAT_IO_PTR(iostatp)->reads++; 6081 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 6082 } else { 6083 KSTAT_IO_PTR(iostatp)->writes++; 6084 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 6085 } 6086 } 6087 } 6088 6089 /* 6090 * Enable the path(specific client/target/initiator) 6091 * Enabling a path means that MPxIO may select the enabled path for routing 6092 * future I/O requests, subject to other path state constraints. 
6093 */ 6094 int 6095 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 6096 { 6097 mdi_phci_t *ph; 6098 6099 ph = MDI_PI(pip)->pi_phci; 6100 if (ph == NULL) { 6101 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6102 "!failed: path %s %p: NULL ph", 6103 mdi_pi_spathname(pip), (void *)pip)); 6104 return (MDI_FAILURE); 6105 } 6106 6107 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 6108 MDI_ENABLE_OP); 6109 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6110 "!returning success pip = %p. ph = %p", 6111 (void *)pip, (void *)ph)); 6112 return (MDI_SUCCESS); 6113 6114 } 6115 6116 /* 6117 * Disable the path (specific client/target/initiator) 6118 * Disabling a path means that MPxIO will not select the disabled path for 6119 * routing any new I/O requests. 6120 */ 6121 int 6122 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 6123 { 6124 mdi_phci_t *ph; 6125 6126 ph = MDI_PI(pip)->pi_phci; 6127 if (ph == NULL) { 6128 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6129 "!failed: path %s %p: NULL ph", 6130 mdi_pi_spathname(pip), (void *)pip)); 6131 return (MDI_FAILURE); 6132 } 6133 6134 (void) i_mdi_enable_disable_path(pip, 6135 ph->ph_vhci, flags, MDI_DISABLE_OP); 6136 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6137 "!returning success pip = %p. ph = %p", 6138 (void *)pip, (void *)ph)); 6139 return (MDI_SUCCESS); 6140 } 6141 6142 /* 6143 * disable the path to a particular pHCI (pHCI specified in the phci_path 6144 * argument) for a particular client (specified in the client_path argument). 6145 * Disabling a path means that MPxIO will not select the disabled path for 6146 * routing any new I/O requests. 
6147 * NOTE: this will be removed once the NWS files are changed to use the new 6148 * mdi_{enable,disable}_path interfaces 6149 */ 6150 int 6151 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6152 { 6153 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 6154 } 6155 6156 /* 6157 * Enable the path to a particular pHCI (pHCI specified in the phci_path 6158 * argument) for a particular client (specified in the client_path argument). 6159 * Enabling a path means that MPxIO may select the enabled path for routing 6160 * future I/O requests, subject to other path state constraints. 6161 * NOTE: this will be removed once the NWS files are changed to use the new 6162 * mdi_{enable,disable}_path interfaces 6163 */ 6164 6165 int 6166 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6167 { 6168 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 6169 } 6170 6171 /* 6172 * Common routine for doing enable/disable. 6173 */ 6174 static mdi_pathinfo_t * 6175 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6176 int op) 6177 { 6178 int sync_flag = 0; 6179 int rv; 6180 mdi_pathinfo_t *next; 6181 int (*f)() = NULL; 6182 6183 /* 6184 * Check to make sure the path is not already in the 6185 * requested state. If it is just return the next path 6186 * as we have nothing to do here. 6187 */ 6188 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6189 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6190 MDI_PI_LOCK(pip); 6191 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6192 MDI_PI_UNLOCK(pip); 6193 return (next); 6194 } 6195 6196 f = vh->vh_ops->vo_pi_state_change; 6197 6198 sync_flag = (flags << 8) & 0xf00; 6199 6200 /* 6201 * Do a callback into the mdi consumer to let it 6202 * know that path is about to get enabled/disabled. 
6203 */ 6204 if (f != NULL) { 6205 rv = (*f)(vh->vh_dip, pip, 0, 6206 MDI_PI_EXT_STATE(pip), 6207 MDI_EXT_STATE_CHANGE | sync_flag | 6208 op | MDI_BEFORE_STATE_CHANGE); 6209 if (rv != MDI_SUCCESS) { 6210 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6211 "vo_pi_state_change: failed rv = %x", rv)); 6212 } 6213 } 6214 MDI_PI_LOCK(pip); 6215 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6216 6217 switch (flags) { 6218 case USER_DISABLE: 6219 if (op == MDI_DISABLE_OP) { 6220 MDI_PI_SET_USER_DISABLE(pip); 6221 } else { 6222 MDI_PI_SET_USER_ENABLE(pip); 6223 } 6224 break; 6225 case DRIVER_DISABLE: 6226 if (op == MDI_DISABLE_OP) { 6227 MDI_PI_SET_DRV_DISABLE(pip); 6228 } else { 6229 MDI_PI_SET_DRV_ENABLE(pip); 6230 } 6231 break; 6232 case DRIVER_DISABLE_TRANSIENT: 6233 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6234 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6235 } else { 6236 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6237 } 6238 break; 6239 } 6240 MDI_PI_UNLOCK(pip); 6241 /* 6242 * Do a callback into the mdi consumer to let it 6243 * know that path is now enabled/disabled. 6244 */ 6245 if (f != NULL) { 6246 rv = (*f)(vh->vh_dip, pip, 0, 6247 MDI_PI_EXT_STATE(pip), 6248 MDI_EXT_STATE_CHANGE | sync_flag | 6249 op | MDI_AFTER_STATE_CHANGE); 6250 if (rv != MDI_SUCCESS) { 6251 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6252 "vo_pi_state_change failed: rv = %x", rv)); 6253 } 6254 } 6255 return (next); 6256 } 6257 6258 /* 6259 * Common routine for doing enable/disable. 6260 * NOTE: this will be removed once the NWS files are changed to use the new 6261 * mdi_{enable,disable}_path has been putback 6262 */ 6263 int 6264 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6265 { 6266 6267 mdi_phci_t *ph; 6268 mdi_vhci_t *vh = NULL; 6269 mdi_client_t *ct; 6270 mdi_pathinfo_t *next, *pip; 6271 int found_it; 6272 6273 ph = i_devi_get_phci(pdip); 6274 MDI_DEBUG(5, (MDI_NOTE, cdip ? 
cdip : pdip, 6275 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6276 (void *)cdip)); 6277 if (ph == NULL) { 6278 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6279 "!failed: operation %d: NULL ph", op)); 6280 return (MDI_FAILURE); 6281 } 6282 6283 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6284 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6285 "!failed: invalid operation %d", op)); 6286 return (MDI_FAILURE); 6287 } 6288 6289 vh = ph->ph_vhci; 6290 6291 if (cdip == NULL) { 6292 /* 6293 * Need to mark the Phci as enabled/disabled. 6294 */ 6295 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip, 6296 "op %d for the phci", op)); 6297 MDI_PHCI_LOCK(ph); 6298 switch (flags) { 6299 case USER_DISABLE: 6300 if (op == MDI_DISABLE_OP) { 6301 MDI_PHCI_SET_USER_DISABLE(ph); 6302 } else { 6303 MDI_PHCI_SET_USER_ENABLE(ph); 6304 } 6305 break; 6306 case DRIVER_DISABLE: 6307 if (op == MDI_DISABLE_OP) { 6308 MDI_PHCI_SET_DRV_DISABLE(ph); 6309 } else { 6310 MDI_PHCI_SET_DRV_ENABLE(ph); 6311 } 6312 break; 6313 case DRIVER_DISABLE_TRANSIENT: 6314 if (op == MDI_DISABLE_OP) { 6315 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6316 } else { 6317 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6318 } 6319 break; 6320 default: 6321 MDI_PHCI_UNLOCK(ph); 6322 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6323 "!invalid flag argument= %d", flags)); 6324 } 6325 6326 /* 6327 * Phci has been disabled. Now try to enable/disable 6328 * path info's to each client. 6329 */ 6330 pip = ph->ph_path_head; 6331 while (pip != NULL) { 6332 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6333 } 6334 MDI_PHCI_UNLOCK(ph); 6335 } else { 6336 6337 /* 6338 * Disable a specific client. 6339 */ 6340 ct = i_devi_get_client(cdip); 6341 if (ct == NULL) { 6342 MDI_DEBUG(1, (MDI_NOTE, cdip ? 
cdip : pdip, 6343 "!failed: operation = %d: NULL ct", op)); 6344 return (MDI_FAILURE); 6345 } 6346 6347 MDI_CLIENT_LOCK(ct); 6348 pip = ct->ct_path_head; 6349 found_it = 0; 6350 while (pip != NULL) { 6351 MDI_PI_LOCK(pip); 6352 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6353 if (MDI_PI(pip)->pi_phci == ph) { 6354 MDI_PI_UNLOCK(pip); 6355 found_it = 1; 6356 break; 6357 } 6358 MDI_PI_UNLOCK(pip); 6359 pip = next; 6360 } 6361 6362 6363 MDI_CLIENT_UNLOCK(ct); 6364 if (found_it == 0) { 6365 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6366 "!failed. Could not find corresponding pip\n")); 6367 return (MDI_FAILURE); 6368 } 6369 6370 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6371 } 6372 6373 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6374 "!op %d returning success pdip = %p cdip = %p", 6375 op, (void *)pdip, (void *)cdip)); 6376 return (MDI_SUCCESS); 6377 } 6378 6379 /* 6380 * Ensure phci powered up 6381 */ 6382 static void 6383 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6384 { 6385 dev_info_t *ph_dip; 6386 6387 ASSERT(pip != NULL); 6388 ASSERT(MDI_PI_LOCKED(pip)); 6389 6390 if (MDI_PI(pip)->pi_pm_held) { 6391 return; 6392 } 6393 6394 ph_dip = mdi_pi_get_phci(pip); 6395 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6396 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6397 if (ph_dip == NULL) { 6398 return; 6399 } 6400 6401 MDI_PI_UNLOCK(pip); 6402 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d", 6403 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6404 pm_hold_power(ph_dip); 6405 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d", 6406 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6407 MDI_PI_LOCK(pip); 6408 6409 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6410 if (DEVI(ph_dip)->devi_pm_info) 6411 MDI_PI(pip)->pi_pm_held = 1; 6412 } 6413 6414 /* 6415 * Allow phci powered down 6416 */ 6417 static void 6418 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6419 { 6420 dev_info_t *ph_dip = NULL; 6421 6422 ASSERT(pip != NULL); 6423 ASSERT(MDI_PI_LOCKED(pip)); 6424 6425 if 
(MDI_PI(pip)->pi_pm_held == 0) { 6426 return; 6427 } 6428 6429 ph_dip = mdi_pi_get_phci(pip); 6430 ASSERT(ph_dip != NULL); 6431 6432 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6433 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6434 6435 MDI_PI_UNLOCK(pip); 6436 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6437 "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6438 pm_rele_power(ph_dip); 6439 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6440 "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6441 MDI_PI_LOCK(pip); 6442 6443 MDI_PI(pip)->pi_pm_held = 0; 6444 } 6445 6446 static void 6447 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6448 { 6449 ASSERT(MDI_CLIENT_LOCKED(ct)); 6450 6451 ct->ct_power_cnt += incr; 6452 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6453 "%p ct_power_cnt = %d incr = %d", 6454 (void *)ct, ct->ct_power_cnt, incr)); 6455 ASSERT(ct->ct_power_cnt >= 0); 6456 } 6457 6458 static void 6459 i_mdi_rele_all_phci(mdi_client_t *ct) 6460 { 6461 mdi_pathinfo_t *pip; 6462 6463 ASSERT(MDI_CLIENT_LOCKED(ct)); 6464 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6465 while (pip != NULL) { 6466 mdi_hold_path(pip); 6467 MDI_PI_LOCK(pip); 6468 i_mdi_pm_rele_pip(pip); 6469 MDI_PI_UNLOCK(pip); 6470 mdi_rele_path(pip); 6471 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6472 } 6473 } 6474 6475 static void 6476 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6477 { 6478 ASSERT(MDI_CLIENT_LOCKED(ct)); 6479 6480 if (i_ddi_devi_attached(ct->ct_dip)) { 6481 ct->ct_power_cnt -= decr; 6482 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6483 "%p ct_power_cnt = %d decr = %d", 6484 (void *)ct, ct->ct_power_cnt, decr)); 6485 } 6486 6487 ASSERT(ct->ct_power_cnt >= 0); 6488 if (ct->ct_power_cnt == 0) { 6489 i_mdi_rele_all_phci(ct); 6490 return; 6491 } 6492 } 6493 6494 static void 6495 i_mdi_pm_reset_client(mdi_client_t *ct) 6496 { 6497 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6498 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt)); 6499 ASSERT(MDI_CLIENT_LOCKED(ct)); 6500 ct->ct_power_cnt = 0; 6501 i_mdi_rele_all_phci(ct); 
6502 ct->ct_powercnt_config = 0; 6503 ct->ct_powercnt_unconfig = 0; 6504 ct->ct_powercnt_reset = 1; 6505 } 6506 6507 static int 6508 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6509 { 6510 int ret; 6511 dev_info_t *ph_dip; 6512 6513 MDI_PI_LOCK(pip); 6514 i_mdi_pm_hold_pip(pip); 6515 6516 ph_dip = mdi_pi_get_phci(pip); 6517 MDI_PI_UNLOCK(pip); 6518 6519 /* bring all components of phci to full power */ 6520 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6521 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip), 6522 ddi_get_instance(ph_dip), (void *)pip)); 6523 6524 ret = pm_powerup(ph_dip); 6525 6526 if (ret == DDI_FAILURE) { 6527 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6528 "pm_powerup FAILED for %s%d %p", 6529 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), 6530 (void *)pip)); 6531 6532 MDI_PI_LOCK(pip); 6533 i_mdi_pm_rele_pip(pip); 6534 MDI_PI_UNLOCK(pip); 6535 return (MDI_FAILURE); 6536 } 6537 6538 return (MDI_SUCCESS); 6539 } 6540 6541 static int 6542 i_mdi_power_all_phci(mdi_client_t *ct) 6543 { 6544 mdi_pathinfo_t *pip; 6545 int succeeded = 0; 6546 6547 ASSERT(MDI_CLIENT_LOCKED(ct)); 6548 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6549 while (pip != NULL) { 6550 /* 6551 * Don't power if MDI_PATHINFO_STATE_FAULT 6552 * or MDI_PATHINFO_STATE_OFFLINE. 6553 */ 6554 if (MDI_PI_IS_INIT(pip) || 6555 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6556 mdi_hold_path(pip); 6557 MDI_CLIENT_UNLOCK(ct); 6558 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6559 succeeded = 1; 6560 6561 ASSERT(ct == MDI_PI(pip)->pi_client); 6562 MDI_CLIENT_LOCK(ct); 6563 mdi_rele_path(pip); 6564 } 6565 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6566 } 6567 6568 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6569 } 6570 6571 /* 6572 * mdi_bus_power(): 6573 * 1. Place the phci(s) into powered up state so that 6574 * client can do power management 6575 * 2. 
Ensure phci powered up as client power managing 6576 * Return Values: 6577 * MDI_SUCCESS 6578 * MDI_FAILURE 6579 */ 6580 int 6581 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6582 void *arg, void *result) 6583 { 6584 int ret = MDI_SUCCESS; 6585 pm_bp_child_pwrchg_t *bpc; 6586 mdi_client_t *ct; 6587 dev_info_t *cdip; 6588 pm_bp_has_changed_t *bphc; 6589 6590 /* 6591 * BUS_POWER_NOINVOL not supported 6592 */ 6593 if (op == BUS_POWER_NOINVOL) 6594 return (MDI_FAILURE); 6595 6596 /* 6597 * ignore other OPs. 6598 * return quickly to save cou cycles on the ct processing 6599 */ 6600 switch (op) { 6601 case BUS_POWER_PRE_NOTIFICATION: 6602 case BUS_POWER_POST_NOTIFICATION: 6603 bpc = (pm_bp_child_pwrchg_t *)arg; 6604 cdip = bpc->bpc_dip; 6605 break; 6606 case BUS_POWER_HAS_CHANGED: 6607 bphc = (pm_bp_has_changed_t *)arg; 6608 cdip = bphc->bphc_dip; 6609 break; 6610 default: 6611 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6612 } 6613 6614 ASSERT(MDI_CLIENT(cdip)); 6615 6616 ct = i_devi_get_client(cdip); 6617 if (ct == NULL) 6618 return (MDI_FAILURE); 6619 6620 /* 6621 * wait till the mdi_pathinfo node state change are processed 6622 */ 6623 MDI_CLIENT_LOCK(ct); 6624 switch (op) { 6625 case BUS_POWER_PRE_NOTIFICATION: 6626 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6627 "BUS_POWER_PRE_NOTIFICATION:" 6628 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6629 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6630 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6631 6632 /* serialize power level change per client */ 6633 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6634 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6635 6636 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6637 6638 if (ct->ct_power_cnt == 0) { 6639 ret = i_mdi_power_all_phci(ct); 6640 } 6641 6642 /* 6643 * if new_level > 0: 6644 * - hold phci(s) 6645 * - power up phci(s) if not already 6646 * ignore power down 6647 */ 6648 if (bpc->bpc_nlevel > 0) { 6649 if 
(!DEVI_IS_ATTACHING(ct->ct_dip)) { 6650 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6651 "i_mdi_pm_hold_client\n")); 6652 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6653 } 6654 } 6655 break; 6656 case BUS_POWER_POST_NOTIFICATION: 6657 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6658 "BUS_POWER_POST_NOTIFICATION:" 6659 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d", 6660 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6661 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6662 *(int *)result)); 6663 6664 if (*(int *)result == DDI_SUCCESS) { 6665 if (bpc->bpc_nlevel > 0) { 6666 MDI_CLIENT_SET_POWER_UP(ct); 6667 } else { 6668 MDI_CLIENT_SET_POWER_DOWN(ct); 6669 } 6670 } 6671 6672 /* release the hold we did in pre-notification */ 6673 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6674 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6675 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6676 "i_mdi_pm_rele_client\n")); 6677 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6678 } 6679 6680 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6681 /* another thread might started attaching */ 6682 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6683 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6684 "i_mdi_pm_rele_client\n")); 6685 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6686 /* detaching has been taken care in pm_post_unconfig */ 6687 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6688 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6689 "i_mdi_pm_reset_client\n")); 6690 i_mdi_pm_reset_client(ct); 6691 } 6692 } 6693 6694 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6695 cv_broadcast(&ct->ct_powerchange_cv); 6696 6697 break; 6698 6699 /* need to do more */ 6700 case BUS_POWER_HAS_CHANGED: 6701 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6702 "BUS_POWER_HAS_CHANGED:" 6703 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6704 ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6705 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6706 6707 if (bphc->bphc_nlevel > 0 && 6708 bphc->bphc_nlevel > bphc->bphc_olevel) { 
6709 if (ct->ct_power_cnt == 0) { 6710 ret = i_mdi_power_all_phci(ct); 6711 } 6712 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6713 "i_mdi_pm_hold_client\n")); 6714 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6715 } 6716 6717 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6718 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6719 "i_mdi_pm_rele_client\n")); 6720 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6721 } 6722 break; 6723 } 6724 6725 MDI_CLIENT_UNLOCK(ct); 6726 return (ret); 6727 } 6728 6729 static int 6730 i_mdi_pm_pre_config_one(dev_info_t *child) 6731 { 6732 int ret = MDI_SUCCESS; 6733 mdi_client_t *ct; 6734 6735 ct = i_devi_get_client(child); 6736 if (ct == NULL) 6737 return (MDI_FAILURE); 6738 6739 MDI_CLIENT_LOCK(ct); 6740 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6741 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6742 6743 if (!MDI_CLIENT_IS_FAILED(ct)) { 6744 MDI_CLIENT_UNLOCK(ct); 6745 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n")); 6746 return (MDI_SUCCESS); 6747 } 6748 6749 if (ct->ct_powercnt_config) { 6750 MDI_CLIENT_UNLOCK(ct); 6751 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n")); 6752 return (MDI_SUCCESS); 6753 } 6754 6755 if (ct->ct_power_cnt == 0) { 6756 ret = i_mdi_power_all_phci(ct); 6757 } 6758 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6759 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6760 ct->ct_powercnt_config = 1; 6761 ct->ct_powercnt_reset = 0; 6762 MDI_CLIENT_UNLOCK(ct); 6763 return (ret); 6764 } 6765 6766 static int 6767 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6768 { 6769 int ret = MDI_SUCCESS; 6770 dev_info_t *cdip; 6771 int circ; 6772 6773 ASSERT(MDI_VHCI(vdip)); 6774 6775 /* ndi_devi_config_one */ 6776 if (child) { 6777 ASSERT(DEVI_BUSY_OWNED(vdip)); 6778 return (i_mdi_pm_pre_config_one(child)); 6779 } 6780 6781 /* devi_config_common */ 6782 ndi_devi_enter(vdip, &circ); 6783 cdip = ddi_get_child(vdip); 6784 while (cdip) { 6785 dev_info_t *next = ddi_get_next_sibling(cdip); 6786 6787 
ret = i_mdi_pm_pre_config_one(cdip); 6788 if (ret != MDI_SUCCESS) 6789 break; 6790 cdip = next; 6791 } 6792 ndi_devi_exit(vdip, circ); 6793 return (ret); 6794 } 6795 6796 static int 6797 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6798 { 6799 int ret = MDI_SUCCESS; 6800 mdi_client_t *ct; 6801 6802 ct = i_devi_get_client(child); 6803 if (ct == NULL) 6804 return (MDI_FAILURE); 6805 6806 MDI_CLIENT_LOCK(ct); 6807 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6808 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6809 6810 if (!i_ddi_devi_attached(ct->ct_dip)) { 6811 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n")); 6812 MDI_CLIENT_UNLOCK(ct); 6813 return (MDI_SUCCESS); 6814 } 6815 6816 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6817 (flags & NDI_AUTODETACH)) { 6818 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n")); 6819 MDI_CLIENT_UNLOCK(ct); 6820 return (MDI_FAILURE); 6821 } 6822 6823 if (ct->ct_powercnt_unconfig) { 6824 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n")); 6825 MDI_CLIENT_UNLOCK(ct); 6826 *held = 1; 6827 return (MDI_SUCCESS); 6828 } 6829 6830 if (ct->ct_power_cnt == 0) { 6831 ret = i_mdi_power_all_phci(ct); 6832 } 6833 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6834 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6835 ct->ct_powercnt_unconfig = 1; 6836 ct->ct_powercnt_reset = 0; 6837 MDI_CLIENT_UNLOCK(ct); 6838 if (ret == MDI_SUCCESS) 6839 *held = 1; 6840 return (ret); 6841 } 6842 6843 static int 6844 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6845 int flags) 6846 { 6847 int ret = MDI_SUCCESS; 6848 dev_info_t *cdip; 6849 int circ; 6850 6851 ASSERT(MDI_VHCI(vdip)); 6852 *held = 0; 6853 6854 /* ndi_devi_unconfig_one */ 6855 if (child) { 6856 ASSERT(DEVI_BUSY_OWNED(vdip)); 6857 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6858 } 6859 6860 /* devi_unconfig_common */ 6861 ndi_devi_enter(vdip, &circ); 6862 cdip = ddi_get_child(vdip); 6863 while (cdip) { 6864 dev_info_t *next = 
ddi_get_next_sibling(cdip); 6865 6866 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6867 cdip = next; 6868 } 6869 ndi_devi_exit(vdip, circ); 6870 6871 if (*held) 6872 ret = MDI_SUCCESS; 6873 6874 return (ret); 6875 } 6876 6877 static void 6878 i_mdi_pm_post_config_one(dev_info_t *child) 6879 { 6880 mdi_client_t *ct; 6881 6882 ct = i_devi_get_client(child); 6883 if (ct == NULL) 6884 return; 6885 6886 MDI_CLIENT_LOCK(ct); 6887 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6888 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6889 6890 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6891 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n")); 6892 MDI_CLIENT_UNLOCK(ct); 6893 return; 6894 } 6895 6896 /* client has not been updated */ 6897 if (MDI_CLIENT_IS_FAILED(ct)) { 6898 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n")); 6899 MDI_CLIENT_UNLOCK(ct); 6900 return; 6901 } 6902 6903 /* another thread might have powered it down or detached it */ 6904 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6905 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6906 (!i_ddi_devi_attached(ct->ct_dip) && 6907 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6908 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 6909 i_mdi_pm_reset_client(ct); 6910 } else { 6911 mdi_pathinfo_t *pip, *next; 6912 int valid_path_count = 0; 6913 6914 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 6915 pip = ct->ct_path_head; 6916 while (pip != NULL) { 6917 MDI_PI_LOCK(pip); 6918 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6919 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6920 valid_path_count ++; 6921 MDI_PI_UNLOCK(pip); 6922 pip = next; 6923 } 6924 i_mdi_pm_rele_client(ct, valid_path_count); 6925 } 6926 ct->ct_powercnt_config = 0; 6927 MDI_CLIENT_UNLOCK(ct); 6928 } 6929 6930 static void 6931 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6932 { 6933 int circ; 6934 dev_info_t *cdip; 6935 6936 ASSERT(MDI_VHCI(vdip)); 6937 6938 /* ndi_devi_config_one */ 6939 if (child) { 6940 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6941 i_mdi_pm_post_config_one(child); 6942 return; 6943 } 6944 6945 /* devi_config_common */ 6946 ndi_devi_enter(vdip, &circ); 6947 cdip = ddi_get_child(vdip); 6948 while (cdip) { 6949 dev_info_t *next = ddi_get_next_sibling(cdip); 6950 6951 i_mdi_pm_post_config_one(cdip); 6952 cdip = next; 6953 } 6954 ndi_devi_exit(vdip, circ); 6955 } 6956 6957 static void 6958 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6959 { 6960 mdi_client_t *ct; 6961 6962 ct = i_devi_get_client(child); 6963 if (ct == NULL) 6964 return; 6965 6966 MDI_CLIENT_LOCK(ct); 6967 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6968 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6969 6970 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6971 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n")); 6972 MDI_CLIENT_UNLOCK(ct); 6973 return; 6974 } 6975 6976 /* failure detaching or another thread just attached it */ 6977 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6978 i_ddi_devi_attached(ct->ct_dip)) || 6979 (!i_ddi_devi_attached(ct->ct_dip) && 6980 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6981 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 6982 i_mdi_pm_reset_client(ct); 6983 } else { 6984 mdi_pathinfo_t *pip, *next; 6985 int valid_path_count = 0; 6986 6987 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 6988 pip = ct->ct_path_head; 6989 while (pip != NULL) { 6990 MDI_PI_LOCK(pip); 6991 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6992 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6993 valid_path_count ++; 6994 MDI_PI_UNLOCK(pip); 6995 pip = next; 6996 } 6997 i_mdi_pm_rele_client(ct, valid_path_count); 6998 ct->ct_powercnt_unconfig = 0; 6999 } 7000 7001 MDI_CLIENT_UNLOCK(ct); 7002 } 7003 7004 static void 7005 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 7006 { 7007 int circ; 7008 dev_info_t *cdip; 7009 7010 ASSERT(MDI_VHCI(vdip)); 7011 7012 if (!held) { 7013 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held)); 7014 return; 
7015 } 7016 7017 if (child) { 7018 ASSERT(DEVI_BUSY_OWNED(vdip)); 7019 i_mdi_pm_post_unconfig_one(child); 7020 return; 7021 } 7022 7023 ndi_devi_enter(vdip, &circ); 7024 cdip = ddi_get_child(vdip); 7025 while (cdip) { 7026 dev_info_t *next = ddi_get_next_sibling(cdip); 7027 7028 i_mdi_pm_post_unconfig_one(cdip); 7029 cdip = next; 7030 } 7031 ndi_devi_exit(vdip, circ); 7032 } 7033 7034 int 7035 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 7036 { 7037 int circ, ret = MDI_SUCCESS; 7038 dev_info_t *client_dip = NULL; 7039 mdi_client_t *ct; 7040 7041 /* 7042 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 7043 * Power up pHCI for the named client device. 7044 * Note: Before the client is enumerated under vhci by phci, 7045 * client_dip can be NULL. Then proceed to power up all the 7046 * pHCIs. 7047 */ 7048 if (devnm != NULL) { 7049 ndi_devi_enter(vdip, &circ); 7050 client_dip = ndi_devi_findchild(vdip, devnm); 7051 } 7052 7053 MDI_DEBUG(4, (MDI_NOTE, vdip, 7054 "op = %d %s %p", op, devnm ? 
devnm : "", (void *)client_dip)); 7055 7056 switch (op) { 7057 case MDI_PM_PRE_CONFIG: 7058 ret = i_mdi_pm_pre_config(vdip, client_dip); 7059 break; 7060 7061 case MDI_PM_PRE_UNCONFIG: 7062 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 7063 flags); 7064 break; 7065 7066 case MDI_PM_POST_CONFIG: 7067 i_mdi_pm_post_config(vdip, client_dip); 7068 break; 7069 7070 case MDI_PM_POST_UNCONFIG: 7071 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 7072 break; 7073 7074 case MDI_PM_HOLD_POWER: 7075 case MDI_PM_RELE_POWER: 7076 ASSERT(args); 7077 7078 client_dip = (dev_info_t *)args; 7079 ASSERT(MDI_CLIENT(client_dip)); 7080 7081 ct = i_devi_get_client(client_dip); 7082 MDI_CLIENT_LOCK(ct); 7083 7084 if (op == MDI_PM_HOLD_POWER) { 7085 if (ct->ct_power_cnt == 0) { 7086 (void) i_mdi_power_all_phci(ct); 7087 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7088 "i_mdi_pm_hold_client\n")); 7089 i_mdi_pm_hold_client(ct, ct->ct_path_count); 7090 } 7091 } else { 7092 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 7093 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7094 "i_mdi_pm_rele_client\n")); 7095 i_mdi_pm_rele_client(ct, ct->ct_path_count); 7096 } else { 7097 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7098 "i_mdi_pm_reset_client\n")); 7099 i_mdi_pm_reset_client(ct); 7100 } 7101 } 7102 7103 MDI_CLIENT_UNLOCK(ct); 7104 break; 7105 7106 default: 7107 break; 7108 } 7109 7110 if (devnm) 7111 ndi_devi_exit(vdip, circ); 7112 7113 return (ret); 7114 } 7115 7116 int 7117 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 7118 { 7119 mdi_vhci_t *vhci; 7120 7121 if (!MDI_VHCI(dip)) 7122 return (MDI_FAILURE); 7123 7124 if (mdi_class) { 7125 vhci = DEVI(dip)->devi_mdi_xhci; 7126 ASSERT(vhci); 7127 *mdi_class = vhci->vh_class; 7128 } 7129 7130 return (MDI_SUCCESS); 7131 } 7132 7133 int 7134 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 7135 { 7136 mdi_phci_t *phci; 7137 7138 if (!MDI_PHCI(dip)) 7139 return (MDI_FAILURE); 7140 7141 if (mdi_class) { 7142 phci = 
DEVI(dip)->devi_mdi_xhci; 7143 ASSERT(phci); 7144 *mdi_class = phci->ph_vhci->vh_class; 7145 } 7146 7147 return (MDI_SUCCESS); 7148 } 7149 7150 int 7151 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 7152 { 7153 mdi_client_t *client; 7154 7155 if (!MDI_CLIENT(dip)) 7156 return (MDI_FAILURE); 7157 7158 if (mdi_class) { 7159 client = DEVI(dip)->devi_mdi_client; 7160 ASSERT(client); 7161 *mdi_class = client->ct_vhci->vh_class; 7162 } 7163 7164 return (MDI_SUCCESS); 7165 } 7166 7167 void * 7168 mdi_client_get_vhci_private(dev_info_t *dip) 7169 { 7170 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7171 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7172 mdi_client_t *ct; 7173 ct = i_devi_get_client(dip); 7174 return (ct->ct_vprivate); 7175 } 7176 return (NULL); 7177 } 7178 7179 void 7180 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7181 { 7182 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7183 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7184 mdi_client_t *ct; 7185 ct = i_devi_get_client(dip); 7186 ct->ct_vprivate = data; 7187 } 7188 } 7189 /* 7190 * mdi_pi_get_vhci_private(): 7191 * Get the vhci private information associated with the 7192 * mdi_pathinfo node 7193 */ 7194 void * 7195 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7196 { 7197 caddr_t vprivate = NULL; 7198 if (pip) { 7199 vprivate = MDI_PI(pip)->pi_vprivate; 7200 } 7201 return (vprivate); 7202 } 7203 7204 /* 7205 * mdi_pi_set_vhci_private(): 7206 * Set the vhci private information in the mdi_pathinfo node 7207 */ 7208 void 7209 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 7210 { 7211 if (pip) { 7212 MDI_PI(pip)->pi_vprivate = priv; 7213 } 7214 } 7215 7216 /* 7217 * mdi_phci_get_vhci_private(): 7218 * Get the vhci private information associated with the 7219 * mdi_phci node 7220 */ 7221 void * 7222 mdi_phci_get_vhci_private(dev_info_t *dip) 7223 { 7224 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7225 if 
(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7226 mdi_phci_t *ph; 7227 ph = i_devi_get_phci(dip); 7228 return (ph->ph_vprivate); 7229 } 7230 return (NULL); 7231 } 7232 7233 /* 7234 * mdi_phci_set_vhci_private(): 7235 * Set the vhci private information in the mdi_phci node 7236 */ 7237 void 7238 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7239 { 7240 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7241 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7242 mdi_phci_t *ph; 7243 ph = i_devi_get_phci(dip); 7244 ph->ph_vprivate = priv; 7245 } 7246 } 7247 7248 int 7249 mdi_pi_ishidden(mdi_pathinfo_t *pip) 7250 { 7251 return (MDI_PI_FLAGS_IS_HIDDEN(pip)); 7252 } 7253 7254 int 7255 mdi_pi_device_isremoved(mdi_pathinfo_t *pip) 7256 { 7257 return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)); 7258 } 7259 7260 /* 7261 * When processing hotplug, if mdi_pi_offline-mdi_pi_free fails then this 7262 * interface is used to represent device removal. 7263 */ 7264 int 7265 mdi_pi_device_remove(mdi_pathinfo_t *pip) 7266 { 7267 MDI_PI_LOCK(pip); 7268 if (mdi_pi_device_isremoved(pip)) { 7269 MDI_PI_UNLOCK(pip); 7270 return (0); 7271 } 7272 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip); 7273 MDI_PI_FLAGS_SET_HIDDEN(pip); 7274 MDI_PI_UNLOCK(pip); 7275 7276 i_ddi_di_cache_invalidate(); 7277 7278 return (1); 7279 } 7280 7281 /* 7282 * When processing hotplug, if a path marked mdi_pi_device_isremoved() 7283 * is now accessible then this interfaces is used to represent device insertion. 
7284 */ 7285 int 7286 mdi_pi_device_insert(mdi_pathinfo_t *pip) 7287 { 7288 MDI_PI_LOCK(pip); 7289 if (!mdi_pi_device_isremoved(pip)) { 7290 MDI_PI_UNLOCK(pip); 7291 return (0); 7292 } 7293 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip); 7294 MDI_PI_FLAGS_CLR_HIDDEN(pip); 7295 MDI_PI_UNLOCK(pip); 7296 7297 i_ddi_di_cache_invalidate(); 7298 7299 return (1); 7300 } 7301 7302 /* 7303 * List of vhci class names: 7304 * A vhci class name must be in this list only if the corresponding vhci 7305 * driver intends to use the mdi provided bus config implementation 7306 * (i.e., mdi_vhci_bus_config()). 7307 */ 7308 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7309 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7310 7311 /* 7312 * During boot time, the on-disk vhci cache for every vhci class is read 7313 * in the form of an nvlist and stored here. 7314 */ 7315 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7316 7317 /* nvpair names in vhci cache nvlist */ 7318 #define MDI_VHCI_CACHE_VERSION 1 7319 #define MDI_NVPNAME_VERSION "version" 7320 #define MDI_NVPNAME_PHCIS "phcis" 7321 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7322 7323 /* 7324 * Given vhci class name, return its on-disk vhci cache filename. 7325 * Memory for the returned filename which includes the full path is allocated 7326 * by this function. 7327 */ 7328 static char * 7329 vhclass2vhcache_filename(char *vhclass) 7330 { 7331 char *filename; 7332 int len; 7333 static char *fmt = "/etc/devices/mdi_%s_cache"; 7334 7335 /* 7336 * fmt contains the on-disk vhci cache file name format; 7337 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
7338 */ 7339 7340 /* the -1 below is to account for "%s" in the format string */ 7341 len = strlen(fmt) + strlen(vhclass) - 1; 7342 filename = kmem_alloc(len, KM_SLEEP); 7343 (void) snprintf(filename, len, fmt, vhclass); 7344 ASSERT(len == (strlen(filename) + 1)); 7345 return (filename); 7346 } 7347 7348 /* 7349 * initialize the vhci cache related data structures and read the on-disk 7350 * vhci cached data into memory. 7351 */ 7352 static void 7353 setup_vhci_cache(mdi_vhci_t *vh) 7354 { 7355 mdi_vhci_config_t *vhc; 7356 mdi_vhci_cache_t *vhcache; 7357 int i; 7358 nvlist_t *nvl = NULL; 7359 7360 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7361 vh->vh_config = vhc; 7362 vhcache = &vhc->vhc_vhcache; 7363 7364 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7365 7366 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7367 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7368 7369 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7370 7371 /* 7372 * Create string hash; same as mod_hash_create_strhash() except that 7373 * we use NULL key destructor. 7374 */ 7375 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7376 mdi_bus_config_cache_hash_size, 7377 mod_hash_null_keydtor, mod_hash_null_valdtor, 7378 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7379 7380 /* 7381 * The on-disk vhci cache is read during booting prior to the 7382 * lights-out period by mdi_read_devices_files(). 7383 */ 7384 for (i = 0; i < N_VHCI_CLASSES; i++) { 7385 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7386 nvl = vhcache_nvl[i]; 7387 vhcache_nvl[i] = NULL; 7388 break; 7389 } 7390 } 7391 7392 /* 7393 * this is to cover the case of some one manually causing unloading 7394 * (or detaching) and reloading (or attaching) of a vhci driver. 
7395 */ 7396 if (nvl == NULL && modrootloaded) 7397 nvl = read_on_disk_vhci_cache(vh->vh_class); 7398 7399 if (nvl != NULL) { 7400 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7401 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7402 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7403 else { 7404 cmn_err(CE_WARN, 7405 "%s: data file corrupted, will recreate", 7406 vhc->vhc_vhcache_filename); 7407 } 7408 rw_exit(&vhcache->vhcache_lock); 7409 nvlist_free(nvl); 7410 } 7411 7412 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7413 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7414 7415 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7416 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7417 } 7418 7419 /* 7420 * free all vhci cache related resources 7421 */ 7422 static int 7423 destroy_vhci_cache(mdi_vhci_t *vh) 7424 { 7425 mdi_vhci_config_t *vhc = vh->vh_config; 7426 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7427 mdi_vhcache_phci_t *cphci, *cphci_next; 7428 mdi_vhcache_client_t *cct, *cct_next; 7429 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7430 7431 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7432 return (MDI_FAILURE); 7433 7434 kmem_free(vhc->vhc_vhcache_filename, 7435 strlen(vhc->vhc_vhcache_filename) + 1); 7436 7437 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7438 7439 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7440 cphci = cphci_next) { 7441 cphci_next = cphci->cphci_next; 7442 free_vhcache_phci(cphci); 7443 } 7444 7445 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7446 cct_next = cct->cct_next; 7447 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7448 cpi_next = cpi->cpi_next; 7449 free_vhcache_pathinfo(cpi); 7450 } 7451 free_vhcache_client(cct); 7452 } 7453 7454 rw_destroy(&vhcache->vhcache_lock); 7455 7456 mutex_destroy(&vhc->vhc_lock); 7457 cv_destroy(&vhc->vhc_cv); 7458 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7459 return (MDI_SUCCESS); 
7460 } 7461 7462 /* 7463 * Stop all vhci cache related async threads and free their resources. 7464 */ 7465 static int 7466 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7467 { 7468 mdi_async_client_config_t *acc, *acc_next; 7469 7470 mutex_enter(&vhc->vhc_lock); 7471 vhc->vhc_flags |= MDI_VHC_EXIT; 7472 ASSERT(vhc->vhc_acc_thrcount >= 0); 7473 cv_broadcast(&vhc->vhc_cv); 7474 7475 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7476 vhc->vhc_acc_thrcount != 0) { 7477 mutex_exit(&vhc->vhc_lock); 7478 delay_random(mdi_delay); 7479 mutex_enter(&vhc->vhc_lock); 7480 } 7481 7482 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7483 7484 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7485 acc_next = acc->acc_next; 7486 free_async_client_config(acc); 7487 } 7488 vhc->vhc_acc_list_head = NULL; 7489 vhc->vhc_acc_list_tail = NULL; 7490 vhc->vhc_acc_count = 0; 7491 7492 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7493 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7494 mutex_exit(&vhc->vhc_lock); 7495 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7496 vhcache_dirty(vhc); 7497 return (MDI_FAILURE); 7498 } 7499 } else 7500 mutex_exit(&vhc->vhc_lock); 7501 7502 if (callb_delete(vhc->vhc_cbid) != 0) 7503 return (MDI_FAILURE); 7504 7505 return (MDI_SUCCESS); 7506 } 7507 7508 /* 7509 * Stop vhci cache flush thread 7510 */ 7511 /* ARGSUSED */ 7512 static boolean_t 7513 stop_vhcache_flush_thread(void *arg, int code) 7514 { 7515 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7516 7517 mutex_enter(&vhc->vhc_lock); 7518 vhc->vhc_flags |= MDI_VHC_EXIT; 7519 cv_broadcast(&vhc->vhc_cv); 7520 7521 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7522 mutex_exit(&vhc->vhc_lock); 7523 delay_random(mdi_delay); 7524 mutex_enter(&vhc->vhc_lock); 7525 } 7526 7527 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7528 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7529 mutex_exit(&vhc->vhc_lock); 7530 (void) flush_vhcache(vhc, 1); 7531 } else 7532 mutex_exit(&vhc->vhc_lock); 7533 
7534 return (B_TRUE); 7535 } 7536 7537 /* 7538 * Enqueue the vhcache phci (cphci) at the tail of the list 7539 */ 7540 static void 7541 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7542 { 7543 cphci->cphci_next = NULL; 7544 if (vhcache->vhcache_phci_head == NULL) 7545 vhcache->vhcache_phci_head = cphci; 7546 else 7547 vhcache->vhcache_phci_tail->cphci_next = cphci; 7548 vhcache->vhcache_phci_tail = cphci; 7549 } 7550 7551 /* 7552 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7553 */ 7554 static void 7555 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7556 mdi_vhcache_pathinfo_t *cpi) 7557 { 7558 cpi->cpi_next = NULL; 7559 if (cct->cct_cpi_head == NULL) 7560 cct->cct_cpi_head = cpi; 7561 else 7562 cct->cct_cpi_tail->cpi_next = cpi; 7563 cct->cct_cpi_tail = cpi; 7564 } 7565 7566 /* 7567 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7568 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7569 * flag set come at the beginning of the list. All cpis which have this 7570 * flag set come at the end of the list. 
7571 */ 7572 static void 7573 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7574 mdi_vhcache_pathinfo_t *newcpi) 7575 { 7576 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7577 7578 if (cct->cct_cpi_head == NULL || 7579 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7580 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7581 else { 7582 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7583 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7584 prev_cpi = cpi, cpi = cpi->cpi_next) 7585 ; 7586 7587 if (prev_cpi == NULL) 7588 cct->cct_cpi_head = newcpi; 7589 else 7590 prev_cpi->cpi_next = newcpi; 7591 7592 newcpi->cpi_next = cpi; 7593 7594 if (cpi == NULL) 7595 cct->cct_cpi_tail = newcpi; 7596 } 7597 } 7598 7599 /* 7600 * Enqueue the vhcache client (cct) at the tail of the list 7601 */ 7602 static void 7603 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7604 mdi_vhcache_client_t *cct) 7605 { 7606 cct->cct_next = NULL; 7607 if (vhcache->vhcache_client_head == NULL) 7608 vhcache->vhcache_client_head = cct; 7609 else 7610 vhcache->vhcache_client_tail->cct_next = cct; 7611 vhcache->vhcache_client_tail = cct; 7612 } 7613 7614 static void 7615 free_string_array(char **str, int nelem) 7616 { 7617 int i; 7618 7619 if (str) { 7620 for (i = 0; i < nelem; i++) { 7621 if (str[i]) 7622 kmem_free(str[i], strlen(str[i]) + 1); 7623 } 7624 kmem_free(str, sizeof (char *) * nelem); 7625 } 7626 } 7627 7628 static void 7629 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7630 { 7631 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7632 kmem_free(cphci, sizeof (*cphci)); 7633 } 7634 7635 static void 7636 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7637 { 7638 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7639 kmem_free(cpi, sizeof (*cpi)); 7640 } 7641 7642 static void 7643 free_vhcache_client(mdi_vhcache_client_t *cct) 7644 { 7645 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7646 kmem_free(cct, sizeof (*cct)); 7647 } 7648 7649 
static char * 7650 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7651 { 7652 char *name_addr; 7653 int len; 7654 7655 len = strlen(ct_name) + strlen(ct_addr) + 2; 7656 name_addr = kmem_alloc(len, KM_SLEEP); 7657 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7658 7659 if (ret_len) 7660 *ret_len = len; 7661 return (name_addr); 7662 } 7663 7664 /* 7665 * Copy the contents of paddrnvl to vhci cache. 7666 * paddrnvl nvlist contains path information for a vhci client. 7667 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7668 */ 7669 static void 7670 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7671 mdi_vhcache_client_t *cct) 7672 { 7673 nvpair_t *nvp = NULL; 7674 mdi_vhcache_pathinfo_t *cpi; 7675 uint_t nelem; 7676 uint32_t *val; 7677 7678 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7679 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7680 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7681 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7682 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7683 ASSERT(nelem == 2); 7684 cpi->cpi_cphci = cphci_list[val[0]]; 7685 cpi->cpi_flags = val[1]; 7686 enqueue_tail_vhcache_pathinfo(cct, cpi); 7687 } 7688 } 7689 7690 /* 7691 * Copy the contents of caddrmapnvl to vhci cache. 7692 * caddrmapnvl nvlist contains vhci client address to phci client address 7693 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7694 * this nvlist. 
7695 */ 7696 static void 7697 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7698 mdi_vhcache_phci_t *cphci_list[]) 7699 { 7700 nvpair_t *nvp = NULL; 7701 nvlist_t *paddrnvl; 7702 mdi_vhcache_client_t *cct; 7703 7704 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7705 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7706 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7707 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7708 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7709 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7710 /* the client must contain at least one path */ 7711 ASSERT(cct->cct_cpi_head != NULL); 7712 7713 enqueue_vhcache_client(vhcache, cct); 7714 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7715 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7716 } 7717 } 7718 7719 /* 7720 * Copy the contents of the main nvlist to vhci cache. 7721 * 7722 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7723 * The nvlist contains the mappings between the vhci client addresses and 7724 * their corresponding phci client addresses. 7725 * 7726 * The structure of the nvlist is as follows: 7727 * 7728 * Main nvlist: 7729 * NAME TYPE DATA 7730 * version int32 version number 7731 * phcis string array array of phci paths 7732 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7733 * 7734 * structure of c2paddrs_nvl: 7735 * NAME TYPE DATA 7736 * caddr1 nvlist_t paddrs_nvl1 7737 * caddr2 nvlist_t paddrs_nvl2 7738 * ... 7739 * where caddr1, caddr2, ... are vhci client name and addresses in the 7740 * form of "<clientname>@<clientaddress>". 7741 * (for example: "ssd@2000002037cd9f72"); 7742 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7743 * 7744 * structure of paddrs_nvl: 7745 * NAME TYPE DATA 7746 * pi_addr1 uint32_array (phci-id, cpi_flags) 7747 * pi_addr2 uint32_array (phci-id, cpi_flags) 7748 * ... 7749 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7750 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7751 * phci-ids are integers that identify pHCIs to which the 7752 * the bus specific address belongs to. These integers are used as an index 7753 * into to the phcis string array in the main nvlist to get the pHCI path. 7754 */ 7755 static int 7756 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7757 { 7758 char **phcis, **phci_namep; 7759 uint_t nphcis; 7760 mdi_vhcache_phci_t *cphci, **cphci_list; 7761 nvlist_t *caddrmapnvl; 7762 int32_t ver; 7763 int i; 7764 size_t cphci_list_size; 7765 7766 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7767 7768 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7769 ver != MDI_VHCI_CACHE_VERSION) 7770 return (MDI_FAILURE); 7771 7772 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7773 &nphcis) != 0) 7774 return (MDI_SUCCESS); 7775 7776 ASSERT(nphcis > 0); 7777 7778 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7779 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7780 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7781 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7782 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7783 enqueue_vhcache_phci(vhcache, cphci); 7784 cphci_list[i] = cphci; 7785 } 7786 7787 ASSERT(vhcache->vhcache_phci_head != NULL); 7788 7789 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7790 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7791 7792 kmem_free(cphci_list, cphci_list_size); 7793 return (MDI_SUCCESS); 7794 } 7795 7796 /* 7797 * Build paddrnvl for the specified client using the information in the 7798 * vhci cache and add it to the caddrmapnnvl. 7799 * Returns 0 on success, errno on failure. 
7800 */ 7801 static int 7802 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7803 nvlist_t *caddrmapnvl) 7804 { 7805 mdi_vhcache_pathinfo_t *cpi; 7806 nvlist_t *nvl; 7807 int err; 7808 uint32_t val[2]; 7809 7810 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7811 7812 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7813 return (err); 7814 7815 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7816 val[0] = cpi->cpi_cphci->cphci_id; 7817 val[1] = cpi->cpi_flags; 7818 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7819 != 0) 7820 goto out; 7821 } 7822 7823 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7824 out: 7825 nvlist_free(nvl); 7826 return (err); 7827 } 7828 7829 /* 7830 * Build caddrmapnvl using the information in the vhci cache 7831 * and add it to the mainnvl. 7832 * Returns 0 on success, errno on failure. 7833 */ 7834 static int 7835 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7836 { 7837 mdi_vhcache_client_t *cct; 7838 nvlist_t *nvl; 7839 int err; 7840 7841 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7842 7843 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7844 return (err); 7845 7846 for (cct = vhcache->vhcache_client_head; cct != NULL; 7847 cct = cct->cct_next) { 7848 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7849 goto out; 7850 } 7851 7852 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7853 out: 7854 nvlist_free(nvl); 7855 return (err); 7856 } 7857 7858 /* 7859 * Build nvlist using the information in the vhci cache. 7860 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7861 * Returns nvl on success, NULL on failure. 
7862 */ 7863 static nvlist_t * 7864 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7865 { 7866 mdi_vhcache_phci_t *cphci; 7867 uint_t phci_count; 7868 char **phcis; 7869 nvlist_t *nvl; 7870 int err, i; 7871 7872 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7873 nvl = NULL; 7874 goto out; 7875 } 7876 7877 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7878 MDI_VHCI_CACHE_VERSION)) != 0) 7879 goto out; 7880 7881 rw_enter(&vhcache->vhcache_lock, RW_READER); 7882 if (vhcache->vhcache_phci_head == NULL) { 7883 rw_exit(&vhcache->vhcache_lock); 7884 return (nvl); 7885 } 7886 7887 phci_count = 0; 7888 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7889 cphci = cphci->cphci_next) 7890 cphci->cphci_id = phci_count++; 7891 7892 /* build phci pathname list */ 7893 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7894 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7895 cphci = cphci->cphci_next, i++) 7896 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7897 7898 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7899 phci_count); 7900 free_string_array(phcis, phci_count); 7901 7902 if (err == 0 && 7903 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7904 rw_exit(&vhcache->vhcache_lock); 7905 return (nvl); 7906 } 7907 7908 rw_exit(&vhcache->vhcache_lock); 7909 out: 7910 if (nvl) 7911 nvlist_free(nvl); 7912 return (NULL); 7913 } 7914 7915 /* 7916 * Lookup vhcache phci structure for the specified phci path. 7917 */ 7918 static mdi_vhcache_phci_t * 7919 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7920 { 7921 mdi_vhcache_phci_t *cphci; 7922 7923 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7924 7925 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7926 cphci = cphci->cphci_next) { 7927 if (strcmp(cphci->cphci_path, phci_path) == 0) 7928 return (cphci); 7929 } 7930 7931 return (NULL); 7932 } 7933 7934 /* 7935 * Lookup vhcache phci structure for the specified phci. 
7936 */ 7937 static mdi_vhcache_phci_t * 7938 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7939 { 7940 mdi_vhcache_phci_t *cphci; 7941 7942 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7943 7944 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7945 cphci = cphci->cphci_next) { 7946 if (cphci->cphci_phci == ph) 7947 return (cphci); 7948 } 7949 7950 return (NULL); 7951 } 7952 7953 /* 7954 * Add the specified phci to the vhci cache if not already present. 7955 */ 7956 static void 7957 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7958 { 7959 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7960 mdi_vhcache_phci_t *cphci; 7961 char *pathname; 7962 int cache_updated; 7963 7964 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7965 7966 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7967 (void) ddi_pathname(ph->ph_dip, pathname); 7968 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7969 != NULL) { 7970 cphci->cphci_phci = ph; 7971 cache_updated = 0; 7972 } else { 7973 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7974 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7975 cphci->cphci_phci = ph; 7976 enqueue_vhcache_phci(vhcache, cphci); 7977 cache_updated = 1; 7978 } 7979 7980 rw_exit(&vhcache->vhcache_lock); 7981 7982 /* 7983 * Since a new phci has been added, reset 7984 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7985 * during next vhcache_discover_paths(). 7986 */ 7987 mutex_enter(&vhc->vhc_lock); 7988 vhc->vhc_path_discovery_cutoff_time = 0; 7989 mutex_exit(&vhc->vhc_lock); 7990 7991 kmem_free(pathname, MAXPATHLEN); 7992 if (cache_updated) 7993 vhcache_dirty(vhc); 7994 } 7995 7996 /* 7997 * Remove the reference to the specified phci from the vhci cache. 
7998 */ 7999 static void 8000 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 8001 { 8002 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8003 mdi_vhcache_phci_t *cphci; 8004 8005 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8006 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 8007 /* do not remove the actual mdi_vhcache_phci structure */ 8008 cphci->cphci_phci = NULL; 8009 } 8010 rw_exit(&vhcache->vhcache_lock); 8011 } 8012 8013 static void 8014 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 8015 mdi_vhcache_lookup_token_t *src) 8016 { 8017 if (src == NULL) { 8018 dst->lt_cct = NULL; 8019 dst->lt_cct_lookup_time = 0; 8020 } else { 8021 dst->lt_cct = src->lt_cct; 8022 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 8023 } 8024 } 8025 8026 /* 8027 * Look up vhcache client for the specified client. 8028 */ 8029 static mdi_vhcache_client_t * 8030 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 8031 mdi_vhcache_lookup_token_t *token) 8032 { 8033 mod_hash_val_t hv; 8034 char *name_addr; 8035 int len; 8036 8037 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8038 8039 /* 8040 * If no vhcache clean occurred since the last lookup, we can 8041 * simply return the cct from the last lookup operation. 8042 * It works because ccts are never freed except during the vhcache 8043 * cleanup operation. 
8044 */ 8045 if (token != NULL && 8046 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 8047 return (token->lt_cct); 8048 8049 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 8050 if (mod_hash_find(vhcache->vhcache_client_hash, 8051 (mod_hash_key_t)name_addr, &hv) == 0) { 8052 if (token) { 8053 token->lt_cct = (mdi_vhcache_client_t *)hv; 8054 token->lt_cct_lookup_time = ddi_get_lbolt64(); 8055 } 8056 } else { 8057 if (token) { 8058 token->lt_cct = NULL; 8059 token->lt_cct_lookup_time = 0; 8060 } 8061 hv = NULL; 8062 } 8063 kmem_free(name_addr, len); 8064 return ((mdi_vhcache_client_t *)hv); 8065 } 8066 8067 /* 8068 * Add the specified path to the vhci cache if not already present. 8069 * Also add the vhcache client for the client corresponding to this path 8070 * if it doesn't already exist. 8071 */ 8072 static void 8073 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8074 { 8075 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8076 mdi_vhcache_client_t *cct; 8077 mdi_vhcache_pathinfo_t *cpi; 8078 mdi_phci_t *ph = pip->pi_phci; 8079 mdi_client_t *ct = pip->pi_client; 8080 int cache_updated = 0; 8081 8082 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8083 8084 /* if vhcache client for this pip doesn't already exist, add it */ 8085 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8086 NULL)) == NULL) { 8087 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 8088 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 8089 ct->ct_guid, NULL); 8090 enqueue_vhcache_client(vhcache, cct); 8091 (void) mod_hash_insert(vhcache->vhcache_client_hash, 8092 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 8093 cache_updated = 1; 8094 } 8095 8096 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8097 if (cpi->cpi_cphci->cphci_phci == ph && 8098 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 8099 cpi->cpi_pip = pip; 8100 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 8101 cpi->cpi_flags &= 8102 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8103 sort_vhcache_paths(cct); 8104 cache_updated = 1; 8105 } 8106 break; 8107 } 8108 } 8109 8110 if (cpi == NULL) { 8111 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 8112 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 8113 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 8114 ASSERT(cpi->cpi_cphci != NULL); 8115 cpi->cpi_pip = pip; 8116 enqueue_vhcache_pathinfo(cct, cpi); 8117 cache_updated = 1; 8118 } 8119 8120 rw_exit(&vhcache->vhcache_lock); 8121 8122 if (cache_updated) 8123 vhcache_dirty(vhc); 8124 } 8125 8126 /* 8127 * Remove the reference to the specified path from the vhci cache. 8128 */ 8129 static void 8130 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8131 { 8132 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8133 mdi_client_t *ct = pip->pi_client; 8134 mdi_vhcache_client_t *cct; 8135 mdi_vhcache_pathinfo_t *cpi; 8136 8137 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8138 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8139 NULL)) != NULL) { 8140 for (cpi = cct->cct_cpi_head; cpi != NULL; 8141 cpi = cpi->cpi_next) { 8142 if (cpi->cpi_pip == pip) { 8143 cpi->cpi_pip = NULL; 8144 break; 8145 } 8146 } 8147 } 8148 rw_exit(&vhcache->vhcache_lock); 8149 } 8150 8151 /* 8152 * Flush the vhci cache to disk. 8153 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 8154 */ 8155 static int 8156 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 8157 { 8158 nvlist_t *nvl; 8159 int err; 8160 int rv; 8161 8162 /* 8163 * It is possible that the system may shutdown before 8164 * i_ddi_io_initialized (during stmsboot for example). To allow for 8165 * flushing the cache in this case do not check for 8166 * i_ddi_io_initialized when force flag is set. 
8167 */ 8168 if (force_flag == 0 && !i_ddi_io_initialized()) 8169 return (MDI_FAILURE); 8170 8171 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 8172 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 8173 nvlist_free(nvl); 8174 } else 8175 err = EFAULT; 8176 8177 rv = MDI_SUCCESS; 8178 mutex_enter(&vhc->vhc_lock); 8179 if (err != 0) { 8180 if (err == EROFS) { 8181 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 8182 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 8183 MDI_VHC_VHCACHE_DIRTY); 8184 } else { 8185 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 8186 cmn_err(CE_CONT, "%s: update failed\n", 8187 vhc->vhc_vhcache_filename); 8188 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 8189 } 8190 rv = MDI_FAILURE; 8191 } 8192 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 8193 cmn_err(CE_CONT, 8194 "%s: update now ok\n", vhc->vhc_vhcache_filename); 8195 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 8196 } 8197 mutex_exit(&vhc->vhc_lock); 8198 8199 return (rv); 8200 } 8201 8202 /* 8203 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 8204 * Exits itself if left idle for the idle timeout period. 
8205 */ 8206 static void 8207 vhcache_flush_thread(void *arg) 8208 { 8209 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8210 clock_t idle_time, quit_at_ticks; 8211 callb_cpr_t cprinfo; 8212 8213 /* number of seconds to sleep idle before exiting */ 8214 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8215 8216 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8217 "mdi_vhcache_flush"); 8218 mutex_enter(&vhc->vhc_lock); 8219 for (; ; ) { 8220 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8221 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8222 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8223 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8224 (void) cv_timedwait(&vhc->vhc_cv, 8225 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8226 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8227 } else { 8228 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8229 mutex_exit(&vhc->vhc_lock); 8230 8231 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8232 vhcache_dirty(vhc); 8233 8234 mutex_enter(&vhc->vhc_lock); 8235 } 8236 } 8237 8238 quit_at_ticks = ddi_get_lbolt() + idle_time; 8239 8240 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8241 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8242 ddi_get_lbolt() < quit_at_ticks) { 8243 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8244 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8245 quit_at_ticks); 8246 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8247 } 8248 8249 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8250 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8251 goto out; 8252 } 8253 8254 out: 8255 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8256 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8257 CALLB_CPR_EXIT(&cprinfo); 8258 } 8259 8260 /* 8261 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
8262 */ 8263 static void 8264 vhcache_dirty(mdi_vhci_config_t *vhc) 8265 { 8266 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8267 int create_thread; 8268 8269 rw_enter(&vhcache->vhcache_lock, RW_READER); 8270 /* do not flush cache until the cache is fully built */ 8271 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8272 rw_exit(&vhcache->vhcache_lock); 8273 return; 8274 } 8275 rw_exit(&vhcache->vhcache_lock); 8276 8277 mutex_enter(&vhc->vhc_lock); 8278 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8279 mutex_exit(&vhc->vhc_lock); 8280 return; 8281 } 8282 8283 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8284 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8285 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8286 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8287 cv_broadcast(&vhc->vhc_cv); 8288 create_thread = 0; 8289 } else { 8290 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8291 create_thread = 1; 8292 } 8293 mutex_exit(&vhc->vhc_lock); 8294 8295 if (create_thread) 8296 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8297 0, &p0, TS_RUN, minclsyspri); 8298 } 8299 8300 /* 8301 * phci bus config structure - one for for each phci bus config operation that 8302 * we initiate on behalf of a vhci. 
8303 */ 8304 typedef struct mdi_phci_bus_config_s { 8305 char *phbc_phci_path; 8306 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8307 struct mdi_phci_bus_config_s *phbc_next; 8308 } mdi_phci_bus_config_t; 8309 8310 /* vhci bus config structure - one for each vhci bus config operation */ 8311 typedef struct mdi_vhci_bus_config_s { 8312 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8313 major_t vhbc_op_major; /* bus config op major */ 8314 uint_t vhbc_op_flags; /* bus config op flags */ 8315 kmutex_t vhbc_lock; 8316 kcondvar_t vhbc_cv; 8317 int vhbc_thr_count; 8318 } mdi_vhci_bus_config_t; 8319 8320 /* 8321 * bus config the specified phci 8322 */ 8323 static void 8324 bus_config_phci(void *arg) 8325 { 8326 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8327 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8328 dev_info_t *ph_dip; 8329 8330 /* 8331 * first configure all path components upto phci and then configure 8332 * the phci children. 8333 */ 8334 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8335 != NULL) { 8336 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8337 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8338 (void) ndi_devi_config_driver(ph_dip, 8339 vhbc->vhbc_op_flags, 8340 vhbc->vhbc_op_major); 8341 } else 8342 (void) ndi_devi_config(ph_dip, 8343 vhbc->vhbc_op_flags); 8344 8345 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8346 ndi_rele_devi(ph_dip); 8347 } 8348 8349 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8350 kmem_free(phbc, sizeof (*phbc)); 8351 8352 mutex_enter(&vhbc->vhbc_lock); 8353 vhbc->vhbc_thr_count--; 8354 if (vhbc->vhbc_thr_count == 0) 8355 cv_broadcast(&vhbc->vhbc_cv); 8356 mutex_exit(&vhbc->vhbc_lock); 8357 } 8358 8359 /* 8360 * Bus config all phcis associated with the vhci in parallel. 8361 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8362 */ 8363 static void 8364 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8365 ddi_bus_config_op_t op, major_t maj) 8366 { 8367 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8368 mdi_vhci_bus_config_t *vhbc; 8369 mdi_vhcache_phci_t *cphci; 8370 8371 rw_enter(&vhcache->vhcache_lock, RW_READER); 8372 if (vhcache->vhcache_phci_head == NULL) { 8373 rw_exit(&vhcache->vhcache_lock); 8374 return; 8375 } 8376 8377 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8378 8379 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8380 cphci = cphci->cphci_next) { 8381 /* skip phcis that haven't attached before root is available */ 8382 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8383 continue; 8384 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8385 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8386 KM_SLEEP); 8387 phbc->phbc_vhbusconfig = vhbc; 8388 phbc->phbc_next = phbc_head; 8389 phbc_head = phbc; 8390 vhbc->vhbc_thr_count++; 8391 } 8392 rw_exit(&vhcache->vhcache_lock); 8393 8394 vhbc->vhbc_op = op; 8395 vhbc->vhbc_op_major = maj; 8396 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8397 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8398 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8399 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8400 8401 /* now create threads to initiate bus config on all phcis in parallel */ 8402 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8403 phbc_next = phbc->phbc_next; 8404 if (mdi_mtc_off) 8405 bus_config_phci((void *)phbc); 8406 else 8407 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8408 0, &p0, TS_RUN, minclsyspri); 8409 } 8410 8411 mutex_enter(&vhbc->vhbc_lock); 8412 /* wait until all threads exit */ 8413 while (vhbc->vhbc_thr_count > 0) 8414 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8415 mutex_exit(&vhbc->vhbc_lock); 8416 8417 mutex_destroy(&vhbc->vhbc_lock); 8418 cv_destroy(&vhbc->vhbc_cv); 8419 kmem_free(vhbc, sizeof (*vhbc)); 8420 } 8421 8422 /* 8423 * Single 
threaded version of bus_config_all_phcis() 8424 */ 8425 static void 8426 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8427 ddi_bus_config_op_t op, major_t maj) 8428 { 8429 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8430 8431 single_threaded_vhconfig_enter(vhc); 8432 bus_config_all_phcis(vhcache, flags, op, maj); 8433 single_threaded_vhconfig_exit(vhc); 8434 } 8435 8436 /* 8437 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8438 * The path includes the child component in addition to the phci path. 8439 */ 8440 static int 8441 bus_config_one_phci_child(char *path) 8442 { 8443 dev_info_t *ph_dip, *child; 8444 char *devnm; 8445 int rv = MDI_FAILURE; 8446 8447 /* extract the child component of the phci */ 8448 devnm = strrchr(path, '/'); 8449 *devnm++ = '\0'; 8450 8451 /* 8452 * first configure all path components upto phci and then 8453 * configure the phci child. 8454 */ 8455 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8456 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8457 NDI_SUCCESS) { 8458 /* 8459 * release the hold that ndi_devi_config_one() placed 8460 */ 8461 ndi_rele_devi(child); 8462 rv = MDI_SUCCESS; 8463 } 8464 8465 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8466 ndi_rele_devi(ph_dip); 8467 } 8468 8469 devnm--; 8470 *devnm = '/'; 8471 return (rv); 8472 } 8473 8474 /* 8475 * Build a list of phci client paths for the specified vhci client. 8476 * The list includes only those phci client paths which aren't configured yet. 8477 */ 8478 static mdi_phys_path_t * 8479 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8480 { 8481 mdi_vhcache_pathinfo_t *cpi; 8482 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8483 int config_path, len; 8484 8485 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8486 /* 8487 * include only those paths that aren't configured. 
8488 */ 8489 config_path = 0; 8490 if (cpi->cpi_pip == NULL) 8491 config_path = 1; 8492 else { 8493 MDI_PI_LOCK(cpi->cpi_pip); 8494 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8495 config_path = 1; 8496 MDI_PI_UNLOCK(cpi->cpi_pip); 8497 } 8498 8499 if (config_path) { 8500 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8501 len = strlen(cpi->cpi_cphci->cphci_path) + 8502 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8503 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8504 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8505 cpi->cpi_cphci->cphci_path, ct_name, 8506 cpi->cpi_addr); 8507 pp->phys_path_next = NULL; 8508 8509 if (pp_head == NULL) 8510 pp_head = pp; 8511 else 8512 pp_tail->phys_path_next = pp; 8513 pp_tail = pp; 8514 } 8515 } 8516 8517 return (pp_head); 8518 } 8519 8520 /* 8521 * Free the memory allocated for phci client path list. 8522 */ 8523 static void 8524 free_phclient_path_list(mdi_phys_path_t *pp_head) 8525 { 8526 mdi_phys_path_t *pp, *pp_next; 8527 8528 for (pp = pp_head; pp != NULL; pp = pp_next) { 8529 pp_next = pp->phys_path_next; 8530 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8531 kmem_free(pp, sizeof (*pp)); 8532 } 8533 } 8534 8535 /* 8536 * Allocated async client structure and initialize with the specified values. 8537 */ 8538 static mdi_async_client_config_t * 8539 alloc_async_client_config(char *ct_name, char *ct_addr, 8540 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8541 { 8542 mdi_async_client_config_t *acc; 8543 8544 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8545 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8546 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8547 acc->acc_phclient_path_list_head = pp_head; 8548 init_vhcache_lookup_token(&acc->acc_token, tok); 8549 acc->acc_next = NULL; 8550 return (acc); 8551 } 8552 8553 /* 8554 * Free the memory allocated for the async client structure and their members. 
8555 */ 8556 static void 8557 free_async_client_config(mdi_async_client_config_t *acc) 8558 { 8559 if (acc->acc_phclient_path_list_head) 8560 free_phclient_path_list(acc->acc_phclient_path_list_head); 8561 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8562 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8563 kmem_free(acc, sizeof (*acc)); 8564 } 8565 8566 /* 8567 * Sort vhcache pathinfos (cpis) of the specified client. 8568 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8569 * flag set come at the beginning of the list. All cpis which have this 8570 * flag set come at the end of the list. 8571 */ 8572 static void 8573 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8574 { 8575 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8576 8577 cpi_head = cct->cct_cpi_head; 8578 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8579 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8580 cpi_next = cpi->cpi_next; 8581 enqueue_vhcache_pathinfo(cct, cpi); 8582 } 8583 } 8584 8585 /* 8586 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8587 * every vhcache pathinfo of the specified client. If not adjust the flag 8588 * setting appropriately. 8589 * 8590 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8591 * on-disk vhci cache. So every time this flag is updated the cache must be 8592 * flushed. 8593 */ 8594 static void 8595 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8596 mdi_vhcache_lookup_token_t *tok) 8597 { 8598 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8599 mdi_vhcache_client_t *cct; 8600 mdi_vhcache_pathinfo_t *cpi; 8601 8602 rw_enter(&vhcache->vhcache_lock, RW_READER); 8603 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8604 == NULL) { 8605 rw_exit(&vhcache->vhcache_lock); 8606 return; 8607 } 8608 8609 /* 8610 * to avoid unnecessary on-disk cache updates, first check if an 8611 * update is really needed. 
If no update is needed simply return. 8612 */ 8613 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8614 if ((cpi->cpi_pip != NULL && 8615 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8616 (cpi->cpi_pip == NULL && 8617 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8618 break; 8619 } 8620 } 8621 if (cpi == NULL) { 8622 rw_exit(&vhcache->vhcache_lock); 8623 return; 8624 } 8625 8626 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8627 rw_exit(&vhcache->vhcache_lock); 8628 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8629 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8630 tok)) == NULL) { 8631 rw_exit(&vhcache->vhcache_lock); 8632 return; 8633 } 8634 } 8635 8636 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8637 if (cpi->cpi_pip != NULL) 8638 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8639 else 8640 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8641 } 8642 sort_vhcache_paths(cct); 8643 8644 rw_exit(&vhcache->vhcache_lock); 8645 vhcache_dirty(vhc); 8646 } 8647 8648 /* 8649 * Configure all specified paths of the client. 8650 */ 8651 static void 8652 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8653 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8654 { 8655 mdi_phys_path_t *pp; 8656 8657 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8658 (void) bus_config_one_phci_child(pp->phys_path); 8659 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8660 } 8661 8662 /* 8663 * Dequeue elements from vhci async client config list and bus configure 8664 * their corresponding phci clients. 
8665 */ 8666 static void 8667 config_client_paths_thread(void *arg) 8668 { 8669 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8670 mdi_async_client_config_t *acc; 8671 clock_t quit_at_ticks; 8672 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8673 callb_cpr_t cprinfo; 8674 8675 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8676 "mdi_config_client_paths"); 8677 8678 for (; ; ) { 8679 quit_at_ticks = ddi_get_lbolt() + idle_time; 8680 8681 mutex_enter(&vhc->vhc_lock); 8682 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8683 vhc->vhc_acc_list_head == NULL && 8684 ddi_get_lbolt() < quit_at_ticks) { 8685 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8686 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8687 quit_at_ticks); 8688 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8689 } 8690 8691 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8692 vhc->vhc_acc_list_head == NULL) 8693 goto out; 8694 8695 acc = vhc->vhc_acc_list_head; 8696 vhc->vhc_acc_list_head = acc->acc_next; 8697 if (vhc->vhc_acc_list_head == NULL) 8698 vhc->vhc_acc_list_tail = NULL; 8699 vhc->vhc_acc_count--; 8700 mutex_exit(&vhc->vhc_lock); 8701 8702 config_client_paths_sync(vhc, acc->acc_ct_name, 8703 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8704 &acc->acc_token); 8705 8706 free_async_client_config(acc); 8707 } 8708 8709 out: 8710 vhc->vhc_acc_thrcount--; 8711 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8712 CALLB_CPR_EXIT(&cprinfo); 8713 } 8714 8715 /* 8716 * Arrange for all the phci client paths (pp_head) for the specified client 8717 * to be bus configured asynchronously by a thread. 
8718 */ 8719 static void 8720 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8721 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8722 { 8723 mdi_async_client_config_t *acc, *newacc; 8724 int create_thread; 8725 8726 if (pp_head == NULL) 8727 return; 8728 8729 if (mdi_mtc_off) { 8730 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8731 free_phclient_path_list(pp_head); 8732 return; 8733 } 8734 8735 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8736 ASSERT(newacc); 8737 8738 mutex_enter(&vhc->vhc_lock); 8739 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8740 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8741 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8742 free_async_client_config(newacc); 8743 mutex_exit(&vhc->vhc_lock); 8744 return; 8745 } 8746 } 8747 8748 if (vhc->vhc_acc_list_head == NULL) 8749 vhc->vhc_acc_list_head = newacc; 8750 else 8751 vhc->vhc_acc_list_tail->acc_next = newacc; 8752 vhc->vhc_acc_list_tail = newacc; 8753 vhc->vhc_acc_count++; 8754 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8755 cv_broadcast(&vhc->vhc_cv); 8756 create_thread = 0; 8757 } else { 8758 vhc->vhc_acc_thrcount++; 8759 create_thread = 1; 8760 } 8761 mutex_exit(&vhc->vhc_lock); 8762 8763 if (create_thread) 8764 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8765 0, &p0, TS_RUN, minclsyspri); 8766 } 8767 8768 /* 8769 * Return number of online paths for the specified client. 
8770 */ 8771 static int 8772 nonline_paths(mdi_vhcache_client_t *cct) 8773 { 8774 mdi_vhcache_pathinfo_t *cpi; 8775 int online_count = 0; 8776 8777 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8778 if (cpi->cpi_pip != NULL) { 8779 MDI_PI_LOCK(cpi->cpi_pip); 8780 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8781 online_count++; 8782 MDI_PI_UNLOCK(cpi->cpi_pip); 8783 } 8784 } 8785 8786 return (online_count); 8787 } 8788 8789 /* 8790 * Bus configure all paths for the specified vhci client. 8791 * If at least one path for the client is already online, the remaining paths 8792 * will be configured asynchronously. Otherwise, it synchronously configures 8793 * the paths until at least one path is online and then rest of the paths 8794 * will be configured asynchronously. 8795 */ 8796 static void 8797 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8798 { 8799 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8800 mdi_phys_path_t *pp_head, *pp; 8801 mdi_vhcache_client_t *cct; 8802 mdi_vhcache_lookup_token_t tok; 8803 8804 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8805 8806 init_vhcache_lookup_token(&tok, NULL); 8807 8808 if (ct_name == NULL || ct_addr == NULL || 8809 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8810 == NULL || 8811 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8812 rw_exit(&vhcache->vhcache_lock); 8813 return; 8814 } 8815 8816 /* if at least one path is online, configure the rest asynchronously */ 8817 if (nonline_paths(cct) > 0) { 8818 rw_exit(&vhcache->vhcache_lock); 8819 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8820 return; 8821 } 8822 8823 rw_exit(&vhcache->vhcache_lock); 8824 8825 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8826 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8827 rw_enter(&vhcache->vhcache_lock, RW_READER); 8828 8829 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8830 ct_addr, &tok)) 
== NULL) { 8831 rw_exit(&vhcache->vhcache_lock); 8832 goto out; 8833 } 8834 8835 if (nonline_paths(cct) > 0 && 8836 pp->phys_path_next != NULL) { 8837 rw_exit(&vhcache->vhcache_lock); 8838 config_client_paths_async(vhc, ct_name, ct_addr, 8839 pp->phys_path_next, &tok); 8840 pp->phys_path_next = NULL; 8841 goto out; 8842 } 8843 8844 rw_exit(&vhcache->vhcache_lock); 8845 } 8846 } 8847 8848 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8849 out: 8850 free_phclient_path_list(pp_head); 8851 } 8852 8853 static void 8854 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8855 { 8856 mutex_enter(&vhc->vhc_lock); 8857 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8858 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8859 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8860 mutex_exit(&vhc->vhc_lock); 8861 } 8862 8863 static void 8864 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8865 { 8866 mutex_enter(&vhc->vhc_lock); 8867 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8868 cv_broadcast(&vhc->vhc_cv); 8869 mutex_exit(&vhc->vhc_lock); 8870 } 8871 8872 typedef struct mdi_phci_driver_info { 8873 char *phdriver_name; /* name of the phci driver */ 8874 8875 /* set to non zero if the phci driver supports root device */ 8876 int phdriver_root_support; 8877 } mdi_phci_driver_info_t; 8878 8879 /* 8880 * vhci class and root support capability of a phci driver can be 8881 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8882 * phci driver.conf file. The built-in tables below contain this information 8883 * for those phci drivers whose driver.conf files don't yet contain this info. 8884 * 8885 * All phci drivers expect iscsi have root device support. 
8886 */ 8887 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8888 { "fp", 1 }, 8889 { "iscsi", 0 }, 8890 { "ibsrp", 1 } 8891 }; 8892 8893 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8894 8895 static void * 8896 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8897 { 8898 void *new_ptr; 8899 8900 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8901 if (old_ptr) { 8902 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8903 kmem_free(old_ptr, old_size); 8904 } 8905 return (new_ptr); 8906 } 8907 8908 static void 8909 add_to_phci_list(char ***driver_list, int **root_support_list, 8910 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8911 { 8912 ASSERT(*cur_elements <= *max_elements); 8913 if (*cur_elements == *max_elements) { 8914 *max_elements += 10; 8915 *driver_list = mdi_realloc(*driver_list, 8916 sizeof (char *) * (*cur_elements), 8917 sizeof (char *) * (*max_elements)); 8918 *root_support_list = mdi_realloc(*root_support_list, 8919 sizeof (int) * (*cur_elements), 8920 sizeof (int) * (*max_elements)); 8921 } 8922 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8923 (*root_support_list)[*cur_elements] = root_support; 8924 (*cur_elements)++; 8925 } 8926 8927 static void 8928 get_phci_driver_list(char *vhci_class, char ***driver_list, 8929 int **root_support_list, int *cur_elements, int *max_elements) 8930 { 8931 mdi_phci_driver_info_t *st_driver_list, *p; 8932 int st_ndrivers, root_support, i, j, driver_conf_count; 8933 major_t m; 8934 struct devnames *dnp; 8935 ddi_prop_t *propp; 8936 8937 *driver_list = NULL; 8938 *root_support_list = NULL; 8939 *cur_elements = 0; 8940 *max_elements = 0; 8941 8942 /* add the phci drivers derived from the phci driver.conf files */ 8943 for (m = 0; m < devcnt; m++) { 8944 dnp = &devnamesp[m]; 8945 8946 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8947 LOCK_DEV_OPS(&dnp->dn_lock); 8948 if (dnp->dn_global_prop_ptr != NULL && 8949 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8950 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8951 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8952 strcmp(propp->prop_val, vhci_class) == 0) { 8953 8954 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8955 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8956 &dnp->dn_global_prop_ptr->prop_list) 8957 == NULL) ? 1 : 0; 8958 8959 add_to_phci_list(driver_list, root_support_list, 8960 cur_elements, max_elements, dnp->dn_name, 8961 root_support); 8962 8963 UNLOCK_DEV_OPS(&dnp->dn_lock); 8964 } else 8965 UNLOCK_DEV_OPS(&dnp->dn_lock); 8966 } 8967 } 8968 8969 driver_conf_count = *cur_elements; 8970 8971 /* add the phci drivers specified in the built-in tables */ 8972 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8973 st_driver_list = scsi_phci_driver_list; 8974 st_ndrivers = sizeof (scsi_phci_driver_list) / 8975 sizeof (mdi_phci_driver_info_t); 8976 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8977 st_driver_list = ib_phci_driver_list; 8978 st_ndrivers = sizeof (ib_phci_driver_list) / 8979 sizeof (mdi_phci_driver_info_t); 8980 } else { 8981 st_driver_list = NULL; 8982 st_ndrivers = 0; 8983 } 8984 8985 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8986 /* add this phci driver if not already added before */ 8987 for (j = 0; j < driver_conf_count; j++) { 8988 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8989 break; 8990 } 8991 if (j == driver_conf_count) { 8992 add_to_phci_list(driver_list, root_support_list, 8993 cur_elements, max_elements, p->phdriver_name, 8994 p->phdriver_root_support); 8995 } 8996 } 8997 } 8998 8999 /* 9000 * Attach the phci driver instances associated with the specified vhci class. 9001 * If root is mounted attach all phci driver instances. 9002 * If root is not mounted, attach the instances of only those phci 9003 * drivers that have the root support. 
9004 */ 9005 static void 9006 attach_phci_drivers(char *vhci_class) 9007 { 9008 char **driver_list, **p; 9009 int *root_support_list; 9010 int cur_elements, max_elements, i; 9011 major_t m; 9012 9013 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9014 &cur_elements, &max_elements); 9015 9016 for (i = 0; i < cur_elements; i++) { 9017 if (modrootloaded || root_support_list[i]) { 9018 m = ddi_name_to_major(driver_list[i]); 9019 if (m != DDI_MAJOR_T_NONE && 9020 ddi_hold_installed_driver(m)) 9021 ddi_rele_driver(m); 9022 } 9023 } 9024 9025 if (driver_list) { 9026 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 9027 kmem_free(*p, strlen(*p) + 1); 9028 kmem_free(driver_list, sizeof (char *) * max_elements); 9029 kmem_free(root_support_list, sizeof (int) * max_elements); 9030 } 9031 } 9032 9033 /* 9034 * Build vhci cache: 9035 * 9036 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 9037 * the phci driver instances. During this process the cache gets built. 9038 * 9039 * Cache is built fully if the root is mounted. 9040 * If the root is not mounted, phci drivers that do not have root support 9041 * are not attached. As a result the cache is built partially. The entries 9042 * in the cache reflect only those phci drivers that have root support. 
9043 */ 9044 static int 9045 build_vhci_cache(mdi_vhci_t *vh) 9046 { 9047 mdi_vhci_config_t *vhc = vh->vh_config; 9048 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9049 9050 single_threaded_vhconfig_enter(vhc); 9051 9052 rw_enter(&vhcache->vhcache_lock, RW_READER); 9053 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 9054 rw_exit(&vhcache->vhcache_lock); 9055 single_threaded_vhconfig_exit(vhc); 9056 return (0); 9057 } 9058 rw_exit(&vhcache->vhcache_lock); 9059 9060 attach_phci_drivers(vh->vh_class); 9061 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 9062 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9063 9064 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9065 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 9066 rw_exit(&vhcache->vhcache_lock); 9067 9068 single_threaded_vhconfig_exit(vhc); 9069 vhcache_dirty(vhc); 9070 return (1); 9071 } 9072 9073 /* 9074 * Determine if discovery of paths is needed. 9075 */ 9076 static int 9077 vhcache_do_discovery(mdi_vhci_config_t *vhc) 9078 { 9079 int rv = 1; 9080 9081 mutex_enter(&vhc->vhc_lock); 9082 if (i_ddi_io_initialized() == 0) { 9083 if (vhc->vhc_path_discovery_boot > 0) { 9084 vhc->vhc_path_discovery_boot--; 9085 goto out; 9086 } 9087 } else { 9088 if (vhc->vhc_path_discovery_postboot > 0) { 9089 vhc->vhc_path_discovery_postboot--; 9090 goto out; 9091 } 9092 } 9093 9094 /* 9095 * Do full path discovery at most once per mdi_path_discovery_interval. 9096 * This is to avoid a series of full path discoveries when opening 9097 * stale /dev/[r]dsk links. 9098 */ 9099 if (mdi_path_discovery_interval != -1 && 9100 ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time) 9101 goto out; 9102 9103 rv = 0; 9104 out: 9105 mutex_exit(&vhc->vhc_lock); 9106 return (rv); 9107 } 9108 9109 /* 9110 * Discover all paths: 9111 * 9112 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 9113 * driver instances. During this process all paths will be discovered. 
9114 */ 9115 static int 9116 vhcache_discover_paths(mdi_vhci_t *vh) 9117 { 9118 mdi_vhci_config_t *vhc = vh->vh_config; 9119 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9120 int rv = 0; 9121 9122 single_threaded_vhconfig_enter(vhc); 9123 9124 if (vhcache_do_discovery(vhc)) { 9125 attach_phci_drivers(vh->vh_class); 9126 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 9127 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9128 9129 mutex_enter(&vhc->vhc_lock); 9130 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() + 9131 mdi_path_discovery_interval * TICKS_PER_SECOND; 9132 mutex_exit(&vhc->vhc_lock); 9133 rv = 1; 9134 } 9135 9136 single_threaded_vhconfig_exit(vhc); 9137 return (rv); 9138 } 9139 9140 /* 9141 * Generic vhci bus config implementation: 9142 * 9143 * Parameters 9144 * vdip vhci dip 9145 * flags bus config flags 9146 * op bus config operation 9147 * The remaining parameters are bus config operation specific 9148 * 9149 * for BUS_CONFIG_ONE 9150 * arg pointer to name@addr 9151 * child upon successful return from this function, *child will be 9152 * set to the configured and held devinfo child node of vdip. 9153 * ct_addr pointer to client address (i.e. GUID) 9154 * 9155 * for BUS_CONFIG_DRIVER 9156 * arg major number of the driver 9157 * child and ct_addr parameters are ignored 9158 * 9159 * for BUS_CONFIG_ALL 9160 * arg, child, and ct_addr parameters are ignored 9161 * 9162 * Note that for the rest of the bus config operations, this function simply 9163 * calls the framework provided default bus config routine. 
9164 */ 9165 int 9166 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 9167 void *arg, dev_info_t **child, char *ct_addr) 9168 { 9169 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9170 mdi_vhci_config_t *vhc = vh->vh_config; 9171 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9172 int rv = 0; 9173 int params_valid = 0; 9174 char *cp; 9175 9176 /* 9177 * To bus config vhcis we relay operation, possibly using another 9178 * thread, to phcis. The phci driver then interacts with MDI to cause 9179 * vhci child nodes to be enumerated under the vhci node. Adding a 9180 * vhci child requires an ndi_devi_enter of the vhci. Since another 9181 * thread may be adding the child, to avoid deadlock we can't wait 9182 * for the relayed operations to complete if we have already entered 9183 * the vhci node. 9184 */ 9185 if (DEVI_BUSY_OWNED(vdip)) { 9186 MDI_DEBUG(2, (MDI_NOTE, vdip, 9187 "vhci dip is busy owned %p", (void *)vdip)); 9188 goto default_bus_config; 9189 } 9190 9191 rw_enter(&vhcache->vhcache_lock, RW_READER); 9192 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 9193 rw_exit(&vhcache->vhcache_lock); 9194 rv = build_vhci_cache(vh); 9195 rw_enter(&vhcache->vhcache_lock, RW_READER); 9196 } 9197 9198 switch (op) { 9199 case BUS_CONFIG_ONE: 9200 if (arg != NULL && ct_addr != NULL) { 9201 /* extract node name */ 9202 cp = (char *)arg; 9203 while (*cp != '\0' && *cp != '@') 9204 cp++; 9205 if (*cp == '@') { 9206 params_valid = 1; 9207 *cp = '\0'; 9208 config_client_paths(vhc, (char *)arg, ct_addr); 9209 /* config_client_paths() releases cache_lock */ 9210 *cp = '@'; 9211 break; 9212 } 9213 } 9214 9215 rw_exit(&vhcache->vhcache_lock); 9216 break; 9217 9218 case BUS_CONFIG_DRIVER: 9219 rw_exit(&vhcache->vhcache_lock); 9220 if (rv == 0) 9221 st_bus_config_all_phcis(vhc, flags, op, 9222 (major_t)(uintptr_t)arg); 9223 break; 9224 9225 case BUS_CONFIG_ALL: 9226 rw_exit(&vhcache->vhcache_lock); 9227 if (rv == 0) 9228 st_bus_config_all_phcis(vhc, 
flags, op, -1); 9229 break; 9230 9231 default: 9232 rw_exit(&vhcache->vhcache_lock); 9233 break; 9234 } 9235 9236 9237 default_bus_config: 9238 /* 9239 * All requested child nodes are enumerated under the vhci. 9240 * Now configure them. 9241 */ 9242 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9243 NDI_SUCCESS) { 9244 return (MDI_SUCCESS); 9245 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 9246 /* discover all paths and try configuring again */ 9247 if (vhcache_discover_paths(vh) && 9248 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9249 NDI_SUCCESS) 9250 return (MDI_SUCCESS); 9251 } 9252 9253 return (MDI_FAILURE); 9254 } 9255 9256 /* 9257 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 9258 */ 9259 static nvlist_t * 9260 read_on_disk_vhci_cache(char *vhci_class) 9261 { 9262 nvlist_t *nvl; 9263 int err; 9264 char *filename; 9265 9266 filename = vhclass2vhcache_filename(vhci_class); 9267 9268 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9269 kmem_free(filename, strlen(filename) + 1); 9270 return (nvl); 9271 } else if (err == EIO) 9272 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename); 9273 else if (err == EINVAL) 9274 cmn_err(CE_WARN, 9275 "%s: data file corrupted, will recreate", filename); 9276 9277 kmem_free(filename, strlen(filename) + 1); 9278 return (NULL); 9279 } 9280 9281 /* 9282 * Read on-disk vhci cache into nvlists for all vhci classes. 9283 * Called during booting by i_ddi_read_devices_files(). 9284 */ 9285 void 9286 mdi_read_devices_files(void) 9287 { 9288 int i; 9289 9290 for (i = 0; i < N_VHCI_CLASSES; i++) 9291 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9292 } 9293 9294 /* 9295 * Remove all stale entries from vhci cache. 
9296 */ 9297 static void 9298 clean_vhcache(mdi_vhci_config_t *vhc) 9299 { 9300 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9301 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 9302 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 9303 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 9304 9305 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9306 9307 cct_head = vhcache->vhcache_client_head; 9308 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9309 for (cct = cct_head; cct != NULL; cct = cct_next) { 9310 cct_next = cct->cct_next; 9311 9312 cpi_head = cct->cct_cpi_head; 9313 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 9314 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 9315 cpi_next = cpi->cpi_next; 9316 if (cpi->cpi_pip != NULL) { 9317 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 9318 enqueue_tail_vhcache_pathinfo(cct, cpi); 9319 } else 9320 free_vhcache_pathinfo(cpi); 9321 } 9322 9323 if (cct->cct_cpi_head != NULL) 9324 enqueue_vhcache_client(vhcache, cct); 9325 else { 9326 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9327 (mod_hash_key_t)cct->cct_name_addr); 9328 free_vhcache_client(cct); 9329 } 9330 } 9331 9332 cphci_head = vhcache->vhcache_phci_head; 9333 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9334 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 9335 cphci_next = cphci->cphci_next; 9336 if (cphci->cphci_phci != NULL) 9337 enqueue_vhcache_phci(vhcache, cphci); 9338 else 9339 free_vhcache_phci(cphci); 9340 } 9341 9342 vhcache->vhcache_clean_time = ddi_get_lbolt64(); 9343 rw_exit(&vhcache->vhcache_lock); 9344 vhcache_dirty(vhc); 9345 } 9346 9347 /* 9348 * Remove all stale entries from vhci cache. 
9349 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9350 */ 9351 void 9352 mdi_clean_vhcache(void) 9353 { 9354 mdi_vhci_t *vh; 9355 9356 mutex_enter(&mdi_mutex); 9357 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9358 vh->vh_refcnt++; 9359 mutex_exit(&mdi_mutex); 9360 clean_vhcache(vh->vh_config); 9361 mutex_enter(&mdi_mutex); 9362 vh->vh_refcnt--; 9363 } 9364 mutex_exit(&mdi_mutex); 9365 } 9366 9367 /* 9368 * mdi_vhci_walk_clients(): 9369 * Walker routine to traverse client dev_info nodes 9370 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9371 * below the client, including nexus devices, which we dont want. 9372 * So we just traverse the immediate siblings, starting from 1st client. 9373 */ 9374 void 9375 mdi_vhci_walk_clients(dev_info_t *vdip, 9376 int (*f)(dev_info_t *, void *), void *arg) 9377 { 9378 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9379 dev_info_t *cdip; 9380 mdi_client_t *ct; 9381 9382 MDI_VHCI_CLIENT_LOCK(vh); 9383 cdip = ddi_get_child(vdip); 9384 while (cdip) { 9385 ct = i_devi_get_client(cdip); 9386 MDI_CLIENT_LOCK(ct); 9387 9388 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9389 cdip = ddi_get_next_sibling(cdip); 9390 else 9391 cdip = NULL; 9392 9393 MDI_CLIENT_UNLOCK(ct); 9394 } 9395 MDI_VHCI_CLIENT_UNLOCK(vh); 9396 } 9397 9398 /* 9399 * mdi_vhci_walk_phcis(): 9400 * Walker routine to traverse phci dev_info nodes 9401 */ 9402 void 9403 mdi_vhci_walk_phcis(dev_info_t *vdip, 9404 int (*f)(dev_info_t *, void *), void *arg) 9405 { 9406 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9407 mdi_phci_t *ph, *next; 9408 9409 MDI_VHCI_PHCI_LOCK(vh); 9410 ph = vh->vh_phci_head; 9411 while (ph) { 9412 MDI_PHCI_LOCK(ph); 9413 9414 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9415 next = ph->ph_next; 9416 else 9417 next = NULL; 9418 9419 MDI_PHCI_UNLOCK(ph); 9420 ph = next; 9421 } 9422 MDI_VHCI_PHCI_UNLOCK(vh); 9423 } 9424 9425 9426 /* 9427 * mdi_walk_vhcis(): 9428 * Walker routine to traverse vhci 
dev_info nodes 9429 */ 9430 void 9431 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9432 { 9433 mdi_vhci_t *vh = NULL; 9434 9435 mutex_enter(&mdi_mutex); 9436 /* 9437 * Scan for already registered vhci 9438 */ 9439 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9440 vh->vh_refcnt++; 9441 mutex_exit(&mdi_mutex); 9442 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9443 mutex_enter(&mdi_mutex); 9444 vh->vh_refcnt--; 9445 break; 9446 } else { 9447 mutex_enter(&mdi_mutex); 9448 vh->vh_refcnt--; 9449 } 9450 } 9451 9452 mutex_exit(&mdi_mutex); 9453 } 9454 9455 /* 9456 * i_mdi_log_sysevent(): 9457 * Logs events for pickup by syseventd 9458 */ 9459 static void 9460 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9461 { 9462 char *path_name; 9463 nvlist_t *attr_list; 9464 9465 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9466 KM_SLEEP) != DDI_SUCCESS) { 9467 goto alloc_failed; 9468 } 9469 9470 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9471 (void) ddi_pathname(dip, path_name); 9472 9473 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9474 ddi_driver_name(dip)) != DDI_SUCCESS) { 9475 goto error; 9476 } 9477 9478 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9479 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9480 goto error; 9481 } 9482 9483 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9484 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9485 goto error; 9486 } 9487 9488 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9489 path_name) != DDI_SUCCESS) { 9490 goto error; 9491 } 9492 9493 if (nvlist_add_string(attr_list, DDI_CLASS, 9494 ph_vh_class) != DDI_SUCCESS) { 9495 goto error; 9496 } 9497 9498 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9499 attr_list, NULL, DDI_SLEEP); 9500 9501 error: 9502 kmem_free(path_name, MAXPATHLEN); 9503 nvlist_free(attr_list); 9504 return; 9505 9506 alloc_failed: 9507 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9508 } 9509 9510 char ** 9511 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9512 { 9513 char **driver_list, **ret_driver_list = NULL; 9514 int *root_support_list; 9515 int cur_elements, max_elements; 9516 9517 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9518 &cur_elements, &max_elements); 9519 9520 9521 if (driver_list) { 9522 kmem_free(root_support_list, sizeof (int) * max_elements); 9523 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9524 * max_elements, sizeof (char *) * cur_elements); 9525 } 9526 *ndrivers = cur_elements; 9527 9528 return (ret_driver_list); 9529 9530 } 9531 9532 void 9533 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9534 { 9535 char **p; 9536 int i; 9537 9538 if (driver_list) { 9539 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9540 kmem_free(*p, strlen(*p) + 1); 9541 kmem_free(driver_list, sizeof (char *) * ndrivers); 9542 } 9543 } 9544 9545 /* 9546 * mdi_is_dev_supported(): 9547 * function called by pHCI bus config operation to determine if a 9548 * device should be represented as a child of the vHCI or the 9549 * pHCI. This decision is made by the vHCI, using cinfo idenity 9550 * information passed by the pHCI - specifics of the cinfo 9551 * representation are by agreement between the pHCI and vHCI. 9552 * Return Values: 9553 * MDI_SUCCESS 9554 * MDI_FAILURE 9555 */ 9556 int 9557 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9558 { 9559 mdi_vhci_t *vh; 9560 9561 ASSERT(class && pdip); 9562 9563 /* 9564 * For dev_supported, mdi_phci_register() must have established pdip as 9565 * a pHCI. 9566 * 9567 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9568 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9569 */ 9570 if (!MDI_PHCI(pdip)) 9571 return (MDI_FAILURE); 9572 9573 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9574 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9575 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9576 return (MDI_FAILURE); 9577 } 9578 9579 /* Return vHCI answer */ 9580 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9581 } 9582 9583 int 9584 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9585 { 9586 uint_t devstate = 0; 9587 dev_info_t *cdip; 9588 9589 if ((pip == NULL) || (dcp == NULL)) 9590 return (MDI_FAILURE); 9591 9592 cdip = mdi_pi_get_client(pip); 9593 9594 switch (mdi_pi_get_state(pip)) { 9595 case MDI_PATHINFO_STATE_INIT: 9596 devstate = DEVICE_DOWN; 9597 break; 9598 case MDI_PATHINFO_STATE_ONLINE: 9599 devstate = DEVICE_ONLINE; 9600 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9601 devstate |= DEVICE_BUSY; 9602 break; 9603 case MDI_PATHINFO_STATE_STANDBY: 9604 devstate = DEVICE_ONLINE; 9605 break; 9606 case MDI_PATHINFO_STATE_FAULT: 9607 devstate = DEVICE_DOWN; 9608 break; 9609 case MDI_PATHINFO_STATE_OFFLINE: 9610 devstate = DEVICE_OFFLINE; 9611 break; 9612 default: 9613 ASSERT(MDI_PI(pip)->pi_state); 9614 } 9615 9616 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9617 return (MDI_FAILURE); 9618 9619 return (MDI_SUCCESS); 9620 } 9621