1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
29 * 30 * Default locking order: 31 * 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 #include <sys/sysmacros.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(level, stmnt) \ 77 if (mdi_debug >= (level)) i_mdi_log stmnt 78 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 79 #else /* !DEBUG */ 80 #define MDI_DEBUG(level, stmnt) 81 #endif /* DEBUG */ 82 83 extern pri_t minclsyspri; 84 extern int modrootloaded; 85 86 /* 87 * Global mutex: 88 * Protects vHCI list and structure members. 
89 */ 90 kmutex_t mdi_mutex; 91 92 /* 93 * Registered vHCI class driver lists 94 */ 95 int mdi_vhci_count; 96 mdi_vhci_t *mdi_vhci_head; 97 mdi_vhci_t *mdi_vhci_tail; 98 99 /* 100 * Client Hash Table size 101 */ 102 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 103 104 /* 105 * taskq interface definitions 106 */ 107 #define MDI_TASKQ_N_THREADS 8 108 #define MDI_TASKQ_PRI minclsyspri 109 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 110 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 111 112 taskq_t *mdi_taskq; 113 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 114 115 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 116 117 /* 118 * The data should be "quiet" for this interval (in seconds) before the 119 * vhci cached data is flushed to the disk. 120 */ 121 static int mdi_vhcache_flush_delay = 10; 122 123 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 124 static int mdi_vhcache_flush_daemon_idle_time = 60; 125 126 /* 127 * MDI falls back to discovery of all paths when a bus_config_one fails. 128 * The following parameters can be used to tune this operation. 129 * 130 * mdi_path_discovery_boot 131 * Number of times path discovery will be attempted during early boot. 132 * Probably there is no reason to ever set this value to greater than one. 133 * 134 * mdi_path_discovery_postboot 135 * Number of times path discovery will be attempted after early boot. 136 * Set it to a minimum of two to allow for discovery of iscsi paths which 137 * may happen very late during booting. 138 * 139 * mdi_path_discovery_interval 140 * Minimum number of seconds MDI will wait between successive discovery 141 * of all paths. Set it to -1 to disable discovery of all paths. 
142 */ 143 static int mdi_path_discovery_boot = 1; 144 static int mdi_path_discovery_postboot = 2; 145 static int mdi_path_discovery_interval = 10; 146 147 /* 148 * number of seconds the asynchronous configuration thread will sleep idle 149 * before exiting. 150 */ 151 static int mdi_async_config_idle_time = 600; 152 153 static int mdi_bus_config_cache_hash_size = 256; 154 155 /* turns off multithreaded configuration for certain operations */ 156 static int mdi_mtc_off = 0; 157 158 /* 159 * The "path" to a pathinfo node is identical to the /devices path to a 160 * devinfo node had the device been enumerated under a pHCI instead of 161 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 162 * This association persists across create/delete of the pathinfo nodes, 163 * but not across reboot. 164 */ 165 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 166 static int mdi_pathmap_hash_size = 256; 167 static kmutex_t mdi_pathmap_mutex; 168 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 169 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 170 171 /* 172 * MDI component property name/value string definitions 173 */ 174 const char *mdi_component_prop = "mpxio-component"; 175 const char *mdi_component_prop_vhci = "vhci"; 176 const char *mdi_component_prop_phci = "phci"; 177 const char *mdi_component_prop_client = "client"; 178 179 /* 180 * MDI client global unique identifier property name 181 */ 182 const char *mdi_client_guid_prop = "client-guid"; 183 184 /* 185 * MDI client load balancing property name/value string definitions 186 */ 187 const char *mdi_load_balance = "load-balance"; 188 const char *mdi_load_balance_none = "none"; 189 const char *mdi_load_balance_rr = "round-robin"; 190 const char *mdi_load_balance_lba = "logical-block"; 191 192 /* 193 * Obsolete vHCI class definition; to be removed after Leadville update 194 */ 195 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 196 197 static char 
vhci_greeting[] = 198 "\tThere already exists one vHCI driver for class %s\n" 199 "\tOnly one vHCI driver for each class is allowed\n"; 200 201 /* 202 * Static function prototypes 203 */ 204 static int i_mdi_phci_offline(dev_info_t *, uint_t); 205 static int i_mdi_client_offline(dev_info_t *, uint_t); 206 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 207 static void i_mdi_phci_post_detach(dev_info_t *, 208 ddi_detach_cmd_t, int); 209 static int i_mdi_client_pre_detach(dev_info_t *, 210 ddi_detach_cmd_t); 211 static void i_mdi_client_post_detach(dev_info_t *, 212 ddi_detach_cmd_t, int); 213 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 214 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 215 static int i_mdi_lba_lb(mdi_client_t *ct, 216 mdi_pathinfo_t **ret_pip, struct buf *buf); 217 static void i_mdi_pm_hold_client(mdi_client_t *, int); 218 static void i_mdi_pm_rele_client(mdi_client_t *, int); 219 static void i_mdi_pm_reset_client(mdi_client_t *); 220 static int i_mdi_power_all_phci(mdi_client_t *); 221 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 222 223 224 /* 225 * Internal mdi_pathinfo node functions 226 */ 227 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 228 229 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 230 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 231 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 232 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 233 static void i_mdi_phci_unlock(mdi_phci_t *); 234 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 235 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 236 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 237 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 238 mdi_client_t *); 239 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 240 static void i_mdi_client_remove_path(mdi_client_t *, 241 mdi_pathinfo_t *); 242 243 static int 
i_mdi_pi_state_change(mdi_pathinfo_t *, 244 mdi_pathinfo_state_t, int); 245 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 246 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 247 char **, int); 248 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 249 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 250 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 251 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 252 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 253 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 254 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 255 static void i_mdi_client_update_state(mdi_client_t *); 256 static int i_mdi_client_compute_state(mdi_client_t *, 257 mdi_phci_t *); 258 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 259 static void i_mdi_client_unlock(mdi_client_t *); 260 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 261 static mdi_client_t *i_devi_get_client(dev_info_t *); 262 /* 263 * NOTE: this will be removed once the NWS files are changed to use the new 264 * mdi_{enable,disable}_path interfaces 265 */ 266 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 267 int, int); 268 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 269 mdi_vhci_t *vh, int flags, int op); 270 /* 271 * Failover related function prototypes 272 */ 273 static int i_mdi_failover(void *); 274 275 /* 276 * misc internal functions 277 */ 278 static int i_mdi_get_hash_key(char *); 279 static int i_map_nvlist_error_to_mdi(int); 280 static void i_mdi_report_path_state(mdi_client_t *, 281 mdi_pathinfo_t *); 282 283 static void setup_vhci_cache(mdi_vhci_t *); 284 static int destroy_vhci_cache(mdi_vhci_t *); 285 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 286 static boolean_t stop_vhcache_flush_thread(void *, int); 287 static void 
free_string_array(char **, int);
static void		free_vhcache_phci(mdi_vhcache_phci_t *);
static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void		free_vhcache_client(mdi_vhcache_client_t *);
static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_pi_add(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		vhcache_pi_remove(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		free_phclient_path_list(mdi_phys_path_t *);
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);

/*
 * called once when first vhci registers with mdi.
 * Idempotent: the static 'initialized' guard makes repeat calls return
 * immediately, so competing vHCI registrations only initialize once.
 */
static void
i_mdi_init()
{
	static int initialized = 0;

	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Create our taskq resources */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/* Allocate ['path_instance' <-> "path"] maps */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	mdi_pathmap_bypath = mod_hash_create_strhash(
	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
}

/*
 * mdi_get_component_type():
 *		Return mpxio component type
 * Return Values:
 *		MDI_COMPONENT_NONE
 *		MDI_COMPONENT_VHCI
 *		MDI_COMPONENT_PHCI
 *		MDI_COMPONENT_CLIENT
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed when clients migrate mdi_component_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	return (DEVI(dip)->devi_mdi_component);
}

/*
 * mdi_vhci_register():
 *		Register a vHCI module with the mpxio framework
 *		mdi_vhci_register() is called by vHCI drivers to register the
 *		'class_driver' vHCI driver and its MDI entrypoints with the
 *		mpxio framework.  The vHCI driver must call this interface as
 *		part of its attach(9e) handler.
 *		Competing threads may try to attach mdi_vhci_register() as
 *		the vHCI drivers are loaded and attached as a result of pHCI
 *		driver instance registration (mdi_phci_register()) with the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created.  Check for valid
			 * vHCI ops registration.  We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/* round-robin load balancing unless the property says else */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* append to the global vHCI list (head/tail maintained) */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_vhci_unregister():
 *		Unregister a vHCI module from mpxio framework
 *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *		of
 *		a vhci to unregister it from the framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/* teardown of the mdi extensions; done outside mdi_mutex */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}

/*
 * i_mdi_vhci_class2vhci():
 *		Look for a
 *		matching vHCI module given a vHCI class name
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*vh = NULL;

	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			break;
		}
	}
	mutex_exit(&mdi_mutex);
	return (vh);
}

/*
 * i_devi_get_vhci():
 *		Utility function to get the handle to a vHCI component
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	mdi_vhci_t	*vh = NULL;
	if (MDI_VHCI(vdip)) {
		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
	}
	return (vh);
}

/*
 * mdi_phci_register():
 *		Register a pHCI module with mpxio framework
 *		mdi_phci_register() is called by pHCI drivers to register with
 *		the mpxio framework and a specific 'class_driver' vHCI.  The
 *		pHCI driver must call this interface as part of its attach(9e)
 *		handler.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;
	char			*pathname;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(pdip, pathname);

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (CE_CONT, pdip,
			    "?%s (%s%d) multipath capabilities "
			    "disabled via %s.conf.\n", pathname,
			    ddi_driver_name(pdip), ddi_get_instance(pdip),
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			kmem_free(pathname, MAXPATHLEN);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	kmem_free(pathname, MAXPATHLEN);

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	/* allocate and initialize the pHCI extension */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* append the pHCI to the vHCI's pHCI list */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *		Unregister a pHCI module from mpxio framework
 *		mdi_phci_unregister()
 *		is called by the pHCI drivers from their
 *		detach(9E) handler to unregister their instances from the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;

	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/*
	 * Unlink the pHCI from the vHCI's pHCI list.
	 * NOTE(review): this relies on 'ph' being on vh's list (established
	 * by mdi_phci_register()); if it were not, 'prev' could be NULL
	 * when dereferenced below -- confirm the invariant holds.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *		Utility function to return the phci extensions.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;
	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * Single thread mdi entry into devinfo node for modifying its children.
 * If necessary we perform an ndi_devi_enter of the vHCI before doing
 * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a threads that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and the enter down the path
	 * to the pHCI.  If pHCI is detaching then we piggyback this calls
	 * enter of the vHCI on frameworks vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/* piggyback on the framework's vHCI enter */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else {
			/* vHCI busy and not detaching: back off and retry */
			delay(1);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);
	/* pack both recursion values into one int: vHCI high, pHCI low */
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
 */
void
mdi_devi_exit(dev_info_t *phci_dip, int circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/* extract two circular recursion values from single int */
	pcircular = (short)(circular & 0xFFFF);
	vcircular = (short)((circular >> 16) & 0xFFFF);

	ndi_devi_exit(phci_dip, pcircular);
	/* -1 means mdi_devi_enter() piggybacked on the framework's enter */
	if (vcircular != -1)
		ndi_devi_exit(vdip, vcircular);
}

/*
 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 * with vHCI power management code during path online/offline.  Each
 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 * occur within the scope of an active mdi_devi_enter that establishes the
 * circular value.
850 */ 851 void 852 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 853 { 854 int pcircular; 855 856 /* Verify calling context */ 857 ASSERT(MDI_PHCI(phci_dip)); 858 859 pcircular = (short)(circular & 0xFFFF); 860 ndi_devi_exit(phci_dip, pcircular); 861 } 862 863 void 864 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 865 { 866 int pcircular; 867 868 /* Verify calling context */ 869 ASSERT(MDI_PHCI(phci_dip)); 870 871 ndi_devi_enter(phci_dip, &pcircular); 872 873 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 874 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 875 } 876 877 /* 878 * mdi_devi_get_vdip(): 879 * given a pHCI dip return vHCI dip 880 */ 881 dev_info_t * 882 mdi_devi_get_vdip(dev_info_t *pdip) 883 { 884 mdi_phci_t *ph; 885 886 ph = i_devi_get_phci(pdip); 887 if (ph && ph->ph_vhci) 888 return (ph->ph_vhci->vh_dip); 889 return (NULL); 890 } 891 892 /* 893 * mdi_devi_pdip_entered(): 894 * Return 1 if we are vHCI and have done an ndi_devi_enter 895 * of a pHCI 896 */ 897 int 898 mdi_devi_pdip_entered(dev_info_t *vdip) 899 { 900 mdi_vhci_t *vh; 901 mdi_phci_t *ph; 902 903 vh = i_devi_get_vhci(vdip); 904 if (vh == NULL) 905 return (0); 906 907 MDI_VHCI_PHCI_LOCK(vh); 908 ph = vh->vh_phci_head; 909 while (ph) { 910 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 911 MDI_VHCI_PHCI_UNLOCK(vh); 912 return (1); 913 } 914 ph = ph->ph_next; 915 } 916 MDI_VHCI_PHCI_UNLOCK(vh); 917 return (0); 918 } 919 920 /* 921 * mdi_phci_path2devinfo(): 922 * Utility function to search for a valid phci device given 923 * the devfs pathname. 
 */
dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	MDI_VHCI_PHCI_LOCK(vh);
	/* compare each pHCI's devfs path against the requested pathname */
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	if (ph == NULL) {
		pdip = NULL;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *		get number of path information nodes associated with a given
 *		pHCI device.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			/*
			 * tryenter failed.
			 * Try to grab again
			 * after a small delay
			 */
			/* drop pi_mutex (holding pip) so ph_mutex can win */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *		create client device's devinfo node
 * Return Values:
 *		dev_info
 *		NULL
 * Notes:
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
    char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client dip %p already exists",
		    (void *)cdip);
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* undo partial node creation: strip properties, free the node */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *		Find a matching devinfo node for given client node name
 *		and its guid.
1084 * Return Values: 1085 * Handle to a dev_info node or NULL 1086 */ 1087 static dev_info_t * 1088 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1089 { 1090 char *data; 1091 dev_info_t *cdip = NULL; 1092 dev_info_t *ndip = NULL; 1093 int circular; 1094 1095 ndi_devi_enter(vh->vh_dip, &circular); 1096 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1097 while ((cdip = ndip) != NULL) { 1098 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1099 1100 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1101 continue; 1102 } 1103 1104 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1105 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1106 &data) != DDI_PROP_SUCCESS) { 1107 continue; 1108 } 1109 1110 if (strcmp(data, guid) != 0) { 1111 ddi_prop_free(data); 1112 continue; 1113 } 1114 ddi_prop_free(data); 1115 break; 1116 } 1117 ndi_devi_exit(vh->vh_dip, circular); 1118 return (cdip); 1119 } 1120 1121 /* 1122 * i_mdi_devinfo_remove(): 1123 * Remove a client device node 1124 */ 1125 static int 1126 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1127 { 1128 int rv = MDI_SUCCESS; 1129 1130 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1131 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1132 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1133 if (rv != NDI_SUCCESS) { 1134 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1135 " failed. 
cdip = %p\n", (void *)cdip)); 1136 } 1137 /* 1138 * Convert to MDI error code 1139 */ 1140 switch (rv) { 1141 case NDI_SUCCESS: 1142 rv = MDI_SUCCESS; 1143 break; 1144 case NDI_BUSY: 1145 rv = MDI_BUSY; 1146 break; 1147 default: 1148 rv = MDI_FAILURE; 1149 break; 1150 } 1151 } 1152 return (rv); 1153 } 1154 1155 /* 1156 * i_devi_get_client() 1157 * Utility function to get mpxio component extensions 1158 */ 1159 static mdi_client_t * 1160 i_devi_get_client(dev_info_t *cdip) 1161 { 1162 mdi_client_t *ct = NULL; 1163 1164 if (MDI_CLIENT(cdip)) { 1165 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1166 } 1167 return (ct); 1168 } 1169 1170 /* 1171 * i_mdi_is_child_present(): 1172 * Search for the presence of client device dev_info node 1173 */ 1174 static int 1175 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1176 { 1177 int rv = MDI_FAILURE; 1178 struct dev_info *dip; 1179 int circular; 1180 1181 ndi_devi_enter(vdip, &circular); 1182 dip = DEVI(vdip)->devi_child; 1183 while (dip) { 1184 if (dip == DEVI(cdip)) { 1185 rv = MDI_SUCCESS; 1186 break; 1187 } 1188 dip = dip->devi_sibling; 1189 } 1190 ndi_devi_exit(vdip, circular); 1191 return (rv); 1192 } 1193 1194 1195 /* 1196 * i_mdi_client_lock(): 1197 * Grab client component lock 1198 * Return Values: 1199 * None 1200 * Note: 1201 * The default locking order is: 1202 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1203 * But there are number of situations where locks need to be 1204 * grabbed in reverse order. This routine implements try and lock 1205 * mechanism depending on the requested parameter option. 1206 */ 1207 static void 1208 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1209 { 1210 if (pip) { 1211 /* 1212 * Reverse locking is requested. 1213 */ 1214 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1215 /* 1216 * tryenter failed. 
Try to grab again 1217 * after a small delay 1218 */ 1219 MDI_PI_HOLD(pip); 1220 MDI_PI_UNLOCK(pip); 1221 delay(1); 1222 MDI_PI_LOCK(pip); 1223 MDI_PI_RELE(pip); 1224 } 1225 } else { 1226 MDI_CLIENT_LOCK(ct); 1227 } 1228 } 1229 1230 /* 1231 * i_mdi_client_unlock(): 1232 * Unlock a client component 1233 */ 1234 static void 1235 i_mdi_client_unlock(mdi_client_t *ct) 1236 { 1237 MDI_CLIENT_UNLOCK(ct); 1238 } 1239 1240 /* 1241 * i_mdi_client_alloc(): 1242 * Allocate and initialize a client structure. Caller should 1243 * hold the vhci client lock. 1244 * Return Values: 1245 * Handle to a client component 1246 */ 1247 /*ARGSUSED*/ 1248 static mdi_client_t * 1249 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1250 { 1251 mdi_client_t *ct; 1252 1253 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1254 1255 /* 1256 * Allocate and initialize a component structure. 1257 */ 1258 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1259 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1260 ct->ct_hnext = NULL; 1261 ct->ct_hprev = NULL; 1262 ct->ct_dip = NULL; 1263 ct->ct_vhci = vh; 1264 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1265 (void) strcpy(ct->ct_drvname, name); 1266 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1267 (void) strcpy(ct->ct_guid, lguid); 1268 ct->ct_cprivate = NULL; 1269 ct->ct_vprivate = NULL; 1270 ct->ct_flags = 0; 1271 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1272 MDI_CLIENT_LOCK(ct); 1273 MDI_CLIENT_SET_OFFLINE(ct); 1274 MDI_CLIENT_SET_DETACH(ct); 1275 MDI_CLIENT_SET_POWER_UP(ct); 1276 MDI_CLIENT_UNLOCK(ct); 1277 ct->ct_failover_flags = 0; 1278 ct->ct_failover_status = 0; 1279 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1280 ct->ct_unstable = 0; 1281 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1282 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1283 ct->ct_lb = vh->vh_lb; 1284 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1285 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1286 ct->ct_path_count = 0; 1287 ct->ct_path_head = NULL; 1288 ct->ct_path_tail = NULL; 1289 ct->ct_path_last = NULL; 1290 1291 /* 1292 * Add this client component to our client hash queue 1293 */ 1294 i_mdi_client_enlist_table(vh, ct); 1295 return (ct); 1296 } 1297 1298 /* 1299 * i_mdi_client_enlist_table(): 1300 * Attach the client device to the client hash table. Caller 1301 * should hold the vhci client lock. 1302 */ 1303 static void 1304 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1305 { 1306 int index; 1307 struct client_hash *head; 1308 1309 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1310 1311 index = i_mdi_get_hash_key(ct->ct_guid); 1312 head = &vh->vh_client_table[index]; 1313 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1314 head->ct_hash_head = ct; 1315 head->ct_hash_count++; 1316 vh->vh_client_count++; 1317 } 1318 1319 /* 1320 * i_mdi_client_delist_table(): 1321 * Attach the client device to the client hash table. 1322 * Caller should hold the vhci client lock. 
1323 */ 1324 static void 1325 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1326 { 1327 int index; 1328 char *guid; 1329 struct client_hash *head; 1330 mdi_client_t *next; 1331 mdi_client_t *last; 1332 1333 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1334 1335 guid = ct->ct_guid; 1336 index = i_mdi_get_hash_key(guid); 1337 head = &vh->vh_client_table[index]; 1338 1339 last = NULL; 1340 next = (mdi_client_t *)head->ct_hash_head; 1341 while (next != NULL) { 1342 if (next == ct) { 1343 break; 1344 } 1345 last = next; 1346 next = next->ct_hnext; 1347 } 1348 1349 if (next) { 1350 head->ct_hash_count--; 1351 if (last == NULL) { 1352 head->ct_hash_head = ct->ct_hnext; 1353 } else { 1354 last->ct_hnext = ct->ct_hnext; 1355 } 1356 ct->ct_hnext = NULL; 1357 vh->vh_client_count--; 1358 } 1359 } 1360 1361 1362 /* 1363 * i_mdi_client_free(): 1364 * Free a client component 1365 */ 1366 static int 1367 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1368 { 1369 int rv = MDI_SUCCESS; 1370 int flags = ct->ct_flags; 1371 dev_info_t *cdip; 1372 dev_info_t *vdip; 1373 1374 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1375 1376 vdip = vh->vh_dip; 1377 cdip = ct->ct_dip; 1378 1379 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1380 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1381 DEVI(cdip)->devi_mdi_client = NULL; 1382 1383 /* 1384 * Clear out back ref. 
to dev_info_t node 1385 */ 1386 ct->ct_dip = NULL; 1387 1388 /* 1389 * Remove this client from our hash queue 1390 */ 1391 i_mdi_client_delist_table(vh, ct); 1392 1393 /* 1394 * Uninitialize and free the component 1395 */ 1396 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1397 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1398 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1399 cv_destroy(&ct->ct_failover_cv); 1400 cv_destroy(&ct->ct_unstable_cv); 1401 cv_destroy(&ct->ct_powerchange_cv); 1402 mutex_destroy(&ct->ct_mutex); 1403 kmem_free(ct, sizeof (*ct)); 1404 1405 if (cdip != NULL) { 1406 MDI_VHCI_CLIENT_UNLOCK(vh); 1407 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1408 MDI_VHCI_CLIENT_LOCK(vh); 1409 } 1410 return (rv); 1411 } 1412 1413 /* 1414 * i_mdi_client_find(): 1415 * Find the client structure corresponding to a given guid 1416 * Caller should hold the vhci client lock. 1417 */ 1418 static mdi_client_t * 1419 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1420 { 1421 int index; 1422 struct client_hash *head; 1423 mdi_client_t *ct; 1424 1425 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1426 1427 index = i_mdi_get_hash_key(guid); 1428 head = &vh->vh_client_table[index]; 1429 1430 ct = head->ct_hash_head; 1431 while (ct != NULL) { 1432 if (strcmp(ct->ct_guid, guid) == 0 && 1433 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1434 break; 1435 } 1436 ct = ct->ct_hnext; 1437 } 1438 return (ct); 1439 } 1440 1441 /* 1442 * i_mdi_client_update_state(): 1443 * Compute and update client device state 1444 * Notes: 1445 * A client device can be in any of three possible states: 1446 * 1447 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1448 * one online/standby paths. Can tolerate failures. 1449 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1450 * no alternate paths available as standby. A failure on the online 1451 * would result in loss of access to device data. 
1452 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1453 * no paths available to access the device. 1454 */ 1455 static void 1456 i_mdi_client_update_state(mdi_client_t *ct) 1457 { 1458 int state; 1459 1460 ASSERT(MDI_CLIENT_LOCKED(ct)); 1461 state = i_mdi_client_compute_state(ct, NULL); 1462 MDI_CLIENT_SET_STATE(ct, state); 1463 } 1464 1465 /* 1466 * i_mdi_client_compute_state(): 1467 * Compute client device state 1468 * 1469 * mdi_phci_t * Pointer to pHCI structure which should 1470 * while computing the new value. Used by 1471 * i_mdi_phci_offline() to find the new 1472 * client state after DR of a pHCI. 1473 */ 1474 static int 1475 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1476 { 1477 int state; 1478 int online_count = 0; 1479 int standby_count = 0; 1480 mdi_pathinfo_t *pip, *next; 1481 1482 ASSERT(MDI_CLIENT_LOCKED(ct)); 1483 pip = ct->ct_path_head; 1484 while (pip != NULL) { 1485 MDI_PI_LOCK(pip); 1486 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1487 if (MDI_PI(pip)->pi_phci == ph) { 1488 MDI_PI_UNLOCK(pip); 1489 pip = next; 1490 continue; 1491 } 1492 1493 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1494 == MDI_PATHINFO_STATE_ONLINE) 1495 online_count++; 1496 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1497 == MDI_PATHINFO_STATE_STANDBY) 1498 standby_count++; 1499 MDI_PI_UNLOCK(pip); 1500 pip = next; 1501 } 1502 1503 if (online_count == 0) { 1504 if (standby_count == 0) { 1505 state = MDI_CLIENT_STATE_FAILED; 1506 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1507 " ct = %p\n", (void *)ct)); 1508 } else if (standby_count == 1) { 1509 state = MDI_CLIENT_STATE_DEGRADED; 1510 } else { 1511 state = MDI_CLIENT_STATE_OPTIMAL; 1512 } 1513 } else if (online_count == 1) { 1514 if (standby_count == 0) { 1515 state = MDI_CLIENT_STATE_DEGRADED; 1516 } else { 1517 state = MDI_CLIENT_STATE_OPTIMAL; 1518 } 1519 } else { 1520 state = MDI_CLIENT_STATE_OPTIMAL; 1521 } 1522 return (state); 1523 } 1524 
1525 /* 1526 * i_mdi_client2devinfo(): 1527 * Utility function 1528 */ 1529 dev_info_t * 1530 i_mdi_client2devinfo(mdi_client_t *ct) 1531 { 1532 return (ct->ct_dip); 1533 } 1534 1535 /* 1536 * mdi_client_path2_devinfo(): 1537 * Given the parent devinfo and child devfs pathname, search for 1538 * a valid devfs node handle. 1539 */ 1540 dev_info_t * 1541 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1542 { 1543 dev_info_t *cdip = NULL; 1544 dev_info_t *ndip = NULL; 1545 char *temp_pathname; 1546 int circular; 1547 1548 /* 1549 * Allocate temp buffer 1550 */ 1551 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1552 1553 /* 1554 * Lock parent against changes 1555 */ 1556 ndi_devi_enter(vdip, &circular); 1557 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1558 while ((cdip = ndip) != NULL) { 1559 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1560 1561 *temp_pathname = '\0'; 1562 (void) ddi_pathname(cdip, temp_pathname); 1563 if (strcmp(temp_pathname, pathname) == 0) { 1564 break; 1565 } 1566 } 1567 /* 1568 * Release devinfo lock 1569 */ 1570 ndi_devi_exit(vdip, circular); 1571 1572 /* 1573 * Free the temp buffer 1574 */ 1575 kmem_free(temp_pathname, MAXPATHLEN); 1576 return (cdip); 1577 } 1578 1579 /* 1580 * mdi_client_get_path_count(): 1581 * Utility function to get number of path information nodes 1582 * associated with a given client device. 
1583 */ 1584 int 1585 mdi_client_get_path_count(dev_info_t *cdip) 1586 { 1587 mdi_client_t *ct; 1588 int count = 0; 1589 1590 ct = i_devi_get_client(cdip); 1591 if (ct != NULL) { 1592 count = ct->ct_path_count; 1593 } 1594 return (count); 1595 } 1596 1597 1598 /* 1599 * i_mdi_get_hash_key(): 1600 * Create a hash using strings as keys 1601 * 1602 */ 1603 static int 1604 i_mdi_get_hash_key(char *str) 1605 { 1606 uint32_t g, hash = 0; 1607 char *p; 1608 1609 for (p = str; *p != '\0'; p++) { 1610 g = *p; 1611 hash += g; 1612 } 1613 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1614 } 1615 1616 /* 1617 * mdi_get_lb_policy(): 1618 * Get current load balancing policy for a given client device 1619 */ 1620 client_lb_t 1621 mdi_get_lb_policy(dev_info_t *cdip) 1622 { 1623 client_lb_t lb = LOAD_BALANCE_NONE; 1624 mdi_client_t *ct; 1625 1626 ct = i_devi_get_client(cdip); 1627 if (ct != NULL) { 1628 lb = ct->ct_lb; 1629 } 1630 return (lb); 1631 } 1632 1633 /* 1634 * mdi_set_lb_region_size(): 1635 * Set current region size for the load-balance 1636 */ 1637 int 1638 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1639 { 1640 mdi_client_t *ct; 1641 int rv = MDI_FAILURE; 1642 1643 ct = i_devi_get_client(cdip); 1644 if (ct != NULL && ct->ct_lb_args != NULL) { 1645 ct->ct_lb_args->region_size = region_size; 1646 rv = MDI_SUCCESS; 1647 } 1648 return (rv); 1649 } 1650 1651 /* 1652 * mdi_Set_lb_policy(): 1653 * Set current load balancing policy for a given client device 1654 */ 1655 int 1656 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1657 { 1658 mdi_client_t *ct; 1659 int rv = MDI_FAILURE; 1660 1661 ct = i_devi_get_client(cdip); 1662 if (ct != NULL) { 1663 ct->ct_lb = lb; 1664 rv = MDI_SUCCESS; 1665 } 1666 return (rv); 1667 } 1668 1669 /* 1670 * mdi_failover(): 1671 * failover function called by the vHCI drivers to initiate 1672 * a failover operation. This is typically due to non-availability 1673 * of online paths to route I/O requests. 
Failover can be 1674 * triggered through user application also. 1675 * 1676 * The vHCI driver calls mdi_failover() to initiate a failover 1677 * operation. mdi_failover() calls back into the vHCI driver's 1678 * vo_failover() entry point to perform the actual failover 1679 * operation. The reason for requiring the vHCI driver to 1680 * initiate failover by calling mdi_failover(), instead of directly 1681 * executing vo_failover() itself, is to ensure that the mdi 1682 * framework can keep track of the client state properly. 1683 * Additionally, mdi_failover() provides as a convenience the 1684 * option of performing the failover operation synchronously or 1685 * asynchronously 1686 * 1687 * Upon successful completion of the failover operation, the 1688 * paths that were previously ONLINE will be in the STANDBY state, 1689 * and the newly activated paths will be in the ONLINE state. 1690 * 1691 * The flags modifier determines whether the activation is done 1692 * synchronously: MDI_FAILOVER_SYNC 1693 * Return Values: 1694 * MDI_SUCCESS 1695 * MDI_FAILURE 1696 * MDI_BUSY 1697 */ 1698 /*ARGSUSED*/ 1699 int 1700 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1701 { 1702 int rv; 1703 mdi_client_t *ct; 1704 1705 ct = i_devi_get_client(cdip); 1706 ASSERT(ct != NULL); 1707 if (ct == NULL) { 1708 /* cdip is not a valid client device. Nothing more to do. */ 1709 return (MDI_FAILURE); 1710 } 1711 1712 MDI_CLIENT_LOCK(ct); 1713 1714 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1715 /* A path to the client is being freed */ 1716 MDI_CLIENT_UNLOCK(ct); 1717 return (MDI_BUSY); 1718 } 1719 1720 1721 if (MDI_CLIENT_IS_FAILED(ct)) { 1722 /* 1723 * Client is in failed state. Nothing more to do. 
1724 */ 1725 MDI_CLIENT_UNLOCK(ct); 1726 return (MDI_FAILURE); 1727 } 1728 1729 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1730 /* 1731 * Failover is already in progress; return BUSY 1732 */ 1733 MDI_CLIENT_UNLOCK(ct); 1734 return (MDI_BUSY); 1735 } 1736 /* 1737 * Make sure that mdi_pathinfo node state changes are processed. 1738 * We do not allow failovers to progress while client path state 1739 * changes are in progress 1740 */ 1741 if (ct->ct_unstable) { 1742 if (flags == MDI_FAILOVER_ASYNC) { 1743 MDI_CLIENT_UNLOCK(ct); 1744 return (MDI_BUSY); 1745 } else { 1746 while (ct->ct_unstable) 1747 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1748 } 1749 } 1750 1751 /* 1752 * Client device is in stable state. Before proceeding, perform sanity 1753 * checks again. 1754 */ 1755 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1756 (!i_ddi_devi_attached(ct->ct_dip))) { 1757 /* 1758 * Client is in failed state. Nothing more to do. 1759 */ 1760 MDI_CLIENT_UNLOCK(ct); 1761 return (MDI_FAILURE); 1762 } 1763 1764 /* 1765 * Set the client state as failover in progress. 1766 */ 1767 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1768 ct->ct_failover_flags = flags; 1769 MDI_CLIENT_UNLOCK(ct); 1770 1771 if (flags == MDI_FAILOVER_ASYNC) { 1772 /* 1773 * Submit the initiate failover request via CPR safe 1774 * taskq threads. 1775 */ 1776 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1777 ct, KM_SLEEP); 1778 return (MDI_ACCEPT); 1779 } else { 1780 /* 1781 * Synchronous failover mode. Typically invoked from the user 1782 * land. 1783 */ 1784 rv = i_mdi_failover(ct); 1785 } 1786 return (rv); 1787 } 1788 1789 /* 1790 * i_mdi_failover(): 1791 * internal failover function. Invokes vHCI drivers failover 1792 * callback function and process the failover status 1793 * Return Values: 1794 * None 1795 * 1796 * Note: A client device in failover state can not be detached or freed. 
1797 */ 1798 static int 1799 i_mdi_failover(void *arg) 1800 { 1801 int rv = MDI_SUCCESS; 1802 mdi_client_t *ct = (mdi_client_t *)arg; 1803 mdi_vhci_t *vh = ct->ct_vhci; 1804 1805 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1806 1807 if (vh->vh_ops->vo_failover != NULL) { 1808 /* 1809 * Call vHCI drivers callback routine 1810 */ 1811 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1812 ct->ct_failover_flags); 1813 } 1814 1815 MDI_CLIENT_LOCK(ct); 1816 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1817 1818 /* 1819 * Save the failover return status 1820 */ 1821 ct->ct_failover_status = rv; 1822 1823 /* 1824 * As a result of failover, client status would have been changed. 1825 * Update the client state and wake up anyone waiting on this client 1826 * device. 1827 */ 1828 i_mdi_client_update_state(ct); 1829 1830 cv_broadcast(&ct->ct_failover_cv); 1831 MDI_CLIENT_UNLOCK(ct); 1832 return (rv); 1833 } 1834 1835 /* 1836 * Load balancing is logical block. 1837 * IOs within the range described by region_size 1838 * would go on the same path. This would improve the 1839 * performance by cache-hit on some of the RAID devices. 1840 * Search only for online paths(At some point we 1841 * may want to balance across target ports). 1842 * If no paths are found then default to round-robin. 
1843 */ 1844 static int 1845 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1846 { 1847 int path_index = -1; 1848 int online_path_count = 0; 1849 int online_nonpref_path_count = 0; 1850 int region_size = ct->ct_lb_args->region_size; 1851 mdi_pathinfo_t *pip; 1852 mdi_pathinfo_t *next; 1853 int preferred, path_cnt; 1854 1855 pip = ct->ct_path_head; 1856 while (pip) { 1857 MDI_PI_LOCK(pip); 1858 if (MDI_PI(pip)->pi_state == 1859 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1860 online_path_count++; 1861 } else if (MDI_PI(pip)->pi_state == 1862 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1863 online_nonpref_path_count++; 1864 } 1865 next = (mdi_pathinfo_t *) 1866 MDI_PI(pip)->pi_client_link; 1867 MDI_PI_UNLOCK(pip); 1868 pip = next; 1869 } 1870 /* if found any online/preferred then use this type */ 1871 if (online_path_count > 0) { 1872 path_cnt = online_path_count; 1873 preferred = 1; 1874 } else if (online_nonpref_path_count > 0) { 1875 path_cnt = online_nonpref_path_count; 1876 preferred = 0; 1877 } else { 1878 path_cnt = 0; 1879 } 1880 if (path_cnt) { 1881 path_index = (bp->b_blkno >> region_size) % path_cnt; 1882 pip = ct->ct_path_head; 1883 while (pip && path_index != -1) { 1884 MDI_PI_LOCK(pip); 1885 if (path_index == 0 && 1886 (MDI_PI(pip)->pi_state == 1887 MDI_PATHINFO_STATE_ONLINE) && 1888 MDI_PI(pip)->pi_preferred == preferred) { 1889 MDI_PI_HOLD(pip); 1890 MDI_PI_UNLOCK(pip); 1891 *ret_pip = pip; 1892 return (MDI_SUCCESS); 1893 } 1894 path_index --; 1895 next = (mdi_pathinfo_t *) 1896 MDI_PI(pip)->pi_client_link; 1897 MDI_PI_UNLOCK(pip); 1898 pip = next; 1899 } 1900 if (pip == NULL) { 1901 MDI_DEBUG(4, (CE_NOTE, NULL, 1902 "!lba %llx, no pip !!\n", 1903 bp->b_lblkno)); 1904 } else { 1905 MDI_DEBUG(4, (CE_NOTE, NULL, 1906 "!lba %llx, no pip for path_index, " 1907 "pip %p\n", bp->b_lblkno, (void *)pip)); 1908 } 1909 } 1910 return (MDI_FAILURE); 1911 } 1912 1913 /* 1914 * mdi_select_path(): 1915 * select a 
path to access a client device. 1916 * 1917 * mdi_select_path() function is called by the vHCI drivers to 1918 * select a path to route the I/O request to. The caller passes 1919 * the block I/O data transfer structure ("buf") as one of the 1920 * parameters. The mpxio framework uses the buf structure 1921 * contents to maintain per path statistics (total I/O size / 1922 * count pending). If more than one online paths are available to 1923 * select, the framework automatically selects a suitable path 1924 * for routing I/O request. If a failover operation is active for 1925 * this client device the call shall be failed with MDI_BUSY error 1926 * code. 1927 * 1928 * By default this function returns a suitable path in online 1929 * state based on the current load balancing policy. Currently 1930 * we support LOAD_BALANCE_NONE (Previously selected online path 1931 * will continue to be used till the path is usable) and 1932 * LOAD_BALANCE_RR (Online paths will be selected in a round 1933 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 1934 * based on the logical block). The load balancing 1935 * through vHCI drivers configuration file (driver.conf). 1936 * 1937 * vHCI drivers may override this default behavior by specifying 1938 * appropriate flags. The meaning of the thrid argument depends 1939 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 1940 * then the argument is the "path instance" of the path to select. 1941 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 1942 * "start_pip". A non NULL "start_pip" is the starting point to 1943 * walk and find the next appropriate path. The following values 1944 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 1945 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 1946 * STANDBY path). 1947 * 1948 * The non-standard behavior is used by the scsi_vhci driver, 1949 * whenever it has to use a STANDBY/FAULTED path. Eg. 
during 1950 * attach of client devices (to avoid an unnecessary failover 1951 * when the STANDBY path comes up first), during failover 1952 * (to activate a STANDBY path as ONLINE). 1953 * 1954 * The selected path is returned in a a mdi_hold_path() state 1955 * (pi_ref_cnt). Caller should release the hold by calling 1956 * mdi_rele_path(). 1957 * 1958 * Return Values: 1959 * MDI_SUCCESS - Completed successfully 1960 * MDI_BUSY - Client device is busy failing over 1961 * MDI_NOPATH - Client device is online, but no valid path are 1962 * available to access this client device 1963 * MDI_FAILURE - Invalid client device or state 1964 * MDI_DEVI_ONLINING 1965 * - Client device (struct dev_info state) is in 1966 * onlining state. 1967 */ 1968 1969 /*ARGSUSED*/ 1970 int 1971 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 1972 void *arg, mdi_pathinfo_t **ret_pip) 1973 { 1974 mdi_client_t *ct; 1975 mdi_pathinfo_t *pip; 1976 mdi_pathinfo_t *next; 1977 mdi_pathinfo_t *head; 1978 mdi_pathinfo_t *start; 1979 client_lb_t lbp; /* load balancing policy */ 1980 int sb = 1; /* standard behavior */ 1981 int preferred = 1; /* preferred path */ 1982 int cond, cont = 1; 1983 int retry = 0; 1984 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 1985 int path_instance; /* request specific path instance */ 1986 1987 /* determine type of arg based on flags */ 1988 if (flags & MDI_SELECT_PATH_INSTANCE) { 1989 flags &= ~MDI_SELECT_PATH_INSTANCE; 1990 path_instance = (int)(intptr_t)arg; 1991 start_pip = NULL; 1992 } else { 1993 path_instance = 0; 1994 start_pip = (mdi_pathinfo_t *)arg; 1995 } 1996 1997 if (flags != 0) { 1998 /* 1999 * disable default behavior 2000 */ 2001 sb = 0; 2002 } 2003 2004 *ret_pip = NULL; 2005 ct = i_devi_get_client(cdip); 2006 if (ct == NULL) { 2007 /* mdi extensions are NULL, Nothing more to do */ 2008 return (MDI_FAILURE); 2009 } 2010 2011 MDI_CLIENT_LOCK(ct); 2012 2013 if (sb) { 2014 if (MDI_CLIENT_IS_FAILED(ct)) { 2015 /* 2016 * Client is 
not ready to accept any I/O requests. 2017 * Fail this request. 2018 */ 2019 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 2020 "client state offline ct = %p\n", (void *)ct)); 2021 MDI_CLIENT_UNLOCK(ct); 2022 return (MDI_FAILURE); 2023 } 2024 2025 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2026 /* 2027 * Check for Failover is in progress. If so tell the 2028 * caller that this device is busy. 2029 */ 2030 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 2031 "client failover in progress ct = %p\n", 2032 (void *)ct)); 2033 MDI_CLIENT_UNLOCK(ct); 2034 return (MDI_BUSY); 2035 } 2036 2037 /* 2038 * Check to see whether the client device is attached. 2039 * If not so, let the vHCI driver manually select a path 2040 * (standby) and let the probe/attach process to continue. 2041 */ 2042 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2043 MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining " 2044 "ct = %p\n", (void *)ct)); 2045 MDI_CLIENT_UNLOCK(ct); 2046 return (MDI_DEVI_ONLINING); 2047 } 2048 } 2049 2050 /* 2051 * Cache in the client list head. If head of the list is NULL 2052 * return MDI_NOPATH 2053 */ 2054 head = ct->ct_path_head; 2055 if (head == NULL) { 2056 MDI_CLIENT_UNLOCK(ct); 2057 return (MDI_NOPATH); 2058 } 2059 2060 /* Caller is specifying a specific pathinfo path by path_instance */ 2061 if (path_instance) { 2062 /* search for pathinfo with correct path_instance */ 2063 for (pip = head; 2064 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2065 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2066 ; 2067 2068 /* If path can't be selected then MDI_FAILURE is returned. */ 2069 if (pip == NULL) { 2070 MDI_CLIENT_UNLOCK(ct); 2071 return (MDI_FAILURE); 2072 } 2073 2074 /* verify state of path */ 2075 MDI_PI_LOCK(pip); 2076 if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) { 2077 MDI_PI_UNLOCK(pip); 2078 MDI_CLIENT_UNLOCK(ct); 2079 return (MDI_FAILURE); 2080 } 2081 2082 /* 2083 * Return the path in hold state. 
Caller should release the 2084 * lock by calling mdi_rele_path() 2085 */ 2086 MDI_PI_HOLD(pip); 2087 MDI_PI_UNLOCK(pip); 2088 ct->ct_path_last = pip; 2089 *ret_pip = pip; 2090 MDI_CLIENT_UNLOCK(ct); 2091 return (MDI_SUCCESS); 2092 } 2093 2094 /* 2095 * for non default behavior, bypass current 2096 * load balancing policy and always use LOAD_BALANCE_RR 2097 * except that the start point will be adjusted based 2098 * on the provided start_pip 2099 */ 2100 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2101 2102 switch (lbp) { 2103 case LOAD_BALANCE_NONE: 2104 /* 2105 * Load balancing is None or Alternate path mode 2106 * Start looking for a online mdi_pathinfo node starting from 2107 * last known selected path 2108 */ 2109 preferred = 1; 2110 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2111 if (pip == NULL) { 2112 pip = head; 2113 } 2114 start = pip; 2115 do { 2116 MDI_PI_LOCK(pip); 2117 /* 2118 * No need to explicitly check if the path is disabled. 2119 * Since we are checking for state == ONLINE and the 2120 * same veriable is used for DISABLE/ENABLE information. 2121 */ 2122 if ((MDI_PI(pip)->pi_state == 2123 MDI_PATHINFO_STATE_ONLINE) && 2124 preferred == MDI_PI(pip)->pi_preferred) { 2125 /* 2126 * Return the path in hold state. Caller should 2127 * release the lock by calling mdi_rele_path() 2128 */ 2129 MDI_PI_HOLD(pip); 2130 MDI_PI_UNLOCK(pip); 2131 ct->ct_path_last = pip; 2132 *ret_pip = pip; 2133 MDI_CLIENT_UNLOCK(ct); 2134 return (MDI_SUCCESS); 2135 } 2136 2137 /* 2138 * Path is busy. 
2139 */ 2140 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2141 MDI_PI_IS_TRANSIENT(pip)) 2142 retry = 1; 2143 /* 2144 * Keep looking for a next available online path 2145 */ 2146 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2147 if (next == NULL) { 2148 next = head; 2149 } 2150 MDI_PI_UNLOCK(pip); 2151 pip = next; 2152 if (start == pip && preferred) { 2153 preferred = 0; 2154 } else if (start == pip && !preferred) { 2155 cont = 0; 2156 } 2157 } while (cont); 2158 break; 2159 2160 case LOAD_BALANCE_LBA: 2161 /* 2162 * Make sure we are looking 2163 * for an online path. Otherwise, if it is for a STANDBY 2164 * path request, it will go through and fetch an ONLINE 2165 * path which is not desirable. 2166 */ 2167 if ((ct->ct_lb_args != NULL) && 2168 (ct->ct_lb_args->region_size) && bp && 2169 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2170 if (i_mdi_lba_lb(ct, ret_pip, bp) 2171 == MDI_SUCCESS) { 2172 MDI_CLIENT_UNLOCK(ct); 2173 return (MDI_SUCCESS); 2174 } 2175 } 2176 /* FALLTHROUGH */ 2177 case LOAD_BALANCE_RR: 2178 /* 2179 * Load balancing is Round Robin. Start looking for a online 2180 * mdi_pathinfo node starting from last known selected path 2181 * as the start point. If override flags are specified, 2182 * process accordingly. 2183 * If the search is already in effect(start_pip not null), 2184 * then lets just use the same path preference to continue the 2185 * traversal. 2186 */ 2187 2188 if (start_pip != NULL) { 2189 preferred = MDI_PI(start_pip)->pi_preferred; 2190 } else { 2191 preferred = 1; 2192 } 2193 2194 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2195 if (start == NULL) { 2196 pip = head; 2197 } else { 2198 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2199 if (pip == NULL) { 2200 if ( flags & MDI_SELECT_NO_PREFERRED) { 2201 /* 2202 * Return since we hit the end of list 2203 */ 2204 MDI_CLIENT_UNLOCK(ct); 2205 return (MDI_NOPATH); 2206 } 2207 2208 if (!sb) { 2209 if (preferred == 0) { 2210 /* 2211 * Looks like we have completed 2212 * the traversal as preferred 2213 * value is 0. Time to bail out. 2214 */ 2215 *ret_pip = NULL; 2216 MDI_CLIENT_UNLOCK(ct); 2217 return (MDI_NOPATH); 2218 } else { 2219 /* 2220 * Looks like we reached the 2221 * end of the list. Lets enable 2222 * traversal of non preferred 2223 * paths. 2224 */ 2225 preferred = 0; 2226 } 2227 } 2228 pip = head; 2229 } 2230 } 2231 start = pip; 2232 do { 2233 MDI_PI_LOCK(pip); 2234 if (sb) { 2235 cond = ((MDI_PI(pip)->pi_state == 2236 MDI_PATHINFO_STATE_ONLINE && 2237 MDI_PI(pip)->pi_preferred == 2238 preferred) ? 1 : 0); 2239 } else { 2240 if (flags == MDI_SELECT_ONLINE_PATH) { 2241 cond = ((MDI_PI(pip)->pi_state == 2242 MDI_PATHINFO_STATE_ONLINE && 2243 MDI_PI(pip)->pi_preferred == 2244 preferred) ? 1 : 0); 2245 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2246 cond = ((MDI_PI(pip)->pi_state == 2247 MDI_PATHINFO_STATE_STANDBY && 2248 MDI_PI(pip)->pi_preferred == 2249 preferred) ? 1 : 0); 2250 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2251 MDI_SELECT_STANDBY_PATH)) { 2252 cond = (((MDI_PI(pip)->pi_state == 2253 MDI_PATHINFO_STATE_ONLINE || 2254 (MDI_PI(pip)->pi_state == 2255 MDI_PATHINFO_STATE_STANDBY)) && 2256 MDI_PI(pip)->pi_preferred == 2257 preferred) ? 
1 : 0); 2258 } else if (flags == 2259 (MDI_SELECT_STANDBY_PATH | 2260 MDI_SELECT_ONLINE_PATH | 2261 MDI_SELECT_USER_DISABLE_PATH)) { 2262 cond = (((MDI_PI(pip)->pi_state == 2263 MDI_PATHINFO_STATE_ONLINE || 2264 (MDI_PI(pip)->pi_state == 2265 MDI_PATHINFO_STATE_STANDBY) || 2266 (MDI_PI(pip)->pi_state == 2267 (MDI_PATHINFO_STATE_ONLINE| 2268 MDI_PATHINFO_STATE_USER_DISABLE)) || 2269 (MDI_PI(pip)->pi_state == 2270 (MDI_PATHINFO_STATE_STANDBY | 2271 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2272 MDI_PI(pip)->pi_preferred == 2273 preferred) ? 1 : 0); 2274 } else if (flags == 2275 (MDI_SELECT_STANDBY_PATH | 2276 MDI_SELECT_ONLINE_PATH | 2277 MDI_SELECT_NO_PREFERRED)) { 2278 cond = (((MDI_PI(pip)->pi_state == 2279 MDI_PATHINFO_STATE_ONLINE) || 2280 (MDI_PI(pip)->pi_state == 2281 MDI_PATHINFO_STATE_STANDBY)) 2282 ? 1 : 0); 2283 } else { 2284 cond = 0; 2285 } 2286 } 2287 /* 2288 * No need to explicitly check if the path is disabled. 2289 * Since we are checking for state == ONLINE and the 2290 * same veriable is used for DISABLE/ENABLE information. 2291 */ 2292 if (cond) { 2293 /* 2294 * Return the path in hold state. Caller should 2295 * release the lock by calling mdi_rele_path() 2296 */ 2297 MDI_PI_HOLD(pip); 2298 MDI_PI_UNLOCK(pip); 2299 if (sb) 2300 ct->ct_path_last = pip; 2301 *ret_pip = pip; 2302 MDI_CLIENT_UNLOCK(ct); 2303 return (MDI_SUCCESS); 2304 } 2305 /* 2306 * Path is busy. 2307 */ 2308 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2309 MDI_PI_IS_TRANSIENT(pip)) 2310 retry = 1; 2311 2312 /* 2313 * Keep looking for a next available online path 2314 */ 2315 do_again: 2316 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2317 if (next == NULL) { 2318 if ( flags & MDI_SELECT_NO_PREFERRED) { 2319 /* 2320 * Bail out since we hit the end of list 2321 */ 2322 MDI_PI_UNLOCK(pip); 2323 break; 2324 } 2325 2326 if (!sb) { 2327 if (preferred == 1) { 2328 /* 2329 * Looks like we reached the 2330 * end of the list. Lets enable 2331 * traversal of non preferred 2332 * paths. 
2333 */ 2334 preferred = 0; 2335 next = head; 2336 } else { 2337 /* 2338 * We have done both the passes 2339 * Preferred as well as for 2340 * Non-preferred. Bail out now. 2341 */ 2342 cont = 0; 2343 } 2344 } else { 2345 /* 2346 * Standard behavior case. 2347 */ 2348 next = head; 2349 } 2350 } 2351 MDI_PI_UNLOCK(pip); 2352 if (cont == 0) { 2353 break; 2354 } 2355 pip = next; 2356 2357 if (!sb) { 2358 /* 2359 * We need to handle the selection of 2360 * non-preferred path in the following 2361 * case: 2362 * 2363 * +------+ +------+ +------+ +-----+ 2364 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2365 * +------+ +------+ +------+ +-----+ 2366 * 2367 * If we start the search with B, we need to 2368 * skip beyond B to pick C which is non - 2369 * preferred in the second pass. The following 2370 * test, if true, will allow us to skip over 2371 * the 'start'(B in the example) to select 2372 * other non preferred elements. 2373 */ 2374 if ((start_pip != NULL) && (start_pip == pip) && 2375 (MDI_PI(start_pip)->pi_preferred 2376 != preferred)) { 2377 /* 2378 * try again after going past the start 2379 * pip 2380 */ 2381 MDI_PI_LOCK(pip); 2382 goto do_again; 2383 } 2384 } else { 2385 /* 2386 * Standard behavior case 2387 */ 2388 if (start == pip && preferred) { 2389 /* look for nonpreferred paths */ 2390 preferred = 0; 2391 } else if (start == pip && !preferred) { 2392 /* 2393 * Exit condition 2394 */ 2395 cont = 0; 2396 } 2397 } 2398 } while (cont); 2399 break; 2400 } 2401 2402 MDI_CLIENT_UNLOCK(ct); 2403 if (retry == 1) { 2404 return (MDI_BUSY); 2405 } else { 2406 return (MDI_NOPATH); 2407 } 2408 } 2409 2410 /* 2411 * For a client, return the next available path to any phci 2412 * 2413 * Note: 2414 * Caller should hold the branch's devinfo node to get a consistent 2415 * snap shot of the mdi_pathinfo nodes. 2416 * 2417 * Please note that even the list is stable the mdi_pathinfo 2418 * node state and properties are volatile. 
 * The caller should lock
 * and unlock the nodes by calling mdi_pi_lock() and
 * mdi_pi_unlock() functions to get stable properties.
 *
 * If there is a need to use the nodes beyond the hold of the
 * devinfo node period (For ex. I/O), then mdi_pathinfo node
 * need to be held against unexpected removal by calling
 * mdi_hold_path() and should be released by calling
 * mdi_rele_path() on completion.
 */
mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
{
    mdi_client_t *ct;

    /* Not an mdi client device: there is no client path list to walk. */
    if (!MDI_CLIENT(ct_dip))
        return (NULL);

    /*
     * Walk through client link
     */
    ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
    ASSERT(ct != NULL);

    /* A NULL pip means "start of list": return the first path. */
    if (pip == NULL)
        return ((mdi_pathinfo_t *)ct->ct_path_head);

    return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
}

/*
 * For a phci, return the next available path to any client
 * Note: ditto mdi_get_next_phci_path()
 */
mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
{
    mdi_phci_t *ph;

    /* Not an mdi pHCI device: there is no pHCI path list to walk. */
    if (!MDI_PHCI(ph_dip))
        return (NULL);

    /*
     * Walk through pHCI link
     */
    ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
    ASSERT(ph != NULL);

    /* A NULL pip means "start of list": return the first path. */
    if (pip == NULL)
        return ((mdi_pathinfo_t *)ph->ph_path_head);

    return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
}

/*
 * mdi_hold_path():
 *	Hold the mdi_pathinfo node against unwanted unexpected free.
 * Return Values:
 *	None
 */
void
mdi_hold_path(mdi_pathinfo_t *pip)
{
    if (pip) {
        MDI_PI_LOCK(pip);
        MDI_PI_HOLD(pip);
        MDI_PI_UNLOCK(pip);
    }
}


/*
 * mdi_rele_path():
 *	Release the mdi_pathinfo node which was selected
 *	through mdi_select_path() mechanism or manually held by
 *	calling mdi_hold_path().
 * Return Values:
 *	None
 */
void
mdi_rele_path(mdi_pathinfo_t *pip)
{
    if (pip) {
        MDI_PI_LOCK(pip);
        MDI_PI_RELE(pip);
        /*
         * Dropped the last reference: wake any thread blocked on
         * pi_ref_cv waiting for the reference count to drain
         * (e.g. a pending mdi_pi_free()).
         */
        if (MDI_PI(pip)->pi_ref_cnt == 0) {
            cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
        }
        MDI_PI_UNLOCK(pip);
    }
}

/*
 * mdi_pi_lock():
 *	Lock the mdi_pathinfo node.
 * Note:
 *	The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
    /* NULL is asserted against in DEBUG, but tolerated in production. */
    ASSERT(pip != NULL);
    if (pip) {
        MDI_PI_LOCK(pip);
    }
}


/*
 * mdi_pi_unlock():
 *	Unlock the mdi_pathinfo node.
 * Note:
 *	The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
    /* NULL is asserted against in DEBUG, but tolerated in production. */
    ASSERT(pip != NULL);
    if (pip) {
        MDI_PI_UNLOCK(pip);
    }
}

/*
 * mdi_pi_find():
 *	Search the list of mdi_pathinfo nodes attached to the
 *	pHCI/Client device node whose path address matches "paddr".
 *	Returns a pointer to the mdi_pathinfo node if a matching node is
 *	found.
 * Return Values:
 *	mdi_pathinfo node handle
 *	NULL
 * Notes:
 *	Caller need not hold any locks to call this function.
 *	When caddr is NULL the search is done against the pHCI path
 *	list; otherwise it is done against the matching client's path
 *	list.  The returned node is NOT held; callers needing it beyond
 *	this call should use mdi_hold_path().
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
    mdi_phci_t *ph;
    mdi_vhci_t *vh;
    mdi_client_t *ct;
    mdi_pathinfo_t *pip = NULL;

    MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
        caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
    if ((pdip == NULL) || (paddr == NULL)) {
        return (NULL);
    }
    ph = i_devi_get_phci(pdip);
    if (ph == NULL) {
        /*
         * Invalid pHCI device, Nothing more to do.
         */
        MDI_DEBUG(2, (CE_WARN, pdip,
            "!mdi_pi_find: invalid phci"));
        return (NULL);
    }

    vh = ph->ph_vhci;
    if (vh == NULL) {
        /*
         * Invalid vHCI device, Nothing more to do.
         */
        MDI_DEBUG(2, (CE_WARN, pdip,
            "!mdi_pi_find: invalid vhci"));
        return (NULL);
    }

    /*
     * Look for pathinfo node identified by paddr.
     */
    if (caddr == NULL) {
        /*
         * Find a mdi_pathinfo node under pHCI list for a matching
         * unit address.
         */
        MDI_PHCI_LOCK(ph);
        if (MDI_PHCI_IS_OFFLINE(ph)) {
            MDI_DEBUG(2, (CE_WARN, pdip,
                "!mdi_pi_find: offline phci %p", (void *)ph));
            MDI_PHCI_UNLOCK(ph);
            return (NULL);
        }
        pip = (mdi_pathinfo_t *)ph->ph_path_head;

        while (pip != NULL) {
            if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
                break;
            }
            pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
        }
        MDI_PHCI_UNLOCK(ph);
        MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
            (void *)pip));
        return (pip);
    }

    /*
     * XXX - Is the rest of the code in this function really necessary?
     * The consumers of mdi_pi_find() can search for the desired pathinfo
     * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
     * whether the search is based on the pathinfo nodes attached to
     * the pHCI or the client node, the result will be the same.
     */

    /*
     * Find the client device corresponding to 'caddr'
     */
    MDI_VHCI_CLIENT_LOCK(vh);

    /*
     * XXX - Passing NULL to the following function works as long as the
     * the client addresses (caddr) are unique per vhci basis.
     */
    ct = i_mdi_client_find(vh, NULL, caddr);
    if (ct == NULL) {
        /*
         * Client not found, Obviously mdi_pathinfo node has not been
         * created yet.
         */
        MDI_VHCI_CLIENT_UNLOCK(vh);
        MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
            "found for caddr %s", caddr ? caddr : "NULL"));
        return (NULL);
    }

    /*
     * Hold the client lock and look for a mdi_pathinfo node with matching
     * pHCI and paddr
     */
    MDI_CLIENT_LOCK(ct);

    /*
     * Release the global mutex as it is no more needed. Note: We always
     * respect the locking order while acquiring.
     */
    MDI_VHCI_CLIENT_UNLOCK(vh);

    pip = (mdi_pathinfo_t *)ct->ct_path_head;
    while (pip != NULL) {
        /*
         * Compare the unit address
         */
        if ((MDI_PI(pip)->pi_phci == ph) &&
            strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
            break;
        }
        pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
    }
    MDI_CLIENT_UNLOCK(ct);
    MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
    return (pip);
}

/*
 * mdi_pi_alloc():
 *	Allocate and initialize a new instance of a mdi_pathinfo node.
 *	The mdi_pathinfo node returned by this function identifies a
 *	unique device path is capable of having properties attached
 *	and passed to mdi_pi_online() to fully attach and online the
 *	path and client device node.
 *	The mdi_pathinfo node returned by this function must be
 *	destroyed using mdi_pi_free() if the path is no longer
 *	operational or if the caller fails to attach a client device
 *	node when calling mdi_pi_online(). The framework will not free
 *	the resources allocated.
 *	This function can be called from both interrupt and kernel
 *	contexts.  DDI_NOSLEEP flag should be used while calling
 *	from interrupt contexts.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
    mdi_vhci_t *vh;
    mdi_phci_t *ph;
    mdi_client_t *ct;
    mdi_pathinfo_t *pip = NULL;
    dev_info_t *cdip;
    /* MDI_NOMEM is what the devinfo-create failure path (goto fail) returns */
    int rv = MDI_NOMEM;
    int path_allocated = 0;

    MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
        cname ? cname : "NULL", caddr ? caddr : "NULL",
        paddr ? paddr : "NULL"));

    if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
        ret_pip == NULL) {
        /* Nothing more to do */
        return (MDI_FAILURE);
    }

    *ret_pip = NULL;

    /* No allocations on detaching pHCI */
    if (DEVI_IS_DETACHING(pdip)) {
        /* Invalid pHCI device, return failure */
        MDI_DEBUG(1, (CE_WARN, pdip,
            "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
        return (MDI_FAILURE);
    }

    ph = i_devi_get_phci(pdip);
    ASSERT(ph != NULL);
    if (ph == NULL) {
        /* Invalid pHCI device, return failure */
        MDI_DEBUG(1, (CE_WARN, pdip,
            "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
        return (MDI_FAILURE);
    }

    MDI_PHCI_LOCK(ph);
    vh = ph->ph_vhci;
    if (vh == NULL) {
        /* Invalid vHCI device, return failure */
        MDI_DEBUG(1, (CE_WARN, pdip,
            "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
        MDI_PHCI_UNLOCK(ph);
        return (MDI_FAILURE);
    }

    if (MDI_PHCI_IS_READY(ph) == 0) {
        /*
         * Do not allow new node creation when pHCI is in
         * offline/suspended states
         */
        MDI_DEBUG(1, (CE_WARN, pdip,
            "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
        MDI_PHCI_UNLOCK(ph);
        return (MDI_BUSY);
    }
    /* Mark pHCI unstable for the duration; made stable again on exit */
    MDI_PHCI_UNSTABLE(ph);
    MDI_PHCI_UNLOCK(ph);

    /* look for a matching client, create one if not found */
    MDI_VHCI_CLIENT_LOCK(vh);
    ct = i_mdi_client_find(vh, cname, caddr);
    if (ct == NULL) {
        ct = i_mdi_client_alloc(vh, cname, caddr);
        ASSERT(ct != NULL);
    }

    if (ct->ct_dip == NULL) {
        /*
         * Allocate a devinfo node
         */
        ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
            compatible, ncompatible);
        if (ct->ct_dip == NULL) {
            (void) i_mdi_client_free(vh, ct);
            goto fail;
        }
    }
    cdip = ct->ct_dip;

    DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
    DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

    /*
     * If a pathinfo with the same pHCI and unit address already exists,
     * return it rather than allocating a duplicate.
     */
    MDI_CLIENT_LOCK(ct);
    pip = (mdi_pathinfo_t *)ct->ct_path_head;
    while (pip != NULL) {
        /*
         * Compare the unit address
         */
        if ((MDI_PI(pip)->pi_phci == ph) &&
            strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
            break;
        }
        pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
    }
    MDI_CLIENT_UNLOCK(ct);

    if (pip == NULL) {
        /*
         * This is a new path for this client device. Allocate and
         * initialize a new pathinfo node
         */
        pip = i_mdi_pi_alloc(ph, paddr, ct);
        ASSERT(pip != NULL);
        path_allocated = 1;
    }
    rv = MDI_SUCCESS;

fail:
    /*
     * Release the global mutex.
     */
    MDI_VHCI_CLIENT_UNLOCK(vh);

    /*
     * Mark the pHCI as stable
     */
    MDI_PHCI_LOCK(ph);
    MDI_PHCI_STABLE(ph);
    MDI_PHCI_UNLOCK(ph);
    *ret_pip = pip;

    MDI_DEBUG(2, (CE_NOTE, pdip,
        "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));

    if (path_allocated)
        vhcache_pi_add(vh->vh_config, MDI_PI(pip));

    return (rv);
}

/*
 * mdi_pi_alloc():
 *	Convenience wrapper around mdi_pi_alloc_compatible() with no
 *	"compatible" property list.  See mdi_pi_alloc_compatible() above
 *	for the full contract.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
    return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
        flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *	Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *	mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
    mdi_pathinfo_t *pip;
    int ct_circular;
    int ph_circular;
    /* static buffer is safe: only referenced under mdi_pathmap_mutex below */
    static char path[MAXPATHLEN];
    char *path_persistent;
    int path_instance;
    mod_hash_val_t hv;

    ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

    pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
    mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
    MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
        MDI_PATHINFO_STATE_TRANSIENT;

    /* Inherit the pHCI's disable states into the new path */
    if (MDI_PHCI_IS_USER_DISABLED(ph))
        MDI_PI_SET_USER_DISABLE(pip);

    if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
        MDI_PI_SET_DRV_DISABLE_TRANS(pip);

    if (MDI_PHCI_IS_DRV_DISABLED(ph))
        MDI_PI_SET_DRV_DISABLE(pip);

    MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
    cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
    MDI_PI(pip)->pi_client = ct;
    MDI_PI(pip)->pi_phci = ph;
    MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
    (void) strcpy(MDI_PI(pip)->pi_addr, paddr);

    /*
     * We form the "path" to the pathinfo node, and see if we have
     * already allocated a 'path_instance' for that "path".  If so,
     * we use the already allocated 'path_instance'.  If not, we
     * allocate a new 'path_instance' and associate it with a copy of
     * the "path" string (which is never freed). The association
     * between a 'path_instance' this "path" string persists until
     * reboot.
     */
    mutex_enter(&mdi_pathmap_mutex);
    (void) ddi_pathname(ph->ph_dip, path);
    (void) sprintf(path + strlen(path), "/%s@%s",
        ddi_node_name(ct->ct_dip), MDI_PI(pip)->pi_addr);
    if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
        path_instance = (uint_t)(intptr_t)hv;
    } else {
        /* allocate a new 'path_instance' and persistent "path" */
        path_instance = mdi_pathmap_instance++;
        path_persistent = i_ddi_strdup(path, KM_SLEEP);
        (void) mod_hash_insert(mdi_pathmap_bypath,
            (mod_hash_key_t)path_persistent,
            (mod_hash_val_t)(intptr_t)path_instance);
        (void) mod_hash_insert(mdi_pathmap_byinstance,
            (mod_hash_key_t)(intptr_t)path_instance,
            (mod_hash_val_t)path_persistent);
    }
    mutex_exit(&mdi_pathmap_mutex);
    MDI_PI(pip)->pi_path_instance = path_instance;

    (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
    ASSERT(MDI_PI(pip)->pi_prop != NULL);
    MDI_PI(pip)->pi_pprivate = NULL;
    MDI_PI(pip)->pi_cprivate = NULL;
    MDI_PI(pip)->pi_vprivate = NULL;
    MDI_PI(pip)->pi_client_link = NULL;
    MDI_PI(pip)->pi_phci_link = NULL;
    MDI_PI(pip)->pi_ref_cnt = 0;
    MDI_PI(pip)->pi_kstats = NULL;
    MDI_PI(pip)->pi_preferred = 1;
    cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

    /*
     * Lock both dev_info nodes against changes in parallel.
     *
     * The ndi_devi_enter(Client), is atypical since the client is a leaf.
     * This atypical operation is done to synchronize pathinfo nodes
     * during devinfo snapshot (see di_register_pip) by 'pretending' that
     * the pathinfo nodes are children of the Client.
     */
    ndi_devi_enter(ct->ct_dip, &ct_circular);
    ndi_devi_enter(ph->ph_dip, &ph_circular);

    i_mdi_phci_add_path(ph, pip);
    i_mdi_client_add_path(ct, pip);

    ndi_devi_exit(ph->ph_dip, ph_circular);
    ndi_devi_exit(ct->ct_dip, ct_circular);

    return (pip);
}

/*
 * mdi_pi_pathname_by_instance():
 *	Lookup of "path" by 'path_instance'. Return "path".
 *	NOTE: returned "path" remains valid forever (until reboot).
 */
char *
mdi_pi_pathname_by_instance(int path_instance)
{
    char *path;
    mod_hash_val_t hv;

    /* mdi_pathmap lookup of "path" by 'path_instance' */
    mutex_enter(&mdi_pathmap_mutex);
    if (mod_hash_find(mdi_pathmap_byinstance,
        (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
        path = (char *)hv;
    else
        path = NULL;
    mutex_exit(&mdi_pathmap_mutex);
    return (path);
}

/*
 * i_mdi_phci_add_path():
 *	Add a mdi_pathinfo node to pHCI list.
2965 * Notes: 2966 * Caller should per-pHCI mutex 2967 */ 2968 static void 2969 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2970 { 2971 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2972 2973 MDI_PHCI_LOCK(ph); 2974 if (ph->ph_path_head == NULL) { 2975 ph->ph_path_head = pip; 2976 } else { 2977 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 2978 } 2979 ph->ph_path_tail = pip; 2980 ph->ph_path_count++; 2981 MDI_PHCI_UNLOCK(ph); 2982 } 2983 2984 /* 2985 * i_mdi_client_add_path(): 2986 * Add mdi_pathinfo node to client list 2987 */ 2988 static void 2989 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2990 { 2991 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2992 2993 MDI_CLIENT_LOCK(ct); 2994 if (ct->ct_path_head == NULL) { 2995 ct->ct_path_head = pip; 2996 } else { 2997 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 2998 } 2999 ct->ct_path_tail = pip; 3000 ct->ct_path_count++; 3001 MDI_CLIENT_UNLOCK(ct); 3002 } 3003 3004 /* 3005 * mdi_pi_free(): 3006 * Free the mdi_pathinfo node and also client device node if this 3007 * is the last path to the device 3008 * Return Values: 3009 * MDI_SUCCESS 3010 * MDI_FAILURE 3011 * MDI_BUSY 3012 */ 3013 /*ARGSUSED*/ 3014 int 3015 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3016 { 3017 int rv = MDI_FAILURE; 3018 mdi_vhci_t *vh; 3019 mdi_phci_t *ph; 3020 mdi_client_t *ct; 3021 int (*f)(); 3022 int client_held = 0; 3023 3024 MDI_PI_LOCK(pip); 3025 ph = MDI_PI(pip)->pi_phci; 3026 ASSERT(ph != NULL); 3027 if (ph == NULL) { 3028 /* 3029 * Invalid pHCI device, return failure 3030 */ 3031 MDI_DEBUG(1, (CE_WARN, NULL, 3032 "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip)); 3033 MDI_PI_UNLOCK(pip); 3034 return (MDI_FAILURE); 3035 } 3036 3037 vh = ph->ph_vhci; 3038 ASSERT(vh != NULL); 3039 if (vh == NULL) { 3040 /* Invalid pHCI device, return failure */ 3041 MDI_DEBUG(1, (CE_WARN, NULL, 3042 "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip)); 3043 MDI_PI_UNLOCK(pip); 3044 return (MDI_FAILURE); 3045 } 3046 3047 ct = 
MDI_PI(pip)->pi_client; 3048 ASSERT(ct != NULL); 3049 if (ct == NULL) { 3050 /* 3051 * Invalid Client device, return failure 3052 */ 3053 MDI_DEBUG(1, (CE_WARN, NULL, 3054 "!mdi_pi_free: invalid client pip=%p", (void *)pip)); 3055 MDI_PI_UNLOCK(pip); 3056 return (MDI_FAILURE); 3057 } 3058 3059 /* 3060 * Check to see for busy condition. A mdi_pathinfo can only be freed 3061 * if the node state is either offline or init and the reference count 3062 * is zero. 3063 */ 3064 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3065 MDI_PI_IS_INITING(pip))) { 3066 /* 3067 * Node is busy 3068 */ 3069 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3070 "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip)); 3071 MDI_PI_UNLOCK(pip); 3072 return (MDI_BUSY); 3073 } 3074 3075 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3076 /* 3077 * Give a chance for pending I/Os to complete. 3078 */ 3079 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: " 3080 "%d cmds still pending on path: %p\n", 3081 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3082 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3083 &MDI_PI(pip)->pi_mutex, 3084 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3085 /* 3086 * The timeout time reached without ref_cnt being zero 3087 * being signaled. 3088 */ 3089 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3090 "!mdi_pi_free: " 3091 "Timeout reached on path %p without the cond\n", 3092 (void *)pip)); 3093 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3094 "!mdi_pi_free: " 3095 "%d cmds still pending on path: %p\n", 3096 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3097 MDI_PI_UNLOCK(pip); 3098 return (MDI_BUSY); 3099 } 3100 } 3101 if (MDI_PI(pip)->pi_pm_held) { 3102 client_held = 1; 3103 } 3104 MDI_PI_UNLOCK(pip); 3105 3106 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3107 3108 MDI_CLIENT_LOCK(ct); 3109 3110 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3111 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3112 3113 /* 3114 * Wait till failover is complete before removing this node. 
3115 */ 3116 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3117 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3118 3119 MDI_CLIENT_UNLOCK(ct); 3120 MDI_VHCI_CLIENT_LOCK(vh); 3121 MDI_CLIENT_LOCK(ct); 3122 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3123 3124 if (!MDI_PI_IS_INITING(pip)) { 3125 f = vh->vh_ops->vo_pi_uninit; 3126 if (f != NULL) { 3127 rv = (*f)(vh->vh_dip, pip, 0); 3128 } 3129 } 3130 /* 3131 * If vo_pi_uninit() completed successfully. 3132 */ 3133 if (rv == MDI_SUCCESS) { 3134 if (client_held) { 3135 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 3136 "i_mdi_pm_rele_client\n")); 3137 i_mdi_pm_rele_client(ct, 1); 3138 } 3139 i_mdi_pi_free(ph, pip, ct); 3140 if (ct->ct_path_count == 0) { 3141 /* 3142 * Client lost its last path. 3143 * Clean up the client device 3144 */ 3145 MDI_CLIENT_UNLOCK(ct); 3146 (void) i_mdi_client_free(ct->ct_vhci, ct); 3147 MDI_VHCI_CLIENT_UNLOCK(vh); 3148 return (rv); 3149 } 3150 } 3151 MDI_CLIENT_UNLOCK(ct); 3152 MDI_VHCI_CLIENT_UNLOCK(vh); 3153 3154 if (rv == MDI_FAILURE) 3155 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3156 3157 return (rv); 3158 } 3159 3160 /* 3161 * i_mdi_pi_free(): 3162 * Free the mdi_pathinfo node 3163 */ 3164 static void 3165 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3166 { 3167 int ct_circular; 3168 int ph_circular; 3169 3170 ASSERT(MDI_CLIENT_LOCKED(ct)); 3171 3172 /* 3173 * remove any per-path kstats 3174 */ 3175 i_mdi_pi_kstat_destroy(pip); 3176 3177 /* See comments in i_mdi_pi_alloc() */ 3178 ndi_devi_enter(ct->ct_dip, &ct_circular); 3179 ndi_devi_enter(ph->ph_dip, &ph_circular); 3180 3181 i_mdi_client_remove_path(ct, pip); 3182 i_mdi_phci_remove_path(ph, pip); 3183 3184 ndi_devi_exit(ph->ph_dip, ph_circular); 3185 ndi_devi_exit(ct->ct_dip, ct_circular); 3186 3187 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3188 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3189 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3190 if (MDI_PI(pip)->pi_addr) { 3191 kmem_free(MDI_PI(pip)->pi_addr, 3192 
strlen(MDI_PI(pip)->pi_addr) + 1); 3193 MDI_PI(pip)->pi_addr = NULL; 3194 } 3195 3196 if (MDI_PI(pip)->pi_prop) { 3197 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3198 MDI_PI(pip)->pi_prop = NULL; 3199 } 3200 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3201 } 3202 3203 3204 /* 3205 * i_mdi_phci_remove_path(): 3206 * Remove a mdi_pathinfo node from pHCI list. 3207 * Notes: 3208 * Caller should hold per-pHCI mutex 3209 */ 3210 static void 3211 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3212 { 3213 mdi_pathinfo_t *prev = NULL; 3214 mdi_pathinfo_t *path = NULL; 3215 3216 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3217 3218 MDI_PHCI_LOCK(ph); 3219 path = ph->ph_path_head; 3220 while (path != NULL) { 3221 if (path == pip) { 3222 break; 3223 } 3224 prev = path; 3225 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3226 } 3227 3228 if (path) { 3229 ph->ph_path_count--; 3230 if (prev) { 3231 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3232 } else { 3233 ph->ph_path_head = 3234 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3235 } 3236 if (ph->ph_path_tail == path) { 3237 ph->ph_path_tail = prev; 3238 } 3239 } 3240 3241 /* 3242 * Clear the pHCI link 3243 */ 3244 MDI_PI(pip)->pi_phci_link = NULL; 3245 MDI_PI(pip)->pi_phci = NULL; 3246 MDI_PHCI_UNLOCK(ph); 3247 } 3248 3249 /* 3250 * i_mdi_client_remove_path(): 3251 * Remove a mdi_pathinfo node from client path list. 
 * Notes:
 *	Caller must hold the client's dev_info node busy (DEVI_BUSY_OWNED)
 *	and the client mutex (MDI_CLIENT_LOCKED).
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
    mdi_pathinfo_t *prev = NULL;
    mdi_pathinfo_t *path;

    ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

    ASSERT(MDI_CLIENT_LOCKED(ct));
    /* Locate pip in the singly-linked list, tracking its predecessor */
    path = ct->ct_path_head;
    while (path != NULL) {
        if (path == pip) {
            break;
        }
        prev = path;
        path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
    }

    if (path) {
        ct->ct_path_count--;
        if (prev) {
            MDI_PI(prev)->pi_client_link =
                MDI_PI(path)->pi_client_link;
        } else {
            ct->ct_path_head =
                (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
        }
        if (ct->ct_path_tail == path) {
            ct->ct_path_tail = prev;
        }
        /*
         * Keep the round-robin cursor valid if it pointed at the
         * node being removed.
         */
        if (ct->ct_path_last == path) {
            ct->ct_path_last = ct->ct_path_head;
        }
    }
    MDI_PI(pip)->pi_client_link = NULL;
    MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *	online a mdi_pathinfo node
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
    int rv = MDI_SUCCESS;
    mdi_vhci_t *vh;
    mdi_phci_t *ph;
    mdi_client_t *ct;
    int (*f)();
    dev_info_t *cdip;

    MDI_PI_LOCK(pip);

    ph = MDI_PI(pip)->pi_phci;
    ASSERT(ph);
    if (ph == NULL) {
        /*
         * Invalid pHCI device, fail the request
         */
        MDI_PI_UNLOCK(pip);
        MDI_DEBUG(1, (CE_WARN, NULL,
            "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
        return (MDI_FAILURE);
    }

    vh = ph->ph_vhci;
    ASSERT(vh);
    if (vh == NULL) {
        /*
         * Invalid vHCI device, fail the request
         */
        MDI_PI_UNLOCK(pip);
        MDI_DEBUG(1, (CE_WARN, NULL,
            "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
        return (MDI_FAILURE);
    }

    ct = MDI_PI(pip)->pi_client;
    ASSERT(ct != NULL);
    if (ct == NULL) {
        /*
         * Invalid client device, fail the request
         */
        MDI_PI_UNLOCK(pip);
        MDI_DEBUG(1, (CE_WARN, NULL,
            "!mdi_pi_state_change: invalid client pip=%p",
            (void *)pip));
        return (MDI_FAILURE);
    }

    /*
     * If this path has not been initialized yet, Callback vHCI driver's
     * pathinfo node initialize entry point
     */

    if (MDI_PI_IS_INITING(pip)) {
        /* vo_pi_init may block; call it without the pip lock held */
        MDI_PI_UNLOCK(pip);
        f = vh->vh_ops->vo_pi_init;
        if (f != NULL) {
            rv = (*f)(vh->vh_dip, pip, 0);
            if (rv != MDI_SUCCESS) {
                MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
                    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
                    (void *)vh, (void *)pip));
                return (MDI_FAILURE);
            }
        }
        MDI_PI_LOCK(pip);
        MDI_PI_CLEAR_TRANSIENT(pip);
    }

    /*
     * Do not allow state transition when pHCI is in offline/suspended
     * states
     */
    i_mdi_phci_lock(ph, pip);
    if (MDI_PHCI_IS_READY(ph) == 0) {
        MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
            "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
            (void *)ph));
        MDI_PI_UNLOCK(pip);
        i_mdi_phci_unlock(ph);
        return (MDI_BUSY);
    }
    /* pHCI is made stable again at state_change_exit below */
    MDI_PHCI_UNSTABLE(ph);
    i_mdi_phci_unlock(ph);

    /*
     * Check if mdi_pathinfo state is in transient state.
     * If yes, offlining is in progress and wait till transient state is
     * cleared.
     */
    if (MDI_PI_IS_TRANSIENT(pip)) {
        while (MDI_PI_IS_TRANSIENT(pip)) {
            cv_wait(&MDI_PI(pip)->pi_state_cv,
                &MDI_PI(pip)->pi_mutex);
        }
    }

    /*
     * Grab the client lock in reverse order sequence and release the
     * mdi_pathinfo mutex.
     */
    i_mdi_client_lock(ct, pip);
    MDI_PI_UNLOCK(pip);

    /*
     * Wait till failover state is cleared
     */
    while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
        cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

    /*
     * Mark the mdi_pathinfo node state as transient
     */
    MDI_PI_LOCK(pip);
    switch (state) {
    case MDI_PATHINFO_STATE_ONLINE:
        MDI_PI_SET_ONLINING(pip);
        break;

    case MDI_PATHINFO_STATE_STANDBY:
        MDI_PI_SET_STANDBYING(pip);
        break;

    case MDI_PATHINFO_STATE_FAULT:
        /*
         * Mark the pathinfo state as FAULTED
         */
        MDI_PI_SET_FAULTING(pip);
        MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
        break;

    case MDI_PATHINFO_STATE_OFFLINE:
        /*
         * ndi_devi_offline() cannot hold pip or ct locks.
         */
        MDI_PI_UNLOCK(pip);
        /*
         * Don't offline the client dev_info node unless we have
         * no available paths left at all.
         */
        cdip = ct->ct_dip;
        if ((flag & NDI_DEVI_REMOVE) &&
            (ct->ct_path_count == 1)) {
            i_mdi_client_unlock(ct);
            rv = ndi_devi_offline(cdip, 0);
            if (rv != NDI_SUCCESS) {
                /*
                 * Convert to MDI error code
                 */
                switch (rv) {
                case NDI_BUSY:
                    rv = MDI_BUSY;
                    break;
                default:
                    rv = MDI_FAILURE;
                    break;
                }
                goto state_change_exit;
            } else {
                i_mdi_client_lock(ct, NULL);
            }
        }
        /*
         * Mark the mdi_pathinfo node state as transient
         */
        MDI_PI_LOCK(pip);
        MDI_PI_SET_OFFLINING(pip);
        break;
    }
    MDI_PI_UNLOCK(pip);
    MDI_CLIENT_UNSTABLE(ct);
    i_mdi_client_unlock(ct);

    /* Notify the vHCI driver of the state change (locks dropped) */
    f = vh->vh_ops->vo_pi_state_change;
    if (f != NULL)
        rv = (*f)(vh->vh_dip, pip, state, 0, flag);

    MDI_CLIENT_LOCK(ct);
    MDI_PI_LOCK(pip);
    if (rv == MDI_NOT_SUPPORTED) {
        MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
    }
    if (rv != MDI_SUCCESS) {
        MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
            "!vo_pi_state_change: failed rv = %x", rv));
    }
    /* Commit the new state on success, otherwise roll back to the old one */
    if (MDI_PI_IS_TRANSIENT(pip)) {
        if (rv == MDI_SUCCESS) {
            MDI_PI_CLEAR_TRANSIENT(pip);
        } else {
            MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
        }
    }

    /*
     * Wake anyone waiting for this mdi_pathinfo node
     */
    cv_broadcast(&MDI_PI(pip)->pi_state_cv);
    MDI_PI_UNLOCK(pip);

    /*
     * Mark the client device as stable
     */
    MDI_CLIENT_STABLE(ct);
    if (rv == MDI_SUCCESS) {
        if (ct->ct_unstable == 0) {
            cdip = ct->ct_dip;

            /*
             * Onlining the mdi_pathinfo node will impact the
             * client state Update the client and dev_info node
             * state accordingly
             */
            rv = NDI_SUCCESS;
            i_mdi_client_update_state(ct);
            switch (MDI_CLIENT_STATE(ct)) {
            case MDI_CLIENT_STATE_OPTIMAL:
            case MDI_CLIENT_STATE_DEGRADED:
                if (cdip && !i_ddi_devi_attached(cdip) &&
                    ((state == MDI_PATHINFO_STATE_ONLINE) ||
                    (state == MDI_PATHINFO_STATE_STANDBY))) {

                    /*
                     * Must do ndi_devi_online() through
                     * hotplug thread for deferred
                     * attach mechanism to work
                     */
                    MDI_CLIENT_UNLOCK(ct);
                    rv = ndi_devi_online(cdip, 0);
                    MDI_CLIENT_LOCK(ct);
                    if ((rv != NDI_SUCCESS) &&
                        (MDI_CLIENT_STATE(ct) ==
                        MDI_CLIENT_STATE_DEGRADED)) {
                        /*
                         * ndi_devi_online failed.
                         * Reset client flags to
                         * offline.
                         */
                        MDI_DEBUG(1, (CE_WARN, cdip,
                            "!ndi_devi_online: failed "
                            " Error: %x", rv));
                        MDI_CLIENT_SET_OFFLINE(ct);
                    }
                    if (rv != NDI_SUCCESS) {
                        /* Reset the path state */
                        MDI_PI_LOCK(pip);
                        MDI_PI(pip)->pi_state =
                            MDI_PI_OLD_STATE(pip);
                        MDI_PI_UNLOCK(pip);
                    }
                }
                break;

            case MDI_CLIENT_STATE_FAILED:
                /*
                 * This is the last path case for
                 * non-user initiated events.
                 */
                if (((flag & NDI_DEVI_REMOVE) == 0) &&
                    cdip && (i_ddi_node_state(cdip) >=
                    DS_INITIALIZED)) {
                    MDI_CLIENT_UNLOCK(ct);
                    rv = ndi_devi_offline(cdip, 0);
                    MDI_CLIENT_LOCK(ct);

                    if (rv != NDI_SUCCESS) {
                        /*
                         * ndi_devi_offline failed.
                         * Reset client flags to
                         * online as the path could not
                         * be offlined.
                         */
                        MDI_DEBUG(1, (CE_WARN, cdip,
                            "!ndi_devi_offline: failed "
                            " Error: %x", rv));
                        MDI_CLIENT_SET_ONLINE(ct);
                    }
                }
                break;
            }
            /*
             * Convert to MDI error code
             */
            switch (rv) {
            case NDI_SUCCESS:
                MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
                i_mdi_report_path_state(ct, pip);
                rv = MDI_SUCCESS;
                break;
            case NDI_BUSY:
                rv = MDI_BUSY;
                break;
            default:
                rv = MDI_FAILURE;
                break;
            }
        }
    }
    MDI_CLIENT_UNLOCK(ct);

state_change_exit:
    /*
     * Mark the pHCI as stable again.
     */
    MDI_PHCI_LOCK(ph);
    MDI_PHCI_STABLE(ph);
    MDI_PHCI_UNLOCK(ph);
    return (rv);
}

/*
 * mdi_pi_online():
 *	Place the path_info node in the online state.  The path is
 *	now available to be selected by mdi_select_path() for
 *	transporting I/O requests to client devices.
3616 * Return Values: 3617 * MDI_SUCCESS 3618 * MDI_FAILURE 3619 */ 3620 int 3621 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3622 { 3623 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3624 int client_held = 0; 3625 int rv; 3626 int se_flag; 3627 int kmem_flag; 3628 3629 ASSERT(ct != NULL); 3630 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3631 if (rv != MDI_SUCCESS) 3632 return (rv); 3633 3634 MDI_PI_LOCK(pip); 3635 if (MDI_PI(pip)->pi_pm_held == 0) { 3636 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3637 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3638 i_mdi_pm_hold_pip(pip); 3639 client_held = 1; 3640 } 3641 MDI_PI_UNLOCK(pip); 3642 3643 if (client_held) { 3644 MDI_CLIENT_LOCK(ct); 3645 if (ct->ct_power_cnt == 0) { 3646 rv = i_mdi_power_all_phci(ct); 3647 } 3648 3649 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3650 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3651 i_mdi_pm_hold_client(ct, 1); 3652 MDI_CLIENT_UNLOCK(ct); 3653 } 3654 3655 /* determine interrupt context */ 3656 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3657 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3658 3659 /* A new path is online. Invalidate DINFOCACHE snap shot. */ 3660 i_ddi_di_cache_invalidate(kmem_flag); 3661 3662 return (rv); 3663 } 3664 3665 /* 3666 * mdi_pi_standby(): 3667 * Place the mdi_pathinfo node in standby state 3668 * 3669 * Return Values: 3670 * MDI_SUCCESS 3671 * MDI_FAILURE 3672 */ 3673 int 3674 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3675 { 3676 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3677 } 3678 3679 /* 3680 * mdi_pi_fault(): 3681 * Place the mdi_pathinfo node in fault'ed state 3682 * Return Values: 3683 * MDI_SUCCESS 3684 * MDI_FAILURE 3685 */ 3686 int 3687 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3688 { 3689 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3690 } 3691 3692 /* 3693 * mdi_pi_offline(): 3694 * Offline a mdi_pathinfo node. 
3695 * Return Values: 3696 * MDI_SUCCESS 3697 * MDI_FAILURE 3698 */ 3699 int 3700 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3701 { 3702 int ret, client_held = 0; 3703 mdi_client_t *ct; 3704 int se_flag; 3705 int kmem_flag; 3706 3707 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3708 3709 if (ret == MDI_SUCCESS) { 3710 MDI_PI_LOCK(pip); 3711 if (MDI_PI(pip)->pi_pm_held) { 3712 client_held = 1; 3713 } 3714 MDI_PI_UNLOCK(pip); 3715 3716 if (client_held) { 3717 ct = MDI_PI(pip)->pi_client; 3718 MDI_CLIENT_LOCK(ct); 3719 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3720 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3721 i_mdi_pm_rele_client(ct, 1); 3722 MDI_CLIENT_UNLOCK(ct); 3723 } 3724 3725 /* determine interrupt context */ 3726 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3727 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3728 3729 /* pathinfo is offlined. update DINFOCACHE. */ 3730 i_ddi_di_cache_invalidate(kmem_flag); 3731 } 3732 3733 return (ret); 3734 } 3735 3736 /* 3737 * i_mdi_pi_offline(): 3738 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3739 */ 3740 static int 3741 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3742 { 3743 dev_info_t *vdip = NULL; 3744 mdi_vhci_t *vh = NULL; 3745 mdi_client_t *ct = NULL; 3746 int (*f)(); 3747 int rv; 3748 3749 MDI_PI_LOCK(pip); 3750 ct = MDI_PI(pip)->pi_client; 3751 ASSERT(ct != NULL); 3752 3753 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3754 /* 3755 * Give a chance for pending I/Os to complete. 3756 */ 3757 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3758 "%d cmds still pending on path: %p\n", 3759 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3760 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3761 &MDI_PI(pip)->pi_mutex, 3762 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3763 /* 3764 * The timeout time reached without ref_cnt being zero 3765 * being signaled. 
3766 */ 3767 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3768 "Timeout reached on path %p without the cond\n", 3769 (void *)pip)); 3770 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3771 "%d cmds still pending on path: %p\n", 3772 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3773 } 3774 } 3775 vh = ct->ct_vhci; 3776 vdip = vh->vh_dip; 3777 3778 /* 3779 * Notify vHCI that has registered this event 3780 */ 3781 ASSERT(vh->vh_ops); 3782 f = vh->vh_ops->vo_pi_state_change; 3783 3784 if (f != NULL) { 3785 MDI_PI_UNLOCK(pip); 3786 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3787 flags)) != MDI_SUCCESS) { 3788 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3789 "!vo_path_offline failed " 3790 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3791 } 3792 MDI_PI_LOCK(pip); 3793 } 3794 3795 /* 3796 * Set the mdi_pathinfo node state and clear the transient condition 3797 */ 3798 MDI_PI_SET_OFFLINE(pip); 3799 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3800 MDI_PI_UNLOCK(pip); 3801 3802 MDI_CLIENT_LOCK(ct); 3803 if (rv == MDI_SUCCESS) { 3804 if (ct->ct_unstable == 0) { 3805 dev_info_t *cdip = ct->ct_dip; 3806 3807 /* 3808 * Onlining the mdi_pathinfo node will impact the 3809 * client state Update the client and dev_info node 3810 * state accordingly 3811 */ 3812 i_mdi_client_update_state(ct); 3813 rv = NDI_SUCCESS; 3814 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3815 if (cdip && 3816 (i_ddi_node_state(cdip) >= 3817 DS_INITIALIZED)) { 3818 MDI_CLIENT_UNLOCK(ct); 3819 rv = ndi_devi_offline(cdip, 0); 3820 MDI_CLIENT_LOCK(ct); 3821 if (rv != NDI_SUCCESS) { 3822 /* 3823 * ndi_devi_offline failed. 3824 * Reset client flags to 3825 * online. 
3826 */ 3827 MDI_DEBUG(4, (CE_WARN, cdip, 3828 "!ndi_devi_offline: failed " 3829 " Error: %x", rv)); 3830 MDI_CLIENT_SET_ONLINE(ct); 3831 } 3832 } 3833 } 3834 /* 3835 * Convert to MDI error code 3836 */ 3837 switch (rv) { 3838 case NDI_SUCCESS: 3839 rv = MDI_SUCCESS; 3840 break; 3841 case NDI_BUSY: 3842 rv = MDI_BUSY; 3843 break; 3844 default: 3845 rv = MDI_FAILURE; 3846 break; 3847 } 3848 } 3849 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3850 i_mdi_report_path_state(ct, pip); 3851 } 3852 3853 MDI_CLIENT_UNLOCK(ct); 3854 3855 /* 3856 * Change in the mdi_pathinfo node state will impact the client state 3857 */ 3858 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3859 (void *)ct, (void *)pip)); 3860 return (rv); 3861 } 3862 3863 3864 /* 3865 * mdi_pi_get_addr(): 3866 * Get the unit address associated with a mdi_pathinfo node 3867 * 3868 * Return Values: 3869 * char * 3870 */ 3871 char * 3872 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3873 { 3874 if (pip == NULL) 3875 return (NULL); 3876 3877 return (MDI_PI(pip)->pi_addr); 3878 } 3879 3880 /* 3881 * mdi_pi_get_path_instance(): 3882 * Get the 'path_instance' of a mdi_pathinfo node 3883 * 3884 * Return Values: 3885 * path_instance 3886 */ 3887 int 3888 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 3889 { 3890 if (pip == NULL) 3891 return (0); 3892 3893 return (MDI_PI(pip)->pi_path_instance); 3894 } 3895 3896 /* 3897 * mdi_pi_pathname(): 3898 * Return pointer to path to pathinfo node. 
3899 */ 3900 char * 3901 mdi_pi_pathname(mdi_pathinfo_t *pip) 3902 { 3903 if (pip == NULL) 3904 return (NULL); 3905 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 3906 } 3907 3908 char * 3909 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 3910 { 3911 char *obp_path = NULL; 3912 if ((pip == NULL) || (path == NULL)) 3913 return (NULL); 3914 3915 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 3916 (void) strcpy(path, obp_path); 3917 (void) mdi_prop_free(obp_path); 3918 } else { 3919 path = NULL; 3920 } 3921 return (path); 3922 } 3923 3924 int 3925 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 3926 { 3927 dev_info_t *pdip; 3928 char *obp_path = NULL; 3929 int rc = MDI_FAILURE; 3930 3931 if (pip == NULL) 3932 return (MDI_FAILURE); 3933 3934 pdip = mdi_pi_get_phci(pip); 3935 if (pdip == NULL) 3936 return (MDI_FAILURE); 3937 3938 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 3939 3940 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 3941 (void) ddi_pathname(pdip, obp_path); 3942 } 3943 3944 if (component) { 3945 (void) strncat(obp_path, "/", MAXPATHLEN); 3946 (void) strncat(obp_path, component, MAXPATHLEN); 3947 } 3948 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 3949 3950 if (obp_path) 3951 kmem_free(obp_path, MAXPATHLEN); 3952 return (rc); 3953 } 3954 3955 /* 3956 * mdi_pi_get_client(): 3957 * Get the client devinfo associated with a mdi_pathinfo node 3958 * 3959 * Return Values: 3960 * Handle to client device dev_info node 3961 */ 3962 dev_info_t * 3963 mdi_pi_get_client(mdi_pathinfo_t *pip) 3964 { 3965 dev_info_t *dip = NULL; 3966 if (pip) { 3967 dip = MDI_PI(pip)->pi_client->ct_dip; 3968 } 3969 return (dip); 3970 } 3971 3972 /* 3973 * mdi_pi_get_phci(): 3974 * Get the pHCI devinfo associated with the mdi_pathinfo node 3975 * Return Values: 3976 * Handle to dev_info node 3977 */ 3978 dev_info_t * 3979 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3980 { 3981 dev_info_t *dip = NULL; 3982 if (pip) { 
3983 dip = MDI_PI(pip)->pi_phci->ph_dip; 3984 } 3985 return (dip); 3986 } 3987 3988 /* 3989 * mdi_pi_get_client_private(): 3990 * Get the client private information associated with the 3991 * mdi_pathinfo node 3992 */ 3993 void * 3994 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3995 { 3996 void *cprivate = NULL; 3997 if (pip) { 3998 cprivate = MDI_PI(pip)->pi_cprivate; 3999 } 4000 return (cprivate); 4001 } 4002 4003 /* 4004 * mdi_pi_set_client_private(): 4005 * Set the client private information in the mdi_pathinfo node 4006 */ 4007 void 4008 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4009 { 4010 if (pip) { 4011 MDI_PI(pip)->pi_cprivate = priv; 4012 } 4013 } 4014 4015 /* 4016 * mdi_pi_get_phci_private(): 4017 * Get the pHCI private information associated with the 4018 * mdi_pathinfo node 4019 */ 4020 caddr_t 4021 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4022 { 4023 caddr_t pprivate = NULL; 4024 if (pip) { 4025 pprivate = MDI_PI(pip)->pi_pprivate; 4026 } 4027 return (pprivate); 4028 } 4029 4030 /* 4031 * mdi_pi_set_phci_private(): 4032 * Set the pHCI private information in the mdi_pathinfo node 4033 */ 4034 void 4035 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4036 { 4037 if (pip) { 4038 MDI_PI(pip)->pi_pprivate = priv; 4039 } 4040 } 4041 4042 /* 4043 * mdi_pi_get_state(): 4044 * Get the mdi_pathinfo node state. Transient states are internal 4045 * and not provided to the users 4046 */ 4047 mdi_pathinfo_state_t 4048 mdi_pi_get_state(mdi_pathinfo_t *pip) 4049 { 4050 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4051 4052 if (pip) { 4053 if (MDI_PI_IS_TRANSIENT(pip)) { 4054 /* 4055 * mdi_pathinfo is in state transition. Return the 4056 * last good state. 
4057 */ 4058 state = MDI_PI_OLD_STATE(pip); 4059 } else { 4060 state = MDI_PI_STATE(pip); 4061 } 4062 } 4063 return (state); 4064 } 4065 4066 /* 4067 * Note that the following function needs to be the new interface for 4068 * mdi_pi_get_state when mpxio gets integrated to ON. 4069 */ 4070 int 4071 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4072 uint32_t *ext_state) 4073 { 4074 *state = MDI_PATHINFO_STATE_INIT; 4075 4076 if (pip) { 4077 if (MDI_PI_IS_TRANSIENT(pip)) { 4078 /* 4079 * mdi_pathinfo is in state transition. Return the 4080 * last good state. 4081 */ 4082 *state = MDI_PI_OLD_STATE(pip); 4083 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4084 } else { 4085 *state = MDI_PI_STATE(pip); 4086 *ext_state = MDI_PI_EXT_STATE(pip); 4087 } 4088 } 4089 return (MDI_SUCCESS); 4090 } 4091 4092 /* 4093 * mdi_pi_get_preferred: 4094 * Get the preferred path flag 4095 */ 4096 int 4097 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4098 { 4099 if (pip) { 4100 return (MDI_PI(pip)->pi_preferred); 4101 } 4102 return (0); 4103 } 4104 4105 /* 4106 * mdi_pi_set_preferred: 4107 * Set the preferred path flag 4108 */ 4109 void 4110 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4111 { 4112 if (pip) { 4113 MDI_PI(pip)->pi_preferred = preferred; 4114 } 4115 } 4116 4117 /* 4118 * mdi_pi_set_state(): 4119 * Set the mdi_pathinfo node state 4120 */ 4121 void 4122 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4123 { 4124 uint32_t ext_state; 4125 4126 if (pip) { 4127 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4128 MDI_PI(pip)->pi_state = state; 4129 MDI_PI(pip)->pi_state |= ext_state; 4130 } 4131 } 4132 4133 /* 4134 * Property functions: 4135 */ 4136 int 4137 i_map_nvlist_error_to_mdi(int val) 4138 { 4139 int rv; 4140 4141 switch (val) { 4142 case 0: 4143 rv = DDI_PROP_SUCCESS; 4144 break; 4145 case EINVAL: 4146 case ENOTSUP: 4147 rv = DDI_PROP_INVAL_ARG; 4148 break; 4149 case ENOMEM: 4150 rv = DDI_PROP_NO_MEMORY; 4151 break; 
4152 default: 4153 rv = DDI_PROP_NOT_FOUND; 4154 break; 4155 } 4156 return (rv); 4157 } 4158 4159 /* 4160 * mdi_pi_get_next_prop(): 4161 * Property walk function. The caller should hold mdi_pi_lock() 4162 * and release by calling mdi_pi_unlock() at the end of walk to 4163 * get a consistent value. 4164 */ 4165 nvpair_t * 4166 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4167 { 4168 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4169 return (NULL); 4170 } 4171 ASSERT(MDI_PI_LOCKED(pip)); 4172 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4173 } 4174 4175 /* 4176 * mdi_prop_remove(): 4177 * Remove the named property from the named list. 4178 */ 4179 int 4180 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4181 { 4182 if (pip == NULL) { 4183 return (DDI_PROP_NOT_FOUND); 4184 } 4185 ASSERT(!MDI_PI_LOCKED(pip)); 4186 MDI_PI_LOCK(pip); 4187 if (MDI_PI(pip)->pi_prop == NULL) { 4188 MDI_PI_UNLOCK(pip); 4189 return (DDI_PROP_NOT_FOUND); 4190 } 4191 if (name) { 4192 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4193 } else { 4194 char nvp_name[MAXNAMELEN]; 4195 nvpair_t *nvp; 4196 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4197 while (nvp) { 4198 nvpair_t *next; 4199 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4200 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 4201 nvpair_name(nvp)); 4202 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4203 nvp_name); 4204 nvp = next; 4205 } 4206 } 4207 MDI_PI_UNLOCK(pip); 4208 return (DDI_PROP_SUCCESS); 4209 } 4210 4211 /* 4212 * mdi_prop_size(): 4213 * Get buffer size needed to pack the property data. 4214 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4215 * buffer size. 
4216 */ 4217 int 4218 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4219 { 4220 int rv; 4221 size_t bufsize; 4222 4223 *buflenp = 0; 4224 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4225 return (DDI_PROP_NOT_FOUND); 4226 } 4227 ASSERT(MDI_PI_LOCKED(pip)); 4228 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4229 &bufsize, NV_ENCODE_NATIVE); 4230 *buflenp = bufsize; 4231 return (i_map_nvlist_error_to_mdi(rv)); 4232 } 4233 4234 /* 4235 * mdi_prop_pack(): 4236 * pack the property list. The caller should hold the 4237 * mdi_pathinfo_t node to get a consistent data 4238 */ 4239 int 4240 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4241 { 4242 int rv; 4243 size_t bufsize; 4244 4245 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4246 return (DDI_PROP_NOT_FOUND); 4247 } 4248 4249 ASSERT(MDI_PI_LOCKED(pip)); 4250 4251 bufsize = buflen; 4252 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4253 NV_ENCODE_NATIVE, KM_SLEEP); 4254 4255 return (i_map_nvlist_error_to_mdi(rv)); 4256 } 4257 4258 /* 4259 * mdi_prop_update_byte(): 4260 * Create/Update a byte property 4261 */ 4262 int 4263 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4264 { 4265 int rv; 4266 4267 if (pip == NULL) { 4268 return (DDI_PROP_INVAL_ARG); 4269 } 4270 ASSERT(!MDI_PI_LOCKED(pip)); 4271 MDI_PI_LOCK(pip); 4272 if (MDI_PI(pip)->pi_prop == NULL) { 4273 MDI_PI_UNLOCK(pip); 4274 return (DDI_PROP_NOT_FOUND); 4275 } 4276 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4277 MDI_PI_UNLOCK(pip); 4278 return (i_map_nvlist_error_to_mdi(rv)); 4279 } 4280 4281 /* 4282 * mdi_prop_update_byte_array(): 4283 * Create/Update a byte array property 4284 */ 4285 int 4286 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4287 uint_t nelements) 4288 { 4289 int rv; 4290 4291 if (pip == NULL) { 4292 return (DDI_PROP_INVAL_ARG); 4293 } 4294 ASSERT(!MDI_PI_LOCKED(pip)); 4295 MDI_PI_LOCK(pip); 4296 if (MDI_PI(pip)->pi_prop == NULL) { 
4297 MDI_PI_UNLOCK(pip); 4298 return (DDI_PROP_NOT_FOUND); 4299 } 4300 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4301 MDI_PI_UNLOCK(pip); 4302 return (i_map_nvlist_error_to_mdi(rv)); 4303 } 4304 4305 /* 4306 * mdi_prop_update_int(): 4307 * Create/Update a 32 bit integer property 4308 */ 4309 int 4310 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4311 { 4312 int rv; 4313 4314 if (pip == NULL) { 4315 return (DDI_PROP_INVAL_ARG); 4316 } 4317 ASSERT(!MDI_PI_LOCKED(pip)); 4318 MDI_PI_LOCK(pip); 4319 if (MDI_PI(pip)->pi_prop == NULL) { 4320 MDI_PI_UNLOCK(pip); 4321 return (DDI_PROP_NOT_FOUND); 4322 } 4323 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4324 MDI_PI_UNLOCK(pip); 4325 return (i_map_nvlist_error_to_mdi(rv)); 4326 } 4327 4328 /* 4329 * mdi_prop_update_int64(): 4330 * Create/Update a 64 bit integer property 4331 */ 4332 int 4333 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4334 { 4335 int rv; 4336 4337 if (pip == NULL) { 4338 return (DDI_PROP_INVAL_ARG); 4339 } 4340 ASSERT(!MDI_PI_LOCKED(pip)); 4341 MDI_PI_LOCK(pip); 4342 if (MDI_PI(pip)->pi_prop == NULL) { 4343 MDI_PI_UNLOCK(pip); 4344 return (DDI_PROP_NOT_FOUND); 4345 } 4346 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4347 MDI_PI_UNLOCK(pip); 4348 return (i_map_nvlist_error_to_mdi(rv)); 4349 } 4350 4351 /* 4352 * mdi_prop_update_int_array(): 4353 * Create/Update a int array property 4354 */ 4355 int 4356 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4357 uint_t nelements) 4358 { 4359 int rv; 4360 4361 if (pip == NULL) { 4362 return (DDI_PROP_INVAL_ARG); 4363 } 4364 ASSERT(!MDI_PI_LOCKED(pip)); 4365 MDI_PI_LOCK(pip); 4366 if (MDI_PI(pip)->pi_prop == NULL) { 4367 MDI_PI_UNLOCK(pip); 4368 return (DDI_PROP_NOT_FOUND); 4369 } 4370 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4371 nelements); 4372 MDI_PI_UNLOCK(pip); 4373 return (i_map_nvlist_error_to_mdi(rv)); 
4374 } 4375 4376 /* 4377 * mdi_prop_update_string(): 4378 * Create/Update a string property 4379 */ 4380 int 4381 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4382 { 4383 int rv; 4384 4385 if (pip == NULL) { 4386 return (DDI_PROP_INVAL_ARG); 4387 } 4388 ASSERT(!MDI_PI_LOCKED(pip)); 4389 MDI_PI_LOCK(pip); 4390 if (MDI_PI(pip)->pi_prop == NULL) { 4391 MDI_PI_UNLOCK(pip); 4392 return (DDI_PROP_NOT_FOUND); 4393 } 4394 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4395 MDI_PI_UNLOCK(pip); 4396 return (i_map_nvlist_error_to_mdi(rv)); 4397 } 4398 4399 /* 4400 * mdi_prop_update_string_array(): 4401 * Create/Update a string array property 4402 */ 4403 int 4404 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4405 uint_t nelements) 4406 { 4407 int rv; 4408 4409 if (pip == NULL) { 4410 return (DDI_PROP_INVAL_ARG); 4411 } 4412 ASSERT(!MDI_PI_LOCKED(pip)); 4413 MDI_PI_LOCK(pip); 4414 if (MDI_PI(pip)->pi_prop == NULL) { 4415 MDI_PI_UNLOCK(pip); 4416 return (DDI_PROP_NOT_FOUND); 4417 } 4418 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4419 nelements); 4420 MDI_PI_UNLOCK(pip); 4421 return (i_map_nvlist_error_to_mdi(rv)); 4422 } 4423 4424 /* 4425 * mdi_prop_lookup_byte(): 4426 * Look for byte property identified by name. The data returned 4427 * is the actual property and valid as long as mdi_pathinfo_t node 4428 * is alive. 4429 */ 4430 int 4431 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4432 { 4433 int rv; 4434 4435 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4436 return (DDI_PROP_NOT_FOUND); 4437 } 4438 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4439 return (i_map_nvlist_error_to_mdi(rv)); 4440 } 4441 4442 4443 /* 4444 * mdi_prop_lookup_byte_array(): 4445 * Look for byte array property identified by name. The data 4446 * returned is the actual property and valid as long as 4447 * mdi_pathinfo_t node is alive. 
4448 */ 4449 int 4450 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4451 uint_t *nelements) 4452 { 4453 int rv; 4454 4455 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4456 return (DDI_PROP_NOT_FOUND); 4457 } 4458 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4459 nelements); 4460 return (i_map_nvlist_error_to_mdi(rv)); 4461 } 4462 4463 /* 4464 * mdi_prop_lookup_int(): 4465 * Look for int property identified by name. The data returned 4466 * is the actual property and valid as long as mdi_pathinfo_t 4467 * node is alive. 4468 */ 4469 int 4470 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4471 { 4472 int rv; 4473 4474 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4475 return (DDI_PROP_NOT_FOUND); 4476 } 4477 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4478 return (i_map_nvlist_error_to_mdi(rv)); 4479 } 4480 4481 /* 4482 * mdi_prop_lookup_int64(): 4483 * Look for int64 property identified by name. The data returned 4484 * is the actual property and valid as long as mdi_pathinfo_t node 4485 * is alive. 4486 */ 4487 int 4488 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4489 { 4490 int rv; 4491 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4492 return (DDI_PROP_NOT_FOUND); 4493 } 4494 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4495 return (i_map_nvlist_error_to_mdi(rv)); 4496 } 4497 4498 /* 4499 * mdi_prop_lookup_int_array(): 4500 * Look for int array property identified by name. The data 4501 * returned is the actual property and valid as long as 4502 * mdi_pathinfo_t node is alive. 
4503 */ 4504 int 4505 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4506 uint_t *nelements) 4507 { 4508 int rv; 4509 4510 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4511 return (DDI_PROP_NOT_FOUND); 4512 } 4513 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4514 (int32_t **)data, nelements); 4515 return (i_map_nvlist_error_to_mdi(rv)); 4516 } 4517 4518 /* 4519 * mdi_prop_lookup_string(): 4520 * Look for string property identified by name. The data 4521 * returned is the actual property and valid as long as 4522 * mdi_pathinfo_t node is alive. 4523 */ 4524 int 4525 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4526 { 4527 int rv; 4528 4529 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4530 return (DDI_PROP_NOT_FOUND); 4531 } 4532 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4533 return (i_map_nvlist_error_to_mdi(rv)); 4534 } 4535 4536 /* 4537 * mdi_prop_lookup_string_array(): 4538 * Look for string array property identified by name. The data 4539 * returned is the actual property and valid as long as 4540 * mdi_pathinfo_t node is alive. 4541 */ 4542 int 4543 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4544 uint_t *nelements) 4545 { 4546 int rv; 4547 4548 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4549 return (DDI_PROP_NOT_FOUND); 4550 } 4551 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4552 nelements); 4553 return (i_map_nvlist_error_to_mdi(rv)); 4554 } 4555 4556 /* 4557 * mdi_prop_free(): 4558 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4559 * functions return the pointer to actual property data and not a 4560 * copy of it. So the data returned is valid as long as 4561 * mdi_pathinfo_t node is valid. 
4562 */ 4563 /*ARGSUSED*/ 4564 int 4565 mdi_prop_free(void *data) 4566 { 4567 return (DDI_PROP_SUCCESS); 4568 } 4569 4570 /*ARGSUSED*/ 4571 static void 4572 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4573 { 4574 char *phci_path, *ct_path; 4575 char *ct_status; 4576 char *status; 4577 dev_info_t *dip = ct->ct_dip; 4578 char lb_buf[64]; 4579 4580 ASSERT(MDI_CLIENT_LOCKED(ct)); 4581 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4582 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4583 return; 4584 } 4585 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4586 ct_status = "optimal"; 4587 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4588 ct_status = "degraded"; 4589 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4590 ct_status = "failed"; 4591 } else { 4592 ct_status = "unknown"; 4593 } 4594 4595 if (MDI_PI_IS_OFFLINE(pip)) { 4596 status = "offline"; 4597 } else if (MDI_PI_IS_ONLINE(pip)) { 4598 status = "online"; 4599 } else if (MDI_PI_IS_STANDBY(pip)) { 4600 status = "standby"; 4601 } else if (MDI_PI_IS_FAULT(pip)) { 4602 status = "faulted"; 4603 } else { 4604 status = "unknown"; 4605 } 4606 4607 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4608 (void) snprintf(lb_buf, sizeof (lb_buf), 4609 "%s, region-size: %d", mdi_load_balance_lba, 4610 ct->ct_lb_args->region_size); 4611 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4612 (void) snprintf(lb_buf, sizeof (lb_buf), 4613 "%s", mdi_load_balance_none); 4614 } else { 4615 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4616 mdi_load_balance_rr); 4617 } 4618 4619 if (dip) { 4620 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4621 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4622 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4623 "path %s (%s%d) to target address: %s is %s" 4624 " Load balancing: %s\n", 4625 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4626 ddi_get_instance(dip), ct_status, 4627 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4628 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4629 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4630 MDI_PI(pip)->pi_addr, status, lb_buf); 4631 kmem_free(phci_path, MAXPATHLEN); 4632 kmem_free(ct_path, MAXPATHLEN); 4633 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4634 } 4635 } 4636 4637 #ifdef DEBUG 4638 /* 4639 * i_mdi_log(): 4640 * Utility function for error message management 4641 * 4642 */ 4643 /*PRINTFLIKE3*/ 4644 static void 4645 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4646 { 4647 char name[MAXNAMELEN]; 4648 char buf[MAXNAMELEN]; 4649 char *bp; 4650 va_list ap; 4651 int log_only = 0; 4652 int boot_only = 0; 4653 int console_only = 0; 4654 4655 if (dip) { 4656 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4657 ddi_node_name(dip), ddi_get_instance(dip)); 4658 } else { 4659 name[0] = 0; 4660 } 4661 4662 va_start(ap, fmt); 4663 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4664 va_end(ap); 4665 4666 switch (buf[0]) { 4667 case '!': 4668 bp = &buf[1]; 4669 log_only = 1; 4670 break; 4671 case '?': 4672 bp = &buf[1]; 4673 boot_only = 1; 4674 break; 4675 case '^': 4676 bp = &buf[1]; 4677 console_only = 1; 4678 break; 4679 default: 4680 bp = buf; 4681 break; 4682 } 4683 if (mdi_debug_logonly) { 4684 log_only = 1; 4685 boot_only = 0; 4686 console_only = 0; 4687 } 4688 4689 switch (level) { 4690 case CE_NOTE: 4691 level = CE_CONT; 4692 /* FALLTHROUGH */ 4693 case CE_CONT: 4694 case CE_WARN: 4695 case CE_PANIC: 4696 if (boot_only) { 4697 cmn_err(level, "?mdi: %s%s", name, bp); 4698 } else if (console_only) { 4699 cmn_err(level, "^mdi: %s%s", name, bp); 4700 } else if (log_only) { 4701 cmn_err(level, "!mdi: %s%s", name, bp); 4702 } else { 4703 cmn_err(level, "mdi: %s%s", name, bp); 4704 } 4705 break; 4706 default: 4707 cmn_err(level, "mdi: %s%s", name, bp); 4708 break; 4709 } 4710 } 4711 #endif /* DEBUG */ 4712 4713 void 4714 i_mdi_client_online(dev_info_t *ct_dip) 4715 { 4716 mdi_client_t *ct; 4717 4718 /* 4719 * Client online notification. 
Mark client state as online 4720 * restore our binding with dev_info node 4721 */ 4722 ct = i_devi_get_client(ct_dip); 4723 ASSERT(ct != NULL); 4724 MDI_CLIENT_LOCK(ct); 4725 MDI_CLIENT_SET_ONLINE(ct); 4726 /* catch for any memory leaks */ 4727 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4728 ct->ct_dip = ct_dip; 4729 4730 if (ct->ct_power_cnt == 0) 4731 (void) i_mdi_power_all_phci(ct); 4732 4733 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4734 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4735 i_mdi_pm_hold_client(ct, 1); 4736 4737 MDI_CLIENT_UNLOCK(ct); 4738 } 4739 4740 void 4741 i_mdi_phci_online(dev_info_t *ph_dip) 4742 { 4743 mdi_phci_t *ph; 4744 4745 /* pHCI online notification. Mark state accordingly */ 4746 ph = i_devi_get_phci(ph_dip); 4747 ASSERT(ph != NULL); 4748 MDI_PHCI_LOCK(ph); 4749 MDI_PHCI_SET_ONLINE(ph); 4750 MDI_PHCI_UNLOCK(ph); 4751 } 4752 4753 /* 4754 * mdi_devi_online(): 4755 * Online notification from NDI framework on pHCI/client 4756 * device online. 4757 * Return Values: 4758 * NDI_SUCCESS 4759 * MDI_FAILURE 4760 */ 4761 /*ARGSUSED*/ 4762 int 4763 mdi_devi_online(dev_info_t *dip, uint_t flags) 4764 { 4765 if (MDI_PHCI(dip)) { 4766 i_mdi_phci_online(dip); 4767 } 4768 4769 if (MDI_CLIENT(dip)) { 4770 i_mdi_client_online(dip); 4771 } 4772 return (NDI_SUCCESS); 4773 } 4774 4775 /* 4776 * mdi_devi_offline(): 4777 * Offline notification from NDI framework on pHCI/Client device 4778 * offline. 
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	/* Client role is offlined first; failure aborts the whole offline */
	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*
 * i_mdi_phci_offline():
 *	Offline a pHCI and all of its child mdi_pathinfo nodes, refusing
 *	(NDI_BUSY) when the pHCI or any client is in a transient state or
 *	when it carries the last functional path to a client that cannot
 *	itself be offlined.
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
	    (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* Nothing to do; treat a second offline as success */
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
		    (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment.
Please try again later.")); 4852 MDI_PHCI_UNLOCK(ph); 4853 return (NDI_BUSY); 4854 } 4855 4856 pip = ph->ph_path_head; 4857 while (pip != NULL) { 4858 MDI_PI_LOCK(pip); 4859 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4860 4861 /* 4862 * The mdi_pathinfo state is OK. Check the client state. 4863 * If failover in progress fail the pHCI from offlining 4864 */ 4865 ct = MDI_PI(pip)->pi_client; 4866 i_mdi_client_lock(ct, pip); 4867 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4868 (ct->ct_unstable)) { 4869 /* 4870 * Failover is in progress, Fail the DR 4871 */ 4872 MDI_DEBUG(1, (CE_WARN, dip, 4873 "!pHCI device (%s%d) is Busy. %s", 4874 ddi_driver_name(dip), ddi_get_instance(dip), 4875 "This device can not be removed at " 4876 "this moment. Please try again later.")); 4877 MDI_PI_UNLOCK(pip); 4878 i_mdi_client_unlock(ct); 4879 MDI_PHCI_UNLOCK(ph); 4880 return (NDI_BUSY); 4881 } 4882 MDI_PI_UNLOCK(pip); 4883 4884 /* 4885 * Check to see of we are removing the last path of this 4886 * client device... 4887 */ 4888 cdip = ct->ct_dip; 4889 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4890 (i_mdi_client_compute_state(ct, ph) == 4891 MDI_CLIENT_STATE_FAILED)) { 4892 i_mdi_client_unlock(ct); 4893 MDI_PHCI_UNLOCK(ph); 4894 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4895 /* 4896 * ndi_devi_offline() failed. 4897 * This pHCI provides the critical path 4898 * to one or more client devices. 4899 * Return busy. 4900 */ 4901 MDI_PHCI_LOCK(ph); 4902 MDI_DEBUG(1, (CE_WARN, dip, 4903 "!pHCI device (%s%d) is Busy. %s", 4904 ddi_driver_name(dip), ddi_get_instance(dip), 4905 "This device can not be removed at " 4906 "this moment. 
Please try again later.")); 4907 failed_pip = pip; 4908 break; 4909 } else { 4910 MDI_PHCI_LOCK(ph); 4911 pip = next; 4912 } 4913 } else { 4914 i_mdi_client_unlock(ct); 4915 pip = next; 4916 } 4917 } 4918 4919 if (failed_pip) { 4920 pip = ph->ph_path_head; 4921 while (pip != failed_pip) { 4922 MDI_PI_LOCK(pip); 4923 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4924 ct = MDI_PI(pip)->pi_client; 4925 i_mdi_client_lock(ct, pip); 4926 cdip = ct->ct_dip; 4927 switch (MDI_CLIENT_STATE(ct)) { 4928 case MDI_CLIENT_STATE_OPTIMAL: 4929 case MDI_CLIENT_STATE_DEGRADED: 4930 if (cdip) { 4931 MDI_PI_UNLOCK(pip); 4932 i_mdi_client_unlock(ct); 4933 MDI_PHCI_UNLOCK(ph); 4934 (void) ndi_devi_online(cdip, 0); 4935 MDI_PHCI_LOCK(ph); 4936 pip = next; 4937 continue; 4938 } 4939 break; 4940 4941 case MDI_CLIENT_STATE_FAILED: 4942 if (cdip) { 4943 MDI_PI_UNLOCK(pip); 4944 i_mdi_client_unlock(ct); 4945 MDI_PHCI_UNLOCK(ph); 4946 (void) ndi_devi_offline(cdip, 0); 4947 MDI_PHCI_LOCK(ph); 4948 pip = next; 4949 continue; 4950 } 4951 break; 4952 } 4953 MDI_PI_UNLOCK(pip); 4954 i_mdi_client_unlock(ct); 4955 pip = next; 4956 } 4957 MDI_PHCI_UNLOCK(ph); 4958 return (NDI_BUSY); 4959 } 4960 4961 /* 4962 * Mark the pHCI as offline 4963 */ 4964 MDI_PHCI_SET_OFFLINE(ph); 4965 4966 /* 4967 * Mark the child mdi_pathinfo nodes as transient 4968 */ 4969 pip = ph->ph_path_head; 4970 while (pip != NULL) { 4971 MDI_PI_LOCK(pip); 4972 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4973 MDI_PI_SET_OFFLINING(pip); 4974 MDI_PI_UNLOCK(pip); 4975 pip = next; 4976 } 4977 MDI_PHCI_UNLOCK(ph); 4978 /* 4979 * Give a chance for any pending commands to execute 4980 */ 4981 delay(1); 4982 MDI_PHCI_LOCK(ph); 4983 pip = ph->ph_path_head; 4984 while (pip != NULL) { 4985 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4986 (void) i_mdi_pi_offline(pip, flags); 4987 MDI_PI_LOCK(pip); 4988 ct = MDI_PI(pip)->pi_client; 4989 if (!MDI_PI_IS_OFFLINE(pip)) { 4990 MDI_DEBUG(1, (CE_WARN, dip, 4991 "!pHCI device (%s%d) 
is Busy. %s", 4992 ddi_driver_name(dip), ddi_get_instance(dip), 4993 "This device can not be removed at " 4994 "this moment. Please try again later.")); 4995 MDI_PI_UNLOCK(pip); 4996 MDI_PHCI_SET_ONLINE(ph); 4997 MDI_PHCI_UNLOCK(ph); 4998 return (NDI_BUSY); 4999 } 5000 MDI_PI_UNLOCK(pip); 5001 pip = next; 5002 } 5003 MDI_PHCI_UNLOCK(ph); 5004 5005 return (rv); 5006 } 5007 5008 void 5009 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5010 { 5011 mdi_phci_t *ph; 5012 mdi_client_t *ct; 5013 mdi_pathinfo_t *pip; 5014 mdi_pathinfo_t *next; 5015 dev_info_t *cdip; 5016 5017 if (!MDI_PHCI(dip)) 5018 return; 5019 5020 ph = i_devi_get_phci(dip); 5021 if (ph == NULL) { 5022 return; 5023 } 5024 5025 MDI_PHCI_LOCK(ph); 5026 5027 if (MDI_PHCI_IS_OFFLINE(ph)) { 5028 /* has no last path */ 5029 MDI_PHCI_UNLOCK(ph); 5030 return; 5031 } 5032 5033 pip = ph->ph_path_head; 5034 while (pip != NULL) { 5035 MDI_PI_LOCK(pip); 5036 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5037 5038 ct = MDI_PI(pip)->pi_client; 5039 i_mdi_client_lock(ct, pip); 5040 MDI_PI_UNLOCK(pip); 5041 5042 cdip = ct->ct_dip; 5043 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5044 (i_mdi_client_compute_state(ct, ph) == 5045 MDI_CLIENT_STATE_FAILED)) { 5046 /* Last path. 
Mark client dip as retiring */ 5047 i_mdi_client_unlock(ct); 5048 MDI_PHCI_UNLOCK(ph); 5049 (void) e_ddi_mark_retiring(cdip, cons_array); 5050 MDI_PHCI_LOCK(ph); 5051 pip = next; 5052 } else { 5053 i_mdi_client_unlock(ct); 5054 pip = next; 5055 } 5056 } 5057 5058 MDI_PHCI_UNLOCK(ph); 5059 5060 return; 5061 } 5062 5063 void 5064 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5065 { 5066 mdi_phci_t *ph; 5067 mdi_client_t *ct; 5068 mdi_pathinfo_t *pip; 5069 mdi_pathinfo_t *next; 5070 dev_info_t *cdip; 5071 5072 if (!MDI_PHCI(dip)) 5073 return; 5074 5075 ph = i_devi_get_phci(dip); 5076 if (ph == NULL) 5077 return; 5078 5079 MDI_PHCI_LOCK(ph); 5080 5081 if (MDI_PHCI_IS_OFFLINE(ph)) { 5082 MDI_PHCI_UNLOCK(ph); 5083 /* not last path */ 5084 return; 5085 } 5086 5087 if (ph->ph_unstable) { 5088 MDI_PHCI_UNLOCK(ph); 5089 /* can't check for constraints */ 5090 *constraint = 0; 5091 return; 5092 } 5093 5094 pip = ph->ph_path_head; 5095 while (pip != NULL) { 5096 MDI_PI_LOCK(pip); 5097 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5098 5099 /* 5100 * The mdi_pathinfo state is OK. Check the client state. 5101 * If failover in progress fail the pHCI from offlining 5102 */ 5103 ct = MDI_PI(pip)->pi_client; 5104 i_mdi_client_lock(ct, pip); 5105 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5106 (ct->ct_unstable)) { 5107 /* 5108 * Failover is in progress, can't check for constraints 5109 */ 5110 MDI_PI_UNLOCK(pip); 5111 i_mdi_client_unlock(ct); 5112 MDI_PHCI_UNLOCK(ph); 5113 *constraint = 0; 5114 return; 5115 } 5116 MDI_PI_UNLOCK(pip); 5117 5118 /* 5119 * Check to see of we are retiring the last path of this 5120 * client device... 
5121 */ 5122 cdip = ct->ct_dip; 5123 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5124 (i_mdi_client_compute_state(ct, ph) == 5125 MDI_CLIENT_STATE_FAILED)) { 5126 i_mdi_client_unlock(ct); 5127 MDI_PHCI_UNLOCK(ph); 5128 (void) e_ddi_retire_notify(cdip, constraint); 5129 MDI_PHCI_LOCK(ph); 5130 pip = next; 5131 } else { 5132 i_mdi_client_unlock(ct); 5133 pip = next; 5134 } 5135 } 5136 5137 MDI_PHCI_UNLOCK(ph); 5138 5139 return; 5140 } 5141 5142 /* 5143 * offline the path(s) hanging off the PHCI. If the 5144 * last path to any client, check that constraints 5145 * have been applied. 5146 */ 5147 void 5148 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) 5149 { 5150 mdi_phci_t *ph; 5151 mdi_client_t *ct; 5152 mdi_pathinfo_t *pip; 5153 mdi_pathinfo_t *next; 5154 dev_info_t *cdip; 5155 int unstable = 0; 5156 int constraint; 5157 5158 if (!MDI_PHCI(dip)) 5159 return; 5160 5161 ph = i_devi_get_phci(dip); 5162 if (ph == NULL) { 5163 /* no last path and no pips */ 5164 return; 5165 } 5166 5167 MDI_PHCI_LOCK(ph); 5168 5169 if (MDI_PHCI_IS_OFFLINE(ph)) { 5170 MDI_PHCI_UNLOCK(ph); 5171 /* no last path and no pips */ 5172 return; 5173 } 5174 5175 /* 5176 * Check to see if the pHCI can be offlined 5177 */ 5178 if (ph->ph_unstable) { 5179 unstable = 1; 5180 } 5181 5182 pip = ph->ph_path_head; 5183 while (pip != NULL) { 5184 MDI_PI_LOCK(pip); 5185 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5186 5187 /* 5188 * if failover in progress fail the pHCI from offlining 5189 */ 5190 ct = MDI_PI(pip)->pi_client; 5191 i_mdi_client_lock(ct, pip); 5192 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5193 (ct->ct_unstable)) { 5194 unstable = 1; 5195 } 5196 MDI_PI_UNLOCK(pip); 5197 5198 /* 5199 * Check to see of we are removing the last path of this 5200 * client device... 
5201 */ 5202 cdip = ct->ct_dip; 5203 if (!phci_only && cdip && 5204 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5205 (i_mdi_client_compute_state(ct, ph) == 5206 MDI_CLIENT_STATE_FAILED)) { 5207 i_mdi_client_unlock(ct); 5208 MDI_PHCI_UNLOCK(ph); 5209 /* 5210 * We don't retire clients we just retire the 5211 * path to a client. If it is the last path 5212 * to a client, constraints are checked and 5213 * if we pass the last path is offlined. MPXIO will 5214 * then fail all I/Os to the client. Since we don't 5215 * want to retire the client on a path error 5216 * set constraint = 0 so that the client dip 5217 * is not retired. 5218 */ 5219 constraint = 0; 5220 (void) e_ddi_retire_finalize(cdip, &constraint); 5221 MDI_PHCI_LOCK(ph); 5222 pip = next; 5223 } else { 5224 i_mdi_client_unlock(ct); 5225 pip = next; 5226 } 5227 } 5228 5229 /* 5230 * Cannot offline pip(s) 5231 */ 5232 if (unstable) { 5233 cmn_err(CE_WARN, "PHCI in transient state, cannot " 5234 "retire, dip = %p", (void *)dip); 5235 MDI_PHCI_UNLOCK(ph); 5236 return; 5237 } 5238 5239 /* 5240 * Mark the pHCI as offline 5241 */ 5242 MDI_PHCI_SET_OFFLINE(ph); 5243 5244 /* 5245 * Mark the child mdi_pathinfo nodes as transient 5246 */ 5247 pip = ph->ph_path_head; 5248 while (pip != NULL) { 5249 MDI_PI_LOCK(pip); 5250 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5251 MDI_PI_SET_OFFLINING(pip); 5252 MDI_PI_UNLOCK(pip); 5253 pip = next; 5254 } 5255 MDI_PHCI_UNLOCK(ph); 5256 /* 5257 * Give a chance for any pending commands to execute 5258 */ 5259 delay(1); 5260 MDI_PHCI_LOCK(ph); 5261 pip = ph->ph_path_head; 5262 while (pip != NULL) { 5263 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5264 (void) i_mdi_pi_offline(pip, 0); 5265 MDI_PI_LOCK(pip); 5266 ct = MDI_PI(pip)->pi_client; 5267 if (!MDI_PI_IS_OFFLINE(pip)) { 5268 cmn_err(CE_WARN, "PHCI busy, cannot offline path: " 5269 "PHCI dip = %p", (void *)dip); 5270 MDI_PI_UNLOCK(pip); 5271 MDI_PHCI_SET_ONLINE(ph); 5272 MDI_PHCI_UNLOCK(ph); 5273 return; 5274 } 
5275 MDI_PI_UNLOCK(pip); 5276 pip = next; 5277 } 5278 MDI_PHCI_UNLOCK(ph); 5279 5280 return; 5281 } 5282 5283 void 5284 mdi_phci_unretire(dev_info_t *dip) 5285 { 5286 ASSERT(MDI_PHCI(dip)); 5287 5288 /* 5289 * Online the phci 5290 */ 5291 i_mdi_phci_online(dip); 5292 } 5293 5294 /*ARGSUSED*/ 5295 static int 5296 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5297 { 5298 int rv = NDI_SUCCESS; 5299 mdi_client_t *ct; 5300 5301 /* 5302 * Client component to go offline. Make sure that we are 5303 * not in failing over state and update client state 5304 * accordingly 5305 */ 5306 ct = i_devi_get_client(dip); 5307 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 5308 (void *)dip, (void *)ct)); 5309 if (ct != NULL) { 5310 MDI_CLIENT_LOCK(ct); 5311 if (ct->ct_unstable) { 5312 /* 5313 * One or more paths are in transient state, 5314 * Dont allow offline of a client device 5315 */ 5316 MDI_DEBUG(1, (CE_WARN, dip, 5317 "!One or more paths to this device is " 5318 "in transient state. This device can not " 5319 "be removed at this moment. " 5320 "Please try again later.")); 5321 MDI_CLIENT_UNLOCK(ct); 5322 return (NDI_BUSY); 5323 } 5324 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5325 /* 5326 * Failover is in progress, Dont allow DR of 5327 * a client device 5328 */ 5329 MDI_DEBUG(1, (CE_WARN, dip, 5330 "!Client device (%s%d) is Busy. %s", 5331 ddi_driver_name(dip), ddi_get_instance(dip), 5332 "This device can not be removed at " 5333 "this moment. 
Please try again later.")); 5334 MDI_CLIENT_UNLOCK(ct); 5335 return (NDI_BUSY); 5336 } 5337 MDI_CLIENT_SET_OFFLINE(ct); 5338 5339 /* 5340 * Unbind our relationship with the dev_info node 5341 */ 5342 if (flags & NDI_DEVI_REMOVE) { 5343 ct->ct_dip = NULL; 5344 } 5345 MDI_CLIENT_UNLOCK(ct); 5346 } 5347 return (rv); 5348 } 5349 5350 /* 5351 * mdi_pre_attach(): 5352 * Pre attach() notification handler 5353 */ 5354 /*ARGSUSED*/ 5355 int 5356 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5357 { 5358 /* don't support old DDI_PM_RESUME */ 5359 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5360 (cmd == DDI_PM_RESUME)) 5361 return (DDI_FAILURE); 5362 5363 return (DDI_SUCCESS); 5364 } 5365 5366 /* 5367 * mdi_post_attach(): 5368 * Post attach() notification handler 5369 */ 5370 /*ARGSUSED*/ 5371 void 5372 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5373 { 5374 mdi_phci_t *ph; 5375 mdi_client_t *ct; 5376 mdi_vhci_t *vh; 5377 5378 if (MDI_PHCI(dip)) { 5379 ph = i_devi_get_phci(dip); 5380 ASSERT(ph != NULL); 5381 5382 MDI_PHCI_LOCK(ph); 5383 switch (cmd) { 5384 case DDI_ATTACH: 5385 MDI_DEBUG(2, (CE_NOTE, dip, 5386 "!pHCI post_attach: called %p\n", (void *)ph)); 5387 if (error == DDI_SUCCESS) { 5388 MDI_PHCI_SET_ATTACH(ph); 5389 } else { 5390 MDI_DEBUG(1, (CE_NOTE, dip, 5391 "!pHCI post_attach: failed error=%d\n", 5392 error)); 5393 MDI_PHCI_SET_DETACH(ph); 5394 } 5395 break; 5396 5397 case DDI_RESUME: 5398 MDI_DEBUG(2, (CE_NOTE, dip, 5399 "!pHCI post_resume: called %p\n", (void *)ph)); 5400 if (error == DDI_SUCCESS) { 5401 MDI_PHCI_SET_RESUME(ph); 5402 } else { 5403 MDI_DEBUG(1, (CE_NOTE, dip, 5404 "!pHCI post_resume: failed error=%d\n", 5405 error)); 5406 MDI_PHCI_SET_SUSPEND(ph); 5407 } 5408 break; 5409 } 5410 MDI_PHCI_UNLOCK(ph); 5411 } 5412 5413 if (MDI_CLIENT(dip)) { 5414 ct = i_devi_get_client(dip); 5415 ASSERT(ct != NULL); 5416 5417 MDI_CLIENT_LOCK(ct); 5418 switch (cmd) { 5419 case DDI_ATTACH: 5420 MDI_DEBUG(2, (CE_NOTE, dip, 
5421 "!Client post_attach: called %p\n", (void *)ct)); 5422 if (error != DDI_SUCCESS) { 5423 MDI_DEBUG(1, (CE_NOTE, dip, 5424 "!Client post_attach: failed error=%d\n", 5425 error)); 5426 MDI_CLIENT_SET_DETACH(ct); 5427 MDI_DEBUG(4, (CE_WARN, dip, 5428 "mdi_post_attach i_mdi_pm_reset_client\n")); 5429 i_mdi_pm_reset_client(ct); 5430 break; 5431 } 5432 5433 /* 5434 * Client device has successfully attached, inform 5435 * the vhci. 5436 */ 5437 vh = ct->ct_vhci; 5438 if (vh->vh_ops->vo_client_attached) 5439 (*vh->vh_ops->vo_client_attached)(dip); 5440 5441 MDI_CLIENT_SET_ATTACH(ct); 5442 break; 5443 5444 case DDI_RESUME: 5445 MDI_DEBUG(2, (CE_NOTE, dip, 5446 "!Client post_attach: called %p\n", (void *)ct)); 5447 if (error == DDI_SUCCESS) { 5448 MDI_CLIENT_SET_RESUME(ct); 5449 } else { 5450 MDI_DEBUG(1, (CE_NOTE, dip, 5451 "!Client post_resume: failed error=%d\n", 5452 error)); 5453 MDI_CLIENT_SET_SUSPEND(ct); 5454 } 5455 break; 5456 } 5457 MDI_CLIENT_UNLOCK(ct); 5458 } 5459 } 5460 5461 /* 5462 * mdi_pre_detach(): 5463 * Pre detach notification handler 5464 */ 5465 /*ARGSUSED*/ 5466 int 5467 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5468 { 5469 int rv = DDI_SUCCESS; 5470 5471 if (MDI_CLIENT(dip)) { 5472 (void) i_mdi_client_pre_detach(dip, cmd); 5473 } 5474 5475 if (MDI_PHCI(dip)) { 5476 rv = i_mdi_phci_pre_detach(dip, cmd); 5477 } 5478 5479 return (rv); 5480 } 5481 5482 /*ARGSUSED*/ 5483 static int 5484 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5485 { 5486 int rv = DDI_SUCCESS; 5487 mdi_phci_t *ph; 5488 mdi_client_t *ct; 5489 mdi_pathinfo_t *pip; 5490 mdi_pathinfo_t *failed_pip = NULL; 5491 mdi_pathinfo_t *next; 5492 5493 ph = i_devi_get_phci(dip); 5494 if (ph == NULL) { 5495 return (rv); 5496 } 5497 5498 MDI_PHCI_LOCK(ph); 5499 switch (cmd) { 5500 case DDI_DETACH: 5501 MDI_DEBUG(2, (CE_NOTE, dip, 5502 "!pHCI pre_detach: called %p\n", (void *)ph)); 5503 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5504 /* 5505 * mdi_pathinfo nodes are still attached 
to 5506 * this pHCI. Fail the detach for this pHCI. 5507 */ 5508 MDI_DEBUG(2, (CE_WARN, dip, 5509 "!pHCI pre_detach: " 5510 "mdi_pathinfo nodes are still attached " 5511 "%p\n", (void *)ph)); 5512 rv = DDI_FAILURE; 5513 break; 5514 } 5515 MDI_PHCI_SET_DETACH(ph); 5516 break; 5517 5518 case DDI_SUSPEND: 5519 /* 5520 * pHCI is getting suspended. Since mpxio client 5521 * devices may not be suspended at this point, to avoid 5522 * a potential stack overflow, it is important to suspend 5523 * client devices before pHCI can be suspended. 5524 */ 5525 5526 MDI_DEBUG(2, (CE_NOTE, dip, 5527 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5528 /* 5529 * Suspend all the client devices accessible through this pHCI 5530 */ 5531 pip = ph->ph_path_head; 5532 while (pip != NULL && rv == DDI_SUCCESS) { 5533 dev_info_t *cdip; 5534 MDI_PI_LOCK(pip); 5535 next = 5536 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5537 ct = MDI_PI(pip)->pi_client; 5538 i_mdi_client_lock(ct, pip); 5539 cdip = ct->ct_dip; 5540 MDI_PI_UNLOCK(pip); 5541 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5542 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5543 i_mdi_client_unlock(ct); 5544 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5545 DDI_SUCCESS) { 5546 /* 5547 * Suspend of one of the client 5548 * device has failed. 5549 */ 5550 MDI_DEBUG(1, (CE_WARN, dip, 5551 "!Suspend of device (%s%d) failed.", 5552 ddi_driver_name(cdip), 5553 ddi_get_instance(cdip))); 5554 failed_pip = pip; 5555 break; 5556 } 5557 } else { 5558 i_mdi_client_unlock(ct); 5559 } 5560 pip = next; 5561 } 5562 5563 if (rv == DDI_SUCCESS) { 5564 /* 5565 * Suspend of client devices is complete. Proceed 5566 * with pHCI suspend. 5567 */ 5568 MDI_PHCI_SET_SUSPEND(ph); 5569 } else { 5570 /* 5571 * Revert back all the suspended client device states 5572 * to converse. 
5573 */ 5574 pip = ph->ph_path_head; 5575 while (pip != failed_pip) { 5576 dev_info_t *cdip; 5577 MDI_PI_LOCK(pip); 5578 next = 5579 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5580 ct = MDI_PI(pip)->pi_client; 5581 i_mdi_client_lock(ct, pip); 5582 cdip = ct->ct_dip; 5583 MDI_PI_UNLOCK(pip); 5584 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5585 i_mdi_client_unlock(ct); 5586 (void) devi_attach(cdip, DDI_RESUME); 5587 } else { 5588 i_mdi_client_unlock(ct); 5589 } 5590 pip = next; 5591 } 5592 } 5593 break; 5594 5595 default: 5596 rv = DDI_FAILURE; 5597 break; 5598 } 5599 MDI_PHCI_UNLOCK(ph); 5600 return (rv); 5601 } 5602 5603 /*ARGSUSED*/ 5604 static int 5605 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5606 { 5607 int rv = DDI_SUCCESS; 5608 mdi_client_t *ct; 5609 5610 ct = i_devi_get_client(dip); 5611 if (ct == NULL) { 5612 return (rv); 5613 } 5614 5615 MDI_CLIENT_LOCK(ct); 5616 switch (cmd) { 5617 case DDI_DETACH: 5618 MDI_DEBUG(2, (CE_NOTE, dip, 5619 "!Client pre_detach: called %p\n", (void *)ct)); 5620 MDI_CLIENT_SET_DETACH(ct); 5621 break; 5622 5623 case DDI_SUSPEND: 5624 MDI_DEBUG(2, (CE_NOTE, dip, 5625 "!Client pre_suspend: called %p\n", (void *)ct)); 5626 MDI_CLIENT_SET_SUSPEND(ct); 5627 break; 5628 5629 default: 5630 rv = DDI_FAILURE; 5631 break; 5632 } 5633 MDI_CLIENT_UNLOCK(ct); 5634 return (rv); 5635 } 5636 5637 /* 5638 * mdi_post_detach(): 5639 * Post detach notification handler 5640 */ 5641 /*ARGSUSED*/ 5642 void 5643 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5644 { 5645 /* 5646 * Detach/Suspend of mpxio component failed. Update our state 5647 * too 5648 */ 5649 if (MDI_PHCI(dip)) 5650 i_mdi_phci_post_detach(dip, cmd, error); 5651 5652 if (MDI_CLIENT(dip)) 5653 i_mdi_client_post_detach(dip, cmd, error); 5654 } 5655 5656 /*ARGSUSED*/ 5657 static void 5658 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5659 { 5660 mdi_phci_t *ph; 5661 5662 /* 5663 * Detach/Suspend of phci component failed. 
Update our state 5664 * too 5665 */ 5666 ph = i_devi_get_phci(dip); 5667 if (ph == NULL) { 5668 return; 5669 } 5670 5671 MDI_PHCI_LOCK(ph); 5672 /* 5673 * Detach of pHCI failed. Restore back converse 5674 * state 5675 */ 5676 switch (cmd) { 5677 case DDI_DETACH: 5678 MDI_DEBUG(2, (CE_NOTE, dip, 5679 "!pHCI post_detach: called %p\n", (void *)ph)); 5680 if (error != DDI_SUCCESS) 5681 MDI_PHCI_SET_ATTACH(ph); 5682 break; 5683 5684 case DDI_SUSPEND: 5685 MDI_DEBUG(2, (CE_NOTE, dip, 5686 "!pHCI post_suspend: called %p\n", (void *)ph)); 5687 if (error != DDI_SUCCESS) 5688 MDI_PHCI_SET_RESUME(ph); 5689 break; 5690 } 5691 MDI_PHCI_UNLOCK(ph); 5692 } 5693 5694 /*ARGSUSED*/ 5695 static void 5696 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5697 { 5698 mdi_client_t *ct; 5699 5700 ct = i_devi_get_client(dip); 5701 if (ct == NULL) { 5702 return; 5703 } 5704 MDI_CLIENT_LOCK(ct); 5705 /* 5706 * Detach of Client failed. Restore back converse 5707 * state 5708 */ 5709 switch (cmd) { 5710 case DDI_DETACH: 5711 MDI_DEBUG(2, (CE_NOTE, dip, 5712 "!Client post_detach: called %p\n", (void *)ct)); 5713 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5714 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5715 "i_mdi_pm_rele_client\n")); 5716 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5717 } else { 5718 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5719 "i_mdi_pm_reset_client\n")); 5720 i_mdi_pm_reset_client(ct); 5721 } 5722 if (error != DDI_SUCCESS) 5723 MDI_CLIENT_SET_ATTACH(ct); 5724 break; 5725 5726 case DDI_SUSPEND: 5727 MDI_DEBUG(2, (CE_NOTE, dip, 5728 "!Client post_suspend: called %p\n", (void *)ct)); 5729 if (error != DDI_SUCCESS) 5730 MDI_CLIENT_SET_RESUME(ct); 5731 break; 5732 } 5733 MDI_CLIENT_UNLOCK(ct); 5734 } 5735 5736 int 5737 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5738 { 5739 return (MDI_PI(pip)->pi_kstats ? 
	    1 : 0);
}

/*
 * create and install per-path (client - pHCI) statistics
 * I/O stats supported: nread, nwritten, reads, and writes
 * Error stats - hard errors, soft errors, & transport errors
 *
 * Returns MDI_SUCCESS (including when the kstats already exist) or
 * MDI_FAILURE when either kstat_create() fails.
 */
int
mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
{
	kstat_t			*kiosp, *kerrsp;
	struct pi_errs		*nsp;
	struct mdi_pi_kstats	*mdi_statp;

	/* Already created for this path: nothing to do */
	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		return (MDI_FAILURE);
	}

	/*
	 * The error kstat shares the I/O kstat's name with an ",err"
	 * suffix.  NOTE(review): strcat() is unbounded here — assumes the
	 * caller's ksname buffer has room for the suffix; confirm callers.
	 */
	(void) strcat(ksname, ",err");
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
	if (kerrsp == NULL) {
		/* undo the first create so we fail cleanly */
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;

kstat_install(kiosp); 5795 kstat_install(kerrsp); 5796 MDI_PI(pip)->pi_kstats = mdi_statp; 5797 return (MDI_SUCCESS); 5798 } 5799 5800 /* 5801 * destroy per-path properties 5802 */ 5803 static void 5804 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5805 { 5806 5807 struct mdi_pi_kstats *mdi_statp; 5808 5809 if (MDI_PI(pip)->pi_kstats == NULL) 5810 return; 5811 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5812 return; 5813 5814 MDI_PI(pip)->pi_kstats = NULL; 5815 5816 /* 5817 * the kstat may be shared between multiple pathinfo nodes 5818 * decrement this pathinfo's usage, removing the kstats 5819 * themselves when the last pathinfo reference is removed. 5820 */ 5821 ASSERT(mdi_statp->pi_kstat_ref > 0); 5822 if (--mdi_statp->pi_kstat_ref != 0) 5823 return; 5824 5825 kstat_delete(mdi_statp->pi_kstat_iostats); 5826 kstat_delete(mdi_statp->pi_kstat_errstats); 5827 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5828 } 5829 5830 /* 5831 * update I/O paths KSTATS 5832 */ 5833 void 5834 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5835 { 5836 kstat_t *iostatp; 5837 size_t xfer_cnt; 5838 5839 ASSERT(pip != NULL); 5840 5841 /* 5842 * I/O can be driven across a path prior to having path 5843 * statistics available, i.e. probe(9e). 5844 */ 5845 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5846 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5847 xfer_cnt = bp->b_bcount - bp->b_resid; 5848 if (bp->b_flags & B_READ) { 5849 KSTAT_IO_PTR(iostatp)->reads++; 5850 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5851 } else { 5852 KSTAT_IO_PTR(iostatp)->writes++; 5853 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5854 } 5855 } 5856 } 5857 5858 /* 5859 * Enable the path(specific client/target/initiator) 5860 * Enabling a path means that MPxIO may select the enabled path for routing 5861 * future I/O requests, subject to other path state constraints. 
 */
int
mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*ph;

	/* Resolve the owning pHCI; without it we cannot reach the vHCI ops */
	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
	if (ph == NULL) {
		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
		    " failed. pip: %p ph = NULL\n", (void *)pip));
		return (MDI_FAILURE);
	}

	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
	    MDI_ENABLE_OP);
	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:"
	    " Returning success pip = %p. ph = %p\n",
	    (void *)pip, (void *)ph));
	return (MDI_SUCCESS);

}

/*
 * Disable the path (specific client/target/initiator)
 * Disabling a path means that MPxIO will not select the disabled path for
 * routing any new I/O requests.
 */
int
mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*ph;

	ph = i_devi_get_phci(mdi_pi_get_phci(pip));
	if (ph == NULL) {
		MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
		    " failed. pip: %p ph = NULL\n", (void *)pip));
		return (MDI_FAILURE);
	}

	(void) i_mdi_enable_disable_path(pip,
	    ph->ph_vhci, flags, MDI_DISABLE_OP);
	MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:"
	    "Returning success pip = %p. ph = %p",
	    (void *)pip, (void *)ph));
	return (MDI_SUCCESS);
}

/*
 * disable the path to a particular pHCI (pHCI specified in the phci_path
 * argument) for a particular client (specified in the client_path argument).
 * Disabling a path means that MPxIO will not select the disabled path for
 * routing any new I/O requests.
5914 * NOTE: this will be removed once the NWS files are changed to use the new 5915 * mdi_{enable,disable}_path interfaces 5916 */ 5917 int 5918 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5919 { 5920 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5921 } 5922 5923 /* 5924 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5925 * argument) for a particular client (specified in the client_path argument). 5926 * Enabling a path means that MPxIO may select the enabled path for routing 5927 * future I/O requests, subject to other path state constraints. 5928 * NOTE: this will be removed once the NWS files are changed to use the new 5929 * mdi_{enable,disable}_path interfaces 5930 */ 5931 5932 int 5933 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5934 { 5935 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5936 } 5937 5938 /* 5939 * Common routine for doing enable/disable. 5940 */ 5941 static mdi_pathinfo_t * 5942 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5943 int op) 5944 { 5945 int sync_flag = 0; 5946 int rv; 5947 mdi_pathinfo_t *next; 5948 int (*f)() = NULL; 5949 5950 f = vh->vh_ops->vo_pi_state_change; 5951 5952 sync_flag = (flags << 8) & 0xf00; 5953 5954 /* 5955 * Do a callback into the mdi consumer to let it 5956 * know that path is about to get enabled/disabled. 
	 */
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, 0,
		    MDI_PI_EXT_STATE(pip),
		    MDI_EXT_STATE_CHANGE | sync_flag |
		    op | MDI_BEFORE_STATE_CHANGE);
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	MDI_PI_LOCK(pip);
	/* capture the next path before the state bits change */
	next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

	switch (flags) {
	case USER_DISABLE:
		if (op == MDI_DISABLE_OP) {
			MDI_PI_SET_USER_DISABLE(pip);
		} else {
			MDI_PI_SET_USER_ENABLE(pip);
		}
		break;
	case DRIVER_DISABLE:
		if (op == MDI_DISABLE_OP) {
			MDI_PI_SET_DRV_DISABLE(pip);
		} else {
			MDI_PI_SET_DRV_ENABLE(pip);
		}
		break;
	case DRIVER_DISABLE_TRANSIENT:
		/*
		 * NOTE(review): rv is only assigned above when the vHCI
		 * provides vo_pi_state_change; otherwise it is read here
		 * uninitialized — rv should be initialized to MDI_SUCCESS
		 * at its declaration.
		 */
		if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
			MDI_PI_SET_DRV_DISABLE_TRANS(pip);
		} else {
			MDI_PI_SET_DRV_ENABLE_TRANS(pip);
		}
		break;
	}
	MDI_PI_UNLOCK(pip);
	/*
	 * Do a callback into the mdi consumer to let it
	 * know that path is now enabled/disabled.
	 */
	if (f != NULL) {
		rv = (*f)(vh->vh_dip, pip, 0,
		    MDI_PI_EXT_STATE(pip),
		    MDI_EXT_STATE_CHANGE | sync_flag |
		    op | MDI_AFTER_STATE_CHANGE);
		if (rv != MDI_SUCCESS) {
			MDI_DEBUG(2, (CE_WARN, vh->vh_dip,
			    "!vo_pi_state_change: failed rv = %x", rv));
		}
	}
	return (next);
}

/*
 * Common routine for doing enable/disable.
6014 * NOTE: this will be removed once the NWS files are changed to use the new 6015 * mdi_{enable,disable}_path has been putback 6016 */ 6017 int 6018 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6019 { 6020 6021 mdi_phci_t *ph; 6022 mdi_vhci_t *vh = NULL; 6023 mdi_client_t *ct; 6024 mdi_pathinfo_t *next, *pip; 6025 int found_it; 6026 6027 ph = i_devi_get_phci(pdip); 6028 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6029 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 6030 (void *)cdip)); 6031 if (ph == NULL) { 6032 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 6033 "Op %d failed. ph = NULL\n", op)); 6034 return (MDI_FAILURE); 6035 } 6036 6037 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6038 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6039 "Op Invalid operation = %d\n", op)); 6040 return (MDI_FAILURE); 6041 } 6042 6043 vh = ph->ph_vhci; 6044 6045 if (cdip == NULL) { 6046 /* 6047 * Need to mark the Phci as enabled/disabled. 6048 */ 6049 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6050 "Op %d for the phci\n", op)); 6051 MDI_PHCI_LOCK(ph); 6052 switch (flags) { 6053 case USER_DISABLE: 6054 if (op == MDI_DISABLE_OP) { 6055 MDI_PHCI_SET_USER_DISABLE(ph); 6056 } else { 6057 MDI_PHCI_SET_USER_ENABLE(ph); 6058 } 6059 break; 6060 case DRIVER_DISABLE: 6061 if (op == MDI_DISABLE_OP) { 6062 MDI_PHCI_SET_DRV_DISABLE(ph); 6063 } else { 6064 MDI_PHCI_SET_DRV_ENABLE(ph); 6065 } 6066 break; 6067 case DRIVER_DISABLE_TRANSIENT: 6068 if (op == MDI_DISABLE_OP) { 6069 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6070 } else { 6071 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6072 } 6073 break; 6074 default: 6075 MDI_PHCI_UNLOCK(ph); 6076 MDI_DEBUG(1, (CE_NOTE, NULL, 6077 "!i_mdi_pi_enable_disable:" 6078 " Invalid flag argument= %d\n", flags)); 6079 } 6080 6081 /* 6082 * Phci has been disabled. Now try to enable/disable 6083 * path info's to each client. 
6084 */ 6085 pip = ph->ph_path_head; 6086 while (pip != NULL) { 6087 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6088 } 6089 MDI_PHCI_UNLOCK(ph); 6090 } else { 6091 6092 /* 6093 * Disable a specific client. 6094 */ 6095 ct = i_devi_get_client(cdip); 6096 if (ct == NULL) { 6097 MDI_DEBUG(1, (CE_NOTE, NULL, 6098 "!i_mdi_pi_enable_disable:" 6099 " failed. ct = NULL operation = %d\n", op)); 6100 return (MDI_FAILURE); 6101 } 6102 6103 MDI_CLIENT_LOCK(ct); 6104 pip = ct->ct_path_head; 6105 found_it = 0; 6106 while (pip != NULL) { 6107 MDI_PI_LOCK(pip); 6108 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6109 if (MDI_PI(pip)->pi_phci == ph) { 6110 MDI_PI_UNLOCK(pip); 6111 found_it = 1; 6112 break; 6113 } 6114 MDI_PI_UNLOCK(pip); 6115 pip = next; 6116 } 6117 6118 6119 MDI_CLIENT_UNLOCK(ct); 6120 if (found_it == 0) { 6121 MDI_DEBUG(1, (CE_NOTE, NULL, 6122 "!i_mdi_pi_enable_disable:" 6123 " failed. Could not find corresponding pip\n")); 6124 return (MDI_FAILURE); 6125 } 6126 6127 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6128 } 6129 6130 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6131 "Op %d Returning success pdip = %p cdip = %p\n", 6132 op, (void *)pdip, (void *)cdip)); 6133 return (MDI_SUCCESS); 6134 } 6135 6136 /* 6137 * Ensure phci powered up 6138 */ 6139 static void 6140 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6141 { 6142 dev_info_t *ph_dip; 6143 6144 ASSERT(pip != NULL); 6145 ASSERT(MDI_PI_LOCKED(pip)); 6146 6147 if (MDI_PI(pip)->pi_pm_held) { 6148 return; 6149 } 6150 6151 ph_dip = mdi_pi_get_phci(pip); 6152 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 6153 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6154 if (ph_dip == NULL) { 6155 return; 6156 } 6157 6158 MDI_PI_UNLOCK(pip); 6159 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6160 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6161 6162 pm_hold_power(ph_dip); 6163 6164 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6165 
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
	if (DEVI(ph_dip)->devi_pm_info)
		MDI_PI(pip)->pi_pm_held = 1;
}

/*
 * Allow phci powered down
 */
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip = NULL;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* no hold was taken by i_mdi_pm_hold_pip(): nothing to release */
	if (MDI_PI(pip)->pi_pm_held == 0) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	ASSERT(ph_dip != NULL);

	/* drop pi_mutex across the pm framework call; see hold side */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n",
	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));

	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_rele_power(ph_dip);
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));

	MDI_PI_LOCK(pip);
	MDI_PI(pip)->pi_pm_held = 0;
}

/*
 * Add 'incr' to the client's power hold count.
 * Caller must hold ct_mutex.
 */
static void
i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	ct->ct_power_cnt += incr;
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p "
	    "ct_power_cnt = %d incr = %d\n", (void *)ct,
	    ct->ct_power_cnt, incr));
	ASSERT(ct->ct_power_cnt >= 0);
}

/*
 * Release the pm hold on every path of client 'ct'.
 * Caller must hold ct_mutex for the duration of the walk.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		/*
		 * NOTE(review): pi_client_link is read after
		 * mdi_rele_path(); this appears to rely on ct_mutex being
		 * held across the walk to keep the list stable -- confirm.
		 */
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * Drop 'decr' power holds for an attached client; once the count hits
 * zero, release the pm holds taken on all of its pHCIs.
 */
static void
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	if (i_ddi_devi_attached(ct->ct_dip)) {
		ct->ct_power_cnt -= decr;
		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
		    "ct_power_cnt = %d decr = %d\n",
		    (void *)ct, ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/*
 * Forcibly zero the client's power bookkeeping and drop all pHCI holds;
 * used when the config/unconfig hold accounting must be resynchronized.
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
	ASSERT(MDI_CLIENT_LOCKED(ct));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	ct->ct_powercnt_reset = 1;
}

/*
 * Take a pm hold on one pHCI and power up all of its components;
 * the hold is dropped again if pm_powerup() fails.
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
	    "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip),
	    ddi_get_instance(ph_dip), (void *)pip));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
		    "pm_powerup FAILED for %s%d %p\n",
		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip),
		    (void *)pip));

		/* undo the hold taken above */
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * Power up every usable pHCI of client 'ct'; succeeds if at least one
 * pHCI could be powered up.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			/* ct_mutex is dropped across the pm call */
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new power state on success */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* power transition is over; wake any waiting threads */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level raised: hold and power the pHCIs */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* dropped to level 0 from a known level: release holds */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Take the pre-config power holds for a single client 'child'.
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power-level transition */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one ALREADY held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config pm processing for one named child, or for every child of
 * the vHCI when 'child' is NULL.
 */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
	return (ret);
}

/*
 * Take the pre-unconfig power holds for a single client; *held is set
 * when a hold is in place so the caller knows post-unconfig must run.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if
 (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power-level transition */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(ct->ct_dip)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/* don't let auto-modunload power up a powered-down client */
	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * Pre-unconfig pm processing for one named child, or for every child of
 * the vHCI when 'child' is NULL.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		/*
		 * NOTE(review): unlike pre-config, a failure here does not
		 * stop the walk; *held reports whether any hold was taken.
		 */
		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);

	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/*
 * Balance the pre-config hold for one client after configuration has
 * completed (successfully or not).
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power-level transition */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* release one hold per currently-usable path */
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-config pm processing for one named child, or for every child of
 * the vHCI when 'child' is NULL.
 */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip,
 &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * Balance the pre-unconfig hold for one client once unconfiguration has
 * completed (successfully or not).
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power-level transition */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* release one hold per currently-usable path */
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-unconfig pm processing; 'held' is the flag set by
 * i_mdi_pm_pre_unconfig() -- there is nothing to undo when it is clear.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	int			circ;
	dev_info_t		*cdip;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, vdip,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * MDI power management entry point; dispatches the MDI_PM_* operations
 * issued by the vHCI framework around bus config/unconfig.
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int			circ, ret = MDI_SUCCESS;
	dev_info_t		*client_dip = NULL;
	mdi_client_t		*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
	    op, devnm ?
 devnm : "NULL", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		/* for these two ops 'args' is the client dip itself */
		ASSERT(args);

		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return (ret);
}

/*
 * Return MDI_SUCCESS iff 'dip' is an MDI vHCI node; optionally return
 * its mdi class name through 'mdi_class'.
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Return MDI_SUCCESS iff 'dip' is an MDI pHCI node; optionally return
 * the class of its vHCI through 'mdi_class'.
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t *phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Return MDI_SUCCESS iff 'dip' is an MDI client node; optionally return
 * the class of its vHCI through 'mdi_class'.
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t *client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class = client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Get the vhci private data of a client node; NULL for non-clients.
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * Set the vhci private data of a client node; no-op for non-clients.
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}
/*
 * mdi_pi_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_pathinfo node
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *		Set the vhci private information in the mdi_pathinfo node
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function.
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/* the -1 below is to account for "%s" in the format string */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	/* len counts the NUL, so the formatted string is exactly len-1 */
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t	*vhc;
	mdi_vhci_cache_t	*vhcache;
	int			i;
	nvlist_t		*nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate\n",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* flush the cache to disk before the system goes down */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t	*vhc = vh->vh_config;
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t	*cphci, *cphci_next;
	mdi_vhcache_client_t	*cct, *cct_next;
	mdi_vhcache_pathinfo_t	*cpi, *cpi_next;

	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	/* free the cached phci list */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	/* free each cached client together with its pathinfo list */
	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t	*acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/* poll until the flush thread and all acc threads have exited */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	/* write out any pending cache updates before tearing down */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}

/*
 * Stop vhci cache flush thread
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	/* poll until the flush thread notices MDI_VHC_EXIT and quits */
	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}
7256 7257 /* 7258 * Enqueue the vhcache phci (cphci) at the tail of the list 7259 */ 7260 static void 7261 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7262 { 7263 cphci->cphci_next = NULL; 7264 if (vhcache->vhcache_phci_head == NULL) 7265 vhcache->vhcache_phci_head = cphci; 7266 else 7267 vhcache->vhcache_phci_tail->cphci_next = cphci; 7268 vhcache->vhcache_phci_tail = cphci; 7269 } 7270 7271 /* 7272 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7273 */ 7274 static void 7275 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7276 mdi_vhcache_pathinfo_t *cpi) 7277 { 7278 cpi->cpi_next = NULL; 7279 if (cct->cct_cpi_head == NULL) 7280 cct->cct_cpi_head = cpi; 7281 else 7282 cct->cct_cpi_tail->cpi_next = cpi; 7283 cct->cct_cpi_tail = cpi; 7284 } 7285 7286 /* 7287 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7288 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7289 * flag set come at the beginning of the list. All cpis which have this 7290 * flag set come at the end of the list. 
7291 */ 7292 static void 7293 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7294 mdi_vhcache_pathinfo_t *newcpi) 7295 { 7296 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7297 7298 if (cct->cct_cpi_head == NULL || 7299 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7300 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7301 else { 7302 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7303 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7304 prev_cpi = cpi, cpi = cpi->cpi_next) 7305 ; 7306 7307 if (prev_cpi == NULL) 7308 cct->cct_cpi_head = newcpi; 7309 else 7310 prev_cpi->cpi_next = newcpi; 7311 7312 newcpi->cpi_next = cpi; 7313 7314 if (cpi == NULL) 7315 cct->cct_cpi_tail = newcpi; 7316 } 7317 } 7318 7319 /* 7320 * Enqueue the vhcache client (cct) at the tail of the list 7321 */ 7322 static void 7323 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7324 mdi_vhcache_client_t *cct) 7325 { 7326 cct->cct_next = NULL; 7327 if (vhcache->vhcache_client_head == NULL) 7328 vhcache->vhcache_client_head = cct; 7329 else 7330 vhcache->vhcache_client_tail->cct_next = cct; 7331 vhcache->vhcache_client_tail = cct; 7332 } 7333 7334 static void 7335 free_string_array(char **str, int nelem) 7336 { 7337 int i; 7338 7339 if (str) { 7340 for (i = 0; i < nelem; i++) { 7341 if (str[i]) 7342 kmem_free(str[i], strlen(str[i]) + 1); 7343 } 7344 kmem_free(str, sizeof (char *) * nelem); 7345 } 7346 } 7347 7348 static void 7349 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7350 { 7351 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7352 kmem_free(cphci, sizeof (*cphci)); 7353 } 7354 7355 static void 7356 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7357 { 7358 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7359 kmem_free(cpi, sizeof (*cpi)); 7360 } 7361 7362 static void 7363 free_vhcache_client(mdi_vhcache_client_t *cct) 7364 { 7365 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7366 kmem_free(cct, sizeof (*cct)); 7367 } 7368 7369 
/*
 * Build a "<name>@<addr>" string for the given client name and address.
 * The returned string is kmem_alloc'ed; if ret_len is non-NULL the
 * allocated length (including the terminating NUL) is returned through
 * it so the caller can kmem_free() the string later.
 */
static char *
vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
{
	char *name_addr;
	int len;

	/* "+2" covers the '@' separator and the terminating NUL */
	len = strlen(ct_name) + strlen(ct_addr) + 2;
	name_addr = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);

	if (ret_len)
		*ret_len = len;
	return (name_addr);
}

/*
 * Copy the contents of paddrnvl to vhci cache.
 * paddrnvl nvlist contains path information for a vhci client.
 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
 */
static void
paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
    mdi_vhcache_client_t *cct)
{
	nvpair_t *nvp = NULL;
	mdi_vhcache_pathinfo_t *cpi;
	uint_t nelem;
	uint32_t *val;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		/* nvpair name is the pathinfo's bus specific address */
		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
		ASSERT(nelem == 2);
		/* val[0] = index into cphci_list, val[1] = cpi_flags */
		cpi->cpi_cphci = cphci_list[val[0]];
		cpi->cpi_flags = val[1];
		enqueue_tail_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Copy the contents of caddrmapnvl to vhci cache.
 * caddrmapnvl nvlist contains vhci client address to phci client address
 * mappings. See the comment in mainnvl_to_vhcache() for the format of
 * this nvlist.
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
	nvpair_t *nvp = NULL;
	nvlist_t *paddrnvl;
	mdi_vhcache_client_t *cct;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		/* nvpair name is the client's "<name>@<addr>" string */
		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_nvlist(nvp, &paddrnvl);
		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
		/* the client must contain at least one path */
		ASSERT(cct->cct_cpi_head != NULL);

		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
	}
}

/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ...
are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify PHCIs to which the
 * the bus specific address belongs to. These integers are used as an index
 * into to the phcis string array in the main nvlist to get the PHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char **phcis, **phci_namep;
	uint_t nphcis;
	mdi_vhcache_phci_t *cphci, **cphci_list;
	nvlist_t *caddrmapnvl;
	int32_t ver;
	int i;
	size_t cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* reject a cache written with a different on-disk format version */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* no phcis means an empty (but valid) cache */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/*
	 * Build a temporary index->cphci array so that the phci-id integers
	 * stored in the paddrs nvlists can be resolved to cache entries.
	 */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	/* the client address map is optional (may be absent on disk) */
	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnnvl.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
    nvlist_t *caddrmapnvl)
{
	mdi_vhcache_pathinfo_t *cpi;
	nvlist_t *nvl;
	int err;
	uint32_t val[2];

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
		return (err);

	/* one (phci-id, flags) pair per path, keyed by the path address */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		val[0] = cpi->cpi_cphci->cphci_id;
		val[1] = cpi->cpi_flags;
		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
		    != 0)
			goto out;
	}

	/* nvlist_add_nvlist() copies nvl, so it is freed below either way */
	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
out:
	nvlist_free(nvl);
	return (err);
}

/*
 * Build caddrmapnvl using the information in the vhci cache
 * and add it to the mainnvl.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
{
	mdi_vhcache_client_t *cct;
	nvlist_t *nvl;
	int err;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
		return (err);

	/* add one paddrs nvlist per cached client */
	for (cct = vhcache->vhcache_client_head; cct != NULL;
	    cct = cct->cct_next) {
		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
			goto out;
	}

	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
out:
	nvlist_free(nvl);
	return (err);
}

/*
 * Build nvlist using the information in the vhci cache.
 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
 * Returns nvl on success, NULL on failure.
7582 */ 7583 static nvlist_t * 7584 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7585 { 7586 mdi_vhcache_phci_t *cphci; 7587 uint_t phci_count; 7588 char **phcis; 7589 nvlist_t *nvl; 7590 int err, i; 7591 7592 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7593 nvl = NULL; 7594 goto out; 7595 } 7596 7597 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7598 MDI_VHCI_CACHE_VERSION)) != 0) 7599 goto out; 7600 7601 rw_enter(&vhcache->vhcache_lock, RW_READER); 7602 if (vhcache->vhcache_phci_head == NULL) { 7603 rw_exit(&vhcache->vhcache_lock); 7604 return (nvl); 7605 } 7606 7607 phci_count = 0; 7608 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7609 cphci = cphci->cphci_next) 7610 cphci->cphci_id = phci_count++; 7611 7612 /* build phci pathname list */ 7613 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7614 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7615 cphci = cphci->cphci_next, i++) 7616 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7617 7618 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7619 phci_count); 7620 free_string_array(phcis, phci_count); 7621 7622 if (err == 0 && 7623 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7624 rw_exit(&vhcache->vhcache_lock); 7625 return (nvl); 7626 } 7627 7628 rw_exit(&vhcache->vhcache_lock); 7629 out: 7630 if (nvl) 7631 nvlist_free(nvl); 7632 return (NULL); 7633 } 7634 7635 /* 7636 * Lookup vhcache phci structure for the specified phci path. 7637 */ 7638 static mdi_vhcache_phci_t * 7639 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7640 { 7641 mdi_vhcache_phci_t *cphci; 7642 7643 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7644 7645 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7646 cphci = cphci->cphci_next) { 7647 if (strcmp(cphci->cphci_path, phci_path) == 0) 7648 return (cphci); 7649 } 7650 7651 return (NULL); 7652 } 7653 7654 /* 7655 * Lookup vhcache phci structure for the specified phci. 
7656 */ 7657 static mdi_vhcache_phci_t * 7658 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7659 { 7660 mdi_vhcache_phci_t *cphci; 7661 7662 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7663 7664 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7665 cphci = cphci->cphci_next) { 7666 if (cphci->cphci_phci == ph) 7667 return (cphci); 7668 } 7669 7670 return (NULL); 7671 } 7672 7673 /* 7674 * Add the specified phci to the vhci cache if not already present. 7675 */ 7676 static void 7677 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7678 { 7679 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7680 mdi_vhcache_phci_t *cphci; 7681 char *pathname; 7682 int cache_updated; 7683 7684 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7685 7686 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7687 (void) ddi_pathname(ph->ph_dip, pathname); 7688 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7689 != NULL) { 7690 cphci->cphci_phci = ph; 7691 cache_updated = 0; 7692 } else { 7693 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7694 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7695 cphci->cphci_phci = ph; 7696 enqueue_vhcache_phci(vhcache, cphci); 7697 cache_updated = 1; 7698 } 7699 7700 rw_exit(&vhcache->vhcache_lock); 7701 7702 /* 7703 * Since a new phci has been added, reset 7704 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7705 * during next vhcache_discover_paths(). 7706 */ 7707 mutex_enter(&vhc->vhc_lock); 7708 vhc->vhc_path_discovery_cutoff_time = 0; 7709 mutex_exit(&vhc->vhc_lock); 7710 7711 kmem_free(pathname, MAXPATHLEN); 7712 if (cache_updated) 7713 vhcache_dirty(vhc); 7714 } 7715 7716 /* 7717 * Remove the reference to the specified phci from the vhci cache. 
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
		/* do not remove the actual mdi_vhcache_phci structure */
		cphci->cphci_phci = NULL;
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Initialize the lookup token dst from src.  A NULL src yields an empty
 * token (no cached cct, zero lookup time).
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
	if (src == NULL) {
		dst->lt_cct = NULL;
		dst->lt_cct_lookup_time = 0;
	} else {
		dst->lt_cct = src->lt_cct;
		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
	}
}

/*
 * Look up vhcache client for the specified client.
 * Caller must hold vhcache_lock.  If token is non-NULL it is used as a
 * one-entry lookup memo and is refreshed by this call.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mod_hash_val_t hv;
	char *name_addr;
	int len;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)name_addr, &hv) == 0) {
		if (token) {
			token->lt_cct = (mdi_vhcache_client_t *)hv;
			token->lt_cct_lookup_time = lbolt64;
		}
	} else {
		if (token) {
			token->lt_cct = NULL;
			token->lt_cct_lookup_time = 0;
		}
		hv = NULL;
	}
	kmem_free(name_addr, len);
	return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phci_t *ph = pip->pi_phci;
	mdi_client_t *ct = pip->pi_client;
	int cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/* if the path is already cached, rebind it to this pip */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				/*
				 * the path exists now; clearing the hint
				 * changes the on-disk sort order, so resort
				 * and flag the cache dirty.
				 */
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	/* otherwise create a fresh cache entry for this path */
	if (cpi == NULL) {
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_client_t *ct = pip->pi_client;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) != NULL) {
		for (cpi = cct->cct_cpi_head; cpi != NULL;
		    cpi = cpi->cpi_next) {
			if (cpi->cpi_pip == pip) {
				/* keep the cpi; just drop the pip binding */
				cpi->cpi_pip = NULL;
				break;
			}
		}
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			/*
			 * read-only filesystem: give up on flushing
			 * entirely rather than retrying forever.
			 */
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* log the failure only on the first occurrence */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* number of seconds to sleep idle before exiting */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/*
		 * While the cache is dirty and we are not asked to exit,
		 * either wait until the scheduled flush time arrives or
		 * clear the dirty flag and flush the cache to disk.  A
		 * failed flush re-dirties the cache so it gets retried.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				/* drop the lock across the disk write */
				mutex_exit(&vhc->vhc_lock);

				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* wait for more work, but give up after the idle timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
 */
static void
vhcache_dirty(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int create_thread;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* do not flush cache until the cache is fully built */
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}
	rw_exit(&vhcache->vhcache_lock);

	mutex_enter(&vhc->vhc_lock);
	/* on a read-only filesystem there is no point scheduling a flush */
	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
		mutex_exit(&vhc->vhc_lock);
		return;
	}

	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		/* flush thread already running; just wake it up */
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;

/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;
	kcondvar_t vhbc_cv;
	int vhbc_thr_count;		/* # of phci config threads active */
} mdi_vhci_bus_config_t;

/*
 * bus config the specified phci
 */
static void
bus_config_phci(void *arg)
{
	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
	dev_info_t *ph_dip;

	/*
	 * first configure all path components upto phci and then configure
	 * the phci children.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
	    != NULL) {
		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
			(void) ndi_devi_config_driver(ph_dip,
			    vhbc->vhbc_op_flags,
			    vhbc->vhbc_op_major);
		} else
			(void) ndi_devi_config(ph_dip,
			    vhbc->vhbc_op_flags);

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
	kmem_free(phbc, sizeof (*phbc));

	/* wake the initiator when the last config thread finishes */
	mutex_enter(&vhbc->vhbc_lock);
	vhbc->vhbc_thr_count--;
	if (vhbc->vhbc_thr_count == 0)
		cv_broadcast(&vhbc->vhbc_cv);
	mutex_exit(&vhbc->vhbc_lock);
}

/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	/* build one config request per eligible cached phci */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		phbc_next = phbc->phbc_next;
		/* mdi_mtc_off forces single-threaded, in-line configuration */
		if (mdi_mtc_off)
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}

/*
 * Single
threaded version of bus_config_all_phcis()
 */
static void
st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);
	bus_config_all_phcis(vhcache, flags, op, maj);
	single_threaded_vhconfig_exit(vhc);
}

/*
 * Perform BUS_CONFIG_ONE on the specified child of the phci.
 * The path includes the child component in addition to the phci path.
 */
static int
bus_config_one_phci_child(char *path)
{
	dev_info_t *ph_dip, *child;
	char *devnm;
	int rv = MDI_FAILURE;

	/*
	 * extract the child component of the phci: temporarily overwrite
	 * the last '/' with a NUL so that "path" is just the phci path and
	 * "devnm" points at the child component; restored before return.
	 */
	devnm = strrchr(path, '/');
	*devnm++ = '\0';

	/*
	 * first configure all path components upto phci and then
	 * configure the phci child.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
		    NDI_SUCCESS) {
			/*
			 * release the hold that ndi_devi_config_one() placed
			 */
			ndi_rele_devi(child);
			rv = MDI_SUCCESS;
		}

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* restore the '/' that was overwritten above */
	devnm--;
	*devnm = '/';
	return (rv);
}

/*
 * Build a list of phci client paths for the specified vhci client.
 * The list includes only those phci client paths which aren't configured yet.
 */
static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
{
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
	int config_path, len;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/*
		 * include only those paths that aren't configured.
		 */
		config_path = 0;
		if (cpi->cpi_pip == NULL)
			config_path = 1;
		else {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (MDI_PI_IS_INIT(cpi->cpi_pip))
				config_path = 1;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}

		if (config_path) {
			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
			/* "+3" covers '/', '@' and the terminating NUL */
			len = strlen(cpi->cpi_cphci->cphci_path) +
			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
			pp->phys_path = kmem_alloc(len, KM_SLEEP);
			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
			    cpi->cpi_cphci->cphci_path, ct_name,
			    cpi->cpi_addr);
			pp->phys_path_next = NULL;

			/* append to the tail to preserve cpi ordering */
			if (pp_head == NULL)
				pp_head = pp;
			else
				pp_tail->phys_path_next = pp;
			pp_tail = pp;
		}
	}

	return (pp_head);
}

/*
 * Free the memory allocated for phci client path list.
 */
static void
free_phclient_path_list(mdi_phys_path_t *pp_head)
{
	mdi_phys_path_t *pp, *pp_next;

	for (pp = pp_head; pp != NULL; pp = pp_next) {
		pp_next = pp->phys_path_next;
		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
		kmem_free(pp, sizeof (*pp));
	}
}

/*
 * Allocate an async client structure and initialize it with the specified
 * values.  Ownership of pp_head transfers to the returned structure;
 * release everything with free_async_client_config().
 */
static mdi_async_client_config_t *
alloc_async_client_config(char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc;

	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
	acc->acc_phclient_path_list_head = pp_head;
	init_vhcache_lookup_token(&acc->acc_token, tok);
	acc->acc_next = NULL;
	return (acc);
}

/*
 * Free the memory allocated for the async client structure and their members.
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	if (acc->acc_phclient_path_list_head)
		free_phclient_path_list(acc->acc_phclient_path_list_head);
	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;

	/* re-insert every cpi; enqueue_vhcache_pathinfo() orders by hint */
	cpi_head = cct->cct_cpi_head;
	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
		cpi_next = cpi->cpi_next;
		enqueue_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/* a mismatch between the hint flag and cpi_pip needs fixing */
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * try to upgrade to a writer lock; if that fails, drop the lock
	 * and reacquire as writer, then look the client up again since
	 * the cache may have changed while unlocked.
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	/* make every hint flag agree with whether the path has a pip */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t *pp;

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
		(void) bus_config_one_phci_child(pp->phys_path);
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* wait for work, but exit if idle for too long */
		mutex_enter(&vhc->vhc_lock);
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the head entry and configure its paths */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc, *newacc;
	int create_thread;

	if (pp_head == NULL)
		return;

	if (mdi_mtc_off) {
		/* Multi-threaded config disabled; configure synchronously. */
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	/* Note: alloc_async_client_config() takes ownership of pp_head. */
	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(newacc);

	mutex_enter(&vhc->vhc_lock);
	/* Drop the request if one for this client is already queued. */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
			free_async_client_config(newacc);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
	}

	/* Append the new request to the tail of the work list. */
	if (vhc->vhc_acc_list_head == NULL)
		vhc->vhc_acc_list_head = newacc;
	else
		vhc->vhc_acc_list_tail->acc_next = newacc;
	vhc->vhc_acc_list_tail = newacc;
	vhc->vhc_acc_count++;
	/*
	 * Wake existing worker threads if there are enough of them to cover
	 * the queue; otherwise account for and create an additional thread.
	 */
	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_acc_thrcount++;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * Return number of online paths for the specified client.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Called with vhcache_lock held (reader); the lock is always released by
 * the time this function returns.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * No path is online yet: configure paths one at a time until one
	 * comes online, then hand the remainder off to the async machinery.
	 */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/*
			 * Re-lookup: the client may have gone away while the
			 * cache lock was dropped for bus config.
			 */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				/*
				 * The async list now owns the tail; detach it
				 * so "out:" only frees the part we still own.
				 */
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Serialize vhci configuration activity: block until no other thread holds
 * the MDI_VHC_SINGLE_THREADED token, then claim it.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Release the MDI_VHC_SINGLE_THREADED token and wake any waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

typedef struct mdi_phci_driver_info {
	char	*phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
8606 */ 8607 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8608 { "fp", 1 }, 8609 { "iscsi", 0 }, 8610 { "ibsrp", 1 } 8611 }; 8612 8613 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8614 8615 static void * 8616 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8617 { 8618 void *new_ptr; 8619 8620 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8621 if (old_ptr) { 8622 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8623 kmem_free(old_ptr, old_size); 8624 } 8625 return (new_ptr); 8626 } 8627 8628 static void 8629 add_to_phci_list(char ***driver_list, int **root_support_list, 8630 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8631 { 8632 ASSERT(*cur_elements <= *max_elements); 8633 if (*cur_elements == *max_elements) { 8634 *max_elements += 10; 8635 *driver_list = mdi_realloc(*driver_list, 8636 sizeof (char *) * (*cur_elements), 8637 sizeof (char *) * (*max_elements)); 8638 *root_support_list = mdi_realloc(*root_support_list, 8639 sizeof (int) * (*cur_elements), 8640 sizeof (int) * (*max_elements)); 8641 } 8642 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8643 (*root_support_list)[*cur_elements] = root_support; 8644 (*cur_elements)++; 8645 } 8646 8647 static void 8648 get_phci_driver_list(char *vhci_class, char ***driver_list, 8649 int **root_support_list, int *cur_elements, int *max_elements) 8650 { 8651 mdi_phci_driver_info_t *st_driver_list, *p; 8652 int st_ndrivers, root_support, i, j, driver_conf_count; 8653 major_t m; 8654 struct devnames *dnp; 8655 ddi_prop_t *propp; 8656 8657 *driver_list = NULL; 8658 *root_support_list = NULL; 8659 *cur_elements = 0; 8660 *max_elements = 0; 8661 8662 /* add the phci drivers derived from the phci driver.conf files */ 8663 for (m = 0; m < devcnt; m++) { 8664 dnp = &devnamesp[m]; 8665 8666 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8667 LOCK_DEV_OPS(&dnp->dn_lock); 8668 if (dnp->dn_global_prop_ptr != NULL && 8669 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8670 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8671 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8672 strcmp(propp->prop_val, vhci_class) == 0) { 8673 8674 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8675 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8676 &dnp->dn_global_prop_ptr->prop_list) 8677 == NULL) ? 1 : 0; 8678 8679 add_to_phci_list(driver_list, root_support_list, 8680 cur_elements, max_elements, dnp->dn_name, 8681 root_support); 8682 8683 UNLOCK_DEV_OPS(&dnp->dn_lock); 8684 } else 8685 UNLOCK_DEV_OPS(&dnp->dn_lock); 8686 } 8687 } 8688 8689 driver_conf_count = *cur_elements; 8690 8691 /* add the phci drivers specified in the built-in tables */ 8692 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8693 st_driver_list = scsi_phci_driver_list; 8694 st_ndrivers = sizeof (scsi_phci_driver_list) / 8695 sizeof (mdi_phci_driver_info_t); 8696 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8697 st_driver_list = ib_phci_driver_list; 8698 st_ndrivers = sizeof (ib_phci_driver_list) / 8699 sizeof (mdi_phci_driver_info_t); 8700 } else { 8701 st_driver_list = NULL; 8702 st_ndrivers = 0; 8703 } 8704 8705 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8706 /* add this phci driver if not already added before */ 8707 for (j = 0; j < driver_conf_count; j++) { 8708 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8709 break; 8710 } 8711 if (j == driver_conf_count) { 8712 add_to_phci_list(driver_list, root_support_list, 8713 cur_elements, max_elements, p->phdriver_name, 8714 p->phdriver_root_support); 8715 } 8716 } 8717 } 8718 8719 /* 8720 * Attach the phci driver instances associated with the specified vhci class. 8721 * If root is mounted attach all phci driver instances. 8722 * If root is not mounted, attach the instances of only those phci 8723 * drivers that have the root support. 
 */
static void
attach_phci_drivers(char *vhci_class)
{
	char **driver_list, **p;
	int *root_support_list;
	int cur_elements, max_elements, i;
	major_t m;

	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
	    &cur_elements, &max_elements);

	for (i = 0; i < cur_elements; i++) {
		/* before root mount, only attach drivers with root support */
		if (modrootloaded || root_support_list[i]) {
			m = ddi_name_to_major(driver_list[i]);
			/* hold/release forces the driver to attach */
			if (m != DDI_MAJOR_T_NONE &&
			    ddi_hold_installed_driver(m))
				ddi_rele_driver(m);
		}
	}

	/* free the lists allocated by get_phci_driver_list() */
	if (driver_list) {
		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
			kmem_free(*p, strlen(*p) + 1);
		kmem_free(driver_list, sizeof (char *) * max_elements);
		kmem_free(root_support_list, sizeof (int) * max_elements);
	}
}

/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
 *
 * Cache is built fully if the root is mounted.
 * If the root is not mounted, phci drivers that do not have root support
 * are not attached. As a result the cache is built partially. The entries
 * in the cache reflect only those phci drivers that have root support.
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	/* serialize with other vhci config activity */
	single_threaded_vhconfig_enter(vhc);

	/* another thread may have completed setup while we waited */
	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
		rw_exit(&vhcache->vhcache_lock);
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}
	rw_exit(&vhcache->vhcache_lock);

	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	vhcache_dirty(vhc);
	/* return 1 only when this call actually built the cache */
	return (1);
}

/*
 * Determine if discovery of paths is needed.
 * Returns 1 if discovery should proceed, 0 otherwise.  Consumes one unit
 * of the per-boot / post-boot discovery budget when applicable.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int rv = 1;

	mutex_enter(&vhc->vhc_lock);
	/* i_ddi_io_initialized() distinguishes boot from post-boot */
	if (i_ddi_io_initialized() == 0) {
		if (vhc->vhc_path_discovery_boot > 0) {
			vhc->vhc_path_discovery_boot--;
			goto out;
		}
	} else {
		if (vhc->vhc_path_discovery_postboot > 0) {
			vhc->vhc_path_discovery_postboot--;
			goto out;
		}
	}

	/*
	 * Do full path discovery at most once per mdi_path_discovery_interval.
	 * This is to avoid a series of full path discoveries when opening
	 * stale /dev/[r]dsk links.
	 */
	if (mdi_path_discovery_interval != -1 &&
	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
		goto out;

	rv = 0;
out:
	mutex_exit(&vhc->vhc_lock);
	return (rv);
}

/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
 */
static int
vhcache_discover_paths(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;

	/* serialize with other vhci config activity */
	single_threaded_vhconfig_enter(vhc);

	if (vhcache_do_discovery(vhc)) {
		attach_phci_drivers(vh->vh_class);
		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

		/* rate-limit: stamp the next allowed full-discovery time */
		mutex_enter(&vhc->vhc_lock);
		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
		    mdi_path_discovery_interval * TICKS_PER_SECOND;
		mutex_exit(&vhc->vhc_lock);
		rv = 1;
	}

	single_threaded_vhconfig_exit(vhc);
	/* returns 1 if discovery was actually performed */
	return (rv);
}

/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *	vdip	vhci dip
 *	flags	bus config flags
 *	op	bus config operation
 *	The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *	arg	pointer to name@addr
 *	child	upon successful return from this function, *child will be
 *		set to the configured and held devinfo child node of vdip.
 *	ct_addr	pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *	arg	major number of the driver
 *	child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *	arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node. Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: "
		    "vhci dip is busy owned %p\n", (void *)vdip));
		goto default_bus_config;
	}

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		/* rv == 1 means this thread just (re)built the cache */
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/* temporarily split "name@addr" at the '@' */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		/* skip if the cache was just built (paths already config'd) */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
 * Returns the nvlist on success, NULL if the file is absent, unreadable,
 * or corrupted (a warning is logged and the cache will be recreated).
 */
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
	nvlist_t *nvl;
	int err;
	char *filename;

	filename = vhclass2vhcache_filename(vhci_class);

	if ((err = fread_nvlist(filename, &nvl)) == 0) {
		kmem_free(filename, strlen(filename) + 1);
		return (nvl);
	} else if (err == EIO)
		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
	else if (err == EINVAL)
		cmn_err(CE_WARN,
		    "%s: data file corrupted, will recreate\n", filename);

	kmem_free(filename, strlen(filename) + 1);
	return (NULL);
}

/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 */
void
mdi_read_devices_files(void)
{
	int i;

	for (i = 0; i < N_VHCI_CLASSES; i++)
		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
}

/*
 * Remove all stale entries from vhci cache.
 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/*
	 * Detach the client list and rebuild it, keeping only clients that
	 * retain at least one path with a live pathinfo node.
	 */
	cct_head = vhcache->vhcache_client_head;
	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
	for (cct = cct_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;

		/* keep only paths backed by a pathinfo node */
		cpi_head = cct->cct_cpi_head;
		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			if (cpi->cpi_pip != NULL) {
				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
				enqueue_tail_vhcache_pathinfo(cct, cpi);
			} else
				free_vhcache_pathinfo(cpi);
		}

		if (cct->cct_cpi_head != NULL)
			enqueue_vhcache_client(vhcache, cct);
		else {
			/* client has no live paths; drop it entirely */
			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
			    (mod_hash_key_t)cct->cct_name_addr);
			free_vhcache_client(cct);
		}
	}

	/* similarly, keep only phcis that are currently registered */
	cphci_head = vhcache->vhcache_phci_head;
	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		if (cphci->cphci_phci != NULL)
			enqueue_vhcache_phci(vhcache, cphci);
		else
			free_vhcache_phci(cphci);
	}

	vhcache->vhcache_clean_time = lbolt64;
	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}

/*
 * Remove all stale entries from vhci cache.
 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
 */
void
mdi_clean_vhcache(void)
{
	mdi_vhci_t *vh;

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		/* hold the vhci (refcnt) while working without mdi_mutex */
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		clean_vhcache(vh->vh_config);
		mutex_enter(&mdi_mutex);
		vh->vh_refcnt--;
	}
	mutex_exit(&mdi_mutex);
}

/*
 * mdi_vhci_walk_clients():
 *		Walker routine to traverse client dev_info nodes
 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
 * below the client, including nexus devices, which we dont want.
 * So we just traverse the immediate siblings, starting from 1st client.
 */
void
mdi_vhci_walk_clients(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
	dev_info_t	*cdip;
	mdi_client_t	*ct;

	MDI_VHCI_CLIENT_LOCK(vh);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		ct = i_devi_get_client(cdip);
		MDI_CLIENT_LOCK(ct);

		/* callback is invoked with the client lock held */
		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
			cdip = ddi_get_next_sibling(cdip);
		else
			cdip = NULL;

		MDI_CLIENT_UNLOCK(ct);
	}
	MDI_VHCI_CLIENT_UNLOCK(vh);
}

/*
 * mdi_vhci_walk_phcis():
 *		Walker routine to traverse phci dev_info nodes
 */
void
mdi_vhci_walk_phcis(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
	mdi_phci_t	*ph, *next;

	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph) {
		MDI_PHCI_LOCK(ph);

		/* callback is invoked with the phci lock held */
		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
			next = ph->ph_next;
		else
			next = NULL;

		MDI_PHCI_UNLOCK(ph);
		ph = next;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
}


/*
 * mdi_walk_vhcis():
 *		Walker routine to traverse vhci dev_info nodes
 */
void
mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = NULL;

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		/* hold vh via refcnt; callback runs without mdi_mutex */
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
			break;
		} else {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
		}
	}

	mutex_exit(&mdi_mutex);
}

/*
 * i_mdi_log_sysevent():
 *		Logs events for pickup by syseventd
 */
static void
i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
{
	char		*path_name;
	nvlist_t	*attr_list;

	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
	    KM_SLEEP) != DDI_SUCCESS) {
		goto alloc_failed;
	}

	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path_name);

	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
	    ddi_driver_name(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_PATHNAME,
	    path_name) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_CLASS,
	    ph_vh_class) != DDI_SUCCESS) {
		goto error;
	}

	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
	    attr_list, NULL, DDI_SLEEP);

	/* success path also falls through here to free resources */
error:
	kmem_free(path_name, MAXPATHLEN);
	nvlist_free(attr_list);
	return;

alloc_failed:
	MDI_DEBUG(1, (CE_WARN, dip,
	    "!i_mdi_log_sysevent: Unable to send sysevent"));
}
9230 9231 char ** 9232 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9233 { 9234 char **driver_list, **ret_driver_list = NULL; 9235 int *root_support_list; 9236 int cur_elements, max_elements; 9237 9238 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9239 &cur_elements, &max_elements); 9240 9241 9242 if (driver_list) { 9243 kmem_free(root_support_list, sizeof (int) * max_elements); 9244 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9245 * max_elements, sizeof (char *) * cur_elements); 9246 } 9247 *ndrivers = cur_elements; 9248 9249 return (ret_driver_list); 9250 9251 } 9252 9253 void 9254 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9255 { 9256 char **p; 9257 int i; 9258 9259 if (driver_list) { 9260 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9261 kmem_free(*p, strlen(*p) + 1); 9262 kmem_free(driver_list, sizeof (char *) * ndrivers); 9263 } 9264 } 9265