1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
 *
 * Default locking order:
 *
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex);
 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex);
 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex);
 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex);
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 */

#include <sys/note.h>
#include <sys/types.h>
#include <sys/varargs.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/kmem.h>
#include <sys/poll.h>
#include <sys/conf.h>
#include <sys/bootconf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddipropdefs.h>
#include <sys/sunndi.h>
#include <sys/ndi_impldefs.h>
#include <sys/promif.h>
#include <sys/sunmdi.h>
#include <sys/mdi_impldefs.h>
#include <sys/taskq.h>
#include <sys/epm.h>
#include <sys/sunpm.h>
#include <sys/modhash.h>
#include <sys/disp.h>
#include <sys/autoconf.h>
#include <sys/sysmacros.h>

#ifdef DEBUG
#include <sys/debug.h>
/* Debug verbosity: MDI_DEBUG emits messages at or below this level. */
int	mdi_debug = 1;
/* Presumably restricts debug output to the system log only -- TODO confirm */
int	mdi_debug_logonly = 0;
/* Expands to a call of i_mdi_log() when verbosity permits; no-op otherwise. */
#define	MDI_DEBUG(level, stmnt) \
	    if (mdi_debug >= (level)) i_mdi_log stmnt
static void i_mdi_log(int, dev_info_t *, const char *fmt, ...);
#else	/* !DEBUG */
#define	MDI_DEBUG(level, stmnt)
#endif	/* DEBUG */

extern pri_t	minclsyspri;
extern int	modrootloaded;

/*
 * Global mutex:
 * Protects vHCI list and structure members.
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t		*mdi_taskq;
static uint_t	mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 */
static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
static int		mdi_pathmap_hash_size = 256;
static kmutex_t		mdi_pathmap_mutex;
static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */

/*
 * MDI component property name/value string definitions
 */
const char		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/* Message emitted when a second vHCI driver tries to claim a class. */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";

/*
 * Static function prototypes
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
/*
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
			    int, int);
static mdi_pathinfo_t	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
			    mdi_vhci_t *vh, int flags, int op);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
static boolean_t	stop_vhcache_flush_thread(void *, int);
static void
			free_string_array(char **, int);
static void		free_vhcache_phci(mdi_vhcache_phci_t *);
static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void		free_vhcache_client(mdi_vhcache_client_t *);
static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_pi_add(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		vhcache_pi_remove(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		free_phclient_path_list(mdi_phys_path_t *);
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);

/* called once when first vhci registers with mdi */
static void
i_mdi_init()
{
	/*
	 * One-shot guard: later registrations find the framework already
	 * set up and return immediately.
	 */
	static int initialized = 0;

	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Create our taskq resources */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/* Allocate ['path_instance' <-> "path"] maps */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	mdi_pathmap_bypath = mod_hash_create_strhash(
	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
}

/*
 * mdi_get_component_type():
 *		Return mpxio component type
 * Return Values:
 *		MDI_COMPONENT_NONE
 *		MDI_COMPONENT_VHCI
 *		MDI_COMPONENT_PHCI
 *		MDI_COMPONENT_CLIENT
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed when clients migrate mdi_component_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	return (DEVI(dip)->devi_mdi_component);
}

/*
 * mdi_vhci_register():
 *		Register a vHCI module with the mpxio framework
 *		mdi_vhci_register() is called by vHCI drivers to register the
 *		'class_driver' vHCI driver and its MDI entrypoints with the
 *		mpxio framework.  The vHCI driver must call this interface as
 *		part of its attach(9e) handler.
 *		Competing threads may try to attach mdi_vhci_register() as
 *		the vHCI drivers are loaded and attached as a result of pHCI
 *		driver instance registration (mdi_phci_register()) with the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	/* Caller must pass a current-revision ops vector and hold parent. */
	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created.  Check for valid
			 * vHCI ops registration.  We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			/*
			 * NOTE(review): on this pre-existing-vh path the
			 * new vops vector is not stored into vh->vh_ops
			 * here -- confirm that is intended.
			 */
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/* Round-robin load balancing unless a property overrides. */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* Append the new vHCI to the global list. */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_vhci_unregister():
 *		Unregister a vHCI module from mpxio framework
 *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *		of
a vhci to unregister it from the framework. 469 * Return Values: 470 * MDI_SUCCESS 471 * MDI_FAILURE 472 */ 473 /*ARGSUSED*/ 474 int 475 mdi_vhci_unregister(dev_info_t *vdip, int flags) 476 { 477 mdi_vhci_t *found, *vh, *prev = NULL; 478 479 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 480 481 /* 482 * Check for invalid VHCI 483 */ 484 if ((vh = i_devi_get_vhci(vdip)) == NULL) 485 return (MDI_FAILURE); 486 487 /* 488 * Scan the list of registered vHCIs for a match 489 */ 490 mutex_enter(&mdi_mutex); 491 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 492 if (found == vh) 493 break; 494 prev = found; 495 } 496 497 if (found == NULL) { 498 mutex_exit(&mdi_mutex); 499 return (MDI_FAILURE); 500 } 501 502 /* 503 * Check the vHCI, pHCI and client count. All the pHCIs and clients 504 * should have been unregistered, before a vHCI can be 505 * unregistered. 506 */ 507 MDI_VHCI_PHCI_LOCK(vh); 508 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 509 MDI_VHCI_PHCI_UNLOCK(vh); 510 mutex_exit(&mdi_mutex); 511 return (MDI_FAILURE); 512 } 513 MDI_VHCI_PHCI_UNLOCK(vh); 514 515 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 516 mutex_exit(&mdi_mutex); 517 return (MDI_FAILURE); 518 } 519 520 /* 521 * Remove the vHCI from the global list 522 */ 523 if (vh == mdi_vhci_head) { 524 mdi_vhci_head = vh->vh_next; 525 } else { 526 prev->vh_next = vh->vh_next; 527 } 528 if (vh == mdi_vhci_tail) { 529 mdi_vhci_tail = prev; 530 } 531 mdi_vhci_count--; 532 mutex_exit(&mdi_mutex); 533 534 vh->vh_ops = NULL; 535 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 536 DEVI(vdip)->devi_mdi_xhci = NULL; 537 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 538 kmem_free(vh->vh_client_table, 539 mdi_client_table_size * sizeof (struct client_hash)); 540 mutex_destroy(&vh->vh_phci_mutex); 541 mutex_destroy(&vh->vh_client_mutex); 542 543 kmem_free(vh, sizeof (mdi_vhci_t)); 544 return (MDI_SUCCESS); 545 } 546 547 /* 548 * i_mdi_vhci_class2vhci(): 549 * Look for a 
 *		matching vHCI module given a vHCI class name
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*vh = NULL;

	/* Caller must not already hold mdi_mutex; we take it here. */
	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			break;
		}
	}
	mutex_exit(&mdi_mutex);
	return (vh);
}

/*
 * i_devi_get_vhci():
 *		Utility function to get the handle to a vHCI component
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	mdi_vhci_t	*vh = NULL;
	if (MDI_VHCI(vdip)) {
		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
	}
	return (vh);
}

/*
 * mdi_phci_register():
 *		Register a pHCI module with mpxio framework
 *		mdi_phci_register() is called by pHCI drivers to register with
 *		the mpxio framework and a specific 'class_driver' vHCI.  The
 *		pHCI driver must call this interface as part of its attach(9e)
 *		handler.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;
	char			*pathname;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(pdip, pathname);

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (CE_CONT, pdip,
			    "?%s (%s%d) multipath capabilities "
			    "disabled via %s.conf.\n", pathname,
			    ddi_driver_name(pdip), ddi_get_instance(pdip),
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			kmem_free(pathname, MAXPATHLEN);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	kmem_free(pathname, MAXPATHLEN);

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	/* Allocate and initialize this pHCI's mdi extension. */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);
	/* Decorate the devinfo node and set the back reference. */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* Append the new pHCI to the vHCI's pHCI list. */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *		Unregister a pHCI module from mpxio framework
 *		mdi_phci_unregister()
 *		is called by the pHCI drivers from their
 *		detach(9E) handler to unregister their instances from the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;

	/* Some cooperating thread must hold the parent busy (see register). */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/*
	 * Unlink this pHCI from the vHCI's pHCI list.
	 * NOTE(review): the walk assumes ph is present on the list; if it
	 * were absent (tmp == NULL after the loop) the unlink below would
	 * corrupt the list -- presumably registration guarantees membership.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Announce the departure, then tear down the pHCI extension. */
	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *		Utility function to return the phci extensions.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;
	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * Single thread mdi entry into devinfo node for modifying its children.
 * If necessary we perform an ndi_devi_enter of the vHCI before doing
 * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a threads that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and the enter down the path
	 * to the pHCI.  If pHCI is detaching then we piggyback this calls
	 * enter of the vHCI on frameworks vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/*
				 * Won the vHCI but the pHCI is detaching:
				 * drop it and piggyback on the framework's
				 * enter; -1 marks "vHCI not held by us".
				 */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else {
			/* vHCI busy and pHCI not detaching: back off, retry */
			delay(1);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);
	/* Pack both circular recursion values into the caller's single int. */
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
821 */ 822 void 823 mdi_devi_exit(dev_info_t *phci_dip, int circular) 824 { 825 dev_info_t *vdip; 826 int vcircular, pcircular; 827 828 /* Verify calling context */ 829 ASSERT(MDI_PHCI(phci_dip)); 830 vdip = mdi_devi_get_vdip(phci_dip); 831 ASSERT(vdip); /* A pHCI always has a vHCI */ 832 833 /* extract two circular recursion values from single int */ 834 pcircular = (short)(circular & 0xFFFF); 835 vcircular = (short)((circular >> 16) & 0xFFFF); 836 837 ndi_devi_exit(phci_dip, pcircular); 838 if (vcircular != -1) 839 ndi_devi_exit(vdip, vcircular); 840 } 841 842 /* 843 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 844 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 845 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 846 * with vHCI power management code during path online/offline. Each 847 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 848 * occur within the scope of an active mdi_devi_enter that establishes the 849 * circular value. 
850 */ 851 void 852 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 853 { 854 int pcircular; 855 856 /* Verify calling context */ 857 ASSERT(MDI_PHCI(phci_dip)); 858 859 pcircular = (short)(circular & 0xFFFF); 860 ndi_devi_exit(phci_dip, pcircular); 861 } 862 863 void 864 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 865 { 866 int pcircular; 867 868 /* Verify calling context */ 869 ASSERT(MDI_PHCI(phci_dip)); 870 871 ndi_devi_enter(phci_dip, &pcircular); 872 873 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 874 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 875 } 876 877 /* 878 * mdi_devi_get_vdip(): 879 * given a pHCI dip return vHCI dip 880 */ 881 dev_info_t * 882 mdi_devi_get_vdip(dev_info_t *pdip) 883 { 884 mdi_phci_t *ph; 885 886 ph = i_devi_get_phci(pdip); 887 if (ph && ph->ph_vhci) 888 return (ph->ph_vhci->vh_dip); 889 return (NULL); 890 } 891 892 /* 893 * mdi_devi_pdip_entered(): 894 * Return 1 if we are vHCI and have done an ndi_devi_enter 895 * of a pHCI 896 */ 897 int 898 mdi_devi_pdip_entered(dev_info_t *vdip) 899 { 900 mdi_vhci_t *vh; 901 mdi_phci_t *ph; 902 903 vh = i_devi_get_vhci(vdip); 904 if (vh == NULL) 905 return (0); 906 907 MDI_VHCI_PHCI_LOCK(vh); 908 ph = vh->vh_phci_head; 909 while (ph) { 910 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 911 MDI_VHCI_PHCI_UNLOCK(vh); 912 return (1); 913 } 914 ph = ph->ph_next; 915 } 916 MDI_VHCI_PHCI_UNLOCK(vh); 917 return (0); 918 } 919 920 /* 921 * mdi_phci_path2devinfo(): 922 * Utility function to search for a valid phci device given 923 * the devfs pathname. 
 */
dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	MDI_VHCI_PHCI_LOCK(vh);
	/* Compare the devfs path of each registered pHCI to the target. */
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	if (ph == NULL) {
		/* no match: don't return the last candidate examined */
		pdip = NULL;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *		get number of path information nodes associated with a given
 *		pHCI device.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		/* snapshot read; taken without holding ph_mutex */
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			/*
			 * tryenter failed.  Try to grab again
			 * after a small delay
			 */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *		create client device's devinfo node
 * Return Values:
 *		dev_info
 *		NULL
 * Notes:
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
    char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client dip %p already exists",
		    (void *)cdip);
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	if (cdip) {
		/* undo any partial property decoration, then free the node */
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *		Find a matching devinfo node for given client node name
 *		and its guid.
 *      Return Values:
 *              Handle to a dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
    char        *data;
    dev_info_t  *cdip = NULL;
    dev_info_t  *ndip = NULL;
    int         circular;

    /* Hold the vHCI dip steady while walking its child list */
    ndi_devi_enter(vh->vh_dip, &circular);
    ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
    while ((cdip = ndip) != NULL) {
        /* Grab the sibling first; cdip may be examined at leisure */
        ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

        if (strcmp(DEVI(cdip)->devi_node_name, name)) {
            continue;
        }

        /* Node name matches; now compare the GUID property */
        if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
            DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
            &data) != DDI_PROP_SUCCESS) {
            continue;
        }

        if (strcmp(data, guid) != 0) {
            ddi_prop_free(data);
            continue;
        }
        ddi_prop_free(data);
        break;
    }
    ndi_devi_exit(vh->vh_dip, circular);
    return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *      Remove a client device node.  Offline/remove is only attempted
 *      when the child is still present under vdip, or when the caller
 *      passes MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED; otherwise
 *      MDI_SUCCESS is returned without touching the node.
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
    int rv = MDI_SUCCESS;

    if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
        (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
        rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
        if (rv != NDI_SUCCESS) {
            MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
                " failed. cdip = %p\n", (void *)cdip));
        }
        /*
         * Convert to MDI error code
         */
        switch (rv) {
        case NDI_SUCCESS:
            rv = MDI_SUCCESS;
            break;
        case NDI_BUSY:
            rv = MDI_BUSY;
            break;
        default:
            rv = MDI_FAILURE;
            break;
        }
    }
    return (rv);
}

/*
 * i_devi_get_client()
 *      Utility function to get mpxio component extensions
 *      (the mdi_client_t hanging off a client devinfo node, or NULL
 *      if cdip is not marked as an MDI client).
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
    mdi_client_t        *ct = NULL;

    if (MDI_CLIENT(cdip)) {
        ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
    }
    return (ct);
}

/*
 * i_mdi_is_child_present():
 *      Search for the presence of client device dev_info node
 *      in vdip's child list.
 * Return Values:
 *      MDI_SUCCESS - cdip found among vdip's children
 *      MDI_FAILURE - not found
 */
static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
    int             rv = MDI_FAILURE;
    struct dev_info *dip;
    int             circular;

    ndi_devi_enter(vdip, &circular);
    dip = DEVI(vdip)->devi_child;
    while (dip) {
        if (dip == DEVI(cdip)) {
            rv = MDI_SUCCESS;
            break;
        }
        dip = dip->devi_sibling;
    }
    ndi_devi_exit(vdip, circular);
    return (rv);
}


/*
 * i_mdi_client_lock():
 *      Grab client component lock
 * Return Values:
 *      None
 * Note:
 *      The default locking order is:
 *      _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *      But there are number of situations where locks need to be
 *      grabbed in reverse order.  This routine implements try and lock
 *      mechanism depending on the requested parameter option.
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
    if (pip) {
        /*
         * Reverse locking is requested: caller holds pip's mutex,
         * so the client lock may only be acquired via tryenter.
         */
        while (MDI_CLIENT_TRYLOCK(ct) == 0) {
            /*
             * tryenter failed.  Try to grab again
             * after a small delay
             */
            MDI_PI_HOLD(pip);
            MDI_PI_UNLOCK(pip);
            delay(1);
            MDI_PI_LOCK(pip);
            MDI_PI_RELE(pip);
        }
    } else {
        MDI_CLIENT_LOCK(ct);
    }
}

/*
 * i_mdi_client_unlock():
 *      Unlock a client component
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
    MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_client_alloc():
 *      Allocate and initialize a client structure.  Caller should
 *      hold the vhci client lock.
 * Return Values:
 *      Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
    mdi_client_t        *ct;

    ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

    /*
     * Allocate and initialize a component structure.
     */
    ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
    mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
    ct->ct_hnext = NULL;
    ct->ct_hprev = NULL;
    ct->ct_dip = NULL;
    ct->ct_vhci = vh;
    /* Private copies of the driver name and GUID strings */
    ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
    (void) strcpy(ct->ct_drvname, name);
    ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
    (void) strcpy(ct->ct_guid, lguid);
    ct->ct_cprivate = NULL;
    ct->ct_vprivate = NULL;
    ct->ct_flags = 0;
    /* New clients start with no usable paths */
    ct->ct_state = MDI_CLIENT_STATE_FAILED;
    MDI_CLIENT_LOCK(ct);
    MDI_CLIENT_SET_OFFLINE(ct);
    MDI_CLIENT_SET_DETACH(ct);
    MDI_CLIENT_SET_POWER_UP(ct);
    MDI_CLIENT_UNLOCK(ct);
    ct->ct_failover_flags = 0;
    ct->ct_failover_status = 0;
    cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
    ct->ct_unstable = 0;
    cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
    cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
    /* Inherit the vHCI's load-balancing policy and default region size */
    ct->ct_lb = vh->vh_lb;
    ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
    ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
    ct->ct_path_count = 0;
    ct->ct_path_head = NULL;
    ct->ct_path_tail = NULL;
    ct->ct_path_last = NULL;

    /*
     * Add this client component to our client hash queue
     */
    i_mdi_client_enlist_table(vh, ct);
    return (ct);
}

/*
 * i_mdi_client_enlist_table():
 *      Attach the client device to the client hash table.  Caller
 *      should hold the vhci client lock.
 */
static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
    int                 index;
    struct client_hash  *head;

    ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

    /* Bucket is chosen by hashing the client GUID; insert at head */
    index = i_mdi_get_hash_key(ct->ct_guid);
    head = &vh->vh_client_table[index];
    ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
    head->ct_hash_head = ct;
    head->ct_hash_count++;
    vh->vh_client_count++;
}

/*
 * i_mdi_client_delist_table():
 *      Detach the client device from the client hash table.
 *      Caller should hold the vhci client lock.
 */
static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
    int                 index;
    char                *guid;
    struct client_hash  *head;
    mdi_client_t        *next;
    mdi_client_t        *last;

    ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

    guid = ct->ct_guid;
    index = i_mdi_get_hash_key(guid);
    head = &vh->vh_client_table[index];

    /* Singly-linked bucket: track the predecessor while searching */
    last = NULL;
    next = (mdi_client_t *)head->ct_hash_head;
    while (next != NULL) {
        if (next == ct) {
            break;
        }
        last = next;
        next = next->ct_hnext;
    }

    /* Unlink only if ct was actually found in the bucket */
    if (next) {
        head->ct_hash_count--;
        if (last == NULL) {
            head->ct_hash_head = ct->ct_hnext;
        } else {
            last->ct_hnext = ct->ct_hnext;
        }
        ct->ct_hnext = NULL;
        vh->vh_client_count--;
    }
}


/*
 * i_mdi_client_free():
 *      Free a client component: strip the devinfo node's MDI
 *      decoration, delist the client from the hash table, release
 *      all client resources, and finally (with the vhci client lock
 *      temporarily dropped) remove the devinfo node itself.
 * Return Values:
 *      MDI_SUCCESS (always, as currently written)
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
    int             rv = MDI_SUCCESS;
    int             flags = ct->ct_flags;
    dev_info_t      *cdip;
    dev_info_t      *vdip;

    ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

    vdip = vh->vh_dip;
    cdip = ct->ct_dip;

    /*
     * NOTE(review): cdip is dereferenced unconditionally here, yet the
     * tail of this function guards on (cdip != NULL) -- confirm whether
     * ct->ct_dip can legitimately be NULL at this point.
     */
    (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
    DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
    DEVI(cdip)->devi_mdi_client = NULL;

    /*
     * Clear out back ref. to dev_info_t node
     */
    ct->ct_dip = NULL;

    /*
     * Remove this client from our hash queue
     */
    i_mdi_client_delist_table(vh, ct);

    /*
     * Uninitialize and free the component
     */
    kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
    kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
    kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
    cv_destroy(&ct->ct_failover_cv);
    cv_destroy(&ct->ct_unstable_cv);
    cv_destroy(&ct->ct_powerchange_cv);
    mutex_destroy(&ct->ct_mutex);
    kmem_free(ct, sizeof (*ct));

    /*
     * Drop the vhci client lock across the devinfo removal; ct is
     * already freed and delisted, so it cannot be found by others.
     */
    if (cdip != NULL) {
        MDI_VHCI_CLIENT_UNLOCK(vh);
        (void) i_mdi_devinfo_remove(vdip, cdip, flags);
        MDI_VHCI_CLIENT_LOCK(vh);
    }
    return (rv);
}

/*
 * i_mdi_client_find():
 *      Find the client structure corresponding to a given guid
 *      (and, when cname is non-NULL, a matching driver name).
 *      Caller should hold the vhci client lock.
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
    int                 index;
    struct client_hash  *head;
    mdi_client_t        *ct;

    ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

    index = i_mdi_get_hash_key(guid);
    head = &vh->vh_client_table[index];

    ct = head->ct_hash_head;
    while (ct != NULL) {
        if (strcmp(ct->ct_guid, guid) == 0 &&
            (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
            break;
        }
        ct = ct->ct_hnext;
    }
    return (ct);
}

/*
 * i_mdi_client_update_state():
 *      Compute and update client device state
 * Notes:
 *      A client device can be in any of three possible states:
 *
 *      MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *      one online/standby paths.  Can tolerate failures.
 *      MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *      no alternate paths available as standby.  A failure on the online
 *      would result in loss of access to device data.
1452 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1453 * no paths available to access the device. 1454 */ 1455 static void 1456 i_mdi_client_update_state(mdi_client_t *ct) 1457 { 1458 int state; 1459 1460 ASSERT(MDI_CLIENT_LOCKED(ct)); 1461 state = i_mdi_client_compute_state(ct, NULL); 1462 MDI_CLIENT_SET_STATE(ct, state); 1463 } 1464 1465 /* 1466 * i_mdi_client_compute_state(): 1467 * Compute client device state 1468 * 1469 * mdi_phci_t * Pointer to pHCI structure which should 1470 * while computing the new value. Used by 1471 * i_mdi_phci_offline() to find the new 1472 * client state after DR of a pHCI. 1473 */ 1474 static int 1475 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1476 { 1477 int state; 1478 int online_count = 0; 1479 int standby_count = 0; 1480 mdi_pathinfo_t *pip, *next; 1481 1482 ASSERT(MDI_CLIENT_LOCKED(ct)); 1483 pip = ct->ct_path_head; 1484 while (pip != NULL) { 1485 MDI_PI_LOCK(pip); 1486 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1487 if (MDI_PI(pip)->pi_phci == ph) { 1488 MDI_PI_UNLOCK(pip); 1489 pip = next; 1490 continue; 1491 } 1492 1493 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1494 == MDI_PATHINFO_STATE_ONLINE) 1495 online_count++; 1496 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1497 == MDI_PATHINFO_STATE_STANDBY) 1498 standby_count++; 1499 MDI_PI_UNLOCK(pip); 1500 pip = next; 1501 } 1502 1503 if (online_count == 0) { 1504 if (standby_count == 0) { 1505 state = MDI_CLIENT_STATE_FAILED; 1506 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1507 " ct = %p\n", (void *)ct)); 1508 } else if (standby_count == 1) { 1509 state = MDI_CLIENT_STATE_DEGRADED; 1510 } else { 1511 state = MDI_CLIENT_STATE_OPTIMAL; 1512 } 1513 } else if (online_count == 1) { 1514 if (standby_count == 0) { 1515 state = MDI_CLIENT_STATE_DEGRADED; 1516 } else { 1517 state = MDI_CLIENT_STATE_OPTIMAL; 1518 } 1519 } else { 1520 state = MDI_CLIENT_STATE_OPTIMAL; 1521 } 1522 return (state); 1523 } 1524 
1525 /* 1526 * i_mdi_client2devinfo(): 1527 * Utility function 1528 */ 1529 dev_info_t * 1530 i_mdi_client2devinfo(mdi_client_t *ct) 1531 { 1532 return (ct->ct_dip); 1533 } 1534 1535 /* 1536 * mdi_client_path2_devinfo(): 1537 * Given the parent devinfo and child devfs pathname, search for 1538 * a valid devfs node handle. 1539 */ 1540 dev_info_t * 1541 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1542 { 1543 dev_info_t *cdip = NULL; 1544 dev_info_t *ndip = NULL; 1545 char *temp_pathname; 1546 int circular; 1547 1548 /* 1549 * Allocate temp buffer 1550 */ 1551 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1552 1553 /* 1554 * Lock parent against changes 1555 */ 1556 ndi_devi_enter(vdip, &circular); 1557 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1558 while ((cdip = ndip) != NULL) { 1559 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1560 1561 *temp_pathname = '\0'; 1562 (void) ddi_pathname(cdip, temp_pathname); 1563 if (strcmp(temp_pathname, pathname) == 0) { 1564 break; 1565 } 1566 } 1567 /* 1568 * Release devinfo lock 1569 */ 1570 ndi_devi_exit(vdip, circular); 1571 1572 /* 1573 * Free the temp buffer 1574 */ 1575 kmem_free(temp_pathname, MAXPATHLEN); 1576 return (cdip); 1577 } 1578 1579 /* 1580 * mdi_client_get_path_count(): 1581 * Utility function to get number of path information nodes 1582 * associated with a given client device. 
1583 */ 1584 int 1585 mdi_client_get_path_count(dev_info_t *cdip) 1586 { 1587 mdi_client_t *ct; 1588 int count = 0; 1589 1590 ct = i_devi_get_client(cdip); 1591 if (ct != NULL) { 1592 count = ct->ct_path_count; 1593 } 1594 return (count); 1595 } 1596 1597 1598 /* 1599 * i_mdi_get_hash_key(): 1600 * Create a hash using strings as keys 1601 * 1602 */ 1603 static int 1604 i_mdi_get_hash_key(char *str) 1605 { 1606 uint32_t g, hash = 0; 1607 char *p; 1608 1609 for (p = str; *p != '\0'; p++) { 1610 g = *p; 1611 hash += g; 1612 } 1613 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1614 } 1615 1616 /* 1617 * mdi_get_lb_policy(): 1618 * Get current load balancing policy for a given client device 1619 */ 1620 client_lb_t 1621 mdi_get_lb_policy(dev_info_t *cdip) 1622 { 1623 client_lb_t lb = LOAD_BALANCE_NONE; 1624 mdi_client_t *ct; 1625 1626 ct = i_devi_get_client(cdip); 1627 if (ct != NULL) { 1628 lb = ct->ct_lb; 1629 } 1630 return (lb); 1631 } 1632 1633 /* 1634 * mdi_set_lb_region_size(): 1635 * Set current region size for the load-balance 1636 */ 1637 int 1638 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1639 { 1640 mdi_client_t *ct; 1641 int rv = MDI_FAILURE; 1642 1643 ct = i_devi_get_client(cdip); 1644 if (ct != NULL && ct->ct_lb_args != NULL) { 1645 ct->ct_lb_args->region_size = region_size; 1646 rv = MDI_SUCCESS; 1647 } 1648 return (rv); 1649 } 1650 1651 /* 1652 * mdi_Set_lb_policy(): 1653 * Set current load balancing policy for a given client device 1654 */ 1655 int 1656 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1657 { 1658 mdi_client_t *ct; 1659 int rv = MDI_FAILURE; 1660 1661 ct = i_devi_get_client(cdip); 1662 if (ct != NULL) { 1663 ct->ct_lb = lb; 1664 rv = MDI_SUCCESS; 1665 } 1666 return (rv); 1667 } 1668 1669 /* 1670 * mdi_failover(): 1671 * failover function called by the vHCI drivers to initiate 1672 * a failover operation. This is typically due to non-availability 1673 * of online paths to route I/O requests. 
 *      Failover can be
 *      triggered through user application also.
 *
 *      The vHCI driver calls mdi_failover() to initiate a failover
 *      operation. mdi_failover() calls back into the vHCI driver's
 *      vo_failover() entry point to perform the actual failover
 *      operation. The reason for requiring the vHCI driver to
 *      initiate failover by calling mdi_failover(), instead of directly
 *      executing vo_failover() itself, is to ensure that the mdi
 *      framework can keep track of the client state properly.
 *      Additionally, mdi_failover() provides as a convenience the
 *      option of performing the failover operation synchronously or
 *      asynchronously
 *
 *      Upon successful completion of the failover operation, the
 *      paths that were previously ONLINE will be in the STANDBY state,
 *      and the newly activated paths will be in the ONLINE state.
 *
 *      The flags modifier determines whether the activation is done
 *      synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *      MDI_SUCCESS
 *      MDI_FAILURE
 *      MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
    int             rv;
    mdi_client_t    *ct;

    ct = i_devi_get_client(cdip);
    ASSERT(ct != NULL);
    if (ct == NULL) {
        /* cdip is not a valid client device. Nothing more to do. */
        return (MDI_FAILURE);
    }

    MDI_CLIENT_LOCK(ct);

    if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
        /* A path to the client is being freed */
        MDI_CLIENT_UNLOCK(ct);
        return (MDI_BUSY);
    }


    if (MDI_CLIENT_IS_FAILED(ct)) {
        /*
         * Client is in failed state. Nothing more to do.
         */
        MDI_CLIENT_UNLOCK(ct);
        return (MDI_FAILURE);
    }

    if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
        /*
         * Failover is already in progress; return BUSY
         */
        MDI_CLIENT_UNLOCK(ct);
        return (MDI_BUSY);
    }
    /*
     * Make sure that mdi_pathinfo node state changes are processed.
     * We do not allow failovers to progress while client path state
     * changes are in progress
     */
    if (ct->ct_unstable) {
        if (flags == MDI_FAILOVER_ASYNC) {
            MDI_CLIENT_UNLOCK(ct);
            return (MDI_BUSY);
        } else {
            /* Synchronous caller: block until the client stabilizes */
            while (ct->ct_unstable)
                cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
        }
    }

    /*
     * Client device is in stable state. Before proceeding, perform sanity
     * checks again (state may have changed while we waited above).
     */
    if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
        (!i_ddi_devi_attached(ct->ct_dip))) {
        /*
         * Client is in failed state. Nothing more to do.
         */
        MDI_CLIENT_UNLOCK(ct);
        return (MDI_FAILURE);
    }

    /*
     * Set the client state as failover in progress.
     */
    MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
    ct->ct_failover_flags = flags;
    MDI_CLIENT_UNLOCK(ct);

    if (flags == MDI_FAILOVER_ASYNC) {
        /*
         * Submit the initiate failover request via CPR safe
         * taskq threads.
         */
        (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
            ct, KM_SLEEP);
        return (MDI_ACCEPT);
    } else {
        /*
         * Synchronous failover mode.  Typically invoked from the user
         * land.
         */
        rv = i_mdi_failover(ct);
    }
    return (rv);
}

/*
 * i_mdi_failover():
 *      internal failover function. Invokes vHCI drivers failover
 *      callback function and process the failover status
 * Return Values:
 *      None
 *
 * Note: A client device in failover state can not be detached or freed.
1797 */ 1798 static int 1799 i_mdi_failover(void *arg) 1800 { 1801 int rv = MDI_SUCCESS; 1802 mdi_client_t *ct = (mdi_client_t *)arg; 1803 mdi_vhci_t *vh = ct->ct_vhci; 1804 1805 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1806 1807 if (vh->vh_ops->vo_failover != NULL) { 1808 /* 1809 * Call vHCI drivers callback routine 1810 */ 1811 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1812 ct->ct_failover_flags); 1813 } 1814 1815 MDI_CLIENT_LOCK(ct); 1816 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1817 1818 /* 1819 * Save the failover return status 1820 */ 1821 ct->ct_failover_status = rv; 1822 1823 /* 1824 * As a result of failover, client status would have been changed. 1825 * Update the client state and wake up anyone waiting on this client 1826 * device. 1827 */ 1828 i_mdi_client_update_state(ct); 1829 1830 cv_broadcast(&ct->ct_failover_cv); 1831 MDI_CLIENT_UNLOCK(ct); 1832 return (rv); 1833 } 1834 1835 /* 1836 * Load balancing is logical block. 1837 * IOs within the range described by region_size 1838 * would go on the same path. This would improve the 1839 * performance by cache-hit on some of the RAID devices. 1840 * Search only for online paths(At some point we 1841 * may want to balance across target ports). 1842 * If no paths are found then default to round-robin. 
1843 */ 1844 static int 1845 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1846 { 1847 int path_index = -1; 1848 int online_path_count = 0; 1849 int online_nonpref_path_count = 0; 1850 int region_size = ct->ct_lb_args->region_size; 1851 mdi_pathinfo_t *pip; 1852 mdi_pathinfo_t *next; 1853 int preferred, path_cnt; 1854 1855 pip = ct->ct_path_head; 1856 while (pip) { 1857 MDI_PI_LOCK(pip); 1858 if (MDI_PI(pip)->pi_state == 1859 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1860 online_path_count++; 1861 } else if (MDI_PI(pip)->pi_state == 1862 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1863 online_nonpref_path_count++; 1864 } 1865 next = (mdi_pathinfo_t *) 1866 MDI_PI(pip)->pi_client_link; 1867 MDI_PI_UNLOCK(pip); 1868 pip = next; 1869 } 1870 /* if found any online/preferred then use this type */ 1871 if (online_path_count > 0) { 1872 path_cnt = online_path_count; 1873 preferred = 1; 1874 } else if (online_nonpref_path_count > 0) { 1875 path_cnt = online_nonpref_path_count; 1876 preferred = 0; 1877 } else { 1878 path_cnt = 0; 1879 } 1880 if (path_cnt) { 1881 path_index = (bp->b_blkno >> region_size) % path_cnt; 1882 pip = ct->ct_path_head; 1883 while (pip && path_index != -1) { 1884 MDI_PI_LOCK(pip); 1885 if (path_index == 0 && 1886 (MDI_PI(pip)->pi_state == 1887 MDI_PATHINFO_STATE_ONLINE) && 1888 MDI_PI(pip)->pi_preferred == preferred) { 1889 MDI_PI_HOLD(pip); 1890 MDI_PI_UNLOCK(pip); 1891 *ret_pip = pip; 1892 return (MDI_SUCCESS); 1893 } 1894 path_index --; 1895 next = (mdi_pathinfo_t *) 1896 MDI_PI(pip)->pi_client_link; 1897 MDI_PI_UNLOCK(pip); 1898 pip = next; 1899 } 1900 if (pip == NULL) { 1901 MDI_DEBUG(4, (CE_NOTE, NULL, 1902 "!lba %llx, no pip !!\n", 1903 bp->b_lblkno)); 1904 } else { 1905 MDI_DEBUG(4, (CE_NOTE, NULL, 1906 "!lba %llx, no pip for path_index, " 1907 "pip %p\n", bp->b_lblkno, (void *)pip)); 1908 } 1909 } 1910 return (MDI_FAILURE); 1911 } 1912 1913 /* 1914 * mdi_select_path(): 1915 * select a 
 *      path to access a client device.
 *
 *      mdi_select_path() function is called by the vHCI drivers to
 *      select a path to route the I/O request to.  The caller passes
 *      the block I/O data transfer structure ("buf") as one of the
 *      parameters.  The mpxio framework uses the buf structure
 *      contents to maintain per path statistics (total I/O size /
 *      count pending).  If more than one online paths are available to
 *      select, the framework automatically selects a suitable path
 *      for routing I/O request.  If a failover operation is active for
 *      this client device the call shall be failed with MDI_BUSY error
 *      code.
 *
 *      By default this function returns a suitable path in online
 *      state based on the current load balancing policy.  Currently
 *      we support LOAD_BALANCE_NONE (Previously selected online path
 *      will continue to be used till the path is usable) and
 *      LOAD_BALANCE_RR (Online paths will be selected in a round
 *      robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *      based on the logical block).  The load balancing policy can be
 *      configured through the vHCI driver's configuration file
 *      (driver.conf).
 *
 *      vHCI drivers may override this default behavior by specifying
 *      appropriate flags.  The meaning of the third argument depends
 *      on the flags specified.  If MDI_SELECT_PATH_INSTANCE is set
 *      then the argument is the "path instance" of the path to select.
 *      If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *      "start_pip".  A non NULL "start_pip" is the starting point to
 *      walk and find the next appropriate path.  The following values
 *      are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *      ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
 *      STANDBY path).
 *
 *      The non-standard behavior is used by the scsi_vhci driver,
 *      whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *      attach of client devices (to avoid an unnecessary failover
 *      when the STANDBY path comes up first), during failover
 *      (to activate a STANDBY path as ONLINE).
 *
 *      The selected path is returned in a mdi_hold_path() state
 *      (pi_ref_cnt).  Caller should release the hold by calling
 *      mdi_rele_path().
 *
 * Return Values:
 *      MDI_SUCCESS     - Completed successfully
 *      MDI_BUSY        - Client device is busy failing over
 *      MDI_NOPATH      - Client device is online, but no valid path are
 *                        available to access this client device
 *      MDI_FAILURE     - Invalid client device or state
 *      MDI_DEVI_ONLINING
 *                      - Client device (struct dev_info state) is in
 *                        onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    void *arg, mdi_pathinfo_t **ret_pip)
{
    mdi_client_t    *ct;
    mdi_pathinfo_t  *pip;
    mdi_pathinfo_t  *next;
    mdi_pathinfo_t  *head;
    mdi_pathinfo_t  *start;
    client_lb_t     lbp;            /* load balancing policy */
    int             sb = 1;         /* standard behavior */
    int             preferred = 1;  /* preferred path */
    int             cond, cont = 1;
    int             retry = 0;
    mdi_pathinfo_t  *start_pip;     /* request starting pathinfo */
    int             path_instance;  /* request specific path instance */

    /* determine type of arg based on flags */
    if (flags & MDI_SELECT_PATH_INSTANCE) {
        flags &= ~MDI_SELECT_PATH_INSTANCE;
        path_instance = (int)(intptr_t)arg;
        start_pip = NULL;
    } else {
        path_instance = 0;
        start_pip = (mdi_pathinfo_t *)arg;
    }

    if (flags != 0) {
        /*
         * disable default behavior
         */
        sb = 0;
    }

    *ret_pip = NULL;
    ct = i_devi_get_client(cdip);
    if (ct == NULL) {
        /* mdi extensions are NULL, Nothing more to do */
        return (MDI_FAILURE);
    }

    MDI_CLIENT_LOCK(ct);

    if (sb) {
        if (MDI_CLIENT_IS_FAILED(ct)) {
            /*
             * Client is not ready to accept any I/O requests.
             * Fail this request.
             */
            MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
                "client state offline ct = %p\n", (void *)ct));
            MDI_CLIENT_UNLOCK(ct);
            return (MDI_FAILURE);
        }

        if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
            /*
             * Check for Failover is in progress. If so tell the
             * caller that this device is busy.
             */
            MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
                "client failover in progress ct = %p\n",
                (void *)ct));
            MDI_CLIENT_UNLOCK(ct);
            return (MDI_BUSY);
        }

        /*
         * Check to see whether the client device is attached.
         * If not so, let the vHCI driver manually select a path
         * (standby) and let the probe/attach process to continue.
         */
        if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
            MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
                "ct = %p\n", (void *)ct));
            MDI_CLIENT_UNLOCK(ct);
            return (MDI_DEVI_ONLINING);
        }
    }

    /*
     * Cache in the client list head.  If head of the list is NULL
     * return MDI_NOPATH
     */
    head = ct->ct_path_head;
    if (head == NULL) {
        MDI_CLIENT_UNLOCK(ct);
        return (MDI_NOPATH);
    }

    /* Caller is specifying a specific pathinfo path by path_instance */
    if (path_instance) {
        /* search for pathinfo with correct path_instance */
        for (pip = head;
            pip && (mdi_pi_get_path_instance(pip) != path_instance);
            pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
            ;

        /* If path can't be selected then MDI_FAILURE is returned. */
        if (pip == NULL) {
            MDI_CLIENT_UNLOCK(ct);
            return (MDI_FAILURE);
        }

        /* verify state of path */
        MDI_PI_LOCK(pip);
        if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) {
            MDI_PI_UNLOCK(pip);
            MDI_CLIENT_UNLOCK(ct);
            return (MDI_FAILURE);
        }

        /*
         * Return the path in hold state.  Caller should release the
         * lock by calling mdi_rele_path()
         */
        MDI_PI_HOLD(pip);
        MDI_PI_UNLOCK(pip);
        ct->ct_path_last = pip;
        *ret_pip = pip;
        MDI_CLIENT_UNLOCK(ct);
        return (MDI_SUCCESS);
    }

    /*
     * for non default behavior, bypass current
     * load balancing policy and always use LOAD_BALANCE_RR
     * except that the start point will be adjusted based
     * on the provided start_pip
     */
    lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

    switch (lbp) {
    case LOAD_BALANCE_NONE:
        /*
         * Load balancing is None or Alternate path mode
         * Start looking for a online mdi_pathinfo node starting from
         * last known selected path
         */
        preferred = 1;
        pip = (mdi_pathinfo_t *)ct->ct_path_last;
        if (pip == NULL) {
            pip = head;
        }
        start = pip;
        do {
            MDI_PI_LOCK(pip);
            /*
             * No need to explicitly check if the path is disabled.
             * Since we are checking for state == ONLINE and the
             * same variable is used for DISABLE/ENABLE information.
             */
            if ((MDI_PI(pip)->pi_state ==
                MDI_PATHINFO_STATE_ONLINE) &&
                preferred == MDI_PI(pip)->pi_preferred) {
                /*
                 * Return the path in hold state.  Caller should
                 * release the lock by calling mdi_rele_path()
                 */
                MDI_PI_HOLD(pip);
                MDI_PI_UNLOCK(pip);
                ct->ct_path_last = pip;
                *ret_pip = pip;
                MDI_CLIENT_UNLOCK(ct);
                return (MDI_SUCCESS);
            }

            /*
             * Path is busy.
             */
            if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
                MDI_PI_IS_TRANSIENT(pip))
                retry = 1;
            /*
             * Keep looking for a next available online path
             */
            next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
            if (next == NULL) {
                next = head;
            }
            MDI_PI_UNLOCK(pip);
            pip = next;
            /*
             * First full lap over preferred paths, then a second
             * lap over non-preferred before giving up.
             */
            if (start == pip && preferred) {
                preferred = 0;
            } else if (start == pip && !preferred) {
                cont = 0;
            }
        } while (cont);
        break;

    case LOAD_BALANCE_LBA:
        /*
         * Make sure we are looking
         * for an online path.  Otherwise, if it is for a STANDBY
         * path request, it will go through and fetch an ONLINE
         * path which is not desirable.
         */
        if ((ct->ct_lb_args != NULL) &&
            (ct->ct_lb_args->region_size) && bp &&
            (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
            if (i_mdi_lba_lb(ct, ret_pip, bp)
                == MDI_SUCCESS) {
                MDI_CLIENT_UNLOCK(ct);
                return (MDI_SUCCESS);
            }
        }
        /* FALLTHROUGH */
    case LOAD_BALANCE_RR:
        /*
         * Load balancing is Round Robin.  Start looking for a online
         * mdi_pathinfo node starting from last known selected path
         * as the start point.  If override flags are specified,
         * process accordingly.
         * If the search is already in effect (start_pip not null),
         * then lets just use the same path preference to continue the
         * traversal.
         */

        if (start_pip != NULL) {
            preferred = MDI_PI(start_pip)->pi_preferred;
        } else {
            preferred = 1;
        }

        start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
        if (start == NULL) {
            pip = head;
        } else {
            pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
            if (pip == NULL) {
                if (!sb) {
                    if (preferred == 0) {
                        /*
                         * Looks like we have completed
                         * the traversal as preferred
                         * value is 0.  Time to bail out.
                         */
                        *ret_pip = NULL;
                        MDI_CLIENT_UNLOCK(ct);
                        return (MDI_NOPATH);
                    } else {
                        /*
                         * Looks like we reached the
                         * end of the list.  Lets enable
                         * traversal of non preferred
                         * paths.
                         */
                        preferred = 0;
                    }
                }
                pip = head;
            }
        }
        start = pip;
        do {
            MDI_PI_LOCK(pip);
            if (sb) {
                cond = ((MDI_PI(pip)->pi_state ==
                    MDI_PATHINFO_STATE_ONLINE &&
                    MDI_PI(pip)->pi_preferred ==
                    preferred) ? 1 : 0);
            } else {
                if (flags == MDI_SELECT_ONLINE_PATH) {
                    cond = ((MDI_PI(pip)->pi_state ==
                        MDI_PATHINFO_STATE_ONLINE &&
                        MDI_PI(pip)->pi_preferred ==
                        preferred) ? 1 : 0);
                } else if (flags == MDI_SELECT_STANDBY_PATH) {
                    cond = ((MDI_PI(pip)->pi_state ==
                        MDI_PATHINFO_STATE_STANDBY &&
                        MDI_PI(pip)->pi_preferred ==
                        preferred) ? 1 : 0);
                } else if (flags == (MDI_SELECT_ONLINE_PATH |
                    MDI_SELECT_STANDBY_PATH)) {
                    cond = (((MDI_PI(pip)->pi_state ==
                        MDI_PATHINFO_STATE_ONLINE ||
                        (MDI_PI(pip)->pi_state ==
                        MDI_PATHINFO_STATE_STANDBY)) &&
                        MDI_PI(pip)->pi_preferred ==
                        preferred) ? 1 : 0);
                } else if (flags ==
                    (MDI_SELECT_STANDBY_PATH |
                    MDI_SELECT_ONLINE_PATH |
                    MDI_SELECT_USER_DISABLE_PATH)) {
                    cond = (((MDI_PI(pip)->pi_state ==
                        MDI_PATHINFO_STATE_ONLINE ||
                        (MDI_PI(pip)->pi_state ==
                        MDI_PATHINFO_STATE_STANDBY) ||
                        (MDI_PI(pip)->pi_state ==
                        (MDI_PATHINFO_STATE_ONLINE|
                        MDI_PATHINFO_STATE_USER_DISABLE)) ||
                        (MDI_PI(pip)->pi_state ==
                        (MDI_PATHINFO_STATE_STANDBY |
                        MDI_PATHINFO_STATE_USER_DISABLE)))&&
                        MDI_PI(pip)->pi_preferred ==
                        preferred) ? 1 : 0);
                } else {
                    cond = 0;
                }
            }
            /*
             * No need to explicitly check if the path is disabled.
             * Since we are checking for state == ONLINE and the
             * same variable is used for DISABLE/ENABLE information.
             */
            if (cond) {
                /*
                 * Return the path in hold state.  Caller should
                 * release the lock by calling mdi_rele_path()
                 */
                MDI_PI_HOLD(pip);
                MDI_PI_UNLOCK(pip);
                if (sb)
                    ct->ct_path_last = pip;
                *ret_pip = pip;
                MDI_CLIENT_UNLOCK(ct);
                return (MDI_SUCCESS);
            }
            /*
             * Path is busy.
             */
            if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
                MDI_PI_IS_TRANSIENT(pip))
                retry = 1;

            /*
             * Keep looking for a next available online path
             */
do_again:
            next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
            if (next == NULL) {
                if (!sb) {
                    if (preferred == 1) {
                        /*
                         * Looks like we reached the
                         * end of the list.  Lets enable
                         * traversal of non preferred
                         * paths.
                         */
                        preferred = 0;
                        next = head;
                    } else {
                        /*
                         * We have done both the passes
                         * Preferred as well as for
                         * Non-preferred.  Bail out now.
                         */
                        cont = 0;
                    }
                } else {
                    /*
                     * Standard behavior case.
                     */
                    next = head;
                }
            }
            MDI_PI_UNLOCK(pip);
            if (cont == 0) {
                break;
            }
            pip = next;

            if (!sb) {
                /*
                 * We need to handle the selection of
                 * non-preferred path in the following
                 * case:
                 *
                 * +------+   +------+   +------+   +-----+
                 * | A : 1| - | B : 1| - | C : 0| - |NULL |
                 * +------+   +------+   +------+   +-----+
                 *
                 * If we start the search with B, we need to
                 * skip beyond B to pick C which is non -
                 * preferred in the second pass.  The following
                 * test, if true, will allow us to skip over
                 * the 'start'(B in the example) to select
                 * other non preferred elements.
                 */
                if ((start_pip != NULL) && (start_pip == pip) &&
                    (MDI_PI(start_pip)->pi_preferred
                    != preferred)) {
                    /*
                     * try again after going past the start
                     * pip
                     */
                    MDI_PI_LOCK(pip);
                    goto do_again;
                }
            } else {
                /*
                 * Standard behavior case
                 */
                if (start == pip && preferred) {
                    /* look for nonpreferred paths */
                    preferred = 0;
                } else if (start == pip && !preferred) {
                    /*
                     * Exit condition
                     */
                    cont = 0;
                }
            }
        } while (cont);
        break;
    }

    MDI_CLIENT_UNLOCK(ct);
    if (retry == 1) {
        return (MDI_BUSY);
    } else {
        return (MDI_NOPATH);
    }
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *      Caller should hold the branch's devinfo node to get a consistent
 *      snap shot of the mdi_pathinfo nodes.
 *
 *      Please note that even the list is stable the mdi_pathinfo
 *      node state and properties are volatile.  The caller should lock
 *      and unlock the nodes by calling mdi_pi_lock() and
 *      mdi_pi_unlock() functions to get a stable properties.
 *
 *      If there is a need to use the nodes beyond the hold of the
 *      devinfo node period (For ex. I/O), then mdi_pathinfo node
 *      need to be held against unexpected removal by calling
 *      mdi_hold_path() and should be released by calling
 *      mdi_rele_path() on completion.
2402 */ 2403 mdi_pathinfo_t * 2404 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2405 { 2406 mdi_client_t *ct; 2407 2408 if (!MDI_CLIENT(ct_dip)) 2409 return (NULL); 2410 2411 /* 2412 * Walk through client link 2413 */ 2414 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2415 ASSERT(ct != NULL); 2416 2417 if (pip == NULL) 2418 return ((mdi_pathinfo_t *)ct->ct_path_head); 2419 2420 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2421 } 2422 2423 /* 2424 * For a phci, return the next available path to any client 2425 * Note: ditto mdi_get_next_phci_path() 2426 */ 2427 mdi_pathinfo_t * 2428 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2429 { 2430 mdi_phci_t *ph; 2431 2432 if (!MDI_PHCI(ph_dip)) 2433 return (NULL); 2434 2435 /* 2436 * Walk through pHCI link 2437 */ 2438 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2439 ASSERT(ph != NULL); 2440 2441 if (pip == NULL) 2442 return ((mdi_pathinfo_t *)ph->ph_path_head); 2443 2444 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2445 } 2446 2447 /* 2448 * mdi_hold_path(): 2449 * Hold the mdi_pathinfo node against unwanted unexpected free. 2450 * Return Values: 2451 * None 2452 */ 2453 void 2454 mdi_hold_path(mdi_pathinfo_t *pip) 2455 { 2456 if (pip) { 2457 MDI_PI_LOCK(pip); 2458 MDI_PI_HOLD(pip); 2459 MDI_PI_UNLOCK(pip); 2460 } 2461 } 2462 2463 2464 /* 2465 * mdi_rele_path(): 2466 * Release the mdi_pathinfo node which was selected 2467 * through mdi_select_path() mechanism or manually held by 2468 * calling mdi_hold_path(). 2469 * Return Values: 2470 * None 2471 */ 2472 void 2473 mdi_rele_path(mdi_pathinfo_t *pip) 2474 { 2475 if (pip) { 2476 MDI_PI_LOCK(pip); 2477 MDI_PI_RELE(pip); 2478 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2479 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2480 } 2481 MDI_PI_UNLOCK(pip); 2482 } 2483 } 2484 2485 /* 2486 * mdi_pi_lock(): 2487 * Lock the mdi_pathinfo node. 
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	/* ASSERT catches NULL in DEBUG builds; the if() is defensive */
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.
 * Note:
 *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	/* ASSERT catches NULL in DEBUG builds; the if() is defensive */
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *		Returns a pointer to the mdi_pathinfo node if a matching node is
 *		found.
 *		If "caddr" is NULL the search walks the pHCI's path list;
 *		otherwise the client identified by "caddr" is looked up and
 *		its path list is walked instead.  Either way a match requires
 *		the same unit address (and, on the client walk, the same pHCI).
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL
 * Notes:
 *		Caller need not hold any locks to call this function.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (CE_WARN, pdip,
			    "!mdi_pi_find: offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
		    (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
		    "found for caddr %s", caddr ? caddr : "NULL"));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order (vh_client_mutex before ct_mutex) while
	 * acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc():
 *		Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path and is capable of having properties attached
 *		and passed to mdi_pi_online() to fully attach and online the
 *		path and client device node.
 *		The mdi_pathinfo node returned by this function must be
 *		destroyed using mdi_pi_free() if the path is no longer
 *		operational or if the caller fails to attach a client device
 *		node when calling mdi_pi_online(). The framework will not free
 *		the resources allocated.
 *		This function can be called from both interrupt and kernel
 *		contexts.  DDI_NOSLEEP flag should be used while calling
 *		from interrupt contexts.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
	char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;	/* returned via 'fail:' if devinfo
					 * creation fails */
	int		path_allocated = 0;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
	    cname ? cname : "NULL", caddr ? caddr : "NULL",
	    paddr ? paddr : "NULL"));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/* Mark the pHCI unstable for the duration of the allocation */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/*
	 * Check whether a pathinfo node for this (pHCI, paddr) pair
	 * already exists on the client's path list; if so, reuse it.
	 */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (CE_NOTE, pdip,
	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	/* Convenience wrapper: no "compatible" property list */
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *		mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	static char	path[MAXPATHLEN];
	char		*path_persistent;
	int		path_instance;
	mod_hash_val_t	hv;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit disable settings from the owning pHCI */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);

	/*
	 * We form the "path" to the pathinfo node, and see if we have
	 * already allocated a 'path_instance' for that "path". If so,
	 * we use the already allocated 'path_instance'.  If not, we
	 * allocate a new 'path_instance' and associate it with a copy of
	 * the "path" string (which is never freed). The association
	 * between a 'path_instance' and its "path" string persists until
	 * reboot.
	 *
	 * NOTE: 'path' is a static buffer; this is safe only because all
	 * accesses occur while holding mdi_pathmap_mutex.
	 */
	mutex_enter(&mdi_pathmap_mutex);
	(void) ddi_pathname(ph->ph_dip, path);
	(void) sprintf(path + strlen(path), "/%s@%s",
	    ddi_node_name(ct->ct_dip), MDI_PI(pip)->pi_addr);
	if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
		path_instance = (uint_t)(intptr_t)hv;
	} else {
		/* allocate a new 'path_instance' and persistent "path" */
		path_instance = mdi_pathmap_instance++;
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_bypath,
		    (mod_hash_key_t)path_persistent,
		    (mod_hash_val_t)(intptr_t)path_instance);
		(void) mod_hash_insert(mdi_pathmap_byinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);
	}
	mutex_exit(&mdi_pathmap_mutex);
	MDI_PI(pip)->pi_path_instance = path_instance;

	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	return (pip);
}

/*
 * mdi_pi_pathname_by_instance():
 *	Lookup of "path" by 'path_instance'. Return "path".
 *	NOTE: returned "path" remains valid forever (until reboot).
 */
char *
mdi_pi_pathname_by_instance(int path_instance)
{
	char		*path;
	mod_hash_val_t	hv;

	/* mdi_pathmap lookup of "path" by 'path_instance' */
	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_byinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
		path = (char *)hv;
	else
		path = NULL;
	mutex_exit(&mdi_pathmap_mutex);
	return (path);
}

/*
 * i_mdi_phci_add_path():
 *		Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *		Caller must have the pHCI dev_info node held busy
 *		(DEVI_BUSY_OWNED); the per-pHCI mutex is taken here.
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Append to the tail of the pHCI's singly-linked path list */
	MDI_PHCI_LOCK(ph);
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Append to the tail of the client's singly-linked path list */
	MDI_CLIENT_LOCK(ct);
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_FAILURE;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition. A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 * Wait up to 60 seconds for pi_ref_cv to be broadcast
		 * (see mdi_rele_path()) before giving up with MDI_BUSY.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    (void *)pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock so the vHCI client lock can be taken first
	 * (locking order: vh_client_mutex before ct_mutex), then re-take
	 * the client lock.  PATH_FREE_IN_PROGRESS keeps failovers away
	 * across the gap.
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/* The free failed; put the pathinfo back into the vhci cache */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node (unlink from both lists, destroy
 *		its synchronization objects, and release its memory).
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *		Caller should hold per-pHCI mutex
 */
static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Find 'pip' on the pHCI's singly-linked list, tracking 'prev' */
	MDI_PHCI_LOCK(ph);
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_remove_path():
 *		Remove a mdi_pathinfo node from client path list.
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Find 'pip' on the client's singly-linked list, tracking 'prev' */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* Reset the round-robin cursor if it pointed at this node */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		online a mdi_pathinfo node
 *
 *		Drives a pathinfo node to the requested 'state'
 *		(ONLINE/STANDBY/FAULT/OFFLINE): marks the node transient,
 *		calls the vHCI driver's vo_pi_state_change() entry point,
 *		then updates the client and its dev_info node accordingly.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Don't offline the client dev_info node unless we have
		 * no available paths left at all.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (ct->ct_path_count == 1)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI driver's state-change entry point with all
	 * framework locks dropped.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* Roll the path back to its previous state */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state. The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
	int		client_held = 0;
	int		rv;
	int		se_flag;
	int		kmem_flag;

	ASSERT(ct != NULL);

	/* Drive the path through the common state machine to ONLINE. */
	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
	if (rv != MDI_SUCCESS)
		return (rv);

	/*
	 * Take a power-management hold on the path if one is not already
	 * held.  Remember that we did, since the client-side PM work below
	 * is only needed in that case.
	 */
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held == 0) {
		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
		    "i_mdi_pm_hold_pip %p\n", (void *)pip));
		i_mdi_pm_hold_pip(pip);
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	if (client_held) {
		/*
		 * If the client has no powered path yet, power up all
		 * pHCIs, then take a single PM hold on the client.
		 * Note the pathinfo lock is dropped before taking the
		 * client lock, per the documented lock ordering.
		 */
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_power_cnt == 0) {
			rv = i_mdi_power_all_phci(ct);
		}

		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
		    "i_mdi_pm_hold_client %p\n", (void *)ct));
		i_mdi_pm_hold_client(ct, 1);
		MDI_CLIENT_UNLOCK(ct);
	}

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	/* A new path is online. Invalidate DINFOCACHE snap shot. */
	i_ddi_di_cache_invalidate(kmem_flag);

	return (rv);
}

/*
 * mdi_pi_standby():
 *		Place the mdi_pathinfo node in standby state
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
}

/*
 * mdi_pi_fault():
 *		Place the mdi_pathinfo node in fault'ed state
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
}

/*
 * mdi_pi_offline():
 *		Offline a mdi_pathinfo node.
3670 * Return Values: 3671 * MDI_SUCCESS 3672 * MDI_FAILURE 3673 */ 3674 int 3675 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3676 { 3677 int ret, client_held = 0; 3678 mdi_client_t *ct; 3679 int se_flag; 3680 int kmem_flag; 3681 3682 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3683 3684 if (ret == MDI_SUCCESS) { 3685 MDI_PI_LOCK(pip); 3686 if (MDI_PI(pip)->pi_pm_held) { 3687 client_held = 1; 3688 } 3689 MDI_PI_UNLOCK(pip); 3690 3691 if (client_held) { 3692 ct = MDI_PI(pip)->pi_client; 3693 MDI_CLIENT_LOCK(ct); 3694 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3695 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3696 i_mdi_pm_rele_client(ct, 1); 3697 MDI_CLIENT_UNLOCK(ct); 3698 } 3699 3700 /* determine interrupt context */ 3701 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3702 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3703 3704 /* pathinfo is offlined. update DINFOCACHE. */ 3705 i_ddi_di_cache_invalidate(kmem_flag); 3706 } 3707 3708 return (ret); 3709 } 3710 3711 /* 3712 * i_mdi_pi_offline(): 3713 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3714 */ 3715 static int 3716 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3717 { 3718 dev_info_t *vdip = NULL; 3719 mdi_vhci_t *vh = NULL; 3720 mdi_client_t *ct = NULL; 3721 int (*f)(); 3722 int rv; 3723 3724 MDI_PI_LOCK(pip); 3725 ct = MDI_PI(pip)->pi_client; 3726 ASSERT(ct != NULL); 3727 3728 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3729 /* 3730 * Give a chance for pending I/Os to complete. 3731 */ 3732 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3733 "%d cmds still pending on path: %p\n", 3734 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3735 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3736 &MDI_PI(pip)->pi_mutex, 3737 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3738 /* 3739 * The timeout time reached without ref_cnt being zero 3740 * being signaled. 
3741 */ 3742 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3743 "Timeout reached on path %p without the cond\n", 3744 (void *)pip)); 3745 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3746 "%d cmds still pending on path: %p\n", 3747 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3748 } 3749 } 3750 vh = ct->ct_vhci; 3751 vdip = vh->vh_dip; 3752 3753 /* 3754 * Notify vHCI that has registered this event 3755 */ 3756 ASSERT(vh->vh_ops); 3757 f = vh->vh_ops->vo_pi_state_change; 3758 3759 if (f != NULL) { 3760 MDI_PI_UNLOCK(pip); 3761 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3762 flags)) != MDI_SUCCESS) { 3763 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3764 "!vo_path_offline failed " 3765 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3766 } 3767 MDI_PI_LOCK(pip); 3768 } 3769 3770 /* 3771 * Set the mdi_pathinfo node state and clear the transient condition 3772 */ 3773 MDI_PI_SET_OFFLINE(pip); 3774 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3775 MDI_PI_UNLOCK(pip); 3776 3777 MDI_CLIENT_LOCK(ct); 3778 if (rv == MDI_SUCCESS) { 3779 if (ct->ct_unstable == 0) { 3780 dev_info_t *cdip = ct->ct_dip; 3781 3782 /* 3783 * Onlining the mdi_pathinfo node will impact the 3784 * client state Update the client and dev_info node 3785 * state accordingly 3786 */ 3787 i_mdi_client_update_state(ct); 3788 rv = NDI_SUCCESS; 3789 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3790 if (cdip && 3791 (i_ddi_node_state(cdip) >= 3792 DS_INITIALIZED)) { 3793 MDI_CLIENT_UNLOCK(ct); 3794 rv = ndi_devi_offline(cdip, 0); 3795 MDI_CLIENT_LOCK(ct); 3796 if (rv != NDI_SUCCESS) { 3797 /* 3798 * ndi_devi_offline failed. 3799 * Reset client flags to 3800 * online. 
3801 */ 3802 MDI_DEBUG(4, (CE_WARN, cdip, 3803 "!ndi_devi_offline: failed " 3804 " Error: %x", rv)); 3805 MDI_CLIENT_SET_ONLINE(ct); 3806 } 3807 } 3808 } 3809 /* 3810 * Convert to MDI error code 3811 */ 3812 switch (rv) { 3813 case NDI_SUCCESS: 3814 rv = MDI_SUCCESS; 3815 break; 3816 case NDI_BUSY: 3817 rv = MDI_BUSY; 3818 break; 3819 default: 3820 rv = MDI_FAILURE; 3821 break; 3822 } 3823 } 3824 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3825 i_mdi_report_path_state(ct, pip); 3826 } 3827 3828 MDI_CLIENT_UNLOCK(ct); 3829 3830 /* 3831 * Change in the mdi_pathinfo node state will impact the client state 3832 */ 3833 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3834 (void *)ct, (void *)pip)); 3835 return (rv); 3836 } 3837 3838 3839 /* 3840 * mdi_pi_get_addr(): 3841 * Get the unit address associated with a mdi_pathinfo node 3842 * 3843 * Return Values: 3844 * char * 3845 */ 3846 char * 3847 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3848 { 3849 if (pip == NULL) 3850 return (NULL); 3851 3852 return (MDI_PI(pip)->pi_addr); 3853 } 3854 3855 /* 3856 * mdi_pi_get_path_instance(): 3857 * Get the 'path_instance' of a mdi_pathinfo node 3858 * 3859 * Return Values: 3860 * path_instance 3861 */ 3862 int 3863 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 3864 { 3865 if (pip == NULL) 3866 return (0); 3867 3868 return (MDI_PI(pip)->pi_path_instance); 3869 } 3870 3871 /* 3872 * mdi_pi_pathname(): 3873 * Return pointer to path to pathinfo node. 
3874 */ 3875 char * 3876 mdi_pi_pathname(mdi_pathinfo_t *pip) 3877 { 3878 if (pip == NULL) 3879 return (NULL); 3880 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 3881 } 3882 3883 char * 3884 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 3885 { 3886 char *obp_path = NULL; 3887 if ((pip == NULL) || (path == NULL)) 3888 return (NULL); 3889 3890 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 3891 (void) strcpy(path, obp_path); 3892 (void) mdi_prop_free(obp_path); 3893 } else { 3894 path = NULL; 3895 } 3896 return (path); 3897 } 3898 3899 int 3900 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 3901 { 3902 dev_info_t *pdip; 3903 char obp_path[MAXPATHLEN]; 3904 3905 if (pip == NULL) 3906 return (MDI_FAILURE); 3907 bzero(obp_path, sizeof (obp_path)); 3908 3909 pdip = mdi_pi_get_phci(pip); 3910 if (pdip == NULL) 3911 return (MDI_FAILURE); 3912 3913 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 3914 (void) ddi_pathname(pdip, obp_path); 3915 } 3916 3917 if (component) { 3918 (void) strncat(obp_path, "/", sizeof (obp_path)); 3919 (void) strncat(obp_path, component, sizeof (obp_path)); 3920 } 3921 3922 return (mdi_prop_update_string(pip, "obp-path", obp_path)); 3923 } 3924 3925 /* 3926 * mdi_pi_get_client(): 3927 * Get the client devinfo associated with a mdi_pathinfo node 3928 * 3929 * Return Values: 3930 * Handle to client device dev_info node 3931 */ 3932 dev_info_t * 3933 mdi_pi_get_client(mdi_pathinfo_t *pip) 3934 { 3935 dev_info_t *dip = NULL; 3936 if (pip) { 3937 dip = MDI_PI(pip)->pi_client->ct_dip; 3938 } 3939 return (dip); 3940 } 3941 3942 /* 3943 * mdi_pi_get_phci(): 3944 * Get the pHCI devinfo associated with the mdi_pathinfo node 3945 * Return Values: 3946 * Handle to dev_info node 3947 */ 3948 dev_info_t * 3949 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3950 { 3951 dev_info_t *dip = NULL; 3952 if (pip) { 3953 dip = MDI_PI(pip)->pi_phci->ph_dip; 3954 } 3955 return (dip); 3956 } 3957 3958 /* 3959 * 
mdi_pi_get_client_private(): 3960 * Get the client private information associated with the 3961 * mdi_pathinfo node 3962 */ 3963 void * 3964 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3965 { 3966 void *cprivate = NULL; 3967 if (pip) { 3968 cprivate = MDI_PI(pip)->pi_cprivate; 3969 } 3970 return (cprivate); 3971 } 3972 3973 /* 3974 * mdi_pi_set_client_private(): 3975 * Set the client private information in the mdi_pathinfo node 3976 */ 3977 void 3978 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3979 { 3980 if (pip) { 3981 MDI_PI(pip)->pi_cprivate = priv; 3982 } 3983 } 3984 3985 /* 3986 * mdi_pi_get_phci_private(): 3987 * Get the pHCI private information associated with the 3988 * mdi_pathinfo node 3989 */ 3990 caddr_t 3991 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3992 { 3993 caddr_t pprivate = NULL; 3994 if (pip) { 3995 pprivate = MDI_PI(pip)->pi_pprivate; 3996 } 3997 return (pprivate); 3998 } 3999 4000 /* 4001 * mdi_pi_set_phci_private(): 4002 * Set the pHCI private information in the mdi_pathinfo node 4003 */ 4004 void 4005 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4006 { 4007 if (pip) { 4008 MDI_PI(pip)->pi_pprivate = priv; 4009 } 4010 } 4011 4012 /* 4013 * mdi_pi_get_state(): 4014 * Get the mdi_pathinfo node state. Transient states are internal 4015 * and not provided to the users 4016 */ 4017 mdi_pathinfo_state_t 4018 mdi_pi_get_state(mdi_pathinfo_t *pip) 4019 { 4020 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4021 4022 if (pip) { 4023 if (MDI_PI_IS_TRANSIENT(pip)) { 4024 /* 4025 * mdi_pathinfo is in state transition. Return the 4026 * last good state. 4027 */ 4028 state = MDI_PI_OLD_STATE(pip); 4029 } else { 4030 state = MDI_PI_STATE(pip); 4031 } 4032 } 4033 return (state); 4034 } 4035 4036 /* 4037 * Note that the following function needs to be the new interface for 4038 * mdi_pi_get_state when mpxio gets integrated to ON. 
4039 */ 4040 int 4041 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4042 uint32_t *ext_state) 4043 { 4044 *state = MDI_PATHINFO_STATE_INIT; 4045 4046 if (pip) { 4047 if (MDI_PI_IS_TRANSIENT(pip)) { 4048 /* 4049 * mdi_pathinfo is in state transition. Return the 4050 * last good state. 4051 */ 4052 *state = MDI_PI_OLD_STATE(pip); 4053 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4054 } else { 4055 *state = MDI_PI_STATE(pip); 4056 *ext_state = MDI_PI_EXT_STATE(pip); 4057 } 4058 } 4059 return (MDI_SUCCESS); 4060 } 4061 4062 /* 4063 * mdi_pi_get_preferred: 4064 * Get the preferred path flag 4065 */ 4066 int 4067 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4068 { 4069 if (pip) { 4070 return (MDI_PI(pip)->pi_preferred); 4071 } 4072 return (0); 4073 } 4074 4075 /* 4076 * mdi_pi_set_preferred: 4077 * Set the preferred path flag 4078 */ 4079 void 4080 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4081 { 4082 if (pip) { 4083 MDI_PI(pip)->pi_preferred = preferred; 4084 } 4085 } 4086 4087 /* 4088 * mdi_pi_set_state(): 4089 * Set the mdi_pathinfo node state 4090 */ 4091 void 4092 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4093 { 4094 uint32_t ext_state; 4095 4096 if (pip) { 4097 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4098 MDI_PI(pip)->pi_state = state; 4099 MDI_PI(pip)->pi_state |= ext_state; 4100 } 4101 } 4102 4103 /* 4104 * Property functions: 4105 */ 4106 int 4107 i_map_nvlist_error_to_mdi(int val) 4108 { 4109 int rv; 4110 4111 switch (val) { 4112 case 0: 4113 rv = DDI_PROP_SUCCESS; 4114 break; 4115 case EINVAL: 4116 case ENOTSUP: 4117 rv = DDI_PROP_INVAL_ARG; 4118 break; 4119 case ENOMEM: 4120 rv = DDI_PROP_NO_MEMORY; 4121 break; 4122 default: 4123 rv = DDI_PROP_NOT_FOUND; 4124 break; 4125 } 4126 return (rv); 4127 } 4128 4129 /* 4130 * mdi_pi_get_next_prop(): 4131 * Property walk function. 
The caller should hold mdi_pi_lock() 4132 * and release by calling mdi_pi_unlock() at the end of walk to 4133 * get a consistent value. 4134 */ 4135 nvpair_t * 4136 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4137 { 4138 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4139 return (NULL); 4140 } 4141 ASSERT(MDI_PI_LOCKED(pip)); 4142 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4143 } 4144 4145 /* 4146 * mdi_prop_remove(): 4147 * Remove the named property from the named list. 4148 */ 4149 int 4150 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4151 { 4152 if (pip == NULL) { 4153 return (DDI_PROP_NOT_FOUND); 4154 } 4155 ASSERT(!MDI_PI_LOCKED(pip)); 4156 MDI_PI_LOCK(pip); 4157 if (MDI_PI(pip)->pi_prop == NULL) { 4158 MDI_PI_UNLOCK(pip); 4159 return (DDI_PROP_NOT_FOUND); 4160 } 4161 if (name) { 4162 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4163 } else { 4164 char nvp_name[MAXNAMELEN]; 4165 nvpair_t *nvp; 4166 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4167 while (nvp) { 4168 nvpair_t *next; 4169 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4170 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 4171 nvpair_name(nvp)); 4172 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4173 nvp_name); 4174 nvp = next; 4175 } 4176 } 4177 MDI_PI_UNLOCK(pip); 4178 return (DDI_PROP_SUCCESS); 4179 } 4180 4181 /* 4182 * mdi_prop_size(): 4183 * Get buffer size needed to pack the property data. 4184 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4185 * buffer size. 
4186 */ 4187 int 4188 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4189 { 4190 int rv; 4191 size_t bufsize; 4192 4193 *buflenp = 0; 4194 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4195 return (DDI_PROP_NOT_FOUND); 4196 } 4197 ASSERT(MDI_PI_LOCKED(pip)); 4198 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4199 &bufsize, NV_ENCODE_NATIVE); 4200 *buflenp = bufsize; 4201 return (i_map_nvlist_error_to_mdi(rv)); 4202 } 4203 4204 /* 4205 * mdi_prop_pack(): 4206 * pack the property list. The caller should hold the 4207 * mdi_pathinfo_t node to get a consistent data 4208 */ 4209 int 4210 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4211 { 4212 int rv; 4213 size_t bufsize; 4214 4215 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4216 return (DDI_PROP_NOT_FOUND); 4217 } 4218 4219 ASSERT(MDI_PI_LOCKED(pip)); 4220 4221 bufsize = buflen; 4222 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4223 NV_ENCODE_NATIVE, KM_SLEEP); 4224 4225 return (i_map_nvlist_error_to_mdi(rv)); 4226 } 4227 4228 /* 4229 * mdi_prop_update_byte(): 4230 * Create/Update a byte property 4231 */ 4232 int 4233 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4234 { 4235 int rv; 4236 4237 if (pip == NULL) { 4238 return (DDI_PROP_INVAL_ARG); 4239 } 4240 ASSERT(!MDI_PI_LOCKED(pip)); 4241 MDI_PI_LOCK(pip); 4242 if (MDI_PI(pip)->pi_prop == NULL) { 4243 MDI_PI_UNLOCK(pip); 4244 return (DDI_PROP_NOT_FOUND); 4245 } 4246 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4247 MDI_PI_UNLOCK(pip); 4248 return (i_map_nvlist_error_to_mdi(rv)); 4249 } 4250 4251 /* 4252 * mdi_prop_update_byte_array(): 4253 * Create/Update a byte array property 4254 */ 4255 int 4256 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4257 uint_t nelements) 4258 { 4259 int rv; 4260 4261 if (pip == NULL) { 4262 return (DDI_PROP_INVAL_ARG); 4263 } 4264 ASSERT(!MDI_PI_LOCKED(pip)); 4265 MDI_PI_LOCK(pip); 4266 if (MDI_PI(pip)->pi_prop == NULL) { 
4267 MDI_PI_UNLOCK(pip); 4268 return (DDI_PROP_NOT_FOUND); 4269 } 4270 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4271 MDI_PI_UNLOCK(pip); 4272 return (i_map_nvlist_error_to_mdi(rv)); 4273 } 4274 4275 /* 4276 * mdi_prop_update_int(): 4277 * Create/Update a 32 bit integer property 4278 */ 4279 int 4280 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4281 { 4282 int rv; 4283 4284 if (pip == NULL) { 4285 return (DDI_PROP_INVAL_ARG); 4286 } 4287 ASSERT(!MDI_PI_LOCKED(pip)); 4288 MDI_PI_LOCK(pip); 4289 if (MDI_PI(pip)->pi_prop == NULL) { 4290 MDI_PI_UNLOCK(pip); 4291 return (DDI_PROP_NOT_FOUND); 4292 } 4293 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4294 MDI_PI_UNLOCK(pip); 4295 return (i_map_nvlist_error_to_mdi(rv)); 4296 } 4297 4298 /* 4299 * mdi_prop_update_int64(): 4300 * Create/Update a 64 bit integer property 4301 */ 4302 int 4303 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4304 { 4305 int rv; 4306 4307 if (pip == NULL) { 4308 return (DDI_PROP_INVAL_ARG); 4309 } 4310 ASSERT(!MDI_PI_LOCKED(pip)); 4311 MDI_PI_LOCK(pip); 4312 if (MDI_PI(pip)->pi_prop == NULL) { 4313 MDI_PI_UNLOCK(pip); 4314 return (DDI_PROP_NOT_FOUND); 4315 } 4316 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4317 MDI_PI_UNLOCK(pip); 4318 return (i_map_nvlist_error_to_mdi(rv)); 4319 } 4320 4321 /* 4322 * mdi_prop_update_int_array(): 4323 * Create/Update a int array property 4324 */ 4325 int 4326 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4327 uint_t nelements) 4328 { 4329 int rv; 4330 4331 if (pip == NULL) { 4332 return (DDI_PROP_INVAL_ARG); 4333 } 4334 ASSERT(!MDI_PI_LOCKED(pip)); 4335 MDI_PI_LOCK(pip); 4336 if (MDI_PI(pip)->pi_prop == NULL) { 4337 MDI_PI_UNLOCK(pip); 4338 return (DDI_PROP_NOT_FOUND); 4339 } 4340 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4341 nelements); 4342 MDI_PI_UNLOCK(pip); 4343 return (i_map_nvlist_error_to_mdi(rv)); 
4344 } 4345 4346 /* 4347 * mdi_prop_update_string(): 4348 * Create/Update a string property 4349 */ 4350 int 4351 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4352 { 4353 int rv; 4354 4355 if (pip == NULL) { 4356 return (DDI_PROP_INVAL_ARG); 4357 } 4358 ASSERT(!MDI_PI_LOCKED(pip)); 4359 MDI_PI_LOCK(pip); 4360 if (MDI_PI(pip)->pi_prop == NULL) { 4361 MDI_PI_UNLOCK(pip); 4362 return (DDI_PROP_NOT_FOUND); 4363 } 4364 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4365 MDI_PI_UNLOCK(pip); 4366 return (i_map_nvlist_error_to_mdi(rv)); 4367 } 4368 4369 /* 4370 * mdi_prop_update_string_array(): 4371 * Create/Update a string array property 4372 */ 4373 int 4374 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4375 uint_t nelements) 4376 { 4377 int rv; 4378 4379 if (pip == NULL) { 4380 return (DDI_PROP_INVAL_ARG); 4381 } 4382 ASSERT(!MDI_PI_LOCKED(pip)); 4383 MDI_PI_LOCK(pip); 4384 if (MDI_PI(pip)->pi_prop == NULL) { 4385 MDI_PI_UNLOCK(pip); 4386 return (DDI_PROP_NOT_FOUND); 4387 } 4388 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4389 nelements); 4390 MDI_PI_UNLOCK(pip); 4391 return (i_map_nvlist_error_to_mdi(rv)); 4392 } 4393 4394 /* 4395 * mdi_prop_lookup_byte(): 4396 * Look for byte property identified by name. The data returned 4397 * is the actual property and valid as long as mdi_pathinfo_t node 4398 * is alive. 4399 */ 4400 int 4401 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4402 { 4403 int rv; 4404 4405 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4406 return (DDI_PROP_NOT_FOUND); 4407 } 4408 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4409 return (i_map_nvlist_error_to_mdi(rv)); 4410 } 4411 4412 4413 /* 4414 * mdi_prop_lookup_byte_array(): 4415 * Look for byte array property identified by name. The data 4416 * returned is the actual property and valid as long as 4417 * mdi_pathinfo_t node is alive. 
4418 */ 4419 int 4420 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4421 uint_t *nelements) 4422 { 4423 int rv; 4424 4425 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4426 return (DDI_PROP_NOT_FOUND); 4427 } 4428 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4429 nelements); 4430 return (i_map_nvlist_error_to_mdi(rv)); 4431 } 4432 4433 /* 4434 * mdi_prop_lookup_int(): 4435 * Look for int property identified by name. The data returned 4436 * is the actual property and valid as long as mdi_pathinfo_t 4437 * node is alive. 4438 */ 4439 int 4440 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4441 { 4442 int rv; 4443 4444 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4445 return (DDI_PROP_NOT_FOUND); 4446 } 4447 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4448 return (i_map_nvlist_error_to_mdi(rv)); 4449 } 4450 4451 /* 4452 * mdi_prop_lookup_int64(): 4453 * Look for int64 property identified by name. The data returned 4454 * is the actual property and valid as long as mdi_pathinfo_t node 4455 * is alive. 4456 */ 4457 int 4458 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4459 { 4460 int rv; 4461 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4462 return (DDI_PROP_NOT_FOUND); 4463 } 4464 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4465 return (i_map_nvlist_error_to_mdi(rv)); 4466 } 4467 4468 /* 4469 * mdi_prop_lookup_int_array(): 4470 * Look for int array property identified by name. The data 4471 * returned is the actual property and valid as long as 4472 * mdi_pathinfo_t node is alive. 
4473 */ 4474 int 4475 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4476 uint_t *nelements) 4477 { 4478 int rv; 4479 4480 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4481 return (DDI_PROP_NOT_FOUND); 4482 } 4483 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4484 (int32_t **)data, nelements); 4485 return (i_map_nvlist_error_to_mdi(rv)); 4486 } 4487 4488 /* 4489 * mdi_prop_lookup_string(): 4490 * Look for string property identified by name. The data 4491 * returned is the actual property and valid as long as 4492 * mdi_pathinfo_t node is alive. 4493 */ 4494 int 4495 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4496 { 4497 int rv; 4498 4499 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4500 return (DDI_PROP_NOT_FOUND); 4501 } 4502 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4503 return (i_map_nvlist_error_to_mdi(rv)); 4504 } 4505 4506 /* 4507 * mdi_prop_lookup_string_array(): 4508 * Look for string array property identified by name. The data 4509 * returned is the actual property and valid as long as 4510 * mdi_pathinfo_t node is alive. 4511 */ 4512 int 4513 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4514 uint_t *nelements) 4515 { 4516 int rv; 4517 4518 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4519 return (DDI_PROP_NOT_FOUND); 4520 } 4521 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4522 nelements); 4523 return (i_map_nvlist_error_to_mdi(rv)); 4524 } 4525 4526 /* 4527 * mdi_prop_free(): 4528 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4529 * functions return the pointer to actual property data and not a 4530 * copy of it. So the data returned is valid as long as 4531 * mdi_pathinfo_t node is valid. 
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	/* Nothing to free: lookups return pointers into the live nvlist. */
	return (DDI_PROP_SUCCESS);
}

/*
 * i_mdi_report_path_state():
 *		Log a console/syslog message describing the client's overall
 *		multipath state and the state of the given path, then clear
 *		the client's "report device" flag.  Caller must hold the
 *		client lock; no-op unless a report was flagged as needed.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char		*phci_path, *ct_path;
	char		*ct_status;
	char		*status;
	dev_info_t	*dip = ct->ct_dip;
	char		lb_buf[64];

	ASSERT(MDI_CLIENT_LOCKED(ct));
	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}
	/* Render the client's aggregate state as a word. */
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	/* Render this path's state as a word. */
	if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	/* Describe the client's load-balancing policy. */
	if (ct->ct_lb == LOAD_BALANCE_LBA) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s, region-size: %d", mdi_load_balance_lba,
		    ct->ct_lb_args->region_size);
	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s", mdi_load_balance_none);
	} else {
		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
		    mdi_load_balance_rr);
	}

	if (dip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
		    "path %s (%s%d) to target address: %s is %s"
		    " Load balancing: %s\n",
		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
		    ddi_get_instance(dip), ct_status,
		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
		    MDI_PI(pip)->pi_addr, status, lb_buf);
		kmem_free(phci_path, MAXPATHLEN);
		kmem_free(ct_path, MAXPATHLEN);
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef	DEBUG
/*
 * i_mdi_log():
 *		Utility function for error message management
 *
 *		The first character of the formatted message selects the
 *		destination, mirroring cmn_err(9F): '!' log only, '?' boot
 *		(log) only, '^' console only; otherwise both.
 */
/*PRINTFLIKE3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[MAXNAMELEN];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	if (dip) {
		(void) snprintf(name, MAXNAMELEN, "%s%d: ",
		    ddi_node_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
	va_end(ap);

	/* Strip the destination prefix character, if any. */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		bp = buf;
		break;
	}
	/* Debug override: force everything to the system log. */
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
	case CE_WARN:
	case CE_PANIC:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s", name, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s", name, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s", name, bp);
		} else {
			cmn_err(level, "mdi: %s%s", name, bp);
		}
		break;
	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */

/*
 * i_mdi_client_online():
 *		Client online notification: mark the client online, rebind
 *		it to its dev_info node, and take a PM hold (powering all
 *		pHCIs first if no path is powered yet).
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification. Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
	    "i_mdi_pm_hold_client %p\n", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_phci_online():
 *		pHCI online notification: mark the pHCI state online.
 */
void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *		Online notification from NDI framework on pHCI/client
 *		device online.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	/* A node may be both a pHCI and a client; handle each role. */
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *		Offline notification from NDI framework on pHCI/Client device
 *		offline.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	/* Offline the client role first; bail out if that fails. */
	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
	    (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* Already offline: nothing to do. */
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
		    (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. 
Please try again later.")); 4822 MDI_PHCI_UNLOCK(ph); 4823 return (NDI_BUSY); 4824 } 4825 4826 pip = ph->ph_path_head; 4827 while (pip != NULL) { 4828 MDI_PI_LOCK(pip); 4829 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4830 4831 /* 4832 * The mdi_pathinfo state is OK. Check the client state. 4833 * If failover in progress fail the pHCI from offlining 4834 */ 4835 ct = MDI_PI(pip)->pi_client; 4836 i_mdi_client_lock(ct, pip); 4837 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4838 (ct->ct_unstable)) { 4839 /* 4840 * Failover is in progress, Fail the DR 4841 */ 4842 MDI_DEBUG(1, (CE_WARN, dip, 4843 "!pHCI device (%s%d) is Busy. %s", 4844 ddi_driver_name(dip), ddi_get_instance(dip), 4845 "This device can not be removed at " 4846 "this moment. Please try again later.")); 4847 MDI_PI_UNLOCK(pip); 4848 i_mdi_client_unlock(ct); 4849 MDI_PHCI_UNLOCK(ph); 4850 return (NDI_BUSY); 4851 } 4852 MDI_PI_UNLOCK(pip); 4853 4854 /* 4855 * Check to see of we are removing the last path of this 4856 * client device... 4857 */ 4858 cdip = ct->ct_dip; 4859 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4860 (i_mdi_client_compute_state(ct, ph) == 4861 MDI_CLIENT_STATE_FAILED)) { 4862 i_mdi_client_unlock(ct); 4863 MDI_PHCI_UNLOCK(ph); 4864 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4865 /* 4866 * ndi_devi_offline() failed. 4867 * This pHCI provides the critical path 4868 * to one or more client devices. 4869 * Return busy. 4870 */ 4871 MDI_PHCI_LOCK(ph); 4872 MDI_DEBUG(1, (CE_WARN, dip, 4873 "!pHCI device (%s%d) is Busy. %s", 4874 ddi_driver_name(dip), ddi_get_instance(dip), 4875 "This device can not be removed at " 4876 "this moment. 
Please try again later.")); 4877 failed_pip = pip; 4878 break; 4879 } else { 4880 MDI_PHCI_LOCK(ph); 4881 pip = next; 4882 } 4883 } else { 4884 i_mdi_client_unlock(ct); 4885 pip = next; 4886 } 4887 } 4888 4889 if (failed_pip) { 4890 pip = ph->ph_path_head; 4891 while (pip != failed_pip) { 4892 MDI_PI_LOCK(pip); 4893 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4894 ct = MDI_PI(pip)->pi_client; 4895 i_mdi_client_lock(ct, pip); 4896 cdip = ct->ct_dip; 4897 switch (MDI_CLIENT_STATE(ct)) { 4898 case MDI_CLIENT_STATE_OPTIMAL: 4899 case MDI_CLIENT_STATE_DEGRADED: 4900 if (cdip) { 4901 MDI_PI_UNLOCK(pip); 4902 i_mdi_client_unlock(ct); 4903 MDI_PHCI_UNLOCK(ph); 4904 (void) ndi_devi_online(cdip, 0); 4905 MDI_PHCI_LOCK(ph); 4906 pip = next; 4907 continue; 4908 } 4909 break; 4910 4911 case MDI_CLIENT_STATE_FAILED: 4912 if (cdip) { 4913 MDI_PI_UNLOCK(pip); 4914 i_mdi_client_unlock(ct); 4915 MDI_PHCI_UNLOCK(ph); 4916 (void) ndi_devi_offline(cdip, 0); 4917 MDI_PHCI_LOCK(ph); 4918 pip = next; 4919 continue; 4920 } 4921 break; 4922 } 4923 MDI_PI_UNLOCK(pip); 4924 i_mdi_client_unlock(ct); 4925 pip = next; 4926 } 4927 MDI_PHCI_UNLOCK(ph); 4928 return (NDI_BUSY); 4929 } 4930 4931 /* 4932 * Mark the pHCI as offline 4933 */ 4934 MDI_PHCI_SET_OFFLINE(ph); 4935 4936 /* 4937 * Mark the child mdi_pathinfo nodes as transient 4938 */ 4939 pip = ph->ph_path_head; 4940 while (pip != NULL) { 4941 MDI_PI_LOCK(pip); 4942 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4943 MDI_PI_SET_OFFLINING(pip); 4944 MDI_PI_UNLOCK(pip); 4945 pip = next; 4946 } 4947 MDI_PHCI_UNLOCK(ph); 4948 /* 4949 * Give a chance for any pending commands to execute 4950 */ 4951 delay(1); 4952 MDI_PHCI_LOCK(ph); 4953 pip = ph->ph_path_head; 4954 while (pip != NULL) { 4955 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4956 (void) i_mdi_pi_offline(pip, flags); 4957 MDI_PI_LOCK(pip); 4958 ct = MDI_PI(pip)->pi_client; 4959 if (!MDI_PI_IS_OFFLINE(pip)) { 4960 MDI_DEBUG(1, (CE_WARN, dip, 4961 "!pHCI device (%s%d) 
is Busy. %s", 4962 ddi_driver_name(dip), ddi_get_instance(dip), 4963 "This device can not be removed at " 4964 "this moment. Please try again later.")); 4965 MDI_PI_UNLOCK(pip); 4966 MDI_PHCI_SET_ONLINE(ph); 4967 MDI_PHCI_UNLOCK(ph); 4968 return (NDI_BUSY); 4969 } 4970 MDI_PI_UNLOCK(pip); 4971 pip = next; 4972 } 4973 MDI_PHCI_UNLOCK(ph); 4974 4975 return (rv); 4976 } 4977 4978 void 4979 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 4980 { 4981 mdi_phci_t *ph; 4982 mdi_client_t *ct; 4983 mdi_pathinfo_t *pip; 4984 mdi_pathinfo_t *next; 4985 dev_info_t *cdip; 4986 4987 if (!MDI_PHCI(dip)) 4988 return; 4989 4990 ph = i_devi_get_phci(dip); 4991 if (ph == NULL) { 4992 return; 4993 } 4994 4995 MDI_PHCI_LOCK(ph); 4996 4997 if (MDI_PHCI_IS_OFFLINE(ph)) { 4998 /* has no last path */ 4999 MDI_PHCI_UNLOCK(ph); 5000 return; 5001 } 5002 5003 pip = ph->ph_path_head; 5004 while (pip != NULL) { 5005 MDI_PI_LOCK(pip); 5006 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5007 5008 ct = MDI_PI(pip)->pi_client; 5009 i_mdi_client_lock(ct, pip); 5010 MDI_PI_UNLOCK(pip); 5011 5012 cdip = ct->ct_dip; 5013 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5014 (i_mdi_client_compute_state(ct, ph) == 5015 MDI_CLIENT_STATE_FAILED)) { 5016 /* Last path. 
Mark client dip as retiring */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_mark_retiring(cdip, cons_array);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_retire_notify():
 *		Retire-notify pass for a pHCI: for each client whose last
 *		path this is, propagate e_ddi_retire_notify(); clear
 *		*constraint when state can't be evaluated (unstable or
 *		failover in progress).
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are retiring the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (!phci_only && cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			/*
			 * We don't retire clients we just retire the
			 * path to a client. If it is the last path
			 * to a client, constraints are checked and
			 * if we pass the last path is offlined. MPXIO will
			 * then fail all I/Os to the client. Since we don't
			 * want to retire the client on a path error
			 * set constraint = 0 so that the client dip
			 * is not retired.
			 */
			constraint = 0;
			(void) e_ddi_retire_finalize(cdip, &constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Cannot offline pip(s)
	 */
	if (unstable) {
		cmn_err(CE_WARN, "PHCI in transient state, cannot "
		    "retire, dip = %p", (void *)dip);
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, 0);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
			    "PHCI dip = %p", (void *)dip);
			MDI_PI_UNLOCK(pip);
			/* a path would not offline: revert pHCI state */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return;
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_unretire():
 *		Undo a pHCI retire by simply onlining the pHCI again.
 */
void
mdi_phci_unretire(dev_info_t *dip)
{
	ASSERT(MDI_PHCI(dip));

	/*
	 * Online the phci
	 */
	i_mdi_phci_online(dip);
}

/*
 * i_mdi_client_offline():
 *		Client offline notification; refuses (NDI_BUSY) while paths
 *		are transient or a failover is in progress.
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n",
	    (void *)dip, (void *)ct));
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!One or more paths to this device is "
			    "in transient state. This device can not "
			    "be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!Client device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. 
Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}

/*
 * mdi_pre_attach():
 *		Pre attach() notification handler
 */
/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* don't support old DDI_PM_RESUME */
	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
	    (cmd == DDI_PM_RESUME))
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * mdi_post_attach():
 *		Post attach() notification handler
 */
/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_vhci_t	*vh;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_attach: called %p\n", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				/* attach failed: roll back to detached */
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_attach: failed error=%d\n",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_resume: called %p\n", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_resume: failed error=%d\n",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", (void *)ct));
			if (error != DDI_SUCCESS) {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_attach: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (CE_WARN, dip,
				    "mdi_post_attach i_mdi_pm_reset_client\n"));
				/* drop any PM holds taken for this attach */
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached, inform
			 * the vhci.
			 */
			vh = ct->ct_vhci;
			if (vh->vh_ops->vo_client_attached)
				(*vh->vh_ops->vo_client_attached)(dip);

			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", (void *)ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_resume: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}

/*
 * mdi_pre_detach():
 *		Pre detach notification handler
 */
/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*
 * i_mdi_phci_pre_detach():
 *		Pre-detach handling for a pHCI.  DDI_DETACH is refused while
 *		mdi_pathinfo nodes remain; DDI_SUSPEND first suspends all
 *		client devices reachable through this pHCI.
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_detach: called %p\n", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached
to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended. Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * i_mdi_client_pre_detach():
 *		Pre-detach handling for a client: record the pending
 *		detach/suspend in the client state.
 */
/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_detach: called %p\n", (void *)ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_suspend: called %p\n", (void *)ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *		Post detach notification handler
 */
/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed. Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed.
Update our state
	 * too
	 */
	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);
	/*
	 * Detach of pHCI failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI post_detach: called %p\n", (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_ATTACH(ph);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI post_suspend: called %p\n", (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_RESUME(ph);
		break;
	}
	MDI_PHCI_UNLOCK(ph);
}

/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_detach: called %p\n", (void *)ct));
		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
			/* still attaching: release only this path count */
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_suspend: called %p\n", (void *)ct));
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_kstat_exists():
 *		Return 1 if per-path kstats have been created for this
 *		mdi_pathinfo node, 0 otherwise.
 */
int
mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
{
	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
}

/*
 * create and install per-path (client - pHCI) statistics
 * I/O stats supported: nread, nwritten, reads, and writes
 * Error stats - hard errors, soft errors, & transport errors
 *
 * NOTE(review): ksname is appended to in place (",err" suffix) — caller's
 * buffer must have room for the extra 4 characters; confirm at call sites.
 */
int
mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
{
	kstat_t			*kiosp, *kerrsp;
	struct pi_errs		*nsp;
	struct mdi_pi_kstats	*mdi_statp;

	/* already created: nothing to do */
	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		return (MDI_FAILURE);
	}

	(void) strcat(ksname, ",err");
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
	if (kerrsp == NULL) {
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}

/*
 * destroy per-path properties
 */
static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
{

	struct mdi_pi_kstats *mdi_statp;

	if (MDI_PI(pip)->pi_kstats == NULL)
		return;
	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
		return;

	MDI_PI(pip)->pi_kstats = NULL;

	/*
	 * the kstat may be shared between multiple pathinfo nodes
	 * decrement this pathinfo's usage, removing the kstats
	 * themselves when the last pathinfo reference is removed.
	 */
	ASSERT(mdi_statp->pi_kstat_ref > 0);
	if (--mdi_statp->pi_kstat_ref != 0)
		return;

	kstat_delete(mdi_statp->pi_kstat_iostats);
	kstat_delete(mdi_statp->pi_kstat_errstats);
	kmem_free(mdi_statp, sizeof (*mdi_statp));
}

/*
 * update I/O paths KSTATS
 */
void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
{
	kstat_t *iostatp;
	size_t xfer_cnt;

	ASSERT(pip != NULL);

	/*
	 * I/O can be driven across a path prior to having path
	 * statistics available, i.e. probe(9e).
	 */
	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
		xfer_cnt = bp->b_bcount - bp->b_resid;
		if (bp->b_flags & B_READ) {
			KSTAT_IO_PTR(iostatp)->reads++;
			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
		} else {
			KSTAT_IO_PTR(iostatp)->writes++;
			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
		}
	}
}

/*
 * Enable the path(specific client/target/initiator)
 * Enabling a path means that MPxIO may select the enabled path for routing
 * future I/O requests, subject to other path state constraints.
5832 */ 5833 int 5834 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5835 { 5836 mdi_phci_t *ph; 5837 5838 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5839 if (ph == NULL) { 5840 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5841 " failed. pip: %p ph = NULL\n", (void *)pip)); 5842 return (MDI_FAILURE); 5843 } 5844 5845 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5846 MDI_ENABLE_OP); 5847 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5848 " Returning success pip = %p. ph = %p\n", 5849 (void *)pip, (void *)ph)); 5850 return (MDI_SUCCESS); 5851 5852 } 5853 5854 /* 5855 * Disable the path (specific client/target/initiator) 5856 * Disabling a path means that MPxIO will not select the disabled path for 5857 * routing any new I/O requests. 5858 */ 5859 int 5860 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5861 { 5862 mdi_phci_t *ph; 5863 5864 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5865 if (ph == NULL) { 5866 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5867 " failed. pip: %p ph = NULL\n", (void *)pip)); 5868 return (MDI_FAILURE); 5869 } 5870 5871 (void) i_mdi_enable_disable_path(pip, 5872 ph->ph_vhci, flags, MDI_DISABLE_OP); 5873 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5874 "Returning success pip = %p. ph = %p", 5875 (void *)pip, (void *)ph)); 5876 return (MDI_SUCCESS); 5877 } 5878 5879 /* 5880 * disable the path to a particular pHCI (pHCI specified in the phci_path 5881 * argument) for a particular client (specified in the client_path argument). 5882 * Disabling a path means that MPxIO will not select the disabled path for 5883 * routing any new I/O requests. 
5884 * NOTE: this will be removed once the NWS files are changed to use the new 5885 * mdi_{enable,disable}_path interfaces 5886 */ 5887 int 5888 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5889 { 5890 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5891 } 5892 5893 /* 5894 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5895 * argument) for a particular client (specified in the client_path argument). 5896 * Enabling a path means that MPxIO may select the enabled path for routing 5897 * future I/O requests, subject to other path state constraints. 5898 * NOTE: this will be removed once the NWS files are changed to use the new 5899 * mdi_{enable,disable}_path interfaces 5900 */ 5901 5902 int 5903 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5904 { 5905 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5906 } 5907 5908 /* 5909 * Common routine for doing enable/disable. 5910 */ 5911 static mdi_pathinfo_t * 5912 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5913 int op) 5914 { 5915 int sync_flag = 0; 5916 int rv; 5917 mdi_pathinfo_t *next; 5918 int (*f)() = NULL; 5919 5920 f = vh->vh_ops->vo_pi_state_change; 5921 5922 sync_flag = (flags << 8) & 0xf00; 5923 5924 /* 5925 * Do a callback into the mdi consumer to let it 5926 * know that path is about to get enabled/disabled. 
5927 */ 5928 if (f != NULL) { 5929 rv = (*f)(vh->vh_dip, pip, 0, 5930 MDI_PI_EXT_STATE(pip), 5931 MDI_EXT_STATE_CHANGE | sync_flag | 5932 op | MDI_BEFORE_STATE_CHANGE); 5933 if (rv != MDI_SUCCESS) { 5934 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5935 "!vo_pi_state_change: failed rv = %x", rv)); 5936 } 5937 } 5938 MDI_PI_LOCK(pip); 5939 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5940 5941 switch (flags) { 5942 case USER_DISABLE: 5943 if (op == MDI_DISABLE_OP) { 5944 MDI_PI_SET_USER_DISABLE(pip); 5945 } else { 5946 MDI_PI_SET_USER_ENABLE(pip); 5947 } 5948 break; 5949 case DRIVER_DISABLE: 5950 if (op == MDI_DISABLE_OP) { 5951 MDI_PI_SET_DRV_DISABLE(pip); 5952 } else { 5953 MDI_PI_SET_DRV_ENABLE(pip); 5954 } 5955 break; 5956 case DRIVER_DISABLE_TRANSIENT: 5957 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5958 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5959 } else { 5960 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5961 } 5962 break; 5963 } 5964 MDI_PI_UNLOCK(pip); 5965 /* 5966 * Do a callback into the mdi consumer to let it 5967 * know that path is now enabled/disabled. 5968 */ 5969 if (f != NULL) { 5970 rv = (*f)(vh->vh_dip, pip, 0, 5971 MDI_PI_EXT_STATE(pip), 5972 MDI_EXT_STATE_CHANGE | sync_flag | 5973 op | MDI_AFTER_STATE_CHANGE); 5974 if (rv != MDI_SUCCESS) { 5975 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5976 "!vo_pi_state_change: failed rv = %x", rv)); 5977 } 5978 } 5979 return (next); 5980 } 5981 5982 /* 5983 * Common routine for doing enable/disable. 
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path has been putback
 */
int
i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
{

	mdi_phci_t	*ph;
	mdi_vhci_t	*vh = NULL;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*next, *pip;
	int		found_it;

	ph = i_devi_get_phci(pdip);
	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
	    "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip,
	    (void *)cdip));
	if (ph == NULL) {
		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:"
		    "Op %d failed. ph = NULL\n", op));
		return (MDI_FAILURE);
	}

	if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
		MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
		    "Op Invalid operation = %d\n", op));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;

	if (cdip == NULL) {
		/*
		 * Need to mark the Phci as enabled/disabled.
		 */
		MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
		    "Op %d for the phci\n", op));
		MDI_PHCI_LOCK(ph);
		switch (flags) {
		case USER_DISABLE:
			if (op == MDI_DISABLE_OP) {
				MDI_PHCI_SET_USER_DISABLE(ph);
			} else {
				MDI_PHCI_SET_USER_ENABLE(ph);
			}
			break;
		case DRIVER_DISABLE:
			if (op == MDI_DISABLE_OP) {
				MDI_PHCI_SET_DRV_DISABLE(ph);
			} else {
				MDI_PHCI_SET_DRV_ENABLE(ph);
			}
			break;
		case DRIVER_DISABLE_TRANSIENT:
			if (op == MDI_DISABLE_OP) {
				MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
			} else {
				MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
			}
			break;
		default:
			/*
			 * NOTE(review): this default case unlocks ph and then
			 * falls through to the path walk below, which unlocks
			 * ph a second time. Looks like a missing
			 * "return (MDI_FAILURE);" here -- confirm against the
			 * upstream gate before changing.
			 */
			MDI_PHCI_UNLOCK(ph);
			MDI_DEBUG(1, (CE_NOTE, NULL,
			    "!i_mdi_pi_enable_disable:"
			    " Invalid flag argument= %d\n", flags));
		}

		/*
		 * Phci has been disabled. Now try to enable/disable
		 * path info's to each client.
		 */
		pip = ph->ph_path_head;
		while (pip != NULL) {
			pip = i_mdi_enable_disable_path(pip, vh, flags, op);
		}
		MDI_PHCI_UNLOCK(ph);
	} else {

		/*
		 * Disable a specific client.
		 */
		ct = i_devi_get_client(cdip);
		if (ct == NULL) {
			MDI_DEBUG(1, (CE_NOTE, NULL,
			    "!i_mdi_pi_enable_disable:"
			    " failed. ct = NULL operation = %d\n", op));
			return (MDI_FAILURE);
		}

		/* find the path on this client that belongs to pdip's phci */
		MDI_CLIENT_LOCK(ct);
		pip = ct->ct_path_head;
		found_it = 0;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI(pip)->pi_phci == ph) {
				MDI_PI_UNLOCK(pip);
				found_it = 1;
				break;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
		}


		MDI_CLIENT_UNLOCK(ct);
		if (found_it == 0) {
			MDI_DEBUG(1, (CE_NOTE, NULL,
			    "!i_mdi_pi_enable_disable:"
			    " failed. Could not find corresponding pip\n"));
			return (MDI_FAILURE);
		}

		(void) i_mdi_enable_disable_path(pip, vh, flags, op);
	}

	MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: "
	    "Op %d Returning success pdip = %p cdip = %p\n",
	    op, (void *)pdip, (void *)cdip));
	return (MDI_SUCCESS);
}

/*
 * Ensure phci powered up
 *
 * Takes a PM hold on the pHCI backing this pathinfo node, dropping and
 * re-taking the pathinfo lock around the (blocking) pm_hold_power() call.
 * pi_pm_held is only set if the pHCI actually participates in PM.
 */
static void
i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	if (MDI_PI(pip)->pi_pm_held) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	/*
	 * NOTE(review): this MDI_DEBUG dereferences ph_dip (ddi_get_name/
	 * ddi_get_instance) before the NULL check below; under DEBUG with a
	 * NULL pHCI dip this would fault -- confirm whether ph_dip can
	 * actually be NULL here.
	 */
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n",
	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));
	if (ph_dip == NULL) {
		return;
	}

	/* drop the pi lock; pm_hold_power() may block */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));

	pm_hold_power(ph_dip);

	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
	if (DEVI(ph_dip)->devi_pm_info)
		MDI_PI(pip)->pi_pm_held = 1;
}

/*
 * Allow phci powered down
 *
 * Inverse of i_mdi_pm_hold_pip(): releases the PM hold recorded in
 * pi_pm_held, again dropping the pathinfo lock across pm_rele_power().
 */
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip = NULL;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	if (MDI_PI(pip)->pi_pm_held == 0) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	ASSERT(ph_dip != NULL);

	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n",
	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));

	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_rele_power(ph_dip);
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));

	MDI_PI_LOCK(pip);
	MDI_PI(pip)->pi_pm_held = 0;
}

/*
 * Bump the client's power hold count by 'incr'.
 * Caller must hold the client mutex.
 */
static void
i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	ct->ct_power_cnt += incr;
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p "
	    "ct_power_cnt = %d incr = %d\n", (void *)ct,
	    ct->ct_power_cnt, incr));
	ASSERT(ct->ct_power_cnt >= 0);
}

/*
 * Release the PM hold on every pathinfo node of the client.
 * Caller must hold the client mutex.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t  *pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		/*
		 * NOTE(review): pip is dereferenced (pi_client_link) after
		 * mdi_rele_path() drops our hold; this presumably relies on
		 * the client lock keeping the list stable -- confirm.
		 */
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * Drop 'decr' power holds from the client; when the count reaches zero
 * release the PM hold on all of its pHCIs. The decrement is only applied
 * while the client is attached.
 */
static void
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	if (i_ddi_devi_attached(ct->ct_dip)) {
		ct->ct_power_cnt -= decr;
		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
		    "ct_power_cnt = %d decr = %d\n",
		    (void *)ct, ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/*
 * Force the client's power bookkeeping back to zero, release all pHCI
 * holds, and flag that a reset took place (ct_powercnt_reset).
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
	ASSERT(MDI_CLIENT_LOCKED(ct));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	ct->ct_powercnt_reset = 1;
}

/*
 * Hold and power up the pHCI behind a single pathinfo node.
 * On pm_powerup() failure the hold is released again.
 * Returns MDI_SUCCESS / MDI_FAILURE.
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
	    "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip),
	    ddi_get_instance(ph_dip), (void *)pip));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
		    "pm_powerup FAILED for %s%d %p\n",
		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip),
		    (void *)pip));

		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * Power up the pHCI behind every usable (INIT/ONLINE/STANDBY) path of the
 * client. The client lock is dropped around each per-path powerup.
 * Returns MDI_SUCCESS if at least one path was powered, else MDI_FAILURE.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t  *pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the new power state on success */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might have started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* powering up: hold the phcis on behalf of the client */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* powered all the way down: drop the holds */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config PM bookkeeping for one client: power up its pHCIs (if not
 * already) and take a per-path hold, recording it in ct_powercnt_config.
 * No-ops when the client is not failed or a hold is already recorded.
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one ALREADY held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config PM bookkeeping: apply i_mdi_pm_pre_config_one() either to the
 * named child or, when child is NULL, to every child of the vHCI.
 */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
	return (ret);
}

/*
 * Pre-unconfig PM bookkeeping for one client: power up its pHCIs and take
 * a per-path hold (recorded in ct_powercnt_unconfig) so the device can be
 * powered for detach. *held is set when a hold is in place. Fails the
 * unconfig for NDI_AUTODETACH of an already powered-down client.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(ct->ct_dip)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * Pre-unconfig PM bookkeeping: apply i_mdi_pm_pre_unconfig_one() to the
 * named child, or to every child of the vHCI when child is NULL. *held is
 * set if any child ended up held, in which case the overall result is
 * forced to MDI_SUCCESS so post-unconfig will release the holds.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	int			ret = MDI_SUCCESS;
	dev_info_t		*cdip;
	int			circ;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);

	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/*
 * Post-config PM bookkeeping for one client: release the hold taken in
 * pre-config. If the config effectively failed (client powered down or not
 * attached, and not mid-attach) reset the PM state entirely; otherwise
 * release one hold per currently valid (ONLINE/STANDBY) path.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-config PM bookkeeping: apply i_mdi_pm_post_config_one() to the
 * named child, or to every child of the vHCI when child is NULL.
 */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * Post-unconfig PM bookkeeping for one client: release the hold taken in
 * pre-unconfig. A detach failure (powered down but still attached) or a
 * concurrent full detach resets the PM state; otherwise one hold per valid
 * path is released and ct_powercnt_unconfig is cleared.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-unconfig PM bookkeeping: no-op unless pre-unconfig reported a hold
 * ('held'); otherwise apply i_mdi_pm_post_unconfig_one() to the named
 * child or to every child of the vHCI.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	int			circ;
	dev_info_t		*cdip;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, vdip,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * Entry point for the vHCI framework's PM operations: dispatches the
 * pre/post (un)config bookkeeping above and the explicit HOLD/RELE_POWER
 * requests. 'args' is op-dependent (held-flag pointer or client dip).
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int			circ, ret = MDI_SUCCESS;
	dev_info_t		*client_dip = NULL;
	mdi_client_t		*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
	    op, devnm ? devnm : "NULL", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		ASSERT(args);

		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return (ret);
}

/*
 * Report whether dip is an MDI vHCI node; optionally return its class name.
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Report whether dip is an MDI pHCI node; optionally return the class name
 * of the vHCI it is registered under.
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t *phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Report whether dip is an MDI client node; optionally return the class
 * name of its vHCI.
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t *client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class = client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Get the vhci-private data of a client node (NULL if dip is not a client).
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * Set the vhci-private data of a client node (silently ignored if dip is
 * not a client).
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}
/*
 * mdi_pi_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_pathinfo node
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *		Set the vhci private information in the mdi_pathinfo node
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function.
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/* the -1 below is to account for "%s" in the format string */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate\n",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_next;

	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/* spin-wait (with delay) for the flush thread and acc threads */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	/* write out any pending cache changes before tearing down */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}

/*
 * Stop vhci cache flush thread
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}
7226 7227 /* 7228 * Enqueue the vhcache phci (cphci) at the tail of the list 7229 */ 7230 static void 7231 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7232 { 7233 cphci->cphci_next = NULL; 7234 if (vhcache->vhcache_phci_head == NULL) 7235 vhcache->vhcache_phci_head = cphci; 7236 else 7237 vhcache->vhcache_phci_tail->cphci_next = cphci; 7238 vhcache->vhcache_phci_tail = cphci; 7239 } 7240 7241 /* 7242 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7243 */ 7244 static void 7245 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7246 mdi_vhcache_pathinfo_t *cpi) 7247 { 7248 cpi->cpi_next = NULL; 7249 if (cct->cct_cpi_head == NULL) 7250 cct->cct_cpi_head = cpi; 7251 else 7252 cct->cct_cpi_tail->cpi_next = cpi; 7253 cct->cct_cpi_tail = cpi; 7254 } 7255 7256 /* 7257 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7258 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7259 * flag set come at the beginning of the list. All cpis which have this 7260 * flag set come at the end of the list. 
7261 */ 7262 static void 7263 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7264 mdi_vhcache_pathinfo_t *newcpi) 7265 { 7266 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7267 7268 if (cct->cct_cpi_head == NULL || 7269 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7270 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7271 else { 7272 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7273 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7274 prev_cpi = cpi, cpi = cpi->cpi_next) 7275 ; 7276 7277 if (prev_cpi == NULL) 7278 cct->cct_cpi_head = newcpi; 7279 else 7280 prev_cpi->cpi_next = newcpi; 7281 7282 newcpi->cpi_next = cpi; 7283 7284 if (cpi == NULL) 7285 cct->cct_cpi_tail = newcpi; 7286 } 7287 } 7288 7289 /* 7290 * Enqueue the vhcache client (cct) at the tail of the list 7291 */ 7292 static void 7293 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7294 mdi_vhcache_client_t *cct) 7295 { 7296 cct->cct_next = NULL; 7297 if (vhcache->vhcache_client_head == NULL) 7298 vhcache->vhcache_client_head = cct; 7299 else 7300 vhcache->vhcache_client_tail->cct_next = cct; 7301 vhcache->vhcache_client_tail = cct; 7302 } 7303 7304 static void 7305 free_string_array(char **str, int nelem) 7306 { 7307 int i; 7308 7309 if (str) { 7310 for (i = 0; i < nelem; i++) { 7311 if (str[i]) 7312 kmem_free(str[i], strlen(str[i]) + 1); 7313 } 7314 kmem_free(str, sizeof (char *) * nelem); 7315 } 7316 } 7317 7318 static void 7319 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7320 { 7321 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7322 kmem_free(cphci, sizeof (*cphci)); 7323 } 7324 7325 static void 7326 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7327 { 7328 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7329 kmem_free(cpi, sizeof (*cpi)); 7330 } 7331 7332 static void 7333 free_vhcache_client(mdi_vhcache_client_t *cct) 7334 { 7335 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7336 kmem_free(cct, sizeof (*cct)); 7337 } 7338 7339 
static char * 7340 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7341 { 7342 char *name_addr; 7343 int len; 7344 7345 len = strlen(ct_name) + strlen(ct_addr) + 2; 7346 name_addr = kmem_alloc(len, KM_SLEEP); 7347 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7348 7349 if (ret_len) 7350 *ret_len = len; 7351 return (name_addr); 7352 } 7353 7354 /* 7355 * Copy the contents of paddrnvl to vhci cache. 7356 * paddrnvl nvlist contains path information for a vhci client. 7357 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7358 */ 7359 static void 7360 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7361 mdi_vhcache_client_t *cct) 7362 { 7363 nvpair_t *nvp = NULL; 7364 mdi_vhcache_pathinfo_t *cpi; 7365 uint_t nelem; 7366 uint32_t *val; 7367 7368 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7369 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7370 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7371 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7372 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7373 ASSERT(nelem == 2); 7374 cpi->cpi_cphci = cphci_list[val[0]]; 7375 cpi->cpi_flags = val[1]; 7376 enqueue_tail_vhcache_pathinfo(cct, cpi); 7377 } 7378 } 7379 7380 /* 7381 * Copy the contents of caddrmapnvl to vhci cache. 7382 * caddrmapnvl nvlist contains vhci client address to phci client address 7383 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7384 * this nvlist. 
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
	nvpair_t		*nvp = NULL;
	nvlist_t		*paddrnvl;
	mdi_vhcache_client_t	*cct;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		/* the nvpair name is the client's "<name>@<address>" string */
		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_nvlist(nvp, &paddrnvl);
		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
		/* the client must contain at least one path */
		ASSERT(cct->cct_cpi_head != NULL);

		/* add the client to both the linear list and the hash */
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
	}
}

/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 * NAME		TYPE		DATA
 * version	int32		version number
 * phcis	string array	array of phci paths
 * clientaddrmap nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 * NAME		TYPE		DATA
 * caddr1	nvlist_t	paddrs_nvl1
 * caddr2	nvlist_t	paddrs_nvl2
 * ...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 * NAME		TYPE		DATA
 * pi_addr1	uint32_array	(phci-id, cpi_flags)
 * pi_addr2	uint32_array	(phci-id, cpi_flags)
 * ...
 * where pi_addr1, pi_addr2, ...
 * are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify PHCIs to which the
 * bus specific address belongs. These integers are used as an index
 * into the phcis string array in the main nvlist to get the PHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char			**phcis, **phci_namep;
	uint_t			nphcis;
	mdi_vhcache_phci_t	*cphci, **cphci_list;
	nvlist_t		*caddrmapnvl;
	int32_t			ver;
	int			i;
	size_t			cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* reject caches written with a different format version */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* a cache without a phci list is empty but valid */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/*
	 * Build the in-core phci list; remember each entry by its array
	 * index (phci-id) so pathinfo entries can refer back to it.
	 */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnvl.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
    nvlist_t *caddrmapnvl)
{
	mdi_vhcache_pathinfo_t	*cpi;
	nvlist_t		*nvl;
	int			err;
	uint32_t		val[2];

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
		return (err);

	/* one (phci-id, flags) pair per path, keyed by the path address */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		val[0] = cpi->cpi_cphci->cphci_id;
		val[1] = cpi->cpi_flags;
		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
		    != 0)
			goto out;
	}

	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
out:
	/* nvlist_add_nvlist() copied nvl, so it is freed either way */
	nvlist_free(nvl);
	return (err);
}

/*
 * Build caddrmapnvl using the information in the vhci cache
 * and add it to the mainnvl.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
{
	mdi_vhcache_client_t	*cct;
	nvlist_t		*nvl;
	int			err;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
		return (err);

	for (cct = vhcache->vhcache_client_head; cct != NULL;
	    cct = cct->cct_next) {
		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
			goto out;
	}

	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
out:
	nvlist_free(nvl);
	return (err);
}

/*
 * Build nvlist using the information in the vhci cache.
 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
 * Returns nvl on success, NULL on failure.
7552 */ 7553 static nvlist_t * 7554 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7555 { 7556 mdi_vhcache_phci_t *cphci; 7557 uint_t phci_count; 7558 char **phcis; 7559 nvlist_t *nvl; 7560 int err, i; 7561 7562 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7563 nvl = NULL; 7564 goto out; 7565 } 7566 7567 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7568 MDI_VHCI_CACHE_VERSION)) != 0) 7569 goto out; 7570 7571 rw_enter(&vhcache->vhcache_lock, RW_READER); 7572 if (vhcache->vhcache_phci_head == NULL) { 7573 rw_exit(&vhcache->vhcache_lock); 7574 return (nvl); 7575 } 7576 7577 phci_count = 0; 7578 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7579 cphci = cphci->cphci_next) 7580 cphci->cphci_id = phci_count++; 7581 7582 /* build phci pathname list */ 7583 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7584 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7585 cphci = cphci->cphci_next, i++) 7586 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7587 7588 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7589 phci_count); 7590 free_string_array(phcis, phci_count); 7591 7592 if (err == 0 && 7593 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7594 rw_exit(&vhcache->vhcache_lock); 7595 return (nvl); 7596 } 7597 7598 rw_exit(&vhcache->vhcache_lock); 7599 out: 7600 if (nvl) 7601 nvlist_free(nvl); 7602 return (NULL); 7603 } 7604 7605 /* 7606 * Lookup vhcache phci structure for the specified phci path. 7607 */ 7608 static mdi_vhcache_phci_t * 7609 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7610 { 7611 mdi_vhcache_phci_t *cphci; 7612 7613 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7614 7615 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7616 cphci = cphci->cphci_next) { 7617 if (strcmp(cphci->cphci_path, phci_path) == 0) 7618 return (cphci); 7619 } 7620 7621 return (NULL); 7622 } 7623 7624 /* 7625 * Lookup vhcache phci structure for the specified phci. 
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
{
	mdi_vhcache_phci_t	*cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/* linear search by the in-core phci handle rather than its path */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (cphci->cphci_phci == ph)
			return (cphci);
	}

	return (NULL);
}

/*
 * Add the specified phci to the vhci cache if not already present.
 */
static void
vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t	*cphci;
	char			*pathname;
	int			cache_updated;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(ph->ph_dip, pathname);
	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
	    != NULL) {
		/* already cached; just (re)attach the in-core phci handle */
		cphci->cphci_phci = ph;
		cache_updated = 0;
	} else {
		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
		cphci->cphci_phci = ph;
		enqueue_vhcache_phci(vhcache, cphci);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * Since a new phci has been added, reset
	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
	 * during next vhcache_discover_paths().
	 */
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_path_discovery_cutoff_time = 0;
	mutex_exit(&vhc->vhc_lock);

	kmem_free(pathname, MAXPATHLEN);
	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified phci from the vhci cache.
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t	*cphci;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
		/* do not remove the actual mdi_vhcache_phci structure */
		cphci->cphci_phci = NULL;
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Initialize a lookup token: copy src when supplied, otherwise clear dst.
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
	if (src == NULL) {
		dst->lt_cct = NULL;
		dst->lt_cct_lookup_time = 0;
	} else {
		dst->lt_cct = src->lt_cct;
		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
	}
}

/*
 * Look up vhcache client for the specified client.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mod_hash_val_t		hv;
	char			*name_addr;
	int			len;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)name_addr, &hv) == 0) {
		if (token) {
			/* remember the hit and the time it was made */
			token->lt_cct = (mdi_vhcache_client_t *)hv;
			token->lt_cct_lookup_time = lbolt64;
		}
	} else {
		if (token) {
			token->lt_cct = NULL;
			token->lt_cct_lookup_time = 0;
		}
		hv = NULL;
	}
	kmem_free(name_addr, len);
	return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t	*cct;
	mdi_vhcache_pathinfo_t	*cpi;
	mdi_phci_t		*ph = pip->pi_phci;
	mdi_client_t		*ct = pip->pi_client;
	int			cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/* check whether this path is already known to the cache */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				/* the path exists after all; clear the hint */
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	if (cpi == NULL) {
		/* brand new path; add it under the client */
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_client_t		*ct = pip->pi_client;
	mdi_vhcache_client_t	*cct;
	mdi_vhcache_pathinfo_t	*cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) != NULL) {
		for (cpi = cct->cct_cpi_head; cpi != NULL;
		    cpi = cpi->cpi_next) {
			if (cpi->cpi_pip == pip) {
				/* keep the cpi; just drop the pip reference */
				cpi->cpi_pip = NULL;
				break;
			}
		}
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t	*nvl;
	int		err;
	int		rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			/*
			 * Read-only filesystem: stop trying to flush and
			 * clear any pending dirty/error state.
			 */
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* warn only on the first of consecutive failures */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t	*vhc = (mdi_vhci_config_t *)arg;
	clock_t			idle_time, quit_at_ticks;
	callb_cpr_t		cprinfo;

	/* number of seconds to sleep idle before exiting */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/*
		 * While the cache is dirty: sleep until the scheduled flush
		 * time, then flush. A failed flush re-dirties the cache so
		 * it will be retried.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* idle-wait for new work until the exit deadline */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* exit when asked to, or when idle with nothing to flush */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
 */
static void
vhcache_dirty(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	int			create_thread;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* do not flush cache until the cache is fully built */
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}
	rw_exit(&vhcache->vhcache_lock);

	mutex_enter(&vhc->vhc_lock);
	/* no point dirtying the cache if it can never be written back */
	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
		mutex_exit(&vhc->vhc_lock);
		return;
	}

	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		/* flush thread already running; just wake it up */
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	char	*phbc_phci_path;
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;

/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;
	kcondvar_t vhbc_cv;
	int vhbc_thr_count;		/* # of bus_config_phci() workers */
} mdi_vhci_bus_config_t;

/*
 * bus config the specified phci
 */
static void
bus_config_phci(void *arg)
{
	mdi_phci_bus_config_t	*phbc = (mdi_phci_bus_config_t *)arg;
	mdi_vhci_bus_config_t	*vhbc = phbc->phbc_vhbusconfig;
	dev_info_t		*ph_dip;

	/*
	 * first configure all path components upto phci and then configure
	 * the phci children.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
	    != NULL) {
		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
			(void) ndi_devi_config_driver(ph_dip,
			    vhbc->vhbc_op_flags,
			    vhbc->vhbc_op_major);
		} else
			(void) ndi_devi_config(ph_dip,
			    vhbc->vhbc_op_flags);

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
	kmem_free(phbc, sizeof (*phbc));

	/* wake the initiator when the last worker finishes */
	mutex_enter(&vhbc->vhbc_lock);
	vhbc->vhbc_thr_count--;
	if (vhbc->vhbc_thr_count == 0)
		cv_broadcast(&vhbc->vhbc_cv);
	mutex_exit(&vhbc->vhbc_lock);
}

/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t	*phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t	*vhbc;
	mdi_vhcache_phci_t	*cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	/* build the per-phci work list while holding the cache lock */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		phbc_next = phbc->phbc_next;
		if (mdi_mtc_off)
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}

/*
 * Single
threaded version of bus_config_all_phcis() 8114 */ 8115 static void 8116 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8117 ddi_bus_config_op_t op, major_t maj) 8118 { 8119 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8120 8121 single_threaded_vhconfig_enter(vhc); 8122 bus_config_all_phcis(vhcache, flags, op, maj); 8123 single_threaded_vhconfig_exit(vhc); 8124 } 8125 8126 /* 8127 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8128 * The path includes the child component in addition to the phci path. 8129 */ 8130 static int 8131 bus_config_one_phci_child(char *path) 8132 { 8133 dev_info_t *ph_dip, *child; 8134 char *devnm; 8135 int rv = MDI_FAILURE; 8136 8137 /* extract the child component of the phci */ 8138 devnm = strrchr(path, '/'); 8139 *devnm++ = '\0'; 8140 8141 /* 8142 * first configure all path components upto phci and then 8143 * configure the phci child. 8144 */ 8145 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8146 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8147 NDI_SUCCESS) { 8148 /* 8149 * release the hold that ndi_devi_config_one() placed 8150 */ 8151 ndi_rele_devi(child); 8152 rv = MDI_SUCCESS; 8153 } 8154 8155 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8156 ndi_rele_devi(ph_dip); 8157 } 8158 8159 devnm--; 8160 *devnm = '/'; 8161 return (rv); 8162 } 8163 8164 /* 8165 * Build a list of phci client paths for the specified vhci client. 8166 * The list includes only those phci client paths which aren't configured yet. 8167 */ 8168 static mdi_phys_path_t * 8169 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8170 { 8171 mdi_vhcache_pathinfo_t *cpi; 8172 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8173 int config_path, len; 8174 8175 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8176 /* 8177 * include only those paths that aren't configured. 
		 */
		config_path = 0;
		if (cpi->cpi_pip == NULL)
			config_path = 1;
		else {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (MDI_PI_IS_INIT(cpi->cpi_pip))
				config_path = 1;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}

		if (config_path) {
			/* build "<phci_path>/<ct_name>@<cpi_addr>" */
			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
			len = strlen(cpi->cpi_cphci->cphci_path) +
			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
			pp->phys_path = kmem_alloc(len, KM_SLEEP);
			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
			    cpi->cpi_cphci->cphci_path, ct_name,
			    cpi->cpi_addr);
			pp->phys_path_next = NULL;

			if (pp_head == NULL)
				pp_head = pp;
			else
				pp_tail->phys_path_next = pp;
			pp_tail = pp;
		}
	}

	return (pp_head);
}

/*
 * Free the memory allocated for phci client path list.
 */
static void
free_phclient_path_list(mdi_phys_path_t *pp_head)
{
	mdi_phys_path_t	*pp, *pp_next;

	for (pp = pp_head; pp != NULL; pp = pp_next) {
		pp_next = pp->phys_path_next;
		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
		kmem_free(pp, sizeof (*pp));
	}
}

/*
 * Allocate async client structure and initialize with the specified values.
 */
static mdi_async_client_config_t *
alloc_async_client_config(char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t	*acc;

	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
	acc->acc_phclient_path_list_head = pp_head;
	init_vhcache_lookup_token(&acc->acc_token, tok);
	acc->acc_next = NULL;
	return (acc);
}

/*
 * Free the memory allocated for the async client structure and its members.
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	if (acc->acc_phclient_path_list_head)
		free_phclient_path_list(acc->acc_phclient_path_list_head);
	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t	*cpi, *cpi_next, *cpi_head;

	/* re-enqueue every cpi; enqueue_vhcache_pathinfo() does the sorting */
	cpi_head = cct->cct_cpi_head;
	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
		cpi_next = cpi->cpi_next;
		enqueue_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t	*cct;
	mdi_vhcache_pathinfo_t	*cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed.
	 * If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * An update is needed; take the writer lock. If the upgrade has to
	 * drop the lock, re-lookup the cct afterwards since the cache may
	 * have been cleaned meanwhile.
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	/* sync the hint with whether the path currently has a pip */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t	*pp;

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
		(void) bus_config_one_phci_child(pp->phys_path);
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t	*vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t			quit_at_ticks;
	clock_t			idle_time =
	    mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t		cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* wait for work until the idle deadline expires */
		mutex_enter(&vhc->vhc_lock);
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the head entry and process it without the lock */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc, *newacc;
	int create_thread;

	if (pp_head == NULL)
		return;

	/* mdi_mtc_off disables multithreaded config; do it synchronously */
	if (mdi_mtc_off) {
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(newacc);

	mutex_enter(&vhc->vhc_lock);
	/* drop the request if one is already queued for this client */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
			free_async_client_config(newacc);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
	}

	/* append the new request to the tail of the list */
	if (vhc->vhc_acc_list_head == NULL)
		vhc->vhc_acc_list_head = newacc;
	else
		vhc->vhc_acc_list_tail->acc_next = newacc;
	vhc->vhc_acc_list_tail = newacc;
	vhc->vhc_acc_count++;

	/*
	 * Wake an existing worker if enough threads are running to cover
	 * the queued requests; otherwise create an additional worker.
	 */
	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_acc_thrcount++;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * Return number of online paths for the specified client.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Called with vhcache_lock held; the lock is always released before return.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * No path is online yet. Configure paths synchronously one at a
	 * time; as soon as a path comes online, hand the remainder of the
	 * list over to the async machinery.
	 */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* client may have disappeared while unlocked */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				/* detach the handed-off tail before freeing */
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Serialize vhci configuration: block until no other thread holds
 * MDI_VHC_SINGLE_THREADED, then claim the flag.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Release MDI_VHC_SINGLE_THREADED and wake any waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

typedef struct mdi_phci_driver_info {
	char	*phdriver_name;		/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
8576 */ 8577 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8578 { "fp", 1 }, 8579 { "iscsi", 0 }, 8580 { "ibsrp", 1 } 8581 }; 8582 8583 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8584 8585 static void * 8586 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8587 { 8588 void *new_ptr; 8589 8590 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8591 if (old_ptr) { 8592 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8593 kmem_free(old_ptr, old_size); 8594 } 8595 return (new_ptr); 8596 } 8597 8598 static void 8599 add_to_phci_list(char ***driver_list, int **root_support_list, 8600 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8601 { 8602 ASSERT(*cur_elements <= *max_elements); 8603 if (*cur_elements == *max_elements) { 8604 *max_elements += 10; 8605 *driver_list = mdi_realloc(*driver_list, 8606 sizeof (char *) * (*cur_elements), 8607 sizeof (char *) * (*max_elements)); 8608 *root_support_list = mdi_realloc(*root_support_list, 8609 sizeof (int) * (*cur_elements), 8610 sizeof (int) * (*max_elements)); 8611 } 8612 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8613 (*root_support_list)[*cur_elements] = root_support; 8614 (*cur_elements)++; 8615 } 8616 8617 static void 8618 get_phci_driver_list(char *vhci_class, char ***driver_list, 8619 int **root_support_list, int *cur_elements, int *max_elements) 8620 { 8621 mdi_phci_driver_info_t *st_driver_list, *p; 8622 int st_ndrivers, root_support, i, j, driver_conf_count; 8623 major_t m; 8624 struct devnames *dnp; 8625 ddi_prop_t *propp; 8626 8627 *driver_list = NULL; 8628 *root_support_list = NULL; 8629 *cur_elements = 0; 8630 *max_elements = 0; 8631 8632 /* add the phci drivers derived from the phci driver.conf files */ 8633 for (m = 0; m < devcnt; m++) { 8634 dnp = &devnamesp[m]; 8635 8636 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8637 LOCK_DEV_OPS(&dnp->dn_lock); 8638 if (dnp->dn_global_prop_ptr != NULL && 8639 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8640 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8641 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8642 strcmp(propp->prop_val, vhci_class) == 0) { 8643 8644 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8645 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8646 &dnp->dn_global_prop_ptr->prop_list) 8647 == NULL) ? 1 : 0; 8648 8649 add_to_phci_list(driver_list, root_support_list, 8650 cur_elements, max_elements, dnp->dn_name, 8651 root_support); 8652 8653 UNLOCK_DEV_OPS(&dnp->dn_lock); 8654 } else 8655 UNLOCK_DEV_OPS(&dnp->dn_lock); 8656 } 8657 } 8658 8659 driver_conf_count = *cur_elements; 8660 8661 /* add the phci drivers specified in the built-in tables */ 8662 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8663 st_driver_list = scsi_phci_driver_list; 8664 st_ndrivers = sizeof (scsi_phci_driver_list) / 8665 sizeof (mdi_phci_driver_info_t); 8666 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8667 st_driver_list = ib_phci_driver_list; 8668 st_ndrivers = sizeof (ib_phci_driver_list) / 8669 sizeof (mdi_phci_driver_info_t); 8670 } else { 8671 st_driver_list = NULL; 8672 st_ndrivers = 0; 8673 } 8674 8675 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8676 /* add this phci driver if not already added before */ 8677 for (j = 0; j < driver_conf_count; j++) { 8678 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8679 break; 8680 } 8681 if (j == driver_conf_count) { 8682 add_to_phci_list(driver_list, root_support_list, 8683 cur_elements, max_elements, p->phdriver_name, 8684 p->phdriver_root_support); 8685 } 8686 } 8687 } 8688 8689 /* 8690 * Attach the phci driver instances associated with the specified vhci class. 8691 * If root is mounted attach all phci driver instances. 8692 * If root is not mounted, attach the instances of only those phci 8693 * drivers that have the root support. 
 */
static void
attach_phci_drivers(char *vhci_class)
{
	char	**driver_list, **p;
	int	*root_support_list;
	int	cur_elements, max_elements, i;
	major_t m;

	get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
	    &cur_elements, &max_elements);

	for (i = 0; i < cur_elements; i++) {
		/* before root is mounted only root-capable drivers attach */
		if (modrootloaded || root_support_list[i]) {
			m = ddi_name_to_major(driver_list[i]);
			if (m != DDI_MAJOR_T_NONE &&
			    ddi_hold_installed_driver(m))
				/* attaching was the goal; drop the hold */
				ddi_rele_driver(m);
		}
	}

	/* free the driver name strings and the two parallel arrays */
	if (driver_list) {
		for (i = 0, p = driver_list; i < cur_elements; i++, p++)
			kmem_free(*p, strlen(*p) + 1);
		kmem_free(driver_list, sizeof (char *) * max_elements);
		kmem_free(root_support_list, sizeof (int) * max_elements);
	}
}

/*
 * Build vhci cache:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
 * the phci driver instances. During this process the cache gets built.
 *
 * Cache is built fully if the root is mounted.
 * If the root is not mounted, phci drivers that do not have root support
 * are not attached. As a result the cache is built partially. The entries
 * in the cache reflect only those phci drivers that have root support.
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);

	/* another thread may have completed the cache setup already */
	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
		rw_exit(&vhcache->vhcache_lock);
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}
	rw_exit(&vhcache->vhcache_lock);

	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	vhcache_dirty(vhc);
	/* 1 indicates the cache was built by this call */
	return (1);
}

/*
 * Determine if discovery of paths is needed.
 * Returns 1 if a full path discovery should be performed, 0 otherwise.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int rv = 1;

	mutex_enter(&vhc->vhc_lock);
	/* boot and post-boot discoveries each consume a separate budget */
	if (i_ddi_io_initialized() == 0) {
		if (vhc->vhc_path_discovery_boot > 0) {
			vhc->vhc_path_discovery_boot--;
			goto out;
		}
	} else {
		if (vhc->vhc_path_discovery_postboot > 0) {
			vhc->vhc_path_discovery_postboot--;
			goto out;
		}
	}

	/*
	 * Do full path discovery at most once per mdi_path_discovery_interval.
	 * This is to avoid a series of full path discoveries when opening
	 * stale /dev/[r]dsk links.
	 */
	if (mdi_path_discovery_interval != -1 &&
	    lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
		goto out;

	rv = 0;
out:
	mutex_exit(&vhc->vhc_lock);
	return (rv);
}

/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
 */
static int
vhcache_discover_paths(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;

	single_threaded_vhconfig_enter(vhc);

	if (vhcache_do_discovery(vhc)) {
		attach_phci_drivers(vh->vh_class);
		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

		/* rate-limit: no further full discovery until cutoff time */
		mutex_enter(&vhc->vhc_lock);
		vhc->vhc_path_discovery_cutoff_time = lbolt64 +
		    mdi_path_discovery_interval * TICKS_PER_SECOND;
		mutex_exit(&vhc->vhc_lock);
		rv = 1;
	}

	single_threaded_vhconfig_exit(vhc);
	return (rv);
}

/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *	vdip	vhci dip
 *	flags	bus config flags
 *	op	bus config operation
 *	The remaining parameters are bus config operation specific
 *
 *	for BUS_CONFIG_ONE
 *		arg	pointer to name@addr
 *		child	upon successful return from this function, *child will
 *			be set to the configured and held devinfo child node
 *			of vdip.
 *		ct_addr	pointer to client address (i.e. GUID)
 *
 *	for BUS_CONFIG_DRIVER
 *		arg	major number of the driver
 *		child and ct_addr parameters are ignored
 *
 *	for BUS_CONFIG_ALL
 *		arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node. Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: "
		    "vhci dip is busy owned %p\n", (void *)vdip));
		goto default_bus_config;
	}

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/* temporarily split "name@addr" at the '@' */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		/* rv != 0: build_vhci_cache already configured all phcis */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
 * Returns the nvlist on success, NULL if the file is missing or corrupt.
 */
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
	nvlist_t *nvl;
	int err;
	char *filename;

	filename = vhclass2vhcache_filename(vhci_class);

	if ((err = fread_nvlist(filename, &nvl)) == 0) {
		kmem_free(filename, strlen(filename) + 1);
		return (nvl);
	} else if (err == EIO)
		cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename);
	else if (err == EINVAL)
		cmn_err(CE_WARN,
		    "%s: data file corrupted, will recreate\n", filename);

	kmem_free(filename, strlen(filename) + 1);
	return (NULL);
}

/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 */
void
mdi_read_devices_files(void)
{
	int i;

	for (i = 0; i < N_VHCI_CLASSES; i++)
		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
}

/*
 * Remove all stale entries from vhci cache.
 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_head, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/*
	 * Detach the client list and rebuild it, keeping only clients
	 * that retain at least one path backed by a pathinfo node.
	 */
	cct_head = vhcache->vhcache_client_head;
	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
	for (cct = cct_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;

		/* keep only paths that are backed by a pathinfo node */
		cpi_head = cct->cct_cpi_head;
		cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
		for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			if (cpi->cpi_pip != NULL) {
				ASSERT(cpi->cpi_cphci->cphci_phci != NULL);
				enqueue_tail_vhcache_pathinfo(cct, cpi);
			} else
				free_vhcache_pathinfo(cpi);
		}

		if (cct->cct_cpi_head != NULL)
			enqueue_vhcache_client(vhcache, cct);
		else {
			/* client has no live paths left; drop it entirely */
			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
			    (mod_hash_key_t)cct->cct_name_addr);
			free_vhcache_client(cct);
		}
	}

	/* rebuild the phci list, keeping only phcis that are still present */
	cphci_head = vhcache->vhcache_phci_head;
	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
	for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		if (cphci->cphci_phci != NULL)
			enqueue_vhcache_phci(vhcache, cphci);
		else
			free_vhcache_phci(cphci);
	}

	vhcache->vhcache_clean_time = lbolt64;
	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}

/*
 * Remove all stale entries from vhci cache.
 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
 */
void
mdi_clean_vhcache(void)
{
	mdi_vhci_t *vh;

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		/* hold the vhci so it can't go away while mdi_mutex is dropped */
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		clean_vhcache(vh->vh_config);
		mutex_enter(&mdi_mutex);
		vh->vh_refcnt--;
	}
	mutex_exit(&mdi_mutex);
}

/*
 * mdi_vhci_walk_clients():
 *		Walker routine to traverse client dev_info nodes
 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
 * below the client, including nexus devices, which we don't want.
 * So we just traverse the immediate siblings, starting from 1st client.
 */
void
mdi_vhci_walk_clients(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
	dev_info_t	*cdip;
	mdi_client_t	*ct;

	MDI_VHCI_CLIENT_LOCK(vh);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		ct = i_devi_get_client(cdip);
		MDI_CLIENT_LOCK(ct);

		/* stop walking when the callback asks to terminate */
		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
			cdip = ddi_get_next_sibling(cdip);
		else
			cdip = NULL;

		MDI_CLIENT_UNLOCK(ct);
	}
	MDI_VHCI_CLIENT_UNLOCK(vh);
}

/*
 * mdi_vhci_walk_phcis():
 *		Walker routine to traverse phci dev_info nodes
 */
void
mdi_vhci_walk_phcis(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
	mdi_phci_t	*ph, *next;

	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph) {
		MDI_PHCI_LOCK(ph);

		/* capture the next pointer while the phci is locked */
		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
			next = ph->ph_next;
		else
			next = NULL;

		MDI_PHCI_UNLOCK(ph);
		ph = next;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
}


/*
 * mdi_walk_vhcis():
 *		Walker routine to traverse vhci
 *		dev_info nodes
 */
void
mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = NULL;

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		/* hold the vhci across the callback, mdi_mutex is dropped */
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
			break;
		} else {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
		}
	}

	mutex_exit(&mdi_mutex);
}

/*
 * i_mdi_log_sysevent():
 *		Logs events for pickup by syseventd
 */
static void
i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
{
	char		*path_name;
	nvlist_t	*attr_list;

	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
	    KM_SLEEP) != DDI_SUCCESS) {
		goto alloc_failed;
	}

	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path_name);

	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
	    ddi_driver_name(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_PATHNAME,
	    path_name) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_CLASS,
	    ph_vh_class) != DDI_SUCCESS) {
		goto error;
	}

	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
	    attr_list, NULL, DDI_SLEEP);

error:
	/* common cleanup for both success and nvlist_add_* failure */
	kmem_free(path_name, MAXPATHLEN);
	nvlist_free(attr_list);
	return;

alloc_failed:
	MDI_DEBUG(1, (CE_WARN, dip,
	    "!i_mdi_log_sysevent: Unable to send sysevent"));
}
9200 9201 char ** 9202 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9203 { 9204 char **driver_list, **ret_driver_list = NULL; 9205 int *root_support_list; 9206 int cur_elements, max_elements; 9207 9208 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9209 &cur_elements, &max_elements); 9210 9211 9212 if (driver_list) { 9213 kmem_free(root_support_list, sizeof (int) * max_elements); 9214 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9215 * max_elements, sizeof (char *) * cur_elements); 9216 } 9217 *ndrivers = cur_elements; 9218 9219 return (ret_driver_list); 9220 9221 } 9222 9223 void 9224 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9225 { 9226 char **p; 9227 int i; 9228 9229 if (driver_list) { 9230 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9231 kmem_free(*p, strlen(*p) + 1); 9232 kmem_free(driver_list, sizeof (char *) * ndrivers); 9233 } 9234 } 9235