1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
29 * 30 * Default locking order: 31 * 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 #include <sys/sysmacros.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(level, stmnt) \ 77 if (mdi_debug >= (level)) i_mdi_log stmnt 78 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 79 #else /* !DEBUG */ 80 #define MDI_DEBUG(level, stmnt) 81 #endif /* DEBUG */ 82 83 extern pri_t minclsyspri; 84 extern int modrootloaded; 85 86 /* 87 * Global mutex: 88 * Protects vHCI list and structure members. 
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists (singly linked, head/tail insertion;
 * both pointers and the count are protected by mdi_mutex).
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t		*mdi_taskq;
static uint_t	mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 */
static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
static int		mdi_pathmap_hash_size = 256;
static kmutex_t		mdi_pathmap_mutex;
static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */

/*
 * MDI component property name/value string definitions
 */
const char		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/* cmn_err() format used when a second vHCI registers for the same class */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";

/*
 * Static function prototypes
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
/*
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
			    int, int);
static mdi_pathinfo_t	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
			    mdi_vhci_t *vh, int flags, int op);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
static boolean_t	stop_vhcache_flush_thread(void *, int);
static void
free_string_array(char **, int);
static void		free_vhcache_phci(mdi_vhcache_phci_t *);
static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void		free_vhcache_client(mdi_vhcache_client_t *);
static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_pi_add(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		vhcache_pi_remove(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		free_phclient_path_list(mdi_phys_path_t *);
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);

/*
 * called once when first vhci registers with mdi; sets up the global
 * mutex, the shared taskq, and the path_instance <-> path hashes.
 */
static void
i_mdi_init()
{
	static int initialized = 0;

	/* one-shot: subsequent vHCI registrations find the work done */
	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Create our taskq resources */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/* Allocate ['path_instance' <-> "path"] maps */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	mdi_pathmap_bypath = mod_hash_create_strhash(
	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
}

/*
 * mdi_get_component_type():
 *	Return mpxio component type
 * Return Values:
 *	MDI_COMPONENT_NONE
 *	MDI_COMPONENT_VHCI
 *	MDI_COMPONENT_PHCI
 *	MDI_COMPONENT_CLIENT
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed when clients migrate mdi_component_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	return (DEVI(dip)->devi_mdi_component);
}

/*
 * mdi_vhci_register():
 *	Register a vHCI module with the mpxio framework
 *	mdi_vhci_register() is called by vHCI drivers to register the
 *	'class_driver' vHCI driver and its MDI entrypoints with the
 *	mpxio framework.  The vHCI driver must call this interface as
 *	part of its attach(9e) handler.
 *	Competing threads may try to attach mdi_vhci_register() as
 *	the vHCI drivers are loaded and attached as a result of pHCI
 *	driver instance registration (mdi_phci_register()) with the
 *	framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
#ifdef DEBUG
	/*
	 * IB nexus driver is loaded only when IB hardware is present.
	 * In order to be able to do this there is a need to drive the loading
	 * and attaching of the IB nexus driver (especially when an IB hardware
	 * is dynamically plugged in) when an IB HCA driver (PHCI)
	 * is being attached. Unfortunately this gets into the limitations
	 * of devfs as there seems to be no clean way to drive configuration
	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
	 * for IB.
	 */
	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
#endif

	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created.  Check for valid
			 * vHCI ops registration.  We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/* default load-balance policy; overridden by driver property */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* tail-insert into the global vHCI list (under mdi_mutex) */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_vhci_unregister():
 *	Unregister a vHCI module from mpxio framework
 *	mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *	of a vhci to unregister it from the framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/* unlinked from the list; now tear down the vHCI state itself */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}

/*
 * i_mdi_vhci_class2vhci():
 *	Look for a matching vHCI module given a vHCI class name
 * Return Values:
 *	Handle to a vHCI component
 *	NULL
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*vh = NULL;

	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			break;
		}
	}
	mutex_exit(&mdi_mutex);
	return (vh);
}

/*
 * i_devi_get_vhci():
 *	Utility function to get the handle to a vHCI component
 * Return Values:
 *	Handle to a vHCI component
 *	NULL
 */
mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	mdi_vhci_t	*vh = NULL;
	if (MDI_VHCI(vdip)) {
		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
	}
	return (vh);
}

/*
 * mdi_phci_register():
 *	Register a pHCI
 *	module with mpxio framework
 *	mdi_phci_register() is called by pHCI drivers to register with
 *	the mpxio framework and a specific 'class_driver' vHCI.  The
 *	pHCI driver must call this interface as part of its attach(9e)
 *	handler.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;
	char			*pathname;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(pdip, pathname);

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			/* no-op unless compiled with DEBUG (see MDI_DEBUG) */
			MDI_DEBUG(1, (CE_CONT, pdip,
			    "?%s (%s%d) multipath capabilities "
			    "disabled via %s.conf.\n", pathname,
			    ddi_driver_name(pdip), ddi_get_instance(pdip),
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			kmem_free(pathname, MAXPATHLEN);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	kmem_free(pathname, MAXPATHLEN);

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* tail-insert into the vHCI's pHCI list under vh_phci_mutex */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *	Unregister a pHCI module from mpxio framework
 *	mdi_phci_unregister() is called by the pHCI drivers from their
 *	detach(9E) handler to unregister their instances from the
 *	framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_phci_t	*tmp;
	mdi_phci_t	*prev = NULL;

	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	/* defensive check for non-DEBUG builds where ASSERT is a no-op */
	if (vh == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/* unlink this pHCI from the vHCI's singly linked pHCI list */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *	Utility function to return the phci extensions.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;
	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * Single thread mdi entry into devinfo node for modifying its children.
 * If necessary we perform an ndi_devi_enter of the vHCI before doing
 * an ndi_devi_enter of 'dip'.
 * We maintain the circular-recursion state in two parts: one
 * for the vHCI and one for the pHCI (packed into a single int below).
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a thread that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and then enter down the path
	 * to the pHCI. If pHCI is detaching then we piggyback this call's
	 * enter of the vHCI on the framework's vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/*
				 * detach raced in after our tryenter; back
				 * out and piggyback on the framework's enter
				 */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else {
			delay(1);	/* busy; retry after one tick */
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);
	/* pack both circular cookies into the single out-parameter */
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
 */
void
mdi_devi_exit(dev_info_t *phci_dip, int circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/* extract two circular recursion values from single int */
	pcircular = (short)(circular & 0xFFFF);
	vcircular = (short)((circular >> 16) & 0xFFFF);

	ndi_devi_exit(phci_dip, pcircular);
	/* -1 means mdi_devi_enter piggybacked on the framework's vHCI enter */
	if (vcircular != -1)
		ndi_devi_exit(vdip, vcircular);
}

/*
 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 * with vHCI power management code during path online/offline.  Each
 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 * occur within the scope of an active mdi_devi_enter that establishes the
 * circular value.
 */
void
mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	/* drop only the pHCI half of the packed circular value */
	pcircular = (short)(circular & 0xFFFF);
	ndi_devi_exit(phci_dip, pcircular);
}

void
mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	ndi_devi_enter(phci_dip, &pcircular);

	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
}

/*
 * mdi_devi_get_vdip():
 *	given a pHCI dip return vHCI dip
 */
dev_info_t *
mdi_devi_get_vdip(dev_info_t *pdip)
{
	mdi_phci_t	*ph;

	ph = i_devi_get_phci(pdip);
	if (ph && ph->ph_vhci)
		return (ph->ph_vhci->vh_dip);
	return (NULL);
}

/*
 * mdi_devi_pdip_entered():
 *	Return 1 if we are vHCI and have done an ndi_devi_enter
 *	of a pHCI
 */
int
mdi_devi_pdip_entered(dev_info_t *vdip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;

	vh = i_devi_get_vhci(vdip);
	if (vh == NULL)
		return (0);

	/* scan all pHCIs of this vHCI for one busy-owned by this thread */
	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph) {
		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
			MDI_VHCI_PHCI_UNLOCK(vh);
			return (1);
		}
		ph = ph->ph_next;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	return (0);
}

/*
 * mdi_phci_path2devinfo():
 *	Utility function to search for a valid phci device given
 *	the devfs pathname.
 */
dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	/* defensive check for non-DEBUG builds where ASSERT is a no-op */
	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	MDI_VHCI_PHCI_LOCK(vh);
	/* compare the devfs path of each registered pHCI against pathname */
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	if (ph == NULL) {
		pdip = NULL;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *	get number of path information nodes associated with a given
 *	pHCI device.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *	Lock a pHCI device
 * Return Values:
 *	None
 * Note:
 *	The default locking order is:
 *	_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *	But there are number of situations where locks need to be
 *	grabbed in reverse order.  This routine implements try and lock
 *	mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			/*
			 * tryenter failed.
			 * Try to grab again
			 * after a small delay
			 */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *	Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *	create client device's devinfo node
 * Return Values:
 *	dev_info
 *	NULL
 * Notes:
 *	Caller must hold the vHCI client lock (asserted below).
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
	char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client dip %p already exists",
			(void *)cdip);
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* undo partial node construction before returning failure */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *	Find a matching devinfo node for given client node name
 *	and its guid.
 * Return Values:
 *	Handle to a dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char		*data;
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	int		circular;

	/* Hold the vHCI's child list stable while walking it */
	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		/* Both node name and GUID match */
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *	Remove a client device node.  If the node is not a child of
 *	vdip and MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED is not set, this
 *	is a no-op that returns MDI_SUCCESS.
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;

	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
		if (rv != NDI_SUCCESS) {
			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
			    " failed. cdip = %p\n", (void *)cdip));
		}
		/*
		 * Convert to MDI error code
		 */
		switch (rv) {
		case NDI_SUCCESS:
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	}
	return (rv);
}

/*
 * i_devi_get_client()
 *	Utility function to get mpxio component extensions.
 *	Returns NULL when cdip is not an MDI client node.
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
	mdi_client_t	*ct = NULL;

	if (MDI_CLIENT(cdip)) {
		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
	}
	return (ct);
}

/*
 * i_mdi_is_child_present():
 *	Search for the presence of client device dev_info node
 */
static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
	int		rv = MDI_FAILURE;
	struct dev_info	*dip;
	int		circular;

	ndi_devi_enter(vdip, &circular);
	dip = DEVI(vdip)->devi_child;
	while (dip) {
		if (dip == DEVI(cdip)) {
			rv = MDI_SUCCESS;
			break;
		}
		dip = dip->devi_sibling;
	}
	ndi_devi_exit(vdip, circular);
	return (rv);
}


/*
 * i_mdi_client_lock():
 *	Grab client component lock
 * Return Values:
 *	None
 * Note:
 *	The default locking order is:
 *	_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *	But there are number of situations where locks need to be
 *	grabbed in reverse order. This routine implements try and lock
 *	mechanism depending on the requested parameter option.
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			/*
			 * tryenter failed. Try to grab again
			 * after a small delay.  The hold keeps the
			 * pathinfo node alive while its mutex is dropped.
			 */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_CLIENT_LOCK(ct);
	}
}

/*
 * i_mdi_client_unlock():
 *	Unlock a client component
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_client_alloc():
 * 		Allocate and initialize a client structure.  Caller should
 *		hold the vhci client lock.
 * Return Values:
 *		Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/*
	 * Allocate and initialize a component structure.
	 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_dip = NULL;
	ct->ct_vhci = vh;
	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;
	ct->ct_flags = 0;
	/* Client starts out FAILED/offline until a path comes online */
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	MDI_CLIENT_UNLOCK(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	ct->ct_unstable = 0;
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
	/* Load-balance policy is inherited from the vHCI default */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);
	return (ct);
}

/*
 * i_mdi_client_enlist_table():
 *	Attach the client device to the client hash table. Caller
 *	should hold the vhci client lock.
 */
static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int 			index;
	struct client_hash	*head;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Insert at the head of the GUID's hash chain */
	index = i_mdi_get_hash_key(ct->ct_guid);
	head = &vh->vh_client_table[index];
	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
	head->ct_hash_head = ct;
	head->ct_hash_count++;
	vh->vh_client_count++;
}

/*
 * i_mdi_client_delist_table():
 *	Detach the client device from the client hash table.
 *	Caller should hold the vhci client lock.
 */
static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	char			*guid;
	struct client_hash 	*head;
	mdi_client_t		*next;
	mdi_client_t		*last;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	guid = ct->ct_guid;
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* Find ct on its hash chain, tracking the predecessor */
	last = NULL;
	next = (mdi_client_t *)head->ct_hash_head;
	while (next != NULL) {
		if (next == ct) {
			break;
		}
		last = next;
		next = next->ct_hnext;
	}

	if (next) {
		head->ct_hash_count--;
		if (last == NULL) {
			head->ct_hash_head = ct->ct_hnext;
		} else {
			last->ct_hnext = ct->ct_hnext;
		}
		ct->ct_hnext = NULL;
		vh->vh_client_count--;
	}
}


/*
 * i_mdi_client_free():
 *	Free a client component.  Caller holds the vHCI client lock;
 *	it is dropped and re-acquired around the devinfo removal.
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = NULL;

	/*
	 * Clear out back ref. to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	if (cdip != NULL) {
		/*
		 * Drop the vHCI client lock across the offline/remove,
		 * which may block; re-acquire before returning to keep
		 * the caller's locking contract.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
		MDI_VHCI_CLIENT_LOCK(vh);
	}
	return (rv);
}

/*
 * i_mdi_client_find():
 * 		Find the client structure corresponding to a given guid
 *		Caller should hold the vhci client lock.
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
	int			index;
	struct client_hash	*head;
	mdi_client_t		*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* Match on GUID; cname == NULL matches any driver name */
	ct = head->ct_hash_head;
	while (ct != NULL) {
		if (strcmp(ct->ct_guid, guid) == 0 &&
		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
			break;
		}
		ct = ct->ct_hnext;
	}
	return (ct);
}

/*
 * i_mdi_client_update_state():
 *	Compute and update client device state
 * Notes:
 *	A client device can be in any of three possible states:
 *
 *	MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *	one online/standby paths. Can tolerate failures.
 *	MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *	no alternate paths available as standby. A failure on the online
 *	would result in loss of access to device data.
1465 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1466 * no paths available to access the device. 1467 */ 1468 static void 1469 i_mdi_client_update_state(mdi_client_t *ct) 1470 { 1471 int state; 1472 1473 ASSERT(MDI_CLIENT_LOCKED(ct)); 1474 state = i_mdi_client_compute_state(ct, NULL); 1475 MDI_CLIENT_SET_STATE(ct, state); 1476 } 1477 1478 /* 1479 * i_mdi_client_compute_state(): 1480 * Compute client device state 1481 * 1482 * mdi_phci_t * Pointer to pHCI structure which should 1483 * while computing the new value. Used by 1484 * i_mdi_phci_offline() to find the new 1485 * client state after DR of a pHCI. 1486 */ 1487 static int 1488 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1489 { 1490 int state; 1491 int online_count = 0; 1492 int standby_count = 0; 1493 mdi_pathinfo_t *pip, *next; 1494 1495 ASSERT(MDI_CLIENT_LOCKED(ct)); 1496 pip = ct->ct_path_head; 1497 while (pip != NULL) { 1498 MDI_PI_LOCK(pip); 1499 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1500 if (MDI_PI(pip)->pi_phci == ph) { 1501 MDI_PI_UNLOCK(pip); 1502 pip = next; 1503 continue; 1504 } 1505 1506 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1507 == MDI_PATHINFO_STATE_ONLINE) 1508 online_count++; 1509 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1510 == MDI_PATHINFO_STATE_STANDBY) 1511 standby_count++; 1512 MDI_PI_UNLOCK(pip); 1513 pip = next; 1514 } 1515 1516 if (online_count == 0) { 1517 if (standby_count == 0) { 1518 state = MDI_CLIENT_STATE_FAILED; 1519 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1520 " ct = %p\n", (void *)ct)); 1521 } else if (standby_count == 1) { 1522 state = MDI_CLIENT_STATE_DEGRADED; 1523 } else { 1524 state = MDI_CLIENT_STATE_OPTIMAL; 1525 } 1526 } else if (online_count == 1) { 1527 if (standby_count == 0) { 1528 state = MDI_CLIENT_STATE_DEGRADED; 1529 } else { 1530 state = MDI_CLIENT_STATE_OPTIMAL; 1531 } 1532 } else { 1533 state = MDI_CLIENT_STATE_OPTIMAL; 1534 } 1535 return (state); 1536 } 1537 
1538 /* 1539 * i_mdi_client2devinfo(): 1540 * Utility function 1541 */ 1542 dev_info_t * 1543 i_mdi_client2devinfo(mdi_client_t *ct) 1544 { 1545 return (ct->ct_dip); 1546 } 1547 1548 /* 1549 * mdi_client_path2_devinfo(): 1550 * Given the parent devinfo and child devfs pathname, search for 1551 * a valid devfs node handle. 1552 */ 1553 dev_info_t * 1554 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1555 { 1556 dev_info_t *cdip = NULL; 1557 dev_info_t *ndip = NULL; 1558 char *temp_pathname; 1559 int circular; 1560 1561 /* 1562 * Allocate temp buffer 1563 */ 1564 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1565 1566 /* 1567 * Lock parent against changes 1568 */ 1569 ndi_devi_enter(vdip, &circular); 1570 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1571 while ((cdip = ndip) != NULL) { 1572 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1573 1574 *temp_pathname = '\0'; 1575 (void) ddi_pathname(cdip, temp_pathname); 1576 if (strcmp(temp_pathname, pathname) == 0) { 1577 break; 1578 } 1579 } 1580 /* 1581 * Release devinfo lock 1582 */ 1583 ndi_devi_exit(vdip, circular); 1584 1585 /* 1586 * Free the temp buffer 1587 */ 1588 kmem_free(temp_pathname, MAXPATHLEN); 1589 return (cdip); 1590 } 1591 1592 /* 1593 * mdi_client_get_path_count(): 1594 * Utility function to get number of path information nodes 1595 * associated with a given client device. 
1596 */ 1597 int 1598 mdi_client_get_path_count(dev_info_t *cdip) 1599 { 1600 mdi_client_t *ct; 1601 int count = 0; 1602 1603 ct = i_devi_get_client(cdip); 1604 if (ct != NULL) { 1605 count = ct->ct_path_count; 1606 } 1607 return (count); 1608 } 1609 1610 1611 /* 1612 * i_mdi_get_hash_key(): 1613 * Create a hash using strings as keys 1614 * 1615 */ 1616 static int 1617 i_mdi_get_hash_key(char *str) 1618 { 1619 uint32_t g, hash = 0; 1620 char *p; 1621 1622 for (p = str; *p != '\0'; p++) { 1623 g = *p; 1624 hash += g; 1625 } 1626 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1627 } 1628 1629 /* 1630 * mdi_get_lb_policy(): 1631 * Get current load balancing policy for a given client device 1632 */ 1633 client_lb_t 1634 mdi_get_lb_policy(dev_info_t *cdip) 1635 { 1636 client_lb_t lb = LOAD_BALANCE_NONE; 1637 mdi_client_t *ct; 1638 1639 ct = i_devi_get_client(cdip); 1640 if (ct != NULL) { 1641 lb = ct->ct_lb; 1642 } 1643 return (lb); 1644 } 1645 1646 /* 1647 * mdi_set_lb_region_size(): 1648 * Set current region size for the load-balance 1649 */ 1650 int 1651 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1652 { 1653 mdi_client_t *ct; 1654 int rv = MDI_FAILURE; 1655 1656 ct = i_devi_get_client(cdip); 1657 if (ct != NULL && ct->ct_lb_args != NULL) { 1658 ct->ct_lb_args->region_size = region_size; 1659 rv = MDI_SUCCESS; 1660 } 1661 return (rv); 1662 } 1663 1664 /* 1665 * mdi_Set_lb_policy(): 1666 * Set current load balancing policy for a given client device 1667 */ 1668 int 1669 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1670 { 1671 mdi_client_t *ct; 1672 int rv = MDI_FAILURE; 1673 1674 ct = i_devi_get_client(cdip); 1675 if (ct != NULL) { 1676 ct->ct_lb = lb; 1677 rv = MDI_SUCCESS; 1678 } 1679 return (rv); 1680 } 1681 1682 /* 1683 * mdi_failover(): 1684 * failover function called by the vHCI drivers to initiate 1685 * a failover operation. This is typically due to non-availability 1686 * of online paths to route I/O requests. 
 *	Failover can be
 *	triggered through user application also.
 *
 *	The vHCI driver calls mdi_failover() to initiate a failover
 *	operation. mdi_failover() calls back into the vHCI driver's
 *	vo_failover() entry point to perform the actual failover
 *	operation. The reason for requiring the vHCI driver to
 *	initiate failover by calling mdi_failover(), instead of directly
 *	executing vo_failover() itself, is to ensure that the mdi
 *	framework can keep track of the client state properly.
 *	Additionally, mdi_failover() provides as a convenience the
 *	option of performing the failover operation synchronously or
 *	asynchronously
 *
 *	Upon successful completion of the failover operation, the
 *	paths that were previously ONLINE will be in the STANDBY state,
 *	and the newly activated paths will be in the ONLINE state.
 *
 *	The flags modifier determines whether the activation is done
 *	synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int			rv;
	mdi_client_t		*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* Synchronous mode: wait for the client to settle */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again (the state may have changed while we waited above).
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(ct->ct_dip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.  KM_SLEEP dispatch cannot fail.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode.  Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *	internal failover function. Invokes vHCI drivers failover
 *	callback function and process the failover status
 * Return Values:
 *	None
 *
 * Note: A client device in failover state can not be detached or freed.
1810 */ 1811 static int 1812 i_mdi_failover(void *arg) 1813 { 1814 int rv = MDI_SUCCESS; 1815 mdi_client_t *ct = (mdi_client_t *)arg; 1816 mdi_vhci_t *vh = ct->ct_vhci; 1817 1818 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1819 1820 if (vh->vh_ops->vo_failover != NULL) { 1821 /* 1822 * Call vHCI drivers callback routine 1823 */ 1824 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1825 ct->ct_failover_flags); 1826 } 1827 1828 MDI_CLIENT_LOCK(ct); 1829 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1830 1831 /* 1832 * Save the failover return status 1833 */ 1834 ct->ct_failover_status = rv; 1835 1836 /* 1837 * As a result of failover, client status would have been changed. 1838 * Update the client state and wake up anyone waiting on this client 1839 * device. 1840 */ 1841 i_mdi_client_update_state(ct); 1842 1843 cv_broadcast(&ct->ct_failover_cv); 1844 MDI_CLIENT_UNLOCK(ct); 1845 return (rv); 1846 } 1847 1848 /* 1849 * Load balancing is logical block. 1850 * IOs within the range described by region_size 1851 * would go on the same path. This would improve the 1852 * performance by cache-hit on some of the RAID devices. 1853 * Search only for online paths(At some point we 1854 * may want to balance across target ports). 1855 * If no paths are found then default to round-robin. 
 */
static int
i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
{
	int		path_index = -1;
	int		online_path_count = 0;
	int		online_nonpref_path_count = 0;
	int 		region_size = ct->ct_lb_args->region_size;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	int		preferred, path_cnt;

	/*
	 * First pass: count ONLINE paths, split into preferred and
	 * non-preferred.  pi_state is compared for exact equality with
	 * ONLINE, so paths carrying extra state bits are not counted.
	 */
	pip = ct->ct_path_head;
	while (pip) {
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
			online_path_count++;
		} else if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
			online_nonpref_path_count++;
		}
		next = (mdi_pathinfo_t *)
		    MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	/* if found any online/preferred then use this type */
	if (online_path_count > 0) {
		path_cnt = online_path_count;
		preferred = 1;
	} else if (online_nonpref_path_count > 0) {
		path_cnt = online_nonpref_path_count;
		preferred = 0;
	} else {
		path_cnt = 0;
	}
	if (path_cnt) {
		/*
		 * Map the block's region to a path slot; blocks in the
		 * same region_size window always pick the same slot.
		 */
		path_index = (bp->b_blkno >> region_size) % path_cnt;
		pip = ct->ct_path_head;
		while (pip && path_index != -1) {
			MDI_PI_LOCK(pip);
			if (path_index == 0 &&
			    (MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    MDI_PI(pip)->pi_preferred == preferred) {
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				*ret_pip = pip;
				return (MDI_SUCCESS);
			}
			/*
			 * NOTE(review): path_index is decremented for every
			 * path walked, matching or not, while path_cnt only
			 * counted matching paths; with mixed path states the
			 * chosen slot can be missed, in which case the caller
			 * falls back to round-robin -- confirm intended.
			 */
			path_index --;
			next = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		if (pip == NULL) {
			MDI_DEBUG(4, (CE_NOTE, NULL,
			    "!lba %llx, no pip !!\n",
			    bp->b_lblkno));
		} else {
			MDI_DEBUG(4, (CE_NOTE, NULL,
			    "!lba %llx, no pip for path_index, "
			    "pip %p\n", bp->b_lblkno, (void *)pip));
		}
	}
	/* No path selected; caller falls through to round-robin */
	return (MDI_FAILURE);
}

/*
 * mdi_select_path():
 *	select a path to access a client device.
 *
 *	mdi_select_path() function is called by the vHCI drivers to
 *	select a path to route the I/O request to.  The caller passes
 *	the block I/O data transfer structure ("buf") as one of the
 *	parameters.  The mpxio framework uses the buf structure
 *	contents to maintain per path statistics (total I/O size /
 *	count pending).  If more than one online paths are available to
 *	select, the framework automatically selects a suitable path
 *	for routing I/O request. If a failover operation is active for
 *	this client device the call shall be failed with MDI_BUSY error
 *	code.
 *
 *	By default this function returns a suitable path in online
 *	state based on the current load balancing policy.  Currently
 *	we support LOAD_BALANCE_NONE (Previously selected online path
 *	will continue to be used till the path is usable) and
 *	LOAD_BALANCE_RR (Online paths will be selected in a round
 *	robin fashion), LOAD_BALANCE_LB(Online paths will be selected
 *	based on the logical block).  The load balancing
 *	through vHCI drivers configuration file (driver.conf).
 *
 *	vHCI drivers may override this default behavior by specifying
 *	appropriate flags.  The meaning of the third argument depends
 *	on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
 *	then the argument is the "path instance" of the path to select.
 *	If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *	"start_pip". A non NULL "start_pip" is the starting point to
 *	walk and find the next appropriate path.  The following values
 *	are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *	ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
 *	STANDBY path).
 *
 *	The non-standard behavior is used by the scsi_vhci driver,
 *	whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *	attach of client devices (to avoid an unnecessary failover
 *	when the STANDBY path comes up first), during failover
 *	(to activate a STANDBY path as ONLINE).
 *
 *	The selected path is returned in a mdi_hold_path() state
 *	(pi_ref_cnt). Caller should release the hold by calling
 *	mdi_rele_path().
 *
 * Return Values:
 *	MDI_SUCCESS	- Completed successfully
 *	MDI_BUSY 	- Client device is busy failing over
 *	MDI_NOPATH	- Client device is online, but no valid path are
 *			  available to access this client device
 *	MDI_FAILURE	- Invalid client device or state
 *	MDI_DEVI_ONLINING
 *			- Client device (struct dev_info state) is in
 *			  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    void *arg, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;
	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
	int		path_instance;	/* request specific path instance */

	/* determine type of arg based on flags */
	if (flags & MDI_SELECT_PATH_INSTANCE) {
		flags &= ~MDI_SELECT_PATH_INSTANCE;
		path_instance = (int)(intptr_t)arg;
		start_pip = NULL;
	} else {
		path_instance = 0;
		start_pip = (mdi_pathinfo_t *)arg;
	}

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/* Caller is specifying a specific pathinfo path by path_instance */
	if (path_instance) {
		/* search for pathinfo with correct path_instance */
		for (pip = head;
		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
			;

		/* If path can't be selected then MDI_FAILURE is returned. */
		if (pip == NULL) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/* verify state of path */
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/*
		 * Return the path in hold state. Caller should release the
		 * lock by calling mdi_rele_path()
		 */
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
		ct->ct_path_last = pip;
		*ret_pip = pip;
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				/* wrap around to the list head */
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				/* one full pass done; try non-preferred */
				preferred = 0;
			} else if (start == pip && !preferred) {
				/* both passes exhausted */
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
		    (ct->ct_lb_args->region_size) && bp &&
		    (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
			    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Return since we hit the end of list
					 */
					MDI_CLIENT_UNLOCK(ct);
					return (MDI_NOPATH);
				}

				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			if (sb) {
				/* standard: ONLINE paths of current pref */
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				/* pick the state test per flag combination */
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_NO_PREFERRED)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE) ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY))
					    ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Bail out since we hit the end of list
					 */
					MDI_PI_UNLOCK(pip);
					break;
				}

				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1|   | B : 1|   | C : 0|   |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		/* a transient path was seen; caller may retry */
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile.
 * The caller should lock
 * and unlock the nodes by calling mdi_pi_lock() and
 * mdi_pi_unlock() functions to get stable properties.
 *
 * If there is a need to use the nodes beyond the hold of the
 * devinfo node period (For ex. I/O), then mdi_pathinfo node
 * need to be held against unexpected removal by calling
 * mdi_hold_path() and should be released by calling
 * mdi_rele_path() on completion.
 */
mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	/* ct_dip must be an MDI client devinfo node */
	if (!MDI_CLIENT(ct_dip))
		return (NULL);

	/*
	 * Walk through client link
	 */
	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
	ASSERT(ct != NULL);

	/* pip == NULL means "start of list": return the head */
	if (pip == NULL)
		return ((mdi_pathinfo_t *)ct->ct_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
}

/*
 * For a phci, return the next available path to any client
 * Note: ditto mdi_get_next_phci_path()
 */
mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
{
	mdi_phci_t	*ph;

	/* ph_dip must be an MDI pHCI devinfo node */
	if (!MDI_PHCI(ph_dip))
		return (NULL);

	/*
	 * Walk through pHCI link
	 */
	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
	ASSERT(ph != NULL);

	/* pip == NULL means "start of list": return the head */
	if (pip == NULL)
		return ((mdi_pathinfo_t *)ph->ph_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
}

/*
 * mdi_hold_path():
 *	Hold the mdi_pathinfo node against unwanted/unexpected free.
 *	Bumps pi_ref_cnt under pi_mutex; mdi_pi_free() blocks until
 *	the count drains back to zero.
 * Return Values:
 *	None
 */
void
mdi_hold_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
	}
}


/*
 * mdi_rele_path():
 *	Release the mdi_pathinfo node which was selected
 *	through mdi_select_path() mechanism or manually held by
 *	calling mdi_hold_path().
 * Return Values:
 *	None
 */
void
mdi_rele_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_RELE(pip);
		if (MDI_PI(pip)->pi_ref_cnt == 0) {
			/* wake mdi_pi_free() waiters blocked on pi_ref_cv */
			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
		}
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_lock():
 *	Lock the mdi_pathinfo node.
 * Note:
 *	The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *	Unlock the mdi_pathinfo node.
 * Note:
 *	The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *	Search the list of mdi_pathinfo nodes attached to the
 *	pHCI/Client device node whose path address matches "paddr".
 *	Returns a pointer to the mdi_pathinfo node if a matching node is
 *	found.
 * Return Values:
 *	mdi_pathinfo node handle
 *	NULL
 * Notes:
 *	Caller need not hold any locks to call this function.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (CE_WARN, pdip,
			    "!mdi_pi_find: offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* linear scan of the pHCI's path list by unit address */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
		    (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
		    "found for caddr %s", caddr ? caddr : "NULL"));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc():
 *	Allocate and initialize a new instance of a mdi_pathinfo node.
 *	The mdi_pathinfo node returned by this function identifies a
 *	unique device path, is capable of having properties attached
 *	and passed to mdi_pi_online() to fully attach and online the
 *	path and client device node.
 *	The mdi_pathinfo node returned by this function must be
 *	destroyed using mdi_pi_free() if the path is no longer
 *	operational or if the caller fails to attach a client device
 *	node when calling mdi_pi_online(). The framework will not free
 *	the resources allocated.
 *	This function can be called from both interrupt and kernel
 *	contexts.  DDI_NOSLEEP flag should be used while calling
 *	from interrupt contexts.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;	/* default if we bail via 'fail' */
	int		path_allocated = 0;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
	    cname ? cname : "NULL", caddr ? caddr : "NULL",
	    paddr ? paddr : "NULL"));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/* mark pHCI unstable for the duration; restored before return */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			(void) i_mdi_client_free(vh, ct);
			/* rv is still MDI_NOMEM here */
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* check whether a pathinfo already exists for this pHCI/paddr pair */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (CE_NOTE, pdip,
	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *	Convenience wrapper for mdi_pi_alloc_compatible() with no
 *	compatible-name list.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *	Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *	mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	/* scratch buffer; only referenced while holding mdi_pathmap_mutex */
	static char	path[MAXPATHLEN];
	char		*path_persistent;
	int		path_instance;
	mod_hash_val_t	hv;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* inherit the pHCI's disable flags onto the new path */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);

	/*
	 * We form the "path" to the pathinfo node, and see if we have
	 * already allocated a 'path_instance' for that "path". If so,
	 * we use the already allocated 'path_instance'. If not, we
	 * allocate a new 'path_instance' and associate it with a copy of
	 * the "path" string (which is never freed). The association
	 * between a 'path_instance' and this "path" string persists until
	 * reboot.
	 */
	mutex_enter(&mdi_pathmap_mutex);
	(void) ddi_pathname(ph->ph_dip, path);
	(void) sprintf(path + strlen(path), "/%s@%s",
	    ddi_node_name(ct->ct_dip), MDI_PI(pip)->pi_addr);
	if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
		path_instance = (uint_t)(intptr_t)hv;
	} else {
		/* allocate a new 'path_instance' and persistent "path" */
		path_instance = mdi_pathmap_instance++;
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_bypath,
		    (mod_hash_key_t)path_persistent,
		    (mod_hash_val_t)(intptr_t)path_instance);
		(void) mod_hash_insert(mdi_pathmap_byinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);
	}
	mutex_exit(&mdi_pathmap_mutex);
	MDI_PI(pip)->pi_path_instance = path_instance;

	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	return (pip);
}

/*
 * mdi_pi_pathname_by_instance():
 *	Lookup of "path" by 'path_instance'. Return "path".
 *	NOTE: returned "path" remains valid forever (until reboot).
 */
char *
mdi_pi_pathname_by_instance(int path_instance)
{
	char		*path;
	mod_hash_val_t	hv;

	/* mdi_pathmap lookup of "path" by 'path_instance' */
	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_byinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
		path = (char *)hv;
	else
		path = NULL;
	mutex_exit(&mdi_pathmap_mutex);
	return (path);
}

/*
 * i_mdi_phci_add_path():
 *	Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *	Caller must have busied the pHCI dip (ndi_devi_enter); the
 *	per-pHCI mutex is acquired here.
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* append pip to the tail of the pHCI's singly-linked path list */
	MDI_PHCI_LOCK(ph);
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *	Add mdi_pathinfo node to client list
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* append pip to the tail of the client's singly-linked path list */
	MDI_CLIENT_LOCK(ct);
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *	Free the mdi_pathinfo node and also client device node if this
 *	is the last path to the device
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_FAILURE;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition. A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 * cv_timedwait() returns -1 when the (60 second) timeout
		 * expires without pi_ref_cv being signaled.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    (void *)pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		/* remember to drop the pm hold on the client later */
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop and reacquire in the correct lock order
	 * (vh_client_mutex before ct_mutex).
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 * (If the path was still INITING, or the vHCI has no vo_pi_uninit
	 * entry point, rv remains MDI_FAILURE and the node is not freed.)
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/* undo the earlier vhcache_pi_remove() since the free did not happen */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *	Free the mdi_pathinfo node
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *	Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *	Caller must have busied the pHCI dip (ndi_devi_enter); the
 *	per-pHCI mutex is acquired here.
 */
static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* singly-linked list unlink: find pip while tracking predecessor */
	MDI_PHCI_LOCK(ph);
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_remove_path():
 *	Remove a mdi_pathinfo node from client path list.
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* singly-linked list unlink: find pip while tracking predecessor */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* don't leave the round-robin cursor pointing at a dead node */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *	online a mdi_pathinfo node
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* vo_pi_init must be called without holding pi_mutex */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* mark pHCI unstable for the duration; restored at state_change_exit */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Don't offline the client dev_info node unless we have
		 * no available paths left at all.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (ct->ct_path_count == 1)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* invoke the vHCI driver's state-change callback, lock-free */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* roll the path state back on failure */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *	Place the path_info node in the online state. The path is
 *	now available to be selected by mdi_select_path() for
 *	transporting I/O requests to client devices.
 *	Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
	int		client_held = 0;
	int		rv;
	int		se_flag;
	int		kmem_flag;

	ASSERT(ct != NULL);
	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
	if (rv != MDI_SUCCESS)
		return (rv);

	/*
	 * Take a PM hold on the path the first time it goes online so the
	 * pHCI stays powered while the path is usable.
	 */
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held == 0) {
		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
		    "i_mdi_pm_hold_pip %p\n", (void *)pip));
		i_mdi_pm_hold_pip(pip);
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	if (client_held) {
		/*
		 * Mirror the path hold with a hold on the client; power up
		 * all pHCIs if the client currently has no powered path.
		 */
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_power_cnt == 0) {
			rv = i_mdi_power_all_phci(ct);
		}

		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online "
		    "i_mdi_pm_hold_client %p\n", (void *)ct));
		i_mdi_pm_hold_client(ct, 1);
		MDI_CLIENT_UNLOCK(ct);
	}

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	/* A new path is online. Invalidate DINFOCACHE snap shot. */
	i_ddi_di_cache_invalidate(kmem_flag);

	return (rv);
}

/*
 * mdi_pi_standby():
 *	Place the mdi_pathinfo node in standby state
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
}

/*
 * mdi_pi_fault():
 *	Place the mdi_pathinfo node in fault'ed state
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
{
	return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
}

/*
 * mdi_pi_offline():
 *	Offline a mdi_pathinfo node.
3708 * Return Values: 3709 * MDI_SUCCESS 3710 * MDI_FAILURE 3711 */ 3712 int 3713 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3714 { 3715 int ret, client_held = 0; 3716 mdi_client_t *ct; 3717 int se_flag; 3718 int kmem_flag; 3719 3720 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3721 3722 if (ret == MDI_SUCCESS) { 3723 MDI_PI_LOCK(pip); 3724 if (MDI_PI(pip)->pi_pm_held) { 3725 client_held = 1; 3726 } 3727 MDI_PI_UNLOCK(pip); 3728 3729 if (client_held) { 3730 ct = MDI_PI(pip)->pi_client; 3731 MDI_CLIENT_LOCK(ct); 3732 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3733 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3734 i_mdi_pm_rele_client(ct, 1); 3735 MDI_CLIENT_UNLOCK(ct); 3736 } 3737 3738 /* determine interrupt context */ 3739 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3740 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3741 3742 /* pathinfo is offlined. update DINFOCACHE. */ 3743 i_ddi_di_cache_invalidate(kmem_flag); 3744 } 3745 3746 return (ret); 3747 } 3748 3749 /* 3750 * i_mdi_pi_offline(): 3751 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3752 */ 3753 static int 3754 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3755 { 3756 dev_info_t *vdip = NULL; 3757 mdi_vhci_t *vh = NULL; 3758 mdi_client_t *ct = NULL; 3759 int (*f)(); 3760 int rv; 3761 3762 MDI_PI_LOCK(pip); 3763 ct = MDI_PI(pip)->pi_client; 3764 ASSERT(ct != NULL); 3765 3766 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3767 /* 3768 * Give a chance for pending I/Os to complete. 3769 */ 3770 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3771 "%d cmds still pending on path: %p\n", 3772 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3773 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3774 &MDI_PI(pip)->pi_mutex, 3775 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3776 /* 3777 * The timeout time reached without ref_cnt being zero 3778 * being signaled. 
3779 */ 3780 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3781 "Timeout reached on path %p without the cond\n", 3782 (void *)pip)); 3783 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3784 "%d cmds still pending on path: %p\n", 3785 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3786 } 3787 } 3788 vh = ct->ct_vhci; 3789 vdip = vh->vh_dip; 3790 3791 /* 3792 * Notify vHCI that has registered this event 3793 */ 3794 ASSERT(vh->vh_ops); 3795 f = vh->vh_ops->vo_pi_state_change; 3796 3797 if (f != NULL) { 3798 MDI_PI_UNLOCK(pip); 3799 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3800 flags)) != MDI_SUCCESS) { 3801 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3802 "!vo_path_offline failed " 3803 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3804 } 3805 MDI_PI_LOCK(pip); 3806 } 3807 3808 /* 3809 * Set the mdi_pathinfo node state and clear the transient condition 3810 */ 3811 MDI_PI_SET_OFFLINE(pip); 3812 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3813 MDI_PI_UNLOCK(pip); 3814 3815 MDI_CLIENT_LOCK(ct); 3816 if (rv == MDI_SUCCESS) { 3817 if (ct->ct_unstable == 0) { 3818 dev_info_t *cdip = ct->ct_dip; 3819 3820 /* 3821 * Onlining the mdi_pathinfo node will impact the 3822 * client state Update the client and dev_info node 3823 * state accordingly 3824 */ 3825 i_mdi_client_update_state(ct); 3826 rv = NDI_SUCCESS; 3827 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3828 if (cdip && 3829 (i_ddi_node_state(cdip) >= 3830 DS_INITIALIZED)) { 3831 MDI_CLIENT_UNLOCK(ct); 3832 rv = ndi_devi_offline(cdip, 0); 3833 MDI_CLIENT_LOCK(ct); 3834 if (rv != NDI_SUCCESS) { 3835 /* 3836 * ndi_devi_offline failed. 3837 * Reset client flags to 3838 * online. 
3839 */ 3840 MDI_DEBUG(4, (CE_WARN, cdip, 3841 "!ndi_devi_offline: failed " 3842 " Error: %x", rv)); 3843 MDI_CLIENT_SET_ONLINE(ct); 3844 } 3845 } 3846 } 3847 /* 3848 * Convert to MDI error code 3849 */ 3850 switch (rv) { 3851 case NDI_SUCCESS: 3852 rv = MDI_SUCCESS; 3853 break; 3854 case NDI_BUSY: 3855 rv = MDI_BUSY; 3856 break; 3857 default: 3858 rv = MDI_FAILURE; 3859 break; 3860 } 3861 } 3862 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3863 i_mdi_report_path_state(ct, pip); 3864 } 3865 3866 MDI_CLIENT_UNLOCK(ct); 3867 3868 /* 3869 * Change in the mdi_pathinfo node state will impact the client state 3870 */ 3871 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3872 (void *)ct, (void *)pip)); 3873 return (rv); 3874 } 3875 3876 3877 /* 3878 * mdi_pi_get_addr(): 3879 * Get the unit address associated with a mdi_pathinfo node 3880 * 3881 * Return Values: 3882 * char * 3883 */ 3884 char * 3885 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3886 { 3887 if (pip == NULL) 3888 return (NULL); 3889 3890 return (MDI_PI(pip)->pi_addr); 3891 } 3892 3893 /* 3894 * mdi_pi_get_path_instance(): 3895 * Get the 'path_instance' of a mdi_pathinfo node 3896 * 3897 * Return Values: 3898 * path_instance 3899 */ 3900 int 3901 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 3902 { 3903 if (pip == NULL) 3904 return (0); 3905 3906 return (MDI_PI(pip)->pi_path_instance); 3907 } 3908 3909 /* 3910 * mdi_pi_pathname(): 3911 * Return pointer to path to pathinfo node. 
3912 */ 3913 char * 3914 mdi_pi_pathname(mdi_pathinfo_t *pip) 3915 { 3916 if (pip == NULL) 3917 return (NULL); 3918 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 3919 } 3920 3921 char * 3922 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 3923 { 3924 char *obp_path = NULL; 3925 if ((pip == NULL) || (path == NULL)) 3926 return (NULL); 3927 3928 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 3929 (void) strcpy(path, obp_path); 3930 (void) mdi_prop_free(obp_path); 3931 } else { 3932 path = NULL; 3933 } 3934 return (path); 3935 } 3936 3937 int 3938 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 3939 { 3940 dev_info_t *pdip; 3941 char *obp_path = NULL; 3942 int rc = MDI_FAILURE; 3943 3944 if (pip == NULL) 3945 return (MDI_FAILURE); 3946 3947 pdip = mdi_pi_get_phci(pip); 3948 if (pdip == NULL) 3949 return (MDI_FAILURE); 3950 3951 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 3952 3953 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 3954 (void) ddi_pathname(pdip, obp_path); 3955 } 3956 3957 if (component) { 3958 (void) strncat(obp_path, "/", MAXPATHLEN); 3959 (void) strncat(obp_path, component, MAXPATHLEN); 3960 } 3961 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 3962 3963 if (obp_path) 3964 kmem_free(obp_path, MAXPATHLEN); 3965 return (rc); 3966 } 3967 3968 /* 3969 * mdi_pi_get_client(): 3970 * Get the client devinfo associated with a mdi_pathinfo node 3971 * 3972 * Return Values: 3973 * Handle to client device dev_info node 3974 */ 3975 dev_info_t * 3976 mdi_pi_get_client(mdi_pathinfo_t *pip) 3977 { 3978 dev_info_t *dip = NULL; 3979 if (pip) { 3980 dip = MDI_PI(pip)->pi_client->ct_dip; 3981 } 3982 return (dip); 3983 } 3984 3985 /* 3986 * mdi_pi_get_phci(): 3987 * Get the pHCI devinfo associated with the mdi_pathinfo node 3988 * Return Values: 3989 * Handle to dev_info node 3990 */ 3991 dev_info_t * 3992 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3993 { 3994 dev_info_t *dip = NULL; 3995 if (pip) { 
3996 dip = MDI_PI(pip)->pi_phci->ph_dip; 3997 } 3998 return (dip); 3999 } 4000 4001 /* 4002 * mdi_pi_get_client_private(): 4003 * Get the client private information associated with the 4004 * mdi_pathinfo node 4005 */ 4006 void * 4007 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4008 { 4009 void *cprivate = NULL; 4010 if (pip) { 4011 cprivate = MDI_PI(pip)->pi_cprivate; 4012 } 4013 return (cprivate); 4014 } 4015 4016 /* 4017 * mdi_pi_set_client_private(): 4018 * Set the client private information in the mdi_pathinfo node 4019 */ 4020 void 4021 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4022 { 4023 if (pip) { 4024 MDI_PI(pip)->pi_cprivate = priv; 4025 } 4026 } 4027 4028 /* 4029 * mdi_pi_get_phci_private(): 4030 * Get the pHCI private information associated with the 4031 * mdi_pathinfo node 4032 */ 4033 caddr_t 4034 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4035 { 4036 caddr_t pprivate = NULL; 4037 if (pip) { 4038 pprivate = MDI_PI(pip)->pi_pprivate; 4039 } 4040 return (pprivate); 4041 } 4042 4043 /* 4044 * mdi_pi_set_phci_private(): 4045 * Set the pHCI private information in the mdi_pathinfo node 4046 */ 4047 void 4048 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4049 { 4050 if (pip) { 4051 MDI_PI(pip)->pi_pprivate = priv; 4052 } 4053 } 4054 4055 /* 4056 * mdi_pi_get_state(): 4057 * Get the mdi_pathinfo node state. Transient states are internal 4058 * and not provided to the users 4059 */ 4060 mdi_pathinfo_state_t 4061 mdi_pi_get_state(mdi_pathinfo_t *pip) 4062 { 4063 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4064 4065 if (pip) { 4066 if (MDI_PI_IS_TRANSIENT(pip)) { 4067 /* 4068 * mdi_pathinfo is in state transition. Return the 4069 * last good state. 
4070 */ 4071 state = MDI_PI_OLD_STATE(pip); 4072 } else { 4073 state = MDI_PI_STATE(pip); 4074 } 4075 } 4076 return (state); 4077 } 4078 4079 /* 4080 * Note that the following function needs to be the new interface for 4081 * mdi_pi_get_state when mpxio gets integrated to ON. 4082 */ 4083 int 4084 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4085 uint32_t *ext_state) 4086 { 4087 *state = MDI_PATHINFO_STATE_INIT; 4088 4089 if (pip) { 4090 if (MDI_PI_IS_TRANSIENT(pip)) { 4091 /* 4092 * mdi_pathinfo is in state transition. Return the 4093 * last good state. 4094 */ 4095 *state = MDI_PI_OLD_STATE(pip); 4096 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4097 } else { 4098 *state = MDI_PI_STATE(pip); 4099 *ext_state = MDI_PI_EXT_STATE(pip); 4100 } 4101 } 4102 return (MDI_SUCCESS); 4103 } 4104 4105 /* 4106 * mdi_pi_get_preferred: 4107 * Get the preferred path flag 4108 */ 4109 int 4110 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4111 { 4112 if (pip) { 4113 return (MDI_PI(pip)->pi_preferred); 4114 } 4115 return (0); 4116 } 4117 4118 /* 4119 * mdi_pi_set_preferred: 4120 * Set the preferred path flag 4121 */ 4122 void 4123 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4124 { 4125 if (pip) { 4126 MDI_PI(pip)->pi_preferred = preferred; 4127 } 4128 } 4129 4130 /* 4131 * mdi_pi_set_state(): 4132 * Set the mdi_pathinfo node state 4133 */ 4134 void 4135 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4136 { 4137 uint32_t ext_state; 4138 4139 if (pip) { 4140 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4141 MDI_PI(pip)->pi_state = state; 4142 MDI_PI(pip)->pi_state |= ext_state; 4143 } 4144 } 4145 4146 /* 4147 * Property functions: 4148 */ 4149 int 4150 i_map_nvlist_error_to_mdi(int val) 4151 { 4152 int rv; 4153 4154 switch (val) { 4155 case 0: 4156 rv = DDI_PROP_SUCCESS; 4157 break; 4158 case EINVAL: 4159 case ENOTSUP: 4160 rv = DDI_PROP_INVAL_ARG; 4161 break; 4162 case ENOMEM: 4163 rv = DDI_PROP_NO_MEMORY; 4164 break; 
4165 default: 4166 rv = DDI_PROP_NOT_FOUND; 4167 break; 4168 } 4169 return (rv); 4170 } 4171 4172 /* 4173 * mdi_pi_get_next_prop(): 4174 * Property walk function. The caller should hold mdi_pi_lock() 4175 * and release by calling mdi_pi_unlock() at the end of walk to 4176 * get a consistent value. 4177 */ 4178 nvpair_t * 4179 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4180 { 4181 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4182 return (NULL); 4183 } 4184 ASSERT(MDI_PI_LOCKED(pip)); 4185 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4186 } 4187 4188 /* 4189 * mdi_prop_remove(): 4190 * Remove the named property from the named list. 4191 */ 4192 int 4193 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4194 { 4195 if (pip == NULL) { 4196 return (DDI_PROP_NOT_FOUND); 4197 } 4198 ASSERT(!MDI_PI_LOCKED(pip)); 4199 MDI_PI_LOCK(pip); 4200 if (MDI_PI(pip)->pi_prop == NULL) { 4201 MDI_PI_UNLOCK(pip); 4202 return (DDI_PROP_NOT_FOUND); 4203 } 4204 if (name) { 4205 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4206 } else { 4207 char nvp_name[MAXNAMELEN]; 4208 nvpair_t *nvp; 4209 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4210 while (nvp) { 4211 nvpair_t *next; 4212 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4213 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 4214 nvpair_name(nvp)); 4215 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4216 nvp_name); 4217 nvp = next; 4218 } 4219 } 4220 MDI_PI_UNLOCK(pip); 4221 return (DDI_PROP_SUCCESS); 4222 } 4223 4224 /* 4225 * mdi_prop_size(): 4226 * Get buffer size needed to pack the property data. 4227 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4228 * buffer size. 
 */
int
mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
{
	int	rv;
	size_t	bufsize;

	*buflenp = 0;
	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	ASSERT(MDI_PI_LOCKED(pip));
	rv = nvlist_size(MDI_PI(pip)->pi_prop,
	    &bufsize, NV_ENCODE_NATIVE);
	*buflenp = bufsize;
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_pack():
 *	pack the property list. The caller should hold the
 *	mdi_pathinfo_t node to get a consistent data
 */
int
mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
{
	int	rv;
	size_t	bufsize;

	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
		return (DDI_PROP_NOT_FOUND);
	}

	ASSERT(MDI_PI_LOCKED(pip));

	bufsize = buflen;
	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
	    NV_ENCODE_NATIVE, KM_SLEEP);

	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_byte():
 *	Create/Update a byte property
 */
int
mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_byte_array():
 *	Create/Update a byte array property
 */
int
mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
    uint_t nelements)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_int():
 *	Create/Update a 32 bit integer property
 */
int
mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_int64():
 *	Create/Update a 64 bit integer property
 */
int
mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_int_array():
 *	Create/Update a int array property
 */
int
mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
    uint_t nelements)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
	    nelements);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_string():
 *	Create/Update a string property
 */
int
mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_string_array():
 *	Create/Update a string array property
 */
int
mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
    uint_t nelements)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_byte():
 *	Look for byte property identified by name. The data returned
 *	is the actual property and valid as long as mdi_pathinfo_t node
 *	is alive.
 */
int
mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
	return (i_map_nvlist_error_to_mdi(rv));
}


/*
 * mdi_prop_lookup_byte_array():
 *	Look for byte array property identified by name. The data
 *	returned is the actual property and valid as long as
 *	mdi_pathinfo_t node is alive.
 */
int
mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
    uint_t *nelements)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_int():
 *	Look for int property identified by name. The data returned
 *	is the actual property and valid as long as mdi_pathinfo_t
 *	node is alive.
 */
int
mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_int64():
 *	Look for int64 property identified by name. The data returned
 *	is the actual property and valid as long as mdi_pathinfo_t node
 *	is alive.
 */
int
mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_int_array():
 *	Look for int array property identified by name. The data
 *	returned is the actual property and valid as long as
 *	mdi_pathinfo_t node is alive.
 */
int
mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
    uint_t *nelements)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
	    (int32_t **)data, nelements);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_string():
 *	Look for string property identified by name. The data
 *	returned is the actual property and valid as long as
 *	mdi_pathinfo_t node is alive.
 */
int
mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_string_array():
 *	Look for string array property identified by name. The data
 *	returned is the actual property and valid as long as
 *	mdi_pathinfo_t node is alive.
 */
int
mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
    uint_t *nelements)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_free():
 *	Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
 *	functions return the pointer to actual property data and not a
 *	copy of it. So the data returned is valid as long as
 *	mdi_pathinfo_t node is valid.
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	/* Nothing to free: lookups hand out pointers into pi_prop itself */
	return (DDI_PROP_SUCCESS);
}

/*
 * i_mdi_report_path_state():
 *	Log a "multipath status" console message for a path/client pair
 *	and clear the client's REPORT_DEV_NEEDED flag. Caller must hold
 *	the client lock.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char		*phci_path, *ct_path;
	char		*ct_status;
	char		*status;
	dev_info_t	*dip = ct->ct_dip;
	char		lb_buf[64];

	ASSERT(MDI_CLIENT_LOCKED(ct));
	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}

	/* Map client state to a human-readable word */
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	/* Map path state to a human-readable word */
	if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	/* Describe the load-balancing policy in effect */
	if (ct->ct_lb == LOAD_BALANCE_LBA) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s, region-size: %d", mdi_load_balance_lba,
		    ct->ct_lb_args->region_size);
	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s", mdi_load_balance_none);
	} else {
		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
		    mdi_load_balance_rr);
	}

	if (dip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
		    "path %s (%s%d) to target address: %s is %s"
		    " Load balancing: %s\n",
		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
		    ddi_get_instance(dip), ct_status,
		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
		    MDI_PI(pip)->pi_addr, status, lb_buf);
		kmem_free(phci_path, MAXPATHLEN);
		kmem_free(ct_path, MAXPATHLEN);
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef DEBUG
/*
 * i_mdi_log():
 *	Utility function for error message management
 *
 *	Honors the cmn_err() leading-character conventions ('!', '?',
 *	'^') in the formatted message; mdi_debug_logonly forces all
 *	output to the system log only.
 */
/*PRINTFLIKE3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[MAXNAMELEN];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	if (dip) {
		(void) snprintf(name, MAXNAMELEN, "%s%d: ",
		    ddi_node_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
	va_end(ap);

	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		bp = buf;
		break;
	}
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
	case CE_WARN:
	case CE_PANIC:
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s", name, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s", name, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s", name, bp);
		} else {
			cmn_err(level, "mdi: %s%s", name, bp);
		}
		break;
	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */

void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification. Mark client state as online and
	 * restore our binding with the dev_info node.
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
	    "i_mdi_pm_hold_client %p\n", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *	Online notification from NDI framework on pHCI/client
 *	device online.
 * Return Values:
 *	NDI_SUCCESS (always; the notification cannot fail)
 */
/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *	Offline notification from NDI framework on pHCI/Client device
 *	offline.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		NDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	/* Offline the client role first; if that fails, don't touch pHCI */
	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*
 * i_mdi_phci_offline():
 *	Offline a pHCI: verify no client is mid-failover or unstable,
 *	offline any client for which this pHCI carries the last path,
 *	then mark all child mdi_pathinfo nodes offlining and offline them.
 *	On partial failure, previously-offlined clients are restored.
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
	    (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
		    (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Pass 1: walk every path; reject if any client is unstable,
	 * and offline clients for which this pHCI carries the last path.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* drop locks before calling back into the NDI */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Rollback: a client could not be offlined.  Re-walk the
		 * paths processed so far and restore each client to a
		 * state consistent with its computed path state.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	/* Pass 2: actually offline each path; any failure reverts pHCI */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			/* path offline failed: put the pHCI back online */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*
 * mdi_phci_mark_retiring():
 *	For each client whose last usable path goes through this pHCI,
 *	mark the client dip as retiring via e_ddi_mark_retiring().
 *	cons_array carries the retire constraints to apply.
 */
void
mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* has no last path */
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		MDI_PI_UNLOCK(pip);

		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Last path.
			    Mark client dip as retiring */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_mark_retiring(cdip, cons_array);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_retire_notify():
 *	Retire-notify pass for a pHCI.  For every client whose last path
 *	runs through this pHCI, invoke e_ddi_retire_notify() so retire
 *	constraints can be gathered.  If the pHCI or any client is in a
 *	transient/failover state, constraints cannot be evaluated and
 *	*constraint is cleared to 0.
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are retiring the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* drop locks before calling back into the DDI */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_retire_notify(cdip, constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * offline the path(s) hanging off the PHCI. If the
 * last path to any client, check that constraints
 * have been applied.
 */
void
mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		unstable = 0;
	int		constraint;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* no last path and no pips */
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* no last path and no pips */
		return;
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		unstable = 1;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * if failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			unstable = 1;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (!phci_only && cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			/*
			 * We don't retire clients we just retire the
			 * path to a client. If it is the last path
			 * to a client, constraints are checked and
			 * if we pass the last path is offlined. MPXIO will
			 * then fail all I/Os to the client. Since we don't
			 * want to retire the client on a path error
			 * set constraint = 0 so that the client dip
			 * is not retired.
			 */
			constraint = 0;
			(void) e_ddi_retire_finalize(cdip, &constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Cannot offline pip(s)
	 */
	if (unstable) {
		cmn_err(CE_WARN, "PHCI in transient state, cannot "
		    "retire, dip = %p", (void *)dip);
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, 0);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
			    "PHCI dip = %p", (void *)dip);
			MDI_PI_UNLOCK(pip);
			/* revert: offline failed, bring the pHCI back */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return;
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_unretire():
 *	Undo a pHCI retire by simply bringing it back online.
 */
void
mdi_phci_unretire(dev_info_t *dip)
{
	ASSERT(MDI_PHCI(dip));

	/*
	 * Online the phci
	 */
	i_mdi_phci_online(dip);
}

/*
 * i_mdi_client_offline():
 *	Client component offline.  Refuses (NDI_BUSY) while any path is
 *	transient or a failover is in progress; otherwise marks the
 *	client offline and, on NDI_DEVI_REMOVE, drops the dev_info binding.
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n",
	    (void *)dip, (void *)ct));
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!One or more paths to this device is "
			    "in transient state. This device can not "
			    "be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!Client device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}

/*
 * mdi_pre_attach():
 *		Pre attach() notification handler
 */
/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* don't support old DDI_PM_RESUME */
	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
	    (cmd == DDI_PM_RESUME))
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * mdi_post_attach():
 *		Post attach() notification handler.  Records the result of
 *		the attach/resume in the mdi pHCI and/or client state so mdi
 *		bookkeeping stays consistent with the DDI framework's view.
 */
/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_vhci_t	*vh;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_attach: called %p\n", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_attach: failed error=%d\n",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!pHCI post_resume: called %p\n", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!pHCI post_resume: failed error=%d\n",
				    error));
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", (void *)ct));
			if (error != DDI_SUCCESS) {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_attach: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (CE_WARN, dip,
				    "mdi_post_attach i_mdi_pm_reset_client\n"));
				/* attach failed: drop any pm bookkeeping */
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached, inform
			 * the vhci.
			 */
			vh = ct->ct_vhci;
			if (vh->vh_ops->vo_client_attached)
				(*vh->vh_ops->vo_client_attached)(dip);

			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", (void *)ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_resume: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}

/*
 * mdi_pre_detach():
 *		Pre detach notification handler
 */
/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	/* Handle the client role first; its result is advisory only */
	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*
 * i_mdi_phci_pre_detach():
 *	Pre-detach handling for a pHCI.  DDI_DETACH is refused while any
 *	mdi_pathinfo children remain.  DDI_SUSPEND first suspends every
 *	client reachable through this pHCI; on failure, clients suspended
 *	so far are resumed again.
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_detach: called %p\n", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			/* skip clients already detached or suspended */
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * i_mdi_client_pre_detach():
 *	Record the impending detach/suspend in the client's mdi state.
 */
/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_detach: called %p\n", (void *)ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_suspend: called %p\n", (void *)ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *		Post detach notification handler
 */
/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed. Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*
 * i_mdi_phci_post_detach():
 *	On a failed detach/suspend of a pHCI, restore the converse
 *	mdi pHCI state (attach/resume).
 */
/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed.
	 * Update our state
	 * too
	 */
	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);
	/*
	 * Detach of pHCI failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI post_detach: called %p\n", (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_ATTACH(ph);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI post_suspend: called %p\n", (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_RESUME(ph);
		break;
	}
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_post_detach():
 *	Post-detach/suspend handling for a client: release or reset the
 *	client's power management holds and, on failure, restore the
 *	converse mdi client state (attach/resume).
 */
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_detach: called %p\n", (void *)ct));
		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
			/* detach during attach: release per-path holds */
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_suspend: called %p\n", (void *)ct));
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_kstat_exists():
 *	Return 1 if per-path kstats have been created for this
 *	pathinfo node, 0 otherwise.
 */
int
mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
{
	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
}

/*
 * create and install per-path (client - pHCI) statistics
 * I/O stats supported: nread, nwritten, reads, and writes
 * Error stats - hard errors, soft errors, & transport errors
 *
 * NOTE(review): ",err" is strcat'ed onto ksname, so the caller's buffer
 * must have room for the suffix beyond the supplied name — confirm at
 * call sites.
 */
int
mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
{
	kstat_t			*kiosp, *kerrsp;
	struct pi_errs		*nsp;
	struct mdi_pi_kstats	*mdi_statp;

	/* already created; nothing to do */
	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		return (MDI_FAILURE);
	}

	(void) strcat(ksname, ",err");
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
	if (kerrsp == NULL) {
		/* undo the iopath kstat so we fail cleanly */
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}

/*
 * destroy per-path properties
 */
static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
{

	struct mdi_pi_kstats *mdi_statp;

	/* note: the second check below re-tests the same condition */
	if (MDI_PI(pip)->pi_kstats == NULL)
		return;
	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
		return;

	MDI_PI(pip)->pi_kstats = NULL;

	/*
	 * the kstat may be shared between multiple pathinfo nodes
	 * decrement this pathinfo's usage, removing the kstats
	 * themselves when the last pathinfo reference is removed.
	 */
	ASSERT(mdi_statp->pi_kstat_ref > 0);
	if (--mdi_statp->pi_kstat_ref != 0)
		return;

	kstat_delete(mdi_statp->pi_kstat_iostats);
	kstat_delete(mdi_statp->pi_kstat_errstats);
	kmem_free(mdi_statp, sizeof (*mdi_statp));
}

/*
 * update I/O paths KSTATS
 */
void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
{
	kstat_t		*iostatp;
	size_t		xfer_cnt;

	ASSERT(pip != NULL);

	/*
	 * I/O can be driven across a path prior to having path
	 * statistics available, i.e. probe(9e).
	 */
	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
		/* count only the bytes actually transferred */
		xfer_cnt = bp->b_bcount - bp->b_resid;
		if (bp->b_flags & B_READ) {
			KSTAT_IO_PTR(iostatp)->reads++;
			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
		} else {
			KSTAT_IO_PTR(iostatp)->writes++;
			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
		}
	}
}

/*
 * Enable the path(specific client/target/initiator)
 * Enabling a path means that MPxIO may select the enabled path for routing
 * future I/O requests, subject to other path state constraints.
5875 */ 5876 int 5877 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5878 { 5879 mdi_phci_t *ph; 5880 5881 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5882 if (ph == NULL) { 5883 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5884 " failed. pip: %p ph = NULL\n", (void *)pip)); 5885 return (MDI_FAILURE); 5886 } 5887 5888 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5889 MDI_ENABLE_OP); 5890 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5891 " Returning success pip = %p. ph = %p\n", 5892 (void *)pip, (void *)ph)); 5893 return (MDI_SUCCESS); 5894 5895 } 5896 5897 /* 5898 * Disable the path (specific client/target/initiator) 5899 * Disabling a path means that MPxIO will not select the disabled path for 5900 * routing any new I/O requests. 5901 */ 5902 int 5903 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5904 { 5905 mdi_phci_t *ph; 5906 5907 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5908 if (ph == NULL) { 5909 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5910 " failed. pip: %p ph = NULL\n", (void *)pip)); 5911 return (MDI_FAILURE); 5912 } 5913 5914 (void) i_mdi_enable_disable_path(pip, 5915 ph->ph_vhci, flags, MDI_DISABLE_OP); 5916 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5917 "Returning success pip = %p. ph = %p", 5918 (void *)pip, (void *)ph)); 5919 return (MDI_SUCCESS); 5920 } 5921 5922 /* 5923 * disable the path to a particular pHCI (pHCI specified in the phci_path 5924 * argument) for a particular client (specified in the client_path argument). 5925 * Disabling a path means that MPxIO will not select the disabled path for 5926 * routing any new I/O requests. 
5927 * NOTE: this will be removed once the NWS files are changed to use the new 5928 * mdi_{enable,disable}_path interfaces 5929 */ 5930 int 5931 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5932 { 5933 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5934 } 5935 5936 /* 5937 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5938 * argument) for a particular client (specified in the client_path argument). 5939 * Enabling a path means that MPxIO may select the enabled path for routing 5940 * future I/O requests, subject to other path state constraints. 5941 * NOTE: this will be removed once the NWS files are changed to use the new 5942 * mdi_{enable,disable}_path interfaces 5943 */ 5944 5945 int 5946 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5947 { 5948 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5949 } 5950 5951 /* 5952 * Common routine for doing enable/disable. 5953 */ 5954 static mdi_pathinfo_t * 5955 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5956 int op) 5957 { 5958 int sync_flag = 0; 5959 int rv; 5960 mdi_pathinfo_t *next; 5961 int (*f)() = NULL; 5962 5963 f = vh->vh_ops->vo_pi_state_change; 5964 5965 sync_flag = (flags << 8) & 0xf00; 5966 5967 /* 5968 * Do a callback into the mdi consumer to let it 5969 * know that path is about to get enabled/disabled. 
5970 */ 5971 if (f != NULL) { 5972 rv = (*f)(vh->vh_dip, pip, 0, 5973 MDI_PI_EXT_STATE(pip), 5974 MDI_EXT_STATE_CHANGE | sync_flag | 5975 op | MDI_BEFORE_STATE_CHANGE); 5976 if (rv != MDI_SUCCESS) { 5977 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5978 "!vo_pi_state_change: failed rv = %x", rv)); 5979 } 5980 } 5981 MDI_PI_LOCK(pip); 5982 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5983 5984 switch (flags) { 5985 case USER_DISABLE: 5986 if (op == MDI_DISABLE_OP) { 5987 MDI_PI_SET_USER_DISABLE(pip); 5988 } else { 5989 MDI_PI_SET_USER_ENABLE(pip); 5990 } 5991 break; 5992 case DRIVER_DISABLE: 5993 if (op == MDI_DISABLE_OP) { 5994 MDI_PI_SET_DRV_DISABLE(pip); 5995 } else { 5996 MDI_PI_SET_DRV_ENABLE(pip); 5997 } 5998 break; 5999 case DRIVER_DISABLE_TRANSIENT: 6000 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6001 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6002 } else { 6003 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6004 } 6005 break; 6006 } 6007 MDI_PI_UNLOCK(pip); 6008 /* 6009 * Do a callback into the mdi consumer to let it 6010 * know that path is now enabled/disabled. 6011 */ 6012 if (f != NULL) { 6013 rv = (*f)(vh->vh_dip, pip, 0, 6014 MDI_PI_EXT_STATE(pip), 6015 MDI_EXT_STATE_CHANGE | sync_flag | 6016 op | MDI_AFTER_STATE_CHANGE); 6017 if (rv != MDI_SUCCESS) { 6018 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 6019 "!vo_pi_state_change: failed rv = %x", rv)); 6020 } 6021 } 6022 return (next); 6023 } 6024 6025 /* 6026 * Common routine for doing enable/disable. 
6027 * NOTE: this will be removed once the NWS files are changed to use the new 6028 * mdi_{enable,disable}_path has been putback 6029 */ 6030 int 6031 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6032 { 6033 6034 mdi_phci_t *ph; 6035 mdi_vhci_t *vh = NULL; 6036 mdi_client_t *ct; 6037 mdi_pathinfo_t *next, *pip; 6038 int found_it; 6039 6040 ph = i_devi_get_phci(pdip); 6041 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6042 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 6043 (void *)cdip)); 6044 if (ph == NULL) { 6045 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 6046 "Op %d failed. ph = NULL\n", op)); 6047 return (MDI_FAILURE); 6048 } 6049 6050 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6051 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6052 "Op Invalid operation = %d\n", op)); 6053 return (MDI_FAILURE); 6054 } 6055 6056 vh = ph->ph_vhci; 6057 6058 if (cdip == NULL) { 6059 /* 6060 * Need to mark the Phci as enabled/disabled. 6061 */ 6062 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6063 "Op %d for the phci\n", op)); 6064 MDI_PHCI_LOCK(ph); 6065 switch (flags) { 6066 case USER_DISABLE: 6067 if (op == MDI_DISABLE_OP) { 6068 MDI_PHCI_SET_USER_DISABLE(ph); 6069 } else { 6070 MDI_PHCI_SET_USER_ENABLE(ph); 6071 } 6072 break; 6073 case DRIVER_DISABLE: 6074 if (op == MDI_DISABLE_OP) { 6075 MDI_PHCI_SET_DRV_DISABLE(ph); 6076 } else { 6077 MDI_PHCI_SET_DRV_ENABLE(ph); 6078 } 6079 break; 6080 case DRIVER_DISABLE_TRANSIENT: 6081 if (op == MDI_DISABLE_OP) { 6082 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6083 } else { 6084 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6085 } 6086 break; 6087 default: 6088 MDI_PHCI_UNLOCK(ph); 6089 MDI_DEBUG(1, (CE_NOTE, NULL, 6090 "!i_mdi_pi_enable_disable:" 6091 " Invalid flag argument= %d\n", flags)); 6092 } 6093 6094 /* 6095 * Phci has been disabled. Now try to enable/disable 6096 * path info's to each client. 
6097 */ 6098 pip = ph->ph_path_head; 6099 while (pip != NULL) { 6100 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6101 } 6102 MDI_PHCI_UNLOCK(ph); 6103 } else { 6104 6105 /* 6106 * Disable a specific client. 6107 */ 6108 ct = i_devi_get_client(cdip); 6109 if (ct == NULL) { 6110 MDI_DEBUG(1, (CE_NOTE, NULL, 6111 "!i_mdi_pi_enable_disable:" 6112 " failed. ct = NULL operation = %d\n", op)); 6113 return (MDI_FAILURE); 6114 } 6115 6116 MDI_CLIENT_LOCK(ct); 6117 pip = ct->ct_path_head; 6118 found_it = 0; 6119 while (pip != NULL) { 6120 MDI_PI_LOCK(pip); 6121 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6122 if (MDI_PI(pip)->pi_phci == ph) { 6123 MDI_PI_UNLOCK(pip); 6124 found_it = 1; 6125 break; 6126 } 6127 MDI_PI_UNLOCK(pip); 6128 pip = next; 6129 } 6130 6131 6132 MDI_CLIENT_UNLOCK(ct); 6133 if (found_it == 0) { 6134 MDI_DEBUG(1, (CE_NOTE, NULL, 6135 "!i_mdi_pi_enable_disable:" 6136 " failed. Could not find corresponding pip\n")); 6137 return (MDI_FAILURE); 6138 } 6139 6140 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6141 } 6142 6143 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6144 "Op %d Returning success pdip = %p cdip = %p\n", 6145 op, (void *)pdip, (void *)cdip)); 6146 return (MDI_SUCCESS); 6147 } 6148 6149 /* 6150 * Ensure phci powered up 6151 */ 6152 static void 6153 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6154 { 6155 dev_info_t *ph_dip; 6156 6157 ASSERT(pip != NULL); 6158 ASSERT(MDI_PI_LOCKED(pip)); 6159 6160 if (MDI_PI(pip)->pi_pm_held) { 6161 return; 6162 } 6163 6164 ph_dip = mdi_pi_get_phci(pip); 6165 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 6166 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6167 if (ph_dip == NULL) { 6168 return; 6169 } 6170 6171 MDI_PI_UNLOCK(pip); 6172 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6173 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6174 6175 pm_hold_power(ph_dip); 6176 6177 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6178 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 6179 MDI_PI_LOCK(pip); 6180 6181 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6182 if (DEVI(ph_dip)->devi_pm_info) 6183 MDI_PI(pip)->pi_pm_held = 1; 6184 } 6185 6186 /* 6187 * Allow phci powered down 6188 */ 6189 static void 6190 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6191 { 6192 dev_info_t *ph_dip = NULL; 6193 6194 ASSERT(pip != NULL); 6195 ASSERT(MDI_PI_LOCKED(pip)); 6196 6197 if (MDI_PI(pip)->pi_pm_held == 0) { 6198 return; 6199 } 6200 6201 ph_dip = mdi_pi_get_phci(pip); 6202 ASSERT(ph_dip != NULL); 6203 6204 MDI_PI_UNLOCK(pip); 6205 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 6206 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6207 6208 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6209 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6210 pm_rele_power(ph_dip); 6211 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6212 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6213 6214 MDI_PI_LOCK(pip); 6215 MDI_PI(pip)->pi_pm_held = 0; 6216 } 6217 6218 static void 6219 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6220 { 6221 ASSERT(MDI_CLIENT_LOCKED(ct)); 6222 6223 ct->ct_power_cnt += incr; 6224 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 6225 "ct_power_cnt = %d incr = %d\n", (void *)ct, 6226 ct->ct_power_cnt, incr)); 6227 ASSERT(ct->ct_power_cnt >= 0); 6228 } 6229 6230 static void 6231 i_mdi_rele_all_phci(mdi_client_t *ct) 6232 { 6233 mdi_pathinfo_t *pip; 6234 6235 ASSERT(MDI_CLIENT_LOCKED(ct)); 6236 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6237 while (pip != NULL) { 6238 mdi_hold_path(pip); 6239 MDI_PI_LOCK(pip); 6240 i_mdi_pm_rele_pip(pip); 6241 MDI_PI_UNLOCK(pip); 6242 mdi_rele_path(pip); 6243 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6244 } 6245 } 6246 6247 static void 6248 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6249 { 6250 ASSERT(MDI_CLIENT_LOCKED(ct)); 6251 6252 if (i_ddi_devi_attached(ct->ct_dip)) { 6253 ct->ct_power_cnt -= decr; 6254 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 6255 "ct_power_cnt = %d decr = %d\n", 6256 (void *)ct, ct->ct_power_cnt, decr)); 6257 } 6258 6259 ASSERT(ct->ct_power_cnt >= 0); 6260 if (ct->ct_power_cnt == 0) { 6261 i_mdi_rele_all_phci(ct); 6262 return; 6263 } 6264 } 6265 6266 static void 6267 i_mdi_pm_reset_client(mdi_client_t *ct) 6268 { 6269 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 6270 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 6271 ASSERT(MDI_CLIENT_LOCKED(ct)); 6272 ct->ct_power_cnt = 0; 6273 i_mdi_rele_all_phci(ct); 6274 ct->ct_powercnt_config = 0; 6275 ct->ct_powercnt_unconfig = 0; 6276 ct->ct_powercnt_reset = 1; 6277 } 6278 6279 static int 6280 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6281 { 6282 int ret; 6283 dev_info_t *ph_dip; 6284 6285 MDI_PI_LOCK(pip); 6286 i_mdi_pm_hold_pip(pip); 6287 6288 ph_dip = mdi_pi_get_phci(pip); 6289 MDI_PI_UNLOCK(pip); 6290 6291 /* bring all components of phci to full power */ 6292 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6293 "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip), 6294 ddi_get_instance(ph_dip), (void *)pip)); 6295 6296 ret = pm_powerup(ph_dip); 6297 6298 if (ret == DDI_FAILURE) { 6299 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6300 "pm_powerup FAILED for %s%d %p\n", 6301 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), 6302 (void *)pip)); 6303 6304 MDI_PI_LOCK(pip); 6305 i_mdi_pm_rele_pip(pip); 6306 MDI_PI_UNLOCK(pip); 6307 return (MDI_FAILURE); 6308 } 6309 6310 return (MDI_SUCCESS); 6311 } 6312 6313 static int 6314 i_mdi_power_all_phci(mdi_client_t *ct) 6315 { 6316 mdi_pathinfo_t *pip; 6317 int succeeded = 0; 6318 6319 ASSERT(MDI_CLIENT_LOCKED(ct)); 6320 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6321 while (pip != NULL) { 6322 /* 6323 * Don't power if MDI_PATHINFO_STATE_FAULT 6324 * or MDI_PATHINFO_STATE_OFFLINE. 
6325 */ 6326 if (MDI_PI_IS_INIT(pip) || 6327 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6328 mdi_hold_path(pip); 6329 MDI_CLIENT_UNLOCK(ct); 6330 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6331 succeeded = 1; 6332 6333 ASSERT(ct == MDI_PI(pip)->pi_client); 6334 MDI_CLIENT_LOCK(ct); 6335 mdi_rele_path(pip); 6336 } 6337 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6338 } 6339 6340 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6341 } 6342 6343 /* 6344 * mdi_bus_power(): 6345 * 1. Place the phci(s) into powered up state so that 6346 * client can do power management 6347 * 2. Ensure phci powered up as client power managing 6348 * Return Values: 6349 * MDI_SUCCESS 6350 * MDI_FAILURE 6351 */ 6352 int 6353 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6354 void *arg, void *result) 6355 { 6356 int ret = MDI_SUCCESS; 6357 pm_bp_child_pwrchg_t *bpc; 6358 mdi_client_t *ct; 6359 dev_info_t *cdip; 6360 pm_bp_has_changed_t *bphc; 6361 6362 /* 6363 * BUS_POWER_NOINVOL not supported 6364 */ 6365 if (op == BUS_POWER_NOINVOL) 6366 return (MDI_FAILURE); 6367 6368 /* 6369 * ignore other OPs. 
6370 * return quickly to save cou cycles on the ct processing 6371 */ 6372 switch (op) { 6373 case BUS_POWER_PRE_NOTIFICATION: 6374 case BUS_POWER_POST_NOTIFICATION: 6375 bpc = (pm_bp_child_pwrchg_t *)arg; 6376 cdip = bpc->bpc_dip; 6377 break; 6378 case BUS_POWER_HAS_CHANGED: 6379 bphc = (pm_bp_has_changed_t *)arg; 6380 cdip = bphc->bphc_dip; 6381 break; 6382 default: 6383 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6384 } 6385 6386 ASSERT(MDI_CLIENT(cdip)); 6387 6388 ct = i_devi_get_client(cdip); 6389 if (ct == NULL) 6390 return (MDI_FAILURE); 6391 6392 /* 6393 * wait till the mdi_pathinfo node state change are processed 6394 */ 6395 MDI_CLIENT_LOCK(ct); 6396 switch (op) { 6397 case BUS_POWER_PRE_NOTIFICATION: 6398 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6399 "BUS_POWER_PRE_NOTIFICATION:" 6400 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6401 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6402 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6403 6404 /* serialize power level change per client */ 6405 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6406 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6407 6408 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6409 6410 if (ct->ct_power_cnt == 0) { 6411 ret = i_mdi_power_all_phci(ct); 6412 } 6413 6414 /* 6415 * if new_level > 0: 6416 * - hold phci(s) 6417 * - power up phci(s) if not already 6418 * ignore power down 6419 */ 6420 if (bpc->bpc_nlevel > 0) { 6421 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 6422 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6423 "mdi_bus_power i_mdi_pm_hold_client\n")); 6424 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6425 } 6426 } 6427 break; 6428 case BUS_POWER_POST_NOTIFICATION: 6429 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6430 "BUS_POWER_POST_NOTIFICATION:" 6431 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 6432 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6433 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6434 *(int *)result)); 6435 6436 if (*(int *)result 
== DDI_SUCCESS) { 6437 if (bpc->bpc_nlevel > 0) { 6438 MDI_CLIENT_SET_POWER_UP(ct); 6439 } else { 6440 MDI_CLIENT_SET_POWER_DOWN(ct); 6441 } 6442 } 6443 6444 /* release the hold we did in pre-notification */ 6445 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6446 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6447 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6448 "mdi_bus_power i_mdi_pm_rele_client\n")); 6449 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6450 } 6451 6452 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6453 /* another thread might started attaching */ 6454 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6455 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6456 "mdi_bus_power i_mdi_pm_rele_client\n")); 6457 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6458 /* detaching has been taken care in pm_post_unconfig */ 6459 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6460 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6461 "mdi_bus_power i_mdi_pm_reset_client\n")); 6462 i_mdi_pm_reset_client(ct); 6463 } 6464 } 6465 6466 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6467 cv_broadcast(&ct->ct_powerchange_cv); 6468 6469 break; 6470 6471 /* need to do more */ 6472 case BUS_POWER_HAS_CHANGED: 6473 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 6474 "BUS_POWER_HAS_CHANGED:" 6475 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6476 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6477 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6478 6479 if (bphc->bphc_nlevel > 0 && 6480 bphc->bphc_nlevel > bphc->bphc_olevel) { 6481 if (ct->ct_power_cnt == 0) { 6482 ret = i_mdi_power_all_phci(ct); 6483 } 6484 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6485 "mdi_bus_power i_mdi_pm_hold_client\n")); 6486 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6487 } 6488 6489 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6490 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6491 "mdi_bus_power i_mdi_pm_rele_client\n")); 6492 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6493 } 6494 break; 6495 } 6496 6497 
MDI_CLIENT_UNLOCK(ct); 6498 return (ret); 6499 } 6500 6501 static int 6502 i_mdi_pm_pre_config_one(dev_info_t *child) 6503 { 6504 int ret = MDI_SUCCESS; 6505 mdi_client_t *ct; 6506 6507 ct = i_devi_get_client(child); 6508 if (ct == NULL) 6509 return (MDI_FAILURE); 6510 6511 MDI_CLIENT_LOCK(ct); 6512 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6513 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6514 6515 if (!MDI_CLIENT_IS_FAILED(ct)) { 6516 MDI_CLIENT_UNLOCK(ct); 6517 MDI_DEBUG(4, (CE_NOTE, child, 6518 "i_mdi_pm_pre_config_one already configured\n")); 6519 return (MDI_SUCCESS); 6520 } 6521 6522 if (ct->ct_powercnt_config) { 6523 MDI_CLIENT_UNLOCK(ct); 6524 MDI_DEBUG(4, (CE_NOTE, child, 6525 "i_mdi_pm_pre_config_one ALREADY held\n")); 6526 return (MDI_SUCCESS); 6527 } 6528 6529 if (ct->ct_power_cnt == 0) { 6530 ret = i_mdi_power_all_phci(ct); 6531 } 6532 MDI_DEBUG(4, (CE_NOTE, child, 6533 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 6534 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6535 ct->ct_powercnt_config = 1; 6536 ct->ct_powercnt_reset = 0; 6537 MDI_CLIENT_UNLOCK(ct); 6538 return (ret); 6539 } 6540 6541 static int 6542 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6543 { 6544 int ret = MDI_SUCCESS; 6545 dev_info_t *cdip; 6546 int circ; 6547 6548 ASSERT(MDI_VHCI(vdip)); 6549 6550 /* ndi_devi_config_one */ 6551 if (child) { 6552 ASSERT(DEVI_BUSY_OWNED(vdip)); 6553 return (i_mdi_pm_pre_config_one(child)); 6554 } 6555 6556 /* devi_config_common */ 6557 ndi_devi_enter(vdip, &circ); 6558 cdip = ddi_get_child(vdip); 6559 while (cdip) { 6560 dev_info_t *next = ddi_get_next_sibling(cdip); 6561 6562 ret = i_mdi_pm_pre_config_one(cdip); 6563 if (ret != MDI_SUCCESS) 6564 break; 6565 cdip = next; 6566 } 6567 ndi_devi_exit(vdip, circ); 6568 return (ret); 6569 } 6570 6571 static int 6572 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6573 { 6574 int ret = MDI_SUCCESS; 6575 mdi_client_t *ct; 6576 6577 ct = i_devi_get_client(child); 6578 if 
(ct == NULL) 6579 return (MDI_FAILURE); 6580 6581 MDI_CLIENT_LOCK(ct); 6582 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6583 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6584 6585 if (!i_ddi_devi_attached(ct->ct_dip)) { 6586 MDI_DEBUG(4, (CE_NOTE, child, 6587 "i_mdi_pm_pre_unconfig node detached already\n")); 6588 MDI_CLIENT_UNLOCK(ct); 6589 return (MDI_SUCCESS); 6590 } 6591 6592 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6593 (flags & NDI_AUTODETACH)) { 6594 MDI_DEBUG(4, (CE_NOTE, child, 6595 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6596 MDI_CLIENT_UNLOCK(ct); 6597 return (MDI_FAILURE); 6598 } 6599 6600 if (ct->ct_powercnt_unconfig) { 6601 MDI_DEBUG(4, (CE_NOTE, child, 6602 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6603 MDI_CLIENT_UNLOCK(ct); 6604 *held = 1; 6605 return (MDI_SUCCESS); 6606 } 6607 6608 if (ct->ct_power_cnt == 0) { 6609 ret = i_mdi_power_all_phci(ct); 6610 } 6611 MDI_DEBUG(4, (CE_NOTE, child, 6612 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6613 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6614 ct->ct_powercnt_unconfig = 1; 6615 ct->ct_powercnt_reset = 0; 6616 MDI_CLIENT_UNLOCK(ct); 6617 if (ret == MDI_SUCCESS) 6618 *held = 1; 6619 return (ret); 6620 } 6621 6622 static int 6623 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6624 int flags) 6625 { 6626 int ret = MDI_SUCCESS; 6627 dev_info_t *cdip; 6628 int circ; 6629 6630 ASSERT(MDI_VHCI(vdip)); 6631 *held = 0; 6632 6633 /* ndi_devi_unconfig_one */ 6634 if (child) { 6635 ASSERT(DEVI_BUSY_OWNED(vdip)); 6636 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6637 } 6638 6639 /* devi_unconfig_common */ 6640 ndi_devi_enter(vdip, &circ); 6641 cdip = ddi_get_child(vdip); 6642 while (cdip) { 6643 dev_info_t *next = ddi_get_next_sibling(cdip); 6644 6645 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6646 cdip = next; 6647 } 6648 ndi_devi_exit(vdip, circ); 6649 6650 if (*held) 6651 ret = MDI_SUCCESS; 6652 6653 return (ret); 6654 } 6655 6656 static void 6657 
i_mdi_pm_post_config_one(dev_info_t *child) 6658 { 6659 mdi_client_t *ct; 6660 6661 ct = i_devi_get_client(child); 6662 if (ct == NULL) 6663 return; 6664 6665 MDI_CLIENT_LOCK(ct); 6666 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6667 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6668 6669 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6670 MDI_DEBUG(4, (CE_NOTE, child, 6671 "i_mdi_pm_post_config_one NOT configured\n")); 6672 MDI_CLIENT_UNLOCK(ct); 6673 return; 6674 } 6675 6676 /* client has not been updated */ 6677 if (MDI_CLIENT_IS_FAILED(ct)) { 6678 MDI_DEBUG(4, (CE_NOTE, child, 6679 "i_mdi_pm_post_config_one NOT configured\n")); 6680 MDI_CLIENT_UNLOCK(ct); 6681 return; 6682 } 6683 6684 /* another thread might have powered it down or detached it */ 6685 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6686 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6687 (!i_ddi_devi_attached(ct->ct_dip) && 6688 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6689 MDI_DEBUG(4, (CE_NOTE, child, 6690 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6691 i_mdi_pm_reset_client(ct); 6692 } else { 6693 mdi_pathinfo_t *pip, *next; 6694 int valid_path_count = 0; 6695 6696 MDI_DEBUG(4, (CE_NOTE, child, 6697 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6698 pip = ct->ct_path_head; 6699 while (pip != NULL) { 6700 MDI_PI_LOCK(pip); 6701 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6702 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6703 valid_path_count ++; 6704 MDI_PI_UNLOCK(pip); 6705 pip = next; 6706 } 6707 i_mdi_pm_rele_client(ct, valid_path_count); 6708 } 6709 ct->ct_powercnt_config = 0; 6710 MDI_CLIENT_UNLOCK(ct); 6711 } 6712 6713 static void 6714 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6715 { 6716 int circ; 6717 dev_info_t *cdip; 6718 6719 ASSERT(MDI_VHCI(vdip)); 6720 6721 /* ndi_devi_config_one */ 6722 if (child) { 6723 ASSERT(DEVI_BUSY_OWNED(vdip)); 6724 i_mdi_pm_post_config_one(child); 6725 return; 6726 } 6727 6728 /* devi_config_common */ 6729 ndi_devi_enter(vdip, 
&circ); 6730 cdip = ddi_get_child(vdip); 6731 while (cdip) { 6732 dev_info_t *next = ddi_get_next_sibling(cdip); 6733 6734 i_mdi_pm_post_config_one(cdip); 6735 cdip = next; 6736 } 6737 ndi_devi_exit(vdip, circ); 6738 } 6739 6740 static void 6741 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6742 { 6743 mdi_client_t *ct; 6744 6745 ct = i_devi_get_client(child); 6746 if (ct == NULL) 6747 return; 6748 6749 MDI_CLIENT_LOCK(ct); 6750 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6751 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6752 6753 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6754 MDI_DEBUG(4, (CE_NOTE, child, 6755 "i_mdi_pm_post_unconfig NOT held\n")); 6756 MDI_CLIENT_UNLOCK(ct); 6757 return; 6758 } 6759 6760 /* failure detaching or another thread just attached it */ 6761 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6762 i_ddi_devi_attached(ct->ct_dip)) || 6763 (!i_ddi_devi_attached(ct->ct_dip) && 6764 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6765 MDI_DEBUG(4, (CE_NOTE, child, 6766 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6767 i_mdi_pm_reset_client(ct); 6768 } else { 6769 mdi_pathinfo_t *pip, *next; 6770 int valid_path_count = 0; 6771 6772 MDI_DEBUG(4, (CE_NOTE, child, 6773 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6774 pip = ct->ct_path_head; 6775 while (pip != NULL) { 6776 MDI_PI_LOCK(pip); 6777 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6778 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6779 valid_path_count ++; 6780 MDI_PI_UNLOCK(pip); 6781 pip = next; 6782 } 6783 i_mdi_pm_rele_client(ct, valid_path_count); 6784 ct->ct_powercnt_unconfig = 0; 6785 } 6786 6787 MDI_CLIENT_UNLOCK(ct); 6788 } 6789 6790 static void 6791 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6792 { 6793 int circ; 6794 dev_info_t *cdip; 6795 6796 ASSERT(MDI_VHCI(vdip)); 6797 6798 if (!held) { 6799 MDI_DEBUG(4, (CE_NOTE, vdip, 6800 "i_mdi_pm_post_unconfig held = %d\n", held)); 6801 return; 6802 } 6803 6804 if (child) { 6805 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6806 i_mdi_pm_post_unconfig_one(child); 6807 return; 6808 } 6809 6810 ndi_devi_enter(vdip, &circ); 6811 cdip = ddi_get_child(vdip); 6812 while (cdip) { 6813 dev_info_t *next = ddi_get_next_sibling(cdip); 6814 6815 i_mdi_pm_post_unconfig_one(cdip); 6816 cdip = next; 6817 } 6818 ndi_devi_exit(vdip, circ); 6819 } 6820 6821 int 6822 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6823 { 6824 int circ, ret = MDI_SUCCESS; 6825 dev_info_t *client_dip = NULL; 6826 mdi_client_t *ct; 6827 6828 /* 6829 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6830 * Power up pHCI for the named client device. 6831 * Note: Before the client is enumerated under vhci by phci, 6832 * client_dip can be NULL. Then proceed to power up all the 6833 * pHCIs. 6834 */ 6835 if (devnm != NULL) { 6836 ndi_devi_enter(vdip, &circ); 6837 client_dip = ndi_devi_findchild(vdip, devnm); 6838 } 6839 6840 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6841 op, devnm ? 
devnm : "NULL", (void *)client_dip)); 6842 6843 switch (op) { 6844 case MDI_PM_PRE_CONFIG: 6845 ret = i_mdi_pm_pre_config(vdip, client_dip); 6846 break; 6847 6848 case MDI_PM_PRE_UNCONFIG: 6849 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6850 flags); 6851 break; 6852 6853 case MDI_PM_POST_CONFIG: 6854 i_mdi_pm_post_config(vdip, client_dip); 6855 break; 6856 6857 case MDI_PM_POST_UNCONFIG: 6858 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6859 break; 6860 6861 case MDI_PM_HOLD_POWER: 6862 case MDI_PM_RELE_POWER: 6863 ASSERT(args); 6864 6865 client_dip = (dev_info_t *)args; 6866 ASSERT(MDI_CLIENT(client_dip)); 6867 6868 ct = i_devi_get_client(client_dip); 6869 MDI_CLIENT_LOCK(ct); 6870 6871 if (op == MDI_PM_HOLD_POWER) { 6872 if (ct->ct_power_cnt == 0) { 6873 (void) i_mdi_power_all_phci(ct); 6874 MDI_DEBUG(4, (CE_NOTE, client_dip, 6875 "mdi_power i_mdi_pm_hold_client\n")); 6876 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6877 } 6878 } else { 6879 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6880 MDI_DEBUG(4, (CE_NOTE, client_dip, 6881 "mdi_power i_mdi_pm_rele_client\n")); 6882 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6883 } else { 6884 MDI_DEBUG(4, (CE_NOTE, client_dip, 6885 "mdi_power i_mdi_pm_reset_client\n")); 6886 i_mdi_pm_reset_client(ct); 6887 } 6888 } 6889 6890 MDI_CLIENT_UNLOCK(ct); 6891 break; 6892 6893 default: 6894 break; 6895 } 6896 6897 if (devnm) 6898 ndi_devi_exit(vdip, circ); 6899 6900 return (ret); 6901 } 6902 6903 int 6904 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6905 { 6906 mdi_vhci_t *vhci; 6907 6908 if (!MDI_VHCI(dip)) 6909 return (MDI_FAILURE); 6910 6911 if (mdi_class) { 6912 vhci = DEVI(dip)->devi_mdi_xhci; 6913 ASSERT(vhci); 6914 *mdi_class = vhci->vh_class; 6915 } 6916 6917 return (MDI_SUCCESS); 6918 } 6919 6920 int 6921 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6922 { 6923 mdi_phci_t *phci; 6924 6925 if (!MDI_PHCI(dip)) 6926 return (MDI_FAILURE); 6927 6928 if (mdi_class) { 
6929 phci = DEVI(dip)->devi_mdi_xhci; 6930 ASSERT(phci); 6931 *mdi_class = phci->ph_vhci->vh_class; 6932 } 6933 6934 return (MDI_SUCCESS); 6935 } 6936 6937 int 6938 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6939 { 6940 mdi_client_t *client; 6941 6942 if (!MDI_CLIENT(dip)) 6943 return (MDI_FAILURE); 6944 6945 if (mdi_class) { 6946 client = DEVI(dip)->devi_mdi_client; 6947 ASSERT(client); 6948 *mdi_class = client->ct_vhci->vh_class; 6949 } 6950 6951 return (MDI_SUCCESS); 6952 } 6953 6954 void * 6955 mdi_client_get_vhci_private(dev_info_t *dip) 6956 { 6957 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6958 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6959 mdi_client_t *ct; 6960 ct = i_devi_get_client(dip); 6961 return (ct->ct_vprivate); 6962 } 6963 return (NULL); 6964 } 6965 6966 void 6967 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6968 { 6969 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6970 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6971 mdi_client_t *ct; 6972 ct = i_devi_get_client(dip); 6973 ct->ct_vprivate = data; 6974 } 6975 } 6976 /* 6977 * mdi_pi_get_vhci_private(): 6978 * Get the vhci private information associated with the 6979 * mdi_pathinfo node 6980 */ 6981 void * 6982 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6983 { 6984 caddr_t vprivate = NULL; 6985 if (pip) { 6986 vprivate = MDI_PI(pip)->pi_vprivate; 6987 } 6988 return (vprivate); 6989 } 6990 6991 /* 6992 * mdi_pi_set_vhci_private(): 6993 * Set the vhci private information in the mdi_pathinfo node 6994 */ 6995 void 6996 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6997 { 6998 if (pip) { 6999 MDI_PI(pip)->pi_vprivate = priv; 7000 } 7001 } 7002 7003 /* 7004 * mdi_phci_get_vhci_private(): 7005 * Get the vhci private information associated with the 7006 * mdi_phci node 7007 */ 7008 void * 7009 mdi_phci_get_vhci_private(dev_info_t *dip) 7010 { 7011 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 
7012 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7013 mdi_phci_t *ph; 7014 ph = i_devi_get_phci(dip); 7015 return (ph->ph_vprivate); 7016 } 7017 return (NULL); 7018 } 7019 7020 /* 7021 * mdi_phci_set_vhci_private(): 7022 * Set the vhci private information in the mdi_phci node 7023 */ 7024 void 7025 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7026 { 7027 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7028 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7029 mdi_phci_t *ph; 7030 ph = i_devi_get_phci(dip); 7031 ph->ph_vprivate = priv; 7032 } 7033 } 7034 7035 /* 7036 * List of vhci class names: 7037 * A vhci class name must be in this list only if the corresponding vhci 7038 * driver intends to use the mdi provided bus config implementation 7039 * (i.e., mdi_vhci_bus_config()). 7040 */ 7041 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7042 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7043 7044 /* 7045 * During boot time, the on-disk vhci cache for every vhci class is read 7046 * in the form of an nvlist and stored here. 7047 */ 7048 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7049 7050 /* nvpair names in vhci cache nvlist */ 7051 #define MDI_VHCI_CACHE_VERSION 1 7052 #define MDI_NVPNAME_VERSION "version" 7053 #define MDI_NVPNAME_PHCIS "phcis" 7054 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7055 7056 /* 7057 * Given vhci class name, return its on-disk vhci cache filename. 7058 * Memory for the returned filename which includes the full path is allocated 7059 * by this function. 7060 */ 7061 static char * 7062 vhclass2vhcache_filename(char *vhclass) 7063 { 7064 char *filename; 7065 int len; 7066 static char *fmt = "/etc/devices/mdi_%s_cache"; 7067 7068 /* 7069 * fmt contains the on-disk vhci cache file name format; 7070 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
7071 */ 7072 7073 /* the -1 below is to account for "%s" in the format string */ 7074 len = strlen(fmt) + strlen(vhclass) - 1; 7075 filename = kmem_alloc(len, KM_SLEEP); 7076 (void) snprintf(filename, len, fmt, vhclass); 7077 ASSERT(len == (strlen(filename) + 1)); 7078 return (filename); 7079 } 7080 7081 /* 7082 * initialize the vhci cache related data structures and read the on-disk 7083 * vhci cached data into memory. 7084 */ 7085 static void 7086 setup_vhci_cache(mdi_vhci_t *vh) 7087 { 7088 mdi_vhci_config_t *vhc; 7089 mdi_vhci_cache_t *vhcache; 7090 int i; 7091 nvlist_t *nvl = NULL; 7092 7093 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7094 vh->vh_config = vhc; 7095 vhcache = &vhc->vhc_vhcache; 7096 7097 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7098 7099 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7100 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7101 7102 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7103 7104 /* 7105 * Create string hash; same as mod_hash_create_strhash() except that 7106 * we use NULL key destructor. 7107 */ 7108 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7109 mdi_bus_config_cache_hash_size, 7110 mod_hash_null_keydtor, mod_hash_null_valdtor, 7111 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7112 7113 /* 7114 * The on-disk vhci cache is read during booting prior to the 7115 * lights-out period by mdi_read_devices_files(). 7116 */ 7117 for (i = 0; i < N_VHCI_CLASSES; i++) { 7118 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7119 nvl = vhcache_nvl[i]; 7120 vhcache_nvl[i] = NULL; 7121 break; 7122 } 7123 } 7124 7125 /* 7126 * this is to cover the case of some one manually causing unloading 7127 * (or detaching) and reloading (or attaching) of a vhci driver. 
7128 */ 7129 if (nvl == NULL && modrootloaded) 7130 nvl = read_on_disk_vhci_cache(vh->vh_class); 7131 7132 if (nvl != NULL) { 7133 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7134 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7135 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7136 else { 7137 cmn_err(CE_WARN, 7138 "%s: data file corrupted, will recreate\n", 7139 vhc->vhc_vhcache_filename); 7140 } 7141 rw_exit(&vhcache->vhcache_lock); 7142 nvlist_free(nvl); 7143 } 7144 7145 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7146 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7147 7148 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7149 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7150 } 7151 7152 /* 7153 * free all vhci cache related resources 7154 */ 7155 static int 7156 destroy_vhci_cache(mdi_vhci_t *vh) 7157 { 7158 mdi_vhci_config_t *vhc = vh->vh_config; 7159 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7160 mdi_vhcache_phci_t *cphci, *cphci_next; 7161 mdi_vhcache_client_t *cct, *cct_next; 7162 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7163 7164 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7165 return (MDI_FAILURE); 7166 7167 kmem_free(vhc->vhc_vhcache_filename, 7168 strlen(vhc->vhc_vhcache_filename) + 1); 7169 7170 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7171 7172 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7173 cphci = cphci_next) { 7174 cphci_next = cphci->cphci_next; 7175 free_vhcache_phci(cphci); 7176 } 7177 7178 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7179 cct_next = cct->cct_next; 7180 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7181 cpi_next = cpi->cpi_next; 7182 free_vhcache_pathinfo(cpi); 7183 } 7184 free_vhcache_client(cct); 7185 } 7186 7187 rw_destroy(&vhcache->vhcache_lock); 7188 7189 mutex_destroy(&vhc->vhc_lock); 7190 cv_destroy(&vhc->vhc_cv); 7191 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7192 return (MDI_SUCCESS); 
7193 } 7194 7195 /* 7196 * Stop all vhci cache related async threads and free their resources. 7197 */ 7198 static int 7199 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7200 { 7201 mdi_async_client_config_t *acc, *acc_next; 7202 7203 mutex_enter(&vhc->vhc_lock); 7204 vhc->vhc_flags |= MDI_VHC_EXIT; 7205 ASSERT(vhc->vhc_acc_thrcount >= 0); 7206 cv_broadcast(&vhc->vhc_cv); 7207 7208 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7209 vhc->vhc_acc_thrcount != 0) { 7210 mutex_exit(&vhc->vhc_lock); 7211 delay(1); 7212 mutex_enter(&vhc->vhc_lock); 7213 } 7214 7215 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7216 7217 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7218 acc_next = acc->acc_next; 7219 free_async_client_config(acc); 7220 } 7221 vhc->vhc_acc_list_head = NULL; 7222 vhc->vhc_acc_list_tail = NULL; 7223 vhc->vhc_acc_count = 0; 7224 7225 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7226 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7227 mutex_exit(&vhc->vhc_lock); 7228 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7229 vhcache_dirty(vhc); 7230 return (MDI_FAILURE); 7231 } 7232 } else 7233 mutex_exit(&vhc->vhc_lock); 7234 7235 if (callb_delete(vhc->vhc_cbid) != 0) 7236 return (MDI_FAILURE); 7237 7238 return (MDI_SUCCESS); 7239 } 7240 7241 /* 7242 * Stop vhci cache flush thread 7243 */ 7244 /* ARGSUSED */ 7245 static boolean_t 7246 stop_vhcache_flush_thread(void *arg, int code) 7247 { 7248 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7249 7250 mutex_enter(&vhc->vhc_lock); 7251 vhc->vhc_flags |= MDI_VHC_EXIT; 7252 cv_broadcast(&vhc->vhc_cv); 7253 7254 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7255 mutex_exit(&vhc->vhc_lock); 7256 delay(1); 7257 mutex_enter(&vhc->vhc_lock); 7258 } 7259 7260 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7261 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7262 mutex_exit(&vhc->vhc_lock); 7263 (void) flush_vhcache(vhc, 1); 7264 } else 7265 mutex_exit(&vhc->vhc_lock); 7266 7267 return (B_TRUE); 7268 } 
7269 7270 /* 7271 * Enqueue the vhcache phci (cphci) at the tail of the list 7272 */ 7273 static void 7274 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7275 { 7276 cphci->cphci_next = NULL; 7277 if (vhcache->vhcache_phci_head == NULL) 7278 vhcache->vhcache_phci_head = cphci; 7279 else 7280 vhcache->vhcache_phci_tail->cphci_next = cphci; 7281 vhcache->vhcache_phci_tail = cphci; 7282 } 7283 7284 /* 7285 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7286 */ 7287 static void 7288 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7289 mdi_vhcache_pathinfo_t *cpi) 7290 { 7291 cpi->cpi_next = NULL; 7292 if (cct->cct_cpi_head == NULL) 7293 cct->cct_cpi_head = cpi; 7294 else 7295 cct->cct_cpi_tail->cpi_next = cpi; 7296 cct->cct_cpi_tail = cpi; 7297 } 7298 7299 /* 7300 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7301 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7302 * flag set come at the beginning of the list. All cpis which have this 7303 * flag set come at the end of the list. 
7304 */ 7305 static void 7306 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7307 mdi_vhcache_pathinfo_t *newcpi) 7308 { 7309 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7310 7311 if (cct->cct_cpi_head == NULL || 7312 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7313 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7314 else { 7315 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7316 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7317 prev_cpi = cpi, cpi = cpi->cpi_next) 7318 ; 7319 7320 if (prev_cpi == NULL) 7321 cct->cct_cpi_head = newcpi; 7322 else 7323 prev_cpi->cpi_next = newcpi; 7324 7325 newcpi->cpi_next = cpi; 7326 7327 if (cpi == NULL) 7328 cct->cct_cpi_tail = newcpi; 7329 } 7330 } 7331 7332 /* 7333 * Enqueue the vhcache client (cct) at the tail of the list 7334 */ 7335 static void 7336 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7337 mdi_vhcache_client_t *cct) 7338 { 7339 cct->cct_next = NULL; 7340 if (vhcache->vhcache_client_head == NULL) 7341 vhcache->vhcache_client_head = cct; 7342 else 7343 vhcache->vhcache_client_tail->cct_next = cct; 7344 vhcache->vhcache_client_tail = cct; 7345 } 7346 7347 static void 7348 free_string_array(char **str, int nelem) 7349 { 7350 int i; 7351 7352 if (str) { 7353 for (i = 0; i < nelem; i++) { 7354 if (str[i]) 7355 kmem_free(str[i], strlen(str[i]) + 1); 7356 } 7357 kmem_free(str, sizeof (char *) * nelem); 7358 } 7359 } 7360 7361 static void 7362 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7363 { 7364 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7365 kmem_free(cphci, sizeof (*cphci)); 7366 } 7367 7368 static void 7369 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7370 { 7371 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7372 kmem_free(cpi, sizeof (*cpi)); 7373 } 7374 7375 static void 7376 free_vhcache_client(mdi_vhcache_client_t *cct) 7377 { 7378 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7379 kmem_free(cct, sizeof (*cct)); 7380 } 7381 7382 
static char * 7383 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7384 { 7385 char *name_addr; 7386 int len; 7387 7388 len = strlen(ct_name) + strlen(ct_addr) + 2; 7389 name_addr = kmem_alloc(len, KM_SLEEP); 7390 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7391 7392 if (ret_len) 7393 *ret_len = len; 7394 return (name_addr); 7395 } 7396 7397 /* 7398 * Copy the contents of paddrnvl to vhci cache. 7399 * paddrnvl nvlist contains path information for a vhci client. 7400 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7401 */ 7402 static void 7403 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7404 mdi_vhcache_client_t *cct) 7405 { 7406 nvpair_t *nvp = NULL; 7407 mdi_vhcache_pathinfo_t *cpi; 7408 uint_t nelem; 7409 uint32_t *val; 7410 7411 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7412 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7413 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7414 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7415 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7416 ASSERT(nelem == 2); 7417 cpi->cpi_cphci = cphci_list[val[0]]; 7418 cpi->cpi_flags = val[1]; 7419 enqueue_tail_vhcache_pathinfo(cct, cpi); 7420 } 7421 } 7422 7423 /* 7424 * Copy the contents of caddrmapnvl to vhci cache. 7425 * caddrmapnvl nvlist contains vhci client address to phci client address 7426 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7427 * this nvlist. 
7428 */ 7429 static void 7430 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7431 mdi_vhcache_phci_t *cphci_list[]) 7432 { 7433 nvpair_t *nvp = NULL; 7434 nvlist_t *paddrnvl; 7435 mdi_vhcache_client_t *cct; 7436 7437 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7438 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7439 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7440 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7441 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7442 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7443 /* the client must contain at least one path */ 7444 ASSERT(cct->cct_cpi_head != NULL); 7445 7446 enqueue_vhcache_client(vhcache, cct); 7447 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7448 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7449 } 7450 } 7451 7452 /* 7453 * Copy the contents of the main nvlist to vhci cache. 7454 * 7455 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7456 * The nvlist contains the mappings between the vhci client addresses and 7457 * their corresponding phci client addresses. 7458 * 7459 * The structure of the nvlist is as follows: 7460 * 7461 * Main nvlist: 7462 * NAME TYPE DATA 7463 * version int32 version number 7464 * phcis string array array of phci paths 7465 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7466 * 7467 * structure of c2paddrs_nvl: 7468 * NAME TYPE DATA 7469 * caddr1 nvlist_t paddrs_nvl1 7470 * caddr2 nvlist_t paddrs_nvl2 7471 * ... 7472 * where caddr1, caddr2, ... are vhci client name and addresses in the 7473 * form of "<clientname>@<clientaddress>". 7474 * (for example: "ssd@2000002037cd9f72"); 7475 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7476 * 7477 * structure of paddrs_nvl: 7478 * NAME TYPE DATA 7479 * pi_addr1 uint32_array (phci-id, cpi_flags) 7480 * pi_addr2 uint32_array (phci-id, cpi_flags) 7481 * ... 7482 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7483 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7484 * phci-ids are integers that identify PHCIs to which the 7485 * the bus specific address belongs to. These integers are used as an index 7486 * into to the phcis string array in the main nvlist to get the PHCI path. 7487 */ 7488 static int 7489 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7490 { 7491 char **phcis, **phci_namep; 7492 uint_t nphcis; 7493 mdi_vhcache_phci_t *cphci, **cphci_list; 7494 nvlist_t *caddrmapnvl; 7495 int32_t ver; 7496 int i; 7497 size_t cphci_list_size; 7498 7499 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7500 7501 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7502 ver != MDI_VHCI_CACHE_VERSION) 7503 return (MDI_FAILURE); 7504 7505 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7506 &nphcis) != 0) 7507 return (MDI_SUCCESS); 7508 7509 ASSERT(nphcis > 0); 7510 7511 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7512 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7513 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7514 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7515 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7516 enqueue_vhcache_phci(vhcache, cphci); 7517 cphci_list[i] = cphci; 7518 } 7519 7520 ASSERT(vhcache->vhcache_phci_head != NULL); 7521 7522 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7523 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7524 7525 kmem_free(cphci_list, cphci_list_size); 7526 return (MDI_SUCCESS); 7527 } 7528 7529 /* 7530 * Build paddrnvl for the specified client using the information in the 7531 * vhci cache and add it to the caddrmapnnvl. 7532 * Returns 0 on success, errno on failure. 
7533 */ 7534 static int 7535 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7536 nvlist_t *caddrmapnvl) 7537 { 7538 mdi_vhcache_pathinfo_t *cpi; 7539 nvlist_t *nvl; 7540 int err; 7541 uint32_t val[2]; 7542 7543 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7544 7545 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7546 return (err); 7547 7548 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7549 val[0] = cpi->cpi_cphci->cphci_id; 7550 val[1] = cpi->cpi_flags; 7551 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7552 != 0) 7553 goto out; 7554 } 7555 7556 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7557 out: 7558 nvlist_free(nvl); 7559 return (err); 7560 } 7561 7562 /* 7563 * Build caddrmapnvl using the information in the vhci cache 7564 * and add it to the mainnvl. 7565 * Returns 0 on success, errno on failure. 7566 */ 7567 static int 7568 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7569 { 7570 mdi_vhcache_client_t *cct; 7571 nvlist_t *nvl; 7572 int err; 7573 7574 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7575 7576 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7577 return (err); 7578 7579 for (cct = vhcache->vhcache_client_head; cct != NULL; 7580 cct = cct->cct_next) { 7581 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7582 goto out; 7583 } 7584 7585 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7586 out: 7587 nvlist_free(nvl); 7588 return (err); 7589 } 7590 7591 /* 7592 * Build nvlist using the information in the vhci cache. 7593 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7594 * Returns nvl on success, NULL on failure. 
7595 */ 7596 static nvlist_t * 7597 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7598 { 7599 mdi_vhcache_phci_t *cphci; 7600 uint_t phci_count; 7601 char **phcis; 7602 nvlist_t *nvl; 7603 int err, i; 7604 7605 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7606 nvl = NULL; 7607 goto out; 7608 } 7609 7610 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7611 MDI_VHCI_CACHE_VERSION)) != 0) 7612 goto out; 7613 7614 rw_enter(&vhcache->vhcache_lock, RW_READER); 7615 if (vhcache->vhcache_phci_head == NULL) { 7616 rw_exit(&vhcache->vhcache_lock); 7617 return (nvl); 7618 } 7619 7620 phci_count = 0; 7621 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7622 cphci = cphci->cphci_next) 7623 cphci->cphci_id = phci_count++; 7624 7625 /* build phci pathname list */ 7626 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7627 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7628 cphci = cphci->cphci_next, i++) 7629 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7630 7631 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7632 phci_count); 7633 free_string_array(phcis, phci_count); 7634 7635 if (err == 0 && 7636 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7637 rw_exit(&vhcache->vhcache_lock); 7638 return (nvl); 7639 } 7640 7641 rw_exit(&vhcache->vhcache_lock); 7642 out: 7643 if (nvl) 7644 nvlist_free(nvl); 7645 return (NULL); 7646 } 7647 7648 /* 7649 * Lookup vhcache phci structure for the specified phci path. 7650 */ 7651 static mdi_vhcache_phci_t * 7652 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7653 { 7654 mdi_vhcache_phci_t *cphci; 7655 7656 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7657 7658 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7659 cphci = cphci->cphci_next) { 7660 if (strcmp(cphci->cphci_path, phci_path) == 0) 7661 return (cphci); 7662 } 7663 7664 return (NULL); 7665 } 7666 7667 /* 7668 * Lookup vhcache phci structure for the specified phci. 
7669 */ 7670 static mdi_vhcache_phci_t * 7671 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7672 { 7673 mdi_vhcache_phci_t *cphci; 7674 7675 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7676 7677 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7678 cphci = cphci->cphci_next) { 7679 if (cphci->cphci_phci == ph) 7680 return (cphci); 7681 } 7682 7683 return (NULL); 7684 } 7685 7686 /* 7687 * Add the specified phci to the vhci cache if not already present. 7688 */ 7689 static void 7690 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7691 { 7692 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7693 mdi_vhcache_phci_t *cphci; 7694 char *pathname; 7695 int cache_updated; 7696 7697 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7698 7699 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7700 (void) ddi_pathname(ph->ph_dip, pathname); 7701 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7702 != NULL) { 7703 cphci->cphci_phci = ph; 7704 cache_updated = 0; 7705 } else { 7706 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7707 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7708 cphci->cphci_phci = ph; 7709 enqueue_vhcache_phci(vhcache, cphci); 7710 cache_updated = 1; 7711 } 7712 7713 rw_exit(&vhcache->vhcache_lock); 7714 7715 /* 7716 * Since a new phci has been added, reset 7717 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7718 * during next vhcache_discover_paths(). 7719 */ 7720 mutex_enter(&vhc->vhc_lock); 7721 vhc->vhc_path_discovery_cutoff_time = 0; 7722 mutex_exit(&vhc->vhc_lock); 7723 7724 kmem_free(pathname, MAXPATHLEN); 7725 if (cache_updated) 7726 vhcache_dirty(vhc); 7727 } 7728 7729 /* 7730 * Remove the reference to the specified phci from the vhci cache. 
7731 */ 7732 static void 7733 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7734 { 7735 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7736 mdi_vhcache_phci_t *cphci; 7737 7738 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7739 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7740 /* do not remove the actual mdi_vhcache_phci structure */ 7741 cphci->cphci_phci = NULL; 7742 } 7743 rw_exit(&vhcache->vhcache_lock); 7744 } 7745 7746 static void 7747 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7748 mdi_vhcache_lookup_token_t *src) 7749 { 7750 if (src == NULL) { 7751 dst->lt_cct = NULL; 7752 dst->lt_cct_lookup_time = 0; 7753 } else { 7754 dst->lt_cct = src->lt_cct; 7755 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7756 } 7757 } 7758 7759 /* 7760 * Look up vhcache client for the specified client. 7761 */ 7762 static mdi_vhcache_client_t * 7763 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7764 mdi_vhcache_lookup_token_t *token) 7765 { 7766 mod_hash_val_t hv; 7767 char *name_addr; 7768 int len; 7769 7770 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7771 7772 /* 7773 * If no vhcache clean occurred since the last lookup, we can 7774 * simply return the cct from the last lookup operation. 7775 * It works because ccts are never freed except during the vhcache 7776 * cleanup operation. 
7777 */ 7778 if (token != NULL && 7779 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7780 return (token->lt_cct); 7781 7782 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7783 if (mod_hash_find(vhcache->vhcache_client_hash, 7784 (mod_hash_key_t)name_addr, &hv) == 0) { 7785 if (token) { 7786 token->lt_cct = (mdi_vhcache_client_t *)hv; 7787 token->lt_cct_lookup_time = lbolt64; 7788 } 7789 } else { 7790 if (token) { 7791 token->lt_cct = NULL; 7792 token->lt_cct_lookup_time = 0; 7793 } 7794 hv = NULL; 7795 } 7796 kmem_free(name_addr, len); 7797 return ((mdi_vhcache_client_t *)hv); 7798 } 7799 7800 /* 7801 * Add the specified path to the vhci cache if not already present. 7802 * Also add the vhcache client for the client corresponding to this path 7803 * if it doesn't already exist. 7804 */ 7805 static void 7806 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7807 { 7808 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7809 mdi_vhcache_client_t *cct; 7810 mdi_vhcache_pathinfo_t *cpi; 7811 mdi_phci_t *ph = pip->pi_phci; 7812 mdi_client_t *ct = pip->pi_client; 7813 int cache_updated = 0; 7814 7815 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7816 7817 /* if vhcache client for this pip doesn't already exist, add it */ 7818 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7819 NULL)) == NULL) { 7820 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7821 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7822 ct->ct_guid, NULL); 7823 enqueue_vhcache_client(vhcache, cct); 7824 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7825 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7826 cache_updated = 1; 7827 } 7828 7829 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7830 if (cpi->cpi_cphci->cphci_phci == ph && 7831 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7832 cpi->cpi_pip = pip; 7833 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7834 cpi->cpi_flags &= 7835 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7836 sort_vhcache_paths(cct); 7837 cache_updated = 1; 7838 } 7839 break; 7840 } 7841 } 7842 7843 if (cpi == NULL) { 7844 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7845 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7846 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7847 ASSERT(cpi->cpi_cphci != NULL); 7848 cpi->cpi_pip = pip; 7849 enqueue_vhcache_pathinfo(cct, cpi); 7850 cache_updated = 1; 7851 } 7852 7853 rw_exit(&vhcache->vhcache_lock); 7854 7855 if (cache_updated) 7856 vhcache_dirty(vhc); 7857 } 7858 7859 /* 7860 * Remove the reference to the specified path from the vhci cache. 7861 */ 7862 static void 7863 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7864 { 7865 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7866 mdi_client_t *ct = pip->pi_client; 7867 mdi_vhcache_client_t *cct; 7868 mdi_vhcache_pathinfo_t *cpi; 7869 7870 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7871 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7872 NULL)) != NULL) { 7873 for (cpi = cct->cct_cpi_head; cpi != NULL; 7874 cpi = cpi->cpi_next) { 7875 if (cpi->cpi_pip == pip) { 7876 cpi->cpi_pip = NULL; 7877 break; 7878 } 7879 } 7880 } 7881 rw_exit(&vhcache->vhcache_lock); 7882 } 7883 7884 /* 7885 * Flush the vhci cache to disk. 7886 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7887 */ 7888 static int 7889 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7890 { 7891 nvlist_t *nvl; 7892 int err; 7893 int rv; 7894 7895 /* 7896 * It is possible that the system may shutdown before 7897 * i_ddi_io_initialized (during stmsboot for example). To allow for 7898 * flushing the cache in this case do not check for 7899 * i_ddi_io_initialized when force flag is set. 
7900 */ 7901 if (force_flag == 0 && !i_ddi_io_initialized()) 7902 return (MDI_FAILURE); 7903 7904 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7905 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7906 nvlist_free(nvl); 7907 } else 7908 err = EFAULT; 7909 7910 rv = MDI_SUCCESS; 7911 mutex_enter(&vhc->vhc_lock); 7912 if (err != 0) { 7913 if (err == EROFS) { 7914 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7915 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7916 MDI_VHC_VHCACHE_DIRTY); 7917 } else { 7918 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7919 cmn_err(CE_CONT, "%s: update failed\n", 7920 vhc->vhc_vhcache_filename); 7921 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7922 } 7923 rv = MDI_FAILURE; 7924 } 7925 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7926 cmn_err(CE_CONT, 7927 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7928 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7929 } 7930 mutex_exit(&vhc->vhc_lock); 7931 7932 return (rv); 7933 } 7934 7935 /* 7936 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7937 * Exits itself if left idle for the idle timeout period. 
7938 */ 7939 static void 7940 vhcache_flush_thread(void *arg) 7941 { 7942 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7943 clock_t idle_time, quit_at_ticks; 7944 callb_cpr_t cprinfo; 7945 7946 /* number of seconds to sleep idle before exiting */ 7947 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7948 7949 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7950 "mdi_vhcache_flush"); 7951 mutex_enter(&vhc->vhc_lock); 7952 for (; ; ) { 7953 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7954 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7955 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7956 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7957 (void) cv_timedwait(&vhc->vhc_cv, 7958 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7959 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7960 } else { 7961 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7962 mutex_exit(&vhc->vhc_lock); 7963 7964 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7965 vhcache_dirty(vhc); 7966 7967 mutex_enter(&vhc->vhc_lock); 7968 } 7969 } 7970 7971 quit_at_ticks = ddi_get_lbolt() + idle_time; 7972 7973 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7974 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7975 ddi_get_lbolt() < quit_at_ticks) { 7976 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7977 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7978 quit_at_ticks); 7979 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7980 } 7981 7982 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7983 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7984 goto out; 7985 } 7986 7987 out: 7988 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7989 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7990 CALLB_CPR_EXIT(&cprinfo); 7991 } 7992 7993 /* 7994 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7995 */ 7996 static void 7997 vhcache_dirty(mdi_vhci_config_t *vhc) 7998 { 7999 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8000 int create_thread; 8001 8002 rw_enter(&vhcache->vhcache_lock, RW_READER); 8003 /* do not flush cache until the cache is fully built */ 8004 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8005 rw_exit(&vhcache->vhcache_lock); 8006 return; 8007 } 8008 rw_exit(&vhcache->vhcache_lock); 8009 8010 mutex_enter(&vhc->vhc_lock); 8011 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8012 mutex_exit(&vhc->vhc_lock); 8013 return; 8014 } 8015 8016 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8017 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8018 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8019 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8020 cv_broadcast(&vhc->vhc_cv); 8021 create_thread = 0; 8022 } else { 8023 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8024 create_thread = 1; 8025 } 8026 mutex_exit(&vhc->vhc_lock); 8027 8028 if (create_thread) 8029 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8030 0, &p0, TS_RUN, minclsyspri); 8031 } 8032 8033 /* 8034 * phci bus config structure - one for for each phci bus config operation that 8035 * we initiate on behalf of a vhci. 
8036 */ 8037 typedef struct mdi_phci_bus_config_s { 8038 char *phbc_phci_path; 8039 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8040 struct mdi_phci_bus_config_s *phbc_next; 8041 } mdi_phci_bus_config_t; 8042 8043 /* vhci bus config structure - one for each vhci bus config operation */ 8044 typedef struct mdi_vhci_bus_config_s { 8045 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8046 major_t vhbc_op_major; /* bus config op major */ 8047 uint_t vhbc_op_flags; /* bus config op flags */ 8048 kmutex_t vhbc_lock; 8049 kcondvar_t vhbc_cv; 8050 int vhbc_thr_count; 8051 } mdi_vhci_bus_config_t; 8052 8053 /* 8054 * bus config the specified phci 8055 */ 8056 static void 8057 bus_config_phci(void *arg) 8058 { 8059 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8060 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8061 dev_info_t *ph_dip; 8062 8063 /* 8064 * first configure all path components upto phci and then configure 8065 * the phci children. 8066 */ 8067 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8068 != NULL) { 8069 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8070 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8071 (void) ndi_devi_config_driver(ph_dip, 8072 vhbc->vhbc_op_flags, 8073 vhbc->vhbc_op_major); 8074 } else 8075 (void) ndi_devi_config(ph_dip, 8076 vhbc->vhbc_op_flags); 8077 8078 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8079 ndi_rele_devi(ph_dip); 8080 } 8081 8082 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8083 kmem_free(phbc, sizeof (*phbc)); 8084 8085 mutex_enter(&vhbc->vhbc_lock); 8086 vhbc->vhbc_thr_count--; 8087 if (vhbc->vhbc_thr_count == 0) 8088 cv_broadcast(&vhbc->vhbc_cv); 8089 mutex_exit(&vhbc->vhbc_lock); 8090 } 8091 8092 /* 8093 * Bus config all phcis associated with the vhci in parallel. 8094 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8095 */ 8096 static void 8097 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8098 ddi_bus_config_op_t op, major_t maj) 8099 { 8100 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8101 mdi_vhci_bus_config_t *vhbc; 8102 mdi_vhcache_phci_t *cphci; 8103 8104 rw_enter(&vhcache->vhcache_lock, RW_READER); 8105 if (vhcache->vhcache_phci_head == NULL) { 8106 rw_exit(&vhcache->vhcache_lock); 8107 return; 8108 } 8109 8110 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8111 8112 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8113 cphci = cphci->cphci_next) { 8114 /* skip phcis that haven't attached before root is available */ 8115 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8116 continue; 8117 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8118 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8119 KM_SLEEP); 8120 phbc->phbc_vhbusconfig = vhbc; 8121 phbc->phbc_next = phbc_head; 8122 phbc_head = phbc; 8123 vhbc->vhbc_thr_count++; 8124 } 8125 rw_exit(&vhcache->vhcache_lock); 8126 8127 vhbc->vhbc_op = op; 8128 vhbc->vhbc_op_major = maj; 8129 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8130 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8131 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8132 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8133 8134 /* now create threads to initiate bus config on all phcis in parallel */ 8135 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8136 phbc_next = phbc->phbc_next; 8137 if (mdi_mtc_off) 8138 bus_config_phci((void *)phbc); 8139 else 8140 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8141 0, &p0, TS_RUN, minclsyspri); 8142 } 8143 8144 mutex_enter(&vhbc->vhbc_lock); 8145 /* wait until all threads exit */ 8146 while (vhbc->vhbc_thr_count > 0) 8147 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8148 mutex_exit(&vhbc->vhbc_lock); 8149 8150 mutex_destroy(&vhbc->vhbc_lock); 8151 cv_destroy(&vhbc->vhbc_cv); 8152 kmem_free(vhbc, sizeof (*vhbc)); 8153 } 8154 8155 /* 8156 * Single 
threaded version of bus_config_all_phcis() 8157 */ 8158 static void 8159 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8160 ddi_bus_config_op_t op, major_t maj) 8161 { 8162 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8163 8164 single_threaded_vhconfig_enter(vhc); 8165 bus_config_all_phcis(vhcache, flags, op, maj); 8166 single_threaded_vhconfig_exit(vhc); 8167 } 8168 8169 /* 8170 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8171 * The path includes the child component in addition to the phci path. 8172 */ 8173 static int 8174 bus_config_one_phci_child(char *path) 8175 { 8176 dev_info_t *ph_dip, *child; 8177 char *devnm; 8178 int rv = MDI_FAILURE; 8179 8180 /* extract the child component of the phci */ 8181 devnm = strrchr(path, '/'); 8182 *devnm++ = '\0'; 8183 8184 /* 8185 * first configure all path components upto phci and then 8186 * configure the phci child. 8187 */ 8188 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8189 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8190 NDI_SUCCESS) { 8191 /* 8192 * release the hold that ndi_devi_config_one() placed 8193 */ 8194 ndi_rele_devi(child); 8195 rv = MDI_SUCCESS; 8196 } 8197 8198 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8199 ndi_rele_devi(ph_dip); 8200 } 8201 8202 devnm--; 8203 *devnm = '/'; 8204 return (rv); 8205 } 8206 8207 /* 8208 * Build a list of phci client paths for the specified vhci client. 8209 * The list includes only those phci client paths which aren't configured yet. 8210 */ 8211 static mdi_phys_path_t * 8212 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8213 { 8214 mdi_vhcache_pathinfo_t *cpi; 8215 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8216 int config_path, len; 8217 8218 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8219 /* 8220 * include only those paths that aren't configured. 
8221 */ 8222 config_path = 0; 8223 if (cpi->cpi_pip == NULL) 8224 config_path = 1; 8225 else { 8226 MDI_PI_LOCK(cpi->cpi_pip); 8227 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8228 config_path = 1; 8229 MDI_PI_UNLOCK(cpi->cpi_pip); 8230 } 8231 8232 if (config_path) { 8233 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8234 len = strlen(cpi->cpi_cphci->cphci_path) + 8235 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8236 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8237 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8238 cpi->cpi_cphci->cphci_path, ct_name, 8239 cpi->cpi_addr); 8240 pp->phys_path_next = NULL; 8241 8242 if (pp_head == NULL) 8243 pp_head = pp; 8244 else 8245 pp_tail->phys_path_next = pp; 8246 pp_tail = pp; 8247 } 8248 } 8249 8250 return (pp_head); 8251 } 8252 8253 /* 8254 * Free the memory allocated for phci client path list. 8255 */ 8256 static void 8257 free_phclient_path_list(mdi_phys_path_t *pp_head) 8258 { 8259 mdi_phys_path_t *pp, *pp_next; 8260 8261 for (pp = pp_head; pp != NULL; pp = pp_next) { 8262 pp_next = pp->phys_path_next; 8263 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8264 kmem_free(pp, sizeof (*pp)); 8265 } 8266 } 8267 8268 /* 8269 * Allocated async client structure and initialize with the specified values. 8270 */ 8271 static mdi_async_client_config_t * 8272 alloc_async_client_config(char *ct_name, char *ct_addr, 8273 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8274 { 8275 mdi_async_client_config_t *acc; 8276 8277 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8278 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8279 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8280 acc->acc_phclient_path_list_head = pp_head; 8281 init_vhcache_lookup_token(&acc->acc_token, tok); 8282 acc->acc_next = NULL; 8283 return (acc); 8284 } 8285 8286 /* 8287 * Free the memory allocated for the async client structure and their members. 
8288 */ 8289 static void 8290 free_async_client_config(mdi_async_client_config_t *acc) 8291 { 8292 if (acc->acc_phclient_path_list_head) 8293 free_phclient_path_list(acc->acc_phclient_path_list_head); 8294 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8295 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8296 kmem_free(acc, sizeof (*acc)); 8297 } 8298 8299 /* 8300 * Sort vhcache pathinfos (cpis) of the specified client. 8301 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8302 * flag set come at the beginning of the list. All cpis which have this 8303 * flag set come at the end of the list. 8304 */ 8305 static void 8306 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8307 { 8308 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8309 8310 cpi_head = cct->cct_cpi_head; 8311 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8312 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8313 cpi_next = cpi->cpi_next; 8314 enqueue_vhcache_pathinfo(cct, cpi); 8315 } 8316 } 8317 8318 /* 8319 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8320 * every vhcache pathinfo of the specified client. If not adjust the flag 8321 * setting appropriately. 8322 * 8323 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8324 * on-disk vhci cache. So every time this flag is updated the cache must be 8325 * flushed. 8326 */ 8327 static void 8328 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8329 mdi_vhcache_lookup_token_t *tok) 8330 { 8331 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8332 mdi_vhcache_client_t *cct; 8333 mdi_vhcache_pathinfo_t *cpi; 8334 8335 rw_enter(&vhcache->vhcache_lock, RW_READER); 8336 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8337 == NULL) { 8338 rw_exit(&vhcache->vhcache_lock); 8339 return; 8340 } 8341 8342 /* 8343 * to avoid unnecessary on-disk cache updates, first check if an 8344 * update is really needed. 
If no update is needed simply return. 8345 */ 8346 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8347 if ((cpi->cpi_pip != NULL && 8348 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8349 (cpi->cpi_pip == NULL && 8350 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8351 break; 8352 } 8353 } 8354 if (cpi == NULL) { 8355 rw_exit(&vhcache->vhcache_lock); 8356 return; 8357 } 8358 8359 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8360 rw_exit(&vhcache->vhcache_lock); 8361 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8362 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8363 tok)) == NULL) { 8364 rw_exit(&vhcache->vhcache_lock); 8365 return; 8366 } 8367 } 8368 8369 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8370 if (cpi->cpi_pip != NULL) 8371 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8372 else 8373 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8374 } 8375 sort_vhcache_paths(cct); 8376 8377 rw_exit(&vhcache->vhcache_lock); 8378 vhcache_dirty(vhc); 8379 } 8380 8381 /* 8382 * Configure all specified paths of the client. 8383 */ 8384 static void 8385 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8386 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8387 { 8388 mdi_phys_path_t *pp; 8389 8390 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8391 (void) bus_config_one_phci_child(pp->phys_path); 8392 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8393 } 8394 8395 /* 8396 * Dequeue elements from vhci async client config list and bus configure 8397 * their corresponding phci clients. 
8398 */ 8399 static void 8400 config_client_paths_thread(void *arg) 8401 { 8402 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8403 mdi_async_client_config_t *acc; 8404 clock_t quit_at_ticks; 8405 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8406 callb_cpr_t cprinfo; 8407 8408 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8409 "mdi_config_client_paths"); 8410 8411 for (; ; ) { 8412 quit_at_ticks = ddi_get_lbolt() + idle_time; 8413 8414 mutex_enter(&vhc->vhc_lock); 8415 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8416 vhc->vhc_acc_list_head == NULL && 8417 ddi_get_lbolt() < quit_at_ticks) { 8418 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8419 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8420 quit_at_ticks); 8421 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8422 } 8423 8424 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8425 vhc->vhc_acc_list_head == NULL) 8426 goto out; 8427 8428 acc = vhc->vhc_acc_list_head; 8429 vhc->vhc_acc_list_head = acc->acc_next; 8430 if (vhc->vhc_acc_list_head == NULL) 8431 vhc->vhc_acc_list_tail = NULL; 8432 vhc->vhc_acc_count--; 8433 mutex_exit(&vhc->vhc_lock); 8434 8435 config_client_paths_sync(vhc, acc->acc_ct_name, 8436 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8437 &acc->acc_token); 8438 8439 free_async_client_config(acc); 8440 } 8441 8442 out: 8443 vhc->vhc_acc_thrcount--; 8444 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8445 CALLB_CPR_EXIT(&cprinfo); 8446 } 8447 8448 /* 8449 * Arrange for all the phci client paths (pp_head) for the specified client 8450 * to be bus configured asynchronously by a thread. 
8451 */ 8452 static void 8453 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8454 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8455 { 8456 mdi_async_client_config_t *acc, *newacc; 8457 int create_thread; 8458 8459 if (pp_head == NULL) 8460 return; 8461 8462 if (mdi_mtc_off) { 8463 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8464 free_phclient_path_list(pp_head); 8465 return; 8466 } 8467 8468 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8469 ASSERT(newacc); 8470 8471 mutex_enter(&vhc->vhc_lock); 8472 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8473 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8474 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8475 free_async_client_config(newacc); 8476 mutex_exit(&vhc->vhc_lock); 8477 return; 8478 } 8479 } 8480 8481 if (vhc->vhc_acc_list_head == NULL) 8482 vhc->vhc_acc_list_head = newacc; 8483 else 8484 vhc->vhc_acc_list_tail->acc_next = newacc; 8485 vhc->vhc_acc_list_tail = newacc; 8486 vhc->vhc_acc_count++; 8487 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8488 cv_broadcast(&vhc->vhc_cv); 8489 create_thread = 0; 8490 } else { 8491 vhc->vhc_acc_thrcount++; 8492 create_thread = 1; 8493 } 8494 mutex_exit(&vhc->vhc_lock); 8495 8496 if (create_thread) 8497 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8498 0, &p0, TS_RUN, minclsyspri); 8499 } 8500 8501 /* 8502 * Return number of online paths for the specified client. 
8503 */ 8504 static int 8505 nonline_paths(mdi_vhcache_client_t *cct) 8506 { 8507 mdi_vhcache_pathinfo_t *cpi; 8508 int online_count = 0; 8509 8510 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8511 if (cpi->cpi_pip != NULL) { 8512 MDI_PI_LOCK(cpi->cpi_pip); 8513 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8514 online_count++; 8515 MDI_PI_UNLOCK(cpi->cpi_pip); 8516 } 8517 } 8518 8519 return (online_count); 8520 } 8521 8522 /* 8523 * Bus configure all paths for the specified vhci client. 8524 * If at least one path for the client is already online, the remaining paths 8525 * will be configured asynchronously. Otherwise, it synchronously configures 8526 * the paths until at least one path is online and then rest of the paths 8527 * will be configured asynchronously. 8528 */ 8529 static void 8530 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8531 { 8532 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8533 mdi_phys_path_t *pp_head, *pp; 8534 mdi_vhcache_client_t *cct; 8535 mdi_vhcache_lookup_token_t tok; 8536 8537 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8538 8539 init_vhcache_lookup_token(&tok, NULL); 8540 8541 if (ct_name == NULL || ct_addr == NULL || 8542 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8543 == NULL || 8544 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8545 rw_exit(&vhcache->vhcache_lock); 8546 return; 8547 } 8548 8549 /* if at least one path is online, configure the rest asynchronously */ 8550 if (nonline_paths(cct) > 0) { 8551 rw_exit(&vhcache->vhcache_lock); 8552 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8553 return; 8554 } 8555 8556 rw_exit(&vhcache->vhcache_lock); 8557 8558 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8559 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8560 rw_enter(&vhcache->vhcache_lock, RW_READER); 8561 8562 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8563 ct_addr, &tok)) 
== NULL) { 8564 rw_exit(&vhcache->vhcache_lock); 8565 goto out; 8566 } 8567 8568 if (nonline_paths(cct) > 0 && 8569 pp->phys_path_next != NULL) { 8570 rw_exit(&vhcache->vhcache_lock); 8571 config_client_paths_async(vhc, ct_name, ct_addr, 8572 pp->phys_path_next, &tok); 8573 pp->phys_path_next = NULL; 8574 goto out; 8575 } 8576 8577 rw_exit(&vhcache->vhcache_lock); 8578 } 8579 } 8580 8581 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8582 out: 8583 free_phclient_path_list(pp_head); 8584 } 8585 8586 static void 8587 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8588 { 8589 mutex_enter(&vhc->vhc_lock); 8590 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8591 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8592 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8593 mutex_exit(&vhc->vhc_lock); 8594 } 8595 8596 static void 8597 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8598 { 8599 mutex_enter(&vhc->vhc_lock); 8600 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8601 cv_broadcast(&vhc->vhc_cv); 8602 mutex_exit(&vhc->vhc_lock); 8603 } 8604 8605 typedef struct mdi_phci_driver_info { 8606 char *phdriver_name; /* name of the phci driver */ 8607 8608 /* set to non zero if the phci driver supports root device */ 8609 int phdriver_root_support; 8610 } mdi_phci_driver_info_t; 8611 8612 /* 8613 * vhci class and root support capability of a phci driver can be 8614 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8615 * phci driver.conf file. The built-in tables below contain this information 8616 * for those phci drivers whose driver.conf files don't yet contain this info. 8617 * 8618 * All phci drivers expect iscsi have root device support. 
8619 */ 8620 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8621 { "fp", 1 }, 8622 { "iscsi", 0 }, 8623 { "ibsrp", 1 } 8624 }; 8625 8626 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8627 8628 static void * 8629 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8630 { 8631 void *new_ptr; 8632 8633 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8634 if (old_ptr) { 8635 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8636 kmem_free(old_ptr, old_size); 8637 } 8638 return (new_ptr); 8639 } 8640 8641 static void 8642 add_to_phci_list(char ***driver_list, int **root_support_list, 8643 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8644 { 8645 ASSERT(*cur_elements <= *max_elements); 8646 if (*cur_elements == *max_elements) { 8647 *max_elements += 10; 8648 *driver_list = mdi_realloc(*driver_list, 8649 sizeof (char *) * (*cur_elements), 8650 sizeof (char *) * (*max_elements)); 8651 *root_support_list = mdi_realloc(*root_support_list, 8652 sizeof (int) * (*cur_elements), 8653 sizeof (int) * (*max_elements)); 8654 } 8655 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8656 (*root_support_list)[*cur_elements] = root_support; 8657 (*cur_elements)++; 8658 } 8659 8660 static void 8661 get_phci_driver_list(char *vhci_class, char ***driver_list, 8662 int **root_support_list, int *cur_elements, int *max_elements) 8663 { 8664 mdi_phci_driver_info_t *st_driver_list, *p; 8665 int st_ndrivers, root_support, i, j, driver_conf_count; 8666 major_t m; 8667 struct devnames *dnp; 8668 ddi_prop_t *propp; 8669 8670 *driver_list = NULL; 8671 *root_support_list = NULL; 8672 *cur_elements = 0; 8673 *max_elements = 0; 8674 8675 /* add the phci drivers derived from the phci driver.conf files */ 8676 for (m = 0; m < devcnt; m++) { 8677 dnp = &devnamesp[m]; 8678 8679 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8680 LOCK_DEV_OPS(&dnp->dn_lock); 8681 if (dnp->dn_global_prop_ptr != NULL && 8682 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8683 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8684 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8685 strcmp(propp->prop_val, vhci_class) == 0) { 8686 8687 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8688 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8689 &dnp->dn_global_prop_ptr->prop_list) 8690 == NULL) ? 1 : 0; 8691 8692 add_to_phci_list(driver_list, root_support_list, 8693 cur_elements, max_elements, dnp->dn_name, 8694 root_support); 8695 8696 UNLOCK_DEV_OPS(&dnp->dn_lock); 8697 } else 8698 UNLOCK_DEV_OPS(&dnp->dn_lock); 8699 } 8700 } 8701 8702 driver_conf_count = *cur_elements; 8703 8704 /* add the phci drivers specified in the built-in tables */ 8705 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8706 st_driver_list = scsi_phci_driver_list; 8707 st_ndrivers = sizeof (scsi_phci_driver_list) / 8708 sizeof (mdi_phci_driver_info_t); 8709 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8710 st_driver_list = ib_phci_driver_list; 8711 st_ndrivers = sizeof (ib_phci_driver_list) / 8712 sizeof (mdi_phci_driver_info_t); 8713 } else { 8714 st_driver_list = NULL; 8715 st_ndrivers = 0; 8716 } 8717 8718 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8719 /* add this phci driver if not already added before */ 8720 for (j = 0; j < driver_conf_count; j++) { 8721 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8722 break; 8723 } 8724 if (j == driver_conf_count) { 8725 add_to_phci_list(driver_list, root_support_list, 8726 cur_elements, max_elements, p->phdriver_name, 8727 p->phdriver_root_support); 8728 } 8729 } 8730 } 8731 8732 /* 8733 * Attach the phci driver instances associated with the specified vhci class. 8734 * If root is mounted attach all phci driver instances. 8735 * If root is not mounted, attach the instances of only those phci 8736 * drivers that have the root support. 
8737 */ 8738 static void 8739 attach_phci_drivers(char *vhci_class) 8740 { 8741 char **driver_list, **p; 8742 int *root_support_list; 8743 int cur_elements, max_elements, i; 8744 major_t m; 8745 8746 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8747 &cur_elements, &max_elements); 8748 8749 for (i = 0; i < cur_elements; i++) { 8750 if (modrootloaded || root_support_list[i]) { 8751 m = ddi_name_to_major(driver_list[i]); 8752 if (m != DDI_MAJOR_T_NONE && 8753 ddi_hold_installed_driver(m)) 8754 ddi_rele_driver(m); 8755 } 8756 } 8757 8758 if (driver_list) { 8759 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8760 kmem_free(*p, strlen(*p) + 1); 8761 kmem_free(driver_list, sizeof (char *) * max_elements); 8762 kmem_free(root_support_list, sizeof (int) * max_elements); 8763 } 8764 } 8765 8766 /* 8767 * Build vhci cache: 8768 * 8769 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8770 * the phci driver instances. During this process the cache gets built. 8771 * 8772 * Cache is built fully if the root is mounted. 8773 * If the root is not mounted, phci drivers that do not have root support 8774 * are not attached. As a result the cache is built partially. The entries 8775 * in the cache reflect only those phci drivers that have root support. 
8776 */ 8777 static int 8778 build_vhci_cache(mdi_vhci_t *vh) 8779 { 8780 mdi_vhci_config_t *vhc = vh->vh_config; 8781 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8782 8783 single_threaded_vhconfig_enter(vhc); 8784 8785 rw_enter(&vhcache->vhcache_lock, RW_READER); 8786 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8787 rw_exit(&vhcache->vhcache_lock); 8788 single_threaded_vhconfig_exit(vhc); 8789 return (0); 8790 } 8791 rw_exit(&vhcache->vhcache_lock); 8792 8793 attach_phci_drivers(vh->vh_class); 8794 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8795 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 8796 8797 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8798 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8799 rw_exit(&vhcache->vhcache_lock); 8800 8801 single_threaded_vhconfig_exit(vhc); 8802 vhcache_dirty(vhc); 8803 return (1); 8804 } 8805 8806 /* 8807 * Determine if discovery of paths is needed. 8808 */ 8809 static int 8810 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8811 { 8812 int rv = 1; 8813 8814 mutex_enter(&vhc->vhc_lock); 8815 if (i_ddi_io_initialized() == 0) { 8816 if (vhc->vhc_path_discovery_boot > 0) { 8817 vhc->vhc_path_discovery_boot--; 8818 goto out; 8819 } 8820 } else { 8821 if (vhc->vhc_path_discovery_postboot > 0) { 8822 vhc->vhc_path_discovery_postboot--; 8823 goto out; 8824 } 8825 } 8826 8827 /* 8828 * Do full path discovery at most once per mdi_path_discovery_interval. 8829 * This is to avoid a series of full path discoveries when opening 8830 * stale /dev/[r]dsk links. 8831 */ 8832 if (mdi_path_discovery_interval != -1 && 8833 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8834 goto out; 8835 8836 rv = 0; 8837 out: 8838 mutex_exit(&vhc->vhc_lock); 8839 return (rv); 8840 } 8841 8842 /* 8843 * Discover all paths: 8844 * 8845 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8846 * driver instances. During this process all paths will be discovered. 
8847 */ 8848 static int 8849 vhcache_discover_paths(mdi_vhci_t *vh) 8850 { 8851 mdi_vhci_config_t *vhc = vh->vh_config; 8852 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8853 int rv = 0; 8854 8855 single_threaded_vhconfig_enter(vhc); 8856 8857 if (vhcache_do_discovery(vhc)) { 8858 attach_phci_drivers(vh->vh_class); 8859 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8860 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 8861 8862 mutex_enter(&vhc->vhc_lock); 8863 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8864 mdi_path_discovery_interval * TICKS_PER_SECOND; 8865 mutex_exit(&vhc->vhc_lock); 8866 rv = 1; 8867 } 8868 8869 single_threaded_vhconfig_exit(vhc); 8870 return (rv); 8871 } 8872 8873 /* 8874 * Generic vhci bus config implementation: 8875 * 8876 * Parameters 8877 * vdip vhci dip 8878 * flags bus config flags 8879 * op bus config operation 8880 * The remaining parameters are bus config operation specific 8881 * 8882 * for BUS_CONFIG_ONE 8883 * arg pointer to name@addr 8884 * child upon successful return from this function, *child will be 8885 * set to the configured and held devinfo child node of vdip. 8886 * ct_addr pointer to client address (i.e. GUID) 8887 * 8888 * for BUS_CONFIG_DRIVER 8889 * arg major number of the driver 8890 * child and ct_addr parameters are ignored 8891 * 8892 * for BUS_CONFIG_ALL 8893 * arg, child, and ct_addr parameters are ignored 8894 * 8895 * Note that for the rest of the bus config operations, this function simply 8896 * calls the framework provided default bus config routine. 
8897 */ 8898 int 8899 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8900 void *arg, dev_info_t **child, char *ct_addr) 8901 { 8902 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8903 mdi_vhci_config_t *vhc = vh->vh_config; 8904 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8905 int rv = 0; 8906 int params_valid = 0; 8907 char *cp; 8908 8909 /* 8910 * To bus config vhcis we relay operation, possibly using another 8911 * thread, to phcis. The phci driver then interacts with MDI to cause 8912 * vhci child nodes to be enumerated under the vhci node. Adding a 8913 * vhci child requires an ndi_devi_enter of the vhci. Since another 8914 * thread may be adding the child, to avoid deadlock we can't wait 8915 * for the relayed operations to complete if we have already entered 8916 * the vhci node. 8917 */ 8918 if (DEVI_BUSY_OWNED(vdip)) { 8919 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8920 "vhci dip is busy owned %p\n", (void *)vdip)); 8921 goto default_bus_config; 8922 } 8923 8924 rw_enter(&vhcache->vhcache_lock, RW_READER); 8925 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8926 rw_exit(&vhcache->vhcache_lock); 8927 rv = build_vhci_cache(vh); 8928 rw_enter(&vhcache->vhcache_lock, RW_READER); 8929 } 8930 8931 switch (op) { 8932 case BUS_CONFIG_ONE: 8933 if (arg != NULL && ct_addr != NULL) { 8934 /* extract node name */ 8935 cp = (char *)arg; 8936 while (*cp != '\0' && *cp != '@') 8937 cp++; 8938 if (*cp == '@') { 8939 params_valid = 1; 8940 *cp = '\0'; 8941 config_client_paths(vhc, (char *)arg, ct_addr); 8942 /* config_client_paths() releases cache_lock */ 8943 *cp = '@'; 8944 break; 8945 } 8946 } 8947 8948 rw_exit(&vhcache->vhcache_lock); 8949 break; 8950 8951 case BUS_CONFIG_DRIVER: 8952 rw_exit(&vhcache->vhcache_lock); 8953 if (rv == 0) 8954 st_bus_config_all_phcis(vhc, flags, op, 8955 (major_t)(uintptr_t)arg); 8956 break; 8957 8958 case BUS_CONFIG_ALL: 8959 rw_exit(&vhcache->vhcache_lock); 8960 if (rv == 0) 8961 
st_bus_config_all_phcis(vhc, flags, op, -1); 8962 break; 8963 8964 default: 8965 rw_exit(&vhcache->vhcache_lock); 8966 break; 8967 } 8968 8969 8970 default_bus_config: 8971 /* 8972 * All requested child nodes are enumerated under the vhci. 8973 * Now configure them. 8974 */ 8975 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8976 NDI_SUCCESS) { 8977 return (MDI_SUCCESS); 8978 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8979 /* discover all paths and try configuring again */ 8980 if (vhcache_discover_paths(vh) && 8981 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8982 NDI_SUCCESS) 8983 return (MDI_SUCCESS); 8984 } 8985 8986 return (MDI_FAILURE); 8987 } 8988 8989 /* 8990 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8991 */ 8992 static nvlist_t * 8993 read_on_disk_vhci_cache(char *vhci_class) 8994 { 8995 nvlist_t *nvl; 8996 int err; 8997 char *filename; 8998 8999 filename = vhclass2vhcache_filename(vhci_class); 9000 9001 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9002 kmem_free(filename, strlen(filename) + 1); 9003 return (nvl); 9004 } else if (err == EIO) 9005 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 9006 else if (err == EINVAL) 9007 cmn_err(CE_WARN, 9008 "%s: data file corrupted, will recreate\n", filename); 9009 9010 kmem_free(filename, strlen(filename) + 1); 9011 return (NULL); 9012 } 9013 9014 /* 9015 * Read on-disk vhci cache into nvlists for all vhci classes. 9016 * Called during booting by i_ddi_read_devices_files(). 9017 */ 9018 void 9019 mdi_read_devices_files(void) 9020 { 9021 int i; 9022 9023 for (i = 0; i < N_VHCI_CLASSES; i++) 9024 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9025 } 9026 9027 /* 9028 * Remove all stale entries from vhci cache. 
9029 */ 9030 static void 9031 clean_vhcache(mdi_vhci_config_t *vhc) 9032 { 9033 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9034 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 9035 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 9036 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 9037 9038 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9039 9040 cct_head = vhcache->vhcache_client_head; 9041 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9042 for (cct = cct_head; cct != NULL; cct = cct_next) { 9043 cct_next = cct->cct_next; 9044 9045 cpi_head = cct->cct_cpi_head; 9046 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 9047 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 9048 cpi_next = cpi->cpi_next; 9049 if (cpi->cpi_pip != NULL) { 9050 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 9051 enqueue_tail_vhcache_pathinfo(cct, cpi); 9052 } else 9053 free_vhcache_pathinfo(cpi); 9054 } 9055 9056 if (cct->cct_cpi_head != NULL) 9057 enqueue_vhcache_client(vhcache, cct); 9058 else { 9059 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9060 (mod_hash_key_t)cct->cct_name_addr); 9061 free_vhcache_client(cct); 9062 } 9063 } 9064 9065 cphci_head = vhcache->vhcache_phci_head; 9066 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9067 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 9068 cphci_next = cphci->cphci_next; 9069 if (cphci->cphci_phci != NULL) 9070 enqueue_vhcache_phci(vhcache, cphci); 9071 else 9072 free_vhcache_phci(cphci); 9073 } 9074 9075 vhcache->vhcache_clean_time = lbolt64; 9076 rw_exit(&vhcache->vhcache_lock); 9077 vhcache_dirty(vhc); 9078 } 9079 9080 /* 9081 * Remove all stale entries from vhci cache. 
 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
 */
void
mdi_clean_vhcache(void)
{
	mdi_vhci_t *vh;

	/*
	 * Walk every registered vhci, holding a refcnt on the current one
	 * so it cannot disappear while mdi_mutex is dropped for the
	 * (potentially slow) per-vhci cleanup.
	 */
	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		clean_vhcache(vh->vh_config);
		mutex_enter(&mdi_mutex);
		vh->vh_refcnt--;
	}
	mutex_exit(&mdi_mutex);
}

/*
 * mdi_vhci_walk_clients():
 *		Walker routine to traverse client dev_info nodes
 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
 * below the client, including nexus devices, which we don't want.
 * So we just traverse the immediate siblings, starting from 1st client.
 */
void
mdi_vhci_walk_clients(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
	dev_info_t	*cdip;
	mdi_client_t	*ct;

	MDI_VHCI_CLIENT_LOCK(vh);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		ct = i_devi_get_client(cdip);
		MDI_CLIENT_LOCK(ct);

		/* stop early if the callback says anything but CONTINUE */
		if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
			cdip = ddi_get_next_sibling(cdip);
		else
			cdip = NULL;

		MDI_CLIENT_UNLOCK(ct);
	}
	MDI_VHCI_CLIENT_UNLOCK(vh);
}

/*
 * mdi_vhci_walk_phcis():
 *		Walker routine to traverse phci dev_info nodes
 */
void
mdi_vhci_walk_phcis(dev_info_t *vdip,
    int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = i_devi_get_vhci(vdip);
	mdi_phci_t	*ph, *next;

	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph) {
		MDI_PHCI_LOCK(ph);

		/* capture the successor under the phci lock */
		if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
			next = ph->ph_next;
		else
			next = NULL;

		MDI_PHCI_UNLOCK(ph);
		ph = next;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
}


/*
 * mdi_walk_vhcis():
 *		Walker routine to traverse vhci dev_info nodes
 */
void
mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
{
	mdi_vhci_t	*vh = NULL;

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		/* hold a refcnt while mdi_mutex is dropped for the callback */
		vh->vh_refcnt++;
		mutex_exit(&mdi_mutex);
		if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
			break;
		} else {
			mutex_enter(&mdi_mutex);
			vh->vh_refcnt--;
		}
	}

	mutex_exit(&mdi_mutex);
}

/*
 * i_mdi_log_sysevent():
 *		Logs events for pickup by syseventd
 * Builds an nvlist of driver name/major/instance/pathname/class attributes
 * and posts it as an EC_DDI event of the given subclass. Failures past the
 * initial allocation are silently dropped (best-effort logging).
 */
static void
i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
{
	char		*path_name;
	nvlist_t	*attr_list;

	if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
	    KM_SLEEP) != DDI_SUCCESS) {
		goto alloc_failed;
	}

	path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path_name);

	if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
	    ddi_driver_name(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
	    (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_int32(attr_list, DDI_INSTANCE,
	    (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_PATHNAME,
	    path_name) != DDI_SUCCESS) {
		goto error;
	}

	if (nvlist_add_string(attr_list, DDI_CLASS,
	    ph_vh_class) != DDI_SUCCESS) {
		goto error;
	}

	(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
	    attr_list, NULL, DDI_SLEEP);

error:
	kmem_free(path_name, MAXPATHLEN);
	nvlist_free(attr_list);
	return;

alloc_failed:
	MDI_DEBUG(1, (CE_WARN, dip,
	    "!i_mdi_log_sysevent: Unable to send sysevent"));
}
9243 9244 char ** 9245 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9246 { 9247 char **driver_list, **ret_driver_list = NULL; 9248 int *root_support_list; 9249 int cur_elements, max_elements; 9250 9251 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9252 &cur_elements, &max_elements); 9253 9254 9255 if (driver_list) { 9256 kmem_free(root_support_list, sizeof (int) * max_elements); 9257 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9258 * max_elements, sizeof (char *) * cur_elements); 9259 } 9260 *ndrivers = cur_elements; 9261 9262 return (ret_driver_list); 9263 9264 } 9265 9266 void 9267 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9268 { 9269 char **p; 9270 int i; 9271 9272 if (driver_list) { 9273 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9274 kmem_free(*p, strlen(*p) + 1); 9275 kmem_free(driver_list, sizeof (char *) * ndrivers); 9276 } 9277 } 9278