1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
29 * 30 * Default locking order: 31 * 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 #include <sys/sysmacros.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(level, stmnt) \ 77 if (mdi_debug >= (level)) i_mdi_log stmnt 78 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 79 #else /* !DEBUG */ 80 #define MDI_DEBUG(level, stmnt) 81 #endif /* DEBUG */ 82 83 extern pri_t minclsyspri; 84 extern int modrootloaded; 85 86 /* 87 * Global mutex: 88 * Protects vHCI list and structure members. 
89 */ 90 kmutex_t mdi_mutex; 91 92 /* 93 * Registered vHCI class driver lists 94 */ 95 int mdi_vhci_count; 96 mdi_vhci_t *mdi_vhci_head; 97 mdi_vhci_t *mdi_vhci_tail; 98 99 /* 100 * Client Hash Table size 101 */ 102 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 103 104 /* 105 * taskq interface definitions 106 */ 107 #define MDI_TASKQ_N_THREADS 8 108 #define MDI_TASKQ_PRI minclsyspri 109 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 110 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 111 112 taskq_t *mdi_taskq; 113 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 114 115 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 116 117 /* 118 * The data should be "quiet" for this interval (in seconds) before the 119 * vhci cached data is flushed to the disk. 120 */ 121 static int mdi_vhcache_flush_delay = 10; 122 123 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 124 static int mdi_vhcache_flush_daemon_idle_time = 60; 125 126 /* 127 * MDI falls back to discovery of all paths when a bus_config_one fails. 128 * The following parameters can be used to tune this operation. 129 * 130 * mdi_path_discovery_boot 131 * Number of times path discovery will be attempted during early boot. 132 * Probably there is no reason to ever set this value to greater than one. 133 * 134 * mdi_path_discovery_postboot 135 * Number of times path discovery will be attempted after early boot. 136 * Set it to a minimum of two to allow for discovery of iscsi paths which 137 * may happen very late during booting. 138 * 139 * mdi_path_discovery_interval 140 * Minimum number of seconds MDI will wait between successive discovery 141 * of all paths. Set it to -1 to disable discovery of all paths. 
142 */ 143 static int mdi_path_discovery_boot = 1; 144 static int mdi_path_discovery_postboot = 2; 145 static int mdi_path_discovery_interval = 10; 146 147 /* 148 * number of seconds the asynchronous configuration thread will sleep idle 149 * before exiting. 150 */ 151 static int mdi_async_config_idle_time = 600; 152 153 static int mdi_bus_config_cache_hash_size = 256; 154 155 /* turns off multithreaded configuration for certain operations */ 156 static int mdi_mtc_off = 0; 157 158 /* 159 * The "path" to a pathinfo node is identical to the /devices path to a 160 * devinfo node had the device been enumerated under a pHCI instead of 161 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 162 * This association persists across create/delete of the pathinfo nodes, 163 * but not across reboot. 164 */ 165 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 166 static int mdi_pathmap_hash_size = 256; 167 static kmutex_t mdi_pathmap_mutex; 168 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 169 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 170 171 /* 172 * MDI component property name/value string definitions 173 */ 174 const char *mdi_component_prop = "mpxio-component"; 175 const char *mdi_component_prop_vhci = "vhci"; 176 const char *mdi_component_prop_phci = "phci"; 177 const char *mdi_component_prop_client = "client"; 178 179 /* 180 * MDI client global unique identifier property name 181 */ 182 const char *mdi_client_guid_prop = "client-guid"; 183 184 /* 185 * MDI client load balancing property name/value string definitions 186 */ 187 const char *mdi_load_balance = "load-balance"; 188 const char *mdi_load_balance_none = "none"; 189 const char *mdi_load_balance_rr = "round-robin"; 190 const char *mdi_load_balance_lba = "logical-block"; 191 192 /* 193 * Obsolete vHCI class definition; to be removed after Leadville update 194 */ 195 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 196 197 static char 
vhci_greeting[] = 198 "\tThere already exists one vHCI driver for class %s\n" 199 "\tOnly one vHCI driver for each class is allowed\n"; 200 201 /* 202 * Static function prototypes 203 */ 204 static int i_mdi_phci_offline(dev_info_t *, uint_t); 205 static int i_mdi_client_offline(dev_info_t *, uint_t); 206 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 207 static void i_mdi_phci_post_detach(dev_info_t *, 208 ddi_detach_cmd_t, int); 209 static int i_mdi_client_pre_detach(dev_info_t *, 210 ddi_detach_cmd_t); 211 static void i_mdi_client_post_detach(dev_info_t *, 212 ddi_detach_cmd_t, int); 213 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 214 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 215 static int i_mdi_lba_lb(mdi_client_t *ct, 216 mdi_pathinfo_t **ret_pip, struct buf *buf); 217 static void i_mdi_pm_hold_client(mdi_client_t *, int); 218 static void i_mdi_pm_rele_client(mdi_client_t *, int); 219 static void i_mdi_pm_reset_client(mdi_client_t *); 220 static int i_mdi_power_all_phci(mdi_client_t *); 221 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 222 223 224 /* 225 * Internal mdi_pathinfo node functions 226 */ 227 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 228 229 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 230 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 231 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 232 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 233 static void i_mdi_phci_unlock(mdi_phci_t *); 234 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 235 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 236 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 237 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 238 mdi_client_t *); 239 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 240 static void i_mdi_client_remove_path(mdi_client_t *, 241 mdi_pathinfo_t *); 242 243 static int 
i_mdi_pi_state_change(mdi_pathinfo_t *, 244 mdi_pathinfo_state_t, int); 245 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 246 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 247 char **, int); 248 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 249 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 250 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 251 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 252 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 253 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 254 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 255 static void i_mdi_client_update_state(mdi_client_t *); 256 static int i_mdi_client_compute_state(mdi_client_t *, 257 mdi_phci_t *); 258 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 259 static void i_mdi_client_unlock(mdi_client_t *); 260 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 261 static mdi_client_t *i_devi_get_client(dev_info_t *); 262 /* 263 * NOTE: this will be removed once the NWS files are changed to use the new 264 * mdi_{enable,disable}_path interfaces 265 */ 266 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 267 int, int); 268 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 269 mdi_vhci_t *vh, int flags, int op); 270 /* 271 * Failover related function prototypes 272 */ 273 static int i_mdi_failover(void *); 274 275 /* 276 * misc internal functions 277 */ 278 static int i_mdi_get_hash_key(char *); 279 static int i_map_nvlist_error_to_mdi(int); 280 static void i_mdi_report_path_state(mdi_client_t *, 281 mdi_pathinfo_t *); 282 283 static void setup_vhci_cache(mdi_vhci_t *); 284 static int destroy_vhci_cache(mdi_vhci_t *); 285 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 286 static boolean_t stop_vhcache_flush_thread(void *, int); 287 static void 
free_string_array(char **, int); 288 static void free_vhcache_phci(mdi_vhcache_phci_t *); 289 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 290 static void free_vhcache_client(mdi_vhcache_client_t *); 291 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 292 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 293 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 294 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 295 static void vhcache_pi_add(mdi_vhci_config_t *, 296 struct mdi_pathinfo *); 297 static void vhcache_pi_remove(mdi_vhci_config_t *, 298 struct mdi_pathinfo *); 299 static void free_phclient_path_list(mdi_phys_path_t *); 300 static void sort_vhcache_paths(mdi_vhcache_client_t *); 301 static int flush_vhcache(mdi_vhci_config_t *, int); 302 static void vhcache_dirty(mdi_vhci_config_t *); 303 static void free_async_client_config(mdi_async_client_config_t *); 304 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 305 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 306 static nvlist_t *read_on_disk_vhci_cache(char *); 307 extern int fread_nvlist(char *, nvlist_t **); 308 extern int fwrite_nvlist(char *, nvlist_t *); 309 310 /* called once when first vhci registers with mdi */ 311 static void 312 i_mdi_init() 313 { 314 static int initialized = 0; 315 316 if (initialized) 317 return; 318 initialized = 1; 319 320 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 321 322 /* Create our taskq resources */ 323 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 324 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 325 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 326 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 327 328 /* Allocate ['path_instance' <-> "path"] maps */ 329 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 330 mdi_pathmap_bypath = mod_hash_create_strhash( 331 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 332 mod_hash_null_valdtor); 
333 mdi_pathmap_byinstance = mod_hash_create_idhash( 334 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 335 mod_hash_null_valdtor); 336 } 337 338 /* 339 * mdi_get_component_type(): 340 * Return mpxio component type 341 * Return Values: 342 * MDI_COMPONENT_NONE 343 * MDI_COMPONENT_VHCI 344 * MDI_COMPONENT_PHCI 345 * MDI_COMPONENT_CLIENT 346 * XXX This doesn't work under multi-level MPxIO and should be 347 * removed when clients migrate mdi_component_is_*() interfaces. 348 */ 349 int 350 mdi_get_component_type(dev_info_t *dip) 351 { 352 return (DEVI(dip)->devi_mdi_component); 353 } 354 355 /* 356 * mdi_vhci_register(): 357 * Register a vHCI module with the mpxio framework 358 * mdi_vhci_register() is called by vHCI drivers to register the 359 * 'class_driver' vHCI driver and its MDI entrypoints with the 360 * mpxio framework. The vHCI driver must call this interface as 361 * part of its attach(9e) handler. 362 * Competing threads may try to attach mdi_vhci_register() as 363 * the vHCI drivers are loaded and attached as a result of pHCI 364 * driver instance registration (mdi_phci_register()) with the 365 * framework. 366 * Return Values: 367 * MDI_SUCCESS 368 * MDI_FAILURE 369 */ 370 /*ARGSUSED*/ 371 int 372 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 373 int flags) 374 { 375 mdi_vhci_t *vh = NULL; 376 377 /* Registrant can't be older */ 378 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 379 380 #ifdef DEBUG 381 /* 382 * IB nexus driver is loaded only when IB hardware is present. 383 * In order to be able to do this there is a need to drive the loading 384 * and attaching of the IB nexus driver (especially when an IB hardware 385 * is dynamically plugged in) when an IB HCA driver (PHCI) 386 * is being attached. Unfortunately this gets into the limitations 387 * of devfs as there seems to be no clean way to drive configuration 388 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 389 * for IB. 
390 */ 391 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 392 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 393 #endif 394 395 i_mdi_init(); 396 397 mutex_enter(&mdi_mutex); 398 /* 399 * Scan for already registered vhci 400 */ 401 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 402 if (strcmp(vh->vh_class, class) == 0) { 403 /* 404 * vHCI has already been created. Check for valid 405 * vHCI ops registration. We only support one vHCI 406 * module per class 407 */ 408 if (vh->vh_ops != NULL) { 409 mutex_exit(&mdi_mutex); 410 cmn_err(CE_NOTE, vhci_greeting, class); 411 return (MDI_FAILURE); 412 } 413 break; 414 } 415 } 416 417 /* 418 * if not yet created, create the vHCI component 419 */ 420 if (vh == NULL) { 421 struct client_hash *hash = NULL; 422 char *load_balance; 423 424 /* 425 * Allocate and initialize the mdi extensions 426 */ 427 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 428 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 429 KM_SLEEP); 430 vh->vh_client_table = hash; 431 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 432 (void) strcpy(vh->vh_class, class); 433 vh->vh_lb = LOAD_BALANCE_RR; 434 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 435 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 436 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 437 vh->vh_lb = LOAD_BALANCE_NONE; 438 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 439 == 0) { 440 vh->vh_lb = LOAD_BALANCE_LBA; 441 } 442 ddi_prop_free(load_balance); 443 } 444 445 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 446 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 447 448 /* 449 * Store the vHCI ops vectors 450 */ 451 vh->vh_dip = vdip; 452 vh->vh_ops = vops; 453 454 setup_vhci_cache(vh); 455 456 if (mdi_vhci_head == NULL) { 457 mdi_vhci_head = vh; 458 } 459 if (mdi_vhci_tail) { 460 mdi_vhci_tail->vh_next = vh; 461 } 462 mdi_vhci_tail = vh; 463 mdi_vhci_count++; 464 } 465 466 /* 467 * Claim the devfs node as a vhci 
component 468 */ 469 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 470 471 /* 472 * Initialize our back reference from dev_info node 473 */ 474 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 475 mutex_exit(&mdi_mutex); 476 return (MDI_SUCCESS); 477 } 478 479 /* 480 * mdi_vhci_unregister(): 481 * Unregister a vHCI module from mpxio framework 482 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 483 * of a vhci to unregister it from the framework. 484 * Return Values: 485 * MDI_SUCCESS 486 * MDI_FAILURE 487 */ 488 /*ARGSUSED*/ 489 int 490 mdi_vhci_unregister(dev_info_t *vdip, int flags) 491 { 492 mdi_vhci_t *found, *vh, *prev = NULL; 493 494 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 495 496 /* 497 * Check for invalid VHCI 498 */ 499 if ((vh = i_devi_get_vhci(vdip)) == NULL) 500 return (MDI_FAILURE); 501 502 /* 503 * Scan the list of registered vHCIs for a match 504 */ 505 mutex_enter(&mdi_mutex); 506 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 507 if (found == vh) 508 break; 509 prev = found; 510 } 511 512 if (found == NULL) { 513 mutex_exit(&mdi_mutex); 514 return (MDI_FAILURE); 515 } 516 517 /* 518 * Check the vHCI, pHCI and client count. All the pHCIs and clients 519 * should have been unregistered, before a vHCI can be 520 * unregistered. 
521 */ 522 MDI_VHCI_PHCI_LOCK(vh); 523 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 524 MDI_VHCI_PHCI_UNLOCK(vh); 525 mutex_exit(&mdi_mutex); 526 return (MDI_FAILURE); 527 } 528 MDI_VHCI_PHCI_UNLOCK(vh); 529 530 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 531 mutex_exit(&mdi_mutex); 532 return (MDI_FAILURE); 533 } 534 535 /* 536 * Remove the vHCI from the global list 537 */ 538 if (vh == mdi_vhci_head) { 539 mdi_vhci_head = vh->vh_next; 540 } else { 541 prev->vh_next = vh->vh_next; 542 } 543 if (vh == mdi_vhci_tail) { 544 mdi_vhci_tail = prev; 545 } 546 mdi_vhci_count--; 547 mutex_exit(&mdi_mutex); 548 549 vh->vh_ops = NULL; 550 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 551 DEVI(vdip)->devi_mdi_xhci = NULL; 552 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 553 kmem_free(vh->vh_client_table, 554 mdi_client_table_size * sizeof (struct client_hash)); 555 mutex_destroy(&vh->vh_phci_mutex); 556 mutex_destroy(&vh->vh_client_mutex); 557 558 kmem_free(vh, sizeof (mdi_vhci_t)); 559 return (MDI_SUCCESS); 560 } 561 562 /* 563 * i_mdi_vhci_class2vhci(): 564 * Look for a matching vHCI module given a vHCI class name 565 * Return Values: 566 * Handle to a vHCI component 567 * NULL 568 */ 569 static mdi_vhci_t * 570 i_mdi_vhci_class2vhci(char *class) 571 { 572 mdi_vhci_t *vh = NULL; 573 574 ASSERT(!MUTEX_HELD(&mdi_mutex)); 575 576 mutex_enter(&mdi_mutex); 577 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 578 if (strcmp(vh->vh_class, class) == 0) { 579 break; 580 } 581 } 582 mutex_exit(&mdi_mutex); 583 return (vh); 584 } 585 586 /* 587 * i_devi_get_vhci(): 588 * Utility function to get the handle to a vHCI component 589 * Return Values: 590 * Handle to a vHCI component 591 * NULL 592 */ 593 mdi_vhci_t * 594 i_devi_get_vhci(dev_info_t *vdip) 595 { 596 mdi_vhci_t *vh = NULL; 597 if (MDI_VHCI(vdip)) { 598 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 599 } 600 return (vh); 601 } 602 603 /* 604 * mdi_phci_register(): 605 * Register a pHCI 
module with mpxio framework 606 * mdi_phci_register() is called by pHCI drivers to register with 607 * the mpxio framework and a specific 'class_driver' vHCI. The 608 * pHCI driver must call this interface as part of its attach(9e) 609 * handler. 610 * Return Values: 611 * MDI_SUCCESS 612 * MDI_FAILURE 613 */ 614 /*ARGSUSED*/ 615 int 616 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 617 { 618 mdi_phci_t *ph; 619 mdi_vhci_t *vh; 620 char *data; 621 char *pathname; 622 623 /* 624 * Some subsystems, like fcp, perform pHCI registration from a 625 * different thread than the one doing the pHCI attach(9E) - the 626 * driver attach code is waiting for this other thread to complete. 627 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 628 * (indicating that some thread has done an ndi_devi_enter of parent) 629 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 630 */ 631 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 632 633 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 634 (void) ddi_pathname(pdip, pathname); 635 636 /* 637 * Check for mpxio-disable property. Enable mpxio if the property is 638 * missing or not set to "yes". 639 * If the property is set to "yes" then emit a brief message. 
640 */ 641 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 642 &data) == DDI_SUCCESS)) { 643 if (strcmp(data, "yes") == 0) { 644 MDI_DEBUG(1, (CE_CONT, pdip, 645 "?%s (%s%d) multipath capabilities " 646 "disabled via %s.conf.\n", pathname, 647 ddi_driver_name(pdip), ddi_get_instance(pdip), 648 ddi_driver_name(pdip))); 649 ddi_prop_free(data); 650 kmem_free(pathname, MAXPATHLEN); 651 return (MDI_FAILURE); 652 } 653 ddi_prop_free(data); 654 } 655 656 kmem_free(pathname, MAXPATHLEN); 657 658 /* 659 * Search for a matching vHCI 660 */ 661 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 662 if (vh == NULL) { 663 return (MDI_FAILURE); 664 } 665 666 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 667 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 668 ph->ph_dip = pdip; 669 ph->ph_vhci = vh; 670 ph->ph_next = NULL; 671 ph->ph_unstable = 0; 672 ph->ph_vprivate = 0; 673 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 674 675 MDI_PHCI_LOCK(ph); 676 MDI_PHCI_SET_POWER_UP(ph); 677 MDI_PHCI_UNLOCK(ph); 678 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 679 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 680 681 vhcache_phci_add(vh->vh_config, ph); 682 683 MDI_VHCI_PHCI_LOCK(vh); 684 if (vh->vh_phci_head == NULL) { 685 vh->vh_phci_head = ph; 686 } 687 if (vh->vh_phci_tail) { 688 vh->vh_phci_tail->ph_next = ph; 689 } 690 vh->vh_phci_tail = ph; 691 vh->vh_phci_count++; 692 MDI_VHCI_PHCI_UNLOCK(vh); 693 694 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 695 return (MDI_SUCCESS); 696 } 697 698 /* 699 * mdi_phci_unregister(): 700 * Unregister a pHCI module from mpxio framework 701 * mdi_phci_unregister() is called by the pHCI drivers from their 702 * detach(9E) handler to unregister their instances from the 703 * framework. 
704 * Return Values: 705 * MDI_SUCCESS 706 * MDI_FAILURE 707 */ 708 /*ARGSUSED*/ 709 int 710 mdi_phci_unregister(dev_info_t *pdip, int flags) 711 { 712 mdi_vhci_t *vh; 713 mdi_phci_t *ph; 714 mdi_phci_t *tmp; 715 mdi_phci_t *prev = NULL; 716 717 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 718 719 ph = i_devi_get_phci(pdip); 720 if (ph == NULL) { 721 MDI_DEBUG(1, (CE_WARN, pdip, 722 "!pHCI unregister: Not a valid pHCI")); 723 return (MDI_FAILURE); 724 } 725 726 vh = ph->ph_vhci; 727 ASSERT(vh != NULL); 728 if (vh == NULL) { 729 MDI_DEBUG(1, (CE_WARN, pdip, 730 "!pHCI unregister: Not a valid vHCI")); 731 return (MDI_FAILURE); 732 } 733 734 MDI_VHCI_PHCI_LOCK(vh); 735 tmp = vh->vh_phci_head; 736 while (tmp) { 737 if (tmp == ph) { 738 break; 739 } 740 prev = tmp; 741 tmp = tmp->ph_next; 742 } 743 744 if (ph == vh->vh_phci_head) { 745 vh->vh_phci_head = ph->ph_next; 746 } else { 747 prev->ph_next = ph->ph_next; 748 } 749 750 if (ph == vh->vh_phci_tail) { 751 vh->vh_phci_tail = prev; 752 } 753 754 vh->vh_phci_count--; 755 MDI_VHCI_PHCI_UNLOCK(vh); 756 757 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 758 ESC_DDI_INITIATOR_UNREGISTER); 759 vhcache_phci_remove(vh->vh_config, ph); 760 cv_destroy(&ph->ph_unstable_cv); 761 mutex_destroy(&ph->ph_mutex); 762 kmem_free(ph, sizeof (mdi_phci_t)); 763 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 764 DEVI(pdip)->devi_mdi_xhci = NULL; 765 return (MDI_SUCCESS); 766 } 767 768 /* 769 * i_devi_get_phci(): 770 * Utility function to return the phci extensions. 771 */ 772 static mdi_phci_t * 773 i_devi_get_phci(dev_info_t *pdip) 774 { 775 mdi_phci_t *ph = NULL; 776 777 if (MDI_PHCI(pdip)) { 778 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 779 } 780 return (ph); 781 } 782 783 /* 784 * Single thread mdi entry into devinfo node for modifying its children. 785 * If necessary we perform an ndi_devi_enter of the vHCI before doing 786 * an ndi_devi_enter of 'dip'. 
We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If the pHCI is detaching then the framework has already entered
	 * the vHCI on the thread that went down the code path leading to
	 * detach_node().  The framework enters the vHCI during pHCI detach
	 * to avoid deadlock with vHCI power management operations, which
	 * enter the vHCI and then enter down the path to the pHCI.  In that
	 * case this call piggybacks on the framework's vHCI enter.  That is
	 * OK because the framework thread doing the detach is waiting for
	 * our completion.  A vHCI half of -1 records that no vHCI enter of
	 * our own is outstanding.
	 *
	 * We should check DEVI_IS_DETACHING under an enter of the parent to
	 * avoid a race with detach.  We can't, because the framework has
	 * already entered the parent, so we poll with tryenter instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else {
			delay(1);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);

	/*
	 * Pack both values into one int: vHCI in the upper 16 bits, pHCI in
	 * the lower 16.  vcircular may be -1 here, and left-shifting a
	 * negative signed value is undefined in C, so shift as unsigned.
	 * mdi_devi_exit() recovers each half by truncating to short.
	 */
	*circular = (int)(((uint_t)vcircular << 16) |
	    ((uint_t)pcircular & 0xFFFF));
}

/*
 * Attempt to mdi_devi_enter.
837 */ 838 int 839 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 840 { 841 dev_info_t *vdip; 842 int vcircular, pcircular; 843 844 /* Verify calling context */ 845 ASSERT(MDI_PHCI(phci_dip)); 846 vdip = mdi_devi_get_vdip(phci_dip); 847 ASSERT(vdip); /* A pHCI always has a vHCI */ 848 849 if (ndi_devi_tryenter(vdip, &vcircular)) { 850 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 851 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 852 return (1); /* locked */ 853 } 854 ndi_devi_exit(vdip, vcircular); 855 } 856 return (0); /* busy */ 857 } 858 859 /* 860 * Release mdi_devi_enter or successful mdi_devi_tryenter. 861 */ 862 void 863 mdi_devi_exit(dev_info_t *phci_dip, int circular) 864 { 865 dev_info_t *vdip; 866 int vcircular, pcircular; 867 868 /* Verify calling context */ 869 ASSERT(MDI_PHCI(phci_dip)); 870 vdip = mdi_devi_get_vdip(phci_dip); 871 ASSERT(vdip); /* A pHCI always has a vHCI */ 872 873 /* extract two circular recursion values from single int */ 874 pcircular = (short)(circular & 0xFFFF); 875 vcircular = (short)((circular >> 16) & 0xFFFF); 876 877 ndi_devi_exit(phci_dip, pcircular); 878 if (vcircular != -1) 879 ndi_devi_exit(vdip, vcircular); 880 } 881 882 /* 883 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 884 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 885 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 886 * with vHCI power management code during path online/offline. Each 887 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 888 * occur within the scope of an active mdi_devi_enter that establishes the 889 * circular value. 
890 */ 891 void 892 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 893 { 894 int pcircular; 895 896 /* Verify calling context */ 897 ASSERT(MDI_PHCI(phci_dip)); 898 899 pcircular = (short)(circular & 0xFFFF); 900 ndi_devi_exit(phci_dip, pcircular); 901 } 902 903 void 904 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 905 { 906 int pcircular; 907 908 /* Verify calling context */ 909 ASSERT(MDI_PHCI(phci_dip)); 910 911 ndi_devi_enter(phci_dip, &pcircular); 912 913 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 914 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 915 } 916 917 /* 918 * mdi_devi_get_vdip(): 919 * given a pHCI dip return vHCI dip 920 */ 921 dev_info_t * 922 mdi_devi_get_vdip(dev_info_t *pdip) 923 { 924 mdi_phci_t *ph; 925 926 ph = i_devi_get_phci(pdip); 927 if (ph && ph->ph_vhci) 928 return (ph->ph_vhci->vh_dip); 929 return (NULL); 930 } 931 932 /* 933 * mdi_devi_pdip_entered(): 934 * Return 1 if we are vHCI and have done an ndi_devi_enter 935 * of a pHCI 936 */ 937 int 938 mdi_devi_pdip_entered(dev_info_t *vdip) 939 { 940 mdi_vhci_t *vh; 941 mdi_phci_t *ph; 942 943 vh = i_devi_get_vhci(vdip); 944 if (vh == NULL) 945 return (0); 946 947 MDI_VHCI_PHCI_LOCK(vh); 948 ph = vh->vh_phci_head; 949 while (ph) { 950 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 951 MDI_VHCI_PHCI_UNLOCK(vh); 952 return (1); 953 } 954 ph = ph->ph_next; 955 } 956 MDI_VHCI_PHCI_UNLOCK(vh); 957 return (0); 958 } 959 960 /* 961 * mdi_phci_path2devinfo(): 962 * Utility function to search for a valid phci device given 963 * the devfs pathname. 
964 */ 965 dev_info_t * 966 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 967 { 968 char *temp_pathname; 969 mdi_vhci_t *vh; 970 mdi_phci_t *ph; 971 dev_info_t *pdip = NULL; 972 973 vh = i_devi_get_vhci(vdip); 974 ASSERT(vh != NULL); 975 976 if (vh == NULL) { 977 /* 978 * Invalid vHCI component, return failure 979 */ 980 return (NULL); 981 } 982 983 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 984 MDI_VHCI_PHCI_LOCK(vh); 985 ph = vh->vh_phci_head; 986 while (ph != NULL) { 987 pdip = ph->ph_dip; 988 ASSERT(pdip != NULL); 989 *temp_pathname = '\0'; 990 (void) ddi_pathname(pdip, temp_pathname); 991 if (strcmp(temp_pathname, pathname) == 0) { 992 break; 993 } 994 ph = ph->ph_next; 995 } 996 if (ph == NULL) { 997 pdip = NULL; 998 } 999 MDI_VHCI_PHCI_UNLOCK(vh); 1000 kmem_free(temp_pathname, MAXPATHLEN); 1001 return (pdip); 1002 } 1003 1004 /* 1005 * mdi_phci_get_path_count(): 1006 * get number of path information nodes associated with a given 1007 * pHCI device. 1008 */ 1009 int 1010 mdi_phci_get_path_count(dev_info_t *pdip) 1011 { 1012 mdi_phci_t *ph; 1013 int count = 0; 1014 1015 ph = i_devi_get_phci(pdip); 1016 if (ph != NULL) { 1017 count = ph->ph_path_count; 1018 } 1019 return (count); 1020 } 1021 1022 /* 1023 * i_mdi_phci_lock(): 1024 * Lock a pHCI device 1025 * Return Values: 1026 * None 1027 * Note: 1028 * The default locking order is: 1029 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1030 * But there are number of situations where locks need to be 1031 * grabbed in reverse order. This routine implements try and lock 1032 * mechanism depending on the requested parameter option. 1033 */ 1034 static void 1035 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1036 { 1037 if (pip) { 1038 /* Reverse locking is requested. */ 1039 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1040 /* 1041 * tryenter failed. 
Try to grab again 1042 * after a small delay 1043 */ 1044 MDI_PI_HOLD(pip); 1045 MDI_PI_UNLOCK(pip); 1046 delay(1); 1047 MDI_PI_LOCK(pip); 1048 MDI_PI_RELE(pip); 1049 } 1050 } else { 1051 MDI_PHCI_LOCK(ph); 1052 } 1053 } 1054 1055 /* 1056 * i_mdi_phci_unlock(): 1057 * Unlock the pHCI component 1058 */ 1059 static void 1060 i_mdi_phci_unlock(mdi_phci_t *ph) 1061 { 1062 MDI_PHCI_UNLOCK(ph); 1063 } 1064 1065 /* 1066 * i_mdi_devinfo_create(): 1067 * create client device's devinfo node 1068 * Return Values: 1069 * dev_info 1070 * NULL 1071 * Notes: 1072 */ 1073 static dev_info_t * 1074 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1075 char **compatible, int ncompatible) 1076 { 1077 dev_info_t *cdip = NULL; 1078 1079 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1080 1081 /* Verify for duplicate entry */ 1082 cdip = i_mdi_devinfo_find(vh, name, guid); 1083 ASSERT(cdip == NULL); 1084 if (cdip) { 1085 cmn_err(CE_WARN, 1086 "i_mdi_devinfo_create: client dip %p already exists", 1087 (void *)cdip); 1088 } 1089 1090 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1091 if (cdip == NULL) 1092 goto fail; 1093 1094 /* 1095 * Create component type and Global unique identifier 1096 * properties 1097 */ 1098 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1099 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1100 goto fail; 1101 } 1102 1103 /* Decorate the node with compatible property */ 1104 if (compatible && 1105 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1106 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1107 goto fail; 1108 } 1109 1110 return (cdip); 1111 1112 fail: 1113 if (cdip) { 1114 (void) ndi_prop_remove_all(cdip); 1115 (void) ndi_devi_free(cdip); 1116 } 1117 return (NULL); 1118 } 1119 1120 /* 1121 * i_mdi_devinfo_find(): 1122 * Find a matching devinfo node for given client node name 1123 * and its guid. 
1124 * Return Values: 1125 * Handle to a dev_info node or NULL 1126 */ 1127 static dev_info_t * 1128 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1129 { 1130 char *data; 1131 dev_info_t *cdip = NULL; 1132 dev_info_t *ndip = NULL; 1133 int circular; 1134 1135 ndi_devi_enter(vh->vh_dip, &circular); 1136 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1137 while ((cdip = ndip) != NULL) { 1138 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1139 1140 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1141 continue; 1142 } 1143 1144 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1145 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1146 &data) != DDI_PROP_SUCCESS) { 1147 continue; 1148 } 1149 1150 if (strcmp(data, guid) != 0) { 1151 ddi_prop_free(data); 1152 continue; 1153 } 1154 ddi_prop_free(data); 1155 break; 1156 } 1157 ndi_devi_exit(vh->vh_dip, circular); 1158 return (cdip); 1159 } 1160 1161 /* 1162 * i_mdi_devinfo_remove(): 1163 * Remove a client device node 1164 */ 1165 static int 1166 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1167 { 1168 int rv = MDI_SUCCESS; 1169 1170 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1171 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1172 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1173 if (rv != NDI_SUCCESS) { 1174 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1175 " failed. 
cdip = %p\n", (void *)cdip)); 1176 } 1177 /* 1178 * Convert to MDI error code 1179 */ 1180 switch (rv) { 1181 case NDI_SUCCESS: 1182 rv = MDI_SUCCESS; 1183 break; 1184 case NDI_BUSY: 1185 rv = MDI_BUSY; 1186 break; 1187 default: 1188 rv = MDI_FAILURE; 1189 break; 1190 } 1191 } 1192 return (rv); 1193 } 1194 1195 /* 1196 * i_devi_get_client() 1197 * Utility function to get mpxio component extensions 1198 */ 1199 static mdi_client_t * 1200 i_devi_get_client(dev_info_t *cdip) 1201 { 1202 mdi_client_t *ct = NULL; 1203 1204 if (MDI_CLIENT(cdip)) { 1205 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1206 } 1207 return (ct); 1208 } 1209 1210 /* 1211 * i_mdi_is_child_present(): 1212 * Search for the presence of client device dev_info node 1213 */ 1214 static int 1215 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1216 { 1217 int rv = MDI_FAILURE; 1218 struct dev_info *dip; 1219 int circular; 1220 1221 ndi_devi_enter(vdip, &circular); 1222 dip = DEVI(vdip)->devi_child; 1223 while (dip) { 1224 if (dip == DEVI(cdip)) { 1225 rv = MDI_SUCCESS; 1226 break; 1227 } 1228 dip = dip->devi_sibling; 1229 } 1230 ndi_devi_exit(vdip, circular); 1231 return (rv); 1232 } 1233 1234 1235 /* 1236 * i_mdi_client_lock(): 1237 * Grab client component lock 1238 * Return Values: 1239 * None 1240 * Note: 1241 * The default locking order is: 1242 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1243 * But there are number of situations where locks need to be 1244 * grabbed in reverse order. This routine implements try and lock 1245 * mechanism depending on the requested parameter option. 1246 */ 1247 static void 1248 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1249 { 1250 if (pip) { 1251 /* 1252 * Reverse locking is requested. 1253 */ 1254 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1255 /* 1256 * tryenter failed. 
Try to grab again 1257 * after a small delay 1258 */ 1259 MDI_PI_HOLD(pip); 1260 MDI_PI_UNLOCK(pip); 1261 delay(1); 1262 MDI_PI_LOCK(pip); 1263 MDI_PI_RELE(pip); 1264 } 1265 } else { 1266 MDI_CLIENT_LOCK(ct); 1267 } 1268 } 1269 1270 /* 1271 * i_mdi_client_unlock(): 1272 * Unlock a client component 1273 */ 1274 static void 1275 i_mdi_client_unlock(mdi_client_t *ct) 1276 { 1277 MDI_CLIENT_UNLOCK(ct); 1278 } 1279 1280 /* 1281 * i_mdi_client_alloc(): 1282 * Allocate and initialize a client structure. Caller should 1283 * hold the vhci client lock. 1284 * Return Values: 1285 * Handle to a client component 1286 */ 1287 /*ARGSUSED*/ 1288 static mdi_client_t * 1289 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1290 { 1291 mdi_client_t *ct; 1292 1293 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1294 1295 /* 1296 * Allocate and initialize a component structure. 1297 */ 1298 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1299 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1300 ct->ct_hnext = NULL; 1301 ct->ct_hprev = NULL; 1302 ct->ct_dip = NULL; 1303 ct->ct_vhci = vh; 1304 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1305 (void) strcpy(ct->ct_drvname, name); 1306 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1307 (void) strcpy(ct->ct_guid, lguid); 1308 ct->ct_cprivate = NULL; 1309 ct->ct_vprivate = NULL; 1310 ct->ct_flags = 0; 1311 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1312 MDI_CLIENT_LOCK(ct); 1313 MDI_CLIENT_SET_OFFLINE(ct); 1314 MDI_CLIENT_SET_DETACH(ct); 1315 MDI_CLIENT_SET_POWER_UP(ct); 1316 MDI_CLIENT_UNLOCK(ct); 1317 ct->ct_failover_flags = 0; 1318 ct->ct_failover_status = 0; 1319 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1320 ct->ct_unstable = 0; 1321 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1322 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1323 ct->ct_lb = vh->vh_lb; 1324 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1325 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1326 ct->ct_path_count = 0; 1327 ct->ct_path_head = NULL; 1328 ct->ct_path_tail = NULL; 1329 ct->ct_path_last = NULL; 1330 1331 /* 1332 * Add this client component to our client hash queue 1333 */ 1334 i_mdi_client_enlist_table(vh, ct); 1335 return (ct); 1336 } 1337 1338 /* 1339 * i_mdi_client_enlist_table(): 1340 * Attach the client device to the client hash table. Caller 1341 * should hold the vhci client lock. 1342 */ 1343 static void 1344 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1345 { 1346 int index; 1347 struct client_hash *head; 1348 1349 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1350 1351 index = i_mdi_get_hash_key(ct->ct_guid); 1352 head = &vh->vh_client_table[index]; 1353 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1354 head->ct_hash_head = ct; 1355 head->ct_hash_count++; 1356 vh->vh_client_count++; 1357 } 1358 1359 /* 1360 * i_mdi_client_delist_table(): 1361 * Attach the client device to the client hash table. 1362 * Caller should hold the vhci client lock. 
1363 */ 1364 static void 1365 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1366 { 1367 int index; 1368 char *guid; 1369 struct client_hash *head; 1370 mdi_client_t *next; 1371 mdi_client_t *last; 1372 1373 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1374 1375 guid = ct->ct_guid; 1376 index = i_mdi_get_hash_key(guid); 1377 head = &vh->vh_client_table[index]; 1378 1379 last = NULL; 1380 next = (mdi_client_t *)head->ct_hash_head; 1381 while (next != NULL) { 1382 if (next == ct) { 1383 break; 1384 } 1385 last = next; 1386 next = next->ct_hnext; 1387 } 1388 1389 if (next) { 1390 head->ct_hash_count--; 1391 if (last == NULL) { 1392 head->ct_hash_head = ct->ct_hnext; 1393 } else { 1394 last->ct_hnext = ct->ct_hnext; 1395 } 1396 ct->ct_hnext = NULL; 1397 vh->vh_client_count--; 1398 } 1399 } 1400 1401 1402 /* 1403 * i_mdi_client_free(): 1404 * Free a client component 1405 */ 1406 static int 1407 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1408 { 1409 int rv = MDI_SUCCESS; 1410 int flags = ct->ct_flags; 1411 dev_info_t *cdip; 1412 dev_info_t *vdip; 1413 1414 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1415 1416 vdip = vh->vh_dip; 1417 cdip = ct->ct_dip; 1418 1419 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1420 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1421 DEVI(cdip)->devi_mdi_client = NULL; 1422 1423 /* 1424 * Clear out back ref. 
to dev_info_t node 1425 */ 1426 ct->ct_dip = NULL; 1427 1428 /* 1429 * Remove this client from our hash queue 1430 */ 1431 i_mdi_client_delist_table(vh, ct); 1432 1433 /* 1434 * Uninitialize and free the component 1435 */ 1436 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1437 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1438 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1439 cv_destroy(&ct->ct_failover_cv); 1440 cv_destroy(&ct->ct_unstable_cv); 1441 cv_destroy(&ct->ct_powerchange_cv); 1442 mutex_destroy(&ct->ct_mutex); 1443 kmem_free(ct, sizeof (*ct)); 1444 1445 if (cdip != NULL) { 1446 MDI_VHCI_CLIENT_UNLOCK(vh); 1447 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1448 MDI_VHCI_CLIENT_LOCK(vh); 1449 } 1450 return (rv); 1451 } 1452 1453 /* 1454 * i_mdi_client_find(): 1455 * Find the client structure corresponding to a given guid 1456 * Caller should hold the vhci client lock. 1457 */ 1458 static mdi_client_t * 1459 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1460 { 1461 int index; 1462 struct client_hash *head; 1463 mdi_client_t *ct; 1464 1465 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1466 1467 index = i_mdi_get_hash_key(guid); 1468 head = &vh->vh_client_table[index]; 1469 1470 ct = head->ct_hash_head; 1471 while (ct != NULL) { 1472 if (strcmp(ct->ct_guid, guid) == 0 && 1473 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1474 break; 1475 } 1476 ct = ct->ct_hnext; 1477 } 1478 return (ct); 1479 } 1480 1481 /* 1482 * i_mdi_client_update_state(): 1483 * Compute and update client device state 1484 * Notes: 1485 * A client device can be in any of three possible states: 1486 * 1487 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1488 * one online/standby paths. Can tolerate failures. 1489 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1490 * no alternate paths available as standby. A failure on the online 1491 * would result in loss of access to device data. 
1492 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1493 * no paths available to access the device. 1494 */ 1495 static void 1496 i_mdi_client_update_state(mdi_client_t *ct) 1497 { 1498 int state; 1499 1500 ASSERT(MDI_CLIENT_LOCKED(ct)); 1501 state = i_mdi_client_compute_state(ct, NULL); 1502 MDI_CLIENT_SET_STATE(ct, state); 1503 } 1504 1505 /* 1506 * i_mdi_client_compute_state(): 1507 * Compute client device state 1508 * 1509 * mdi_phci_t * Pointer to pHCI structure which should 1510 * while computing the new value. Used by 1511 * i_mdi_phci_offline() to find the new 1512 * client state after DR of a pHCI. 1513 */ 1514 static int 1515 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1516 { 1517 int state; 1518 int online_count = 0; 1519 int standby_count = 0; 1520 mdi_pathinfo_t *pip, *next; 1521 1522 ASSERT(MDI_CLIENT_LOCKED(ct)); 1523 pip = ct->ct_path_head; 1524 while (pip != NULL) { 1525 MDI_PI_LOCK(pip); 1526 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1527 if (MDI_PI(pip)->pi_phci == ph) { 1528 MDI_PI_UNLOCK(pip); 1529 pip = next; 1530 continue; 1531 } 1532 1533 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1534 == MDI_PATHINFO_STATE_ONLINE) 1535 online_count++; 1536 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1537 == MDI_PATHINFO_STATE_STANDBY) 1538 standby_count++; 1539 MDI_PI_UNLOCK(pip); 1540 pip = next; 1541 } 1542 1543 if (online_count == 0) { 1544 if (standby_count == 0) { 1545 state = MDI_CLIENT_STATE_FAILED; 1546 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1547 " ct = %p\n", (void *)ct)); 1548 } else if (standby_count == 1) { 1549 state = MDI_CLIENT_STATE_DEGRADED; 1550 } else { 1551 state = MDI_CLIENT_STATE_OPTIMAL; 1552 } 1553 } else if (online_count == 1) { 1554 if (standby_count == 0) { 1555 state = MDI_CLIENT_STATE_DEGRADED; 1556 } else { 1557 state = MDI_CLIENT_STATE_OPTIMAL; 1558 } 1559 } else { 1560 state = MDI_CLIENT_STATE_OPTIMAL; 1561 } 1562 return (state); 1563 } 1564 
1565 /* 1566 * i_mdi_client2devinfo(): 1567 * Utility function 1568 */ 1569 dev_info_t * 1570 i_mdi_client2devinfo(mdi_client_t *ct) 1571 { 1572 return (ct->ct_dip); 1573 } 1574 1575 /* 1576 * mdi_client_path2_devinfo(): 1577 * Given the parent devinfo and child devfs pathname, search for 1578 * a valid devfs node handle. 1579 */ 1580 dev_info_t * 1581 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1582 { 1583 dev_info_t *cdip = NULL; 1584 dev_info_t *ndip = NULL; 1585 char *temp_pathname; 1586 int circular; 1587 1588 /* 1589 * Allocate temp buffer 1590 */ 1591 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1592 1593 /* 1594 * Lock parent against changes 1595 */ 1596 ndi_devi_enter(vdip, &circular); 1597 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1598 while ((cdip = ndip) != NULL) { 1599 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1600 1601 *temp_pathname = '\0'; 1602 (void) ddi_pathname(cdip, temp_pathname); 1603 if (strcmp(temp_pathname, pathname) == 0) { 1604 break; 1605 } 1606 } 1607 /* 1608 * Release devinfo lock 1609 */ 1610 ndi_devi_exit(vdip, circular); 1611 1612 /* 1613 * Free the temp buffer 1614 */ 1615 kmem_free(temp_pathname, MAXPATHLEN); 1616 return (cdip); 1617 } 1618 1619 /* 1620 * mdi_client_get_path_count(): 1621 * Utility function to get number of path information nodes 1622 * associated with a given client device. 
1623 */ 1624 int 1625 mdi_client_get_path_count(dev_info_t *cdip) 1626 { 1627 mdi_client_t *ct; 1628 int count = 0; 1629 1630 ct = i_devi_get_client(cdip); 1631 if (ct != NULL) { 1632 count = ct->ct_path_count; 1633 } 1634 return (count); 1635 } 1636 1637 1638 /* 1639 * i_mdi_get_hash_key(): 1640 * Create a hash using strings as keys 1641 * 1642 */ 1643 static int 1644 i_mdi_get_hash_key(char *str) 1645 { 1646 uint32_t g, hash = 0; 1647 char *p; 1648 1649 for (p = str; *p != '\0'; p++) { 1650 g = *p; 1651 hash += g; 1652 } 1653 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1654 } 1655 1656 /* 1657 * mdi_get_lb_policy(): 1658 * Get current load balancing policy for a given client device 1659 */ 1660 client_lb_t 1661 mdi_get_lb_policy(dev_info_t *cdip) 1662 { 1663 client_lb_t lb = LOAD_BALANCE_NONE; 1664 mdi_client_t *ct; 1665 1666 ct = i_devi_get_client(cdip); 1667 if (ct != NULL) { 1668 lb = ct->ct_lb; 1669 } 1670 return (lb); 1671 } 1672 1673 /* 1674 * mdi_set_lb_region_size(): 1675 * Set current region size for the load-balance 1676 */ 1677 int 1678 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1679 { 1680 mdi_client_t *ct; 1681 int rv = MDI_FAILURE; 1682 1683 ct = i_devi_get_client(cdip); 1684 if (ct != NULL && ct->ct_lb_args != NULL) { 1685 ct->ct_lb_args->region_size = region_size; 1686 rv = MDI_SUCCESS; 1687 } 1688 return (rv); 1689 } 1690 1691 /* 1692 * mdi_Set_lb_policy(): 1693 * Set current load balancing policy for a given client device 1694 */ 1695 int 1696 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1697 { 1698 mdi_client_t *ct; 1699 int rv = MDI_FAILURE; 1700 1701 ct = i_devi_get_client(cdip); 1702 if (ct != NULL) { 1703 ct->ct_lb = lb; 1704 rv = MDI_SUCCESS; 1705 } 1706 return (rv); 1707 } 1708 1709 /* 1710 * mdi_failover(): 1711 * failover function called by the vHCI drivers to initiate 1712 * a failover operation. This is typically due to non-availability 1713 * of online paths to route I/O requests. 
Failover can be 1714 * triggered through user application also. 1715 * 1716 * The vHCI driver calls mdi_failover() to initiate a failover 1717 * operation. mdi_failover() calls back into the vHCI driver's 1718 * vo_failover() entry point to perform the actual failover 1719 * operation. The reason for requiring the vHCI driver to 1720 * initiate failover by calling mdi_failover(), instead of directly 1721 * executing vo_failover() itself, is to ensure that the mdi 1722 * framework can keep track of the client state properly. 1723 * Additionally, mdi_failover() provides as a convenience the 1724 * option of performing the failover operation synchronously or 1725 * asynchronously 1726 * 1727 * Upon successful completion of the failover operation, the 1728 * paths that were previously ONLINE will be in the STANDBY state, 1729 * and the newly activated paths will be in the ONLINE state. 1730 * 1731 * The flags modifier determines whether the activation is done 1732 * synchronously: MDI_FAILOVER_SYNC 1733 * Return Values: 1734 * MDI_SUCCESS 1735 * MDI_FAILURE 1736 * MDI_BUSY 1737 */ 1738 /*ARGSUSED*/ 1739 int 1740 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1741 { 1742 int rv; 1743 mdi_client_t *ct; 1744 1745 ct = i_devi_get_client(cdip); 1746 ASSERT(ct != NULL); 1747 if (ct == NULL) { 1748 /* cdip is not a valid client device. Nothing more to do. */ 1749 return (MDI_FAILURE); 1750 } 1751 1752 MDI_CLIENT_LOCK(ct); 1753 1754 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1755 /* A path to the client is being freed */ 1756 MDI_CLIENT_UNLOCK(ct); 1757 return (MDI_BUSY); 1758 } 1759 1760 1761 if (MDI_CLIENT_IS_FAILED(ct)) { 1762 /* 1763 * Client is in failed state. Nothing more to do. 
1764 */ 1765 MDI_CLIENT_UNLOCK(ct); 1766 return (MDI_FAILURE); 1767 } 1768 1769 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1770 /* 1771 * Failover is already in progress; return BUSY 1772 */ 1773 MDI_CLIENT_UNLOCK(ct); 1774 return (MDI_BUSY); 1775 } 1776 /* 1777 * Make sure that mdi_pathinfo node state changes are processed. 1778 * We do not allow failovers to progress while client path state 1779 * changes are in progress 1780 */ 1781 if (ct->ct_unstable) { 1782 if (flags == MDI_FAILOVER_ASYNC) { 1783 MDI_CLIENT_UNLOCK(ct); 1784 return (MDI_BUSY); 1785 } else { 1786 while (ct->ct_unstable) 1787 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1788 } 1789 } 1790 1791 /* 1792 * Client device is in stable state. Before proceeding, perform sanity 1793 * checks again. 1794 */ 1795 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1796 (!i_ddi_devi_attached(ct->ct_dip))) { 1797 /* 1798 * Client is in failed state. Nothing more to do. 1799 */ 1800 MDI_CLIENT_UNLOCK(ct); 1801 return (MDI_FAILURE); 1802 } 1803 1804 /* 1805 * Set the client state as failover in progress. 1806 */ 1807 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1808 ct->ct_failover_flags = flags; 1809 MDI_CLIENT_UNLOCK(ct); 1810 1811 if (flags == MDI_FAILOVER_ASYNC) { 1812 /* 1813 * Submit the initiate failover request via CPR safe 1814 * taskq threads. 1815 */ 1816 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1817 ct, KM_SLEEP); 1818 return (MDI_ACCEPT); 1819 } else { 1820 /* 1821 * Synchronous failover mode. Typically invoked from the user 1822 * land. 1823 */ 1824 rv = i_mdi_failover(ct); 1825 } 1826 return (rv); 1827 } 1828 1829 /* 1830 * i_mdi_failover(): 1831 * internal failover function. Invokes vHCI drivers failover 1832 * callback function and process the failover status 1833 * Return Values: 1834 * None 1835 * 1836 * Note: A client device in failover state can not be detached or freed. 
1837 */ 1838 static int 1839 i_mdi_failover(void *arg) 1840 { 1841 int rv = MDI_SUCCESS; 1842 mdi_client_t *ct = (mdi_client_t *)arg; 1843 mdi_vhci_t *vh = ct->ct_vhci; 1844 1845 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1846 1847 if (vh->vh_ops->vo_failover != NULL) { 1848 /* 1849 * Call vHCI drivers callback routine 1850 */ 1851 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1852 ct->ct_failover_flags); 1853 } 1854 1855 MDI_CLIENT_LOCK(ct); 1856 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1857 1858 /* 1859 * Save the failover return status 1860 */ 1861 ct->ct_failover_status = rv; 1862 1863 /* 1864 * As a result of failover, client status would have been changed. 1865 * Update the client state and wake up anyone waiting on this client 1866 * device. 1867 */ 1868 i_mdi_client_update_state(ct); 1869 1870 cv_broadcast(&ct->ct_failover_cv); 1871 MDI_CLIENT_UNLOCK(ct); 1872 return (rv); 1873 } 1874 1875 /* 1876 * Load balancing is logical block. 1877 * IOs within the range described by region_size 1878 * would go on the same path. This would improve the 1879 * performance by cache-hit on some of the RAID devices. 1880 * Search only for online paths(At some point we 1881 * may want to balance across target ports). 1882 * If no paths are found then default to round-robin. 
1883 */ 1884 static int 1885 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1886 { 1887 int path_index = -1; 1888 int online_path_count = 0; 1889 int online_nonpref_path_count = 0; 1890 int region_size = ct->ct_lb_args->region_size; 1891 mdi_pathinfo_t *pip; 1892 mdi_pathinfo_t *next; 1893 int preferred, path_cnt; 1894 1895 pip = ct->ct_path_head; 1896 while (pip) { 1897 MDI_PI_LOCK(pip); 1898 if (MDI_PI(pip)->pi_state == 1899 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1900 online_path_count++; 1901 } else if (MDI_PI(pip)->pi_state == 1902 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1903 online_nonpref_path_count++; 1904 } 1905 next = (mdi_pathinfo_t *) 1906 MDI_PI(pip)->pi_client_link; 1907 MDI_PI_UNLOCK(pip); 1908 pip = next; 1909 } 1910 /* if found any online/preferred then use this type */ 1911 if (online_path_count > 0) { 1912 path_cnt = online_path_count; 1913 preferred = 1; 1914 } else if (online_nonpref_path_count > 0) { 1915 path_cnt = online_nonpref_path_count; 1916 preferred = 0; 1917 } else { 1918 path_cnt = 0; 1919 } 1920 if (path_cnt) { 1921 path_index = (bp->b_blkno >> region_size) % path_cnt; 1922 pip = ct->ct_path_head; 1923 while (pip && path_index != -1) { 1924 MDI_PI_LOCK(pip); 1925 if (path_index == 0 && 1926 (MDI_PI(pip)->pi_state == 1927 MDI_PATHINFO_STATE_ONLINE) && 1928 MDI_PI(pip)->pi_preferred == preferred) { 1929 MDI_PI_HOLD(pip); 1930 MDI_PI_UNLOCK(pip); 1931 *ret_pip = pip; 1932 return (MDI_SUCCESS); 1933 } 1934 path_index --; 1935 next = (mdi_pathinfo_t *) 1936 MDI_PI(pip)->pi_client_link; 1937 MDI_PI_UNLOCK(pip); 1938 pip = next; 1939 } 1940 if (pip == NULL) { 1941 MDI_DEBUG(4, (CE_NOTE, NULL, 1942 "!lba %llx, no pip !!\n", 1943 bp->b_lblkno)); 1944 } else { 1945 MDI_DEBUG(4, (CE_NOTE, NULL, 1946 "!lba %llx, no pip for path_index, " 1947 "pip %p\n", bp->b_lblkno, (void *)pip)); 1948 } 1949 } 1950 return (MDI_FAILURE); 1951 } 1952 1953 /* 1954 * mdi_select_path(): 1955 * select a 
 *		path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block).  The load balancing policy is
 *		set through the vHCI driver's configuration file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  The meaning of the third argument depends
 *		on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
 *		then the argument is the "path instance" of the path to select.
 *		If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *		"start_pip". A non NULL "start_pip" is the starting point to
 *		walk and find the next appropriate path.  The following values
 *		are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *		ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
 *		STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt). Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path matched but at least one path was in a
 *				  transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    void *arg, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;
	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
	int		path_instance;	/* request specific path instance */

	/* determine type of arg based on flags */
	if (flags & MDI_SELECT_PATH_INSTANCE) {
		flags &= ~MDI_SELECT_PATH_INSTANCE;
		path_instance = (int)(intptr_t)arg;
		start_pip = NULL;
	} else {
		path_instance = 0;
		start_pip = (mdi_pathinfo_t *)arg;
	}

	if (flags != 0) {
		/*
		 * Any remaining flag disables the default behavior.
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	/* The client state checks apply to the standard behavior only. */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/* Caller is specifying a specific pathinfo path by path_instance */
	if (path_instance) {
		/* search for pathinfo with correct path_instance */
		for (pip = head;
		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
			;

		/* If path can't be selected then MDI_FAILURE is returned. */
		if (pip == NULL) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/* verify state of path: only an ONLINE path is acceptable */
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/*
		 * Return the path in hold state. Caller should release the
		 * hold by calling mdi_rele_path()
		 */
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
		ct->ct_path_last = pip;
		*ret_pip = pip;
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy: remember it so that MDI_BUSY rather
			 * than MDI_NOPATH is returned if nothing is found.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 * (the list is walked circularly)
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * Back at the start: the first full lap looked at
			 * preferred paths, the second one looks at
			 * non-preferred paths, after that give up.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
		    (ct->ct_lb_args->region_size) && bp &&
		    (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
			    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * The search begins at the path following 'start': the last
		 * selected path for the standard behavior, the caller's
		 * start_pip otherwise.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if ( flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Return since we hit the end of list
					 */
					MDI_CLIENT_UNLOCK(ct);
					return (MDI_NOPATH);
				}

				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set to 1 if this path satisfies the
			 * request.  Standard behavior wants an ONLINE path of
			 * the current preference; otherwise the exact value
			 * of flags decides which states are acceptable.  A
			 * flags combination not listed below matches nothing.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
				    MDI_PI(pip)->pi_preferred ==
				    preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_NO_PREFERRED)) {
					/* path preference is ignored here */
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE) ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY))
					    ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path().
				 * Only the standard behavior records the path
				 * as the last one selected.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy: remember it so that MDI_BUSY rather
			 * than MDI_NOPATH is returned if nothing is found.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * pip is locked whenever control reaches do_again,
			 * including via the goto below.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if ( flags & MDI_SELECT_NO_PREFERRED) {
					/*
					 * Bail out since we hit the end of list
					 */
					MDI_PI_UNLOCK(pip);
					break;
				}

				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report MDI_BUSY if a path in a transient
	 * state was seen during the search, MDI_NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even if the list is stable the mdi_pathinfo
 *		node state and properties are volatile.
The caller should lock 2459 * and unlock the nodes by calling mdi_pi_lock() and 2460 * mdi_pi_unlock() functions to get a stable properties. 2461 * 2462 * If there is a need to use the nodes beyond the hold of the 2463 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2464 * need to be held against unexpected removal by calling 2465 * mdi_hold_path() and should be released by calling 2466 * mdi_rele_path() on completion. 2467 */ 2468 mdi_pathinfo_t * 2469 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2470 { 2471 mdi_client_t *ct; 2472 2473 if (!MDI_CLIENT(ct_dip)) 2474 return (NULL); 2475 2476 /* 2477 * Walk through client link 2478 */ 2479 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2480 ASSERT(ct != NULL); 2481 2482 if (pip == NULL) 2483 return ((mdi_pathinfo_t *)ct->ct_path_head); 2484 2485 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2486 } 2487 2488 /* 2489 * For a phci, return the next available path to any client 2490 * Note: ditto mdi_get_next_phci_path() 2491 */ 2492 mdi_pathinfo_t * 2493 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2494 { 2495 mdi_phci_t *ph; 2496 2497 if (!MDI_PHCI(ph_dip)) 2498 return (NULL); 2499 2500 /* 2501 * Walk through pHCI link 2502 */ 2503 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2504 ASSERT(ph != NULL); 2505 2506 if (pip == NULL) 2507 return ((mdi_pathinfo_t *)ph->ph_path_head); 2508 2509 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2510 } 2511 2512 /* 2513 * mdi_hold_path(): 2514 * Hold the mdi_pathinfo node against unwanted unexpected free. 2515 * Return Values: 2516 * None 2517 */ 2518 void 2519 mdi_hold_path(mdi_pathinfo_t *pip) 2520 { 2521 if (pip) { 2522 MDI_PI_LOCK(pip); 2523 MDI_PI_HOLD(pip); 2524 MDI_PI_UNLOCK(pip); 2525 } 2526 } 2527 2528 2529 /* 2530 * mdi_rele_path(): 2531 * Release the mdi_pathinfo node which was selected 2532 * through mdi_select_path() mechanism or manually held by 2533 * calling mdi_hold_path(). 
2534 * Return Values: 2535 * None 2536 */ 2537 void 2538 mdi_rele_path(mdi_pathinfo_t *pip) 2539 { 2540 if (pip) { 2541 MDI_PI_LOCK(pip); 2542 MDI_PI_RELE(pip); 2543 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2544 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2545 } 2546 MDI_PI_UNLOCK(pip); 2547 } 2548 } 2549 2550 /* 2551 * mdi_pi_lock(): 2552 * Lock the mdi_pathinfo node. 2553 * Note: 2554 * The caller should release the lock by calling mdi_pi_unlock() 2555 */ 2556 void 2557 mdi_pi_lock(mdi_pathinfo_t *pip) 2558 { 2559 ASSERT(pip != NULL); 2560 if (pip) { 2561 MDI_PI_LOCK(pip); 2562 } 2563 } 2564 2565 2566 /* 2567 * mdi_pi_unlock(): 2568 * Unlock the mdi_pathinfo node. 2569 * Note: 2570 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2571 */ 2572 void 2573 mdi_pi_unlock(mdi_pathinfo_t *pip) 2574 { 2575 ASSERT(pip != NULL); 2576 if (pip) { 2577 MDI_PI_UNLOCK(pip); 2578 } 2579 } 2580 2581 /* 2582 * mdi_pi_find(): 2583 * Search the list of mdi_pathinfo nodes attached to the 2584 * pHCI/Client device node whose path address matches "paddr". 2585 * Returns a pointer to the mdi_pathinfo node if a matching node is 2586 * found. 2587 * Return Values: 2588 * mdi_pathinfo node handle 2589 * NULL 2590 * Notes: 2591 * Caller need not hold any locks to call this function. 2592 */ 2593 mdi_pathinfo_t * 2594 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2595 { 2596 mdi_phci_t *ph; 2597 mdi_vhci_t *vh; 2598 mdi_client_t *ct; 2599 mdi_pathinfo_t *pip = NULL; 2600 2601 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s", 2602 caddr ? caddr : "NULL", paddr ? paddr : "NULL")); 2603 if ((pdip == NULL) || (paddr == NULL)) { 2604 return (NULL); 2605 } 2606 ph = i_devi_get_phci(pdip); 2607 if (ph == NULL) { 2608 /* 2609 * Invalid pHCI device, Nothing more to do. 2610 */ 2611 MDI_DEBUG(2, (CE_WARN, pdip, 2612 "!mdi_pi_find: invalid phci")); 2613 return (NULL); 2614 } 2615 2616 vh = ph->ph_vhci; 2617 if (vh == NULL) { 2618 /* 2619 * Invalid vHCI device, Nothing more to do. 
2620 */ 2621 MDI_DEBUG(2, (CE_WARN, pdip, 2622 "!mdi_pi_find: invalid vhci")); 2623 return (NULL); 2624 } 2625 2626 /* 2627 * Look for pathinfo node identified by paddr. 2628 */ 2629 if (caddr == NULL) { 2630 /* 2631 * Find a mdi_pathinfo node under pHCI list for a matching 2632 * unit address. 2633 */ 2634 MDI_PHCI_LOCK(ph); 2635 if (MDI_PHCI_IS_OFFLINE(ph)) { 2636 MDI_DEBUG(2, (CE_WARN, pdip, 2637 "!mdi_pi_find: offline phci %p", (void *)ph)); 2638 MDI_PHCI_UNLOCK(ph); 2639 return (NULL); 2640 } 2641 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2642 2643 while (pip != NULL) { 2644 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2645 break; 2646 } 2647 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2648 } 2649 MDI_PHCI_UNLOCK(ph); 2650 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p", 2651 (void *)pip)); 2652 return (pip); 2653 } 2654 2655 /* 2656 * XXX - Is the rest of the code in this function really necessary? 2657 * The consumers of mdi_pi_find() can search for the desired pathinfo 2658 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2659 * whether the search is based on the pathinfo nodes attached to 2660 * the pHCI or the client node, the result will be the same. 2661 */ 2662 2663 /* 2664 * Find the client device corresponding to 'caddr' 2665 */ 2666 MDI_VHCI_CLIENT_LOCK(vh); 2667 2668 /* 2669 * XXX - Passing NULL to the following function works as long as the 2670 * the client addresses (caddr) are unique per vhci basis. 2671 */ 2672 ct = i_mdi_client_find(vh, NULL, caddr); 2673 if (ct == NULL) { 2674 /* 2675 * Client not found, Obviously mdi_pathinfo node has not been 2676 * created yet. 2677 */ 2678 MDI_VHCI_CLIENT_UNLOCK(vh); 2679 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not " 2680 "found for caddr %s", caddr ? 
caddr : "NULL")); 2681 return (NULL); 2682 } 2683 2684 /* 2685 * Hold the client lock and look for a mdi_pathinfo node with matching 2686 * pHCI and paddr 2687 */ 2688 MDI_CLIENT_LOCK(ct); 2689 2690 /* 2691 * Release the global mutex as it is no more needed. Note: We always 2692 * respect the locking order while acquiring. 2693 */ 2694 MDI_VHCI_CLIENT_UNLOCK(vh); 2695 2696 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2697 while (pip != NULL) { 2698 /* 2699 * Compare the unit address 2700 */ 2701 if ((MDI_PI(pip)->pi_phci == ph) && 2702 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2703 break; 2704 } 2705 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2706 } 2707 MDI_CLIENT_UNLOCK(ct); 2708 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip)); 2709 return (pip); 2710 } 2711 2712 /* 2713 * mdi_pi_alloc(): 2714 * Allocate and initialize a new instance of a mdi_pathinfo node. 2715 * The mdi_pathinfo node returned by this function identifies a 2716 * unique device path is capable of having properties attached 2717 * and passed to mdi_pi_online() to fully attach and online the 2718 * path and client device node. 2719 * The mdi_pathinfo node returned by this function must be 2720 * destroyed using mdi_pi_free() if the path is no longer 2721 * operational or if the caller fails to attach a client device 2722 * node when calling mdi_pi_online(). The framework will not free 2723 * the resources allocated. 2724 * This function can be called from both interrupt and kernel 2725 * contexts. DDI_NOSLEEP flag should be used while calling 2726 * from interrupt contexts. 
2727 * Return Values: 2728 * MDI_SUCCESS 2729 * MDI_FAILURE 2730 * MDI_NOMEM 2731 */ 2732 /*ARGSUSED*/ 2733 int 2734 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2735 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2736 { 2737 mdi_vhci_t *vh; 2738 mdi_phci_t *ph; 2739 mdi_client_t *ct; 2740 mdi_pathinfo_t *pip = NULL; 2741 dev_info_t *cdip; 2742 int rv = MDI_NOMEM; 2743 int path_allocated = 0; 2744 2745 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s", 2746 cname ? cname : "NULL", caddr ? caddr : "NULL", 2747 paddr ? paddr : "NULL")); 2748 2749 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2750 ret_pip == NULL) { 2751 /* Nothing more to do */ 2752 return (MDI_FAILURE); 2753 } 2754 2755 *ret_pip = NULL; 2756 2757 /* No allocations on detaching pHCI */ 2758 if (DEVI_IS_DETACHING(pdip)) { 2759 /* Invalid pHCI device, return failure */ 2760 MDI_DEBUG(1, (CE_WARN, pdip, 2761 "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip)); 2762 return (MDI_FAILURE); 2763 } 2764 2765 ph = i_devi_get_phci(pdip); 2766 ASSERT(ph != NULL); 2767 if (ph == NULL) { 2768 /* Invalid pHCI device, return failure */ 2769 MDI_DEBUG(1, (CE_WARN, pdip, 2770 "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip)); 2771 return (MDI_FAILURE); 2772 } 2773 2774 MDI_PHCI_LOCK(ph); 2775 vh = ph->ph_vhci; 2776 if (vh == NULL) { 2777 /* Invalid vHCI device, return failure */ 2778 MDI_DEBUG(1, (CE_WARN, pdip, 2779 "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip)); 2780 MDI_PHCI_UNLOCK(ph); 2781 return (MDI_FAILURE); 2782 } 2783 2784 if (MDI_PHCI_IS_READY(ph) == 0) { 2785 /* 2786 * Do not allow new node creation when pHCI is in 2787 * offline/suspended states 2788 */ 2789 MDI_DEBUG(1, (CE_WARN, pdip, 2790 "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph)); 2791 MDI_PHCI_UNLOCK(ph); 2792 return (MDI_BUSY); 2793 } 2794 MDI_PHCI_UNSTABLE(ph); 2795 MDI_PHCI_UNLOCK(ph); 2796 2797 /* look for a matching client, create one 
if not found */ 2798 MDI_VHCI_CLIENT_LOCK(vh); 2799 ct = i_mdi_client_find(vh, cname, caddr); 2800 if (ct == NULL) { 2801 ct = i_mdi_client_alloc(vh, cname, caddr); 2802 ASSERT(ct != NULL); 2803 } 2804 2805 if (ct->ct_dip == NULL) { 2806 /* 2807 * Allocate a devinfo node 2808 */ 2809 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2810 compatible, ncompatible); 2811 if (ct->ct_dip == NULL) { 2812 (void) i_mdi_client_free(vh, ct); 2813 goto fail; 2814 } 2815 } 2816 cdip = ct->ct_dip; 2817 2818 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2819 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2820 2821 MDI_CLIENT_LOCK(ct); 2822 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2823 while (pip != NULL) { 2824 /* 2825 * Compare the unit address 2826 */ 2827 if ((MDI_PI(pip)->pi_phci == ph) && 2828 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2829 break; 2830 } 2831 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2832 } 2833 MDI_CLIENT_UNLOCK(ct); 2834 2835 if (pip == NULL) { 2836 /* 2837 * This is a new path for this client device. Allocate and 2838 * initialize a new pathinfo node 2839 */ 2840 pip = i_mdi_pi_alloc(ph, paddr, ct); 2841 ASSERT(pip != NULL); 2842 path_allocated = 1; 2843 } 2844 rv = MDI_SUCCESS; 2845 2846 fail: 2847 /* 2848 * Release the global mutex. 
2849 */ 2850 MDI_VHCI_CLIENT_UNLOCK(vh); 2851 2852 /* 2853 * Mark the pHCI as stable 2854 */ 2855 MDI_PHCI_LOCK(ph); 2856 MDI_PHCI_STABLE(ph); 2857 MDI_PHCI_UNLOCK(ph); 2858 *ret_pip = pip; 2859 2860 MDI_DEBUG(2, (CE_NOTE, pdip, 2861 "!mdi_pi_alloc_compatible: alloc %p", (void *)pip)); 2862 2863 if (path_allocated) 2864 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2865 2866 return (rv); 2867 } 2868 2869 /*ARGSUSED*/ 2870 int 2871 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2872 int flags, mdi_pathinfo_t **ret_pip) 2873 { 2874 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2875 flags, ret_pip)); 2876 } 2877 2878 /* 2879 * i_mdi_pi_alloc(): 2880 * Allocate a mdi_pathinfo node and add to the pHCI path list 2881 * Return Values: 2882 * mdi_pathinfo 2883 */ 2884 /*ARGSUSED*/ 2885 static mdi_pathinfo_t * 2886 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2887 { 2888 mdi_pathinfo_t *pip; 2889 int ct_circular; 2890 int ph_circular; 2891 static char path[MAXPATHLEN]; 2892 char *path_persistent; 2893 int path_instance; 2894 mod_hash_val_t hv; 2895 2896 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2897 2898 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2899 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2900 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2901 MDI_PATHINFO_STATE_TRANSIENT; 2902 2903 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2904 MDI_PI_SET_USER_DISABLE(pip); 2905 2906 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2907 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2908 2909 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2910 MDI_PI_SET_DRV_DISABLE(pip); 2911 2912 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2913 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2914 MDI_PI(pip)->pi_client = ct; 2915 MDI_PI(pip)->pi_phci = ph; 2916 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2917 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2918 2919 /* 2920 * We form the "path" to 
the pathinfo node, and see if we have 2921 * already allocated a 'path_instance' for that "path". If so, 2922 * we use the already allocated 'path_instance'. If not, we 2923 * allocate a new 'path_instance' and associate it with a copy of 2924 * the "path" string (which is never freed). The association 2925 * between a 'path_instance' this "path" string persists until 2926 * reboot. 2927 */ 2928 mutex_enter(&mdi_pathmap_mutex); 2929 (void) ddi_pathname(ph->ph_dip, path); 2930 (void) sprintf(path + strlen(path), "/%s@%s", 2931 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2932 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2933 path_instance = (uint_t)(intptr_t)hv; 2934 } else { 2935 /* allocate a new 'path_instance' and persistent "path" */ 2936 path_instance = mdi_pathmap_instance++; 2937 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2938 (void) mod_hash_insert(mdi_pathmap_bypath, 2939 (mod_hash_key_t)path_persistent, 2940 (mod_hash_val_t)(intptr_t)path_instance); 2941 (void) mod_hash_insert(mdi_pathmap_byinstance, 2942 (mod_hash_key_t)(intptr_t)path_instance, 2943 (mod_hash_val_t)path_persistent); 2944 } 2945 mutex_exit(&mdi_pathmap_mutex); 2946 MDI_PI(pip)->pi_path_instance = path_instance; 2947 2948 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2949 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2950 MDI_PI(pip)->pi_pprivate = NULL; 2951 MDI_PI(pip)->pi_cprivate = NULL; 2952 MDI_PI(pip)->pi_vprivate = NULL; 2953 MDI_PI(pip)->pi_client_link = NULL; 2954 MDI_PI(pip)->pi_phci_link = NULL; 2955 MDI_PI(pip)->pi_ref_cnt = 0; 2956 MDI_PI(pip)->pi_kstats = NULL; 2957 MDI_PI(pip)->pi_preferred = 1; 2958 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2959 2960 /* 2961 * Lock both dev_info nodes against changes in parallel. 2962 * 2963 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 
2964 * This atypical operation is done to synchronize pathinfo nodes 2965 * during devinfo snapshot (see di_register_pip) by 'pretending' that 2966 * the pathinfo nodes are children of the Client. 2967 */ 2968 ndi_devi_enter(ct->ct_dip, &ct_circular); 2969 ndi_devi_enter(ph->ph_dip, &ph_circular); 2970 2971 i_mdi_phci_add_path(ph, pip); 2972 i_mdi_client_add_path(ct, pip); 2973 2974 ndi_devi_exit(ph->ph_dip, ph_circular); 2975 ndi_devi_exit(ct->ct_dip, ct_circular); 2976 2977 return (pip); 2978 } 2979 2980 /* 2981 * mdi_pi_pathname_by_instance(): 2982 * Lookup of "path" by 'path_instance'. Return "path". 2983 * NOTE: returned "path" remains valid forever (until reboot). 2984 */ 2985 char * 2986 mdi_pi_pathname_by_instance(int path_instance) 2987 { 2988 char *path; 2989 mod_hash_val_t hv; 2990 2991 /* mdi_pathmap lookup of "path" by 'path_instance' */ 2992 mutex_enter(&mdi_pathmap_mutex); 2993 if (mod_hash_find(mdi_pathmap_byinstance, 2994 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 2995 path = (char *)hv; 2996 else 2997 path = NULL; 2998 mutex_exit(&mdi_pathmap_mutex); 2999 return (path); 3000 } 3001 3002 /* 3003 * i_mdi_phci_add_path(): 3004 * Add a mdi_pathinfo node to pHCI list. 
3005 * Notes: 3006 * Caller should per-pHCI mutex 3007 */ 3008 static void 3009 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3010 { 3011 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3012 3013 MDI_PHCI_LOCK(ph); 3014 if (ph->ph_path_head == NULL) { 3015 ph->ph_path_head = pip; 3016 } else { 3017 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3018 } 3019 ph->ph_path_tail = pip; 3020 ph->ph_path_count++; 3021 MDI_PHCI_UNLOCK(ph); 3022 } 3023 3024 /* 3025 * i_mdi_client_add_path(): 3026 * Add mdi_pathinfo node to client list 3027 */ 3028 static void 3029 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3030 { 3031 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3032 3033 MDI_CLIENT_LOCK(ct); 3034 if (ct->ct_path_head == NULL) { 3035 ct->ct_path_head = pip; 3036 } else { 3037 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3038 } 3039 ct->ct_path_tail = pip; 3040 ct->ct_path_count++; 3041 MDI_CLIENT_UNLOCK(ct); 3042 } 3043 3044 /* 3045 * mdi_pi_free(): 3046 * Free the mdi_pathinfo node and also client device node if this 3047 * is the last path to the device 3048 * Return Values: 3049 * MDI_SUCCESS 3050 * MDI_FAILURE 3051 * MDI_BUSY 3052 */ 3053 /*ARGSUSED*/ 3054 int 3055 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3056 { 3057 int rv = MDI_FAILURE; 3058 mdi_vhci_t *vh; 3059 mdi_phci_t *ph; 3060 mdi_client_t *ct; 3061 int (*f)(); 3062 int client_held = 0; 3063 3064 MDI_PI_LOCK(pip); 3065 ph = MDI_PI(pip)->pi_phci; 3066 ASSERT(ph != NULL); 3067 if (ph == NULL) { 3068 /* 3069 * Invalid pHCI device, return failure 3070 */ 3071 MDI_DEBUG(1, (CE_WARN, NULL, 3072 "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip)); 3073 MDI_PI_UNLOCK(pip); 3074 return (MDI_FAILURE); 3075 } 3076 3077 vh = ph->ph_vhci; 3078 ASSERT(vh != NULL); 3079 if (vh == NULL) { 3080 /* Invalid pHCI device, return failure */ 3081 MDI_DEBUG(1, (CE_WARN, NULL, 3082 "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip)); 3083 MDI_PI_UNLOCK(pip); 3084 return (MDI_FAILURE); 3085 } 3086 3087 ct = 
MDI_PI(pip)->pi_client; 3088 ASSERT(ct != NULL); 3089 if (ct == NULL) { 3090 /* 3091 * Invalid Client device, return failure 3092 */ 3093 MDI_DEBUG(1, (CE_WARN, NULL, 3094 "!mdi_pi_free: invalid client pip=%p", (void *)pip)); 3095 MDI_PI_UNLOCK(pip); 3096 return (MDI_FAILURE); 3097 } 3098 3099 /* 3100 * Check to see for busy condition. A mdi_pathinfo can only be freed 3101 * if the node state is either offline or init and the reference count 3102 * is zero. 3103 */ 3104 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3105 MDI_PI_IS_INITING(pip))) { 3106 /* 3107 * Node is busy 3108 */ 3109 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3110 "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip)); 3111 MDI_PI_UNLOCK(pip); 3112 return (MDI_BUSY); 3113 } 3114 3115 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3116 /* 3117 * Give a chance for pending I/Os to complete. 3118 */ 3119 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: " 3120 "%d cmds still pending on path: %p\n", 3121 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3122 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3123 &MDI_PI(pip)->pi_mutex, 3124 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3125 /* 3126 * The timeout time reached without ref_cnt being zero 3127 * being signaled. 3128 */ 3129 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3130 "!mdi_pi_free: " 3131 "Timeout reached on path %p without the cond\n", 3132 (void *)pip)); 3133 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3134 "!mdi_pi_free: " 3135 "%d cmds still pending on path: %p\n", 3136 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3137 MDI_PI_UNLOCK(pip); 3138 return (MDI_BUSY); 3139 } 3140 } 3141 if (MDI_PI(pip)->pi_pm_held) { 3142 client_held = 1; 3143 } 3144 MDI_PI_UNLOCK(pip); 3145 3146 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3147 3148 MDI_CLIENT_LOCK(ct); 3149 3150 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3151 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3152 3153 /* 3154 * Wait till failover is complete before removing this node. 
3155 */ 3156 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3157 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3158 3159 MDI_CLIENT_UNLOCK(ct); 3160 MDI_VHCI_CLIENT_LOCK(vh); 3161 MDI_CLIENT_LOCK(ct); 3162 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3163 3164 if (!MDI_PI_IS_INITING(pip)) { 3165 f = vh->vh_ops->vo_pi_uninit; 3166 if (f != NULL) { 3167 rv = (*f)(vh->vh_dip, pip, 0); 3168 } 3169 } 3170 /* 3171 * If vo_pi_uninit() completed successfully. 3172 */ 3173 if (rv == MDI_SUCCESS) { 3174 if (client_held) { 3175 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 3176 "i_mdi_pm_rele_client\n")); 3177 i_mdi_pm_rele_client(ct, 1); 3178 } 3179 i_mdi_pi_free(ph, pip, ct); 3180 if (ct->ct_path_count == 0) { 3181 /* 3182 * Client lost its last path. 3183 * Clean up the client device 3184 */ 3185 MDI_CLIENT_UNLOCK(ct); 3186 (void) i_mdi_client_free(ct->ct_vhci, ct); 3187 MDI_VHCI_CLIENT_UNLOCK(vh); 3188 return (rv); 3189 } 3190 } 3191 MDI_CLIENT_UNLOCK(ct); 3192 MDI_VHCI_CLIENT_UNLOCK(vh); 3193 3194 if (rv == MDI_FAILURE) 3195 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3196 3197 return (rv); 3198 } 3199 3200 /* 3201 * i_mdi_pi_free(): 3202 * Free the mdi_pathinfo node 3203 */ 3204 static void 3205 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3206 { 3207 int ct_circular; 3208 int ph_circular; 3209 3210 ASSERT(MDI_CLIENT_LOCKED(ct)); 3211 3212 /* 3213 * remove any per-path kstats 3214 */ 3215 i_mdi_pi_kstat_destroy(pip); 3216 3217 /* See comments in i_mdi_pi_alloc() */ 3218 ndi_devi_enter(ct->ct_dip, &ct_circular); 3219 ndi_devi_enter(ph->ph_dip, &ph_circular); 3220 3221 i_mdi_client_remove_path(ct, pip); 3222 i_mdi_phci_remove_path(ph, pip); 3223 3224 ndi_devi_exit(ph->ph_dip, ph_circular); 3225 ndi_devi_exit(ct->ct_dip, ct_circular); 3226 3227 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3228 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3229 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3230 if (MDI_PI(pip)->pi_addr) { 3231 kmem_free(MDI_PI(pip)->pi_addr, 3232 
strlen(MDI_PI(pip)->pi_addr) + 1); 3233 MDI_PI(pip)->pi_addr = NULL; 3234 } 3235 3236 if (MDI_PI(pip)->pi_prop) { 3237 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3238 MDI_PI(pip)->pi_prop = NULL; 3239 } 3240 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3241 } 3242 3243 3244 /* 3245 * i_mdi_phci_remove_path(): 3246 * Remove a mdi_pathinfo node from pHCI list. 3247 * Notes: 3248 * Caller should hold per-pHCI mutex 3249 */ 3250 static void 3251 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3252 { 3253 mdi_pathinfo_t *prev = NULL; 3254 mdi_pathinfo_t *path = NULL; 3255 3256 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3257 3258 MDI_PHCI_LOCK(ph); 3259 path = ph->ph_path_head; 3260 while (path != NULL) { 3261 if (path == pip) { 3262 break; 3263 } 3264 prev = path; 3265 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3266 } 3267 3268 if (path) { 3269 ph->ph_path_count--; 3270 if (prev) { 3271 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3272 } else { 3273 ph->ph_path_head = 3274 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3275 } 3276 if (ph->ph_path_tail == path) { 3277 ph->ph_path_tail = prev; 3278 } 3279 } 3280 3281 /* 3282 * Clear the pHCI link 3283 */ 3284 MDI_PI(pip)->pi_phci_link = NULL; 3285 MDI_PI(pip)->pi_phci = NULL; 3286 MDI_PHCI_UNLOCK(ph); 3287 } 3288 3289 /* 3290 * i_mdi_client_remove_path(): 3291 * Remove a mdi_pathinfo node from client path list. 
3292 */ 3293 static void 3294 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3295 { 3296 mdi_pathinfo_t *prev = NULL; 3297 mdi_pathinfo_t *path; 3298 3299 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3300 3301 ASSERT(MDI_CLIENT_LOCKED(ct)); 3302 path = ct->ct_path_head; 3303 while (path != NULL) { 3304 if (path == pip) { 3305 break; 3306 } 3307 prev = path; 3308 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3309 } 3310 3311 if (path) { 3312 ct->ct_path_count--; 3313 if (prev) { 3314 MDI_PI(prev)->pi_client_link = 3315 MDI_PI(path)->pi_client_link; 3316 } else { 3317 ct->ct_path_head = 3318 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3319 } 3320 if (ct->ct_path_tail == path) { 3321 ct->ct_path_tail = prev; 3322 } 3323 if (ct->ct_path_last == path) { 3324 ct->ct_path_last = ct->ct_path_head; 3325 } 3326 } 3327 MDI_PI(pip)->pi_client_link = NULL; 3328 MDI_PI(pip)->pi_client = NULL; 3329 } 3330 3331 /* 3332 * i_mdi_pi_state_change(): 3333 * online a mdi_pathinfo node 3334 * 3335 * Return Values: 3336 * MDI_SUCCESS 3337 * MDI_FAILURE 3338 */ 3339 /*ARGSUSED*/ 3340 static int 3341 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3342 { 3343 int rv = MDI_SUCCESS; 3344 mdi_vhci_t *vh; 3345 mdi_phci_t *ph; 3346 mdi_client_t *ct; 3347 int (*f)(); 3348 dev_info_t *cdip; 3349 3350 MDI_PI_LOCK(pip); 3351 3352 ph = MDI_PI(pip)->pi_phci; 3353 ASSERT(ph); 3354 if (ph == NULL) { 3355 /* 3356 * Invalid pHCI device, fail the request 3357 */ 3358 MDI_PI_UNLOCK(pip); 3359 MDI_DEBUG(1, (CE_WARN, NULL, 3360 "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip)); 3361 return (MDI_FAILURE); 3362 } 3363 3364 vh = ph->ph_vhci; 3365 ASSERT(vh); 3366 if (vh == NULL) { 3367 /* 3368 * Invalid vHCI device, fail the request 3369 */ 3370 MDI_PI_UNLOCK(pip); 3371 MDI_DEBUG(1, (CE_WARN, NULL, 3372 "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip)); 3373 return (MDI_FAILURE); 3374 } 3375 3376 ct = MDI_PI(pip)->pi_client; 3377 ASSERT(ct 
!= NULL); 3378 if (ct == NULL) { 3379 /* 3380 * Invalid client device, fail the request 3381 */ 3382 MDI_PI_UNLOCK(pip); 3383 MDI_DEBUG(1, (CE_WARN, NULL, 3384 "!mdi_pi_state_change: invalid client pip=%p", 3385 (void *)pip)); 3386 return (MDI_FAILURE); 3387 } 3388 3389 /* 3390 * If this path has not been initialized yet, Callback vHCI driver's 3391 * pathinfo node initialize entry point 3392 */ 3393 3394 if (MDI_PI_IS_INITING(pip)) { 3395 MDI_PI_UNLOCK(pip); 3396 f = vh->vh_ops->vo_pi_init; 3397 if (f != NULL) { 3398 rv = (*f)(vh->vh_dip, pip, 0); 3399 if (rv != MDI_SUCCESS) { 3400 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3401 "!vo_pi_init: failed vHCI=0x%p, pip=0x%p", 3402 (void *)vh, (void *)pip)); 3403 return (MDI_FAILURE); 3404 } 3405 } 3406 MDI_PI_LOCK(pip); 3407 MDI_PI_CLEAR_TRANSIENT(pip); 3408 } 3409 3410 /* 3411 * Do not allow state transition when pHCI is in offline/suspended 3412 * states 3413 */ 3414 i_mdi_phci_lock(ph, pip); 3415 if (MDI_PHCI_IS_READY(ph) == 0) { 3416 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3417 "!mdi_pi_state_change: pHCI not ready, pHCI=%p", 3418 (void *)ph)); 3419 MDI_PI_UNLOCK(pip); 3420 i_mdi_phci_unlock(ph); 3421 return (MDI_BUSY); 3422 } 3423 MDI_PHCI_UNSTABLE(ph); 3424 i_mdi_phci_unlock(ph); 3425 3426 /* 3427 * Check if mdi_pathinfo state is in transient state. 3428 * If yes, offlining is in progress and wait till transient state is 3429 * cleared. 3430 */ 3431 if (MDI_PI_IS_TRANSIENT(pip)) { 3432 while (MDI_PI_IS_TRANSIENT(pip)) { 3433 cv_wait(&MDI_PI(pip)->pi_state_cv, 3434 &MDI_PI(pip)->pi_mutex); 3435 } 3436 } 3437 3438 /* 3439 * Grab the client lock in reverse order sequence and release the 3440 * mdi_pathinfo mutex. 
3441 */ 3442 i_mdi_client_lock(ct, pip); 3443 MDI_PI_UNLOCK(pip); 3444 3445 /* 3446 * Wait till failover state is cleared 3447 */ 3448 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3449 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3450 3451 /* 3452 * Mark the mdi_pathinfo node state as transient 3453 */ 3454 MDI_PI_LOCK(pip); 3455 switch (state) { 3456 case MDI_PATHINFO_STATE_ONLINE: 3457 MDI_PI_SET_ONLINING(pip); 3458 break; 3459 3460 case MDI_PATHINFO_STATE_STANDBY: 3461 MDI_PI_SET_STANDBYING(pip); 3462 break; 3463 3464 case MDI_PATHINFO_STATE_FAULT: 3465 /* 3466 * Mark the pathinfo state as FAULTED 3467 */ 3468 MDI_PI_SET_FAULTING(pip); 3469 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3470 break; 3471 3472 case MDI_PATHINFO_STATE_OFFLINE: 3473 /* 3474 * ndi_devi_offline() cannot hold pip or ct locks. 3475 */ 3476 MDI_PI_UNLOCK(pip); 3477 /* 3478 * Don't offline the client dev_info node unless we have 3479 * no available paths left at all. 3480 */ 3481 cdip = ct->ct_dip; 3482 if ((flag & NDI_DEVI_REMOVE) && 3483 (ct->ct_path_count == 1)) { 3484 i_mdi_client_unlock(ct); 3485 rv = ndi_devi_offline(cdip, 0); 3486 if (rv != NDI_SUCCESS) { 3487 /* 3488 * Convert to MDI error code 3489 */ 3490 switch (rv) { 3491 case NDI_BUSY: 3492 rv = MDI_BUSY; 3493 break; 3494 default: 3495 rv = MDI_FAILURE; 3496 break; 3497 } 3498 goto state_change_exit; 3499 } else { 3500 i_mdi_client_lock(ct, NULL); 3501 } 3502 } 3503 /* 3504 * Mark the mdi_pathinfo node state as transient 3505 */ 3506 MDI_PI_LOCK(pip); 3507 MDI_PI_SET_OFFLINING(pip); 3508 break; 3509 } 3510 MDI_PI_UNLOCK(pip); 3511 MDI_CLIENT_UNSTABLE(ct); 3512 i_mdi_client_unlock(ct); 3513 3514 f = vh->vh_ops->vo_pi_state_change; 3515 if (f != NULL) 3516 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3517 3518 MDI_CLIENT_LOCK(ct); 3519 MDI_PI_LOCK(pip); 3520 if (rv == MDI_NOT_SUPPORTED) { 3521 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3522 } 3523 if (rv != MDI_SUCCESS) { 3524 MDI_DEBUG(2, (CE_WARN, ct->ct_dip, 3525 "!vo_pi_state_change: 
failed rv = %x", rv)); 3526 } 3527 if (MDI_PI_IS_TRANSIENT(pip)) { 3528 if (rv == MDI_SUCCESS) { 3529 MDI_PI_CLEAR_TRANSIENT(pip); 3530 } else { 3531 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3532 } 3533 } 3534 3535 /* 3536 * Wake anyone waiting for this mdi_pathinfo node 3537 */ 3538 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3539 MDI_PI_UNLOCK(pip); 3540 3541 /* 3542 * Mark the client device as stable 3543 */ 3544 MDI_CLIENT_STABLE(ct); 3545 if (rv == MDI_SUCCESS) { 3546 if (ct->ct_unstable == 0) { 3547 cdip = ct->ct_dip; 3548 3549 /* 3550 * Onlining the mdi_pathinfo node will impact the 3551 * client state Update the client and dev_info node 3552 * state accordingly 3553 */ 3554 rv = NDI_SUCCESS; 3555 i_mdi_client_update_state(ct); 3556 switch (MDI_CLIENT_STATE(ct)) { 3557 case MDI_CLIENT_STATE_OPTIMAL: 3558 case MDI_CLIENT_STATE_DEGRADED: 3559 if (cdip && !i_ddi_devi_attached(cdip) && 3560 ((state == MDI_PATHINFO_STATE_ONLINE) || 3561 (state == MDI_PATHINFO_STATE_STANDBY))) { 3562 3563 /* 3564 * Must do ndi_devi_online() through 3565 * hotplug thread for deferred 3566 * attach mechanism to work 3567 */ 3568 MDI_CLIENT_UNLOCK(ct); 3569 rv = ndi_devi_online(cdip, 0); 3570 MDI_CLIENT_LOCK(ct); 3571 if ((rv != NDI_SUCCESS) && 3572 (MDI_CLIENT_STATE(ct) == 3573 MDI_CLIENT_STATE_DEGRADED)) { 3574 /* 3575 * ndi_devi_online failed. 3576 * Reset client flags to 3577 * offline. 3578 */ 3579 MDI_DEBUG(1, (CE_WARN, cdip, 3580 "!ndi_devi_online: failed " 3581 " Error: %x", rv)); 3582 MDI_CLIENT_SET_OFFLINE(ct); 3583 } 3584 if (rv != NDI_SUCCESS) { 3585 /* Reset the path state */ 3586 MDI_PI_LOCK(pip); 3587 MDI_PI(pip)->pi_state = 3588 MDI_PI_OLD_STATE(pip); 3589 MDI_PI_UNLOCK(pip); 3590 } 3591 } 3592 break; 3593 3594 case MDI_CLIENT_STATE_FAILED: 3595 /* 3596 * This is the last path case for 3597 * non-user initiated events. 
3598 */ 3599 if (((flag & NDI_DEVI_REMOVE) == 0) && 3600 cdip && (i_ddi_node_state(cdip) >= 3601 DS_INITIALIZED)) { 3602 MDI_CLIENT_UNLOCK(ct); 3603 rv = ndi_devi_offline(cdip, 0); 3604 MDI_CLIENT_LOCK(ct); 3605 3606 if (rv != NDI_SUCCESS) { 3607 /* 3608 * ndi_devi_offline failed. 3609 * Reset client flags to 3610 * online as the path could not 3611 * be offlined. 3612 */ 3613 MDI_DEBUG(1, (CE_WARN, cdip, 3614 "!ndi_devi_offline: failed " 3615 " Error: %x", rv)); 3616 MDI_CLIENT_SET_ONLINE(ct); 3617 } 3618 } 3619 break; 3620 } 3621 /* 3622 * Convert to MDI error code 3623 */ 3624 switch (rv) { 3625 case NDI_SUCCESS: 3626 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3627 i_mdi_report_path_state(ct, pip); 3628 rv = MDI_SUCCESS; 3629 break; 3630 case NDI_BUSY: 3631 rv = MDI_BUSY; 3632 break; 3633 default: 3634 rv = MDI_FAILURE; 3635 break; 3636 } 3637 } 3638 } 3639 MDI_CLIENT_UNLOCK(ct); 3640 3641 state_change_exit: 3642 /* 3643 * Mark the pHCI as stable again. 3644 */ 3645 MDI_PHCI_LOCK(ph); 3646 MDI_PHCI_STABLE(ph); 3647 MDI_PHCI_UNLOCK(ph); 3648 return (rv); 3649 } 3650 3651 /* 3652 * mdi_pi_online(): 3653 * Place the path_info node in the online state. The path is 3654 * now available to be selected by mdi_select_path() for 3655 * transporting I/O requests to client devices. 
3656 * Return Values: 3657 * MDI_SUCCESS 3658 * MDI_FAILURE 3659 */ 3660 int 3661 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3662 { 3663 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3664 int client_held = 0; 3665 int rv; 3666 int se_flag; 3667 int kmem_flag; 3668 3669 ASSERT(ct != NULL); 3670 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3671 if (rv != MDI_SUCCESS) 3672 return (rv); 3673 3674 MDI_PI_LOCK(pip); 3675 if (MDI_PI(pip)->pi_pm_held == 0) { 3676 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3677 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3678 i_mdi_pm_hold_pip(pip); 3679 client_held = 1; 3680 } 3681 MDI_PI_UNLOCK(pip); 3682 3683 if (client_held) { 3684 MDI_CLIENT_LOCK(ct); 3685 if (ct->ct_power_cnt == 0) { 3686 rv = i_mdi_power_all_phci(ct); 3687 } 3688 3689 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3690 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3691 i_mdi_pm_hold_client(ct, 1); 3692 MDI_CLIENT_UNLOCK(ct); 3693 } 3694 3695 /* determine interrupt context */ 3696 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3697 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3698 3699 /* A new path is online. Invalidate DINFOCACHE snap shot. */ 3700 i_ddi_di_cache_invalidate(kmem_flag); 3701 3702 return (rv); 3703 } 3704 3705 /* 3706 * mdi_pi_standby(): 3707 * Place the mdi_pathinfo node in standby state 3708 * 3709 * Return Values: 3710 * MDI_SUCCESS 3711 * MDI_FAILURE 3712 */ 3713 int 3714 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3715 { 3716 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3717 } 3718 3719 /* 3720 * mdi_pi_fault(): 3721 * Place the mdi_pathinfo node in fault'ed state 3722 * Return Values: 3723 * MDI_SUCCESS 3724 * MDI_FAILURE 3725 */ 3726 int 3727 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3728 { 3729 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3730 } 3731 3732 /* 3733 * mdi_pi_offline(): 3734 * Offline a mdi_pathinfo node. 
3735 * Return Values: 3736 * MDI_SUCCESS 3737 * MDI_FAILURE 3738 */ 3739 int 3740 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3741 { 3742 int ret, client_held = 0; 3743 mdi_client_t *ct; 3744 int se_flag; 3745 int kmem_flag; 3746 3747 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3748 3749 if (ret == MDI_SUCCESS) { 3750 MDI_PI_LOCK(pip); 3751 if (MDI_PI(pip)->pi_pm_held) { 3752 client_held = 1; 3753 } 3754 MDI_PI_UNLOCK(pip); 3755 3756 if (client_held) { 3757 ct = MDI_PI(pip)->pi_client; 3758 MDI_CLIENT_LOCK(ct); 3759 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3760 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3761 i_mdi_pm_rele_client(ct, 1); 3762 MDI_CLIENT_UNLOCK(ct); 3763 } 3764 3765 /* determine interrupt context */ 3766 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3767 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3768 3769 /* pathinfo is offlined. update DINFOCACHE. */ 3770 i_ddi_di_cache_invalidate(kmem_flag); 3771 } 3772 3773 return (ret); 3774 } 3775 3776 /* 3777 * i_mdi_pi_offline(): 3778 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3779 */ 3780 static int 3781 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3782 { 3783 dev_info_t *vdip = NULL; 3784 mdi_vhci_t *vh = NULL; 3785 mdi_client_t *ct = NULL; 3786 int (*f)(); 3787 int rv; 3788 3789 MDI_PI_LOCK(pip); 3790 ct = MDI_PI(pip)->pi_client; 3791 ASSERT(ct != NULL); 3792 3793 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3794 /* 3795 * Give a chance for pending I/Os to complete. 3796 */ 3797 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3798 "%d cmds still pending on path: %p\n", 3799 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3800 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3801 &MDI_PI(pip)->pi_mutex, 3802 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3803 /* 3804 * The timeout time reached without ref_cnt being zero 3805 * being signaled. 
3806 */ 3807 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3808 "Timeout reached on path %p without the cond\n", 3809 (void *)pip)); 3810 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3811 "%d cmds still pending on path: %p\n", 3812 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3813 } 3814 } 3815 vh = ct->ct_vhci; 3816 vdip = vh->vh_dip; 3817 3818 /* 3819 * Notify vHCI that has registered this event 3820 */ 3821 ASSERT(vh->vh_ops); 3822 f = vh->vh_ops->vo_pi_state_change; 3823 3824 if (f != NULL) { 3825 MDI_PI_UNLOCK(pip); 3826 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3827 flags)) != MDI_SUCCESS) { 3828 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3829 "!vo_path_offline failed " 3830 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3831 } 3832 MDI_PI_LOCK(pip); 3833 } 3834 3835 /* 3836 * Set the mdi_pathinfo node state and clear the transient condition 3837 */ 3838 MDI_PI_SET_OFFLINE(pip); 3839 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3840 MDI_PI_UNLOCK(pip); 3841 3842 MDI_CLIENT_LOCK(ct); 3843 if (rv == MDI_SUCCESS) { 3844 if (ct->ct_unstable == 0) { 3845 dev_info_t *cdip = ct->ct_dip; 3846 3847 /* 3848 * Onlining the mdi_pathinfo node will impact the 3849 * client state Update the client and dev_info node 3850 * state accordingly 3851 */ 3852 i_mdi_client_update_state(ct); 3853 rv = NDI_SUCCESS; 3854 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3855 if (cdip && 3856 (i_ddi_node_state(cdip) >= 3857 DS_INITIALIZED)) { 3858 MDI_CLIENT_UNLOCK(ct); 3859 rv = ndi_devi_offline(cdip, 0); 3860 MDI_CLIENT_LOCK(ct); 3861 if (rv != NDI_SUCCESS) { 3862 /* 3863 * ndi_devi_offline failed. 3864 * Reset client flags to 3865 * online. 
3866 */ 3867 MDI_DEBUG(4, (CE_WARN, cdip, 3868 "!ndi_devi_offline: failed " 3869 " Error: %x", rv)); 3870 MDI_CLIENT_SET_ONLINE(ct); 3871 } 3872 } 3873 } 3874 /* 3875 * Convert to MDI error code 3876 */ 3877 switch (rv) { 3878 case NDI_SUCCESS: 3879 rv = MDI_SUCCESS; 3880 break; 3881 case NDI_BUSY: 3882 rv = MDI_BUSY; 3883 break; 3884 default: 3885 rv = MDI_FAILURE; 3886 break; 3887 } 3888 } 3889 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3890 i_mdi_report_path_state(ct, pip); 3891 } 3892 3893 MDI_CLIENT_UNLOCK(ct); 3894 3895 /* 3896 * Change in the mdi_pathinfo node state will impact the client state 3897 */ 3898 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3899 (void *)ct, (void *)pip)); 3900 return (rv); 3901 } 3902 3903 /* 3904 * mdi_pi_get_node_name(): 3905 * Get the name associated with a mdi_pathinfo node. 3906 * Since pathinfo nodes are not directly named, we 3907 * return the node_name of the client. 3908 * 3909 * Return Values: 3910 * char * 3911 */ 3912 char * 3913 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 3914 { 3915 mdi_client_t *ct; 3916 3917 if (pip == NULL) 3918 return (NULL); 3919 ct = MDI_PI(pip)->pi_client; 3920 if ((ct == NULL) || (ct->ct_dip == NULL)) 3921 return (NULL); 3922 return (ddi_node_name(ct->ct_dip)); 3923 } 3924 3925 /* 3926 * mdi_pi_get_addr(): 3927 * Get the unit address associated with a mdi_pathinfo node 3928 * 3929 * Return Values: 3930 * char * 3931 */ 3932 char * 3933 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3934 { 3935 if (pip == NULL) 3936 return (NULL); 3937 3938 return (MDI_PI(pip)->pi_addr); 3939 } 3940 3941 /* 3942 * mdi_pi_get_path_instance(): 3943 * Get the 'path_instance' of a mdi_pathinfo node 3944 * 3945 * Return Values: 3946 * path_instance 3947 */ 3948 int 3949 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 3950 { 3951 if (pip == NULL) 3952 return (0); 3953 3954 return (MDI_PI(pip)->pi_path_instance); 3955 } 3956 3957 /* 3958 * mdi_pi_pathname(): 3959 * Return pointer to path to pathinfo node. 
3960 */ 3961 char * 3962 mdi_pi_pathname(mdi_pathinfo_t *pip) 3963 { 3964 if (pip == NULL) 3965 return (NULL); 3966 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 3967 } 3968 3969 char * 3970 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 3971 { 3972 char *obp_path = NULL; 3973 if ((pip == NULL) || (path == NULL)) 3974 return (NULL); 3975 3976 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 3977 (void) strcpy(path, obp_path); 3978 (void) mdi_prop_free(obp_path); 3979 } else { 3980 path = NULL; 3981 } 3982 return (path); 3983 } 3984 3985 int 3986 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 3987 { 3988 dev_info_t *pdip; 3989 char *obp_path = NULL; 3990 int rc = MDI_FAILURE; 3991 3992 if (pip == NULL) 3993 return (MDI_FAILURE); 3994 3995 pdip = mdi_pi_get_phci(pip); 3996 if (pdip == NULL) 3997 return (MDI_FAILURE); 3998 3999 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4000 4001 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4002 (void) ddi_pathname(pdip, obp_path); 4003 } 4004 4005 if (component) { 4006 (void) strncat(obp_path, "/", MAXPATHLEN); 4007 (void) strncat(obp_path, component, MAXPATHLEN); 4008 } 4009 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4010 4011 if (obp_path) 4012 kmem_free(obp_path, MAXPATHLEN); 4013 return (rc); 4014 } 4015 4016 /* 4017 * mdi_pi_get_client(): 4018 * Get the client devinfo associated with a mdi_pathinfo node 4019 * 4020 * Return Values: 4021 * Handle to client device dev_info node 4022 */ 4023 dev_info_t * 4024 mdi_pi_get_client(mdi_pathinfo_t *pip) 4025 { 4026 dev_info_t *dip = NULL; 4027 if (pip) { 4028 dip = MDI_PI(pip)->pi_client->ct_dip; 4029 } 4030 return (dip); 4031 } 4032 4033 /* 4034 * mdi_pi_get_phci(): 4035 * Get the pHCI devinfo associated with the mdi_pathinfo node 4036 * Return Values: 4037 * Handle to dev_info node 4038 */ 4039 dev_info_t * 4040 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4041 { 4042 dev_info_t *dip = NULL; 4043 if (pip) { 
4044 dip = MDI_PI(pip)->pi_phci->ph_dip; 4045 } 4046 return (dip); 4047 } 4048 4049 /* 4050 * mdi_pi_get_client_private(): 4051 * Get the client private information associated with the 4052 * mdi_pathinfo node 4053 */ 4054 void * 4055 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4056 { 4057 void *cprivate = NULL; 4058 if (pip) { 4059 cprivate = MDI_PI(pip)->pi_cprivate; 4060 } 4061 return (cprivate); 4062 } 4063 4064 /* 4065 * mdi_pi_set_client_private(): 4066 * Set the client private information in the mdi_pathinfo node 4067 */ 4068 void 4069 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4070 { 4071 if (pip) { 4072 MDI_PI(pip)->pi_cprivate = priv; 4073 } 4074 } 4075 4076 /* 4077 * mdi_pi_get_phci_private(): 4078 * Get the pHCI private information associated with the 4079 * mdi_pathinfo node 4080 */ 4081 caddr_t 4082 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4083 { 4084 caddr_t pprivate = NULL; 4085 if (pip) { 4086 pprivate = MDI_PI(pip)->pi_pprivate; 4087 } 4088 return (pprivate); 4089 } 4090 4091 /* 4092 * mdi_pi_set_phci_private(): 4093 * Set the pHCI private information in the mdi_pathinfo node 4094 */ 4095 void 4096 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4097 { 4098 if (pip) { 4099 MDI_PI(pip)->pi_pprivate = priv; 4100 } 4101 } 4102 4103 /* 4104 * mdi_pi_get_state(): 4105 * Get the mdi_pathinfo node state. Transient states are internal 4106 * and not provided to the users 4107 */ 4108 mdi_pathinfo_state_t 4109 mdi_pi_get_state(mdi_pathinfo_t *pip) 4110 { 4111 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4112 4113 if (pip) { 4114 if (MDI_PI_IS_TRANSIENT(pip)) { 4115 /* 4116 * mdi_pathinfo is in state transition. Return the 4117 * last good state. 
4118 */ 4119 state = MDI_PI_OLD_STATE(pip); 4120 } else { 4121 state = MDI_PI_STATE(pip); 4122 } 4123 } 4124 return (state); 4125 } 4126 4127 /* 4128 * Note that the following function needs to be the new interface for 4129 * mdi_pi_get_state when mpxio gets integrated to ON. 4130 */ 4131 int 4132 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4133 uint32_t *ext_state) 4134 { 4135 *state = MDI_PATHINFO_STATE_INIT; 4136 4137 if (pip) { 4138 if (MDI_PI_IS_TRANSIENT(pip)) { 4139 /* 4140 * mdi_pathinfo is in state transition. Return the 4141 * last good state. 4142 */ 4143 *state = MDI_PI_OLD_STATE(pip); 4144 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4145 } else { 4146 *state = MDI_PI_STATE(pip); 4147 *ext_state = MDI_PI_EXT_STATE(pip); 4148 } 4149 } 4150 return (MDI_SUCCESS); 4151 } 4152 4153 /* 4154 * mdi_pi_get_preferred: 4155 * Get the preferred path flag 4156 */ 4157 int 4158 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4159 { 4160 if (pip) { 4161 return (MDI_PI(pip)->pi_preferred); 4162 } 4163 return (0); 4164 } 4165 4166 /* 4167 * mdi_pi_set_preferred: 4168 * Set the preferred path flag 4169 */ 4170 void 4171 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4172 { 4173 if (pip) { 4174 MDI_PI(pip)->pi_preferred = preferred; 4175 } 4176 } 4177 4178 /* 4179 * mdi_pi_set_state(): 4180 * Set the mdi_pathinfo node state 4181 */ 4182 void 4183 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4184 { 4185 uint32_t ext_state; 4186 4187 if (pip) { 4188 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4189 MDI_PI(pip)->pi_state = state; 4190 MDI_PI(pip)->pi_state |= ext_state; 4191 } 4192 } 4193 4194 /* 4195 * Property functions: 4196 */ 4197 int 4198 i_map_nvlist_error_to_mdi(int val) 4199 { 4200 int rv; 4201 4202 switch (val) { 4203 case 0: 4204 rv = DDI_PROP_SUCCESS; 4205 break; 4206 case EINVAL: 4207 case ENOTSUP: 4208 rv = DDI_PROP_INVAL_ARG; 4209 break; 4210 case ENOMEM: 4211 rv = DDI_PROP_NO_MEMORY; 4212 break; 
4213 default: 4214 rv = DDI_PROP_NOT_FOUND; 4215 break; 4216 } 4217 return (rv); 4218 } 4219 4220 /* 4221 * mdi_pi_get_next_prop(): 4222 * Property walk function. The caller should hold mdi_pi_lock() 4223 * and release by calling mdi_pi_unlock() at the end of walk to 4224 * get a consistent value. 4225 */ 4226 nvpair_t * 4227 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4228 { 4229 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4230 return (NULL); 4231 } 4232 ASSERT(MDI_PI_LOCKED(pip)); 4233 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4234 } 4235 4236 /* 4237 * mdi_prop_remove(): 4238 * Remove the named property from the named list. 4239 */ 4240 int 4241 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4242 { 4243 if (pip == NULL) { 4244 return (DDI_PROP_NOT_FOUND); 4245 } 4246 ASSERT(!MDI_PI_LOCKED(pip)); 4247 MDI_PI_LOCK(pip); 4248 if (MDI_PI(pip)->pi_prop == NULL) { 4249 MDI_PI_UNLOCK(pip); 4250 return (DDI_PROP_NOT_FOUND); 4251 } 4252 if (name) { 4253 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4254 } else { 4255 char nvp_name[MAXNAMELEN]; 4256 nvpair_t *nvp; 4257 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4258 while (nvp) { 4259 nvpair_t *next; 4260 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4261 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 4262 nvpair_name(nvp)); 4263 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4264 nvp_name); 4265 nvp = next; 4266 } 4267 } 4268 MDI_PI_UNLOCK(pip); 4269 return (DDI_PROP_SUCCESS); 4270 } 4271 4272 /* 4273 * mdi_prop_size(): 4274 * Get buffer size needed to pack the property data. 4275 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4276 * buffer size. 
4277 */ 4278 int 4279 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4280 { 4281 int rv; 4282 size_t bufsize; 4283 4284 *buflenp = 0; 4285 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4286 return (DDI_PROP_NOT_FOUND); 4287 } 4288 ASSERT(MDI_PI_LOCKED(pip)); 4289 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4290 &bufsize, NV_ENCODE_NATIVE); 4291 *buflenp = bufsize; 4292 return (i_map_nvlist_error_to_mdi(rv)); 4293 } 4294 4295 /* 4296 * mdi_prop_pack(): 4297 * pack the property list. The caller should hold the 4298 * mdi_pathinfo_t node to get a consistent data 4299 */ 4300 int 4301 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4302 { 4303 int rv; 4304 size_t bufsize; 4305 4306 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4307 return (DDI_PROP_NOT_FOUND); 4308 } 4309 4310 ASSERT(MDI_PI_LOCKED(pip)); 4311 4312 bufsize = buflen; 4313 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4314 NV_ENCODE_NATIVE, KM_SLEEP); 4315 4316 return (i_map_nvlist_error_to_mdi(rv)); 4317 } 4318 4319 /* 4320 * mdi_prop_update_byte(): 4321 * Create/Update a byte property 4322 */ 4323 int 4324 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4325 { 4326 int rv; 4327 4328 if (pip == NULL) { 4329 return (DDI_PROP_INVAL_ARG); 4330 } 4331 ASSERT(!MDI_PI_LOCKED(pip)); 4332 MDI_PI_LOCK(pip); 4333 if (MDI_PI(pip)->pi_prop == NULL) { 4334 MDI_PI_UNLOCK(pip); 4335 return (DDI_PROP_NOT_FOUND); 4336 } 4337 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4338 MDI_PI_UNLOCK(pip); 4339 return (i_map_nvlist_error_to_mdi(rv)); 4340 } 4341 4342 /* 4343 * mdi_prop_update_byte_array(): 4344 * Create/Update a byte array property 4345 */ 4346 int 4347 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4348 uint_t nelements) 4349 { 4350 int rv; 4351 4352 if (pip == NULL) { 4353 return (DDI_PROP_INVAL_ARG); 4354 } 4355 ASSERT(!MDI_PI_LOCKED(pip)); 4356 MDI_PI_LOCK(pip); 4357 if (MDI_PI(pip)->pi_prop == NULL) { 
4358 MDI_PI_UNLOCK(pip); 4359 return (DDI_PROP_NOT_FOUND); 4360 } 4361 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4362 MDI_PI_UNLOCK(pip); 4363 return (i_map_nvlist_error_to_mdi(rv)); 4364 } 4365 4366 /* 4367 * mdi_prop_update_int(): 4368 * Create/Update a 32 bit integer property 4369 */ 4370 int 4371 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4372 { 4373 int rv; 4374 4375 if (pip == NULL) { 4376 return (DDI_PROP_INVAL_ARG); 4377 } 4378 ASSERT(!MDI_PI_LOCKED(pip)); 4379 MDI_PI_LOCK(pip); 4380 if (MDI_PI(pip)->pi_prop == NULL) { 4381 MDI_PI_UNLOCK(pip); 4382 return (DDI_PROP_NOT_FOUND); 4383 } 4384 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4385 MDI_PI_UNLOCK(pip); 4386 return (i_map_nvlist_error_to_mdi(rv)); 4387 } 4388 4389 /* 4390 * mdi_prop_update_int64(): 4391 * Create/Update a 64 bit integer property 4392 */ 4393 int 4394 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4395 { 4396 int rv; 4397 4398 if (pip == NULL) { 4399 return (DDI_PROP_INVAL_ARG); 4400 } 4401 ASSERT(!MDI_PI_LOCKED(pip)); 4402 MDI_PI_LOCK(pip); 4403 if (MDI_PI(pip)->pi_prop == NULL) { 4404 MDI_PI_UNLOCK(pip); 4405 return (DDI_PROP_NOT_FOUND); 4406 } 4407 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4408 MDI_PI_UNLOCK(pip); 4409 return (i_map_nvlist_error_to_mdi(rv)); 4410 } 4411 4412 /* 4413 * mdi_prop_update_int_array(): 4414 * Create/Update a int array property 4415 */ 4416 int 4417 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4418 uint_t nelements) 4419 { 4420 int rv; 4421 4422 if (pip == NULL) { 4423 return (DDI_PROP_INVAL_ARG); 4424 } 4425 ASSERT(!MDI_PI_LOCKED(pip)); 4426 MDI_PI_LOCK(pip); 4427 if (MDI_PI(pip)->pi_prop == NULL) { 4428 MDI_PI_UNLOCK(pip); 4429 return (DDI_PROP_NOT_FOUND); 4430 } 4431 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4432 nelements); 4433 MDI_PI_UNLOCK(pip); 4434 return (i_map_nvlist_error_to_mdi(rv)); 
4435 } 4436 4437 /* 4438 * mdi_prop_update_string(): 4439 * Create/Update a string property 4440 */ 4441 int 4442 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4443 { 4444 int rv; 4445 4446 if (pip == NULL) { 4447 return (DDI_PROP_INVAL_ARG); 4448 } 4449 ASSERT(!MDI_PI_LOCKED(pip)); 4450 MDI_PI_LOCK(pip); 4451 if (MDI_PI(pip)->pi_prop == NULL) { 4452 MDI_PI_UNLOCK(pip); 4453 return (DDI_PROP_NOT_FOUND); 4454 } 4455 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4456 MDI_PI_UNLOCK(pip); 4457 return (i_map_nvlist_error_to_mdi(rv)); 4458 } 4459 4460 /* 4461 * mdi_prop_update_string_array(): 4462 * Create/Update a string array property 4463 */ 4464 int 4465 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4466 uint_t nelements) 4467 { 4468 int rv; 4469 4470 if (pip == NULL) { 4471 return (DDI_PROP_INVAL_ARG); 4472 } 4473 ASSERT(!MDI_PI_LOCKED(pip)); 4474 MDI_PI_LOCK(pip); 4475 if (MDI_PI(pip)->pi_prop == NULL) { 4476 MDI_PI_UNLOCK(pip); 4477 return (DDI_PROP_NOT_FOUND); 4478 } 4479 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4480 nelements); 4481 MDI_PI_UNLOCK(pip); 4482 return (i_map_nvlist_error_to_mdi(rv)); 4483 } 4484 4485 /* 4486 * mdi_prop_lookup_byte(): 4487 * Look for byte property identified by name. The data returned 4488 * is the actual property and valid as long as mdi_pathinfo_t node 4489 * is alive. 4490 */ 4491 int 4492 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4493 { 4494 int rv; 4495 4496 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4497 return (DDI_PROP_NOT_FOUND); 4498 } 4499 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4500 return (i_map_nvlist_error_to_mdi(rv)); 4501 } 4502 4503 4504 /* 4505 * mdi_prop_lookup_byte_array(): 4506 * Look for byte array property identified by name. The data 4507 * returned is the actual property and valid as long as 4508 * mdi_pathinfo_t node is alive. 
4509 */ 4510 int 4511 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4512 uint_t *nelements) 4513 { 4514 int rv; 4515 4516 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4517 return (DDI_PROP_NOT_FOUND); 4518 } 4519 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4520 nelements); 4521 return (i_map_nvlist_error_to_mdi(rv)); 4522 } 4523 4524 /* 4525 * mdi_prop_lookup_int(): 4526 * Look for int property identified by name. The data returned 4527 * is the actual property and valid as long as mdi_pathinfo_t 4528 * node is alive. 4529 */ 4530 int 4531 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4532 { 4533 int rv; 4534 4535 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4536 return (DDI_PROP_NOT_FOUND); 4537 } 4538 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4539 return (i_map_nvlist_error_to_mdi(rv)); 4540 } 4541 4542 /* 4543 * mdi_prop_lookup_int64(): 4544 * Look for int64 property identified by name. The data returned 4545 * is the actual property and valid as long as mdi_pathinfo_t node 4546 * is alive. 4547 */ 4548 int 4549 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4550 { 4551 int rv; 4552 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4553 return (DDI_PROP_NOT_FOUND); 4554 } 4555 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4556 return (i_map_nvlist_error_to_mdi(rv)); 4557 } 4558 4559 /* 4560 * mdi_prop_lookup_int_array(): 4561 * Look for int array property identified by name. The data 4562 * returned is the actual property and valid as long as 4563 * mdi_pathinfo_t node is alive. 
4564 */ 4565 int 4566 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4567 uint_t *nelements) 4568 { 4569 int rv; 4570 4571 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4572 return (DDI_PROP_NOT_FOUND); 4573 } 4574 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4575 (int32_t **)data, nelements); 4576 return (i_map_nvlist_error_to_mdi(rv)); 4577 } 4578 4579 /* 4580 * mdi_prop_lookup_string(): 4581 * Look for string property identified by name. The data 4582 * returned is the actual property and valid as long as 4583 * mdi_pathinfo_t node is alive. 4584 */ 4585 int 4586 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4587 { 4588 int rv; 4589 4590 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4591 return (DDI_PROP_NOT_FOUND); 4592 } 4593 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4594 return (i_map_nvlist_error_to_mdi(rv)); 4595 } 4596 4597 /* 4598 * mdi_prop_lookup_string_array(): 4599 * Look for string array property identified by name. The data 4600 * returned is the actual property and valid as long as 4601 * mdi_pathinfo_t node is alive. 4602 */ 4603 int 4604 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4605 uint_t *nelements) 4606 { 4607 int rv; 4608 4609 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4610 return (DDI_PROP_NOT_FOUND); 4611 } 4612 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4613 nelements); 4614 return (i_map_nvlist_error_to_mdi(rv)); 4615 } 4616 4617 /* 4618 * mdi_prop_free(): 4619 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4620 * functions return the pointer to actual property data and not a 4621 * copy of it. So the data returned is valid as long as 4622 * mdi_pathinfo_t node is valid. 
4623 */ 4624 /*ARGSUSED*/ 4625 int 4626 mdi_prop_free(void *data) 4627 { 4628 return (DDI_PROP_SUCCESS); 4629 } 4630 4631 /*ARGSUSED*/ 4632 static void 4633 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4634 { 4635 char *phci_path, *ct_path; 4636 char *ct_status; 4637 char *status; 4638 dev_info_t *dip = ct->ct_dip; 4639 char lb_buf[64]; 4640 4641 ASSERT(MDI_CLIENT_LOCKED(ct)); 4642 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4643 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4644 return; 4645 } 4646 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4647 ct_status = "optimal"; 4648 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4649 ct_status = "degraded"; 4650 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4651 ct_status = "failed"; 4652 } else { 4653 ct_status = "unknown"; 4654 } 4655 4656 if (MDI_PI_IS_OFFLINE(pip)) { 4657 status = "offline"; 4658 } else if (MDI_PI_IS_ONLINE(pip)) { 4659 status = "online"; 4660 } else if (MDI_PI_IS_STANDBY(pip)) { 4661 status = "standby"; 4662 } else if (MDI_PI_IS_FAULT(pip)) { 4663 status = "faulted"; 4664 } else { 4665 status = "unknown"; 4666 } 4667 4668 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4669 (void) snprintf(lb_buf, sizeof (lb_buf), 4670 "%s, region-size: %d", mdi_load_balance_lba, 4671 ct->ct_lb_args->region_size); 4672 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4673 (void) snprintf(lb_buf, sizeof (lb_buf), 4674 "%s", mdi_load_balance_none); 4675 } else { 4676 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4677 mdi_load_balance_rr); 4678 } 4679 4680 if (dip) { 4681 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4682 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4683 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4684 "path %s (%s%d) to target address: %s is %s" 4685 " Load balancing: %s\n", 4686 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4687 ddi_get_instance(dip), ct_status, 4688 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4689 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4690 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4691 MDI_PI(pip)->pi_addr, status, lb_buf); 4692 kmem_free(phci_path, MAXPATHLEN); 4693 kmem_free(ct_path, MAXPATHLEN); 4694 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4695 } 4696 } 4697 4698 #ifdef DEBUG 4699 /* 4700 * i_mdi_log(): 4701 * Utility function for error message management 4702 * 4703 */ 4704 /*PRINTFLIKE3*/ 4705 static void 4706 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4707 { 4708 char name[MAXNAMELEN]; 4709 char buf[MAXNAMELEN]; 4710 char *bp; 4711 va_list ap; 4712 int log_only = 0; 4713 int boot_only = 0; 4714 int console_only = 0; 4715 4716 if (dip) { 4717 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4718 ddi_driver_name(dip), ddi_get_instance(dip)); 4719 } else { 4720 name[0] = 0; 4721 } 4722 4723 va_start(ap, fmt); 4724 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4725 va_end(ap); 4726 4727 switch (buf[0]) { 4728 case '!': 4729 bp = &buf[1]; 4730 log_only = 1; 4731 break; 4732 case '?': 4733 bp = &buf[1]; 4734 boot_only = 1; 4735 break; 4736 case '^': 4737 bp = &buf[1]; 4738 console_only = 1; 4739 break; 4740 default: 4741 bp = buf; 4742 break; 4743 } 4744 if (mdi_debug_logonly) { 4745 log_only = 1; 4746 boot_only = 0; 4747 console_only = 0; 4748 } 4749 4750 switch (level) { 4751 case CE_NOTE: 4752 level = CE_CONT; 4753 /* FALLTHROUGH */ 4754 case CE_CONT: 4755 case CE_WARN: 4756 case CE_PANIC: 4757 if (boot_only) { 4758 cmn_err(level, "?mdi: %s%s", name, bp); 4759 } else if (console_only) { 4760 cmn_err(level, "^mdi: %s%s", name, bp); 4761 } else if (log_only) { 4762 cmn_err(level, "!mdi: %s%s", name, bp); 4763 } else { 4764 cmn_err(level, "mdi: %s%s", name, bp); 4765 } 4766 break; 4767 default: 4768 cmn_err(level, "mdi: %s%s", name, bp); 4769 break; 4770 } 4771 } 4772 #endif /* DEBUG */ 4773 4774 void 4775 i_mdi_client_online(dev_info_t *ct_dip) 4776 { 4777 mdi_client_t *ct; 4778 4779 /* 4780 * Client online notification. 
Mark client state as online 4781 * restore our binding with dev_info node 4782 */ 4783 ct = i_devi_get_client(ct_dip); 4784 ASSERT(ct != NULL); 4785 MDI_CLIENT_LOCK(ct); 4786 MDI_CLIENT_SET_ONLINE(ct); 4787 /* catch for any memory leaks */ 4788 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4789 ct->ct_dip = ct_dip; 4790 4791 if (ct->ct_power_cnt == 0) 4792 (void) i_mdi_power_all_phci(ct); 4793 4794 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4795 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4796 i_mdi_pm_hold_client(ct, 1); 4797 4798 MDI_CLIENT_UNLOCK(ct); 4799 } 4800 4801 void 4802 i_mdi_phci_online(dev_info_t *ph_dip) 4803 { 4804 mdi_phci_t *ph; 4805 4806 /* pHCI online notification. Mark state accordingly */ 4807 ph = i_devi_get_phci(ph_dip); 4808 ASSERT(ph != NULL); 4809 MDI_PHCI_LOCK(ph); 4810 MDI_PHCI_SET_ONLINE(ph); 4811 MDI_PHCI_UNLOCK(ph); 4812 } 4813 4814 /* 4815 * mdi_devi_online(): 4816 * Online notification from NDI framework on pHCI/client 4817 * device online. 4818 * Return Values: 4819 * NDI_SUCCESS 4820 * MDI_FAILURE 4821 */ 4822 /*ARGSUSED*/ 4823 int 4824 mdi_devi_online(dev_info_t *dip, uint_t flags) 4825 { 4826 if (MDI_PHCI(dip)) { 4827 i_mdi_phci_online(dip); 4828 } 4829 4830 if (MDI_CLIENT(dip)) { 4831 i_mdi_client_online(dip); 4832 } 4833 return (NDI_SUCCESS); 4834 } 4835 4836 /* 4837 * mdi_devi_offline(): 4838 * Offline notification from NDI framework on pHCI/Client device 4839 * offline. 
4840 * 4841 * Return Values: 4842 * NDI_SUCCESS 4843 * NDI_FAILURE 4844 */ 4845 /*ARGSUSED*/ 4846 int 4847 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4848 { 4849 int rv = NDI_SUCCESS; 4850 4851 if (MDI_CLIENT(dip)) { 4852 rv = i_mdi_client_offline(dip, flags); 4853 if (rv != NDI_SUCCESS) 4854 return (rv); 4855 } 4856 4857 if (MDI_PHCI(dip)) { 4858 rv = i_mdi_phci_offline(dip, flags); 4859 4860 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4861 /* set client back online */ 4862 i_mdi_client_online(dip); 4863 } 4864 } 4865 4866 return (rv); 4867 } 4868 4869 /*ARGSUSED*/ 4870 static int 4871 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4872 { 4873 int rv = NDI_SUCCESS; 4874 mdi_phci_t *ph; 4875 mdi_client_t *ct; 4876 mdi_pathinfo_t *pip; 4877 mdi_pathinfo_t *next; 4878 mdi_pathinfo_t *failed_pip = NULL; 4879 dev_info_t *cdip; 4880 4881 /* 4882 * pHCI component offline notification 4883 * Make sure that this pHCI instance is free to be offlined. 4884 * If it is OK to proceed, Offline and remove all the child 4885 * mdi_pathinfo nodes. This process automatically offlines 4886 * corresponding client devices, for which this pHCI provides 4887 * critical services. 4888 */ 4889 ph = i_devi_get_phci(dip); 4890 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n", 4891 (void *)dip, (void *)ph)); 4892 if (ph == NULL) { 4893 return (rv); 4894 } 4895 4896 MDI_PHCI_LOCK(ph); 4897 4898 if (MDI_PHCI_IS_OFFLINE(ph)) { 4899 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", 4900 (void *)ph)); 4901 MDI_PHCI_UNLOCK(ph); 4902 return (NDI_SUCCESS); 4903 } 4904 4905 /* 4906 * Check to see if the pHCI can be offlined 4907 */ 4908 if (ph->ph_unstable) { 4909 MDI_DEBUG(1, (CE_WARN, dip, 4910 "!One or more target devices are in transient " 4911 "state. This device can not be removed at " 4912 "this moment. 
Please try again later.")); 4913 MDI_PHCI_UNLOCK(ph); 4914 return (NDI_BUSY); 4915 } 4916 4917 pip = ph->ph_path_head; 4918 while (pip != NULL) { 4919 MDI_PI_LOCK(pip); 4920 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4921 4922 /* 4923 * The mdi_pathinfo state is OK. Check the client state. 4924 * If failover in progress fail the pHCI from offlining 4925 */ 4926 ct = MDI_PI(pip)->pi_client; 4927 i_mdi_client_lock(ct, pip); 4928 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4929 (ct->ct_unstable)) { 4930 /* 4931 * Failover is in progress, Fail the DR 4932 */ 4933 MDI_DEBUG(1, (CE_WARN, dip, 4934 "!pHCI device (%s%d) is Busy. %s", 4935 ddi_driver_name(dip), ddi_get_instance(dip), 4936 "This device can not be removed at " 4937 "this moment. Please try again later.")); 4938 MDI_PI_UNLOCK(pip); 4939 i_mdi_client_unlock(ct); 4940 MDI_PHCI_UNLOCK(ph); 4941 return (NDI_BUSY); 4942 } 4943 MDI_PI_UNLOCK(pip); 4944 4945 /* 4946 * Check to see of we are removing the last path of this 4947 * client device... 4948 */ 4949 cdip = ct->ct_dip; 4950 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4951 (i_mdi_client_compute_state(ct, ph) == 4952 MDI_CLIENT_STATE_FAILED)) { 4953 i_mdi_client_unlock(ct); 4954 MDI_PHCI_UNLOCK(ph); 4955 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4956 /* 4957 * ndi_devi_offline() failed. 4958 * This pHCI provides the critical path 4959 * to one or more client devices. 4960 * Return busy. 4961 */ 4962 MDI_PHCI_LOCK(ph); 4963 MDI_DEBUG(1, (CE_WARN, dip, 4964 "!pHCI device (%s%d) is Busy. %s", 4965 ddi_driver_name(dip), ddi_get_instance(dip), 4966 "This device can not be removed at " 4967 "this moment. 
Please try again later.")); 4968 failed_pip = pip; 4969 break; 4970 } else { 4971 MDI_PHCI_LOCK(ph); 4972 pip = next; 4973 } 4974 } else { 4975 i_mdi_client_unlock(ct); 4976 pip = next; 4977 } 4978 } 4979 4980 if (failed_pip) { 4981 pip = ph->ph_path_head; 4982 while (pip != failed_pip) { 4983 MDI_PI_LOCK(pip); 4984 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4985 ct = MDI_PI(pip)->pi_client; 4986 i_mdi_client_lock(ct, pip); 4987 cdip = ct->ct_dip; 4988 switch (MDI_CLIENT_STATE(ct)) { 4989 case MDI_CLIENT_STATE_OPTIMAL: 4990 case MDI_CLIENT_STATE_DEGRADED: 4991 if (cdip) { 4992 MDI_PI_UNLOCK(pip); 4993 i_mdi_client_unlock(ct); 4994 MDI_PHCI_UNLOCK(ph); 4995 (void) ndi_devi_online(cdip, 0); 4996 MDI_PHCI_LOCK(ph); 4997 pip = next; 4998 continue; 4999 } 5000 break; 5001 5002 case MDI_CLIENT_STATE_FAILED: 5003 if (cdip) { 5004 MDI_PI_UNLOCK(pip); 5005 i_mdi_client_unlock(ct); 5006 MDI_PHCI_UNLOCK(ph); 5007 (void) ndi_devi_offline(cdip, 0); 5008 MDI_PHCI_LOCK(ph); 5009 pip = next; 5010 continue; 5011 } 5012 break; 5013 } 5014 MDI_PI_UNLOCK(pip); 5015 i_mdi_client_unlock(ct); 5016 pip = next; 5017 } 5018 MDI_PHCI_UNLOCK(ph); 5019 return (NDI_BUSY); 5020 } 5021 5022 /* 5023 * Mark the pHCI as offline 5024 */ 5025 MDI_PHCI_SET_OFFLINE(ph); 5026 5027 /* 5028 * Mark the child mdi_pathinfo nodes as transient 5029 */ 5030 pip = ph->ph_path_head; 5031 while (pip != NULL) { 5032 MDI_PI_LOCK(pip); 5033 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5034 MDI_PI_SET_OFFLINING(pip); 5035 MDI_PI_UNLOCK(pip); 5036 pip = next; 5037 } 5038 MDI_PHCI_UNLOCK(ph); 5039 /* 5040 * Give a chance for any pending commands to execute 5041 */ 5042 delay(1); 5043 MDI_PHCI_LOCK(ph); 5044 pip = ph->ph_path_head; 5045 while (pip != NULL) { 5046 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5047 (void) i_mdi_pi_offline(pip, flags); 5048 MDI_PI_LOCK(pip); 5049 ct = MDI_PI(pip)->pi_client; 5050 if (!MDI_PI_IS_OFFLINE(pip)) { 5051 MDI_DEBUG(1, (CE_WARN, dip, 5052 "!pHCI device (%s%d) 
is Busy. %s", 5053 ddi_driver_name(dip), ddi_get_instance(dip), 5054 "This device can not be removed at " 5055 "this moment. Please try again later.")); 5056 MDI_PI_UNLOCK(pip); 5057 MDI_PHCI_SET_ONLINE(ph); 5058 MDI_PHCI_UNLOCK(ph); 5059 return (NDI_BUSY); 5060 } 5061 MDI_PI_UNLOCK(pip); 5062 pip = next; 5063 } 5064 MDI_PHCI_UNLOCK(ph); 5065 5066 return (rv); 5067 } 5068 5069 void 5070 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5071 { 5072 mdi_phci_t *ph; 5073 mdi_client_t *ct; 5074 mdi_pathinfo_t *pip; 5075 mdi_pathinfo_t *next; 5076 dev_info_t *cdip; 5077 5078 if (!MDI_PHCI(dip)) 5079 return; 5080 5081 ph = i_devi_get_phci(dip); 5082 if (ph == NULL) { 5083 return; 5084 } 5085 5086 MDI_PHCI_LOCK(ph); 5087 5088 if (MDI_PHCI_IS_OFFLINE(ph)) { 5089 /* has no last path */ 5090 MDI_PHCI_UNLOCK(ph); 5091 return; 5092 } 5093 5094 pip = ph->ph_path_head; 5095 while (pip != NULL) { 5096 MDI_PI_LOCK(pip); 5097 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5098 5099 ct = MDI_PI(pip)->pi_client; 5100 i_mdi_client_lock(ct, pip); 5101 MDI_PI_UNLOCK(pip); 5102 5103 cdip = ct->ct_dip; 5104 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5105 (i_mdi_client_compute_state(ct, ph) == 5106 MDI_CLIENT_STATE_FAILED)) { 5107 /* Last path. 
Mark client dip as retiring */ 5108 i_mdi_client_unlock(ct); 5109 MDI_PHCI_UNLOCK(ph); 5110 (void) e_ddi_mark_retiring(cdip, cons_array); 5111 MDI_PHCI_LOCK(ph); 5112 pip = next; 5113 } else { 5114 i_mdi_client_unlock(ct); 5115 pip = next; 5116 } 5117 } 5118 5119 MDI_PHCI_UNLOCK(ph); 5120 5121 return; 5122 } 5123 5124 void 5125 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5126 { 5127 mdi_phci_t *ph; 5128 mdi_client_t *ct; 5129 mdi_pathinfo_t *pip; 5130 mdi_pathinfo_t *next; 5131 dev_info_t *cdip; 5132 5133 if (!MDI_PHCI(dip)) 5134 return; 5135 5136 ph = i_devi_get_phci(dip); 5137 if (ph == NULL) 5138 return; 5139 5140 MDI_PHCI_LOCK(ph); 5141 5142 if (MDI_PHCI_IS_OFFLINE(ph)) { 5143 MDI_PHCI_UNLOCK(ph); 5144 /* not last path */ 5145 return; 5146 } 5147 5148 if (ph->ph_unstable) { 5149 MDI_PHCI_UNLOCK(ph); 5150 /* can't check for constraints */ 5151 *constraint = 0; 5152 return; 5153 } 5154 5155 pip = ph->ph_path_head; 5156 while (pip != NULL) { 5157 MDI_PI_LOCK(pip); 5158 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5159 5160 /* 5161 * The mdi_pathinfo state is OK. Check the client state. 5162 * If failover in progress fail the pHCI from offlining 5163 */ 5164 ct = MDI_PI(pip)->pi_client; 5165 i_mdi_client_lock(ct, pip); 5166 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5167 (ct->ct_unstable)) { 5168 /* 5169 * Failover is in progress, can't check for constraints 5170 */ 5171 MDI_PI_UNLOCK(pip); 5172 i_mdi_client_unlock(ct); 5173 MDI_PHCI_UNLOCK(ph); 5174 *constraint = 0; 5175 return; 5176 } 5177 MDI_PI_UNLOCK(pip); 5178 5179 /* 5180 * Check to see of we are retiring the last path of this 5181 * client device... 
5182 */ 5183 cdip = ct->ct_dip; 5184 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5185 (i_mdi_client_compute_state(ct, ph) == 5186 MDI_CLIENT_STATE_FAILED)) { 5187 i_mdi_client_unlock(ct); 5188 MDI_PHCI_UNLOCK(ph); 5189 (void) e_ddi_retire_notify(cdip, constraint); 5190 MDI_PHCI_LOCK(ph); 5191 pip = next; 5192 } else { 5193 i_mdi_client_unlock(ct); 5194 pip = next; 5195 } 5196 } 5197 5198 MDI_PHCI_UNLOCK(ph); 5199 5200 return; 5201 } 5202 5203 /* 5204 * offline the path(s) hanging off the PHCI. If the 5205 * last path to any client, check that constraints 5206 * have been applied. 5207 */ 5208 void 5209 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) 5210 { 5211 mdi_phci_t *ph; 5212 mdi_client_t *ct; 5213 mdi_pathinfo_t *pip; 5214 mdi_pathinfo_t *next; 5215 dev_info_t *cdip; 5216 int unstable = 0; 5217 int constraint; 5218 5219 if (!MDI_PHCI(dip)) 5220 return; 5221 5222 ph = i_devi_get_phci(dip); 5223 if (ph == NULL) { 5224 /* no last path and no pips */ 5225 return; 5226 } 5227 5228 MDI_PHCI_LOCK(ph); 5229 5230 if (MDI_PHCI_IS_OFFLINE(ph)) { 5231 MDI_PHCI_UNLOCK(ph); 5232 /* no last path and no pips */ 5233 return; 5234 } 5235 5236 /* 5237 * Check to see if the pHCI can be offlined 5238 */ 5239 if (ph->ph_unstable) { 5240 unstable = 1; 5241 } 5242 5243 pip = ph->ph_path_head; 5244 while (pip != NULL) { 5245 MDI_PI_LOCK(pip); 5246 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5247 5248 /* 5249 * if failover in progress fail the pHCI from offlining 5250 */ 5251 ct = MDI_PI(pip)->pi_client; 5252 i_mdi_client_lock(ct, pip); 5253 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5254 (ct->ct_unstable)) { 5255 unstable = 1; 5256 } 5257 MDI_PI_UNLOCK(pip); 5258 5259 /* 5260 * Check to see of we are removing the last path of this 5261 * client device... 
5262 */ 5263 cdip = ct->ct_dip; 5264 if (!phci_only && cdip && 5265 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5266 (i_mdi_client_compute_state(ct, ph) == 5267 MDI_CLIENT_STATE_FAILED)) { 5268 i_mdi_client_unlock(ct); 5269 MDI_PHCI_UNLOCK(ph); 5270 /* 5271 * We don't retire clients we just retire the 5272 * path to a client. If it is the last path 5273 * to a client, constraints are checked and 5274 * if we pass the last path is offlined. MPXIO will 5275 * then fail all I/Os to the client. Since we don't 5276 * want to retire the client on a path error 5277 * set constraint = 0 so that the client dip 5278 * is not retired. 5279 */ 5280 constraint = 0; 5281 (void) e_ddi_retire_finalize(cdip, &constraint); 5282 MDI_PHCI_LOCK(ph); 5283 pip = next; 5284 } else { 5285 i_mdi_client_unlock(ct); 5286 pip = next; 5287 } 5288 } 5289 5290 /* 5291 * Cannot offline pip(s) 5292 */ 5293 if (unstable) { 5294 cmn_err(CE_WARN, "PHCI in transient state, cannot " 5295 "retire, dip = %p", (void *)dip); 5296 MDI_PHCI_UNLOCK(ph); 5297 return; 5298 } 5299 5300 /* 5301 * Mark the pHCI as offline 5302 */ 5303 MDI_PHCI_SET_OFFLINE(ph); 5304 5305 /* 5306 * Mark the child mdi_pathinfo nodes as transient 5307 */ 5308 pip = ph->ph_path_head; 5309 while (pip != NULL) { 5310 MDI_PI_LOCK(pip); 5311 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5312 MDI_PI_SET_OFFLINING(pip); 5313 MDI_PI_UNLOCK(pip); 5314 pip = next; 5315 } 5316 MDI_PHCI_UNLOCK(ph); 5317 /* 5318 * Give a chance for any pending commands to execute 5319 */ 5320 delay(1); 5321 MDI_PHCI_LOCK(ph); 5322 pip = ph->ph_path_head; 5323 while (pip != NULL) { 5324 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5325 (void) i_mdi_pi_offline(pip, 0); 5326 MDI_PI_LOCK(pip); 5327 ct = MDI_PI(pip)->pi_client; 5328 if (!MDI_PI_IS_OFFLINE(pip)) { 5329 cmn_err(CE_WARN, "PHCI busy, cannot offline path: " 5330 "PHCI dip = %p", (void *)dip); 5331 MDI_PI_UNLOCK(pip); 5332 MDI_PHCI_SET_ONLINE(ph); 5333 MDI_PHCI_UNLOCK(ph); 5334 return; 5335 } 
5336 MDI_PI_UNLOCK(pip); 5337 pip = next; 5338 } 5339 MDI_PHCI_UNLOCK(ph); 5340 5341 return; 5342 } 5343 5344 void 5345 mdi_phci_unretire(dev_info_t *dip) 5346 { 5347 ASSERT(MDI_PHCI(dip)); 5348 5349 /* 5350 * Online the phci 5351 */ 5352 i_mdi_phci_online(dip); 5353 } 5354 5355 /*ARGSUSED*/ 5356 static int 5357 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5358 { 5359 int rv = NDI_SUCCESS; 5360 mdi_client_t *ct; 5361 5362 /* 5363 * Client component to go offline. Make sure that we are 5364 * not in failing over state and update client state 5365 * accordingly 5366 */ 5367 ct = i_devi_get_client(dip); 5368 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 5369 (void *)dip, (void *)ct)); 5370 if (ct != NULL) { 5371 MDI_CLIENT_LOCK(ct); 5372 if (ct->ct_unstable) { 5373 /* 5374 * One or more paths are in transient state, 5375 * Dont allow offline of a client device 5376 */ 5377 MDI_DEBUG(1, (CE_WARN, dip, 5378 "!One or more paths to this device is " 5379 "in transient state. This device can not " 5380 "be removed at this moment. " 5381 "Please try again later.")); 5382 MDI_CLIENT_UNLOCK(ct); 5383 return (NDI_BUSY); 5384 } 5385 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5386 /* 5387 * Failover is in progress, Dont allow DR of 5388 * a client device 5389 */ 5390 MDI_DEBUG(1, (CE_WARN, dip, 5391 "!Client device (%s%d) is Busy. %s", 5392 ddi_driver_name(dip), ddi_get_instance(dip), 5393 "This device can not be removed at " 5394 "this moment. 
Please try again later.")); 5395 MDI_CLIENT_UNLOCK(ct); 5396 return (NDI_BUSY); 5397 } 5398 MDI_CLIENT_SET_OFFLINE(ct); 5399 5400 /* 5401 * Unbind our relationship with the dev_info node 5402 */ 5403 if (flags & NDI_DEVI_REMOVE) { 5404 ct->ct_dip = NULL; 5405 } 5406 MDI_CLIENT_UNLOCK(ct); 5407 } 5408 return (rv); 5409 } 5410 5411 /* 5412 * mdi_pre_attach(): 5413 * Pre attach() notification handler 5414 */ 5415 /*ARGSUSED*/ 5416 int 5417 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5418 { 5419 /* don't support old DDI_PM_RESUME */ 5420 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5421 (cmd == DDI_PM_RESUME)) 5422 return (DDI_FAILURE); 5423 5424 return (DDI_SUCCESS); 5425 } 5426 5427 /* 5428 * mdi_post_attach(): 5429 * Post attach() notification handler 5430 */ 5431 /*ARGSUSED*/ 5432 void 5433 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5434 { 5435 mdi_phci_t *ph; 5436 mdi_client_t *ct; 5437 mdi_vhci_t *vh; 5438 5439 if (MDI_PHCI(dip)) { 5440 ph = i_devi_get_phci(dip); 5441 ASSERT(ph != NULL); 5442 5443 MDI_PHCI_LOCK(ph); 5444 switch (cmd) { 5445 case DDI_ATTACH: 5446 MDI_DEBUG(2, (CE_NOTE, dip, 5447 "!pHCI post_attach: called %p\n", (void *)ph)); 5448 if (error == DDI_SUCCESS) { 5449 MDI_PHCI_SET_ATTACH(ph); 5450 } else { 5451 MDI_DEBUG(1, (CE_NOTE, dip, 5452 "!pHCI post_attach: failed error=%d\n", 5453 error)); 5454 MDI_PHCI_SET_DETACH(ph); 5455 } 5456 break; 5457 5458 case DDI_RESUME: 5459 MDI_DEBUG(2, (CE_NOTE, dip, 5460 "!pHCI post_resume: called %p\n", (void *)ph)); 5461 if (error == DDI_SUCCESS) { 5462 MDI_PHCI_SET_RESUME(ph); 5463 } else { 5464 MDI_DEBUG(1, (CE_NOTE, dip, 5465 "!pHCI post_resume: failed error=%d\n", 5466 error)); 5467 MDI_PHCI_SET_SUSPEND(ph); 5468 } 5469 break; 5470 } 5471 MDI_PHCI_UNLOCK(ph); 5472 } 5473 5474 if (MDI_CLIENT(dip)) { 5475 ct = i_devi_get_client(dip); 5476 ASSERT(ct != NULL); 5477 5478 MDI_CLIENT_LOCK(ct); 5479 switch (cmd) { 5480 case DDI_ATTACH: 5481 MDI_DEBUG(2, (CE_NOTE, dip, 
5482 "!Client post_attach: called %p\n", (void *)ct)); 5483 if (error != DDI_SUCCESS) { 5484 MDI_DEBUG(1, (CE_NOTE, dip, 5485 "!Client post_attach: failed error=%d\n", 5486 error)); 5487 MDI_CLIENT_SET_DETACH(ct); 5488 MDI_DEBUG(4, (CE_WARN, dip, 5489 "mdi_post_attach i_mdi_pm_reset_client\n")); 5490 i_mdi_pm_reset_client(ct); 5491 break; 5492 } 5493 5494 /* 5495 * Client device has successfully attached, inform 5496 * the vhci. 5497 */ 5498 vh = ct->ct_vhci; 5499 if (vh->vh_ops->vo_client_attached) 5500 (*vh->vh_ops->vo_client_attached)(dip); 5501 5502 MDI_CLIENT_SET_ATTACH(ct); 5503 break; 5504 5505 case DDI_RESUME: 5506 MDI_DEBUG(2, (CE_NOTE, dip, 5507 "!Client post_attach: called %p\n", (void *)ct)); 5508 if (error == DDI_SUCCESS) { 5509 MDI_CLIENT_SET_RESUME(ct); 5510 } else { 5511 MDI_DEBUG(1, (CE_NOTE, dip, 5512 "!Client post_resume: failed error=%d\n", 5513 error)); 5514 MDI_CLIENT_SET_SUSPEND(ct); 5515 } 5516 break; 5517 } 5518 MDI_CLIENT_UNLOCK(ct); 5519 } 5520 } 5521 5522 /* 5523 * mdi_pre_detach(): 5524 * Pre detach notification handler 5525 */ 5526 /*ARGSUSED*/ 5527 int 5528 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5529 { 5530 int rv = DDI_SUCCESS; 5531 5532 if (MDI_CLIENT(dip)) { 5533 (void) i_mdi_client_pre_detach(dip, cmd); 5534 } 5535 5536 if (MDI_PHCI(dip)) { 5537 rv = i_mdi_phci_pre_detach(dip, cmd); 5538 } 5539 5540 return (rv); 5541 } 5542 5543 /*ARGSUSED*/ 5544 static int 5545 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5546 { 5547 int rv = DDI_SUCCESS; 5548 mdi_phci_t *ph; 5549 mdi_client_t *ct; 5550 mdi_pathinfo_t *pip; 5551 mdi_pathinfo_t *failed_pip = NULL; 5552 mdi_pathinfo_t *next; 5553 5554 ph = i_devi_get_phci(dip); 5555 if (ph == NULL) { 5556 return (rv); 5557 } 5558 5559 MDI_PHCI_LOCK(ph); 5560 switch (cmd) { 5561 case DDI_DETACH: 5562 MDI_DEBUG(2, (CE_NOTE, dip, 5563 "!pHCI pre_detach: called %p\n", (void *)ph)); 5564 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5565 /* 5566 * mdi_pathinfo nodes are still attached 
to 5567 * this pHCI. Fail the detach for this pHCI. 5568 */ 5569 MDI_DEBUG(2, (CE_WARN, dip, 5570 "!pHCI pre_detach: " 5571 "mdi_pathinfo nodes are still attached " 5572 "%p\n", (void *)ph)); 5573 rv = DDI_FAILURE; 5574 break; 5575 } 5576 MDI_PHCI_SET_DETACH(ph); 5577 break; 5578 5579 case DDI_SUSPEND: 5580 /* 5581 * pHCI is getting suspended. Since mpxio client 5582 * devices may not be suspended at this point, to avoid 5583 * a potential stack overflow, it is important to suspend 5584 * client devices before pHCI can be suspended. 5585 */ 5586 5587 MDI_DEBUG(2, (CE_NOTE, dip, 5588 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5589 /* 5590 * Suspend all the client devices accessible through this pHCI 5591 */ 5592 pip = ph->ph_path_head; 5593 while (pip != NULL && rv == DDI_SUCCESS) { 5594 dev_info_t *cdip; 5595 MDI_PI_LOCK(pip); 5596 next = 5597 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5598 ct = MDI_PI(pip)->pi_client; 5599 i_mdi_client_lock(ct, pip); 5600 cdip = ct->ct_dip; 5601 MDI_PI_UNLOCK(pip); 5602 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5603 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5604 i_mdi_client_unlock(ct); 5605 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5606 DDI_SUCCESS) { 5607 /* 5608 * Suspend of one of the client 5609 * device has failed. 5610 */ 5611 MDI_DEBUG(1, (CE_WARN, dip, 5612 "!Suspend of device (%s%d) failed.", 5613 ddi_driver_name(cdip), 5614 ddi_get_instance(cdip))); 5615 failed_pip = pip; 5616 break; 5617 } 5618 } else { 5619 i_mdi_client_unlock(ct); 5620 } 5621 pip = next; 5622 } 5623 5624 if (rv == DDI_SUCCESS) { 5625 /* 5626 * Suspend of client devices is complete. Proceed 5627 * with pHCI suspend. 5628 */ 5629 MDI_PHCI_SET_SUSPEND(ph); 5630 } else { 5631 /* 5632 * Revert back all the suspended client device states 5633 * to converse. 
5634 */ 5635 pip = ph->ph_path_head; 5636 while (pip != failed_pip) { 5637 dev_info_t *cdip; 5638 MDI_PI_LOCK(pip); 5639 next = 5640 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5641 ct = MDI_PI(pip)->pi_client; 5642 i_mdi_client_lock(ct, pip); 5643 cdip = ct->ct_dip; 5644 MDI_PI_UNLOCK(pip); 5645 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5646 i_mdi_client_unlock(ct); 5647 (void) devi_attach(cdip, DDI_RESUME); 5648 } else { 5649 i_mdi_client_unlock(ct); 5650 } 5651 pip = next; 5652 } 5653 } 5654 break; 5655 5656 default: 5657 rv = DDI_FAILURE; 5658 break; 5659 } 5660 MDI_PHCI_UNLOCK(ph); 5661 return (rv); 5662 } 5663 5664 /*ARGSUSED*/ 5665 static int 5666 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5667 { 5668 int rv = DDI_SUCCESS; 5669 mdi_client_t *ct; 5670 5671 ct = i_devi_get_client(dip); 5672 if (ct == NULL) { 5673 return (rv); 5674 } 5675 5676 MDI_CLIENT_LOCK(ct); 5677 switch (cmd) { 5678 case DDI_DETACH: 5679 MDI_DEBUG(2, (CE_NOTE, dip, 5680 "!Client pre_detach: called %p\n", (void *)ct)); 5681 MDI_CLIENT_SET_DETACH(ct); 5682 break; 5683 5684 case DDI_SUSPEND: 5685 MDI_DEBUG(2, (CE_NOTE, dip, 5686 "!Client pre_suspend: called %p\n", (void *)ct)); 5687 MDI_CLIENT_SET_SUSPEND(ct); 5688 break; 5689 5690 default: 5691 rv = DDI_FAILURE; 5692 break; 5693 } 5694 MDI_CLIENT_UNLOCK(ct); 5695 return (rv); 5696 } 5697 5698 /* 5699 * mdi_post_detach(): 5700 * Post detach notification handler 5701 */ 5702 /*ARGSUSED*/ 5703 void 5704 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5705 { 5706 /* 5707 * Detach/Suspend of mpxio component failed. Update our state 5708 * too 5709 */ 5710 if (MDI_PHCI(dip)) 5711 i_mdi_phci_post_detach(dip, cmd, error); 5712 5713 if (MDI_CLIENT(dip)) 5714 i_mdi_client_post_detach(dip, cmd, error); 5715 } 5716 5717 /*ARGSUSED*/ 5718 static void 5719 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5720 { 5721 mdi_phci_t *ph; 5722 5723 /* 5724 * Detach/Suspend of phci component failed. 
Update our state 5725 * too 5726 */ 5727 ph = i_devi_get_phci(dip); 5728 if (ph == NULL) { 5729 return; 5730 } 5731 5732 MDI_PHCI_LOCK(ph); 5733 /* 5734 * Detach of pHCI failed. Restore back converse 5735 * state 5736 */ 5737 switch (cmd) { 5738 case DDI_DETACH: 5739 MDI_DEBUG(2, (CE_NOTE, dip, 5740 "!pHCI post_detach: called %p\n", (void *)ph)); 5741 if (error != DDI_SUCCESS) 5742 MDI_PHCI_SET_ATTACH(ph); 5743 break; 5744 5745 case DDI_SUSPEND: 5746 MDI_DEBUG(2, (CE_NOTE, dip, 5747 "!pHCI post_suspend: called %p\n", (void *)ph)); 5748 if (error != DDI_SUCCESS) 5749 MDI_PHCI_SET_RESUME(ph); 5750 break; 5751 } 5752 MDI_PHCI_UNLOCK(ph); 5753 } 5754 5755 /*ARGSUSED*/ 5756 static void 5757 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5758 { 5759 mdi_client_t *ct; 5760 5761 ct = i_devi_get_client(dip); 5762 if (ct == NULL) { 5763 return; 5764 } 5765 MDI_CLIENT_LOCK(ct); 5766 /* 5767 * Detach of Client failed. Restore back converse 5768 * state 5769 */ 5770 switch (cmd) { 5771 case DDI_DETACH: 5772 MDI_DEBUG(2, (CE_NOTE, dip, 5773 "!Client post_detach: called %p\n", (void *)ct)); 5774 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5775 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5776 "i_mdi_pm_rele_client\n")); 5777 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5778 } else { 5779 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5780 "i_mdi_pm_reset_client\n")); 5781 i_mdi_pm_reset_client(ct); 5782 } 5783 if (error != DDI_SUCCESS) 5784 MDI_CLIENT_SET_ATTACH(ct); 5785 break; 5786 5787 case DDI_SUSPEND: 5788 MDI_DEBUG(2, (CE_NOTE, dip, 5789 "!Client post_suspend: called %p\n", (void *)ct)); 5790 if (error != DDI_SUCCESS) 5791 MDI_CLIENT_SET_RESUME(ct); 5792 break; 5793 } 5794 MDI_CLIENT_UNLOCK(ct); 5795 } 5796 5797 int 5798 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5799 { 5800 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5801 } 5802 5803 /* 5804 * create and install per-path (client - pHCI) statistics 5805 * I/O stats supported: nread, nwritten, reads, and writes 5806 * Error stats - hard errors, soft errors, & transport errors 5807 */ 5808 int 5809 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5810 { 5811 kstat_t *kiosp, *kerrsp; 5812 struct pi_errs *nsp; 5813 struct mdi_pi_kstats *mdi_statp; 5814 5815 if (MDI_PI(pip)->pi_kstats != NULL) 5816 return (MDI_SUCCESS); 5817 5818 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5819 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5820 return (MDI_FAILURE); 5821 } 5822 5823 (void) strcat(ksname, ",err"); 5824 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5825 KSTAT_TYPE_NAMED, 5826 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5827 if (kerrsp == NULL) { 5828 kstat_delete(kiosp); 5829 return (MDI_FAILURE); 5830 } 5831 5832 nsp = (struct pi_errs *)kerrsp->ks_data; 5833 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5834 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5835 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5836 KSTAT_DATA_UINT32); 5837 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5838 KSTAT_DATA_UINT32); 5839 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5840 KSTAT_DATA_UINT32); 5841 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5842 KSTAT_DATA_UINT32); 5843 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5844 KSTAT_DATA_UINT32); 5845 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5846 KSTAT_DATA_UINT32); 5847 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5848 KSTAT_DATA_UINT32); 5849 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5850 5851 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5852 mdi_statp->pi_kstat_ref = 1; 5853 mdi_statp->pi_kstat_iostats = kiosp; 5854 mdi_statp->pi_kstat_errstats = kerrsp; 5855 
kstat_install(kiosp); 5856 kstat_install(kerrsp); 5857 MDI_PI(pip)->pi_kstats = mdi_statp; 5858 return (MDI_SUCCESS); 5859 } 5860 5861 /* 5862 * destroy per-path properties 5863 */ 5864 static void 5865 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5866 { 5867 5868 struct mdi_pi_kstats *mdi_statp; 5869 5870 if (MDI_PI(pip)->pi_kstats == NULL) 5871 return; 5872 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5873 return; 5874 5875 MDI_PI(pip)->pi_kstats = NULL; 5876 5877 /* 5878 * the kstat may be shared between multiple pathinfo nodes 5879 * decrement this pathinfo's usage, removing the kstats 5880 * themselves when the last pathinfo reference is removed. 5881 */ 5882 ASSERT(mdi_statp->pi_kstat_ref > 0); 5883 if (--mdi_statp->pi_kstat_ref != 0) 5884 return; 5885 5886 kstat_delete(mdi_statp->pi_kstat_iostats); 5887 kstat_delete(mdi_statp->pi_kstat_errstats); 5888 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5889 } 5890 5891 /* 5892 * update I/O paths KSTATS 5893 */ 5894 void 5895 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5896 { 5897 kstat_t *iostatp; 5898 size_t xfer_cnt; 5899 5900 ASSERT(pip != NULL); 5901 5902 /* 5903 * I/O can be driven across a path prior to having path 5904 * statistics available, i.e. probe(9e). 5905 */ 5906 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5907 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5908 xfer_cnt = bp->b_bcount - bp->b_resid; 5909 if (bp->b_flags & B_READ) { 5910 KSTAT_IO_PTR(iostatp)->reads++; 5911 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5912 } else { 5913 KSTAT_IO_PTR(iostatp)->writes++; 5914 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5915 } 5916 } 5917 } 5918 5919 /* 5920 * Enable the path(specific client/target/initiator) 5921 * Enabling a path means that MPxIO may select the enabled path for routing 5922 * future I/O requests, subject to other path state constraints. 
5923 */ 5924 int 5925 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5926 { 5927 mdi_phci_t *ph; 5928 5929 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5930 if (ph == NULL) { 5931 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5932 " failed. pip: %p ph = NULL\n", (void *)pip)); 5933 return (MDI_FAILURE); 5934 } 5935 5936 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5937 MDI_ENABLE_OP); 5938 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5939 " Returning success pip = %p. ph = %p\n", 5940 (void *)pip, (void *)ph)); 5941 return (MDI_SUCCESS); 5942 5943 } 5944 5945 /* 5946 * Disable the path (specific client/target/initiator) 5947 * Disabling a path means that MPxIO will not select the disabled path for 5948 * routing any new I/O requests. 5949 */ 5950 int 5951 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5952 { 5953 mdi_phci_t *ph; 5954 5955 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5956 if (ph == NULL) { 5957 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5958 " failed. pip: %p ph = NULL\n", (void *)pip)); 5959 return (MDI_FAILURE); 5960 } 5961 5962 (void) i_mdi_enable_disable_path(pip, 5963 ph->ph_vhci, flags, MDI_DISABLE_OP); 5964 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5965 "Returning success pip = %p. ph = %p", 5966 (void *)pip, (void *)ph)); 5967 return (MDI_SUCCESS); 5968 } 5969 5970 /* 5971 * disable the path to a particular pHCI (pHCI specified in the phci_path 5972 * argument) for a particular client (specified in the client_path argument). 5973 * Disabling a path means that MPxIO will not select the disabled path for 5974 * routing any new I/O requests. 
5975 * NOTE: this will be removed once the NWS files are changed to use the new 5976 * mdi_{enable,disable}_path interfaces 5977 */ 5978 int 5979 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5980 { 5981 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5982 } 5983 5984 /* 5985 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5986 * argument) for a particular client (specified in the client_path argument). 5987 * Enabling a path means that MPxIO may select the enabled path for routing 5988 * future I/O requests, subject to other path state constraints. 5989 * NOTE: this will be removed once the NWS files are changed to use the new 5990 * mdi_{enable,disable}_path interfaces 5991 */ 5992 5993 int 5994 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5995 { 5996 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5997 } 5998 5999 /* 6000 * Common routine for doing enable/disable. 6001 */ 6002 static mdi_pathinfo_t * 6003 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6004 int op) 6005 { 6006 int sync_flag = 0; 6007 int rv; 6008 mdi_pathinfo_t *next; 6009 int (*f)() = NULL; 6010 6011 /* 6012 * Check to make sure the path is not already in the 6013 * requested state. If it is just return the next path 6014 * as we have nothing to do here. 6015 */ 6016 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6017 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6018 MDI_PI_LOCK(pip); 6019 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6020 MDI_PI_UNLOCK(pip); 6021 return (next); 6022 } 6023 6024 f = vh->vh_ops->vo_pi_state_change; 6025 6026 sync_flag = (flags << 8) & 0xf00; 6027 6028 /* 6029 * Do a callback into the mdi consumer to let it 6030 * know that path is about to get enabled/disabled. 
6031 */ 6032 if (f != NULL) { 6033 rv = (*f)(vh->vh_dip, pip, 0, 6034 MDI_PI_EXT_STATE(pip), 6035 MDI_EXT_STATE_CHANGE | sync_flag | 6036 op | MDI_BEFORE_STATE_CHANGE); 6037 if (rv != MDI_SUCCESS) { 6038 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 6039 "!vo_pi_state_change: failed rv = %x", rv)); 6040 } 6041 } 6042 MDI_PI_LOCK(pip); 6043 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6044 6045 switch (flags) { 6046 case USER_DISABLE: 6047 if (op == MDI_DISABLE_OP) { 6048 MDI_PI_SET_USER_DISABLE(pip); 6049 } else { 6050 MDI_PI_SET_USER_ENABLE(pip); 6051 } 6052 break; 6053 case DRIVER_DISABLE: 6054 if (op == MDI_DISABLE_OP) { 6055 MDI_PI_SET_DRV_DISABLE(pip); 6056 } else { 6057 MDI_PI_SET_DRV_ENABLE(pip); 6058 } 6059 break; 6060 case DRIVER_DISABLE_TRANSIENT: 6061 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6062 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6063 } else { 6064 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6065 } 6066 break; 6067 } 6068 MDI_PI_UNLOCK(pip); 6069 /* 6070 * Do a callback into the mdi consumer to let it 6071 * know that path is now enabled/disabled. 6072 */ 6073 if (f != NULL) { 6074 rv = (*f)(vh->vh_dip, pip, 0, 6075 MDI_PI_EXT_STATE(pip), 6076 MDI_EXT_STATE_CHANGE | sync_flag | 6077 op | MDI_AFTER_STATE_CHANGE); 6078 if (rv != MDI_SUCCESS) { 6079 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 6080 "!vo_pi_state_change: failed rv = %x", rv)); 6081 } 6082 } 6083 return (next); 6084 } 6085 6086 /* 6087 * Common routine for doing enable/disable. 
6088 * NOTE: this will be removed once the NWS files are changed to use the new 6089 * mdi_{enable,disable}_path has been putback 6090 */ 6091 int 6092 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6093 { 6094 6095 mdi_phci_t *ph; 6096 mdi_vhci_t *vh = NULL; 6097 mdi_client_t *ct; 6098 mdi_pathinfo_t *next, *pip; 6099 int found_it; 6100 6101 ph = i_devi_get_phci(pdip); 6102 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6103 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 6104 (void *)cdip)); 6105 if (ph == NULL) { 6106 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 6107 "Op %d failed. ph = NULL\n", op)); 6108 return (MDI_FAILURE); 6109 } 6110 6111 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6112 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6113 "Op Invalid operation = %d\n", op)); 6114 return (MDI_FAILURE); 6115 } 6116 6117 vh = ph->ph_vhci; 6118 6119 if (cdip == NULL) { 6120 /* 6121 * Need to mark the Phci as enabled/disabled. 6122 */ 6123 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6124 "Op %d for the phci\n", op)); 6125 MDI_PHCI_LOCK(ph); 6126 switch (flags) { 6127 case USER_DISABLE: 6128 if (op == MDI_DISABLE_OP) { 6129 MDI_PHCI_SET_USER_DISABLE(ph); 6130 } else { 6131 MDI_PHCI_SET_USER_ENABLE(ph); 6132 } 6133 break; 6134 case DRIVER_DISABLE: 6135 if (op == MDI_DISABLE_OP) { 6136 MDI_PHCI_SET_DRV_DISABLE(ph); 6137 } else { 6138 MDI_PHCI_SET_DRV_ENABLE(ph); 6139 } 6140 break; 6141 case DRIVER_DISABLE_TRANSIENT: 6142 if (op == MDI_DISABLE_OP) { 6143 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6144 } else { 6145 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6146 } 6147 break; 6148 default: 6149 MDI_PHCI_UNLOCK(ph); 6150 MDI_DEBUG(1, (CE_NOTE, NULL, 6151 "!i_mdi_pi_enable_disable:" 6152 " Invalid flag argument= %d\n", flags)); 6153 } 6154 6155 /* 6156 * Phci has been disabled. Now try to enable/disable 6157 * path info's to each client. 
6158 */ 6159 pip = ph->ph_path_head; 6160 while (pip != NULL) { 6161 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6162 } 6163 MDI_PHCI_UNLOCK(ph); 6164 } else { 6165 6166 /* 6167 * Disable a specific client. 6168 */ 6169 ct = i_devi_get_client(cdip); 6170 if (ct == NULL) { 6171 MDI_DEBUG(1, (CE_NOTE, NULL, 6172 "!i_mdi_pi_enable_disable:" 6173 " failed. ct = NULL operation = %d\n", op)); 6174 return (MDI_FAILURE); 6175 } 6176 6177 MDI_CLIENT_LOCK(ct); 6178 pip = ct->ct_path_head; 6179 found_it = 0; 6180 while (pip != NULL) { 6181 MDI_PI_LOCK(pip); 6182 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6183 if (MDI_PI(pip)->pi_phci == ph) { 6184 MDI_PI_UNLOCK(pip); 6185 found_it = 1; 6186 break; 6187 } 6188 MDI_PI_UNLOCK(pip); 6189 pip = next; 6190 } 6191 6192 6193 MDI_CLIENT_UNLOCK(ct); 6194 if (found_it == 0) { 6195 MDI_DEBUG(1, (CE_NOTE, NULL, 6196 "!i_mdi_pi_enable_disable:" 6197 " failed. Could not find corresponding pip\n")); 6198 return (MDI_FAILURE); 6199 } 6200 6201 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6202 } 6203 6204 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6205 "Op %d Returning success pdip = %p cdip = %p\n", 6206 op, (void *)pdip, (void *)cdip)); 6207 return (MDI_SUCCESS); 6208 } 6209 6210 /* 6211 * Ensure phci powered up 6212 */ 6213 static void 6214 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6215 { 6216 dev_info_t *ph_dip; 6217 6218 ASSERT(pip != NULL); 6219 ASSERT(MDI_PI_LOCKED(pip)); 6220 6221 if (MDI_PI(pip)->pi_pm_held) { 6222 return; 6223 } 6224 6225 ph_dip = mdi_pi_get_phci(pip); 6226 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 6227 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6228 if (ph_dip == NULL) { 6229 return; 6230 } 6231 6232 MDI_PI_UNLOCK(pip); 6233 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6234 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6235 6236 pm_hold_power(ph_dip); 6237 6238 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6239 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 6240 MDI_PI_LOCK(pip); 6241 6242 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6243 if (DEVI(ph_dip)->devi_pm_info) 6244 MDI_PI(pip)->pi_pm_held = 1; 6245 } 6246 6247 /* 6248 * Allow phci powered down 6249 */ 6250 static void 6251 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6252 { 6253 dev_info_t *ph_dip = NULL; 6254 6255 ASSERT(pip != NULL); 6256 ASSERT(MDI_PI_LOCKED(pip)); 6257 6258 if (MDI_PI(pip)->pi_pm_held == 0) { 6259 return; 6260 } 6261 6262 ph_dip = mdi_pi_get_phci(pip); 6263 ASSERT(ph_dip != NULL); 6264 6265 MDI_PI_UNLOCK(pip); 6266 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 6267 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6268 6269 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6270 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6271 pm_rele_power(ph_dip); 6272 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6273 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6274 6275 MDI_PI_LOCK(pip); 6276 MDI_PI(pip)->pi_pm_held = 0; 6277 } 6278 6279 static void 6280 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6281 { 6282 ASSERT(MDI_CLIENT_LOCKED(ct)); 6283 6284 ct->ct_power_cnt += incr; 6285 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 6286 "ct_power_cnt = %d incr = %d\n", (void *)ct, 6287 ct->ct_power_cnt, incr)); 6288 ASSERT(ct->ct_power_cnt >= 0); 6289 } 6290 6291 static void 6292 i_mdi_rele_all_phci(mdi_client_t *ct) 6293 { 6294 mdi_pathinfo_t *pip; 6295 6296 ASSERT(MDI_CLIENT_LOCKED(ct)); 6297 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6298 while (pip != NULL) { 6299 mdi_hold_path(pip); 6300 MDI_PI_LOCK(pip); 6301 i_mdi_pm_rele_pip(pip); 6302 MDI_PI_UNLOCK(pip); 6303 mdi_rele_path(pip); 6304 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6305 } 6306 } 6307 6308 static void 6309 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6310 { 6311 ASSERT(MDI_CLIENT_LOCKED(ct)); 6312 6313 if (i_ddi_devi_attached(ct->ct_dip)) { 6314 ct->ct_power_cnt -= decr; 6315 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 6316 "ct_power_cnt = %d decr = %d\n", 6317 (void *)ct, ct->ct_power_cnt, decr)); 6318 } 6319 6320 ASSERT(ct->ct_power_cnt >= 0); 6321 if (ct->ct_power_cnt == 0) { 6322 i_mdi_rele_all_phci(ct); 6323 return; 6324 } 6325 } 6326 6327 static void 6328 i_mdi_pm_reset_client(mdi_client_t *ct) 6329 { 6330 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 6331 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 6332 ASSERT(MDI_CLIENT_LOCKED(ct)); 6333 ct->ct_power_cnt = 0; 6334 i_mdi_rele_all_phci(ct); 6335 ct->ct_powercnt_config = 0; 6336 ct->ct_powercnt_unconfig = 0; 6337 ct->ct_powercnt_reset = 1; 6338 } 6339 6340 static int 6341 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6342 { 6343 int ret; 6344 dev_info_t *ph_dip; 6345 6346 MDI_PI_LOCK(pip); 6347 i_mdi_pm_hold_pip(pip); 6348 6349 ph_dip = mdi_pi_get_phci(pip); 6350 MDI_PI_UNLOCK(pip); 6351 6352 /* bring all components of phci to full power */ 6353 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6354 "pm_powerup for %s%d %p\n", ddi_driver_name(ph_dip), 6355 ddi_get_instance(ph_dip), (void *)pip)); 6356 6357 ret = pm_powerup(ph_dip); 6358 6359 if (ret == DDI_FAILURE) { 6360 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6361 "pm_powerup FAILED for %s%d %p\n", 6362 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), 6363 (void *)pip)); 6364 6365 MDI_PI_LOCK(pip); 6366 i_mdi_pm_rele_pip(pip); 6367 MDI_PI_UNLOCK(pip); 6368 return (MDI_FAILURE); 6369 } 6370 6371 return (MDI_SUCCESS); 6372 } 6373 6374 static int 6375 i_mdi_power_all_phci(mdi_client_t *ct) 6376 { 6377 mdi_pathinfo_t *pip; 6378 int succeeded = 0; 6379 6380 ASSERT(MDI_CLIENT_LOCKED(ct)); 6381 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6382 while (pip != NULL) { 6383 /* 6384 * Don't power if MDI_PATHINFO_STATE_FAULT 6385 * or MDI_PATHINFO_STATE_OFFLINE. 
6386 */ 6387 if (MDI_PI_IS_INIT(pip) || 6388 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6389 mdi_hold_path(pip); 6390 MDI_CLIENT_UNLOCK(ct); 6391 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6392 succeeded = 1; 6393 6394 ASSERT(ct == MDI_PI(pip)->pi_client); 6395 MDI_CLIENT_LOCK(ct); 6396 mdi_rele_path(pip); 6397 } 6398 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6399 } 6400 6401 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6402 } 6403 6404 /* 6405 * mdi_bus_power(): 6406 * 1. Place the phci(s) into powered up state so that 6407 * client can do power management 6408 * 2. Ensure phci powered up as client power managing 6409 * Return Values: 6410 * MDI_SUCCESS 6411 * MDI_FAILURE 6412 */ 6413 int 6414 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6415 void *arg, void *result) 6416 { 6417 int ret = MDI_SUCCESS; 6418 pm_bp_child_pwrchg_t *bpc; 6419 mdi_client_t *ct; 6420 dev_info_t *cdip; 6421 pm_bp_has_changed_t *bphc; 6422 6423 /* 6424 * BUS_POWER_NOINVOL not supported 6425 */ 6426 if (op == BUS_POWER_NOINVOL) 6427 return (MDI_FAILURE); 6428 6429 /* 6430 * ignore other OPs. 
6431 * return quickly to save cou cycles on the ct processing 6432 */ 6433 switch (op) { 6434 case BUS_POWER_PRE_NOTIFICATION: 6435 case BUS_POWER_POST_NOTIFICATION: 6436 bpc = (pm_bp_child_pwrchg_t *)arg; 6437 cdip = bpc->bpc_dip; 6438 break; 6439 case BUS_POWER_HAS_CHANGED: 6440 bphc = (pm_bp_has_changed_t *)arg; 6441 cdip = bphc->bphc_dip; 6442 break; 6443 default: 6444 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6445 } 6446 6447 ASSERT(MDI_CLIENT(cdip)); 6448 6449 ct = i_devi_get_client(cdip); 6450 if (ct == NULL) 6451 return (MDI_FAILURE); 6452 6453 /* 6454 * wait till the mdi_pathinfo node state change are processed 6455 */ 6456 MDI_CLIENT_LOCK(ct); 6457 switch (op) { 6458 case BUS_POWER_PRE_NOTIFICATION: 6459 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6460 "BUS_POWER_PRE_NOTIFICATION:" 6461 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6462 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6463 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6464 6465 /* serialize power level change per client */ 6466 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6467 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6468 6469 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6470 6471 if (ct->ct_power_cnt == 0) { 6472 ret = i_mdi_power_all_phci(ct); 6473 } 6474 6475 /* 6476 * if new_level > 0: 6477 * - hold phci(s) 6478 * - power up phci(s) if not already 6479 * ignore power down 6480 */ 6481 if (bpc->bpc_nlevel > 0) { 6482 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 6483 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6484 "mdi_bus_power i_mdi_pm_hold_client\n")); 6485 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6486 } 6487 } 6488 break; 6489 case BUS_POWER_POST_NOTIFICATION: 6490 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6491 "BUS_POWER_POST_NOTIFICATION:" 6492 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 6493 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6494 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6495 *(int *)result)); 6496 6497 if (*(int *)result 
== DDI_SUCCESS) { 6498 if (bpc->bpc_nlevel > 0) { 6499 MDI_CLIENT_SET_POWER_UP(ct); 6500 } else { 6501 MDI_CLIENT_SET_POWER_DOWN(ct); 6502 } 6503 } 6504 6505 /* release the hold we did in pre-notification */ 6506 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6507 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6508 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6509 "mdi_bus_power i_mdi_pm_rele_client\n")); 6510 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6511 } 6512 6513 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6514 /* another thread might started attaching */ 6515 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6516 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6517 "mdi_bus_power i_mdi_pm_rele_client\n")); 6518 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6519 /* detaching has been taken care in pm_post_unconfig */ 6520 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6521 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6522 "mdi_bus_power i_mdi_pm_reset_client\n")); 6523 i_mdi_pm_reset_client(ct); 6524 } 6525 } 6526 6527 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6528 cv_broadcast(&ct->ct_powerchange_cv); 6529 6530 break; 6531 6532 /* need to do more */ 6533 case BUS_POWER_HAS_CHANGED: 6534 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 6535 "BUS_POWER_HAS_CHANGED:" 6536 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6537 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6538 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6539 6540 if (bphc->bphc_nlevel > 0 && 6541 bphc->bphc_nlevel > bphc->bphc_olevel) { 6542 if (ct->ct_power_cnt == 0) { 6543 ret = i_mdi_power_all_phci(ct); 6544 } 6545 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6546 "mdi_bus_power i_mdi_pm_hold_client\n")); 6547 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6548 } 6549 6550 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6551 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6552 "mdi_bus_power i_mdi_pm_rele_client\n")); 6553 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6554 } 6555 break; 6556 } 6557 6558 
MDI_CLIENT_UNLOCK(ct); 6559 return (ret); 6560 } 6561 6562 static int 6563 i_mdi_pm_pre_config_one(dev_info_t *child) 6564 { 6565 int ret = MDI_SUCCESS; 6566 mdi_client_t *ct; 6567 6568 ct = i_devi_get_client(child); 6569 if (ct == NULL) 6570 return (MDI_FAILURE); 6571 6572 MDI_CLIENT_LOCK(ct); 6573 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6574 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6575 6576 if (!MDI_CLIENT_IS_FAILED(ct)) { 6577 MDI_CLIENT_UNLOCK(ct); 6578 MDI_DEBUG(4, (CE_NOTE, child, 6579 "i_mdi_pm_pre_config_one already configured\n")); 6580 return (MDI_SUCCESS); 6581 } 6582 6583 if (ct->ct_powercnt_config) { 6584 MDI_CLIENT_UNLOCK(ct); 6585 MDI_DEBUG(4, (CE_NOTE, child, 6586 "i_mdi_pm_pre_config_one ALREADY held\n")); 6587 return (MDI_SUCCESS); 6588 } 6589 6590 if (ct->ct_power_cnt == 0) { 6591 ret = i_mdi_power_all_phci(ct); 6592 } 6593 MDI_DEBUG(4, (CE_NOTE, child, 6594 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 6595 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6596 ct->ct_powercnt_config = 1; 6597 ct->ct_powercnt_reset = 0; 6598 MDI_CLIENT_UNLOCK(ct); 6599 return (ret); 6600 } 6601 6602 static int 6603 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6604 { 6605 int ret = MDI_SUCCESS; 6606 dev_info_t *cdip; 6607 int circ; 6608 6609 ASSERT(MDI_VHCI(vdip)); 6610 6611 /* ndi_devi_config_one */ 6612 if (child) { 6613 ASSERT(DEVI_BUSY_OWNED(vdip)); 6614 return (i_mdi_pm_pre_config_one(child)); 6615 } 6616 6617 /* devi_config_common */ 6618 ndi_devi_enter(vdip, &circ); 6619 cdip = ddi_get_child(vdip); 6620 while (cdip) { 6621 dev_info_t *next = ddi_get_next_sibling(cdip); 6622 6623 ret = i_mdi_pm_pre_config_one(cdip); 6624 if (ret != MDI_SUCCESS) 6625 break; 6626 cdip = next; 6627 } 6628 ndi_devi_exit(vdip, circ); 6629 return (ret); 6630 } 6631 6632 static int 6633 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6634 { 6635 int ret = MDI_SUCCESS; 6636 mdi_client_t *ct; 6637 6638 ct = i_devi_get_client(child); 6639 if 
(ct == NULL) 6640 return (MDI_FAILURE); 6641 6642 MDI_CLIENT_LOCK(ct); 6643 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6644 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6645 6646 if (!i_ddi_devi_attached(ct->ct_dip)) { 6647 MDI_DEBUG(4, (CE_NOTE, child, 6648 "i_mdi_pm_pre_unconfig node detached already\n")); 6649 MDI_CLIENT_UNLOCK(ct); 6650 return (MDI_SUCCESS); 6651 } 6652 6653 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6654 (flags & NDI_AUTODETACH)) { 6655 MDI_DEBUG(4, (CE_NOTE, child, 6656 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6657 MDI_CLIENT_UNLOCK(ct); 6658 return (MDI_FAILURE); 6659 } 6660 6661 if (ct->ct_powercnt_unconfig) { 6662 MDI_DEBUG(4, (CE_NOTE, child, 6663 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6664 MDI_CLIENT_UNLOCK(ct); 6665 *held = 1; 6666 return (MDI_SUCCESS); 6667 } 6668 6669 if (ct->ct_power_cnt == 0) { 6670 ret = i_mdi_power_all_phci(ct); 6671 } 6672 MDI_DEBUG(4, (CE_NOTE, child, 6673 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6674 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6675 ct->ct_powercnt_unconfig = 1; 6676 ct->ct_powercnt_reset = 0; 6677 MDI_CLIENT_UNLOCK(ct); 6678 if (ret == MDI_SUCCESS) 6679 *held = 1; 6680 return (ret); 6681 } 6682 6683 static int 6684 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6685 int flags) 6686 { 6687 int ret = MDI_SUCCESS; 6688 dev_info_t *cdip; 6689 int circ; 6690 6691 ASSERT(MDI_VHCI(vdip)); 6692 *held = 0; 6693 6694 /* ndi_devi_unconfig_one */ 6695 if (child) { 6696 ASSERT(DEVI_BUSY_OWNED(vdip)); 6697 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6698 } 6699 6700 /* devi_unconfig_common */ 6701 ndi_devi_enter(vdip, &circ); 6702 cdip = ddi_get_child(vdip); 6703 while (cdip) { 6704 dev_info_t *next = ddi_get_next_sibling(cdip); 6705 6706 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6707 cdip = next; 6708 } 6709 ndi_devi_exit(vdip, circ); 6710 6711 if (*held) 6712 ret = MDI_SUCCESS; 6713 6714 return (ret); 6715 } 6716 6717 static void 6718 
i_mdi_pm_post_config_one(dev_info_t *child) 6719 { 6720 mdi_client_t *ct; 6721 6722 ct = i_devi_get_client(child); 6723 if (ct == NULL) 6724 return; 6725 6726 MDI_CLIENT_LOCK(ct); 6727 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6728 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6729 6730 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6731 MDI_DEBUG(4, (CE_NOTE, child, 6732 "i_mdi_pm_post_config_one NOT configured\n")); 6733 MDI_CLIENT_UNLOCK(ct); 6734 return; 6735 } 6736 6737 /* client has not been updated */ 6738 if (MDI_CLIENT_IS_FAILED(ct)) { 6739 MDI_DEBUG(4, (CE_NOTE, child, 6740 "i_mdi_pm_post_config_one NOT configured\n")); 6741 MDI_CLIENT_UNLOCK(ct); 6742 return; 6743 } 6744 6745 /* another thread might have powered it down or detached it */ 6746 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6747 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6748 (!i_ddi_devi_attached(ct->ct_dip) && 6749 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6750 MDI_DEBUG(4, (CE_NOTE, child, 6751 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6752 i_mdi_pm_reset_client(ct); 6753 } else { 6754 mdi_pathinfo_t *pip, *next; 6755 int valid_path_count = 0; 6756 6757 MDI_DEBUG(4, (CE_NOTE, child, 6758 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6759 pip = ct->ct_path_head; 6760 while (pip != NULL) { 6761 MDI_PI_LOCK(pip); 6762 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6763 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6764 valid_path_count ++; 6765 MDI_PI_UNLOCK(pip); 6766 pip = next; 6767 } 6768 i_mdi_pm_rele_client(ct, valid_path_count); 6769 } 6770 ct->ct_powercnt_config = 0; 6771 MDI_CLIENT_UNLOCK(ct); 6772 } 6773 6774 static void 6775 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6776 { 6777 int circ; 6778 dev_info_t *cdip; 6779 6780 ASSERT(MDI_VHCI(vdip)); 6781 6782 /* ndi_devi_config_one */ 6783 if (child) { 6784 ASSERT(DEVI_BUSY_OWNED(vdip)); 6785 i_mdi_pm_post_config_one(child); 6786 return; 6787 } 6788 6789 /* devi_config_common */ 6790 ndi_devi_enter(vdip, 
&circ); 6791 cdip = ddi_get_child(vdip); 6792 while (cdip) { 6793 dev_info_t *next = ddi_get_next_sibling(cdip); 6794 6795 i_mdi_pm_post_config_one(cdip); 6796 cdip = next; 6797 } 6798 ndi_devi_exit(vdip, circ); 6799 } 6800 6801 static void 6802 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6803 { 6804 mdi_client_t *ct; 6805 6806 ct = i_devi_get_client(child); 6807 if (ct == NULL) 6808 return; 6809 6810 MDI_CLIENT_LOCK(ct); 6811 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6812 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6813 6814 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6815 MDI_DEBUG(4, (CE_NOTE, child, 6816 "i_mdi_pm_post_unconfig NOT held\n")); 6817 MDI_CLIENT_UNLOCK(ct); 6818 return; 6819 } 6820 6821 /* failure detaching or another thread just attached it */ 6822 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6823 i_ddi_devi_attached(ct->ct_dip)) || 6824 (!i_ddi_devi_attached(ct->ct_dip) && 6825 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6826 MDI_DEBUG(4, (CE_NOTE, child, 6827 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6828 i_mdi_pm_reset_client(ct); 6829 } else { 6830 mdi_pathinfo_t *pip, *next; 6831 int valid_path_count = 0; 6832 6833 MDI_DEBUG(4, (CE_NOTE, child, 6834 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6835 pip = ct->ct_path_head; 6836 while (pip != NULL) { 6837 MDI_PI_LOCK(pip); 6838 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6839 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6840 valid_path_count ++; 6841 MDI_PI_UNLOCK(pip); 6842 pip = next; 6843 } 6844 i_mdi_pm_rele_client(ct, valid_path_count); 6845 ct->ct_powercnt_unconfig = 0; 6846 } 6847 6848 MDI_CLIENT_UNLOCK(ct); 6849 } 6850 6851 static void 6852 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6853 { 6854 int circ; 6855 dev_info_t *cdip; 6856 6857 ASSERT(MDI_VHCI(vdip)); 6858 6859 if (!held) { 6860 MDI_DEBUG(4, (CE_NOTE, vdip, 6861 "i_mdi_pm_post_unconfig held = %d\n", held)); 6862 return; 6863 } 6864 6865 if (child) { 6866 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6867 i_mdi_pm_post_unconfig_one(child); 6868 return; 6869 } 6870 6871 ndi_devi_enter(vdip, &circ); 6872 cdip = ddi_get_child(vdip); 6873 while (cdip) { 6874 dev_info_t *next = ddi_get_next_sibling(cdip); 6875 6876 i_mdi_pm_post_unconfig_one(cdip); 6877 cdip = next; 6878 } 6879 ndi_devi_exit(vdip, circ); 6880 } 6881 6882 int 6883 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6884 { 6885 int circ, ret = MDI_SUCCESS; 6886 dev_info_t *client_dip = NULL; 6887 mdi_client_t *ct; 6888 6889 /* 6890 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6891 * Power up pHCI for the named client device. 6892 * Note: Before the client is enumerated under vhci by phci, 6893 * client_dip can be NULL. Then proceed to power up all the 6894 * pHCIs. 6895 */ 6896 if (devnm != NULL) { 6897 ndi_devi_enter(vdip, &circ); 6898 client_dip = ndi_devi_findchild(vdip, devnm); 6899 } 6900 6901 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6902 op, devnm ? 
devnm : "NULL", (void *)client_dip)); 6903 6904 switch (op) { 6905 case MDI_PM_PRE_CONFIG: 6906 ret = i_mdi_pm_pre_config(vdip, client_dip); 6907 break; 6908 6909 case MDI_PM_PRE_UNCONFIG: 6910 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6911 flags); 6912 break; 6913 6914 case MDI_PM_POST_CONFIG: 6915 i_mdi_pm_post_config(vdip, client_dip); 6916 break; 6917 6918 case MDI_PM_POST_UNCONFIG: 6919 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6920 break; 6921 6922 case MDI_PM_HOLD_POWER: 6923 case MDI_PM_RELE_POWER: 6924 ASSERT(args); 6925 6926 client_dip = (dev_info_t *)args; 6927 ASSERT(MDI_CLIENT(client_dip)); 6928 6929 ct = i_devi_get_client(client_dip); 6930 MDI_CLIENT_LOCK(ct); 6931 6932 if (op == MDI_PM_HOLD_POWER) { 6933 if (ct->ct_power_cnt == 0) { 6934 (void) i_mdi_power_all_phci(ct); 6935 MDI_DEBUG(4, (CE_NOTE, client_dip, 6936 "mdi_power i_mdi_pm_hold_client\n")); 6937 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6938 } 6939 } else { 6940 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6941 MDI_DEBUG(4, (CE_NOTE, client_dip, 6942 "mdi_power i_mdi_pm_rele_client\n")); 6943 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6944 } else { 6945 MDI_DEBUG(4, (CE_NOTE, client_dip, 6946 "mdi_power i_mdi_pm_reset_client\n")); 6947 i_mdi_pm_reset_client(ct); 6948 } 6949 } 6950 6951 MDI_CLIENT_UNLOCK(ct); 6952 break; 6953 6954 default: 6955 break; 6956 } 6957 6958 if (devnm) 6959 ndi_devi_exit(vdip, circ); 6960 6961 return (ret); 6962 } 6963 6964 int 6965 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6966 { 6967 mdi_vhci_t *vhci; 6968 6969 if (!MDI_VHCI(dip)) 6970 return (MDI_FAILURE); 6971 6972 if (mdi_class) { 6973 vhci = DEVI(dip)->devi_mdi_xhci; 6974 ASSERT(vhci); 6975 *mdi_class = vhci->vh_class; 6976 } 6977 6978 return (MDI_SUCCESS); 6979 } 6980 6981 int 6982 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6983 { 6984 mdi_phci_t *phci; 6985 6986 if (!MDI_PHCI(dip)) 6987 return (MDI_FAILURE); 6988 6989 if (mdi_class) { 
6990 phci = DEVI(dip)->devi_mdi_xhci; 6991 ASSERT(phci); 6992 *mdi_class = phci->ph_vhci->vh_class; 6993 } 6994 6995 return (MDI_SUCCESS); 6996 } 6997 6998 int 6999 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 7000 { 7001 mdi_client_t *client; 7002 7003 if (!MDI_CLIENT(dip)) 7004 return (MDI_FAILURE); 7005 7006 if (mdi_class) { 7007 client = DEVI(dip)->devi_mdi_client; 7008 ASSERT(client); 7009 *mdi_class = client->ct_vhci->vh_class; 7010 } 7011 7012 return (MDI_SUCCESS); 7013 } 7014 7015 void * 7016 mdi_client_get_vhci_private(dev_info_t *dip) 7017 { 7018 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7019 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7020 mdi_client_t *ct; 7021 ct = i_devi_get_client(dip); 7022 return (ct->ct_vprivate); 7023 } 7024 return (NULL); 7025 } 7026 7027 void 7028 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7029 { 7030 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7031 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7032 mdi_client_t *ct; 7033 ct = i_devi_get_client(dip); 7034 ct->ct_vprivate = data; 7035 } 7036 } 7037 /* 7038 * mdi_pi_get_vhci_private(): 7039 * Get the vhci private information associated with the 7040 * mdi_pathinfo node 7041 */ 7042 void * 7043 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7044 { 7045 caddr_t vprivate = NULL; 7046 if (pip) { 7047 vprivate = MDI_PI(pip)->pi_vprivate; 7048 } 7049 return (vprivate); 7050 } 7051 7052 /* 7053 * mdi_pi_set_vhci_private(): 7054 * Set the vhci private information in the mdi_pathinfo node 7055 */ 7056 void 7057 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 7058 { 7059 if (pip) { 7060 MDI_PI(pip)->pi_vprivate = priv; 7061 } 7062 } 7063 7064 /* 7065 * mdi_phci_get_vhci_private(): 7066 * Get the vhci private information associated with the 7067 * mdi_phci node 7068 */ 7069 void * 7070 mdi_phci_get_vhci_private(dev_info_t *dip) 7071 { 7072 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 
7073 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7074 mdi_phci_t *ph; 7075 ph = i_devi_get_phci(dip); 7076 return (ph->ph_vprivate); 7077 } 7078 return (NULL); 7079 } 7080 7081 /* 7082 * mdi_phci_set_vhci_private(): 7083 * Set the vhci private information in the mdi_phci node 7084 */ 7085 void 7086 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7087 { 7088 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7089 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7090 mdi_phci_t *ph; 7091 ph = i_devi_get_phci(dip); 7092 ph->ph_vprivate = priv; 7093 } 7094 } 7095 7096 /* 7097 * List of vhci class names: 7098 * A vhci class name must be in this list only if the corresponding vhci 7099 * driver intends to use the mdi provided bus config implementation 7100 * (i.e., mdi_vhci_bus_config()). 7101 */ 7102 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7103 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7104 7105 /* 7106 * During boot time, the on-disk vhci cache for every vhci class is read 7107 * in the form of an nvlist and stored here. 7108 */ 7109 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7110 7111 /* nvpair names in vhci cache nvlist */ 7112 #define MDI_VHCI_CACHE_VERSION 1 7113 #define MDI_NVPNAME_VERSION "version" 7114 #define MDI_NVPNAME_PHCIS "phcis" 7115 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7116 7117 /* 7118 * Given vhci class name, return its on-disk vhci cache filename. 7119 * Memory for the returned filename which includes the full path is allocated 7120 * by this function. 7121 */ 7122 static char * 7123 vhclass2vhcache_filename(char *vhclass) 7124 { 7125 char *filename; 7126 int len; 7127 static char *fmt = "/etc/devices/mdi_%s_cache"; 7128 7129 /* 7130 * fmt contains the on-disk vhci cache file name format; 7131 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
7132 */ 7133 7134 /* the -1 below is to account for "%s" in the format string */ 7135 len = strlen(fmt) + strlen(vhclass) - 1; 7136 filename = kmem_alloc(len, KM_SLEEP); 7137 (void) snprintf(filename, len, fmt, vhclass); 7138 ASSERT(len == (strlen(filename) + 1)); 7139 return (filename); 7140 } 7141 7142 /* 7143 * initialize the vhci cache related data structures and read the on-disk 7144 * vhci cached data into memory. 7145 */ 7146 static void 7147 setup_vhci_cache(mdi_vhci_t *vh) 7148 { 7149 mdi_vhci_config_t *vhc; 7150 mdi_vhci_cache_t *vhcache; 7151 int i; 7152 nvlist_t *nvl = NULL; 7153 7154 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7155 vh->vh_config = vhc; 7156 vhcache = &vhc->vhc_vhcache; 7157 7158 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7159 7160 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7161 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7162 7163 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7164 7165 /* 7166 * Create string hash; same as mod_hash_create_strhash() except that 7167 * we use NULL key destructor. 7168 */ 7169 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7170 mdi_bus_config_cache_hash_size, 7171 mod_hash_null_keydtor, mod_hash_null_valdtor, 7172 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7173 7174 /* 7175 * The on-disk vhci cache is read during booting prior to the 7176 * lights-out period by mdi_read_devices_files(). 7177 */ 7178 for (i = 0; i < N_VHCI_CLASSES; i++) { 7179 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7180 nvl = vhcache_nvl[i]; 7181 vhcache_nvl[i] = NULL; 7182 break; 7183 } 7184 } 7185 7186 /* 7187 * this is to cover the case of some one manually causing unloading 7188 * (or detaching) and reloading (or attaching) of a vhci driver. 
7189 */ 7190 if (nvl == NULL && modrootloaded) 7191 nvl = read_on_disk_vhci_cache(vh->vh_class); 7192 7193 if (nvl != NULL) { 7194 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7195 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7196 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7197 else { 7198 cmn_err(CE_WARN, 7199 "%s: data file corrupted, will recreate\n", 7200 vhc->vhc_vhcache_filename); 7201 } 7202 rw_exit(&vhcache->vhcache_lock); 7203 nvlist_free(nvl); 7204 } 7205 7206 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7207 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7208 7209 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7210 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7211 } 7212 7213 /* 7214 * free all vhci cache related resources 7215 */ 7216 static int 7217 destroy_vhci_cache(mdi_vhci_t *vh) 7218 { 7219 mdi_vhci_config_t *vhc = vh->vh_config; 7220 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7221 mdi_vhcache_phci_t *cphci, *cphci_next; 7222 mdi_vhcache_client_t *cct, *cct_next; 7223 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7224 7225 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7226 return (MDI_FAILURE); 7227 7228 kmem_free(vhc->vhc_vhcache_filename, 7229 strlen(vhc->vhc_vhcache_filename) + 1); 7230 7231 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7232 7233 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7234 cphci = cphci_next) { 7235 cphci_next = cphci->cphci_next; 7236 free_vhcache_phci(cphci); 7237 } 7238 7239 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7240 cct_next = cct->cct_next; 7241 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7242 cpi_next = cpi->cpi_next; 7243 free_vhcache_pathinfo(cpi); 7244 } 7245 free_vhcache_client(cct); 7246 } 7247 7248 rw_destroy(&vhcache->vhcache_lock); 7249 7250 mutex_destroy(&vhc->vhc_lock); 7251 cv_destroy(&vhc->vhc_cv); 7252 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7253 return (MDI_SUCCESS); 
7254 } 7255 7256 /* 7257 * Stop all vhci cache related async threads and free their resources. 7258 */ 7259 static int 7260 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7261 { 7262 mdi_async_client_config_t *acc, *acc_next; 7263 7264 mutex_enter(&vhc->vhc_lock); 7265 vhc->vhc_flags |= MDI_VHC_EXIT; 7266 ASSERT(vhc->vhc_acc_thrcount >= 0); 7267 cv_broadcast(&vhc->vhc_cv); 7268 7269 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7270 vhc->vhc_acc_thrcount != 0) { 7271 mutex_exit(&vhc->vhc_lock); 7272 delay(1); 7273 mutex_enter(&vhc->vhc_lock); 7274 } 7275 7276 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7277 7278 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7279 acc_next = acc->acc_next; 7280 free_async_client_config(acc); 7281 } 7282 vhc->vhc_acc_list_head = NULL; 7283 vhc->vhc_acc_list_tail = NULL; 7284 vhc->vhc_acc_count = 0; 7285 7286 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7287 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7288 mutex_exit(&vhc->vhc_lock); 7289 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7290 vhcache_dirty(vhc); 7291 return (MDI_FAILURE); 7292 } 7293 } else 7294 mutex_exit(&vhc->vhc_lock); 7295 7296 if (callb_delete(vhc->vhc_cbid) != 0) 7297 return (MDI_FAILURE); 7298 7299 return (MDI_SUCCESS); 7300 } 7301 7302 /* 7303 * Stop vhci cache flush thread 7304 */ 7305 /* ARGSUSED */ 7306 static boolean_t 7307 stop_vhcache_flush_thread(void *arg, int code) 7308 { 7309 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7310 7311 mutex_enter(&vhc->vhc_lock); 7312 vhc->vhc_flags |= MDI_VHC_EXIT; 7313 cv_broadcast(&vhc->vhc_cv); 7314 7315 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7316 mutex_exit(&vhc->vhc_lock); 7317 delay(1); 7318 mutex_enter(&vhc->vhc_lock); 7319 } 7320 7321 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7322 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7323 mutex_exit(&vhc->vhc_lock); 7324 (void) flush_vhcache(vhc, 1); 7325 } else 7326 mutex_exit(&vhc->vhc_lock); 7327 7328 return (B_TRUE); 7329 } 
7330 7331 /* 7332 * Enqueue the vhcache phci (cphci) at the tail of the list 7333 */ 7334 static void 7335 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7336 { 7337 cphci->cphci_next = NULL; 7338 if (vhcache->vhcache_phci_head == NULL) 7339 vhcache->vhcache_phci_head = cphci; 7340 else 7341 vhcache->vhcache_phci_tail->cphci_next = cphci; 7342 vhcache->vhcache_phci_tail = cphci; 7343 } 7344 7345 /* 7346 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7347 */ 7348 static void 7349 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7350 mdi_vhcache_pathinfo_t *cpi) 7351 { 7352 cpi->cpi_next = NULL; 7353 if (cct->cct_cpi_head == NULL) 7354 cct->cct_cpi_head = cpi; 7355 else 7356 cct->cct_cpi_tail->cpi_next = cpi; 7357 cct->cct_cpi_tail = cpi; 7358 } 7359 7360 /* 7361 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7362 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7363 * flag set come at the beginning of the list. All cpis which have this 7364 * flag set come at the end of the list. 
7365 */ 7366 static void 7367 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7368 mdi_vhcache_pathinfo_t *newcpi) 7369 { 7370 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7371 7372 if (cct->cct_cpi_head == NULL || 7373 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7374 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7375 else { 7376 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7377 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7378 prev_cpi = cpi, cpi = cpi->cpi_next) 7379 ; 7380 7381 if (prev_cpi == NULL) 7382 cct->cct_cpi_head = newcpi; 7383 else 7384 prev_cpi->cpi_next = newcpi; 7385 7386 newcpi->cpi_next = cpi; 7387 7388 if (cpi == NULL) 7389 cct->cct_cpi_tail = newcpi; 7390 } 7391 } 7392 7393 /* 7394 * Enqueue the vhcache client (cct) at the tail of the list 7395 */ 7396 static void 7397 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7398 mdi_vhcache_client_t *cct) 7399 { 7400 cct->cct_next = NULL; 7401 if (vhcache->vhcache_client_head == NULL) 7402 vhcache->vhcache_client_head = cct; 7403 else 7404 vhcache->vhcache_client_tail->cct_next = cct; 7405 vhcache->vhcache_client_tail = cct; 7406 } 7407 7408 static void 7409 free_string_array(char **str, int nelem) 7410 { 7411 int i; 7412 7413 if (str) { 7414 for (i = 0; i < nelem; i++) { 7415 if (str[i]) 7416 kmem_free(str[i], strlen(str[i]) + 1); 7417 } 7418 kmem_free(str, sizeof (char *) * nelem); 7419 } 7420 } 7421 7422 static void 7423 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7424 { 7425 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7426 kmem_free(cphci, sizeof (*cphci)); 7427 } 7428 7429 static void 7430 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7431 { 7432 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7433 kmem_free(cpi, sizeof (*cpi)); 7434 } 7435 7436 static void 7437 free_vhcache_client(mdi_vhcache_client_t *cct) 7438 { 7439 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7440 kmem_free(cct, sizeof (*cct)); 7441 } 7442 7443 
static char * 7444 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7445 { 7446 char *name_addr; 7447 int len; 7448 7449 len = strlen(ct_name) + strlen(ct_addr) + 2; 7450 name_addr = kmem_alloc(len, KM_SLEEP); 7451 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7452 7453 if (ret_len) 7454 *ret_len = len; 7455 return (name_addr); 7456 } 7457 7458 /* 7459 * Copy the contents of paddrnvl to vhci cache. 7460 * paddrnvl nvlist contains path information for a vhci client. 7461 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7462 */ 7463 static void 7464 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7465 mdi_vhcache_client_t *cct) 7466 { 7467 nvpair_t *nvp = NULL; 7468 mdi_vhcache_pathinfo_t *cpi; 7469 uint_t nelem; 7470 uint32_t *val; 7471 7472 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7473 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7474 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7475 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7476 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7477 ASSERT(nelem == 2); 7478 cpi->cpi_cphci = cphci_list[val[0]]; 7479 cpi->cpi_flags = val[1]; 7480 enqueue_tail_vhcache_pathinfo(cct, cpi); 7481 } 7482 } 7483 7484 /* 7485 * Copy the contents of caddrmapnvl to vhci cache. 7486 * caddrmapnvl nvlist contains vhci client address to phci client address 7487 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7488 * this nvlist. 
7489 */ 7490 static void 7491 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7492 mdi_vhcache_phci_t *cphci_list[]) 7493 { 7494 nvpair_t *nvp = NULL; 7495 nvlist_t *paddrnvl; 7496 mdi_vhcache_client_t *cct; 7497 7498 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7499 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7500 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7501 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7502 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7503 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7504 /* the client must contain at least one path */ 7505 ASSERT(cct->cct_cpi_head != NULL); 7506 7507 enqueue_vhcache_client(vhcache, cct); 7508 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7509 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7510 } 7511 } 7512 7513 /* 7514 * Copy the contents of the main nvlist to vhci cache. 7515 * 7516 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7517 * The nvlist contains the mappings between the vhci client addresses and 7518 * their corresponding phci client addresses. 7519 * 7520 * The structure of the nvlist is as follows: 7521 * 7522 * Main nvlist: 7523 * NAME TYPE DATA 7524 * version int32 version number 7525 * phcis string array array of phci paths 7526 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7527 * 7528 * structure of c2paddrs_nvl: 7529 * NAME TYPE DATA 7530 * caddr1 nvlist_t paddrs_nvl1 7531 * caddr2 nvlist_t paddrs_nvl2 7532 * ... 7533 * where caddr1, caddr2, ... are vhci client name and addresses in the 7534 * form of "<clientname>@<clientaddress>". 7535 * (for example: "ssd@2000002037cd9f72"); 7536 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7537 * 7538 * structure of paddrs_nvl: 7539 * NAME TYPE DATA 7540 * pi_addr1 uint32_array (phci-id, cpi_flags) 7541 * pi_addr2 uint32_array (phci-id, cpi_flags) 7542 * ... 7543 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7544 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7545 * phci-ids are integers that identify PHCIs to which the 7546 * the bus specific address belongs to. These integers are used as an index 7547 * into to the phcis string array in the main nvlist to get the PHCI path. 7548 */ 7549 static int 7550 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7551 { 7552 char **phcis, **phci_namep; 7553 uint_t nphcis; 7554 mdi_vhcache_phci_t *cphci, **cphci_list; 7555 nvlist_t *caddrmapnvl; 7556 int32_t ver; 7557 int i; 7558 size_t cphci_list_size; 7559 7560 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7561 7562 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7563 ver != MDI_VHCI_CACHE_VERSION) 7564 return (MDI_FAILURE); 7565 7566 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7567 &nphcis) != 0) 7568 return (MDI_SUCCESS); 7569 7570 ASSERT(nphcis > 0); 7571 7572 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7573 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7574 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7575 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7576 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7577 enqueue_vhcache_phci(vhcache, cphci); 7578 cphci_list[i] = cphci; 7579 } 7580 7581 ASSERT(vhcache->vhcache_phci_head != NULL); 7582 7583 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7584 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7585 7586 kmem_free(cphci_list, cphci_list_size); 7587 return (MDI_SUCCESS); 7588 } 7589 7590 /* 7591 * Build paddrnvl for the specified client using the information in the 7592 * vhci cache and add it to the caddrmapnnvl. 7593 * Returns 0 on success, errno on failure. 
7594 */ 7595 static int 7596 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7597 nvlist_t *caddrmapnvl) 7598 { 7599 mdi_vhcache_pathinfo_t *cpi; 7600 nvlist_t *nvl; 7601 int err; 7602 uint32_t val[2]; 7603 7604 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7605 7606 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7607 return (err); 7608 7609 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7610 val[0] = cpi->cpi_cphci->cphci_id; 7611 val[1] = cpi->cpi_flags; 7612 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7613 != 0) 7614 goto out; 7615 } 7616 7617 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7618 out: 7619 nvlist_free(nvl); 7620 return (err); 7621 } 7622 7623 /* 7624 * Build caddrmapnvl using the information in the vhci cache 7625 * and add it to the mainnvl. 7626 * Returns 0 on success, errno on failure. 7627 */ 7628 static int 7629 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7630 { 7631 mdi_vhcache_client_t *cct; 7632 nvlist_t *nvl; 7633 int err; 7634 7635 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7636 7637 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7638 return (err); 7639 7640 for (cct = vhcache->vhcache_client_head; cct != NULL; 7641 cct = cct->cct_next) { 7642 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7643 goto out; 7644 } 7645 7646 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7647 out: 7648 nvlist_free(nvl); 7649 return (err); 7650 } 7651 7652 /* 7653 * Build nvlist using the information in the vhci cache. 7654 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7655 * Returns nvl on success, NULL on failure. 
7656 */ 7657 static nvlist_t * 7658 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7659 { 7660 mdi_vhcache_phci_t *cphci; 7661 uint_t phci_count; 7662 char **phcis; 7663 nvlist_t *nvl; 7664 int err, i; 7665 7666 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7667 nvl = NULL; 7668 goto out; 7669 } 7670 7671 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7672 MDI_VHCI_CACHE_VERSION)) != 0) 7673 goto out; 7674 7675 rw_enter(&vhcache->vhcache_lock, RW_READER); 7676 if (vhcache->vhcache_phci_head == NULL) { 7677 rw_exit(&vhcache->vhcache_lock); 7678 return (nvl); 7679 } 7680 7681 phci_count = 0; 7682 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7683 cphci = cphci->cphci_next) 7684 cphci->cphci_id = phci_count++; 7685 7686 /* build phci pathname list */ 7687 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7688 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7689 cphci = cphci->cphci_next, i++) 7690 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7691 7692 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7693 phci_count); 7694 free_string_array(phcis, phci_count); 7695 7696 if (err == 0 && 7697 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7698 rw_exit(&vhcache->vhcache_lock); 7699 return (nvl); 7700 } 7701 7702 rw_exit(&vhcache->vhcache_lock); 7703 out: 7704 if (nvl) 7705 nvlist_free(nvl); 7706 return (NULL); 7707 } 7708 7709 /* 7710 * Lookup vhcache phci structure for the specified phci path. 7711 */ 7712 static mdi_vhcache_phci_t * 7713 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7714 { 7715 mdi_vhcache_phci_t *cphci; 7716 7717 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7718 7719 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7720 cphci = cphci->cphci_next) { 7721 if (strcmp(cphci->cphci_path, phci_path) == 0) 7722 return (cphci); 7723 } 7724 7725 return (NULL); 7726 } 7727 7728 /* 7729 * Lookup vhcache phci structure for the specified phci. 
7730 */ 7731 static mdi_vhcache_phci_t * 7732 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7733 { 7734 mdi_vhcache_phci_t *cphci; 7735 7736 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7737 7738 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7739 cphci = cphci->cphci_next) { 7740 if (cphci->cphci_phci == ph) 7741 return (cphci); 7742 } 7743 7744 return (NULL); 7745 } 7746 7747 /* 7748 * Add the specified phci to the vhci cache if not already present. 7749 */ 7750 static void 7751 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7752 { 7753 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7754 mdi_vhcache_phci_t *cphci; 7755 char *pathname; 7756 int cache_updated; 7757 7758 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7759 7760 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7761 (void) ddi_pathname(ph->ph_dip, pathname); 7762 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7763 != NULL) { 7764 cphci->cphci_phci = ph; 7765 cache_updated = 0; 7766 } else { 7767 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7768 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7769 cphci->cphci_phci = ph; 7770 enqueue_vhcache_phci(vhcache, cphci); 7771 cache_updated = 1; 7772 } 7773 7774 rw_exit(&vhcache->vhcache_lock); 7775 7776 /* 7777 * Since a new phci has been added, reset 7778 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7779 * during next vhcache_discover_paths(). 7780 */ 7781 mutex_enter(&vhc->vhc_lock); 7782 vhc->vhc_path_discovery_cutoff_time = 0; 7783 mutex_exit(&vhc->vhc_lock); 7784 7785 kmem_free(pathname, MAXPATHLEN); 7786 if (cache_updated) 7787 vhcache_dirty(vhc); 7788 } 7789 7790 /* 7791 * Remove the reference to the specified phci from the vhci cache. 
7792 */ 7793 static void 7794 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7795 { 7796 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7797 mdi_vhcache_phci_t *cphci; 7798 7799 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7800 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7801 /* do not remove the actual mdi_vhcache_phci structure */ 7802 cphci->cphci_phci = NULL; 7803 } 7804 rw_exit(&vhcache->vhcache_lock); 7805 } 7806 7807 static void 7808 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7809 mdi_vhcache_lookup_token_t *src) 7810 { 7811 if (src == NULL) { 7812 dst->lt_cct = NULL; 7813 dst->lt_cct_lookup_time = 0; 7814 } else { 7815 dst->lt_cct = src->lt_cct; 7816 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7817 } 7818 } 7819 7820 /* 7821 * Look up vhcache client for the specified client. 7822 */ 7823 static mdi_vhcache_client_t * 7824 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7825 mdi_vhcache_lookup_token_t *token) 7826 { 7827 mod_hash_val_t hv; 7828 char *name_addr; 7829 int len; 7830 7831 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7832 7833 /* 7834 * If no vhcache clean occurred since the last lookup, we can 7835 * simply return the cct from the last lookup operation. 7836 * It works because ccts are never freed except during the vhcache 7837 * cleanup operation. 
7838 */ 7839 if (token != NULL && 7840 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7841 return (token->lt_cct); 7842 7843 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7844 if (mod_hash_find(vhcache->vhcache_client_hash, 7845 (mod_hash_key_t)name_addr, &hv) == 0) { 7846 if (token) { 7847 token->lt_cct = (mdi_vhcache_client_t *)hv; 7848 token->lt_cct_lookup_time = lbolt64; 7849 } 7850 } else { 7851 if (token) { 7852 token->lt_cct = NULL; 7853 token->lt_cct_lookup_time = 0; 7854 } 7855 hv = NULL; 7856 } 7857 kmem_free(name_addr, len); 7858 return ((mdi_vhcache_client_t *)hv); 7859 } 7860 7861 /* 7862 * Add the specified path to the vhci cache if not already present. 7863 * Also add the vhcache client for the client corresponding to this path 7864 * if it doesn't already exist. 7865 */ 7866 static void 7867 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7868 { 7869 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7870 mdi_vhcache_client_t *cct; 7871 mdi_vhcache_pathinfo_t *cpi; 7872 mdi_phci_t *ph = pip->pi_phci; 7873 mdi_client_t *ct = pip->pi_client; 7874 int cache_updated = 0; 7875 7876 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7877 7878 /* if vhcache client for this pip doesn't already exist, add it */ 7879 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7880 NULL)) == NULL) { 7881 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7882 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7883 ct->ct_guid, NULL); 7884 enqueue_vhcache_client(vhcache, cct); 7885 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7886 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7887 cache_updated = 1; 7888 } 7889 7890 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7891 if (cpi->cpi_cphci->cphci_phci == ph && 7892 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7893 cpi->cpi_pip = pip; 7894 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7895 cpi->cpi_flags &= 7896 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7897 sort_vhcache_paths(cct); 7898 cache_updated = 1; 7899 } 7900 break; 7901 } 7902 } 7903 7904 if (cpi == NULL) { 7905 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7906 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7907 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7908 ASSERT(cpi->cpi_cphci != NULL); 7909 cpi->cpi_pip = pip; 7910 enqueue_vhcache_pathinfo(cct, cpi); 7911 cache_updated = 1; 7912 } 7913 7914 rw_exit(&vhcache->vhcache_lock); 7915 7916 if (cache_updated) 7917 vhcache_dirty(vhc); 7918 } 7919 7920 /* 7921 * Remove the reference to the specified path from the vhci cache. 7922 */ 7923 static void 7924 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7925 { 7926 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7927 mdi_client_t *ct = pip->pi_client; 7928 mdi_vhcache_client_t *cct; 7929 mdi_vhcache_pathinfo_t *cpi; 7930 7931 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7932 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7933 NULL)) != NULL) { 7934 for (cpi = cct->cct_cpi_head; cpi != NULL; 7935 cpi = cpi->cpi_next) { 7936 if (cpi->cpi_pip == pip) { 7937 cpi->cpi_pip = NULL; 7938 break; 7939 } 7940 } 7941 } 7942 rw_exit(&vhcache->vhcache_lock); 7943 } 7944 7945 /* 7946 * Flush the vhci cache to disk. 7947 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7948 */ 7949 static int 7950 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7951 { 7952 nvlist_t *nvl; 7953 int err; 7954 int rv; 7955 7956 /* 7957 * It is possible that the system may shutdown before 7958 * i_ddi_io_initialized (during stmsboot for example). To allow for 7959 * flushing the cache in this case do not check for 7960 * i_ddi_io_initialized when force flag is set. 
7961 */ 7962 if (force_flag == 0 && !i_ddi_io_initialized()) 7963 return (MDI_FAILURE); 7964 7965 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7966 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7967 nvlist_free(nvl); 7968 } else 7969 err = EFAULT; 7970 7971 rv = MDI_SUCCESS; 7972 mutex_enter(&vhc->vhc_lock); 7973 if (err != 0) { 7974 if (err == EROFS) { 7975 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7976 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7977 MDI_VHC_VHCACHE_DIRTY); 7978 } else { 7979 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7980 cmn_err(CE_CONT, "%s: update failed\n", 7981 vhc->vhc_vhcache_filename); 7982 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7983 } 7984 rv = MDI_FAILURE; 7985 } 7986 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7987 cmn_err(CE_CONT, 7988 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7989 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7990 } 7991 mutex_exit(&vhc->vhc_lock); 7992 7993 return (rv); 7994 } 7995 7996 /* 7997 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7998 * Exits itself if left idle for the idle timeout period. 
7999 */ 8000 static void 8001 vhcache_flush_thread(void *arg) 8002 { 8003 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8004 clock_t idle_time, quit_at_ticks; 8005 callb_cpr_t cprinfo; 8006 8007 /* number of seconds to sleep idle before exiting */ 8008 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8009 8010 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8011 "mdi_vhcache_flush"); 8012 mutex_enter(&vhc->vhc_lock); 8013 for (; ; ) { 8014 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8015 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8016 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8017 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8018 (void) cv_timedwait(&vhc->vhc_cv, 8019 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8020 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8021 } else { 8022 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8023 mutex_exit(&vhc->vhc_lock); 8024 8025 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8026 vhcache_dirty(vhc); 8027 8028 mutex_enter(&vhc->vhc_lock); 8029 } 8030 } 8031 8032 quit_at_ticks = ddi_get_lbolt() + idle_time; 8033 8034 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8035 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8036 ddi_get_lbolt() < quit_at_ticks) { 8037 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8038 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8039 quit_at_ticks); 8040 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8041 } 8042 8043 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8044 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8045 goto out; 8046 } 8047 8048 out: 8049 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8050 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8051 CALLB_CPR_EXIT(&cprinfo); 8052 } 8053 8054 /* 8055 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
8056 */ 8057 static void 8058 vhcache_dirty(mdi_vhci_config_t *vhc) 8059 { 8060 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8061 int create_thread; 8062 8063 rw_enter(&vhcache->vhcache_lock, RW_READER); 8064 /* do not flush cache until the cache is fully built */ 8065 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8066 rw_exit(&vhcache->vhcache_lock); 8067 return; 8068 } 8069 rw_exit(&vhcache->vhcache_lock); 8070 8071 mutex_enter(&vhc->vhc_lock); 8072 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8073 mutex_exit(&vhc->vhc_lock); 8074 return; 8075 } 8076 8077 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8078 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8079 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8080 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8081 cv_broadcast(&vhc->vhc_cv); 8082 create_thread = 0; 8083 } else { 8084 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8085 create_thread = 1; 8086 } 8087 mutex_exit(&vhc->vhc_lock); 8088 8089 if (create_thread) 8090 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8091 0, &p0, TS_RUN, minclsyspri); 8092 } 8093 8094 /* 8095 * phci bus config structure - one for for each phci bus config operation that 8096 * we initiate on behalf of a vhci. 
8097 */ 8098 typedef struct mdi_phci_bus_config_s { 8099 char *phbc_phci_path; 8100 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8101 struct mdi_phci_bus_config_s *phbc_next; 8102 } mdi_phci_bus_config_t; 8103 8104 /* vhci bus config structure - one for each vhci bus config operation */ 8105 typedef struct mdi_vhci_bus_config_s { 8106 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8107 major_t vhbc_op_major; /* bus config op major */ 8108 uint_t vhbc_op_flags; /* bus config op flags */ 8109 kmutex_t vhbc_lock; 8110 kcondvar_t vhbc_cv; 8111 int vhbc_thr_count; 8112 } mdi_vhci_bus_config_t; 8113 8114 /* 8115 * bus config the specified phci 8116 */ 8117 static void 8118 bus_config_phci(void *arg) 8119 { 8120 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8121 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8122 dev_info_t *ph_dip; 8123 8124 /* 8125 * first configure all path components upto phci and then configure 8126 * the phci children. 8127 */ 8128 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8129 != NULL) { 8130 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8131 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8132 (void) ndi_devi_config_driver(ph_dip, 8133 vhbc->vhbc_op_flags, 8134 vhbc->vhbc_op_major); 8135 } else 8136 (void) ndi_devi_config(ph_dip, 8137 vhbc->vhbc_op_flags); 8138 8139 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8140 ndi_rele_devi(ph_dip); 8141 } 8142 8143 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8144 kmem_free(phbc, sizeof (*phbc)); 8145 8146 mutex_enter(&vhbc->vhbc_lock); 8147 vhbc->vhbc_thr_count--; 8148 if (vhbc->vhbc_thr_count == 0) 8149 cv_broadcast(&vhbc->vhbc_cv); 8150 mutex_exit(&vhbc->vhbc_lock); 8151 } 8152 8153 /* 8154 * Bus config all phcis associated with the vhci in parallel. 8155 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8156 */ 8157 static void 8158 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8159 ddi_bus_config_op_t op, major_t maj) 8160 { 8161 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8162 mdi_vhci_bus_config_t *vhbc; 8163 mdi_vhcache_phci_t *cphci; 8164 8165 rw_enter(&vhcache->vhcache_lock, RW_READER); 8166 if (vhcache->vhcache_phci_head == NULL) { 8167 rw_exit(&vhcache->vhcache_lock); 8168 return; 8169 } 8170 8171 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8172 8173 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8174 cphci = cphci->cphci_next) { 8175 /* skip phcis that haven't attached before root is available */ 8176 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8177 continue; 8178 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8179 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8180 KM_SLEEP); 8181 phbc->phbc_vhbusconfig = vhbc; 8182 phbc->phbc_next = phbc_head; 8183 phbc_head = phbc; 8184 vhbc->vhbc_thr_count++; 8185 } 8186 rw_exit(&vhcache->vhcache_lock); 8187 8188 vhbc->vhbc_op = op; 8189 vhbc->vhbc_op_major = maj; 8190 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8191 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8192 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8193 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8194 8195 /* now create threads to initiate bus config on all phcis in parallel */ 8196 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8197 phbc_next = phbc->phbc_next; 8198 if (mdi_mtc_off) 8199 bus_config_phci((void *)phbc); 8200 else 8201 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8202 0, &p0, TS_RUN, minclsyspri); 8203 } 8204 8205 mutex_enter(&vhbc->vhbc_lock); 8206 /* wait until all threads exit */ 8207 while (vhbc->vhbc_thr_count > 0) 8208 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8209 mutex_exit(&vhbc->vhbc_lock); 8210 8211 mutex_destroy(&vhbc->vhbc_lock); 8212 cv_destroy(&vhbc->vhbc_cv); 8213 kmem_free(vhbc, sizeof (*vhbc)); 8214 } 8215 8216 /* 8217 * Single 
threaded version of bus_config_all_phcis() 8218 */ 8219 static void 8220 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8221 ddi_bus_config_op_t op, major_t maj) 8222 { 8223 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8224 8225 single_threaded_vhconfig_enter(vhc); 8226 bus_config_all_phcis(vhcache, flags, op, maj); 8227 single_threaded_vhconfig_exit(vhc); 8228 } 8229 8230 /* 8231 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8232 * The path includes the child component in addition to the phci path. 8233 */ 8234 static int 8235 bus_config_one_phci_child(char *path) 8236 { 8237 dev_info_t *ph_dip, *child; 8238 char *devnm; 8239 int rv = MDI_FAILURE; 8240 8241 /* extract the child component of the phci */ 8242 devnm = strrchr(path, '/'); 8243 *devnm++ = '\0'; 8244 8245 /* 8246 * first configure all path components upto phci and then 8247 * configure the phci child. 8248 */ 8249 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8250 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8251 NDI_SUCCESS) { 8252 /* 8253 * release the hold that ndi_devi_config_one() placed 8254 */ 8255 ndi_rele_devi(child); 8256 rv = MDI_SUCCESS; 8257 } 8258 8259 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8260 ndi_rele_devi(ph_dip); 8261 } 8262 8263 devnm--; 8264 *devnm = '/'; 8265 return (rv); 8266 } 8267 8268 /* 8269 * Build a list of phci client paths for the specified vhci client. 8270 * The list includes only those phci client paths which aren't configured yet. 8271 */ 8272 static mdi_phys_path_t * 8273 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8274 { 8275 mdi_vhcache_pathinfo_t *cpi; 8276 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8277 int config_path, len; 8278 8279 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8280 /* 8281 * include only those paths that aren't configured. 
8282 */ 8283 config_path = 0; 8284 if (cpi->cpi_pip == NULL) 8285 config_path = 1; 8286 else { 8287 MDI_PI_LOCK(cpi->cpi_pip); 8288 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8289 config_path = 1; 8290 MDI_PI_UNLOCK(cpi->cpi_pip); 8291 } 8292 8293 if (config_path) { 8294 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8295 len = strlen(cpi->cpi_cphci->cphci_path) + 8296 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8297 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8298 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8299 cpi->cpi_cphci->cphci_path, ct_name, 8300 cpi->cpi_addr); 8301 pp->phys_path_next = NULL; 8302 8303 if (pp_head == NULL) 8304 pp_head = pp; 8305 else 8306 pp_tail->phys_path_next = pp; 8307 pp_tail = pp; 8308 } 8309 } 8310 8311 return (pp_head); 8312 } 8313 8314 /* 8315 * Free the memory allocated for phci client path list. 8316 */ 8317 static void 8318 free_phclient_path_list(mdi_phys_path_t *pp_head) 8319 { 8320 mdi_phys_path_t *pp, *pp_next; 8321 8322 for (pp = pp_head; pp != NULL; pp = pp_next) { 8323 pp_next = pp->phys_path_next; 8324 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8325 kmem_free(pp, sizeof (*pp)); 8326 } 8327 } 8328 8329 /* 8330 * Allocated async client structure and initialize with the specified values. 8331 */ 8332 static mdi_async_client_config_t * 8333 alloc_async_client_config(char *ct_name, char *ct_addr, 8334 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8335 { 8336 mdi_async_client_config_t *acc; 8337 8338 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8339 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8340 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8341 acc->acc_phclient_path_list_head = pp_head; 8342 init_vhcache_lookup_token(&acc->acc_token, tok); 8343 acc->acc_next = NULL; 8344 return (acc); 8345 } 8346 8347 /* 8348 * Free the memory allocated for the async client structure and their members. 
8349 */ 8350 static void 8351 free_async_client_config(mdi_async_client_config_t *acc) 8352 { 8353 if (acc->acc_phclient_path_list_head) 8354 free_phclient_path_list(acc->acc_phclient_path_list_head); 8355 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8356 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8357 kmem_free(acc, sizeof (*acc)); 8358 } 8359 8360 /* 8361 * Sort vhcache pathinfos (cpis) of the specified client. 8362 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8363 * flag set come at the beginning of the list. All cpis which have this 8364 * flag set come at the end of the list. 8365 */ 8366 static void 8367 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8368 { 8369 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8370 8371 cpi_head = cct->cct_cpi_head; 8372 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8373 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8374 cpi_next = cpi->cpi_next; 8375 enqueue_vhcache_pathinfo(cct, cpi); 8376 } 8377 } 8378 8379 /* 8380 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8381 * every vhcache pathinfo of the specified client. If not adjust the flag 8382 * setting appropriately. 8383 * 8384 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8385 * on-disk vhci cache. So every time this flag is updated the cache must be 8386 * flushed. 8387 */ 8388 static void 8389 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8390 mdi_vhcache_lookup_token_t *tok) 8391 { 8392 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8393 mdi_vhcache_client_t *cct; 8394 mdi_vhcache_pathinfo_t *cpi; 8395 8396 rw_enter(&vhcache->vhcache_lock, RW_READER); 8397 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8398 == NULL) { 8399 rw_exit(&vhcache->vhcache_lock); 8400 return; 8401 } 8402 8403 /* 8404 * to avoid unnecessary on-disk cache updates, first check if an 8405 * update is really needed. 
If no update is needed simply return. 8406 */ 8407 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8408 if ((cpi->cpi_pip != NULL && 8409 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8410 (cpi->cpi_pip == NULL && 8411 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8412 break; 8413 } 8414 } 8415 if (cpi == NULL) { 8416 rw_exit(&vhcache->vhcache_lock); 8417 return; 8418 } 8419 8420 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8421 rw_exit(&vhcache->vhcache_lock); 8422 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8423 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8424 tok)) == NULL) { 8425 rw_exit(&vhcache->vhcache_lock); 8426 return; 8427 } 8428 } 8429 8430 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8431 if (cpi->cpi_pip != NULL) 8432 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8433 else 8434 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8435 } 8436 sort_vhcache_paths(cct); 8437 8438 rw_exit(&vhcache->vhcache_lock); 8439 vhcache_dirty(vhc); 8440 } 8441 8442 /* 8443 * Configure all specified paths of the client. 8444 */ 8445 static void 8446 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8447 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8448 { 8449 mdi_phys_path_t *pp; 8450 8451 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8452 (void) bus_config_one_phci_child(pp->phys_path); 8453 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8454 } 8455 8456 /* 8457 * Dequeue elements from vhci async client config list and bus configure 8458 * their corresponding phci clients. 
8459 */ 8460 static void 8461 config_client_paths_thread(void *arg) 8462 { 8463 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8464 mdi_async_client_config_t *acc; 8465 clock_t quit_at_ticks; 8466 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8467 callb_cpr_t cprinfo; 8468 8469 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8470 "mdi_config_client_paths"); 8471 8472 for (; ; ) { 8473 quit_at_ticks = ddi_get_lbolt() + idle_time; 8474 8475 mutex_enter(&vhc->vhc_lock); 8476 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8477 vhc->vhc_acc_list_head == NULL && 8478 ddi_get_lbolt() < quit_at_ticks) { 8479 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8480 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8481 quit_at_ticks); 8482 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8483 } 8484 8485 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8486 vhc->vhc_acc_list_head == NULL) 8487 goto out; 8488 8489 acc = vhc->vhc_acc_list_head; 8490 vhc->vhc_acc_list_head = acc->acc_next; 8491 if (vhc->vhc_acc_list_head == NULL) 8492 vhc->vhc_acc_list_tail = NULL; 8493 vhc->vhc_acc_count--; 8494 mutex_exit(&vhc->vhc_lock); 8495 8496 config_client_paths_sync(vhc, acc->acc_ct_name, 8497 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8498 &acc->acc_token); 8499 8500 free_async_client_config(acc); 8501 } 8502 8503 out: 8504 vhc->vhc_acc_thrcount--; 8505 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8506 CALLB_CPR_EXIT(&cprinfo); 8507 } 8508 8509 /* 8510 * Arrange for all the phci client paths (pp_head) for the specified client 8511 * to be bus configured asynchronously by a thread. 
8512 */ 8513 static void 8514 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8515 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8516 { 8517 mdi_async_client_config_t *acc, *newacc; 8518 int create_thread; 8519 8520 if (pp_head == NULL) 8521 return; 8522 8523 if (mdi_mtc_off) { 8524 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8525 free_phclient_path_list(pp_head); 8526 return; 8527 } 8528 8529 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8530 ASSERT(newacc); 8531 8532 mutex_enter(&vhc->vhc_lock); 8533 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8534 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8535 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8536 free_async_client_config(newacc); 8537 mutex_exit(&vhc->vhc_lock); 8538 return; 8539 } 8540 } 8541 8542 if (vhc->vhc_acc_list_head == NULL) 8543 vhc->vhc_acc_list_head = newacc; 8544 else 8545 vhc->vhc_acc_list_tail->acc_next = newacc; 8546 vhc->vhc_acc_list_tail = newacc; 8547 vhc->vhc_acc_count++; 8548 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8549 cv_broadcast(&vhc->vhc_cv); 8550 create_thread = 0; 8551 } else { 8552 vhc->vhc_acc_thrcount++; 8553 create_thread = 1; 8554 } 8555 mutex_exit(&vhc->vhc_lock); 8556 8557 if (create_thread) 8558 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8559 0, &p0, TS_RUN, minclsyspri); 8560 } 8561 8562 /* 8563 * Return number of online paths for the specified client. 
8564 */ 8565 static int 8566 nonline_paths(mdi_vhcache_client_t *cct) 8567 { 8568 mdi_vhcache_pathinfo_t *cpi; 8569 int online_count = 0; 8570 8571 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8572 if (cpi->cpi_pip != NULL) { 8573 MDI_PI_LOCK(cpi->cpi_pip); 8574 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8575 online_count++; 8576 MDI_PI_UNLOCK(cpi->cpi_pip); 8577 } 8578 } 8579 8580 return (online_count); 8581 } 8582 8583 /* 8584 * Bus configure all paths for the specified vhci client. 8585 * If at least one path for the client is already online, the remaining paths 8586 * will be configured asynchronously. Otherwise, it synchronously configures 8587 * the paths until at least one path is online and then rest of the paths 8588 * will be configured asynchronously. 8589 */ 8590 static void 8591 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8592 { 8593 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8594 mdi_phys_path_t *pp_head, *pp; 8595 mdi_vhcache_client_t *cct; 8596 mdi_vhcache_lookup_token_t tok; 8597 8598 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8599 8600 init_vhcache_lookup_token(&tok, NULL); 8601 8602 if (ct_name == NULL || ct_addr == NULL || 8603 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8604 == NULL || 8605 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8606 rw_exit(&vhcache->vhcache_lock); 8607 return; 8608 } 8609 8610 /* if at least one path is online, configure the rest asynchronously */ 8611 if (nonline_paths(cct) > 0) { 8612 rw_exit(&vhcache->vhcache_lock); 8613 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8614 return; 8615 } 8616 8617 rw_exit(&vhcache->vhcache_lock); 8618 8619 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8620 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8621 rw_enter(&vhcache->vhcache_lock, RW_READER); 8622 8623 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8624 ct_addr, &tok)) 
== NULL) { 8625 rw_exit(&vhcache->vhcache_lock); 8626 goto out; 8627 } 8628 8629 if (nonline_paths(cct) > 0 && 8630 pp->phys_path_next != NULL) { 8631 rw_exit(&vhcache->vhcache_lock); 8632 config_client_paths_async(vhc, ct_name, ct_addr, 8633 pp->phys_path_next, &tok); 8634 pp->phys_path_next = NULL; 8635 goto out; 8636 } 8637 8638 rw_exit(&vhcache->vhcache_lock); 8639 } 8640 } 8641 8642 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8643 out: 8644 free_phclient_path_list(pp_head); 8645 } 8646 8647 static void 8648 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8649 { 8650 mutex_enter(&vhc->vhc_lock); 8651 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8652 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8653 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8654 mutex_exit(&vhc->vhc_lock); 8655 } 8656 8657 static void 8658 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8659 { 8660 mutex_enter(&vhc->vhc_lock); 8661 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8662 cv_broadcast(&vhc->vhc_cv); 8663 mutex_exit(&vhc->vhc_lock); 8664 } 8665 8666 typedef struct mdi_phci_driver_info { 8667 char *phdriver_name; /* name of the phci driver */ 8668 8669 /* set to non zero if the phci driver supports root device */ 8670 int phdriver_root_support; 8671 } mdi_phci_driver_info_t; 8672 8673 /* 8674 * vhci class and root support capability of a phci driver can be 8675 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8676 * phci driver.conf file. The built-in tables below contain this information 8677 * for those phci drivers whose driver.conf files don't yet contain this info. 8678 * 8679 * All phci drivers expect iscsi have root device support. 
8680 */ 8681 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8682 { "fp", 1 }, 8683 { "iscsi", 0 }, 8684 { "ibsrp", 1 } 8685 }; 8686 8687 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8688 8689 static void * 8690 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8691 { 8692 void *new_ptr; 8693 8694 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8695 if (old_ptr) { 8696 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8697 kmem_free(old_ptr, old_size); 8698 } 8699 return (new_ptr); 8700 } 8701 8702 static void 8703 add_to_phci_list(char ***driver_list, int **root_support_list, 8704 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8705 { 8706 ASSERT(*cur_elements <= *max_elements); 8707 if (*cur_elements == *max_elements) { 8708 *max_elements += 10; 8709 *driver_list = mdi_realloc(*driver_list, 8710 sizeof (char *) * (*cur_elements), 8711 sizeof (char *) * (*max_elements)); 8712 *root_support_list = mdi_realloc(*root_support_list, 8713 sizeof (int) * (*cur_elements), 8714 sizeof (int) * (*max_elements)); 8715 } 8716 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8717 (*root_support_list)[*cur_elements] = root_support; 8718 (*cur_elements)++; 8719 } 8720 8721 static void 8722 get_phci_driver_list(char *vhci_class, char ***driver_list, 8723 int **root_support_list, int *cur_elements, int *max_elements) 8724 { 8725 mdi_phci_driver_info_t *st_driver_list, *p; 8726 int st_ndrivers, root_support, i, j, driver_conf_count; 8727 major_t m; 8728 struct devnames *dnp; 8729 ddi_prop_t *propp; 8730 8731 *driver_list = NULL; 8732 *root_support_list = NULL; 8733 *cur_elements = 0; 8734 *max_elements = 0; 8735 8736 /* add the phci drivers derived from the phci driver.conf files */ 8737 for (m = 0; m < devcnt; m++) { 8738 dnp = &devnamesp[m]; 8739 8740 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8741 LOCK_DEV_OPS(&dnp->dn_lock); 8742 if (dnp->dn_global_prop_ptr != NULL && 8743 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8744 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8745 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8746 strcmp(propp->prop_val, vhci_class) == 0) { 8747 8748 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8749 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8750 &dnp->dn_global_prop_ptr->prop_list) 8751 == NULL) ? 1 : 0; 8752 8753 add_to_phci_list(driver_list, root_support_list, 8754 cur_elements, max_elements, dnp->dn_name, 8755 root_support); 8756 8757 UNLOCK_DEV_OPS(&dnp->dn_lock); 8758 } else 8759 UNLOCK_DEV_OPS(&dnp->dn_lock); 8760 } 8761 } 8762 8763 driver_conf_count = *cur_elements; 8764 8765 /* add the phci drivers specified in the built-in tables */ 8766 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8767 st_driver_list = scsi_phci_driver_list; 8768 st_ndrivers = sizeof (scsi_phci_driver_list) / 8769 sizeof (mdi_phci_driver_info_t); 8770 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8771 st_driver_list = ib_phci_driver_list; 8772 st_ndrivers = sizeof (ib_phci_driver_list) / 8773 sizeof (mdi_phci_driver_info_t); 8774 } else { 8775 st_driver_list = NULL; 8776 st_ndrivers = 0; 8777 } 8778 8779 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8780 /* add this phci driver if not already added before */ 8781 for (j = 0; j < driver_conf_count; j++) { 8782 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8783 break; 8784 } 8785 if (j == driver_conf_count) { 8786 add_to_phci_list(driver_list, root_support_list, 8787 cur_elements, max_elements, p->phdriver_name, 8788 p->phdriver_root_support); 8789 } 8790 } 8791 } 8792 8793 /* 8794 * Attach the phci driver instances associated with the specified vhci class. 8795 * If root is mounted attach all phci driver instances. 8796 * If root is not mounted, attach the instances of only those phci 8797 * drivers that have the root support. 
8798 */ 8799 static void 8800 attach_phci_drivers(char *vhci_class) 8801 { 8802 char **driver_list, **p; 8803 int *root_support_list; 8804 int cur_elements, max_elements, i; 8805 major_t m; 8806 8807 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8808 &cur_elements, &max_elements); 8809 8810 for (i = 0; i < cur_elements; i++) { 8811 if (modrootloaded || root_support_list[i]) { 8812 m = ddi_name_to_major(driver_list[i]); 8813 if (m != DDI_MAJOR_T_NONE && 8814 ddi_hold_installed_driver(m)) 8815 ddi_rele_driver(m); 8816 } 8817 } 8818 8819 if (driver_list) { 8820 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8821 kmem_free(*p, strlen(*p) + 1); 8822 kmem_free(driver_list, sizeof (char *) * max_elements); 8823 kmem_free(root_support_list, sizeof (int) * max_elements); 8824 } 8825 } 8826 8827 /* 8828 * Build vhci cache: 8829 * 8830 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8831 * the phci driver instances. During this process the cache gets built. 8832 * 8833 * Cache is built fully if the root is mounted. 8834 * If the root is not mounted, phci drivers that do not have root support 8835 * are not attached. As a result the cache is built partially. The entries 8836 * in the cache reflect only those phci drivers that have root support. 
8837 */ 8838 static int 8839 build_vhci_cache(mdi_vhci_t *vh) 8840 { 8841 mdi_vhci_config_t *vhc = vh->vh_config; 8842 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8843 8844 single_threaded_vhconfig_enter(vhc); 8845 8846 rw_enter(&vhcache->vhcache_lock, RW_READER); 8847 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8848 rw_exit(&vhcache->vhcache_lock); 8849 single_threaded_vhconfig_exit(vhc); 8850 return (0); 8851 } 8852 rw_exit(&vhcache->vhcache_lock); 8853 8854 attach_phci_drivers(vh->vh_class); 8855 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8856 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 8857 8858 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8859 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8860 rw_exit(&vhcache->vhcache_lock); 8861 8862 single_threaded_vhconfig_exit(vhc); 8863 vhcache_dirty(vhc); 8864 return (1); 8865 } 8866 8867 /* 8868 * Determine if discovery of paths is needed. 8869 */ 8870 static int 8871 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8872 { 8873 int rv = 1; 8874 8875 mutex_enter(&vhc->vhc_lock); 8876 if (i_ddi_io_initialized() == 0) { 8877 if (vhc->vhc_path_discovery_boot > 0) { 8878 vhc->vhc_path_discovery_boot--; 8879 goto out; 8880 } 8881 } else { 8882 if (vhc->vhc_path_discovery_postboot > 0) { 8883 vhc->vhc_path_discovery_postboot--; 8884 goto out; 8885 } 8886 } 8887 8888 /* 8889 * Do full path discovery at most once per mdi_path_discovery_interval. 8890 * This is to avoid a series of full path discoveries when opening 8891 * stale /dev/[r]dsk links. 8892 */ 8893 if (mdi_path_discovery_interval != -1 && 8894 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8895 goto out; 8896 8897 rv = 0; 8898 out: 8899 mutex_exit(&vhc->vhc_lock); 8900 return (rv); 8901 } 8902 8903 /* 8904 * Discover all paths: 8905 * 8906 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8907 * driver instances. During this process all paths will be discovered. 
8908 */ 8909 static int 8910 vhcache_discover_paths(mdi_vhci_t *vh) 8911 { 8912 mdi_vhci_config_t *vhc = vh->vh_config; 8913 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8914 int rv = 0; 8915 8916 single_threaded_vhconfig_enter(vhc); 8917 8918 if (vhcache_do_discovery(vhc)) { 8919 attach_phci_drivers(vh->vh_class); 8920 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8921 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 8922 8923 mutex_enter(&vhc->vhc_lock); 8924 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8925 mdi_path_discovery_interval * TICKS_PER_SECOND; 8926 mutex_exit(&vhc->vhc_lock); 8927 rv = 1; 8928 } 8929 8930 single_threaded_vhconfig_exit(vhc); 8931 return (rv); 8932 } 8933 8934 /* 8935 * Generic vhci bus config implementation: 8936 * 8937 * Parameters 8938 * vdip vhci dip 8939 * flags bus config flags 8940 * op bus config operation 8941 * The remaining parameters are bus config operation specific 8942 * 8943 * for BUS_CONFIG_ONE 8944 * arg pointer to name@addr 8945 * child upon successful return from this function, *child will be 8946 * set to the configured and held devinfo child node of vdip. 8947 * ct_addr pointer to client address (i.e. GUID) 8948 * 8949 * for BUS_CONFIG_DRIVER 8950 * arg major number of the driver 8951 * child and ct_addr parameters are ignored 8952 * 8953 * for BUS_CONFIG_ALL 8954 * arg, child, and ct_addr parameters are ignored 8955 * 8956 * Note that for the rest of the bus config operations, this function simply 8957 * calls the framework provided default bus config routine. 
8958 */ 8959 int 8960 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8961 void *arg, dev_info_t **child, char *ct_addr) 8962 { 8963 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8964 mdi_vhci_config_t *vhc = vh->vh_config; 8965 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8966 int rv = 0; 8967 int params_valid = 0; 8968 char *cp; 8969 8970 /* 8971 * To bus config vhcis we relay operation, possibly using another 8972 * thread, to phcis. The phci driver then interacts with MDI to cause 8973 * vhci child nodes to be enumerated under the vhci node. Adding a 8974 * vhci child requires an ndi_devi_enter of the vhci. Since another 8975 * thread may be adding the child, to avoid deadlock we can't wait 8976 * for the relayed operations to complete if we have already entered 8977 * the vhci node. 8978 */ 8979 if (DEVI_BUSY_OWNED(vdip)) { 8980 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8981 "vhci dip is busy owned %p\n", (void *)vdip)); 8982 goto default_bus_config; 8983 } 8984 8985 rw_enter(&vhcache->vhcache_lock, RW_READER); 8986 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8987 rw_exit(&vhcache->vhcache_lock); 8988 rv = build_vhci_cache(vh); 8989 rw_enter(&vhcache->vhcache_lock, RW_READER); 8990 } 8991 8992 switch (op) { 8993 case BUS_CONFIG_ONE: 8994 if (arg != NULL && ct_addr != NULL) { 8995 /* extract node name */ 8996 cp = (char *)arg; 8997 while (*cp != '\0' && *cp != '@') 8998 cp++; 8999 if (*cp == '@') { 9000 params_valid = 1; 9001 *cp = '\0'; 9002 config_client_paths(vhc, (char *)arg, ct_addr); 9003 /* config_client_paths() releases cache_lock */ 9004 *cp = '@'; 9005 break; 9006 } 9007 } 9008 9009 rw_exit(&vhcache->vhcache_lock); 9010 break; 9011 9012 case BUS_CONFIG_DRIVER: 9013 rw_exit(&vhcache->vhcache_lock); 9014 if (rv == 0) 9015 st_bus_config_all_phcis(vhc, flags, op, 9016 (major_t)(uintptr_t)arg); 9017 break; 9018 9019 case BUS_CONFIG_ALL: 9020 rw_exit(&vhcache->vhcache_lock); 9021 if (rv == 0) 9022 
st_bus_config_all_phcis(vhc, flags, op, -1); 9023 break; 9024 9025 default: 9026 rw_exit(&vhcache->vhcache_lock); 9027 break; 9028 } 9029 9030 9031 default_bus_config: 9032 /* 9033 * All requested child nodes are enumerated under the vhci. 9034 * Now configure them. 9035 */ 9036 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9037 NDI_SUCCESS) { 9038 return (MDI_SUCCESS); 9039 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 9040 /* discover all paths and try configuring again */ 9041 if (vhcache_discover_paths(vh) && 9042 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9043 NDI_SUCCESS) 9044 return (MDI_SUCCESS); 9045 } 9046 9047 return (MDI_FAILURE); 9048 } 9049 9050 /* 9051 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 9052 */ 9053 static nvlist_t * 9054 read_on_disk_vhci_cache(char *vhci_class) 9055 { 9056 nvlist_t *nvl; 9057 int err; 9058 char *filename; 9059 9060 filename = vhclass2vhcache_filename(vhci_class); 9061 9062 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9063 kmem_free(filename, strlen(filename) + 1); 9064 return (nvl); 9065 } else if (err == EIO) 9066 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 9067 else if (err == EINVAL) 9068 cmn_err(CE_WARN, 9069 "%s: data file corrupted, will recreate\n", filename); 9070 9071 kmem_free(filename, strlen(filename) + 1); 9072 return (NULL); 9073 } 9074 9075 /* 9076 * Read on-disk vhci cache into nvlists for all vhci classes. 9077 * Called during booting by i_ddi_read_devices_files(). 9078 */ 9079 void 9080 mdi_read_devices_files(void) 9081 { 9082 int i; 9083 9084 for (i = 0; i < N_VHCI_CLASSES; i++) 9085 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9086 } 9087 9088 /* 9089 * Remove all stale entries from vhci cache. 
9090 */ 9091 static void 9092 clean_vhcache(mdi_vhci_config_t *vhc) 9093 { 9094 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9095 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 9096 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 9097 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 9098 9099 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9100 9101 cct_head = vhcache->vhcache_client_head; 9102 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9103 for (cct = cct_head; cct != NULL; cct = cct_next) { 9104 cct_next = cct->cct_next; 9105 9106 cpi_head = cct->cct_cpi_head; 9107 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 9108 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 9109 cpi_next = cpi->cpi_next; 9110 if (cpi->cpi_pip != NULL) { 9111 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 9112 enqueue_tail_vhcache_pathinfo(cct, cpi); 9113 } else 9114 free_vhcache_pathinfo(cpi); 9115 } 9116 9117 if (cct->cct_cpi_head != NULL) 9118 enqueue_vhcache_client(vhcache, cct); 9119 else { 9120 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9121 (mod_hash_key_t)cct->cct_name_addr); 9122 free_vhcache_client(cct); 9123 } 9124 } 9125 9126 cphci_head = vhcache->vhcache_phci_head; 9127 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9128 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 9129 cphci_next = cphci->cphci_next; 9130 if (cphci->cphci_phci != NULL) 9131 enqueue_vhcache_phci(vhcache, cphci); 9132 else 9133 free_vhcache_phci(cphci); 9134 } 9135 9136 vhcache->vhcache_clean_time = lbolt64; 9137 rw_exit(&vhcache->vhcache_lock); 9138 vhcache_dirty(vhc); 9139 } 9140 9141 /* 9142 * Remove all stale entries from vhci cache. 
9143 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9144 */ 9145 void 9146 mdi_clean_vhcache(void) 9147 { 9148 mdi_vhci_t *vh; 9149 9150 mutex_enter(&mdi_mutex); 9151 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9152 vh->vh_refcnt++; 9153 mutex_exit(&mdi_mutex); 9154 clean_vhcache(vh->vh_config); 9155 mutex_enter(&mdi_mutex); 9156 vh->vh_refcnt--; 9157 } 9158 mutex_exit(&mdi_mutex); 9159 } 9160 9161 /* 9162 * mdi_vhci_walk_clients(): 9163 * Walker routine to traverse client dev_info nodes 9164 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9165 * below the client, including nexus devices, which we dont want. 9166 * So we just traverse the immediate siblings, starting from 1st client. 9167 */ 9168 void 9169 mdi_vhci_walk_clients(dev_info_t *vdip, 9170 int (*f)(dev_info_t *, void *), void *arg) 9171 { 9172 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9173 dev_info_t *cdip; 9174 mdi_client_t *ct; 9175 9176 MDI_VHCI_CLIENT_LOCK(vh); 9177 cdip = ddi_get_child(vdip); 9178 while (cdip) { 9179 ct = i_devi_get_client(cdip); 9180 MDI_CLIENT_LOCK(ct); 9181 9182 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9183 cdip = ddi_get_next_sibling(cdip); 9184 else 9185 cdip = NULL; 9186 9187 MDI_CLIENT_UNLOCK(ct); 9188 } 9189 MDI_VHCI_CLIENT_UNLOCK(vh); 9190 } 9191 9192 /* 9193 * mdi_vhci_walk_phcis(): 9194 * Walker routine to traverse phci dev_info nodes 9195 */ 9196 void 9197 mdi_vhci_walk_phcis(dev_info_t *vdip, 9198 int (*f)(dev_info_t *, void *), void *arg) 9199 { 9200 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9201 mdi_phci_t *ph, *next; 9202 9203 MDI_VHCI_PHCI_LOCK(vh); 9204 ph = vh->vh_phci_head; 9205 while (ph) { 9206 MDI_PHCI_LOCK(ph); 9207 9208 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9209 next = ph->ph_next; 9210 else 9211 next = NULL; 9212 9213 MDI_PHCI_UNLOCK(ph); 9214 ph = next; 9215 } 9216 MDI_VHCI_PHCI_UNLOCK(vh); 9217 } 9218 9219 9220 /* 9221 * mdi_walk_vhcis(): 9222 * Walker routine to traverse vhci 
dev_info nodes 9223 */ 9224 void 9225 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9226 { 9227 mdi_vhci_t *vh = NULL; 9228 9229 mutex_enter(&mdi_mutex); 9230 /* 9231 * Scan for already registered vhci 9232 */ 9233 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9234 vh->vh_refcnt++; 9235 mutex_exit(&mdi_mutex); 9236 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9237 mutex_enter(&mdi_mutex); 9238 vh->vh_refcnt--; 9239 break; 9240 } else { 9241 mutex_enter(&mdi_mutex); 9242 vh->vh_refcnt--; 9243 } 9244 } 9245 9246 mutex_exit(&mdi_mutex); 9247 } 9248 9249 /* 9250 * i_mdi_log_sysevent(): 9251 * Logs events for pickup by syseventd 9252 */ 9253 static void 9254 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9255 { 9256 char *path_name; 9257 nvlist_t *attr_list; 9258 9259 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9260 KM_SLEEP) != DDI_SUCCESS) { 9261 goto alloc_failed; 9262 } 9263 9264 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9265 (void) ddi_pathname(dip, path_name); 9266 9267 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9268 ddi_driver_name(dip)) != DDI_SUCCESS) { 9269 goto error; 9270 } 9271 9272 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9273 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9274 goto error; 9275 } 9276 9277 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9278 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9279 goto error; 9280 } 9281 9282 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9283 path_name) != DDI_SUCCESS) { 9284 goto error; 9285 } 9286 9287 if (nvlist_add_string(attr_list, DDI_CLASS, 9288 ph_vh_class) != DDI_SUCCESS) { 9289 goto error; 9290 } 9291 9292 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9293 attr_list, NULL, DDI_SLEEP); 9294 9295 error: 9296 kmem_free(path_name, MAXPATHLEN); 9297 nvlist_free(attr_list); 9298 return; 9299 9300 alloc_failed: 9301 MDI_DEBUG(1, (CE_WARN, dip, 9302 "!i_mdi_log_sysevent: Unable to send sysevent")); 9303 } 
9304 9305 char ** 9306 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9307 { 9308 char **driver_list, **ret_driver_list = NULL; 9309 int *root_support_list; 9310 int cur_elements, max_elements; 9311 9312 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9313 &cur_elements, &max_elements); 9314 9315 9316 if (driver_list) { 9317 kmem_free(root_support_list, sizeof (int) * max_elements); 9318 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9319 * max_elements, sizeof (char *) * cur_elements); 9320 } 9321 *ndrivers = cur_elements; 9322 9323 return (ret_driver_list); 9324 9325 } 9326 9327 void 9328 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9329 { 9330 char **p; 9331 int i; 9332 9333 if (driver_list) { 9334 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9335 kmem_free(*p, strlen(*p) + 1); 9336 kmem_free(driver_list, sizeof (char *) * ndrivers); 9337 } 9338 } 9339 9340 /* 9341 * mdi_is_dev_supported(): 9342 * function called by pHCI bus config operation to determine if a 9343 * device should be represented as a child of the vHCI or the 9344 * pHCI. This decision is made by the vHCI, using cinfo idenity 9345 * information passed by the pHCI - specifics of the cinfo 9346 * representation are by agreement between the pHCI and vHCI. 9347 * Return Values: 9348 * MDI_SUCCESS 9349 * MDI_FAILURE 9350 */ 9351 int 9352 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9353 { 9354 mdi_vhci_t *vh; 9355 9356 ASSERT(class && pdip); 9357 9358 /* 9359 * For dev_supported, mdi_phci_register() must have established pdip as 9360 * a pHCI. 9361 * 9362 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9363 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9364 */ 9365 if (!MDI_PHCI(pdip)) 9366 return (MDI_FAILURE); 9367 9368 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9369 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9370 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9371 return (MDI_FAILURE); 9372 } 9373 9374 /* Return vHCI answer */ 9375 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9376 } 9377 9378 int 9379 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9380 { 9381 uint_t devstate = 0; 9382 dev_info_t *cdip; 9383 9384 if ((pip == NULL) || (dcp == NULL)) 9385 return (MDI_FAILURE); 9386 9387 cdip = mdi_pi_get_client(pip); 9388 9389 switch (mdi_pi_get_state(pip)) { 9390 case MDI_PATHINFO_STATE_INIT: 9391 devstate = DEVICE_DOWN; 9392 break; 9393 case MDI_PATHINFO_STATE_ONLINE: 9394 devstate = DEVICE_ONLINE; 9395 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9396 devstate |= DEVICE_BUSY; 9397 break; 9398 case MDI_PATHINFO_STATE_STANDBY: 9399 devstate = DEVICE_ONLINE; 9400 break; 9401 case MDI_PATHINFO_STATE_FAULT: 9402 devstate = DEVICE_DOWN; 9403 break; 9404 case MDI_PATHINFO_STATE_OFFLINE: 9405 devstate = DEVICE_OFFLINE; 9406 break; 9407 default: 9408 ASSERT(MDI_PI(pip)->pi_state); 9409 } 9410 9411 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9412 return (MDI_FAILURE); 9413 9414 return (MDI_SUCCESS); 9415 } 9416