1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 28 * detailed discussion of the overall mpxio architecture. 
29 * 30 * Default locking order: 31 * 32 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 34 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 36 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 39 */ 40 41 #include <sys/note.h> 42 #include <sys/types.h> 43 #include <sys/varargs.h> 44 #include <sys/param.h> 45 #include <sys/errno.h> 46 #include <sys/uio.h> 47 #include <sys/buf.h> 48 #include <sys/modctl.h> 49 #include <sys/open.h> 50 #include <sys/kmem.h> 51 #include <sys/poll.h> 52 #include <sys/conf.h> 53 #include <sys/bootconf.h> 54 #include <sys/cmn_err.h> 55 #include <sys/stat.h> 56 #include <sys/ddi.h> 57 #include <sys/sunddi.h> 58 #include <sys/ddipropdefs.h> 59 #include <sys/sunndi.h> 60 #include <sys/ndi_impldefs.h> 61 #include <sys/promif.h> 62 #include <sys/sunmdi.h> 63 #include <sys/mdi_impldefs.h> 64 #include <sys/taskq.h> 65 #include <sys/epm.h> 66 #include <sys/sunpm.h> 67 #include <sys/modhash.h> 68 #include <sys/disp.h> 69 #include <sys/autoconf.h> 70 #include <sys/sysmacros.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(level, stmnt) \ 77 if (mdi_debug >= (level)) i_mdi_log stmnt 78 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 79 #else /* !DEBUG */ 80 #define MDI_DEBUG(level, stmnt) 81 #endif /* DEBUG */ 82 83 extern pri_t minclsyspri; 84 extern int modrootloaded; 85 86 /* 87 * Global mutex: 88 * Protects vHCI list and structure members. 
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 *
 * mdi_vhci_head/mdi_vhci_tail form a singly linked list (through vh_next)
 * of registered vHCIs; mdi_vhci_count is the number of entries on it.
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 *
 * The MINALLOC/MAXALLOC values scale with the run-time value of
 * mdi_taskq_n_threads, not with the MDI_TASKQ_N_THREADS default.
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* Number of clock ticks in one second (1000000 microseconds). */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

/* hash size for the bus config cache -- NOTE(review): user not visible here */
static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 */
static uint_t		mdi_pathmap_instance = 1;	/* 0 -> any path */
static int		mdi_pathmap_hash_size = 256;
static kmutex_t		mdi_pathmap_mutex;
static mod_hash_t	*mdi_pathmap_bypath;		/* "path"->instance */
static mod_hash_t	*mdi_pathmap_byinstance;	/* instance->"path" */

/*
 * MDI component property name/value string definitions
 */
const char		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/*
 * cmn_err() format used when a second vHCI driver tries to register for a
 * class that already has one; takes the class name as its only argument.
 */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";

/*
 * Static function prototypes
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int 		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

/* vHCI/pHCI lookup, pHCI locking, and pathinfo list management */
static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

/* pathinfo state changes, client devinfo nodes, and client bookkeeping */
static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
/*
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
				int, int);
static mdi_pathinfo_t 	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
				mdi_vhci_t *vh, int flags, int op);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

/* vHCI cache (vhcache) management */
static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
static boolean_t	stop_vhcache_flush_thread(void *, int);
static void
free_string_array(char **, int); 288 static void free_vhcache_phci(mdi_vhcache_phci_t *); 289 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 290 static void free_vhcache_client(mdi_vhcache_client_t *); 291 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 292 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 293 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 294 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 295 static void vhcache_pi_add(mdi_vhci_config_t *, 296 struct mdi_pathinfo *); 297 static void vhcache_pi_remove(mdi_vhci_config_t *, 298 struct mdi_pathinfo *); 299 static void free_phclient_path_list(mdi_phys_path_t *); 300 static void sort_vhcache_paths(mdi_vhcache_client_t *); 301 static int flush_vhcache(mdi_vhci_config_t *, int); 302 static void vhcache_dirty(mdi_vhci_config_t *); 303 static void free_async_client_config(mdi_async_client_config_t *); 304 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 305 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 306 static nvlist_t *read_on_disk_vhci_cache(char *); 307 extern int fread_nvlist(char *, nvlist_t **); 308 extern int fwrite_nvlist(char *, nvlist_t *); 309 310 /* called once when first vhci registers with mdi */ 311 static void 312 i_mdi_init() 313 { 314 static int initialized = 0; 315 316 if (initialized) 317 return; 318 initialized = 1; 319 320 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 321 322 /* Create our taskq resources */ 323 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 324 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 325 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 326 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 327 328 /* Allocate ['path_instance' <-> "path"] maps */ 329 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 330 mdi_pathmap_bypath = mod_hash_create_strhash( 331 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 332 mod_hash_null_valdtor); 
333 mdi_pathmap_byinstance = mod_hash_create_idhash( 334 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 335 mod_hash_null_valdtor); 336 } 337 338 /* 339 * mdi_get_component_type(): 340 * Return mpxio component type 341 * Return Values: 342 * MDI_COMPONENT_NONE 343 * MDI_COMPONENT_VHCI 344 * MDI_COMPONENT_PHCI 345 * MDI_COMPONENT_CLIENT 346 * XXX This doesn't work under multi-level MPxIO and should be 347 * removed when clients migrate mdi_component_is_*() interfaces. 348 */ 349 int 350 mdi_get_component_type(dev_info_t *dip) 351 { 352 return (DEVI(dip)->devi_mdi_component); 353 } 354 355 /* 356 * mdi_vhci_register(): 357 * Register a vHCI module with the mpxio framework 358 * mdi_vhci_register() is called by vHCI drivers to register the 359 * 'class_driver' vHCI driver and its MDI entrypoints with the 360 * mpxio framework. The vHCI driver must call this interface as 361 * part of its attach(9e) handler. 362 * Competing threads may try to attach mdi_vhci_register() as 363 * the vHCI drivers are loaded and attached as a result of pHCI 364 * driver instance registration (mdi_phci_register()) with the 365 * framework. 366 * Return Values: 367 * MDI_SUCCESS 368 * MDI_FAILURE 369 */ 370 /*ARGSUSED*/ 371 int 372 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 373 int flags) 374 { 375 mdi_vhci_t *vh = NULL; 376 377 /* Registrant can't be older */ 378 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 379 380 #ifdef DEBUG 381 /* 382 * IB nexus driver is loaded only when IB hardware is present. 383 * In order to be able to do this there is a need to drive the loading 384 * and attaching of the IB nexus driver (especially when an IB hardware 385 * is dynamically plugged in) when an IB HCA driver (PHCI) 386 * is being attached. Unfortunately this gets into the limitations 387 * of devfs as there seems to be no clean way to drive configuration 388 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 389 * for IB. 
390 */ 391 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 392 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 393 #endif 394 395 i_mdi_init(); 396 397 mutex_enter(&mdi_mutex); 398 /* 399 * Scan for already registered vhci 400 */ 401 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 402 if (strcmp(vh->vh_class, class) == 0) { 403 /* 404 * vHCI has already been created. Check for valid 405 * vHCI ops registration. We only support one vHCI 406 * module per class 407 */ 408 if (vh->vh_ops != NULL) { 409 mutex_exit(&mdi_mutex); 410 cmn_err(CE_NOTE, vhci_greeting, class); 411 return (MDI_FAILURE); 412 } 413 break; 414 } 415 } 416 417 /* 418 * if not yet created, create the vHCI component 419 */ 420 if (vh == NULL) { 421 struct client_hash *hash = NULL; 422 char *load_balance; 423 424 /* 425 * Allocate and initialize the mdi extensions 426 */ 427 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 428 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 429 KM_SLEEP); 430 vh->vh_client_table = hash; 431 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 432 (void) strcpy(vh->vh_class, class); 433 vh->vh_lb = LOAD_BALANCE_RR; 434 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 435 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 436 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 437 vh->vh_lb = LOAD_BALANCE_NONE; 438 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 439 == 0) { 440 vh->vh_lb = LOAD_BALANCE_LBA; 441 } 442 ddi_prop_free(load_balance); 443 } 444 445 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 446 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 447 448 /* 449 * Store the vHCI ops vectors 450 */ 451 vh->vh_dip = vdip; 452 vh->vh_ops = vops; 453 454 setup_vhci_cache(vh); 455 456 if (mdi_vhci_head == NULL) { 457 mdi_vhci_head = vh; 458 } 459 if (mdi_vhci_tail) { 460 mdi_vhci_tail->vh_next = vh; 461 } 462 mdi_vhci_tail = vh; 463 mdi_vhci_count++; 464 } 465 466 /* 467 * Claim the devfs node as a vhci 
component 468 */ 469 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 470 471 /* 472 * Initialize our back reference from dev_info node 473 */ 474 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 475 mutex_exit(&mdi_mutex); 476 return (MDI_SUCCESS); 477 } 478 479 /* 480 * mdi_vhci_unregister(): 481 * Unregister a vHCI module from mpxio framework 482 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 483 * of a vhci to unregister it from the framework. 484 * Return Values: 485 * MDI_SUCCESS 486 * MDI_FAILURE 487 */ 488 /*ARGSUSED*/ 489 int 490 mdi_vhci_unregister(dev_info_t *vdip, int flags) 491 { 492 mdi_vhci_t *found, *vh, *prev = NULL; 493 494 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 495 496 /* 497 * Check for invalid VHCI 498 */ 499 if ((vh = i_devi_get_vhci(vdip)) == NULL) 500 return (MDI_FAILURE); 501 502 /* 503 * Scan the list of registered vHCIs for a match 504 */ 505 mutex_enter(&mdi_mutex); 506 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 507 if (found == vh) 508 break; 509 prev = found; 510 } 511 512 if (found == NULL) { 513 mutex_exit(&mdi_mutex); 514 return (MDI_FAILURE); 515 } 516 517 /* 518 * Check the vHCI, pHCI and client count. All the pHCIs and clients 519 * should have been unregistered, before a vHCI can be 520 * unregistered. 
521 */ 522 MDI_VHCI_PHCI_LOCK(vh); 523 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 524 MDI_VHCI_PHCI_UNLOCK(vh); 525 mutex_exit(&mdi_mutex); 526 return (MDI_FAILURE); 527 } 528 MDI_VHCI_PHCI_UNLOCK(vh); 529 530 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 531 mutex_exit(&mdi_mutex); 532 return (MDI_FAILURE); 533 } 534 535 /* 536 * Remove the vHCI from the global list 537 */ 538 if (vh == mdi_vhci_head) { 539 mdi_vhci_head = vh->vh_next; 540 } else { 541 prev->vh_next = vh->vh_next; 542 } 543 if (vh == mdi_vhci_tail) { 544 mdi_vhci_tail = prev; 545 } 546 mdi_vhci_count--; 547 mutex_exit(&mdi_mutex); 548 549 vh->vh_ops = NULL; 550 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 551 DEVI(vdip)->devi_mdi_xhci = NULL; 552 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 553 kmem_free(vh->vh_client_table, 554 mdi_client_table_size * sizeof (struct client_hash)); 555 mutex_destroy(&vh->vh_phci_mutex); 556 mutex_destroy(&vh->vh_client_mutex); 557 558 kmem_free(vh, sizeof (mdi_vhci_t)); 559 return (MDI_SUCCESS); 560 } 561 562 /* 563 * i_mdi_vhci_class2vhci(): 564 * Look for a matching vHCI module given a vHCI class name 565 * Return Values: 566 * Handle to a vHCI component 567 * NULL 568 */ 569 static mdi_vhci_t * 570 i_mdi_vhci_class2vhci(char *class) 571 { 572 mdi_vhci_t *vh = NULL; 573 574 ASSERT(!MUTEX_HELD(&mdi_mutex)); 575 576 mutex_enter(&mdi_mutex); 577 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 578 if (strcmp(vh->vh_class, class) == 0) { 579 break; 580 } 581 } 582 mutex_exit(&mdi_mutex); 583 return (vh); 584 } 585 586 /* 587 * i_devi_get_vhci(): 588 * Utility function to get the handle to a vHCI component 589 * Return Values: 590 * Handle to a vHCI component 591 * NULL 592 */ 593 mdi_vhci_t * 594 i_devi_get_vhci(dev_info_t *vdip) 595 { 596 mdi_vhci_t *vh = NULL; 597 if (MDI_VHCI(vdip)) { 598 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 599 } 600 return (vh); 601 } 602 603 /* 604 * mdi_phci_register(): 605 * Register a pHCI 
module with mpxio framework 606 * mdi_phci_register() is called by pHCI drivers to register with 607 * the mpxio framework and a specific 'class_driver' vHCI. The 608 * pHCI driver must call this interface as part of its attach(9e) 609 * handler. 610 * Return Values: 611 * MDI_SUCCESS 612 * MDI_FAILURE 613 */ 614 /*ARGSUSED*/ 615 int 616 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 617 { 618 mdi_phci_t *ph; 619 mdi_vhci_t *vh; 620 char *data; 621 char *pathname; 622 623 /* 624 * Some subsystems, like fcp, perform pHCI registration from a 625 * different thread than the one doing the pHCI attach(9E) - the 626 * driver attach code is waiting for this other thread to complete. 627 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 628 * (indicating that some thread has done an ndi_devi_enter of parent) 629 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 630 */ 631 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 632 633 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 634 (void) ddi_pathname(pdip, pathname); 635 636 /* 637 * Check for mpxio-disable property. Enable mpxio if the property is 638 * missing or not set to "yes". 639 * If the property is set to "yes" then emit a brief message. 
640 */ 641 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 642 &data) == DDI_SUCCESS)) { 643 if (strcmp(data, "yes") == 0) { 644 MDI_DEBUG(1, (CE_CONT, pdip, 645 "?%s (%s%d) multipath capabilities " 646 "disabled via %s.conf.\n", pathname, 647 ddi_driver_name(pdip), ddi_get_instance(pdip), 648 ddi_driver_name(pdip))); 649 ddi_prop_free(data); 650 kmem_free(pathname, MAXPATHLEN); 651 return (MDI_FAILURE); 652 } 653 ddi_prop_free(data); 654 } 655 656 kmem_free(pathname, MAXPATHLEN); 657 658 /* 659 * Search for a matching vHCI 660 */ 661 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 662 if (vh == NULL) { 663 return (MDI_FAILURE); 664 } 665 666 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 667 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 668 ph->ph_dip = pdip; 669 ph->ph_vhci = vh; 670 ph->ph_next = NULL; 671 ph->ph_unstable = 0; 672 ph->ph_vprivate = 0; 673 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 674 675 MDI_PHCI_LOCK(ph); 676 MDI_PHCI_SET_POWER_UP(ph); 677 MDI_PHCI_UNLOCK(ph); 678 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 679 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 680 681 vhcache_phci_add(vh->vh_config, ph); 682 683 MDI_VHCI_PHCI_LOCK(vh); 684 if (vh->vh_phci_head == NULL) { 685 vh->vh_phci_head = ph; 686 } 687 if (vh->vh_phci_tail) { 688 vh->vh_phci_tail->ph_next = ph; 689 } 690 vh->vh_phci_tail = ph; 691 vh->vh_phci_count++; 692 MDI_VHCI_PHCI_UNLOCK(vh); 693 694 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 695 return (MDI_SUCCESS); 696 } 697 698 /* 699 * mdi_phci_unregister(): 700 * Unregister a pHCI module from mpxio framework 701 * mdi_phci_unregister() is called by the pHCI drivers from their 702 * detach(9E) handler to unregister their instances from the 703 * framework. 
704 * Return Values: 705 * MDI_SUCCESS 706 * MDI_FAILURE 707 */ 708 /*ARGSUSED*/ 709 int 710 mdi_phci_unregister(dev_info_t *pdip, int flags) 711 { 712 mdi_vhci_t *vh; 713 mdi_phci_t *ph; 714 mdi_phci_t *tmp; 715 mdi_phci_t *prev = NULL; 716 717 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 718 719 ph = i_devi_get_phci(pdip); 720 if (ph == NULL) { 721 MDI_DEBUG(1, (CE_WARN, pdip, 722 "!pHCI unregister: Not a valid pHCI")); 723 return (MDI_FAILURE); 724 } 725 726 vh = ph->ph_vhci; 727 ASSERT(vh != NULL); 728 if (vh == NULL) { 729 MDI_DEBUG(1, (CE_WARN, pdip, 730 "!pHCI unregister: Not a valid vHCI")); 731 return (MDI_FAILURE); 732 } 733 734 MDI_VHCI_PHCI_LOCK(vh); 735 tmp = vh->vh_phci_head; 736 while (tmp) { 737 if (tmp == ph) { 738 break; 739 } 740 prev = tmp; 741 tmp = tmp->ph_next; 742 } 743 744 if (ph == vh->vh_phci_head) { 745 vh->vh_phci_head = ph->ph_next; 746 } else { 747 prev->ph_next = ph->ph_next; 748 } 749 750 if (ph == vh->vh_phci_tail) { 751 vh->vh_phci_tail = prev; 752 } 753 754 vh->vh_phci_count--; 755 MDI_VHCI_PHCI_UNLOCK(vh); 756 757 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 758 ESC_DDI_INITIATOR_UNREGISTER); 759 vhcache_phci_remove(vh->vh_config, ph); 760 cv_destroy(&ph->ph_unstable_cv); 761 mutex_destroy(&ph->ph_mutex); 762 kmem_free(ph, sizeof (mdi_phci_t)); 763 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 764 DEVI(pdip)->devi_mdi_xhci = NULL; 765 return (MDI_SUCCESS); 766 } 767 768 /* 769 * i_devi_get_phci(): 770 * Utility function to return the phci extensions. 771 */ 772 static mdi_phci_t * 773 i_devi_get_phci(dev_info_t *pdip) 774 { 775 mdi_phci_t *ph = NULL; 776 777 if (MDI_PHCI(pdip)) { 778 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 779 } 780 return (ph); 781 } 782 783 /* 784 * Single thread mdi entry into devinfo node for modifying its children. 785 * If necessary we perform an ndi_devi_enter of the vHCI before doing 786 * an ndi_devi_enter of 'dip'. 
We maintain circular in two parts: one 787 * for the vHCI and one for the pHCI. 788 */ 789 void 790 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 791 { 792 dev_info_t *vdip; 793 int vcircular, pcircular; 794 795 /* Verify calling context */ 796 ASSERT(MDI_PHCI(phci_dip)); 797 vdip = mdi_devi_get_vdip(phci_dip); 798 ASSERT(vdip); /* A pHCI always has a vHCI */ 799 800 /* 801 * If pHCI is detaching then the framework has already entered the 802 * vHCI on a threads that went down the code path leading to 803 * detach_node(). This framework enter of the vHCI during pHCI 804 * detach is done to avoid deadlock with vHCI power management 805 * operations which enter the vHCI and the enter down the path 806 * to the pHCI. If pHCI is detaching then we piggyback this calls 807 * enter of the vHCI on frameworks vHCI enter that has already 808 * occurred - this is OK because we know that the framework thread 809 * doing detach is waiting for our completion. 810 * 811 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 812 * race with detach - but we can't do that because the framework has 813 * already entered the parent, so we have some complexity instead. 814 */ 815 for (;;) { 816 if (ndi_devi_tryenter(vdip, &vcircular)) { 817 ASSERT(vcircular != -1); 818 if (DEVI_IS_DETACHING(phci_dip)) { 819 ndi_devi_exit(vdip, vcircular); 820 vcircular = -1; 821 } 822 break; 823 } else if (DEVI_IS_DETACHING(phci_dip)) { 824 vcircular = -1; 825 break; 826 } else { 827 delay(1); 828 } 829 } 830 831 ndi_devi_enter(phci_dip, &pcircular); 832 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 833 } 834 835 /* 836 * Attempt to mdi_devi_enter. 
837 */ 838 int 839 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 840 { 841 dev_info_t *vdip; 842 int vcircular, pcircular; 843 844 /* Verify calling context */ 845 ASSERT(MDI_PHCI(phci_dip)); 846 vdip = mdi_devi_get_vdip(phci_dip); 847 ASSERT(vdip); /* A pHCI always has a vHCI */ 848 849 if (ndi_devi_tryenter(vdip, &vcircular)) { 850 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 851 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 852 return (1); /* locked */ 853 } 854 ndi_devi_exit(vdip, vcircular); 855 } 856 return (0); /* busy */ 857 } 858 859 /* 860 * Release mdi_devi_enter or successful mdi_devi_tryenter. 861 */ 862 void 863 mdi_devi_exit(dev_info_t *phci_dip, int circular) 864 { 865 dev_info_t *vdip; 866 int vcircular, pcircular; 867 868 /* Verify calling context */ 869 ASSERT(MDI_PHCI(phci_dip)); 870 vdip = mdi_devi_get_vdip(phci_dip); 871 ASSERT(vdip); /* A pHCI always has a vHCI */ 872 873 /* extract two circular recursion values from single int */ 874 pcircular = (short)(circular & 0xFFFF); 875 vcircular = (short)((circular >> 16) & 0xFFFF); 876 877 ndi_devi_exit(phci_dip, pcircular); 878 if (vcircular != -1) 879 ndi_devi_exit(vdip, vcircular); 880 } 881 882 /* 883 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 884 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 885 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 886 * with vHCI power management code during path online/offline. Each 887 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 888 * occur within the scope of an active mdi_devi_enter that establishes the 889 * circular value. 
890 */ 891 void 892 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 893 { 894 int pcircular; 895 896 /* Verify calling context */ 897 ASSERT(MDI_PHCI(phci_dip)); 898 899 pcircular = (short)(circular & 0xFFFF); 900 ndi_devi_exit(phci_dip, pcircular); 901 } 902 903 void 904 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 905 { 906 int pcircular; 907 908 /* Verify calling context */ 909 ASSERT(MDI_PHCI(phci_dip)); 910 911 ndi_devi_enter(phci_dip, &pcircular); 912 913 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 914 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 915 } 916 917 /* 918 * mdi_devi_get_vdip(): 919 * given a pHCI dip return vHCI dip 920 */ 921 dev_info_t * 922 mdi_devi_get_vdip(dev_info_t *pdip) 923 { 924 mdi_phci_t *ph; 925 926 ph = i_devi_get_phci(pdip); 927 if (ph && ph->ph_vhci) 928 return (ph->ph_vhci->vh_dip); 929 return (NULL); 930 } 931 932 /* 933 * mdi_devi_pdip_entered(): 934 * Return 1 if we are vHCI and have done an ndi_devi_enter 935 * of a pHCI 936 */ 937 int 938 mdi_devi_pdip_entered(dev_info_t *vdip) 939 { 940 mdi_vhci_t *vh; 941 mdi_phci_t *ph; 942 943 vh = i_devi_get_vhci(vdip); 944 if (vh == NULL) 945 return (0); 946 947 MDI_VHCI_PHCI_LOCK(vh); 948 ph = vh->vh_phci_head; 949 while (ph) { 950 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 951 MDI_VHCI_PHCI_UNLOCK(vh); 952 return (1); 953 } 954 ph = ph->ph_next; 955 } 956 MDI_VHCI_PHCI_UNLOCK(vh); 957 return (0); 958 } 959 960 /* 961 * mdi_phci_path2devinfo(): 962 * Utility function to search for a valid phci device given 963 * the devfs pathname. 
964 */ 965 dev_info_t * 966 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 967 { 968 char *temp_pathname; 969 mdi_vhci_t *vh; 970 mdi_phci_t *ph; 971 dev_info_t *pdip = NULL; 972 973 vh = i_devi_get_vhci(vdip); 974 ASSERT(vh != NULL); 975 976 if (vh == NULL) { 977 /* 978 * Invalid vHCI component, return failure 979 */ 980 return (NULL); 981 } 982 983 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 984 MDI_VHCI_PHCI_LOCK(vh); 985 ph = vh->vh_phci_head; 986 while (ph != NULL) { 987 pdip = ph->ph_dip; 988 ASSERT(pdip != NULL); 989 *temp_pathname = '\0'; 990 (void) ddi_pathname(pdip, temp_pathname); 991 if (strcmp(temp_pathname, pathname) == 0) { 992 break; 993 } 994 ph = ph->ph_next; 995 } 996 if (ph == NULL) { 997 pdip = NULL; 998 } 999 MDI_VHCI_PHCI_UNLOCK(vh); 1000 kmem_free(temp_pathname, MAXPATHLEN); 1001 return (pdip); 1002 } 1003 1004 /* 1005 * mdi_phci_get_path_count(): 1006 * get number of path information nodes associated with a given 1007 * pHCI device. 1008 */ 1009 int 1010 mdi_phci_get_path_count(dev_info_t *pdip) 1011 { 1012 mdi_phci_t *ph; 1013 int count = 0; 1014 1015 ph = i_devi_get_phci(pdip); 1016 if (ph != NULL) { 1017 count = ph->ph_path_count; 1018 } 1019 return (count); 1020 } 1021 1022 /* 1023 * i_mdi_phci_lock(): 1024 * Lock a pHCI device 1025 * Return Values: 1026 * None 1027 * Note: 1028 * The default locking order is: 1029 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1030 * But there are number of situations where locks need to be 1031 * grabbed in reverse order. This routine implements try and lock 1032 * mechanism depending on the requested parameter option. 1033 */ 1034 static void 1035 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1036 { 1037 if (pip) { 1038 /* Reverse locking is requested. */ 1039 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1040 /* 1041 * tryenter failed. 
Try to grab again 1042 * after a small delay 1043 */ 1044 MDI_PI_HOLD(pip); 1045 MDI_PI_UNLOCK(pip); 1046 delay(1); 1047 MDI_PI_LOCK(pip); 1048 MDI_PI_RELE(pip); 1049 } 1050 } else { 1051 MDI_PHCI_LOCK(ph); 1052 } 1053 } 1054 1055 /* 1056 * i_mdi_phci_unlock(): 1057 * Unlock the pHCI component 1058 */ 1059 static void 1060 i_mdi_phci_unlock(mdi_phci_t *ph) 1061 { 1062 MDI_PHCI_UNLOCK(ph); 1063 } 1064 1065 /* 1066 * i_mdi_devinfo_create(): 1067 * create client device's devinfo node 1068 * Return Values: 1069 * dev_info 1070 * NULL 1071 * Notes: 1072 */ 1073 static dev_info_t * 1074 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1075 char **compatible, int ncompatible) 1076 { 1077 dev_info_t *cdip = NULL; 1078 1079 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1080 1081 /* Verify for duplicate entry */ 1082 cdip = i_mdi_devinfo_find(vh, name, guid); 1083 ASSERT(cdip == NULL); 1084 if (cdip) { 1085 cmn_err(CE_WARN, 1086 "i_mdi_devinfo_create: client dip %p already exists", 1087 (void *)cdip); 1088 } 1089 1090 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1091 if (cdip == NULL) 1092 goto fail; 1093 1094 /* 1095 * Create component type and Global unique identifier 1096 * properties 1097 */ 1098 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1099 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1100 goto fail; 1101 } 1102 1103 /* Decorate the node with compatible property */ 1104 if (compatible && 1105 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1106 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1107 goto fail; 1108 } 1109 1110 return (cdip); 1111 1112 fail: 1113 if (cdip) { 1114 (void) ndi_prop_remove_all(cdip); 1115 (void) ndi_devi_free(cdip); 1116 } 1117 return (NULL); 1118 } 1119 1120 /* 1121 * i_mdi_devinfo_find(): 1122 * Find a matching devinfo node for given client node name 1123 * and its guid. 
1124 * Return Values: 1125 * Handle to a dev_info node or NULL 1126 */ 1127 static dev_info_t * 1128 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1129 { 1130 char *data; 1131 dev_info_t *cdip = NULL; 1132 dev_info_t *ndip = NULL; 1133 int circular; 1134 1135 ndi_devi_enter(vh->vh_dip, &circular); 1136 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1137 while ((cdip = ndip) != NULL) { 1138 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1139 1140 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1141 continue; 1142 } 1143 1144 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1145 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1146 &data) != DDI_PROP_SUCCESS) { 1147 continue; 1148 } 1149 1150 if (strcmp(data, guid) != 0) { 1151 ddi_prop_free(data); 1152 continue; 1153 } 1154 ddi_prop_free(data); 1155 break; 1156 } 1157 ndi_devi_exit(vh->vh_dip, circular); 1158 return (cdip); 1159 } 1160 1161 /* 1162 * i_mdi_devinfo_remove(): 1163 * Remove a client device node 1164 */ 1165 static int 1166 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1167 { 1168 int rv = MDI_SUCCESS; 1169 1170 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1171 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1172 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1173 if (rv != NDI_SUCCESS) { 1174 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1175 " failed. 
cdip = %p\n", (void *)cdip)); 1176 } 1177 /* 1178 * Convert to MDI error code 1179 */ 1180 switch (rv) { 1181 case NDI_SUCCESS: 1182 rv = MDI_SUCCESS; 1183 break; 1184 case NDI_BUSY: 1185 rv = MDI_BUSY; 1186 break; 1187 default: 1188 rv = MDI_FAILURE; 1189 break; 1190 } 1191 } 1192 return (rv); 1193 } 1194 1195 /* 1196 * i_devi_get_client() 1197 * Utility function to get mpxio component extensions 1198 */ 1199 static mdi_client_t * 1200 i_devi_get_client(dev_info_t *cdip) 1201 { 1202 mdi_client_t *ct = NULL; 1203 1204 if (MDI_CLIENT(cdip)) { 1205 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1206 } 1207 return (ct); 1208 } 1209 1210 /* 1211 * i_mdi_is_child_present(): 1212 * Search for the presence of client device dev_info node 1213 */ 1214 static int 1215 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1216 { 1217 int rv = MDI_FAILURE; 1218 struct dev_info *dip; 1219 int circular; 1220 1221 ndi_devi_enter(vdip, &circular); 1222 dip = DEVI(vdip)->devi_child; 1223 while (dip) { 1224 if (dip == DEVI(cdip)) { 1225 rv = MDI_SUCCESS; 1226 break; 1227 } 1228 dip = dip->devi_sibling; 1229 } 1230 ndi_devi_exit(vdip, circular); 1231 return (rv); 1232 } 1233 1234 1235 /* 1236 * i_mdi_client_lock(): 1237 * Grab client component lock 1238 * Return Values: 1239 * None 1240 * Note: 1241 * The default locking order is: 1242 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1243 * But there are number of situations where locks need to be 1244 * grabbed in reverse order. This routine implements try and lock 1245 * mechanism depending on the requested parameter option. 1246 */ 1247 static void 1248 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1249 { 1250 if (pip) { 1251 /* 1252 * Reverse locking is requested. 1253 */ 1254 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1255 /* 1256 * tryenter failed. 
Try to grab again 1257 * after a small delay 1258 */ 1259 MDI_PI_HOLD(pip); 1260 MDI_PI_UNLOCK(pip); 1261 delay(1); 1262 MDI_PI_LOCK(pip); 1263 MDI_PI_RELE(pip); 1264 } 1265 } else { 1266 MDI_CLIENT_LOCK(ct); 1267 } 1268 } 1269 1270 /* 1271 * i_mdi_client_unlock(): 1272 * Unlock a client component 1273 */ 1274 static void 1275 i_mdi_client_unlock(mdi_client_t *ct) 1276 { 1277 MDI_CLIENT_UNLOCK(ct); 1278 } 1279 1280 /* 1281 * i_mdi_client_alloc(): 1282 * Allocate and initialize a client structure. Caller should 1283 * hold the vhci client lock. 1284 * Return Values: 1285 * Handle to a client component 1286 */ 1287 /*ARGSUSED*/ 1288 static mdi_client_t * 1289 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1290 { 1291 mdi_client_t *ct; 1292 1293 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1294 1295 /* 1296 * Allocate and initialize a component structure. 1297 */ 1298 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1299 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1300 ct->ct_hnext = NULL; 1301 ct->ct_hprev = NULL; 1302 ct->ct_dip = NULL; 1303 ct->ct_vhci = vh; 1304 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1305 (void) strcpy(ct->ct_drvname, name); 1306 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1307 (void) strcpy(ct->ct_guid, lguid); 1308 ct->ct_cprivate = NULL; 1309 ct->ct_vprivate = NULL; 1310 ct->ct_flags = 0; 1311 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1312 MDI_CLIENT_LOCK(ct); 1313 MDI_CLIENT_SET_OFFLINE(ct); 1314 MDI_CLIENT_SET_DETACH(ct); 1315 MDI_CLIENT_SET_POWER_UP(ct); 1316 MDI_CLIENT_UNLOCK(ct); 1317 ct->ct_failover_flags = 0; 1318 ct->ct_failover_status = 0; 1319 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1320 ct->ct_unstable = 0; 1321 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1322 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1323 ct->ct_lb = vh->vh_lb; 1324 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1325 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1326 ct->ct_path_count = 0; 1327 ct->ct_path_head = NULL; 1328 ct->ct_path_tail = NULL; 1329 ct->ct_path_last = NULL; 1330 1331 /* 1332 * Add this client component to our client hash queue 1333 */ 1334 i_mdi_client_enlist_table(vh, ct); 1335 return (ct); 1336 } 1337 1338 /* 1339 * i_mdi_client_enlist_table(): 1340 * Attach the client device to the client hash table. Caller 1341 * should hold the vhci client lock. 1342 */ 1343 static void 1344 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1345 { 1346 int index; 1347 struct client_hash *head; 1348 1349 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1350 1351 index = i_mdi_get_hash_key(ct->ct_guid); 1352 head = &vh->vh_client_table[index]; 1353 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1354 head->ct_hash_head = ct; 1355 head->ct_hash_count++; 1356 vh->vh_client_count++; 1357 } 1358 1359 /* 1360 * i_mdi_client_delist_table(): 1361 * Attach the client device to the client hash table. 1362 * Caller should hold the vhci client lock. 
1363 */ 1364 static void 1365 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1366 { 1367 int index; 1368 char *guid; 1369 struct client_hash *head; 1370 mdi_client_t *next; 1371 mdi_client_t *last; 1372 1373 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1374 1375 guid = ct->ct_guid; 1376 index = i_mdi_get_hash_key(guid); 1377 head = &vh->vh_client_table[index]; 1378 1379 last = NULL; 1380 next = (mdi_client_t *)head->ct_hash_head; 1381 while (next != NULL) { 1382 if (next == ct) { 1383 break; 1384 } 1385 last = next; 1386 next = next->ct_hnext; 1387 } 1388 1389 if (next) { 1390 head->ct_hash_count--; 1391 if (last == NULL) { 1392 head->ct_hash_head = ct->ct_hnext; 1393 } else { 1394 last->ct_hnext = ct->ct_hnext; 1395 } 1396 ct->ct_hnext = NULL; 1397 vh->vh_client_count--; 1398 } 1399 } 1400 1401 1402 /* 1403 * i_mdi_client_free(): 1404 * Free a client component 1405 */ 1406 static int 1407 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1408 { 1409 int rv = MDI_SUCCESS; 1410 int flags = ct->ct_flags; 1411 dev_info_t *cdip; 1412 dev_info_t *vdip; 1413 1414 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1415 1416 vdip = vh->vh_dip; 1417 cdip = ct->ct_dip; 1418 1419 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1420 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1421 DEVI(cdip)->devi_mdi_client = NULL; 1422 1423 /* 1424 * Clear out back ref. 
to dev_info_t node 1425 */ 1426 ct->ct_dip = NULL; 1427 1428 /* 1429 * Remove this client from our hash queue 1430 */ 1431 i_mdi_client_delist_table(vh, ct); 1432 1433 /* 1434 * Uninitialize and free the component 1435 */ 1436 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1437 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1438 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1439 cv_destroy(&ct->ct_failover_cv); 1440 cv_destroy(&ct->ct_unstable_cv); 1441 cv_destroy(&ct->ct_powerchange_cv); 1442 mutex_destroy(&ct->ct_mutex); 1443 kmem_free(ct, sizeof (*ct)); 1444 1445 if (cdip != NULL) { 1446 MDI_VHCI_CLIENT_UNLOCK(vh); 1447 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1448 MDI_VHCI_CLIENT_LOCK(vh); 1449 } 1450 return (rv); 1451 } 1452 1453 /* 1454 * i_mdi_client_find(): 1455 * Find the client structure corresponding to a given guid 1456 * Caller should hold the vhci client lock. 1457 */ 1458 static mdi_client_t * 1459 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1460 { 1461 int index; 1462 struct client_hash *head; 1463 mdi_client_t *ct; 1464 1465 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1466 1467 index = i_mdi_get_hash_key(guid); 1468 head = &vh->vh_client_table[index]; 1469 1470 ct = head->ct_hash_head; 1471 while (ct != NULL) { 1472 if (strcmp(ct->ct_guid, guid) == 0 && 1473 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1474 break; 1475 } 1476 ct = ct->ct_hnext; 1477 } 1478 return (ct); 1479 } 1480 1481 /* 1482 * i_mdi_client_update_state(): 1483 * Compute and update client device state 1484 * Notes: 1485 * A client device can be in any of three possible states: 1486 * 1487 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1488 * one online/standby paths. Can tolerate failures. 1489 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1490 * no alternate paths available as standby. A failure on the online 1491 * would result in loss of access to device data. 
1492 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1493 * no paths available to access the device. 1494 */ 1495 static void 1496 i_mdi_client_update_state(mdi_client_t *ct) 1497 { 1498 int state; 1499 1500 ASSERT(MDI_CLIENT_LOCKED(ct)); 1501 state = i_mdi_client_compute_state(ct, NULL); 1502 MDI_CLIENT_SET_STATE(ct, state); 1503 } 1504 1505 /* 1506 * i_mdi_client_compute_state(): 1507 * Compute client device state 1508 * 1509 * mdi_phci_t * Pointer to pHCI structure which should 1510 * while computing the new value. Used by 1511 * i_mdi_phci_offline() to find the new 1512 * client state after DR of a pHCI. 1513 */ 1514 static int 1515 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1516 { 1517 int state; 1518 int online_count = 0; 1519 int standby_count = 0; 1520 mdi_pathinfo_t *pip, *next; 1521 1522 ASSERT(MDI_CLIENT_LOCKED(ct)); 1523 pip = ct->ct_path_head; 1524 while (pip != NULL) { 1525 MDI_PI_LOCK(pip); 1526 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1527 if (MDI_PI(pip)->pi_phci == ph) { 1528 MDI_PI_UNLOCK(pip); 1529 pip = next; 1530 continue; 1531 } 1532 1533 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1534 == MDI_PATHINFO_STATE_ONLINE) 1535 online_count++; 1536 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1537 == MDI_PATHINFO_STATE_STANDBY) 1538 standby_count++; 1539 MDI_PI_UNLOCK(pip); 1540 pip = next; 1541 } 1542 1543 if (online_count == 0) { 1544 if (standby_count == 0) { 1545 state = MDI_CLIENT_STATE_FAILED; 1546 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1547 " ct = %p\n", (void *)ct)); 1548 } else if (standby_count == 1) { 1549 state = MDI_CLIENT_STATE_DEGRADED; 1550 } else { 1551 state = MDI_CLIENT_STATE_OPTIMAL; 1552 } 1553 } else if (online_count == 1) { 1554 if (standby_count == 0) { 1555 state = MDI_CLIENT_STATE_DEGRADED; 1556 } else { 1557 state = MDI_CLIENT_STATE_OPTIMAL; 1558 } 1559 } else { 1560 state = MDI_CLIENT_STATE_OPTIMAL; 1561 } 1562 return (state); 1563 } 1564 
1565 /* 1566 * i_mdi_client2devinfo(): 1567 * Utility function 1568 */ 1569 dev_info_t * 1570 i_mdi_client2devinfo(mdi_client_t *ct) 1571 { 1572 return (ct->ct_dip); 1573 } 1574 1575 /* 1576 * mdi_client_path2_devinfo(): 1577 * Given the parent devinfo and child devfs pathname, search for 1578 * a valid devfs node handle. 1579 */ 1580 dev_info_t * 1581 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1582 { 1583 dev_info_t *cdip = NULL; 1584 dev_info_t *ndip = NULL; 1585 char *temp_pathname; 1586 int circular; 1587 1588 /* 1589 * Allocate temp buffer 1590 */ 1591 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1592 1593 /* 1594 * Lock parent against changes 1595 */ 1596 ndi_devi_enter(vdip, &circular); 1597 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1598 while ((cdip = ndip) != NULL) { 1599 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1600 1601 *temp_pathname = '\0'; 1602 (void) ddi_pathname(cdip, temp_pathname); 1603 if (strcmp(temp_pathname, pathname) == 0) { 1604 break; 1605 } 1606 } 1607 /* 1608 * Release devinfo lock 1609 */ 1610 ndi_devi_exit(vdip, circular); 1611 1612 /* 1613 * Free the temp buffer 1614 */ 1615 kmem_free(temp_pathname, MAXPATHLEN); 1616 return (cdip); 1617 } 1618 1619 /* 1620 * mdi_client_get_path_count(): 1621 * Utility function to get number of path information nodes 1622 * associated with a given client device. 
1623 */ 1624 int 1625 mdi_client_get_path_count(dev_info_t *cdip) 1626 { 1627 mdi_client_t *ct; 1628 int count = 0; 1629 1630 ct = i_devi_get_client(cdip); 1631 if (ct != NULL) { 1632 count = ct->ct_path_count; 1633 } 1634 return (count); 1635 } 1636 1637 1638 /* 1639 * i_mdi_get_hash_key(): 1640 * Create a hash using strings as keys 1641 * 1642 */ 1643 static int 1644 i_mdi_get_hash_key(char *str) 1645 { 1646 uint32_t g, hash = 0; 1647 char *p; 1648 1649 for (p = str; *p != '\0'; p++) { 1650 g = *p; 1651 hash += g; 1652 } 1653 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1654 } 1655 1656 /* 1657 * mdi_get_lb_policy(): 1658 * Get current load balancing policy for a given client device 1659 */ 1660 client_lb_t 1661 mdi_get_lb_policy(dev_info_t *cdip) 1662 { 1663 client_lb_t lb = LOAD_BALANCE_NONE; 1664 mdi_client_t *ct; 1665 1666 ct = i_devi_get_client(cdip); 1667 if (ct != NULL) { 1668 lb = ct->ct_lb; 1669 } 1670 return (lb); 1671 } 1672 1673 /* 1674 * mdi_set_lb_region_size(): 1675 * Set current region size for the load-balance 1676 */ 1677 int 1678 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1679 { 1680 mdi_client_t *ct; 1681 int rv = MDI_FAILURE; 1682 1683 ct = i_devi_get_client(cdip); 1684 if (ct != NULL && ct->ct_lb_args != NULL) { 1685 ct->ct_lb_args->region_size = region_size; 1686 rv = MDI_SUCCESS; 1687 } 1688 return (rv); 1689 } 1690 1691 /* 1692 * mdi_Set_lb_policy(): 1693 * Set current load balancing policy for a given client device 1694 */ 1695 int 1696 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1697 { 1698 mdi_client_t *ct; 1699 int rv = MDI_FAILURE; 1700 1701 ct = i_devi_get_client(cdip); 1702 if (ct != NULL) { 1703 ct->ct_lb = lb; 1704 rv = MDI_SUCCESS; 1705 } 1706 return (rv); 1707 } 1708 1709 /* 1710 * mdi_failover(): 1711 * failover function called by the vHCI drivers to initiate 1712 * a failover operation. This is typically due to non-availability 1713 * of online paths to route I/O requests. 
Failover can be 1714 * triggered through user application also. 1715 * 1716 * The vHCI driver calls mdi_failover() to initiate a failover 1717 * operation. mdi_failover() calls back into the vHCI driver's 1718 * vo_failover() entry point to perform the actual failover 1719 * operation. The reason for requiring the vHCI driver to 1720 * initiate failover by calling mdi_failover(), instead of directly 1721 * executing vo_failover() itself, is to ensure that the mdi 1722 * framework can keep track of the client state properly. 1723 * Additionally, mdi_failover() provides as a convenience the 1724 * option of performing the failover operation synchronously or 1725 * asynchronously 1726 * 1727 * Upon successful completion of the failover operation, the 1728 * paths that were previously ONLINE will be in the STANDBY state, 1729 * and the newly activated paths will be in the ONLINE state. 1730 * 1731 * The flags modifier determines whether the activation is done 1732 * synchronously: MDI_FAILOVER_SYNC 1733 * Return Values: 1734 * MDI_SUCCESS 1735 * MDI_FAILURE 1736 * MDI_BUSY 1737 */ 1738 /*ARGSUSED*/ 1739 int 1740 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1741 { 1742 int rv; 1743 mdi_client_t *ct; 1744 1745 ct = i_devi_get_client(cdip); 1746 ASSERT(ct != NULL); 1747 if (ct == NULL) { 1748 /* cdip is not a valid client device. Nothing more to do. */ 1749 return (MDI_FAILURE); 1750 } 1751 1752 MDI_CLIENT_LOCK(ct); 1753 1754 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1755 /* A path to the client is being freed */ 1756 MDI_CLIENT_UNLOCK(ct); 1757 return (MDI_BUSY); 1758 } 1759 1760 1761 if (MDI_CLIENT_IS_FAILED(ct)) { 1762 /* 1763 * Client is in failed state. Nothing more to do. 
1764 */ 1765 MDI_CLIENT_UNLOCK(ct); 1766 return (MDI_FAILURE); 1767 } 1768 1769 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1770 /* 1771 * Failover is already in progress; return BUSY 1772 */ 1773 MDI_CLIENT_UNLOCK(ct); 1774 return (MDI_BUSY); 1775 } 1776 /* 1777 * Make sure that mdi_pathinfo node state changes are processed. 1778 * We do not allow failovers to progress while client path state 1779 * changes are in progress 1780 */ 1781 if (ct->ct_unstable) { 1782 if (flags == MDI_FAILOVER_ASYNC) { 1783 MDI_CLIENT_UNLOCK(ct); 1784 return (MDI_BUSY); 1785 } else { 1786 while (ct->ct_unstable) 1787 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1788 } 1789 } 1790 1791 /* 1792 * Client device is in stable state. Before proceeding, perform sanity 1793 * checks again. 1794 */ 1795 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1796 (!i_ddi_devi_attached(ct->ct_dip))) { 1797 /* 1798 * Client is in failed state. Nothing more to do. 1799 */ 1800 MDI_CLIENT_UNLOCK(ct); 1801 return (MDI_FAILURE); 1802 } 1803 1804 /* 1805 * Set the client state as failover in progress. 1806 */ 1807 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1808 ct->ct_failover_flags = flags; 1809 MDI_CLIENT_UNLOCK(ct); 1810 1811 if (flags == MDI_FAILOVER_ASYNC) { 1812 /* 1813 * Submit the initiate failover request via CPR safe 1814 * taskq threads. 1815 */ 1816 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1817 ct, KM_SLEEP); 1818 return (MDI_ACCEPT); 1819 } else { 1820 /* 1821 * Synchronous failover mode. Typically invoked from the user 1822 * land. 1823 */ 1824 rv = i_mdi_failover(ct); 1825 } 1826 return (rv); 1827 } 1828 1829 /* 1830 * i_mdi_failover(): 1831 * internal failover function. Invokes vHCI drivers failover 1832 * callback function and process the failover status 1833 * Return Values: 1834 * None 1835 * 1836 * Note: A client device in failover state can not be detached or freed. 
1837 */ 1838 static int 1839 i_mdi_failover(void *arg) 1840 { 1841 int rv = MDI_SUCCESS; 1842 mdi_client_t *ct = (mdi_client_t *)arg; 1843 mdi_vhci_t *vh = ct->ct_vhci; 1844 1845 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1846 1847 if (vh->vh_ops->vo_failover != NULL) { 1848 /* 1849 * Call vHCI drivers callback routine 1850 */ 1851 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1852 ct->ct_failover_flags); 1853 } 1854 1855 MDI_CLIENT_LOCK(ct); 1856 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1857 1858 /* 1859 * Save the failover return status 1860 */ 1861 ct->ct_failover_status = rv; 1862 1863 /* 1864 * As a result of failover, client status would have been changed. 1865 * Update the client state and wake up anyone waiting on this client 1866 * device. 1867 */ 1868 i_mdi_client_update_state(ct); 1869 1870 cv_broadcast(&ct->ct_failover_cv); 1871 MDI_CLIENT_UNLOCK(ct); 1872 return (rv); 1873 } 1874 1875 /* 1876 * Load balancing is logical block. 1877 * IOs within the range described by region_size 1878 * would go on the same path. This would improve the 1879 * performance by cache-hit on some of the RAID devices. 1880 * Search only for online paths(At some point we 1881 * may want to balance across target ports). 1882 * If no paths are found then default to round-robin. 
1883 */ 1884 static int 1885 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1886 { 1887 int path_index = -1; 1888 int online_path_count = 0; 1889 int online_nonpref_path_count = 0; 1890 int region_size = ct->ct_lb_args->region_size; 1891 mdi_pathinfo_t *pip; 1892 mdi_pathinfo_t *next; 1893 int preferred, path_cnt; 1894 1895 pip = ct->ct_path_head; 1896 while (pip) { 1897 MDI_PI_LOCK(pip); 1898 if (MDI_PI(pip)->pi_state == 1899 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1900 online_path_count++; 1901 } else if (MDI_PI(pip)->pi_state == 1902 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1903 online_nonpref_path_count++; 1904 } 1905 next = (mdi_pathinfo_t *) 1906 MDI_PI(pip)->pi_client_link; 1907 MDI_PI_UNLOCK(pip); 1908 pip = next; 1909 } 1910 /* if found any online/preferred then use this type */ 1911 if (online_path_count > 0) { 1912 path_cnt = online_path_count; 1913 preferred = 1; 1914 } else if (online_nonpref_path_count > 0) { 1915 path_cnt = online_nonpref_path_count; 1916 preferred = 0; 1917 } else { 1918 path_cnt = 0; 1919 } 1920 if (path_cnt) { 1921 path_index = (bp->b_blkno >> region_size) % path_cnt; 1922 pip = ct->ct_path_head; 1923 while (pip && path_index != -1) { 1924 MDI_PI_LOCK(pip); 1925 if (path_index == 0 && 1926 (MDI_PI(pip)->pi_state == 1927 MDI_PATHINFO_STATE_ONLINE) && 1928 MDI_PI(pip)->pi_preferred == preferred) { 1929 MDI_PI_HOLD(pip); 1930 MDI_PI_UNLOCK(pip); 1931 *ret_pip = pip; 1932 return (MDI_SUCCESS); 1933 } 1934 path_index --; 1935 next = (mdi_pathinfo_t *) 1936 MDI_PI(pip)->pi_client_link; 1937 MDI_PI_UNLOCK(pip); 1938 pip = next; 1939 } 1940 if (pip == NULL) { 1941 MDI_DEBUG(4, (CE_NOTE, NULL, 1942 "!lba %llx, no pip !!\n", 1943 bp->b_lblkno)); 1944 } else { 1945 MDI_DEBUG(4, (CE_NOTE, NULL, 1946 "!lba %llx, no pip for path_index, " 1947 "pip %p\n", bp->b_lblkno, (void *)pip)); 1948 } 1949 } 1950 return (MDI_FAILURE); 1951 } 1952 1953 /* 1954 * mdi_select_path(): 1955 * select a 
path to access a client device. 1956 * 1957 * mdi_select_path() function is called by the vHCI drivers to 1958 * select a path to route the I/O request to. The caller passes 1959 * the block I/O data transfer structure ("buf") as one of the 1960 * parameters. The mpxio framework uses the buf structure 1961 * contents to maintain per path statistics (total I/O size / 1962 * count pending). If more than one online paths are available to 1963 * select, the framework automatically selects a suitable path 1964 * for routing I/O request. If a failover operation is active for 1965 * this client device the call shall be failed with MDI_BUSY error 1966 * code. 1967 * 1968 * By default this function returns a suitable path in online 1969 * state based on the current load balancing policy. Currently 1970 * we support LOAD_BALANCE_NONE (Previously selected online path 1971 * will continue to be used till the path is usable) and 1972 * LOAD_BALANCE_RR (Online paths will be selected in a round 1973 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 1974 * based on the logical block). The load balancing 1975 * through vHCI drivers configuration file (driver.conf). 1976 * 1977 * vHCI drivers may override this default behavior by specifying 1978 * appropriate flags. The meaning of the thrid argument depends 1979 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 1980 * then the argument is the "path instance" of the path to select. 1981 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 1982 * "start_pip". A non NULL "start_pip" is the starting point to 1983 * walk and find the next appropriate path. The following values 1984 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 1985 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 1986 * STANDBY path). 1987 * 1988 * The non-standard behavior is used by the scsi_vhci driver, 1989 * whenever it has to use a STANDBY/FAULTED path. Eg. 
during 1990 * attach of client devices (to avoid an unnecessary failover 1991 * when the STANDBY path comes up first), during failover 1992 * (to activate a STANDBY path as ONLINE). 1993 * 1994 * The selected path is returned in a a mdi_hold_path() state 1995 * (pi_ref_cnt). Caller should release the hold by calling 1996 * mdi_rele_path(). 1997 * 1998 * Return Values: 1999 * MDI_SUCCESS - Completed successfully 2000 * MDI_BUSY - Client device is busy failing over 2001 * MDI_NOPATH - Client device is online, but no valid path are 2002 * available to access this client device 2003 * MDI_FAILURE - Invalid client device or state 2004 * MDI_DEVI_ONLINING 2005 * - Client device (struct dev_info state) is in 2006 * onlining state. 2007 */ 2008 2009 /*ARGSUSED*/ 2010 int 2011 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2012 void *arg, mdi_pathinfo_t **ret_pip) 2013 { 2014 mdi_client_t *ct; 2015 mdi_pathinfo_t *pip; 2016 mdi_pathinfo_t *next; 2017 mdi_pathinfo_t *head; 2018 mdi_pathinfo_t *start; 2019 client_lb_t lbp; /* load balancing policy */ 2020 int sb = 1; /* standard behavior */ 2021 int preferred = 1; /* preferred path */ 2022 int cond, cont = 1; 2023 int retry = 0; 2024 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2025 int path_instance; /* request specific path instance */ 2026 2027 /* determine type of arg based on flags */ 2028 if (flags & MDI_SELECT_PATH_INSTANCE) { 2029 flags &= ~MDI_SELECT_PATH_INSTANCE; 2030 path_instance = (int)(intptr_t)arg; 2031 start_pip = NULL; 2032 } else { 2033 path_instance = 0; 2034 start_pip = (mdi_pathinfo_t *)arg; 2035 } 2036 2037 if (flags != 0) { 2038 /* 2039 * disable default behavior 2040 */ 2041 sb = 0; 2042 } 2043 2044 *ret_pip = NULL; 2045 ct = i_devi_get_client(cdip); 2046 if (ct == NULL) { 2047 /* mdi extensions are NULL, Nothing more to do */ 2048 return (MDI_FAILURE); 2049 } 2050 2051 MDI_CLIENT_LOCK(ct); 2052 2053 if (sb) { 2054 if (MDI_CLIENT_IS_FAILED(ct)) { 2055 /* 2056 * Client is 
not ready to accept any I/O requests. 2057 * Fail this request. 2058 */ 2059 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 2060 "client state offline ct = %p\n", (void *)ct)); 2061 MDI_CLIENT_UNLOCK(ct); 2062 return (MDI_FAILURE); 2063 } 2064 2065 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2066 /* 2067 * Check for Failover is in progress. If so tell the 2068 * caller that this device is busy. 2069 */ 2070 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 2071 "client failover in progress ct = %p\n", 2072 (void *)ct)); 2073 MDI_CLIENT_UNLOCK(ct); 2074 return (MDI_BUSY); 2075 } 2076 2077 /* 2078 * Check to see whether the client device is attached. 2079 * If not so, let the vHCI driver manually select a path 2080 * (standby) and let the probe/attach process to continue. 2081 */ 2082 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2083 MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining " 2084 "ct = %p\n", (void *)ct)); 2085 MDI_CLIENT_UNLOCK(ct); 2086 return (MDI_DEVI_ONLINING); 2087 } 2088 } 2089 2090 /* 2091 * Cache in the client list head. If head of the list is NULL 2092 * return MDI_NOPATH 2093 */ 2094 head = ct->ct_path_head; 2095 if (head == NULL) { 2096 MDI_CLIENT_UNLOCK(ct); 2097 return (MDI_NOPATH); 2098 } 2099 2100 /* Caller is specifying a specific pathinfo path by path_instance */ 2101 if (path_instance) { 2102 /* search for pathinfo with correct path_instance */ 2103 for (pip = head; 2104 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2105 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2106 ; 2107 2108 /* If path can't be selected then MDI_FAILURE is returned. */ 2109 if (pip == NULL) { 2110 MDI_CLIENT_UNLOCK(ct); 2111 return (MDI_FAILURE); 2112 } 2113 2114 /* verify state of path */ 2115 MDI_PI_LOCK(pip); 2116 if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) { 2117 MDI_PI_UNLOCK(pip); 2118 MDI_CLIENT_UNLOCK(ct); 2119 return (MDI_FAILURE); 2120 } 2121 2122 /* 2123 * Return the path in hold state. 
Caller should release the 2124 * lock by calling mdi_rele_path() 2125 */ 2126 MDI_PI_HOLD(pip); 2127 MDI_PI_UNLOCK(pip); 2128 ct->ct_path_last = pip; 2129 *ret_pip = pip; 2130 MDI_CLIENT_UNLOCK(ct); 2131 return (MDI_SUCCESS); 2132 } 2133 2134 /* 2135 * for non default behavior, bypass current 2136 * load balancing policy and always use LOAD_BALANCE_RR 2137 * except that the start point will be adjusted based 2138 * on the provided start_pip 2139 */ 2140 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2141 2142 switch (lbp) { 2143 case LOAD_BALANCE_NONE: 2144 /* 2145 * Load balancing is None or Alternate path mode 2146 * Start looking for a online mdi_pathinfo node starting from 2147 * last known selected path 2148 */ 2149 preferred = 1; 2150 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2151 if (pip == NULL) { 2152 pip = head; 2153 } 2154 start = pip; 2155 do { 2156 MDI_PI_LOCK(pip); 2157 /* 2158 * No need to explicitly check if the path is disabled. 2159 * Since we are checking for state == ONLINE and the 2160 * same variable is used for DISABLE/ENABLE information. 2161 */ 2162 if ((MDI_PI(pip)->pi_state == 2163 MDI_PATHINFO_STATE_ONLINE) && 2164 preferred == MDI_PI(pip)->pi_preferred) { 2165 /* 2166 * Return the path in hold state. Caller should 2167 * release the lock by calling mdi_rele_path() 2168 */ 2169 MDI_PI_HOLD(pip); 2170 MDI_PI_UNLOCK(pip); 2171 ct->ct_path_last = pip; 2172 *ret_pip = pip; 2173 MDI_CLIENT_UNLOCK(ct); 2174 return (MDI_SUCCESS); 2175 } 2176 2177 /* 2178 * Path is busy. 
2179 */ 2180 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2181 MDI_PI_IS_TRANSIENT(pip)) 2182 retry = 1; 2183 /* 2184 * Keep looking for a next available online path 2185 */ 2186 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2187 if (next == NULL) { 2188 next = head; 2189 } 2190 MDI_PI_UNLOCK(pip); 2191 pip = next; 2192 if (start == pip && preferred) { 2193 preferred = 0; 2194 } else if (start == pip && !preferred) { 2195 cont = 0; 2196 } 2197 } while (cont); 2198 break; 2199 2200 case LOAD_BALANCE_LBA: 2201 /* 2202 * Make sure we are looking 2203 * for an online path. Otherwise, if it is for a STANDBY 2204 * path request, it will go through and fetch an ONLINE 2205 * path which is not desirable. 2206 */ 2207 if ((ct->ct_lb_args != NULL) && 2208 (ct->ct_lb_args->region_size) && bp && 2209 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2210 if (i_mdi_lba_lb(ct, ret_pip, bp) 2211 == MDI_SUCCESS) { 2212 MDI_CLIENT_UNLOCK(ct); 2213 return (MDI_SUCCESS); 2214 } 2215 } 2216 /* FALLTHROUGH */ 2217 case LOAD_BALANCE_RR: 2218 /* 2219 * Load balancing is Round Robin. Start looking for a online 2220 * mdi_pathinfo node starting from last known selected path 2221 * as the start point. If override flags are specified, 2222 * process accordingly. 2223 * If the search is already in effect(start_pip not null), 2224 * then lets just use the same path preference to continue the 2225 * traversal. 2226 */ 2227 2228 if (start_pip != NULL) { 2229 preferred = MDI_PI(start_pip)->pi_preferred; 2230 } else { 2231 preferred = 1; 2232 } 2233 2234 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2235 if (start == NULL) { 2236 pip = head; 2237 } else { 2238 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2239 if (pip == NULL) { 2240 if ( flags & MDI_SELECT_NO_PREFERRED) { 2241 /* 2242 * Return since we hit the end of list 2243 */ 2244 MDI_CLIENT_UNLOCK(ct); 2245 return (MDI_NOPATH); 2246 } 2247 2248 if (!sb) { 2249 if (preferred == 0) { 2250 /* 2251 * Looks like we have completed 2252 * the traversal as preferred 2253 * value is 0. Time to bail out. 2254 */ 2255 *ret_pip = NULL; 2256 MDI_CLIENT_UNLOCK(ct); 2257 return (MDI_NOPATH); 2258 } else { 2259 /* 2260 * Looks like we reached the 2261 * end of the list. Lets enable 2262 * traversal of non preferred 2263 * paths. 2264 */ 2265 preferred = 0; 2266 } 2267 } 2268 pip = head; 2269 } 2270 } 2271 start = pip; 2272 do { 2273 MDI_PI_LOCK(pip); 2274 if (sb) { 2275 cond = ((MDI_PI(pip)->pi_state == 2276 MDI_PATHINFO_STATE_ONLINE && 2277 MDI_PI(pip)->pi_preferred == 2278 preferred) ? 1 : 0); 2279 } else { 2280 if (flags == MDI_SELECT_ONLINE_PATH) { 2281 cond = ((MDI_PI(pip)->pi_state == 2282 MDI_PATHINFO_STATE_ONLINE && 2283 MDI_PI(pip)->pi_preferred == 2284 preferred) ? 1 : 0); 2285 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2286 cond = ((MDI_PI(pip)->pi_state == 2287 MDI_PATHINFO_STATE_STANDBY && 2288 MDI_PI(pip)->pi_preferred == 2289 preferred) ? 1 : 0); 2290 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2291 MDI_SELECT_STANDBY_PATH)) { 2292 cond = (((MDI_PI(pip)->pi_state == 2293 MDI_PATHINFO_STATE_ONLINE || 2294 (MDI_PI(pip)->pi_state == 2295 MDI_PATHINFO_STATE_STANDBY)) && 2296 MDI_PI(pip)->pi_preferred == 2297 preferred) ? 
1 : 0); 2298 } else if (flags == 2299 (MDI_SELECT_STANDBY_PATH | 2300 MDI_SELECT_ONLINE_PATH | 2301 MDI_SELECT_USER_DISABLE_PATH)) { 2302 cond = (((MDI_PI(pip)->pi_state == 2303 MDI_PATHINFO_STATE_ONLINE || 2304 (MDI_PI(pip)->pi_state == 2305 MDI_PATHINFO_STATE_STANDBY) || 2306 (MDI_PI(pip)->pi_state == 2307 (MDI_PATHINFO_STATE_ONLINE| 2308 MDI_PATHINFO_STATE_USER_DISABLE)) || 2309 (MDI_PI(pip)->pi_state == 2310 (MDI_PATHINFO_STATE_STANDBY | 2311 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2312 MDI_PI(pip)->pi_preferred == 2313 preferred) ? 1 : 0); 2314 } else if (flags == 2315 (MDI_SELECT_STANDBY_PATH | 2316 MDI_SELECT_ONLINE_PATH | 2317 MDI_SELECT_NO_PREFERRED)) { 2318 cond = (((MDI_PI(pip)->pi_state == 2319 MDI_PATHINFO_STATE_ONLINE) || 2320 (MDI_PI(pip)->pi_state == 2321 MDI_PATHINFO_STATE_STANDBY)) 2322 ? 1 : 0); 2323 } else { 2324 cond = 0; 2325 } 2326 } 2327 /* 2328 * No need to explicitly check if the path is disabled. 2329 * Since we are checking for state == ONLINE and the 2330 * same variable is used for DISABLE/ENABLE information. 2331 */ 2332 if (cond) { 2333 /* 2334 * Return the path in hold state. Caller should 2335 * release the lock by calling mdi_rele_path() 2336 */ 2337 MDI_PI_HOLD(pip); 2338 MDI_PI_UNLOCK(pip); 2339 if (sb) 2340 ct->ct_path_last = pip; 2341 *ret_pip = pip; 2342 MDI_CLIENT_UNLOCK(ct); 2343 return (MDI_SUCCESS); 2344 } 2345 /* 2346 * Path is busy. 2347 */ 2348 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2349 MDI_PI_IS_TRANSIENT(pip)) 2350 retry = 1; 2351 2352 /* 2353 * Keep looking for a next available online path 2354 */ 2355 do_again: 2356 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2357 if (next == NULL) { 2358 if ( flags & MDI_SELECT_NO_PREFERRED) { 2359 /* 2360 * Bail out since we hit the end of list 2361 */ 2362 MDI_PI_UNLOCK(pip); 2363 break; 2364 } 2365 2366 if (!sb) { 2367 if (preferred == 1) { 2368 /* 2369 * Looks like we reached the 2370 * end of the list. Lets enable 2371 * traversal of non preferred 2372 * paths. 
2373 */ 2374 preferred = 0; 2375 next = head; 2376 } else { 2377 /* 2378 * We have done both the passes 2379 * Preferred as well as for 2380 * Non-preferred. Bail out now. 2381 */ 2382 cont = 0; 2383 } 2384 } else { 2385 /* 2386 * Standard behavior case. 2387 */ 2388 next = head; 2389 } 2390 } 2391 MDI_PI_UNLOCK(pip); 2392 if (cont == 0) { 2393 break; 2394 } 2395 pip = next; 2396 2397 if (!sb) { 2398 /* 2399 * We need to handle the selection of 2400 * non-preferred path in the following 2401 * case: 2402 * 2403 * +------+ +------+ +------+ +-----+ 2404 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2405 * +------+ +------+ +------+ +-----+ 2406 * 2407 * If we start the search with B, we need to 2408 * skip beyond B to pick C which is non - 2409 * preferred in the second pass. The following 2410 * test, if true, will allow us to skip over 2411 * the 'start'(B in the example) to select 2412 * other non preferred elements. 2413 */ 2414 if ((start_pip != NULL) && (start_pip == pip) && 2415 (MDI_PI(start_pip)->pi_preferred 2416 != preferred)) { 2417 /* 2418 * try again after going past the start 2419 * pip 2420 */ 2421 MDI_PI_LOCK(pip); 2422 goto do_again; 2423 } 2424 } else { 2425 /* 2426 * Standard behavior case 2427 */ 2428 if (start == pip && preferred) { 2429 /* look for nonpreferred paths */ 2430 preferred = 0; 2431 } else if (start == pip && !preferred) { 2432 /* 2433 * Exit condition 2434 */ 2435 cont = 0; 2436 } 2437 } 2438 } while (cont); 2439 break; 2440 } 2441 2442 MDI_CLIENT_UNLOCK(ct); 2443 if (retry == 1) { 2444 return (MDI_BUSY); 2445 } else { 2446 return (MDI_NOPATH); 2447 } 2448 } 2449 2450 /* 2451 * For a client, return the next available path to any phci 2452 * 2453 * Note: 2454 * Caller should hold the branch's devinfo node to get a consistent 2455 * snap shot of the mdi_pathinfo nodes. 2456 * 2457 * Please note that even the list is stable the mdi_pathinfo 2458 * node state and properties are volatile. 
The caller should lock 2459 * and unlock the nodes by calling mdi_pi_lock() and 2460 * mdi_pi_unlock() functions to get a stable properties. 2461 * 2462 * If there is a need to use the nodes beyond the hold of the 2463 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2464 * need to be held against unexpected removal by calling 2465 * mdi_hold_path() and should be released by calling 2466 * mdi_rele_path() on completion. 2467 */ 2468 mdi_pathinfo_t * 2469 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2470 { 2471 mdi_client_t *ct; 2472 2473 if (!MDI_CLIENT(ct_dip)) 2474 return (NULL); 2475 2476 /* 2477 * Walk through client link 2478 */ 2479 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2480 ASSERT(ct != NULL); 2481 2482 if (pip == NULL) 2483 return ((mdi_pathinfo_t *)ct->ct_path_head); 2484 2485 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2486 } 2487 2488 /* 2489 * For a phci, return the next available path to any client 2490 * Note: ditto mdi_get_next_phci_path() 2491 */ 2492 mdi_pathinfo_t * 2493 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2494 { 2495 mdi_phci_t *ph; 2496 2497 if (!MDI_PHCI(ph_dip)) 2498 return (NULL); 2499 2500 /* 2501 * Walk through pHCI link 2502 */ 2503 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2504 ASSERT(ph != NULL); 2505 2506 if (pip == NULL) 2507 return ((mdi_pathinfo_t *)ph->ph_path_head); 2508 2509 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2510 } 2511 2512 /* 2513 * mdi_hold_path(): 2514 * Hold the mdi_pathinfo node against unwanted unexpected free. 2515 * Return Values: 2516 * None 2517 */ 2518 void 2519 mdi_hold_path(mdi_pathinfo_t *pip) 2520 { 2521 if (pip) { 2522 MDI_PI_LOCK(pip); 2523 MDI_PI_HOLD(pip); 2524 MDI_PI_UNLOCK(pip); 2525 } 2526 } 2527 2528 2529 /* 2530 * mdi_rele_path(): 2531 * Release the mdi_pathinfo node which was selected 2532 * through mdi_select_path() mechanism or manually held by 2533 * calling mdi_hold_path(). 
2534 * Return Values: 2535 * None 2536 */ 2537 void 2538 mdi_rele_path(mdi_pathinfo_t *pip) 2539 { 2540 if (pip) { 2541 MDI_PI_LOCK(pip); 2542 MDI_PI_RELE(pip); 2543 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2544 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2545 } 2546 MDI_PI_UNLOCK(pip); 2547 } 2548 } 2549 2550 /* 2551 * mdi_pi_lock(): 2552 * Lock the mdi_pathinfo node. 2553 * Note: 2554 * The caller should release the lock by calling mdi_pi_unlock() 2555 */ 2556 void 2557 mdi_pi_lock(mdi_pathinfo_t *pip) 2558 { 2559 ASSERT(pip != NULL); 2560 if (pip) { 2561 MDI_PI_LOCK(pip); 2562 } 2563 } 2564 2565 2566 /* 2567 * mdi_pi_unlock(): 2568 * Unlock the mdi_pathinfo node. 2569 * Note: 2570 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2571 */ 2572 void 2573 mdi_pi_unlock(mdi_pathinfo_t *pip) 2574 { 2575 ASSERT(pip != NULL); 2576 if (pip) { 2577 MDI_PI_UNLOCK(pip); 2578 } 2579 } 2580 2581 /* 2582 * mdi_pi_find(): 2583 * Search the list of mdi_pathinfo nodes attached to the 2584 * pHCI/Client device node whose path address matches "paddr". 2585 * Returns a pointer to the mdi_pathinfo node if a matching node is 2586 * found. 2587 * Return Values: 2588 * mdi_pathinfo node handle 2589 * NULL 2590 * Notes: 2591 * Caller need not hold any locks to call this function. 2592 */ 2593 mdi_pathinfo_t * 2594 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2595 { 2596 mdi_phci_t *ph; 2597 mdi_vhci_t *vh; 2598 mdi_client_t *ct; 2599 mdi_pathinfo_t *pip = NULL; 2600 2601 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s", 2602 caddr ? caddr : "NULL", paddr ? paddr : "NULL")); 2603 if ((pdip == NULL) || (paddr == NULL)) { 2604 return (NULL); 2605 } 2606 ph = i_devi_get_phci(pdip); 2607 if (ph == NULL) { 2608 /* 2609 * Invalid pHCI device, Nothing more to do. 2610 */ 2611 MDI_DEBUG(2, (CE_WARN, pdip, 2612 "!mdi_pi_find: invalid phci")); 2613 return (NULL); 2614 } 2615 2616 vh = ph->ph_vhci; 2617 if (vh == NULL) { 2618 /* 2619 * Invalid vHCI device, Nothing more to do. 
2620 */ 2621 MDI_DEBUG(2, (CE_WARN, pdip, 2622 "!mdi_pi_find: invalid vhci")); 2623 return (NULL); 2624 } 2625 2626 /* 2627 * Look for pathinfo node identified by paddr. 2628 */ 2629 if (caddr == NULL) { 2630 /* 2631 * Find a mdi_pathinfo node under pHCI list for a matching 2632 * unit address. 2633 */ 2634 MDI_PHCI_LOCK(ph); 2635 if (MDI_PHCI_IS_OFFLINE(ph)) { 2636 MDI_DEBUG(2, (CE_WARN, pdip, 2637 "!mdi_pi_find: offline phci %p", (void *)ph)); 2638 MDI_PHCI_UNLOCK(ph); 2639 return (NULL); 2640 } 2641 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2642 2643 while (pip != NULL) { 2644 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2645 break; 2646 } 2647 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2648 } 2649 MDI_PHCI_UNLOCK(ph); 2650 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p", 2651 (void *)pip)); 2652 return (pip); 2653 } 2654 2655 /* 2656 * XXX - Is the rest of the code in this function really necessary? 2657 * The consumers of mdi_pi_find() can search for the desired pathinfo 2658 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2659 * whether the search is based on the pathinfo nodes attached to 2660 * the pHCI or the client node, the result will be the same. 2661 */ 2662 2663 /* 2664 * Find the client device corresponding to 'caddr' 2665 */ 2666 MDI_VHCI_CLIENT_LOCK(vh); 2667 2668 /* 2669 * XXX - Passing NULL to the following function works as long as the 2670 * the client addresses (caddr) are unique per vhci basis. 2671 */ 2672 ct = i_mdi_client_find(vh, NULL, caddr); 2673 if (ct == NULL) { 2674 /* 2675 * Client not found, Obviously mdi_pathinfo node has not been 2676 * created yet. 2677 */ 2678 MDI_VHCI_CLIENT_UNLOCK(vh); 2679 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not " 2680 "found for caddr %s", caddr ? 
caddr : "NULL")); 2681 return (NULL); 2682 } 2683 2684 /* 2685 * Hold the client lock and look for a mdi_pathinfo node with matching 2686 * pHCI and paddr 2687 */ 2688 MDI_CLIENT_LOCK(ct); 2689 2690 /* 2691 * Release the global mutex as it is no more needed. Note: We always 2692 * respect the locking order while acquiring. 2693 */ 2694 MDI_VHCI_CLIENT_UNLOCK(vh); 2695 2696 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2697 while (pip != NULL) { 2698 /* 2699 * Compare the unit address 2700 */ 2701 if ((MDI_PI(pip)->pi_phci == ph) && 2702 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2703 break; 2704 } 2705 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2706 } 2707 MDI_CLIENT_UNLOCK(ct); 2708 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip)); 2709 return (pip); 2710 } 2711 2712 /* 2713 * mdi_pi_alloc(): 2714 * Allocate and initialize a new instance of a mdi_pathinfo node. 2715 * The mdi_pathinfo node returned by this function identifies a 2716 * unique device path is capable of having properties attached 2717 * and passed to mdi_pi_online() to fully attach and online the 2718 * path and client device node. 2719 * The mdi_pathinfo node returned by this function must be 2720 * destroyed using mdi_pi_free() if the path is no longer 2721 * operational or if the caller fails to attach a client device 2722 * node when calling mdi_pi_online(). The framework will not free 2723 * the resources allocated. 2724 * This function can be called from both interrupt and kernel 2725 * contexts. DDI_NOSLEEP flag should be used while calling 2726 * from interrupt contexts. 
2727 * Return Values: 2728 * MDI_SUCCESS 2729 * MDI_FAILURE 2730 * MDI_NOMEM 2731 */ 2732 /*ARGSUSED*/ 2733 int 2734 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2735 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2736 { 2737 mdi_vhci_t *vh; 2738 mdi_phci_t *ph; 2739 mdi_client_t *ct; 2740 mdi_pathinfo_t *pip = NULL; 2741 dev_info_t *cdip; 2742 int rv = MDI_NOMEM; 2743 int path_allocated = 0; 2744 2745 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s", 2746 cname ? cname : "NULL", caddr ? caddr : "NULL", 2747 paddr ? paddr : "NULL")); 2748 2749 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2750 ret_pip == NULL) { 2751 /* Nothing more to do */ 2752 return (MDI_FAILURE); 2753 } 2754 2755 *ret_pip = NULL; 2756 2757 /* No allocations on detaching pHCI */ 2758 if (DEVI_IS_DETACHING(pdip)) { 2759 /* Invalid pHCI device, return failure */ 2760 MDI_DEBUG(1, (CE_WARN, pdip, 2761 "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip)); 2762 return (MDI_FAILURE); 2763 } 2764 2765 ph = i_devi_get_phci(pdip); 2766 ASSERT(ph != NULL); 2767 if (ph == NULL) { 2768 /* Invalid pHCI device, return failure */ 2769 MDI_DEBUG(1, (CE_WARN, pdip, 2770 "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip)); 2771 return (MDI_FAILURE); 2772 } 2773 2774 MDI_PHCI_LOCK(ph); 2775 vh = ph->ph_vhci; 2776 if (vh == NULL) { 2777 /* Invalid vHCI device, return failure */ 2778 MDI_DEBUG(1, (CE_WARN, pdip, 2779 "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip)); 2780 MDI_PHCI_UNLOCK(ph); 2781 return (MDI_FAILURE); 2782 } 2783 2784 if (MDI_PHCI_IS_READY(ph) == 0) { 2785 /* 2786 * Do not allow new node creation when pHCI is in 2787 * offline/suspended states 2788 */ 2789 MDI_DEBUG(1, (CE_WARN, pdip, 2790 "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph)); 2791 MDI_PHCI_UNLOCK(ph); 2792 return (MDI_BUSY); 2793 } 2794 MDI_PHCI_UNSTABLE(ph); 2795 MDI_PHCI_UNLOCK(ph); 2796 2797 /* look for a matching client, create one 
if not found */ 2798 MDI_VHCI_CLIENT_LOCK(vh); 2799 ct = i_mdi_client_find(vh, cname, caddr); 2800 if (ct == NULL) { 2801 ct = i_mdi_client_alloc(vh, cname, caddr); 2802 ASSERT(ct != NULL); 2803 } 2804 2805 if (ct->ct_dip == NULL) { 2806 /* 2807 * Allocate a devinfo node 2808 */ 2809 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2810 compatible, ncompatible); 2811 if (ct->ct_dip == NULL) { 2812 (void) i_mdi_client_free(vh, ct); 2813 goto fail; 2814 } 2815 } 2816 cdip = ct->ct_dip; 2817 2818 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2819 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2820 2821 MDI_CLIENT_LOCK(ct); 2822 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2823 while (pip != NULL) { 2824 /* 2825 * Compare the unit address 2826 */ 2827 if ((MDI_PI(pip)->pi_phci == ph) && 2828 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2829 break; 2830 } 2831 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2832 } 2833 MDI_CLIENT_UNLOCK(ct); 2834 2835 if (pip == NULL) { 2836 /* 2837 * This is a new path for this client device. Allocate and 2838 * initialize a new pathinfo node 2839 */ 2840 pip = i_mdi_pi_alloc(ph, paddr, ct); 2841 ASSERT(pip != NULL); 2842 path_allocated = 1; 2843 } 2844 rv = MDI_SUCCESS; 2845 2846 fail: 2847 /* 2848 * Release the global mutex. 
2849 */ 2850 MDI_VHCI_CLIENT_UNLOCK(vh); 2851 2852 /* 2853 * Mark the pHCI as stable 2854 */ 2855 MDI_PHCI_LOCK(ph); 2856 MDI_PHCI_STABLE(ph); 2857 MDI_PHCI_UNLOCK(ph); 2858 *ret_pip = pip; 2859 2860 MDI_DEBUG(2, (CE_NOTE, pdip, 2861 "!mdi_pi_alloc_compatible: alloc %p", (void *)pip)); 2862 2863 if (path_allocated) 2864 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2865 2866 return (rv); 2867 } 2868 2869 /*ARGSUSED*/ 2870 int 2871 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2872 int flags, mdi_pathinfo_t **ret_pip) 2873 { 2874 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2875 flags, ret_pip)); 2876 } 2877 2878 /* 2879 * i_mdi_pi_alloc(): 2880 * Allocate a mdi_pathinfo node and add to the pHCI path list 2881 * Return Values: 2882 * mdi_pathinfo 2883 */ 2884 /*ARGSUSED*/ 2885 static mdi_pathinfo_t * 2886 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2887 { 2888 mdi_pathinfo_t *pip; 2889 int ct_circular; 2890 int ph_circular; 2891 static char path[MAXPATHLEN]; 2892 char *path_persistent; 2893 int path_instance; 2894 mod_hash_val_t hv; 2895 2896 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2897 2898 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2899 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2900 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2901 MDI_PATHINFO_STATE_TRANSIENT; 2902 2903 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2904 MDI_PI_SET_USER_DISABLE(pip); 2905 2906 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2907 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2908 2909 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2910 MDI_PI_SET_DRV_DISABLE(pip); 2911 2912 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2913 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2914 MDI_PI(pip)->pi_client = ct; 2915 MDI_PI(pip)->pi_phci = ph; 2916 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2917 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2918 2919 /* 2920 * We form the "path" to 
the pathinfo node, and see if we have 2921 * already allocated a 'path_instance' for that "path". If so, 2922 * we use the already allocated 'path_instance'. If not, we 2923 * allocate a new 'path_instance' and associate it with a copy of 2924 * the "path" string (which is never freed). The association 2925 * between a 'path_instance' this "path" string persists until 2926 * reboot. 2927 */ 2928 mutex_enter(&mdi_pathmap_mutex); 2929 (void) ddi_pathname(ph->ph_dip, path); 2930 (void) sprintf(path + strlen(path), "/%s@%s", 2931 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2932 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2933 path_instance = (uint_t)(intptr_t)hv; 2934 } else { 2935 /* allocate a new 'path_instance' and persistent "path" */ 2936 path_instance = mdi_pathmap_instance++; 2937 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2938 (void) mod_hash_insert(mdi_pathmap_bypath, 2939 (mod_hash_key_t)path_persistent, 2940 (mod_hash_val_t)(intptr_t)path_instance); 2941 (void) mod_hash_insert(mdi_pathmap_byinstance, 2942 (mod_hash_key_t)(intptr_t)path_instance, 2943 (mod_hash_val_t)path_persistent); 2944 } 2945 mutex_exit(&mdi_pathmap_mutex); 2946 MDI_PI(pip)->pi_path_instance = path_instance; 2947 2948 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2949 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2950 MDI_PI(pip)->pi_pprivate = NULL; 2951 MDI_PI(pip)->pi_cprivate = NULL; 2952 MDI_PI(pip)->pi_vprivate = NULL; 2953 MDI_PI(pip)->pi_client_link = NULL; 2954 MDI_PI(pip)->pi_phci_link = NULL; 2955 MDI_PI(pip)->pi_ref_cnt = 0; 2956 MDI_PI(pip)->pi_kstats = NULL; 2957 MDI_PI(pip)->pi_preferred = 1; 2958 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2959 2960 /* 2961 * Lock both dev_info nodes against changes in parallel. 2962 * 2963 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 
2964 * This atypical operation is done to synchronize pathinfo nodes 2965 * during devinfo snapshot (see di_register_pip) by 'pretending' that 2966 * the pathinfo nodes are children of the Client. 2967 */ 2968 ndi_devi_enter(ct->ct_dip, &ct_circular); 2969 ndi_devi_enter(ph->ph_dip, &ph_circular); 2970 2971 i_mdi_phci_add_path(ph, pip); 2972 i_mdi_client_add_path(ct, pip); 2973 2974 ndi_devi_exit(ph->ph_dip, ph_circular); 2975 ndi_devi_exit(ct->ct_dip, ct_circular); 2976 2977 return (pip); 2978 } 2979 2980 /* 2981 * mdi_pi_pathname_by_instance(): 2982 * Lookup of "path" by 'path_instance'. Return "path". 2983 * NOTE: returned "path" remains valid forever (until reboot). 2984 */ 2985 char * 2986 mdi_pi_pathname_by_instance(int path_instance) 2987 { 2988 char *path; 2989 mod_hash_val_t hv; 2990 2991 /* mdi_pathmap lookup of "path" by 'path_instance' */ 2992 mutex_enter(&mdi_pathmap_mutex); 2993 if (mod_hash_find(mdi_pathmap_byinstance, 2994 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 2995 path = (char *)hv; 2996 else 2997 path = NULL; 2998 mutex_exit(&mdi_pathmap_mutex); 2999 return (path); 3000 } 3001 3002 /* 3003 * i_mdi_phci_add_path(): 3004 * Add a mdi_pathinfo node to pHCI list. 
3005 * Notes: 3006 * Caller should per-pHCI mutex 3007 */ 3008 static void 3009 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3010 { 3011 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3012 3013 MDI_PHCI_LOCK(ph); 3014 if (ph->ph_path_head == NULL) { 3015 ph->ph_path_head = pip; 3016 } else { 3017 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3018 } 3019 ph->ph_path_tail = pip; 3020 ph->ph_path_count++; 3021 MDI_PHCI_UNLOCK(ph); 3022 } 3023 3024 /* 3025 * i_mdi_client_add_path(): 3026 * Add mdi_pathinfo node to client list 3027 */ 3028 static void 3029 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3030 { 3031 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3032 3033 MDI_CLIENT_LOCK(ct); 3034 if (ct->ct_path_head == NULL) { 3035 ct->ct_path_head = pip; 3036 } else { 3037 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3038 } 3039 ct->ct_path_tail = pip; 3040 ct->ct_path_count++; 3041 MDI_CLIENT_UNLOCK(ct); 3042 } 3043 3044 /* 3045 * mdi_pi_free(): 3046 * Free the mdi_pathinfo node and also client device node if this 3047 * is the last path to the device 3048 * Return Values: 3049 * MDI_SUCCESS 3050 * MDI_FAILURE 3051 * MDI_BUSY 3052 */ 3053 /*ARGSUSED*/ 3054 int 3055 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3056 { 3057 int rv = MDI_FAILURE; 3058 mdi_vhci_t *vh; 3059 mdi_phci_t *ph; 3060 mdi_client_t *ct; 3061 int (*f)(); 3062 int client_held = 0; 3063 3064 MDI_PI_LOCK(pip); 3065 ph = MDI_PI(pip)->pi_phci; 3066 ASSERT(ph != NULL); 3067 if (ph == NULL) { 3068 /* 3069 * Invalid pHCI device, return failure 3070 */ 3071 MDI_DEBUG(1, (CE_WARN, NULL, 3072 "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip)); 3073 MDI_PI_UNLOCK(pip); 3074 return (MDI_FAILURE); 3075 } 3076 3077 vh = ph->ph_vhci; 3078 ASSERT(vh != NULL); 3079 if (vh == NULL) { 3080 /* Invalid pHCI device, return failure */ 3081 MDI_DEBUG(1, (CE_WARN, NULL, 3082 "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip)); 3083 MDI_PI_UNLOCK(pip); 3084 return (MDI_FAILURE); 3085 } 3086 3087 ct = 
MDI_PI(pip)->pi_client; 3088 ASSERT(ct != NULL); 3089 if (ct == NULL) { 3090 /* 3091 * Invalid Client device, return failure 3092 */ 3093 MDI_DEBUG(1, (CE_WARN, NULL, 3094 "!mdi_pi_free: invalid client pip=%p", (void *)pip)); 3095 MDI_PI_UNLOCK(pip); 3096 return (MDI_FAILURE); 3097 } 3098 3099 /* 3100 * Check to see for busy condition. A mdi_pathinfo can only be freed 3101 * if the node state is either offline or init and the reference count 3102 * is zero. 3103 */ 3104 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3105 MDI_PI_IS_INITING(pip))) { 3106 /* 3107 * Node is busy 3108 */ 3109 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3110 "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip)); 3111 MDI_PI_UNLOCK(pip); 3112 return (MDI_BUSY); 3113 } 3114 3115 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3116 /* 3117 * Give a chance for pending I/Os to complete. 3118 */ 3119 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: " 3120 "%d cmds still pending on path: %p\n", 3121 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3122 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3123 &MDI_PI(pip)->pi_mutex, 3124 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3125 /* 3126 * The timeout time reached without ref_cnt being zero 3127 * being signaled. 3128 */ 3129 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3130 "!mdi_pi_free: " 3131 "Timeout reached on path %p without the cond\n", 3132 (void *)pip)); 3133 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3134 "!mdi_pi_free: " 3135 "%d cmds still pending on path: %p\n", 3136 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3137 MDI_PI_UNLOCK(pip); 3138 return (MDI_BUSY); 3139 } 3140 } 3141 if (MDI_PI(pip)->pi_pm_held) { 3142 client_held = 1; 3143 } 3144 MDI_PI_UNLOCK(pip); 3145 3146 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3147 3148 MDI_CLIENT_LOCK(ct); 3149 3150 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3151 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3152 3153 /* 3154 * Wait till failover is complete before removing this node. 
3155 */ 3156 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3157 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3158 3159 MDI_CLIENT_UNLOCK(ct); 3160 MDI_VHCI_CLIENT_LOCK(vh); 3161 MDI_CLIENT_LOCK(ct); 3162 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3163 3164 if (!MDI_PI_IS_INITING(pip)) { 3165 f = vh->vh_ops->vo_pi_uninit; 3166 if (f != NULL) { 3167 rv = (*f)(vh->vh_dip, pip, 0); 3168 } 3169 } 3170 /* 3171 * If vo_pi_uninit() completed successfully. 3172 */ 3173 if (rv == MDI_SUCCESS) { 3174 if (client_held) { 3175 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 3176 "i_mdi_pm_rele_client\n")); 3177 i_mdi_pm_rele_client(ct, 1); 3178 } 3179 i_mdi_pi_free(ph, pip, ct); 3180 if (ct->ct_path_count == 0) { 3181 /* 3182 * Client lost its last path. 3183 * Clean up the client device 3184 */ 3185 MDI_CLIENT_UNLOCK(ct); 3186 (void) i_mdi_client_free(ct->ct_vhci, ct); 3187 MDI_VHCI_CLIENT_UNLOCK(vh); 3188 return (rv); 3189 } 3190 } 3191 MDI_CLIENT_UNLOCK(ct); 3192 MDI_VHCI_CLIENT_UNLOCK(vh); 3193 3194 if (rv == MDI_FAILURE) 3195 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3196 3197 return (rv); 3198 } 3199 3200 /* 3201 * i_mdi_pi_free(): 3202 * Free the mdi_pathinfo node 3203 */ 3204 static void 3205 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3206 { 3207 int ct_circular; 3208 int ph_circular; 3209 3210 ASSERT(MDI_CLIENT_LOCKED(ct)); 3211 3212 /* 3213 * remove any per-path kstats 3214 */ 3215 i_mdi_pi_kstat_destroy(pip); 3216 3217 /* See comments in i_mdi_pi_alloc() */ 3218 ndi_devi_enter(ct->ct_dip, &ct_circular); 3219 ndi_devi_enter(ph->ph_dip, &ph_circular); 3220 3221 i_mdi_client_remove_path(ct, pip); 3222 i_mdi_phci_remove_path(ph, pip); 3223 3224 ndi_devi_exit(ph->ph_dip, ph_circular); 3225 ndi_devi_exit(ct->ct_dip, ct_circular); 3226 3227 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3228 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3229 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3230 if (MDI_PI(pip)->pi_addr) { 3231 kmem_free(MDI_PI(pip)->pi_addr, 3232 
strlen(MDI_PI(pip)->pi_addr) + 1); 3233 MDI_PI(pip)->pi_addr = NULL; 3234 } 3235 3236 if (MDI_PI(pip)->pi_prop) { 3237 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3238 MDI_PI(pip)->pi_prop = NULL; 3239 } 3240 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3241 } 3242 3243 3244 /* 3245 * i_mdi_phci_remove_path(): 3246 * Remove a mdi_pathinfo node from pHCI list. 3247 * Notes: 3248 * Caller should hold per-pHCI mutex 3249 */ 3250 static void 3251 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3252 { 3253 mdi_pathinfo_t *prev = NULL; 3254 mdi_pathinfo_t *path = NULL; 3255 3256 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3257 3258 MDI_PHCI_LOCK(ph); 3259 path = ph->ph_path_head; 3260 while (path != NULL) { 3261 if (path == pip) { 3262 break; 3263 } 3264 prev = path; 3265 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3266 } 3267 3268 if (path) { 3269 ph->ph_path_count--; 3270 if (prev) { 3271 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3272 } else { 3273 ph->ph_path_head = 3274 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3275 } 3276 if (ph->ph_path_tail == path) { 3277 ph->ph_path_tail = prev; 3278 } 3279 } 3280 3281 /* 3282 * Clear the pHCI link 3283 */ 3284 MDI_PI(pip)->pi_phci_link = NULL; 3285 MDI_PI(pip)->pi_phci = NULL; 3286 MDI_PHCI_UNLOCK(ph); 3287 } 3288 3289 /* 3290 * i_mdi_client_remove_path(): 3291 * Remove a mdi_pathinfo node from client path list. 
3292 */ 3293 static void 3294 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3295 { 3296 mdi_pathinfo_t *prev = NULL; 3297 mdi_pathinfo_t *path; 3298 3299 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3300 3301 ASSERT(MDI_CLIENT_LOCKED(ct)); 3302 path = ct->ct_path_head; 3303 while (path != NULL) { 3304 if (path == pip) { 3305 break; 3306 } 3307 prev = path; 3308 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3309 } 3310 3311 if (path) { 3312 ct->ct_path_count--; 3313 if (prev) { 3314 MDI_PI(prev)->pi_client_link = 3315 MDI_PI(path)->pi_client_link; 3316 } else { 3317 ct->ct_path_head = 3318 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3319 } 3320 if (ct->ct_path_tail == path) { 3321 ct->ct_path_tail = prev; 3322 } 3323 if (ct->ct_path_last == path) { 3324 ct->ct_path_last = ct->ct_path_head; 3325 } 3326 } 3327 MDI_PI(pip)->pi_client_link = NULL; 3328 MDI_PI(pip)->pi_client = NULL; 3329 } 3330 3331 /* 3332 * i_mdi_pi_state_change(): 3333 * online a mdi_pathinfo node 3334 * 3335 * Return Values: 3336 * MDI_SUCCESS 3337 * MDI_FAILURE 3338 */ 3339 /*ARGSUSED*/ 3340 static int 3341 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3342 { 3343 int rv = MDI_SUCCESS; 3344 mdi_vhci_t *vh; 3345 mdi_phci_t *ph; 3346 mdi_client_t *ct; 3347 int (*f)(); 3348 dev_info_t *cdip; 3349 3350 MDI_PI_LOCK(pip); 3351 3352 ph = MDI_PI(pip)->pi_phci; 3353 ASSERT(ph); 3354 if (ph == NULL) { 3355 /* 3356 * Invalid pHCI device, fail the request 3357 */ 3358 MDI_PI_UNLOCK(pip); 3359 MDI_DEBUG(1, (CE_WARN, NULL, 3360 "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip)); 3361 return (MDI_FAILURE); 3362 } 3363 3364 vh = ph->ph_vhci; 3365 ASSERT(vh); 3366 if (vh == NULL) { 3367 /* 3368 * Invalid vHCI device, fail the request 3369 */ 3370 MDI_PI_UNLOCK(pip); 3371 MDI_DEBUG(1, (CE_WARN, NULL, 3372 "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip)); 3373 return (MDI_FAILURE); 3374 } 3375 3376 ct = MDI_PI(pip)->pi_client; 3377 ASSERT(ct 
!= NULL); 3378 if (ct == NULL) { 3379 /* 3380 * Invalid client device, fail the request 3381 */ 3382 MDI_PI_UNLOCK(pip); 3383 MDI_DEBUG(1, (CE_WARN, NULL, 3384 "!mdi_pi_state_change: invalid client pip=%p", 3385 (void *)pip)); 3386 return (MDI_FAILURE); 3387 } 3388 3389 /* 3390 * If this path has not been initialized yet, Callback vHCI driver's 3391 * pathinfo node initialize entry point 3392 */ 3393 3394 if (MDI_PI_IS_INITING(pip)) { 3395 MDI_PI_UNLOCK(pip); 3396 f = vh->vh_ops->vo_pi_init; 3397 if (f != NULL) { 3398 rv = (*f)(vh->vh_dip, pip, 0); 3399 if (rv != MDI_SUCCESS) { 3400 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3401 "!vo_pi_init: failed vHCI=0x%p, pip=0x%p", 3402 (void *)vh, (void *)pip)); 3403 return (MDI_FAILURE); 3404 } 3405 } 3406 MDI_PI_LOCK(pip); 3407 MDI_PI_CLEAR_TRANSIENT(pip); 3408 } 3409 3410 /* 3411 * Do not allow state transition when pHCI is in offline/suspended 3412 * states 3413 */ 3414 i_mdi_phci_lock(ph, pip); 3415 if (MDI_PHCI_IS_READY(ph) == 0) { 3416 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3417 "!mdi_pi_state_change: pHCI not ready, pHCI=%p", 3418 (void *)ph)); 3419 MDI_PI_UNLOCK(pip); 3420 i_mdi_phci_unlock(ph); 3421 return (MDI_BUSY); 3422 } 3423 MDI_PHCI_UNSTABLE(ph); 3424 i_mdi_phci_unlock(ph); 3425 3426 /* 3427 * Check if mdi_pathinfo state is in transient state. 3428 * If yes, offlining is in progress and wait till transient state is 3429 * cleared. 3430 */ 3431 if (MDI_PI_IS_TRANSIENT(pip)) { 3432 while (MDI_PI_IS_TRANSIENT(pip)) { 3433 cv_wait(&MDI_PI(pip)->pi_state_cv, 3434 &MDI_PI(pip)->pi_mutex); 3435 } 3436 } 3437 3438 /* 3439 * Grab the client lock in reverse order sequence and release the 3440 * mdi_pathinfo mutex. 
3441 */ 3442 i_mdi_client_lock(ct, pip); 3443 MDI_PI_UNLOCK(pip); 3444 3445 /* 3446 * Wait till failover state is cleared 3447 */ 3448 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3449 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3450 3451 /* 3452 * Mark the mdi_pathinfo node state as transient 3453 */ 3454 MDI_PI_LOCK(pip); 3455 switch (state) { 3456 case MDI_PATHINFO_STATE_ONLINE: 3457 MDI_PI_SET_ONLINING(pip); 3458 break; 3459 3460 case MDI_PATHINFO_STATE_STANDBY: 3461 MDI_PI_SET_STANDBYING(pip); 3462 break; 3463 3464 case MDI_PATHINFO_STATE_FAULT: 3465 /* 3466 * Mark the pathinfo state as FAULTED 3467 */ 3468 MDI_PI_SET_FAULTING(pip); 3469 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3470 break; 3471 3472 case MDI_PATHINFO_STATE_OFFLINE: 3473 /* 3474 * ndi_devi_offline() cannot hold pip or ct locks. 3475 */ 3476 MDI_PI_UNLOCK(pip); 3477 /* 3478 * Don't offline the client dev_info node unless we have 3479 * no available paths left at all. 3480 */ 3481 cdip = ct->ct_dip; 3482 if ((flag & NDI_DEVI_REMOVE) && 3483 (ct->ct_path_count == 1)) { 3484 i_mdi_client_unlock(ct); 3485 rv = ndi_devi_offline(cdip, 0); 3486 if (rv != NDI_SUCCESS) { 3487 /* 3488 * Convert to MDI error code 3489 */ 3490 switch (rv) { 3491 case NDI_BUSY: 3492 rv = MDI_BUSY; 3493 break; 3494 default: 3495 rv = MDI_FAILURE; 3496 break; 3497 } 3498 goto state_change_exit; 3499 } else { 3500 i_mdi_client_lock(ct, NULL); 3501 } 3502 } 3503 /* 3504 * Mark the mdi_pathinfo node state as transient 3505 */ 3506 MDI_PI_LOCK(pip); 3507 MDI_PI_SET_OFFLINING(pip); 3508 break; 3509 } 3510 MDI_PI_UNLOCK(pip); 3511 MDI_CLIENT_UNSTABLE(ct); 3512 i_mdi_client_unlock(ct); 3513 3514 f = vh->vh_ops->vo_pi_state_change; 3515 if (f != NULL) 3516 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3517 3518 MDI_CLIENT_LOCK(ct); 3519 MDI_PI_LOCK(pip); 3520 if (rv == MDI_NOT_SUPPORTED) { 3521 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3522 } 3523 if (rv != MDI_SUCCESS) { 3524 MDI_DEBUG(2, (CE_WARN, ct->ct_dip, 3525 "!vo_pi_state_change: 
failed rv = %x", rv)); 3526 } 3527 if (MDI_PI_IS_TRANSIENT(pip)) { 3528 if (rv == MDI_SUCCESS) { 3529 MDI_PI_CLEAR_TRANSIENT(pip); 3530 } else { 3531 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3532 } 3533 } 3534 3535 /* 3536 * Wake anyone waiting for this mdi_pathinfo node 3537 */ 3538 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3539 MDI_PI_UNLOCK(pip); 3540 3541 /* 3542 * Mark the client device as stable 3543 */ 3544 MDI_CLIENT_STABLE(ct); 3545 if (rv == MDI_SUCCESS) { 3546 if (ct->ct_unstable == 0) { 3547 cdip = ct->ct_dip; 3548 3549 /* 3550 * Onlining the mdi_pathinfo node will impact the 3551 * client state Update the client and dev_info node 3552 * state accordingly 3553 */ 3554 rv = NDI_SUCCESS; 3555 i_mdi_client_update_state(ct); 3556 switch (MDI_CLIENT_STATE(ct)) { 3557 case MDI_CLIENT_STATE_OPTIMAL: 3558 case MDI_CLIENT_STATE_DEGRADED: 3559 if (cdip && !i_ddi_devi_attached(cdip) && 3560 ((state == MDI_PATHINFO_STATE_ONLINE) || 3561 (state == MDI_PATHINFO_STATE_STANDBY))) { 3562 3563 /* 3564 * Must do ndi_devi_online() through 3565 * hotplug thread for deferred 3566 * attach mechanism to work 3567 */ 3568 MDI_CLIENT_UNLOCK(ct); 3569 rv = ndi_devi_online(cdip, 0); 3570 MDI_CLIENT_LOCK(ct); 3571 if ((rv != NDI_SUCCESS) && 3572 (MDI_CLIENT_STATE(ct) == 3573 MDI_CLIENT_STATE_DEGRADED)) { 3574 /* 3575 * ndi_devi_online failed. 3576 * Reset client flags to 3577 * offline. 3578 */ 3579 MDI_DEBUG(1, (CE_WARN, cdip, 3580 "!ndi_devi_online: failed " 3581 " Error: %x", rv)); 3582 MDI_CLIENT_SET_OFFLINE(ct); 3583 } 3584 if (rv != NDI_SUCCESS) { 3585 /* Reset the path state */ 3586 MDI_PI_LOCK(pip); 3587 MDI_PI(pip)->pi_state = 3588 MDI_PI_OLD_STATE(pip); 3589 MDI_PI_UNLOCK(pip); 3590 } 3591 } 3592 break; 3593 3594 case MDI_CLIENT_STATE_FAILED: 3595 /* 3596 * This is the last path case for 3597 * non-user initiated events. 
3598 */ 3599 if (((flag & NDI_DEVI_REMOVE) == 0) && 3600 cdip && (i_ddi_node_state(cdip) >= 3601 DS_INITIALIZED)) { 3602 MDI_CLIENT_UNLOCK(ct); 3603 rv = ndi_devi_offline(cdip, 0); 3604 MDI_CLIENT_LOCK(ct); 3605 3606 if (rv != NDI_SUCCESS) { 3607 /* 3608 * ndi_devi_offline failed. 3609 * Reset client flags to 3610 * online as the path could not 3611 * be offlined. 3612 */ 3613 MDI_DEBUG(1, (CE_WARN, cdip, 3614 "!ndi_devi_offline: failed " 3615 " Error: %x", rv)); 3616 MDI_CLIENT_SET_ONLINE(ct); 3617 } 3618 } 3619 break; 3620 } 3621 /* 3622 * Convert to MDI error code 3623 */ 3624 switch (rv) { 3625 case NDI_SUCCESS: 3626 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3627 i_mdi_report_path_state(ct, pip); 3628 rv = MDI_SUCCESS; 3629 break; 3630 case NDI_BUSY: 3631 rv = MDI_BUSY; 3632 break; 3633 default: 3634 rv = MDI_FAILURE; 3635 break; 3636 } 3637 } 3638 } 3639 MDI_CLIENT_UNLOCK(ct); 3640 3641 state_change_exit: 3642 /* 3643 * Mark the pHCI as stable again. 3644 */ 3645 MDI_PHCI_LOCK(ph); 3646 MDI_PHCI_STABLE(ph); 3647 MDI_PHCI_UNLOCK(ph); 3648 return (rv); 3649 } 3650 3651 /* 3652 * mdi_pi_online(): 3653 * Place the path_info node in the online state. The path is 3654 * now available to be selected by mdi_select_path() for 3655 * transporting I/O requests to client devices. 
3656 * Return Values: 3657 * MDI_SUCCESS 3658 * MDI_FAILURE 3659 */ 3660 int 3661 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3662 { 3663 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3664 int client_held = 0; 3665 int rv; 3666 int se_flag; 3667 int kmem_flag; 3668 3669 ASSERT(ct != NULL); 3670 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3671 if (rv != MDI_SUCCESS) 3672 return (rv); 3673 3674 MDI_PI_LOCK(pip); 3675 if (MDI_PI(pip)->pi_pm_held == 0) { 3676 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3677 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3678 i_mdi_pm_hold_pip(pip); 3679 client_held = 1; 3680 } 3681 MDI_PI_UNLOCK(pip); 3682 3683 if (client_held) { 3684 MDI_CLIENT_LOCK(ct); 3685 if (ct->ct_power_cnt == 0) { 3686 rv = i_mdi_power_all_phci(ct); 3687 } 3688 3689 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3690 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3691 i_mdi_pm_hold_client(ct, 1); 3692 MDI_CLIENT_UNLOCK(ct); 3693 } 3694 3695 /* determine interrupt context */ 3696 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3697 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3698 3699 /* A new path is online. Invalidate DINFOCACHE snap shot. */ 3700 i_ddi_di_cache_invalidate(kmem_flag); 3701 3702 return (rv); 3703 } 3704 3705 /* 3706 * mdi_pi_standby(): 3707 * Place the mdi_pathinfo node in standby state 3708 * 3709 * Return Values: 3710 * MDI_SUCCESS 3711 * MDI_FAILURE 3712 */ 3713 int 3714 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3715 { 3716 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3717 } 3718 3719 /* 3720 * mdi_pi_fault(): 3721 * Place the mdi_pathinfo node in fault'ed state 3722 * Return Values: 3723 * MDI_SUCCESS 3724 * MDI_FAILURE 3725 */ 3726 int 3727 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3728 { 3729 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3730 } 3731 3732 /* 3733 * mdi_pi_offline(): 3734 * Offline a mdi_pathinfo node. 
3735 * Return Values: 3736 * MDI_SUCCESS 3737 * MDI_FAILURE 3738 */ 3739 int 3740 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3741 { 3742 int ret, client_held = 0; 3743 mdi_client_t *ct; 3744 int se_flag; 3745 int kmem_flag; 3746 3747 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3748 3749 if (ret == MDI_SUCCESS) { 3750 MDI_PI_LOCK(pip); 3751 if (MDI_PI(pip)->pi_pm_held) { 3752 client_held = 1; 3753 } 3754 MDI_PI_UNLOCK(pip); 3755 3756 if (client_held) { 3757 ct = MDI_PI(pip)->pi_client; 3758 MDI_CLIENT_LOCK(ct); 3759 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3760 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3761 i_mdi_pm_rele_client(ct, 1); 3762 MDI_CLIENT_UNLOCK(ct); 3763 } 3764 3765 /* determine interrupt context */ 3766 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3767 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 3768 3769 /* pathinfo is offlined. update DINFOCACHE. */ 3770 i_ddi_di_cache_invalidate(kmem_flag); 3771 } 3772 3773 return (ret); 3774 } 3775 3776 /* 3777 * i_mdi_pi_offline(): 3778 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3779 */ 3780 static int 3781 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3782 { 3783 dev_info_t *vdip = NULL; 3784 mdi_vhci_t *vh = NULL; 3785 mdi_client_t *ct = NULL; 3786 int (*f)(); 3787 int rv; 3788 3789 MDI_PI_LOCK(pip); 3790 ct = MDI_PI(pip)->pi_client; 3791 ASSERT(ct != NULL); 3792 3793 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3794 /* 3795 * Give a chance for pending I/Os to complete. 3796 */ 3797 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3798 "%d cmds still pending on path: %p\n", 3799 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3800 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3801 &MDI_PI(pip)->pi_mutex, 3802 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3803 /* 3804 * The timeout time reached without ref_cnt being zero 3805 * being signaled. 
3806 */ 3807 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3808 "Timeout reached on path %p without the cond\n", 3809 (void *)pip)); 3810 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3811 "%d cmds still pending on path: %p\n", 3812 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3813 } 3814 } 3815 vh = ct->ct_vhci; 3816 vdip = vh->vh_dip; 3817 3818 /* 3819 * Notify vHCI that has registered this event 3820 */ 3821 ASSERT(vh->vh_ops); 3822 f = vh->vh_ops->vo_pi_state_change; 3823 3824 if (f != NULL) { 3825 MDI_PI_UNLOCK(pip); 3826 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3827 flags)) != MDI_SUCCESS) { 3828 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3829 "!vo_path_offline failed " 3830 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3831 } 3832 MDI_PI_LOCK(pip); 3833 } 3834 3835 /* 3836 * Set the mdi_pathinfo node state and clear the transient condition 3837 */ 3838 MDI_PI_SET_OFFLINE(pip); 3839 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3840 MDI_PI_UNLOCK(pip); 3841 3842 MDI_CLIENT_LOCK(ct); 3843 if (rv == MDI_SUCCESS) { 3844 if (ct->ct_unstable == 0) { 3845 dev_info_t *cdip = ct->ct_dip; 3846 3847 /* 3848 * Onlining the mdi_pathinfo node will impact the 3849 * client state Update the client and dev_info node 3850 * state accordingly 3851 */ 3852 i_mdi_client_update_state(ct); 3853 rv = NDI_SUCCESS; 3854 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3855 if (cdip && 3856 (i_ddi_node_state(cdip) >= 3857 DS_INITIALIZED)) { 3858 MDI_CLIENT_UNLOCK(ct); 3859 rv = ndi_devi_offline(cdip, 0); 3860 MDI_CLIENT_LOCK(ct); 3861 if (rv != NDI_SUCCESS) { 3862 /* 3863 * ndi_devi_offline failed. 3864 * Reset client flags to 3865 * online. 
3866 */ 3867 MDI_DEBUG(4, (CE_WARN, cdip, 3868 "!ndi_devi_offline: failed " 3869 " Error: %x", rv)); 3870 MDI_CLIENT_SET_ONLINE(ct); 3871 } 3872 } 3873 } 3874 /* 3875 * Convert to MDI error code 3876 */ 3877 switch (rv) { 3878 case NDI_SUCCESS: 3879 rv = MDI_SUCCESS; 3880 break; 3881 case NDI_BUSY: 3882 rv = MDI_BUSY; 3883 break; 3884 default: 3885 rv = MDI_FAILURE; 3886 break; 3887 } 3888 } 3889 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3890 i_mdi_report_path_state(ct, pip); 3891 } 3892 3893 MDI_CLIENT_UNLOCK(ct); 3894 3895 /* 3896 * Change in the mdi_pathinfo node state will impact the client state 3897 */ 3898 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3899 (void *)ct, (void *)pip)); 3900 return (rv); 3901 } 3902 3903 /* 3904 * mdi_pi_get_node_name(): 3905 * Get the name associated with a mdi_pathinfo node. 3906 * Since pathinfo nodes are not directly named, we 3907 * return the node_name of the client. 3908 * 3909 * Return Values: 3910 * char * 3911 */ 3912 char * 3913 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 3914 { 3915 mdi_client_t *ct; 3916 3917 if (pip == NULL) 3918 return (NULL); 3919 ct = MDI_PI(pip)->pi_client; 3920 if ((ct == NULL) || (ct->ct_dip == NULL)) 3921 return (NULL); 3922 return (ddi_node_name(ct->ct_dip)); 3923 } 3924 3925 /* 3926 * mdi_pi_get_addr(): 3927 * Get the unit address associated with a mdi_pathinfo node 3928 * 3929 * Return Values: 3930 * char * 3931 */ 3932 char * 3933 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3934 { 3935 if (pip == NULL) 3936 return (NULL); 3937 3938 return (MDI_PI(pip)->pi_addr); 3939 } 3940 3941 /* 3942 * mdi_pi_get_path_instance(): 3943 * Get the 'path_instance' of a mdi_pathinfo node 3944 * 3945 * Return Values: 3946 * path_instance 3947 */ 3948 int 3949 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 3950 { 3951 if (pip == NULL) 3952 return (0); 3953 3954 return (MDI_PI(pip)->pi_path_instance); 3955 } 3956 3957 /* 3958 * mdi_pi_pathname(): 3959 * Return pointer to path to pathinfo node. 
3960 */ 3961 char * 3962 mdi_pi_pathname(mdi_pathinfo_t *pip) 3963 { 3964 if (pip == NULL) 3965 return (NULL); 3966 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 3967 } 3968 3969 char * 3970 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 3971 { 3972 char *obp_path = NULL; 3973 if ((pip == NULL) || (path == NULL)) 3974 return (NULL); 3975 3976 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 3977 (void) strcpy(path, obp_path); 3978 (void) mdi_prop_free(obp_path); 3979 } else { 3980 path = NULL; 3981 } 3982 return (path); 3983 } 3984 3985 int 3986 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 3987 { 3988 dev_info_t *pdip; 3989 char *obp_path = NULL; 3990 int rc = MDI_FAILURE; 3991 3992 if (pip == NULL) 3993 return (MDI_FAILURE); 3994 3995 pdip = mdi_pi_get_phci(pip); 3996 if (pdip == NULL) 3997 return (MDI_FAILURE); 3998 3999 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4000 4001 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4002 (void) ddi_pathname(pdip, obp_path); 4003 } 4004 4005 if (component) { 4006 (void) strncat(obp_path, "/", MAXPATHLEN); 4007 (void) strncat(obp_path, component, MAXPATHLEN); 4008 } 4009 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4010 4011 if (obp_path) 4012 kmem_free(obp_path, MAXPATHLEN); 4013 return (rc); 4014 } 4015 4016 /* 4017 * mdi_pi_get_client(): 4018 * Get the client devinfo associated with a mdi_pathinfo node 4019 * 4020 * Return Values: 4021 * Handle to client device dev_info node 4022 */ 4023 dev_info_t * 4024 mdi_pi_get_client(mdi_pathinfo_t *pip) 4025 { 4026 dev_info_t *dip = NULL; 4027 if (pip) { 4028 dip = MDI_PI(pip)->pi_client->ct_dip; 4029 } 4030 return (dip); 4031 } 4032 4033 /* 4034 * mdi_pi_get_phci(): 4035 * Get the pHCI devinfo associated with the mdi_pathinfo node 4036 * Return Values: 4037 * Handle to dev_info node 4038 */ 4039 dev_info_t * 4040 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4041 { 4042 dev_info_t *dip = NULL; 4043 if (pip) { 
4044 dip = MDI_PI(pip)->pi_phci->ph_dip; 4045 } 4046 return (dip); 4047 } 4048 4049 /* 4050 * mdi_pi_get_client_private(): 4051 * Get the client private information associated with the 4052 * mdi_pathinfo node 4053 */ 4054 void * 4055 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4056 { 4057 void *cprivate = NULL; 4058 if (pip) { 4059 cprivate = MDI_PI(pip)->pi_cprivate; 4060 } 4061 return (cprivate); 4062 } 4063 4064 /* 4065 * mdi_pi_set_client_private(): 4066 * Set the client private information in the mdi_pathinfo node 4067 */ 4068 void 4069 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4070 { 4071 if (pip) { 4072 MDI_PI(pip)->pi_cprivate = priv; 4073 } 4074 } 4075 4076 /* 4077 * mdi_pi_get_phci_private(): 4078 * Get the pHCI private information associated with the 4079 * mdi_pathinfo node 4080 */ 4081 caddr_t 4082 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4083 { 4084 caddr_t pprivate = NULL; 4085 if (pip) { 4086 pprivate = MDI_PI(pip)->pi_pprivate; 4087 } 4088 return (pprivate); 4089 } 4090 4091 /* 4092 * mdi_pi_set_phci_private(): 4093 * Set the pHCI private information in the mdi_pathinfo node 4094 */ 4095 void 4096 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4097 { 4098 if (pip) { 4099 MDI_PI(pip)->pi_pprivate = priv; 4100 } 4101 } 4102 4103 /* 4104 * mdi_pi_get_state(): 4105 * Get the mdi_pathinfo node state. Transient states are internal 4106 * and not provided to the users 4107 */ 4108 mdi_pathinfo_state_t 4109 mdi_pi_get_state(mdi_pathinfo_t *pip) 4110 { 4111 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4112 4113 if (pip) { 4114 if (MDI_PI_IS_TRANSIENT(pip)) { 4115 /* 4116 * mdi_pathinfo is in state transition. Return the 4117 * last good state. 
4118 */ 4119 state = MDI_PI_OLD_STATE(pip); 4120 } else { 4121 state = MDI_PI_STATE(pip); 4122 } 4123 } 4124 return (state); 4125 } 4126 4127 /* 4128 * Note that the following function needs to be the new interface for 4129 * mdi_pi_get_state when mpxio gets integrated to ON. 4130 */ 4131 int 4132 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4133 uint32_t *ext_state) 4134 { 4135 *state = MDI_PATHINFO_STATE_INIT; 4136 4137 if (pip) { 4138 if (MDI_PI_IS_TRANSIENT(pip)) { 4139 /* 4140 * mdi_pathinfo is in state transition. Return the 4141 * last good state. 4142 */ 4143 *state = MDI_PI_OLD_STATE(pip); 4144 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4145 } else { 4146 *state = MDI_PI_STATE(pip); 4147 *ext_state = MDI_PI_EXT_STATE(pip); 4148 } 4149 } 4150 return (MDI_SUCCESS); 4151 } 4152 4153 /* 4154 * mdi_pi_get_preferred: 4155 * Get the preferred path flag 4156 */ 4157 int 4158 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4159 { 4160 if (pip) { 4161 return (MDI_PI(pip)->pi_preferred); 4162 } 4163 return (0); 4164 } 4165 4166 /* 4167 * mdi_pi_set_preferred: 4168 * Set the preferred path flag 4169 */ 4170 void 4171 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4172 { 4173 if (pip) { 4174 MDI_PI(pip)->pi_preferred = preferred; 4175 } 4176 } 4177 4178 /* 4179 * mdi_pi_set_state(): 4180 * Set the mdi_pathinfo node state 4181 */ 4182 void 4183 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4184 { 4185 uint32_t ext_state; 4186 4187 if (pip) { 4188 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4189 MDI_PI(pip)->pi_state = state; 4190 MDI_PI(pip)->pi_state |= ext_state; 4191 } 4192 } 4193 4194 /* 4195 * Property functions: 4196 */ 4197 int 4198 i_map_nvlist_error_to_mdi(int val) 4199 { 4200 int rv; 4201 4202 switch (val) { 4203 case 0: 4204 rv = DDI_PROP_SUCCESS; 4205 break; 4206 case EINVAL: 4207 case ENOTSUP: 4208 rv = DDI_PROP_INVAL_ARG; 4209 break; 4210 case ENOMEM: 4211 rv = DDI_PROP_NO_MEMORY; 4212 break; 
4213 default: 4214 rv = DDI_PROP_NOT_FOUND; 4215 break; 4216 } 4217 return (rv); 4218 } 4219 4220 /* 4221 * mdi_pi_get_next_prop(): 4222 * Property walk function. The caller should hold mdi_pi_lock() 4223 * and release by calling mdi_pi_unlock() at the end of walk to 4224 * get a consistent value. 4225 */ 4226 nvpair_t * 4227 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4228 { 4229 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4230 return (NULL); 4231 } 4232 ASSERT(MDI_PI_LOCKED(pip)); 4233 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4234 } 4235 4236 /* 4237 * mdi_prop_remove(): 4238 * Remove the named property from the named list. 4239 */ 4240 int 4241 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4242 { 4243 if (pip == NULL) { 4244 return (DDI_PROP_NOT_FOUND); 4245 } 4246 ASSERT(!MDI_PI_LOCKED(pip)); 4247 MDI_PI_LOCK(pip); 4248 if (MDI_PI(pip)->pi_prop == NULL) { 4249 MDI_PI_UNLOCK(pip); 4250 return (DDI_PROP_NOT_FOUND); 4251 } 4252 if (name) { 4253 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4254 } else { 4255 char nvp_name[MAXNAMELEN]; 4256 nvpair_t *nvp; 4257 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4258 while (nvp) { 4259 nvpair_t *next; 4260 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4261 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 4262 nvpair_name(nvp)); 4263 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4264 nvp_name); 4265 nvp = next; 4266 } 4267 } 4268 MDI_PI_UNLOCK(pip); 4269 return (DDI_PROP_SUCCESS); 4270 } 4271 4272 /* 4273 * mdi_prop_size(): 4274 * Get buffer size needed to pack the property data. 4275 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4276 * buffer size. 
4277 */ 4278 int 4279 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4280 { 4281 int rv; 4282 size_t bufsize; 4283 4284 *buflenp = 0; 4285 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4286 return (DDI_PROP_NOT_FOUND); 4287 } 4288 ASSERT(MDI_PI_LOCKED(pip)); 4289 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4290 &bufsize, NV_ENCODE_NATIVE); 4291 *buflenp = bufsize; 4292 return (i_map_nvlist_error_to_mdi(rv)); 4293 } 4294 4295 /* 4296 * mdi_prop_pack(): 4297 * pack the property list. The caller should hold the 4298 * mdi_pathinfo_t node to get a consistent data 4299 */ 4300 int 4301 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4302 { 4303 int rv; 4304 size_t bufsize; 4305 4306 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4307 return (DDI_PROP_NOT_FOUND); 4308 } 4309 4310 ASSERT(MDI_PI_LOCKED(pip)); 4311 4312 bufsize = buflen; 4313 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4314 NV_ENCODE_NATIVE, KM_SLEEP); 4315 4316 return (i_map_nvlist_error_to_mdi(rv)); 4317 } 4318 4319 /* 4320 * mdi_prop_update_byte(): 4321 * Create/Update a byte property 4322 */ 4323 int 4324 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4325 { 4326 int rv; 4327 4328 if (pip == NULL) { 4329 return (DDI_PROP_INVAL_ARG); 4330 } 4331 ASSERT(!MDI_PI_LOCKED(pip)); 4332 MDI_PI_LOCK(pip); 4333 if (MDI_PI(pip)->pi_prop == NULL) { 4334 MDI_PI_UNLOCK(pip); 4335 return (DDI_PROP_NOT_FOUND); 4336 } 4337 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4338 MDI_PI_UNLOCK(pip); 4339 return (i_map_nvlist_error_to_mdi(rv)); 4340 } 4341 4342 /* 4343 * mdi_prop_update_byte_array(): 4344 * Create/Update a byte array property 4345 */ 4346 int 4347 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4348 uint_t nelements) 4349 { 4350 int rv; 4351 4352 if (pip == NULL) { 4353 return (DDI_PROP_INVAL_ARG); 4354 } 4355 ASSERT(!MDI_PI_LOCKED(pip)); 4356 MDI_PI_LOCK(pip); 4357 if (MDI_PI(pip)->pi_prop == NULL) { 
4358 MDI_PI_UNLOCK(pip); 4359 return (DDI_PROP_NOT_FOUND); 4360 } 4361 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4362 MDI_PI_UNLOCK(pip); 4363 return (i_map_nvlist_error_to_mdi(rv)); 4364 } 4365 4366 /* 4367 * mdi_prop_update_int(): 4368 * Create/Update a 32 bit integer property 4369 */ 4370 int 4371 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4372 { 4373 int rv; 4374 4375 if (pip == NULL) { 4376 return (DDI_PROP_INVAL_ARG); 4377 } 4378 ASSERT(!MDI_PI_LOCKED(pip)); 4379 MDI_PI_LOCK(pip); 4380 if (MDI_PI(pip)->pi_prop == NULL) { 4381 MDI_PI_UNLOCK(pip); 4382 return (DDI_PROP_NOT_FOUND); 4383 } 4384 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4385 MDI_PI_UNLOCK(pip); 4386 return (i_map_nvlist_error_to_mdi(rv)); 4387 } 4388 4389 /* 4390 * mdi_prop_update_int64(): 4391 * Create/Update a 64 bit integer property 4392 */ 4393 int 4394 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4395 { 4396 int rv; 4397 4398 if (pip == NULL) { 4399 return (DDI_PROP_INVAL_ARG); 4400 } 4401 ASSERT(!MDI_PI_LOCKED(pip)); 4402 MDI_PI_LOCK(pip); 4403 if (MDI_PI(pip)->pi_prop == NULL) { 4404 MDI_PI_UNLOCK(pip); 4405 return (DDI_PROP_NOT_FOUND); 4406 } 4407 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4408 MDI_PI_UNLOCK(pip); 4409 return (i_map_nvlist_error_to_mdi(rv)); 4410 } 4411 4412 /* 4413 * mdi_prop_update_int_array(): 4414 * Create/Update a int array property 4415 */ 4416 int 4417 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4418 uint_t nelements) 4419 { 4420 int rv; 4421 4422 if (pip == NULL) { 4423 return (DDI_PROP_INVAL_ARG); 4424 } 4425 ASSERT(!MDI_PI_LOCKED(pip)); 4426 MDI_PI_LOCK(pip); 4427 if (MDI_PI(pip)->pi_prop == NULL) { 4428 MDI_PI_UNLOCK(pip); 4429 return (DDI_PROP_NOT_FOUND); 4430 } 4431 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4432 nelements); 4433 MDI_PI_UNLOCK(pip); 4434 return (i_map_nvlist_error_to_mdi(rv)); 
4435 } 4436 4437 /* 4438 * mdi_prop_update_string(): 4439 * Create/Update a string property 4440 */ 4441 int 4442 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4443 { 4444 int rv; 4445 4446 if (pip == NULL) { 4447 return (DDI_PROP_INVAL_ARG); 4448 } 4449 ASSERT(!MDI_PI_LOCKED(pip)); 4450 MDI_PI_LOCK(pip); 4451 if (MDI_PI(pip)->pi_prop == NULL) { 4452 MDI_PI_UNLOCK(pip); 4453 return (DDI_PROP_NOT_FOUND); 4454 } 4455 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4456 MDI_PI_UNLOCK(pip); 4457 return (i_map_nvlist_error_to_mdi(rv)); 4458 } 4459 4460 /* 4461 * mdi_prop_update_string_array(): 4462 * Create/Update a string array property 4463 */ 4464 int 4465 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4466 uint_t nelements) 4467 { 4468 int rv; 4469 4470 if (pip == NULL) { 4471 return (DDI_PROP_INVAL_ARG); 4472 } 4473 ASSERT(!MDI_PI_LOCKED(pip)); 4474 MDI_PI_LOCK(pip); 4475 if (MDI_PI(pip)->pi_prop == NULL) { 4476 MDI_PI_UNLOCK(pip); 4477 return (DDI_PROP_NOT_FOUND); 4478 } 4479 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4480 nelements); 4481 MDI_PI_UNLOCK(pip); 4482 return (i_map_nvlist_error_to_mdi(rv)); 4483 } 4484 4485 /* 4486 * mdi_prop_lookup_byte(): 4487 * Look for byte property identified by name. The data returned 4488 * is the actual property and valid as long as mdi_pathinfo_t node 4489 * is alive. 4490 */ 4491 int 4492 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4493 { 4494 int rv; 4495 4496 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4497 return (DDI_PROP_NOT_FOUND); 4498 } 4499 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4500 return (i_map_nvlist_error_to_mdi(rv)); 4501 } 4502 4503 4504 /* 4505 * mdi_prop_lookup_byte_array(): 4506 * Look for byte array property identified by name. The data 4507 * returned is the actual property and valid as long as 4508 * mdi_pathinfo_t node is alive. 
4509 */ 4510 int 4511 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4512 uint_t *nelements) 4513 { 4514 int rv; 4515 4516 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4517 return (DDI_PROP_NOT_FOUND); 4518 } 4519 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4520 nelements); 4521 return (i_map_nvlist_error_to_mdi(rv)); 4522 } 4523 4524 /* 4525 * mdi_prop_lookup_int(): 4526 * Look for int property identified by name. The data returned 4527 * is the actual property and valid as long as mdi_pathinfo_t 4528 * node is alive. 4529 */ 4530 int 4531 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4532 { 4533 int rv; 4534 4535 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4536 return (DDI_PROP_NOT_FOUND); 4537 } 4538 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4539 return (i_map_nvlist_error_to_mdi(rv)); 4540 } 4541 4542 /* 4543 * mdi_prop_lookup_int64(): 4544 * Look for int64 property identified by name. The data returned 4545 * is the actual property and valid as long as mdi_pathinfo_t node 4546 * is alive. 4547 */ 4548 int 4549 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4550 { 4551 int rv; 4552 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4553 return (DDI_PROP_NOT_FOUND); 4554 } 4555 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4556 return (i_map_nvlist_error_to_mdi(rv)); 4557 } 4558 4559 /* 4560 * mdi_prop_lookup_int_array(): 4561 * Look for int array property identified by name. The data 4562 * returned is the actual property and valid as long as 4563 * mdi_pathinfo_t node is alive. 
4564 */ 4565 int 4566 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4567 uint_t *nelements) 4568 { 4569 int rv; 4570 4571 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4572 return (DDI_PROP_NOT_FOUND); 4573 } 4574 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4575 (int32_t **)data, nelements); 4576 return (i_map_nvlist_error_to_mdi(rv)); 4577 } 4578 4579 /* 4580 * mdi_prop_lookup_string(): 4581 * Look for string property identified by name. The data 4582 * returned is the actual property and valid as long as 4583 * mdi_pathinfo_t node is alive. 4584 */ 4585 int 4586 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4587 { 4588 int rv; 4589 4590 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4591 return (DDI_PROP_NOT_FOUND); 4592 } 4593 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4594 return (i_map_nvlist_error_to_mdi(rv)); 4595 } 4596 4597 /* 4598 * mdi_prop_lookup_string_array(): 4599 * Look for string array property identified by name. The data 4600 * returned is the actual property and valid as long as 4601 * mdi_pathinfo_t node is alive. 4602 */ 4603 int 4604 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4605 uint_t *nelements) 4606 { 4607 int rv; 4608 4609 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4610 return (DDI_PROP_NOT_FOUND); 4611 } 4612 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4613 nelements); 4614 return (i_map_nvlist_error_to_mdi(rv)); 4615 } 4616 4617 /* 4618 * mdi_prop_free(): 4619 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4620 * functions return the pointer to actual property data and not a 4621 * copy of it. So the data returned is valid as long as 4622 * mdi_pathinfo_t node is valid. 
4623 */ 4624 /*ARGSUSED*/ 4625 int 4626 mdi_prop_free(void *data) 4627 { 4628 return (DDI_PROP_SUCCESS); 4629 } 4630 4631 /*ARGSUSED*/ 4632 static void 4633 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4634 { 4635 char *phci_path, *ct_path; 4636 char *ct_status; 4637 char *status; 4638 dev_info_t *dip = ct->ct_dip; 4639 char lb_buf[64]; 4640 4641 ASSERT(MDI_CLIENT_LOCKED(ct)); 4642 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4643 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4644 return; 4645 } 4646 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4647 ct_status = "optimal"; 4648 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4649 ct_status = "degraded"; 4650 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4651 ct_status = "failed"; 4652 } else { 4653 ct_status = "unknown"; 4654 } 4655 4656 if (MDI_PI_IS_OFFLINE(pip)) { 4657 status = "offline"; 4658 } else if (MDI_PI_IS_ONLINE(pip)) { 4659 status = "online"; 4660 } else if (MDI_PI_IS_STANDBY(pip)) { 4661 status = "standby"; 4662 } else if (MDI_PI_IS_FAULT(pip)) { 4663 status = "faulted"; 4664 } else { 4665 status = "unknown"; 4666 } 4667 4668 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4669 (void) snprintf(lb_buf, sizeof (lb_buf), 4670 "%s, region-size: %d", mdi_load_balance_lba, 4671 ct->ct_lb_args->region_size); 4672 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4673 (void) snprintf(lb_buf, sizeof (lb_buf), 4674 "%s", mdi_load_balance_none); 4675 } else { 4676 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4677 mdi_load_balance_rr); 4678 } 4679 4680 if (dip) { 4681 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4682 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4683 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4684 "path %s (%s%d) to target address: %s is %s" 4685 " Load balancing: %s\n", 4686 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4687 ddi_get_instance(dip), ct_status, 4688 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4689 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4690 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4691 MDI_PI(pip)->pi_addr, status, lb_buf); 4692 kmem_free(phci_path, MAXPATHLEN); 4693 kmem_free(ct_path, MAXPATHLEN); 4694 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4695 } 4696 } 4697 4698 #ifdef DEBUG 4699 /* 4700 * i_mdi_log(): 4701 * Utility function for error message management 4702 * 4703 */ 4704 /*PRINTFLIKE3*/ 4705 static void 4706 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4707 { 4708 char name[MAXNAMELEN]; 4709 char buf[MAXNAMELEN]; 4710 char *bp; 4711 va_list ap; 4712 int log_only = 0; 4713 int boot_only = 0; 4714 int console_only = 0; 4715 4716 if (dip) { 4717 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4718 ddi_driver_name(dip), ddi_get_instance(dip)); 4719 } else { 4720 name[0] = 0; 4721 } 4722 4723 va_start(ap, fmt); 4724 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4725 va_end(ap); 4726 4727 switch (buf[0]) { 4728 case '!': 4729 bp = &buf[1]; 4730 log_only = 1; 4731 break; 4732 case '?': 4733 bp = &buf[1]; 4734 boot_only = 1; 4735 break; 4736 case '^': 4737 bp = &buf[1]; 4738 console_only = 1; 4739 break; 4740 default: 4741 bp = buf; 4742 break; 4743 } 4744 if (mdi_debug_logonly) { 4745 log_only = 1; 4746 boot_only = 0; 4747 console_only = 0; 4748 } 4749 4750 switch (level) { 4751 case CE_NOTE: 4752 level = CE_CONT; 4753 /* FALLTHROUGH */ 4754 case CE_CONT: 4755 case CE_WARN: 4756 case CE_PANIC: 4757 if (boot_only) { 4758 cmn_err(level, "?mdi: %s%s", name, bp); 4759 } else if (console_only) { 4760 cmn_err(level, "^mdi: %s%s", name, bp); 4761 } else if (log_only) { 4762 cmn_err(level, "!mdi: %s%s", name, bp); 4763 } else { 4764 cmn_err(level, "mdi: %s%s", name, bp); 4765 } 4766 break; 4767 default: 4768 cmn_err(level, "mdi: %s%s", name, bp); 4769 break; 4770 } 4771 } 4772 #endif /* DEBUG */ 4773 4774 void 4775 i_mdi_client_online(dev_info_t *ct_dip) 4776 { 4777 mdi_client_t *ct; 4778 4779 /* 4780 * Client online notification. 
Mark client state as online 4781 * restore our binding with dev_info node 4782 */ 4783 ct = i_devi_get_client(ct_dip); 4784 ASSERT(ct != NULL); 4785 MDI_CLIENT_LOCK(ct); 4786 MDI_CLIENT_SET_ONLINE(ct); 4787 /* catch for any memory leaks */ 4788 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4789 ct->ct_dip = ct_dip; 4790 4791 if (ct->ct_power_cnt == 0) 4792 (void) i_mdi_power_all_phci(ct); 4793 4794 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4795 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4796 i_mdi_pm_hold_client(ct, 1); 4797 4798 MDI_CLIENT_UNLOCK(ct); 4799 } 4800 4801 void 4802 i_mdi_phci_online(dev_info_t *ph_dip) 4803 { 4804 mdi_phci_t *ph; 4805 4806 /* pHCI online notification. Mark state accordingly */ 4807 ph = i_devi_get_phci(ph_dip); 4808 ASSERT(ph != NULL); 4809 MDI_PHCI_LOCK(ph); 4810 MDI_PHCI_SET_ONLINE(ph); 4811 MDI_PHCI_UNLOCK(ph); 4812 } 4813 4814 /* 4815 * mdi_devi_online(): 4816 * Online notification from NDI framework on pHCI/client 4817 * device online. 4818 * Return Values: 4819 * NDI_SUCCESS 4820 * MDI_FAILURE 4821 */ 4822 /*ARGSUSED*/ 4823 int 4824 mdi_devi_online(dev_info_t *dip, uint_t flags) 4825 { 4826 if (MDI_PHCI(dip)) { 4827 i_mdi_phci_online(dip); 4828 } 4829 4830 if (MDI_CLIENT(dip)) { 4831 i_mdi_client_online(dip); 4832 } 4833 return (NDI_SUCCESS); 4834 } 4835 4836 /* 4837 * mdi_devi_offline(): 4838 * Offline notification from NDI framework on pHCI/Client device 4839 * offline. 
4840 * 4841 * Return Values: 4842 * NDI_SUCCESS 4843 * NDI_FAILURE 4844 */ 4845 /*ARGSUSED*/ 4846 int 4847 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4848 { 4849 int rv = NDI_SUCCESS; 4850 4851 if (MDI_CLIENT(dip)) { 4852 rv = i_mdi_client_offline(dip, flags); 4853 if (rv != NDI_SUCCESS) 4854 return (rv); 4855 } 4856 4857 if (MDI_PHCI(dip)) { 4858 rv = i_mdi_phci_offline(dip, flags); 4859 4860 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4861 /* set client back online */ 4862 i_mdi_client_online(dip); 4863 } 4864 } 4865 4866 return (rv); 4867 } 4868 4869 /*ARGSUSED*/ 4870 static int 4871 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4872 { 4873 int rv = NDI_SUCCESS; 4874 mdi_phci_t *ph; 4875 mdi_client_t *ct; 4876 mdi_pathinfo_t *pip; 4877 mdi_pathinfo_t *next; 4878 mdi_pathinfo_t *failed_pip = NULL; 4879 dev_info_t *cdip; 4880 4881 /* 4882 * pHCI component offline notification 4883 * Make sure that this pHCI instance is free to be offlined. 4884 * If it is OK to proceed, Offline and remove all the child 4885 * mdi_pathinfo nodes. This process automatically offlines 4886 * corresponding client devices, for which this pHCI provides 4887 * critical services. 4888 */ 4889 ph = i_devi_get_phci(dip); 4890 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n", 4891 (void *)dip, (void *)ph)); 4892 if (ph == NULL) { 4893 return (rv); 4894 } 4895 4896 MDI_PHCI_LOCK(ph); 4897 4898 if (MDI_PHCI_IS_OFFLINE(ph)) { 4899 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", 4900 (void *)ph)); 4901 MDI_PHCI_UNLOCK(ph); 4902 return (NDI_SUCCESS); 4903 } 4904 4905 /* 4906 * Check to see if the pHCI can be offlined 4907 */ 4908 if (ph->ph_unstable) { 4909 MDI_DEBUG(1, (CE_WARN, dip, 4910 "!One or more target devices are in transient " 4911 "state. This device can not be removed at " 4912 "this moment. 
Please try again later.")); 4913 MDI_PHCI_UNLOCK(ph); 4914 return (NDI_BUSY); 4915 } 4916 4917 pip = ph->ph_path_head; 4918 while (pip != NULL) { 4919 MDI_PI_LOCK(pip); 4920 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4921 4922 /* 4923 * The mdi_pathinfo state is OK. Check the client state. 4924 * If failover in progress fail the pHCI from offlining 4925 */ 4926 ct = MDI_PI(pip)->pi_client; 4927 i_mdi_client_lock(ct, pip); 4928 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4929 (ct->ct_unstable)) { 4930 /* 4931 * Failover is in progress, Fail the DR 4932 */ 4933 MDI_DEBUG(1, (CE_WARN, dip, 4934 "!pHCI device (%s%d) is Busy. %s", 4935 ddi_driver_name(dip), ddi_get_instance(dip), 4936 "This device can not be removed at " 4937 "this moment. Please try again later.")); 4938 MDI_PI_UNLOCK(pip); 4939 i_mdi_client_unlock(ct); 4940 MDI_PHCI_UNLOCK(ph); 4941 return (NDI_BUSY); 4942 } 4943 MDI_PI_UNLOCK(pip); 4944 4945 /* 4946 * Check to see of we are removing the last path of this 4947 * client device... 4948 */ 4949 cdip = ct->ct_dip; 4950 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4951 (i_mdi_client_compute_state(ct, ph) == 4952 MDI_CLIENT_STATE_FAILED)) { 4953 i_mdi_client_unlock(ct); 4954 MDI_PHCI_UNLOCK(ph); 4955 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4956 /* 4957 * ndi_devi_offline() failed. 4958 * This pHCI provides the critical path 4959 * to one or more client devices. 4960 * Return busy. 4961 */ 4962 MDI_PHCI_LOCK(ph); 4963 MDI_DEBUG(1, (CE_WARN, dip, 4964 "!pHCI device (%s%d) is Busy. %s", 4965 ddi_driver_name(dip), ddi_get_instance(dip), 4966 "This device can not be removed at " 4967 "this moment. 
Please try again later.")); 4968 failed_pip = pip; 4969 break; 4970 } else { 4971 MDI_PHCI_LOCK(ph); 4972 pip = next; 4973 } 4974 } else { 4975 i_mdi_client_unlock(ct); 4976 pip = next; 4977 } 4978 } 4979 4980 if (failed_pip) { 4981 pip = ph->ph_path_head; 4982 while (pip != failed_pip) { 4983 MDI_PI_LOCK(pip); 4984 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4985 ct = MDI_PI(pip)->pi_client; 4986 i_mdi_client_lock(ct, pip); 4987 cdip = ct->ct_dip; 4988 switch (MDI_CLIENT_STATE(ct)) { 4989 case MDI_CLIENT_STATE_OPTIMAL: 4990 case MDI_CLIENT_STATE_DEGRADED: 4991 if (cdip) { 4992 MDI_PI_UNLOCK(pip); 4993 i_mdi_client_unlock(ct); 4994 MDI_PHCI_UNLOCK(ph); 4995 (void) ndi_devi_online(cdip, 0); 4996 MDI_PHCI_LOCK(ph); 4997 pip = next; 4998 continue; 4999 } 5000 break; 5001 5002 case MDI_CLIENT_STATE_FAILED: 5003 if (cdip) { 5004 MDI_PI_UNLOCK(pip); 5005 i_mdi_client_unlock(ct); 5006 MDI_PHCI_UNLOCK(ph); 5007 (void) ndi_devi_offline(cdip, 0); 5008 MDI_PHCI_LOCK(ph); 5009 pip = next; 5010 continue; 5011 } 5012 break; 5013 } 5014 MDI_PI_UNLOCK(pip); 5015 i_mdi_client_unlock(ct); 5016 pip = next; 5017 } 5018 MDI_PHCI_UNLOCK(ph); 5019 return (NDI_BUSY); 5020 } 5021 5022 /* 5023 * Mark the pHCI as offline 5024 */ 5025 MDI_PHCI_SET_OFFLINE(ph); 5026 5027 /* 5028 * Mark the child mdi_pathinfo nodes as transient 5029 */ 5030 pip = ph->ph_path_head; 5031 while (pip != NULL) { 5032 MDI_PI_LOCK(pip); 5033 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5034 MDI_PI_SET_OFFLINING(pip); 5035 MDI_PI_UNLOCK(pip); 5036 pip = next; 5037 } 5038 MDI_PHCI_UNLOCK(ph); 5039 /* 5040 * Give a chance for any pending commands to execute 5041 */ 5042 delay(1); 5043 MDI_PHCI_LOCK(ph); 5044 pip = ph->ph_path_head; 5045 while (pip != NULL) { 5046 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5047 (void) i_mdi_pi_offline(pip, flags); 5048 MDI_PI_LOCK(pip); 5049 ct = MDI_PI(pip)->pi_client; 5050 if (!MDI_PI_IS_OFFLINE(pip)) { 5051 MDI_DEBUG(1, (CE_WARN, dip, 5052 "!pHCI device (%s%d) 
is Busy. %s", 5053 ddi_driver_name(dip), ddi_get_instance(dip), 5054 "This device can not be removed at " 5055 "this moment. Please try again later.")); 5056 MDI_PI_UNLOCK(pip); 5057 MDI_PHCI_SET_ONLINE(ph); 5058 MDI_PHCI_UNLOCK(ph); 5059 return (NDI_BUSY); 5060 } 5061 MDI_PI_UNLOCK(pip); 5062 pip = next; 5063 } 5064 MDI_PHCI_UNLOCK(ph); 5065 5066 return (rv); 5067 } 5068 5069 void 5070 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5071 { 5072 mdi_phci_t *ph; 5073 mdi_client_t *ct; 5074 mdi_pathinfo_t *pip; 5075 mdi_pathinfo_t *next; 5076 dev_info_t *cdip; 5077 5078 if (!MDI_PHCI(dip)) 5079 return; 5080 5081 ph = i_devi_get_phci(dip); 5082 if (ph == NULL) { 5083 return; 5084 } 5085 5086 MDI_PHCI_LOCK(ph); 5087 5088 if (MDI_PHCI_IS_OFFLINE(ph)) { 5089 /* has no last path */ 5090 MDI_PHCI_UNLOCK(ph); 5091 return; 5092 } 5093 5094 pip = ph->ph_path_head; 5095 while (pip != NULL) { 5096 MDI_PI_LOCK(pip); 5097 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5098 5099 ct = MDI_PI(pip)->pi_client; 5100 i_mdi_client_lock(ct, pip); 5101 MDI_PI_UNLOCK(pip); 5102 5103 cdip = ct->ct_dip; 5104 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5105 (i_mdi_client_compute_state(ct, ph) == 5106 MDI_CLIENT_STATE_FAILED)) { 5107 /* Last path. 
Mark client dip as retiring */ 5108 i_mdi_client_unlock(ct); 5109 MDI_PHCI_UNLOCK(ph); 5110 (void) e_ddi_mark_retiring(cdip, cons_array); 5111 MDI_PHCI_LOCK(ph); 5112 pip = next; 5113 } else { 5114 i_mdi_client_unlock(ct); 5115 pip = next; 5116 } 5117 } 5118 5119 MDI_PHCI_UNLOCK(ph); 5120 5121 return; 5122 } 5123 5124 void 5125 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5126 { 5127 mdi_phci_t *ph; 5128 mdi_client_t *ct; 5129 mdi_pathinfo_t *pip; 5130 mdi_pathinfo_t *next; 5131 dev_info_t *cdip; 5132 5133 if (!MDI_PHCI(dip)) 5134 return; 5135 5136 ph = i_devi_get_phci(dip); 5137 if (ph == NULL) 5138 return; 5139 5140 MDI_PHCI_LOCK(ph); 5141 5142 if (MDI_PHCI_IS_OFFLINE(ph)) { 5143 MDI_PHCI_UNLOCK(ph); 5144 /* not last path */ 5145 return; 5146 } 5147 5148 if (ph->ph_unstable) { 5149 MDI_PHCI_UNLOCK(ph); 5150 /* can't check for constraints */ 5151 *constraint = 0; 5152 return; 5153 } 5154 5155 pip = ph->ph_path_head; 5156 while (pip != NULL) { 5157 MDI_PI_LOCK(pip); 5158 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5159 5160 /* 5161 * The mdi_pathinfo state is OK. Check the client state. 5162 * If failover in progress fail the pHCI from offlining 5163 */ 5164 ct = MDI_PI(pip)->pi_client; 5165 i_mdi_client_lock(ct, pip); 5166 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5167 (ct->ct_unstable)) { 5168 /* 5169 * Failover is in progress, can't check for constraints 5170 */ 5171 MDI_PI_UNLOCK(pip); 5172 i_mdi_client_unlock(ct); 5173 MDI_PHCI_UNLOCK(ph); 5174 *constraint = 0; 5175 return; 5176 } 5177 MDI_PI_UNLOCK(pip); 5178 5179 /* 5180 * Check to see of we are retiring the last path of this 5181 * client device... 
5182 */ 5183 cdip = ct->ct_dip; 5184 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5185 (i_mdi_client_compute_state(ct, ph) == 5186 MDI_CLIENT_STATE_FAILED)) { 5187 i_mdi_client_unlock(ct); 5188 MDI_PHCI_UNLOCK(ph); 5189 (void) e_ddi_retire_notify(cdip, constraint); 5190 MDI_PHCI_LOCK(ph); 5191 pip = next; 5192 } else { 5193 i_mdi_client_unlock(ct); 5194 pip = next; 5195 } 5196 } 5197 5198 MDI_PHCI_UNLOCK(ph); 5199 5200 return; 5201 } 5202 5203 /* 5204 * offline the path(s) hanging off the PHCI. If the 5205 * last path to any client, check that constraints 5206 * have been applied. 5207 */ 5208 void 5209 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) 5210 { 5211 mdi_phci_t *ph; 5212 mdi_client_t *ct; 5213 mdi_pathinfo_t *pip; 5214 mdi_pathinfo_t *next; 5215 dev_info_t *cdip; 5216 int unstable = 0; 5217 int constraint; 5218 5219 if (!MDI_PHCI(dip)) 5220 return; 5221 5222 ph = i_devi_get_phci(dip); 5223 if (ph == NULL) { 5224 /* no last path and no pips */ 5225 return; 5226 } 5227 5228 MDI_PHCI_LOCK(ph); 5229 5230 if (MDI_PHCI_IS_OFFLINE(ph)) { 5231 MDI_PHCI_UNLOCK(ph); 5232 /* no last path and no pips */ 5233 return; 5234 } 5235 5236 /* 5237 * Check to see if the pHCI can be offlined 5238 */ 5239 if (ph->ph_unstable) { 5240 unstable = 1; 5241 } 5242 5243 pip = ph->ph_path_head; 5244 while (pip != NULL) { 5245 MDI_PI_LOCK(pip); 5246 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5247 5248 /* 5249 * if failover in progress fail the pHCI from offlining 5250 */ 5251 ct = MDI_PI(pip)->pi_client; 5252 i_mdi_client_lock(ct, pip); 5253 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5254 (ct->ct_unstable)) { 5255 unstable = 1; 5256 } 5257 MDI_PI_UNLOCK(pip); 5258 5259 /* 5260 * Check to see of we are removing the last path of this 5261 * client device... 
5262 */ 5263 cdip = ct->ct_dip; 5264 if (!phci_only && cdip && 5265 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5266 (i_mdi_client_compute_state(ct, ph) == 5267 MDI_CLIENT_STATE_FAILED)) { 5268 i_mdi_client_unlock(ct); 5269 MDI_PHCI_UNLOCK(ph); 5270 /* 5271 * We don't retire clients we just retire the 5272 * path to a client. If it is the last path 5273 * to a client, constraints are checked and 5274 * if we pass the last path is offlined. MPXIO will 5275 * then fail all I/Os to the client. Since we don't 5276 * want to retire the client on a path error 5277 * set constraint = 0 so that the client dip 5278 * is not retired. 5279 */ 5280 constraint = 0; 5281 (void) e_ddi_retire_finalize(cdip, &constraint); 5282 MDI_PHCI_LOCK(ph); 5283 pip = next; 5284 } else { 5285 i_mdi_client_unlock(ct); 5286 pip = next; 5287 } 5288 } 5289 5290 /* 5291 * Cannot offline pip(s) 5292 */ 5293 if (unstable) { 5294 cmn_err(CE_WARN, "PHCI in transient state, cannot " 5295 "retire, dip = %p", (void *)dip); 5296 MDI_PHCI_UNLOCK(ph); 5297 return; 5298 } 5299 5300 /* 5301 * Mark the pHCI as offline 5302 */ 5303 MDI_PHCI_SET_OFFLINE(ph); 5304 5305 /* 5306 * Mark the child mdi_pathinfo nodes as transient 5307 */ 5308 pip = ph->ph_path_head; 5309 while (pip != NULL) { 5310 MDI_PI_LOCK(pip); 5311 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5312 MDI_PI_SET_OFFLINING(pip); 5313 MDI_PI_UNLOCK(pip); 5314 pip = next; 5315 } 5316 MDI_PHCI_UNLOCK(ph); 5317 /* 5318 * Give a chance for any pending commands to execute 5319 */ 5320 delay(1); 5321 MDI_PHCI_LOCK(ph); 5322 pip = ph->ph_path_head; 5323 while (pip != NULL) { 5324 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5325 (void) i_mdi_pi_offline(pip, 0); 5326 MDI_PI_LOCK(pip); 5327 ct = MDI_PI(pip)->pi_client; 5328 if (!MDI_PI_IS_OFFLINE(pip)) { 5329 cmn_err(CE_WARN, "PHCI busy, cannot offline path: " 5330 "PHCI dip = %p", (void *)dip); 5331 MDI_PI_UNLOCK(pip); 5332 MDI_PHCI_SET_ONLINE(ph); 5333 MDI_PHCI_UNLOCK(ph); 5334 return; 5335 } 
5336 MDI_PI_UNLOCK(pip); 5337 pip = next; 5338 } 5339 MDI_PHCI_UNLOCK(ph); 5340 5341 return; 5342 } 5343 5344 void 5345 mdi_phci_unretire(dev_info_t *dip) 5346 { 5347 ASSERT(MDI_PHCI(dip)); 5348 5349 /* 5350 * Online the phci 5351 */ 5352 i_mdi_phci_online(dip); 5353 } 5354 5355 /*ARGSUSED*/ 5356 static int 5357 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5358 { 5359 int rv = NDI_SUCCESS; 5360 mdi_client_t *ct; 5361 5362 /* 5363 * Client component to go offline. Make sure that we are 5364 * not in failing over state and update client state 5365 * accordingly 5366 */ 5367 ct = i_devi_get_client(dip); 5368 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 5369 (void *)dip, (void *)ct)); 5370 if (ct != NULL) { 5371 MDI_CLIENT_LOCK(ct); 5372 if (ct->ct_unstable) { 5373 /* 5374 * One or more paths are in transient state, 5375 * Dont allow offline of a client device 5376 */ 5377 MDI_DEBUG(1, (CE_WARN, dip, 5378 "!One or more paths to this device is " 5379 "in transient state. This device can not " 5380 "be removed at this moment. " 5381 "Please try again later.")); 5382 MDI_CLIENT_UNLOCK(ct); 5383 return (NDI_BUSY); 5384 } 5385 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5386 /* 5387 * Failover is in progress, Dont allow DR of 5388 * a client device 5389 */ 5390 MDI_DEBUG(1, (CE_WARN, dip, 5391 "!Client device (%s%d) is Busy. %s", 5392 ddi_driver_name(dip), ddi_get_instance(dip), 5393 "This device can not be removed at " 5394 "this moment. 
Please try again later.")); 5395 MDI_CLIENT_UNLOCK(ct); 5396 return (NDI_BUSY); 5397 } 5398 MDI_CLIENT_SET_OFFLINE(ct); 5399 5400 /* 5401 * Unbind our relationship with the dev_info node 5402 */ 5403 if (flags & NDI_DEVI_REMOVE) { 5404 ct->ct_dip = NULL; 5405 } 5406 MDI_CLIENT_UNLOCK(ct); 5407 } 5408 return (rv); 5409 } 5410 5411 /* 5412 * mdi_pre_attach(): 5413 * Pre attach() notification handler 5414 */ 5415 /*ARGSUSED*/ 5416 int 5417 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5418 { 5419 /* don't support old DDI_PM_RESUME */ 5420 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5421 (cmd == DDI_PM_RESUME)) 5422 return (DDI_FAILURE); 5423 5424 return (DDI_SUCCESS); 5425 } 5426 5427 /* 5428 * mdi_post_attach(): 5429 * Post attach() notification handler 5430 */ 5431 /*ARGSUSED*/ 5432 void 5433 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5434 { 5435 mdi_phci_t *ph; 5436 mdi_client_t *ct; 5437 mdi_vhci_t *vh; 5438 5439 if (MDI_PHCI(dip)) { 5440 ph = i_devi_get_phci(dip); 5441 ASSERT(ph != NULL); 5442 5443 MDI_PHCI_LOCK(ph); 5444 switch (cmd) { 5445 case DDI_ATTACH: 5446 MDI_DEBUG(2, (CE_NOTE, dip, 5447 "!pHCI post_attach: called %p\n", (void *)ph)); 5448 if (error == DDI_SUCCESS) { 5449 MDI_PHCI_SET_ATTACH(ph); 5450 } else { 5451 MDI_DEBUG(1, (CE_NOTE, dip, 5452 "!pHCI post_attach: failed error=%d\n", 5453 error)); 5454 MDI_PHCI_SET_DETACH(ph); 5455 } 5456 break; 5457 5458 case DDI_RESUME: 5459 MDI_DEBUG(2, (CE_NOTE, dip, 5460 "!pHCI post_resume: called %p\n", (void *)ph)); 5461 if (error == DDI_SUCCESS) { 5462 MDI_PHCI_SET_RESUME(ph); 5463 } else { 5464 MDI_DEBUG(1, (CE_NOTE, dip, 5465 "!pHCI post_resume: failed error=%d\n", 5466 error)); 5467 MDI_PHCI_SET_SUSPEND(ph); 5468 } 5469 break; 5470 } 5471 MDI_PHCI_UNLOCK(ph); 5472 } 5473 5474 if (MDI_CLIENT(dip)) { 5475 ct = i_devi_get_client(dip); 5476 ASSERT(ct != NULL); 5477 5478 MDI_CLIENT_LOCK(ct); 5479 switch (cmd) { 5480 case DDI_ATTACH: 5481 MDI_DEBUG(2, (CE_NOTE, dip, 
5482 "!Client post_attach: called %p\n", (void *)ct)); 5483 if (error != DDI_SUCCESS) { 5484 MDI_DEBUG(1, (CE_NOTE, dip, 5485 "!Client post_attach: failed error=%d\n", 5486 error)); 5487 MDI_CLIENT_SET_DETACH(ct); 5488 MDI_DEBUG(4, (CE_WARN, dip, 5489 "mdi_post_attach i_mdi_pm_reset_client\n")); 5490 i_mdi_pm_reset_client(ct); 5491 break; 5492 } 5493 5494 /* 5495 * Client device has successfully attached, inform 5496 * the vhci. 5497 */ 5498 vh = ct->ct_vhci; 5499 if (vh->vh_ops->vo_client_attached) 5500 (*vh->vh_ops->vo_client_attached)(dip); 5501 5502 MDI_CLIENT_SET_ATTACH(ct); 5503 break; 5504 5505 case DDI_RESUME: 5506 MDI_DEBUG(2, (CE_NOTE, dip, 5507 "!Client post_attach: called %p\n", (void *)ct)); 5508 if (error == DDI_SUCCESS) { 5509 MDI_CLIENT_SET_RESUME(ct); 5510 } else { 5511 MDI_DEBUG(1, (CE_NOTE, dip, 5512 "!Client post_resume: failed error=%d\n", 5513 error)); 5514 MDI_CLIENT_SET_SUSPEND(ct); 5515 } 5516 break; 5517 } 5518 MDI_CLIENT_UNLOCK(ct); 5519 } 5520 } 5521 5522 /* 5523 * mdi_pre_detach(): 5524 * Pre detach notification handler 5525 */ 5526 /*ARGSUSED*/ 5527 int 5528 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5529 { 5530 int rv = DDI_SUCCESS; 5531 5532 if (MDI_CLIENT(dip)) { 5533 (void) i_mdi_client_pre_detach(dip, cmd); 5534 } 5535 5536 if (MDI_PHCI(dip)) { 5537 rv = i_mdi_phci_pre_detach(dip, cmd); 5538 } 5539 5540 return (rv); 5541 } 5542 5543 /*ARGSUSED*/ 5544 static int 5545 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5546 { 5547 int rv = DDI_SUCCESS; 5548 mdi_phci_t *ph; 5549 mdi_client_t *ct; 5550 mdi_pathinfo_t *pip; 5551 mdi_pathinfo_t *failed_pip = NULL; 5552 mdi_pathinfo_t *next; 5553 5554 ph = i_devi_get_phci(dip); 5555 if (ph == NULL) { 5556 return (rv); 5557 } 5558 5559 MDI_PHCI_LOCK(ph); 5560 switch (cmd) { 5561 case DDI_DETACH: 5562 MDI_DEBUG(2, (CE_NOTE, dip, 5563 "!pHCI pre_detach: called %p\n", (void *)ph)); 5564 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5565 /* 5566 * mdi_pathinfo nodes are still attached 
to 5567 * this pHCI. Fail the detach for this pHCI. 5568 */ 5569 MDI_DEBUG(2, (CE_WARN, dip, 5570 "!pHCI pre_detach: " 5571 "mdi_pathinfo nodes are still attached " 5572 "%p\n", (void *)ph)); 5573 rv = DDI_FAILURE; 5574 break; 5575 } 5576 MDI_PHCI_SET_DETACH(ph); 5577 break; 5578 5579 case DDI_SUSPEND: 5580 /* 5581 * pHCI is getting suspended. Since mpxio client 5582 * devices may not be suspended at this point, to avoid 5583 * a potential stack overflow, it is important to suspend 5584 * client devices before pHCI can be suspended. 5585 */ 5586 5587 MDI_DEBUG(2, (CE_NOTE, dip, 5588 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5589 /* 5590 * Suspend all the client devices accessible through this pHCI 5591 */ 5592 pip = ph->ph_path_head; 5593 while (pip != NULL && rv == DDI_SUCCESS) { 5594 dev_info_t *cdip; 5595 MDI_PI_LOCK(pip); 5596 next = 5597 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5598 ct = MDI_PI(pip)->pi_client; 5599 i_mdi_client_lock(ct, pip); 5600 cdip = ct->ct_dip; 5601 MDI_PI_UNLOCK(pip); 5602 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5603 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5604 i_mdi_client_unlock(ct); 5605 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5606 DDI_SUCCESS) { 5607 /* 5608 * Suspend of one of the client 5609 * device has failed. 5610 */ 5611 MDI_DEBUG(1, (CE_WARN, dip, 5612 "!Suspend of device (%s%d) failed.", 5613 ddi_driver_name(cdip), 5614 ddi_get_instance(cdip))); 5615 failed_pip = pip; 5616 break; 5617 } 5618 } else { 5619 i_mdi_client_unlock(ct); 5620 } 5621 pip = next; 5622 } 5623 5624 if (rv == DDI_SUCCESS) { 5625 /* 5626 * Suspend of client devices is complete. Proceed 5627 * with pHCI suspend. 5628 */ 5629 MDI_PHCI_SET_SUSPEND(ph); 5630 } else { 5631 /* 5632 * Revert back all the suspended client device states 5633 * to converse. 
5634 */ 5635 pip = ph->ph_path_head; 5636 while (pip != failed_pip) { 5637 dev_info_t *cdip; 5638 MDI_PI_LOCK(pip); 5639 next = 5640 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5641 ct = MDI_PI(pip)->pi_client; 5642 i_mdi_client_lock(ct, pip); 5643 cdip = ct->ct_dip; 5644 MDI_PI_UNLOCK(pip); 5645 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5646 i_mdi_client_unlock(ct); 5647 (void) devi_attach(cdip, DDI_RESUME); 5648 } else { 5649 i_mdi_client_unlock(ct); 5650 } 5651 pip = next; 5652 } 5653 } 5654 break; 5655 5656 default: 5657 rv = DDI_FAILURE; 5658 break; 5659 } 5660 MDI_PHCI_UNLOCK(ph); 5661 return (rv); 5662 } 5663 5664 /*ARGSUSED*/ 5665 static int 5666 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5667 { 5668 int rv = DDI_SUCCESS; 5669 mdi_client_t *ct; 5670 5671 ct = i_devi_get_client(dip); 5672 if (ct == NULL) { 5673 return (rv); 5674 } 5675 5676 MDI_CLIENT_LOCK(ct); 5677 switch (cmd) { 5678 case DDI_DETACH: 5679 MDI_DEBUG(2, (CE_NOTE, dip, 5680 "!Client pre_detach: called %p\n", (void *)ct)); 5681 MDI_CLIENT_SET_DETACH(ct); 5682 break; 5683 5684 case DDI_SUSPEND: 5685 MDI_DEBUG(2, (CE_NOTE, dip, 5686 "!Client pre_suspend: called %p\n", (void *)ct)); 5687 MDI_CLIENT_SET_SUSPEND(ct); 5688 break; 5689 5690 default: 5691 rv = DDI_FAILURE; 5692 break; 5693 } 5694 MDI_CLIENT_UNLOCK(ct); 5695 return (rv); 5696 } 5697 5698 /* 5699 * mdi_post_detach(): 5700 * Post detach notification handler 5701 */ 5702 /*ARGSUSED*/ 5703 void 5704 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5705 { 5706 /* 5707 * Detach/Suspend of mpxio component failed. Update our state 5708 * too 5709 */ 5710 if (MDI_PHCI(dip)) 5711 i_mdi_phci_post_detach(dip, cmd, error); 5712 5713 if (MDI_CLIENT(dip)) 5714 i_mdi_client_post_detach(dip, cmd, error); 5715 } 5716 5717 /*ARGSUSED*/ 5718 static void 5719 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5720 { 5721 mdi_phci_t *ph; 5722 5723 /* 5724 * Detach/Suspend of phci component failed. 
Update our state 5725 * too 5726 */ 5727 ph = i_devi_get_phci(dip); 5728 if (ph == NULL) { 5729 return; 5730 } 5731 5732 MDI_PHCI_LOCK(ph); 5733 /* 5734 * Detach of pHCI failed. Restore back converse 5735 * state 5736 */ 5737 switch (cmd) { 5738 case DDI_DETACH: 5739 MDI_DEBUG(2, (CE_NOTE, dip, 5740 "!pHCI post_detach: called %p\n", (void *)ph)); 5741 if (error != DDI_SUCCESS) 5742 MDI_PHCI_SET_ATTACH(ph); 5743 break; 5744 5745 case DDI_SUSPEND: 5746 MDI_DEBUG(2, (CE_NOTE, dip, 5747 "!pHCI post_suspend: called %p\n", (void *)ph)); 5748 if (error != DDI_SUCCESS) 5749 MDI_PHCI_SET_RESUME(ph); 5750 break; 5751 } 5752 MDI_PHCI_UNLOCK(ph); 5753 } 5754 5755 /*ARGSUSED*/ 5756 static void 5757 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5758 { 5759 mdi_client_t *ct; 5760 5761 ct = i_devi_get_client(dip); 5762 if (ct == NULL) { 5763 return; 5764 } 5765 MDI_CLIENT_LOCK(ct); 5766 /* 5767 * Detach of Client failed. Restore back converse 5768 * state 5769 */ 5770 switch (cmd) { 5771 case DDI_DETACH: 5772 MDI_DEBUG(2, (CE_NOTE, dip, 5773 "!Client post_detach: called %p\n", (void *)ct)); 5774 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5775 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5776 "i_mdi_pm_rele_client\n")); 5777 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5778 } else { 5779 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5780 "i_mdi_pm_reset_client\n")); 5781 i_mdi_pm_reset_client(ct); 5782 } 5783 if (error != DDI_SUCCESS) 5784 MDI_CLIENT_SET_ATTACH(ct); 5785 break; 5786 5787 case DDI_SUSPEND: 5788 MDI_DEBUG(2, (CE_NOTE, dip, 5789 "!Client post_suspend: called %p\n", (void *)ct)); 5790 if (error != DDI_SUCCESS) 5791 MDI_CLIENT_SET_RESUME(ct); 5792 break; 5793 } 5794 MDI_CLIENT_UNLOCK(ct); 5795 } 5796 5797 int 5798 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5799 { 5800 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5801 } 5802 5803 /* 5804 * create and install per-path (client - pHCI) statistics 5805 * I/O stats supported: nread, nwritten, reads, and writes 5806 * Error stats - hard errors, soft errors, & transport errors 5807 */ 5808 int 5809 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5810 { 5811 kstat_t *kiosp, *kerrsp; 5812 struct pi_errs *nsp; 5813 struct mdi_pi_kstats *mdi_statp; 5814 5815 if (MDI_PI(pip)->pi_kstats != NULL) 5816 return (MDI_SUCCESS); 5817 5818 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5819 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5820 return (MDI_FAILURE); 5821 } 5822 5823 (void) strcat(ksname, ",err"); 5824 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5825 KSTAT_TYPE_NAMED, 5826 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5827 if (kerrsp == NULL) { 5828 kstat_delete(kiosp); 5829 return (MDI_FAILURE); 5830 } 5831 5832 nsp = (struct pi_errs *)kerrsp->ks_data; 5833 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5834 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5835 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5836 KSTAT_DATA_UINT32); 5837 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5838 KSTAT_DATA_UINT32); 5839 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5840 KSTAT_DATA_UINT32); 5841 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5842 KSTAT_DATA_UINT32); 5843 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5844 KSTAT_DATA_UINT32); 5845 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5846 KSTAT_DATA_UINT32); 5847 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5848 KSTAT_DATA_UINT32); 5849 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5850 5851 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5852 mdi_statp->pi_kstat_ref = 1; 5853 mdi_statp->pi_kstat_iostats = kiosp; 5854 mdi_statp->pi_kstat_errstats = kerrsp; 5855 
kstat_install(kiosp); 5856 kstat_install(kerrsp); 5857 MDI_PI(pip)->pi_kstats = mdi_statp; 5858 return (MDI_SUCCESS); 5859 } 5860 5861 /* 5862 * destroy per-path properties 5863 */ 5864 static void 5865 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5866 { 5867 5868 struct mdi_pi_kstats *mdi_statp; 5869 5870 if (MDI_PI(pip)->pi_kstats == NULL) 5871 return; 5872 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5873 return; 5874 5875 MDI_PI(pip)->pi_kstats = NULL; 5876 5877 /* 5878 * the kstat may be shared between multiple pathinfo nodes 5879 * decrement this pathinfo's usage, removing the kstats 5880 * themselves when the last pathinfo reference is removed. 5881 */ 5882 ASSERT(mdi_statp->pi_kstat_ref > 0); 5883 if (--mdi_statp->pi_kstat_ref != 0) 5884 return; 5885 5886 kstat_delete(mdi_statp->pi_kstat_iostats); 5887 kstat_delete(mdi_statp->pi_kstat_errstats); 5888 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5889 } 5890 5891 /* 5892 * update I/O paths KSTATS 5893 */ 5894 void 5895 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5896 { 5897 kstat_t *iostatp; 5898 size_t xfer_cnt; 5899 5900 ASSERT(pip != NULL); 5901 5902 /* 5903 * I/O can be driven across a path prior to having path 5904 * statistics available, i.e. probe(9e). 5905 */ 5906 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5907 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5908 xfer_cnt = bp->b_bcount - bp->b_resid; 5909 if (bp->b_flags & B_READ) { 5910 KSTAT_IO_PTR(iostatp)->reads++; 5911 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5912 } else { 5913 KSTAT_IO_PTR(iostatp)->writes++; 5914 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5915 } 5916 } 5917 } 5918 5919 /* 5920 * Enable the path(specific client/target/initiator) 5921 * Enabling a path means that MPxIO may select the enabled path for routing 5922 * future I/O requests, subject to other path state constraints. 
5923 */ 5924 int 5925 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5926 { 5927 mdi_phci_t *ph; 5928 5929 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5930 if (ph == NULL) { 5931 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5932 " failed. pip: %p ph = NULL\n", (void *)pip)); 5933 return (MDI_FAILURE); 5934 } 5935 5936 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5937 MDI_ENABLE_OP); 5938 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5939 " Returning success pip = %p. ph = %p\n", 5940 (void *)pip, (void *)ph)); 5941 return (MDI_SUCCESS); 5942 5943 } 5944 5945 /* 5946 * Disable the path (specific client/target/initiator) 5947 * Disabling a path means that MPxIO will not select the disabled path for 5948 * routing any new I/O requests. 5949 */ 5950 int 5951 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5952 { 5953 mdi_phci_t *ph; 5954 5955 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5956 if (ph == NULL) { 5957 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5958 " failed. pip: %p ph = NULL\n", (void *)pip)); 5959 return (MDI_FAILURE); 5960 } 5961 5962 (void) i_mdi_enable_disable_path(pip, 5963 ph->ph_vhci, flags, MDI_DISABLE_OP); 5964 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5965 "Returning success pip = %p. ph = %p", 5966 (void *)pip, (void *)ph)); 5967 return (MDI_SUCCESS); 5968 } 5969 5970 /* 5971 * disable the path to a particular pHCI (pHCI specified in the phci_path 5972 * argument) for a particular client (specified in the client_path argument). 5973 * Disabling a path means that MPxIO will not select the disabled path for 5974 * routing any new I/O requests. 
5975 * NOTE: this will be removed once the NWS files are changed to use the new 5976 * mdi_{enable,disable}_path interfaces 5977 */ 5978 int 5979 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5980 { 5981 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5982 } 5983 5984 /* 5985 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5986 * argument) for a particular client (specified in the client_path argument). 5987 * Enabling a path means that MPxIO may select the enabled path for routing 5988 * future I/O requests, subject to other path state constraints. 5989 * NOTE: this will be removed once the NWS files are changed to use the new 5990 * mdi_{enable,disable}_path interfaces 5991 */ 5992 5993 int 5994 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5995 { 5996 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5997 } 5998 5999 /* 6000 * Common routine for doing enable/disable. 6001 */ 6002 static mdi_pathinfo_t * 6003 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6004 int op) 6005 { 6006 int sync_flag = 0; 6007 int rv; 6008 mdi_pathinfo_t *next; 6009 int (*f)() = NULL; 6010 6011 f = vh->vh_ops->vo_pi_state_change; 6012 6013 sync_flag = (flags << 8) & 0xf00; 6014 6015 /* 6016 * Do a callback into the mdi consumer to let it 6017 * know that path is about to get enabled/disabled. 
6018 */ 6019 if (f != NULL) { 6020 rv = (*f)(vh->vh_dip, pip, 0, 6021 MDI_PI_EXT_STATE(pip), 6022 MDI_EXT_STATE_CHANGE | sync_flag | 6023 op | MDI_BEFORE_STATE_CHANGE); 6024 if (rv != MDI_SUCCESS) { 6025 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 6026 "!vo_pi_state_change: failed rv = %x", rv)); 6027 } 6028 } 6029 MDI_PI_LOCK(pip); 6030 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6031 6032 switch (flags) { 6033 case USER_DISABLE: 6034 if (op == MDI_DISABLE_OP) { 6035 MDI_PI_SET_USER_DISABLE(pip); 6036 } else { 6037 MDI_PI_SET_USER_ENABLE(pip); 6038 } 6039 break; 6040 case DRIVER_DISABLE: 6041 if (op == MDI_DISABLE_OP) { 6042 MDI_PI_SET_DRV_DISABLE(pip); 6043 } else { 6044 MDI_PI_SET_DRV_ENABLE(pip); 6045 } 6046 break; 6047 case DRIVER_DISABLE_TRANSIENT: 6048 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6049 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6050 } else { 6051 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6052 } 6053 break; 6054 } 6055 MDI_PI_UNLOCK(pip); 6056 /* 6057 * Do a callback into the mdi consumer to let it 6058 * know that path is now enabled/disabled. 6059 */ 6060 if (f != NULL) { 6061 rv = (*f)(vh->vh_dip, pip, 0, 6062 MDI_PI_EXT_STATE(pip), 6063 MDI_EXT_STATE_CHANGE | sync_flag | 6064 op | MDI_AFTER_STATE_CHANGE); 6065 if (rv != MDI_SUCCESS) { 6066 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 6067 "!vo_pi_state_change: failed rv = %x", rv)); 6068 } 6069 } 6070 return (next); 6071 } 6072 6073 /* 6074 * Common routine for doing enable/disable. 
6075 * NOTE: this will be removed once the NWS files are changed to use the new 6076 * mdi_{enable,disable}_path has been putback 6077 */ 6078 int 6079 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6080 { 6081 6082 mdi_phci_t *ph; 6083 mdi_vhci_t *vh = NULL; 6084 mdi_client_t *ct; 6085 mdi_pathinfo_t *next, *pip; 6086 int found_it; 6087 6088 ph = i_devi_get_phci(pdip); 6089 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6090 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 6091 (void *)cdip)); 6092 if (ph == NULL) { 6093 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 6094 "Op %d failed. ph = NULL\n", op)); 6095 return (MDI_FAILURE); 6096 } 6097 6098 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6099 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6100 "Op Invalid operation = %d\n", op)); 6101 return (MDI_FAILURE); 6102 } 6103 6104 vh = ph->ph_vhci; 6105 6106 if (cdip == NULL) { 6107 /* 6108 * Need to mark the Phci as enabled/disabled. 6109 */ 6110 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6111 "Op %d for the phci\n", op)); 6112 MDI_PHCI_LOCK(ph); 6113 switch (flags) { 6114 case USER_DISABLE: 6115 if (op == MDI_DISABLE_OP) { 6116 MDI_PHCI_SET_USER_DISABLE(ph); 6117 } else { 6118 MDI_PHCI_SET_USER_ENABLE(ph); 6119 } 6120 break; 6121 case DRIVER_DISABLE: 6122 if (op == MDI_DISABLE_OP) { 6123 MDI_PHCI_SET_DRV_DISABLE(ph); 6124 } else { 6125 MDI_PHCI_SET_DRV_ENABLE(ph); 6126 } 6127 break; 6128 case DRIVER_DISABLE_TRANSIENT: 6129 if (op == MDI_DISABLE_OP) { 6130 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6131 } else { 6132 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6133 } 6134 break; 6135 default: 6136 MDI_PHCI_UNLOCK(ph); 6137 MDI_DEBUG(1, (CE_NOTE, NULL, 6138 "!i_mdi_pi_enable_disable:" 6139 " Invalid flag argument= %d\n", flags)); 6140 } 6141 6142 /* 6143 * Phci has been disabled. Now try to enable/disable 6144 * path info's to each client. 
6145 */ 6146 pip = ph->ph_path_head; 6147 while (pip != NULL) { 6148 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6149 } 6150 MDI_PHCI_UNLOCK(ph); 6151 } else { 6152 6153 /* 6154 * Disable a specific client. 6155 */ 6156 ct = i_devi_get_client(cdip); 6157 if (ct == NULL) { 6158 MDI_DEBUG(1, (CE_NOTE, NULL, 6159 "!i_mdi_pi_enable_disable:" 6160 " failed. ct = NULL operation = %d\n", op)); 6161 return (MDI_FAILURE); 6162 } 6163 6164 MDI_CLIENT_LOCK(ct); 6165 pip = ct->ct_path_head; 6166 found_it = 0; 6167 while (pip != NULL) { 6168 MDI_PI_LOCK(pip); 6169 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6170 if (MDI_PI(pip)->pi_phci == ph) { 6171 MDI_PI_UNLOCK(pip); 6172 found_it = 1; 6173 break; 6174 } 6175 MDI_PI_UNLOCK(pip); 6176 pip = next; 6177 } 6178 6179 6180 MDI_CLIENT_UNLOCK(ct); 6181 if (found_it == 0) { 6182 MDI_DEBUG(1, (CE_NOTE, NULL, 6183 "!i_mdi_pi_enable_disable:" 6184 " failed. Could not find corresponding pip\n")); 6185 return (MDI_FAILURE); 6186 } 6187 6188 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6189 } 6190 6191 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6192 "Op %d Returning success pdip = %p cdip = %p\n", 6193 op, (void *)pdip, (void *)cdip)); 6194 return (MDI_SUCCESS); 6195 } 6196 6197 /* 6198 * Ensure phci powered up 6199 */ 6200 static void 6201 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6202 { 6203 dev_info_t *ph_dip; 6204 6205 ASSERT(pip != NULL); 6206 ASSERT(MDI_PI_LOCKED(pip)); 6207 6208 if (MDI_PI(pip)->pi_pm_held) { 6209 return; 6210 } 6211 6212 ph_dip = mdi_pi_get_phci(pip); 6213 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 6214 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6215 if (ph_dip == NULL) { 6216 return; 6217 } 6218 6219 MDI_PI_UNLOCK(pip); 6220 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6221 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6222 6223 pm_hold_power(ph_dip); 6224 6225 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6226 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 6227 MDI_PI_LOCK(pip); 6228 6229 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6230 if (DEVI(ph_dip)->devi_pm_info) 6231 MDI_PI(pip)->pi_pm_held = 1; 6232 } 6233 6234 /* 6235 * Allow phci powered down 6236 */ 6237 static void 6238 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6239 { 6240 dev_info_t *ph_dip = NULL; 6241 6242 ASSERT(pip != NULL); 6243 ASSERT(MDI_PI_LOCKED(pip)); 6244 6245 if (MDI_PI(pip)->pi_pm_held == 0) { 6246 return; 6247 } 6248 6249 ph_dip = mdi_pi_get_phci(pip); 6250 ASSERT(ph_dip != NULL); 6251 6252 MDI_PI_UNLOCK(pip); 6253 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 6254 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6255 6256 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6257 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6258 pm_rele_power(ph_dip); 6259 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6260 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6261 6262 MDI_PI_LOCK(pip); 6263 MDI_PI(pip)->pi_pm_held = 0; 6264 } 6265 6266 static void 6267 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6268 { 6269 ASSERT(MDI_CLIENT_LOCKED(ct)); 6270 6271 ct->ct_power_cnt += incr; 6272 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 6273 "ct_power_cnt = %d incr = %d\n", (void *)ct, 6274 ct->ct_power_cnt, incr)); 6275 ASSERT(ct->ct_power_cnt >= 0); 6276 } 6277 6278 static void 6279 i_mdi_rele_all_phci(mdi_client_t *ct) 6280 { 6281 mdi_pathinfo_t *pip; 6282 6283 ASSERT(MDI_CLIENT_LOCKED(ct)); 6284 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6285 while (pip != NULL) { 6286 mdi_hold_path(pip); 6287 MDI_PI_LOCK(pip); 6288 i_mdi_pm_rele_pip(pip); 6289 MDI_PI_UNLOCK(pip); 6290 mdi_rele_path(pip); 6291 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6292 } 6293 } 6294 6295 static void 6296 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6297 { 6298 ASSERT(MDI_CLIENT_LOCKED(ct)); 6299 6300 if (i_ddi_devi_attached(ct->ct_dip)) { 6301 ct->ct_power_cnt -= decr; 6302 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 6303 "ct_power_cnt = %d decr = %d\n", 6304 (void *)ct, ct->ct_power_cnt, decr)); 6305 } 6306 6307 ASSERT(ct->ct_power_cnt >= 0); 6308 if (ct->ct_power_cnt == 0) { 6309 i_mdi_rele_all_phci(ct); 6310 return; 6311 } 6312 } 6313 6314 static void 6315 i_mdi_pm_reset_client(mdi_client_t *ct) 6316 { 6317 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 6318 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 6319 ASSERT(MDI_CLIENT_LOCKED(ct)); 6320 ct->ct_power_cnt = 0; 6321 i_mdi_rele_all_phci(ct); 6322 ct->ct_powercnt_config = 0; 6323 ct->ct_powercnt_unconfig = 0; 6324 ct->ct_powercnt_reset = 1; 6325 } 6326 6327 static int 6328 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6329 { 6330 int ret; 6331 dev_info_t *ph_dip; 6332 6333 MDI_PI_LOCK(pip); 6334 i_mdi_pm_hold_pip(pip); 6335 6336 ph_dip = mdi_pi_get_phci(pip); 6337 MDI_PI_UNLOCK(pip); 6338 6339 /* bring all components of phci to full power */ 6340 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6341 "pm_powerup for %s%d %p\n", ddi_driver_name(ph_dip), 6342 ddi_get_instance(ph_dip), (void *)pip)); 6343 6344 ret = pm_powerup(ph_dip); 6345 6346 if (ret == DDI_FAILURE) { 6347 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6348 "pm_powerup FAILED for %s%d %p\n", 6349 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), 6350 (void *)pip)); 6351 6352 MDI_PI_LOCK(pip); 6353 i_mdi_pm_rele_pip(pip); 6354 MDI_PI_UNLOCK(pip); 6355 return (MDI_FAILURE); 6356 } 6357 6358 return (MDI_SUCCESS); 6359 } 6360 6361 static int 6362 i_mdi_power_all_phci(mdi_client_t *ct) 6363 { 6364 mdi_pathinfo_t *pip; 6365 int succeeded = 0; 6366 6367 ASSERT(MDI_CLIENT_LOCKED(ct)); 6368 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6369 while (pip != NULL) { 6370 /* 6371 * Don't power if MDI_PATHINFO_STATE_FAULT 6372 * or MDI_PATHINFO_STATE_OFFLINE. 
6373 */ 6374 if (MDI_PI_IS_INIT(pip) || 6375 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6376 mdi_hold_path(pip); 6377 MDI_CLIENT_UNLOCK(ct); 6378 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6379 succeeded = 1; 6380 6381 ASSERT(ct == MDI_PI(pip)->pi_client); 6382 MDI_CLIENT_LOCK(ct); 6383 mdi_rele_path(pip); 6384 } 6385 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6386 } 6387 6388 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6389 } 6390 6391 /* 6392 * mdi_bus_power(): 6393 * 1. Place the phci(s) into powered up state so that 6394 * client can do power management 6395 * 2. Ensure phci powered up as client power managing 6396 * Return Values: 6397 * MDI_SUCCESS 6398 * MDI_FAILURE 6399 */ 6400 int 6401 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6402 void *arg, void *result) 6403 { 6404 int ret = MDI_SUCCESS; 6405 pm_bp_child_pwrchg_t *bpc; 6406 mdi_client_t *ct; 6407 dev_info_t *cdip; 6408 pm_bp_has_changed_t *bphc; 6409 6410 /* 6411 * BUS_POWER_NOINVOL not supported 6412 */ 6413 if (op == BUS_POWER_NOINVOL) 6414 return (MDI_FAILURE); 6415 6416 /* 6417 * ignore other OPs. 
6418 * return quickly to save cou cycles on the ct processing 6419 */ 6420 switch (op) { 6421 case BUS_POWER_PRE_NOTIFICATION: 6422 case BUS_POWER_POST_NOTIFICATION: 6423 bpc = (pm_bp_child_pwrchg_t *)arg; 6424 cdip = bpc->bpc_dip; 6425 break; 6426 case BUS_POWER_HAS_CHANGED: 6427 bphc = (pm_bp_has_changed_t *)arg; 6428 cdip = bphc->bphc_dip; 6429 break; 6430 default: 6431 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6432 } 6433 6434 ASSERT(MDI_CLIENT(cdip)); 6435 6436 ct = i_devi_get_client(cdip); 6437 if (ct == NULL) 6438 return (MDI_FAILURE); 6439 6440 /* 6441 * wait till the mdi_pathinfo node state change are processed 6442 */ 6443 MDI_CLIENT_LOCK(ct); 6444 switch (op) { 6445 case BUS_POWER_PRE_NOTIFICATION: 6446 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6447 "BUS_POWER_PRE_NOTIFICATION:" 6448 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6449 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6450 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6451 6452 /* serialize power level change per client */ 6453 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6454 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6455 6456 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6457 6458 if (ct->ct_power_cnt == 0) { 6459 ret = i_mdi_power_all_phci(ct); 6460 } 6461 6462 /* 6463 * if new_level > 0: 6464 * - hold phci(s) 6465 * - power up phci(s) if not already 6466 * ignore power down 6467 */ 6468 if (bpc->bpc_nlevel > 0) { 6469 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 6470 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6471 "mdi_bus_power i_mdi_pm_hold_client\n")); 6472 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6473 } 6474 } 6475 break; 6476 case BUS_POWER_POST_NOTIFICATION: 6477 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6478 "BUS_POWER_POST_NOTIFICATION:" 6479 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 6480 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6481 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6482 *(int *)result)); 6483 6484 if (*(int *)result 
== DDI_SUCCESS) { 6485 if (bpc->bpc_nlevel > 0) { 6486 MDI_CLIENT_SET_POWER_UP(ct); 6487 } else { 6488 MDI_CLIENT_SET_POWER_DOWN(ct); 6489 } 6490 } 6491 6492 /* release the hold we did in pre-notification */ 6493 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6494 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6495 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6496 "mdi_bus_power i_mdi_pm_rele_client\n")); 6497 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6498 } 6499 6500 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6501 /* another thread might started attaching */ 6502 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6503 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6504 "mdi_bus_power i_mdi_pm_rele_client\n")); 6505 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6506 /* detaching has been taken care in pm_post_unconfig */ 6507 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6508 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6509 "mdi_bus_power i_mdi_pm_reset_client\n")); 6510 i_mdi_pm_reset_client(ct); 6511 } 6512 } 6513 6514 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6515 cv_broadcast(&ct->ct_powerchange_cv); 6516 6517 break; 6518 6519 /* need to do more */ 6520 case BUS_POWER_HAS_CHANGED: 6521 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 6522 "BUS_POWER_HAS_CHANGED:" 6523 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6524 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6525 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6526 6527 if (bphc->bphc_nlevel > 0 && 6528 bphc->bphc_nlevel > bphc->bphc_olevel) { 6529 if (ct->ct_power_cnt == 0) { 6530 ret = i_mdi_power_all_phci(ct); 6531 } 6532 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6533 "mdi_bus_power i_mdi_pm_hold_client\n")); 6534 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6535 } 6536 6537 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6538 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6539 "mdi_bus_power i_mdi_pm_rele_client\n")); 6540 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6541 } 6542 break; 6543 } 6544 6545 
MDI_CLIENT_UNLOCK(ct); 6546 return (ret); 6547 } 6548 6549 static int 6550 i_mdi_pm_pre_config_one(dev_info_t *child) 6551 { 6552 int ret = MDI_SUCCESS; 6553 mdi_client_t *ct; 6554 6555 ct = i_devi_get_client(child); 6556 if (ct == NULL) 6557 return (MDI_FAILURE); 6558 6559 MDI_CLIENT_LOCK(ct); 6560 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6561 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6562 6563 if (!MDI_CLIENT_IS_FAILED(ct)) { 6564 MDI_CLIENT_UNLOCK(ct); 6565 MDI_DEBUG(4, (CE_NOTE, child, 6566 "i_mdi_pm_pre_config_one already configured\n")); 6567 return (MDI_SUCCESS); 6568 } 6569 6570 if (ct->ct_powercnt_config) { 6571 MDI_CLIENT_UNLOCK(ct); 6572 MDI_DEBUG(4, (CE_NOTE, child, 6573 "i_mdi_pm_pre_config_one ALREADY held\n")); 6574 return (MDI_SUCCESS); 6575 } 6576 6577 if (ct->ct_power_cnt == 0) { 6578 ret = i_mdi_power_all_phci(ct); 6579 } 6580 MDI_DEBUG(4, (CE_NOTE, child, 6581 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 6582 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6583 ct->ct_powercnt_config = 1; 6584 ct->ct_powercnt_reset = 0; 6585 MDI_CLIENT_UNLOCK(ct); 6586 return (ret); 6587 } 6588 6589 static int 6590 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6591 { 6592 int ret = MDI_SUCCESS; 6593 dev_info_t *cdip; 6594 int circ; 6595 6596 ASSERT(MDI_VHCI(vdip)); 6597 6598 /* ndi_devi_config_one */ 6599 if (child) { 6600 ASSERT(DEVI_BUSY_OWNED(vdip)); 6601 return (i_mdi_pm_pre_config_one(child)); 6602 } 6603 6604 /* devi_config_common */ 6605 ndi_devi_enter(vdip, &circ); 6606 cdip = ddi_get_child(vdip); 6607 while (cdip) { 6608 dev_info_t *next = ddi_get_next_sibling(cdip); 6609 6610 ret = i_mdi_pm_pre_config_one(cdip); 6611 if (ret != MDI_SUCCESS) 6612 break; 6613 cdip = next; 6614 } 6615 ndi_devi_exit(vdip, circ); 6616 return (ret); 6617 } 6618 6619 static int 6620 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6621 { 6622 int ret = MDI_SUCCESS; 6623 mdi_client_t *ct; 6624 6625 ct = i_devi_get_client(child); 6626 if 
(ct == NULL) 6627 return (MDI_FAILURE); 6628 6629 MDI_CLIENT_LOCK(ct); 6630 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6631 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6632 6633 if (!i_ddi_devi_attached(ct->ct_dip)) { 6634 MDI_DEBUG(4, (CE_NOTE, child, 6635 "i_mdi_pm_pre_unconfig node detached already\n")); 6636 MDI_CLIENT_UNLOCK(ct); 6637 return (MDI_SUCCESS); 6638 } 6639 6640 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6641 (flags & NDI_AUTODETACH)) { 6642 MDI_DEBUG(4, (CE_NOTE, child, 6643 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6644 MDI_CLIENT_UNLOCK(ct); 6645 return (MDI_FAILURE); 6646 } 6647 6648 if (ct->ct_powercnt_unconfig) { 6649 MDI_DEBUG(4, (CE_NOTE, child, 6650 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6651 MDI_CLIENT_UNLOCK(ct); 6652 *held = 1; 6653 return (MDI_SUCCESS); 6654 } 6655 6656 if (ct->ct_power_cnt == 0) { 6657 ret = i_mdi_power_all_phci(ct); 6658 } 6659 MDI_DEBUG(4, (CE_NOTE, child, 6660 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6661 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6662 ct->ct_powercnt_unconfig = 1; 6663 ct->ct_powercnt_reset = 0; 6664 MDI_CLIENT_UNLOCK(ct); 6665 if (ret == MDI_SUCCESS) 6666 *held = 1; 6667 return (ret); 6668 } 6669 6670 static int 6671 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6672 int flags) 6673 { 6674 int ret = MDI_SUCCESS; 6675 dev_info_t *cdip; 6676 int circ; 6677 6678 ASSERT(MDI_VHCI(vdip)); 6679 *held = 0; 6680 6681 /* ndi_devi_unconfig_one */ 6682 if (child) { 6683 ASSERT(DEVI_BUSY_OWNED(vdip)); 6684 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6685 } 6686 6687 /* devi_unconfig_common */ 6688 ndi_devi_enter(vdip, &circ); 6689 cdip = ddi_get_child(vdip); 6690 while (cdip) { 6691 dev_info_t *next = ddi_get_next_sibling(cdip); 6692 6693 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6694 cdip = next; 6695 } 6696 ndi_devi_exit(vdip, circ); 6697 6698 if (*held) 6699 ret = MDI_SUCCESS; 6700 6701 return (ret); 6702 } 6703 6704 static void 6705 
i_mdi_pm_post_config_one(dev_info_t *child) 6706 { 6707 mdi_client_t *ct; 6708 6709 ct = i_devi_get_client(child); 6710 if (ct == NULL) 6711 return; 6712 6713 MDI_CLIENT_LOCK(ct); 6714 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6715 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6716 6717 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6718 MDI_DEBUG(4, (CE_NOTE, child, 6719 "i_mdi_pm_post_config_one NOT configured\n")); 6720 MDI_CLIENT_UNLOCK(ct); 6721 return; 6722 } 6723 6724 /* client has not been updated */ 6725 if (MDI_CLIENT_IS_FAILED(ct)) { 6726 MDI_DEBUG(4, (CE_NOTE, child, 6727 "i_mdi_pm_post_config_one NOT configured\n")); 6728 MDI_CLIENT_UNLOCK(ct); 6729 return; 6730 } 6731 6732 /* another thread might have powered it down or detached it */ 6733 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6734 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6735 (!i_ddi_devi_attached(ct->ct_dip) && 6736 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6737 MDI_DEBUG(4, (CE_NOTE, child, 6738 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6739 i_mdi_pm_reset_client(ct); 6740 } else { 6741 mdi_pathinfo_t *pip, *next; 6742 int valid_path_count = 0; 6743 6744 MDI_DEBUG(4, (CE_NOTE, child, 6745 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6746 pip = ct->ct_path_head; 6747 while (pip != NULL) { 6748 MDI_PI_LOCK(pip); 6749 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6750 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6751 valid_path_count ++; 6752 MDI_PI_UNLOCK(pip); 6753 pip = next; 6754 } 6755 i_mdi_pm_rele_client(ct, valid_path_count); 6756 } 6757 ct->ct_powercnt_config = 0; 6758 MDI_CLIENT_UNLOCK(ct); 6759 } 6760 6761 static void 6762 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6763 { 6764 int circ; 6765 dev_info_t *cdip; 6766 6767 ASSERT(MDI_VHCI(vdip)); 6768 6769 /* ndi_devi_config_one */ 6770 if (child) { 6771 ASSERT(DEVI_BUSY_OWNED(vdip)); 6772 i_mdi_pm_post_config_one(child); 6773 return; 6774 } 6775 6776 /* devi_config_common */ 6777 ndi_devi_enter(vdip, 
&circ); 6778 cdip = ddi_get_child(vdip); 6779 while (cdip) { 6780 dev_info_t *next = ddi_get_next_sibling(cdip); 6781 6782 i_mdi_pm_post_config_one(cdip); 6783 cdip = next; 6784 } 6785 ndi_devi_exit(vdip, circ); 6786 } 6787 6788 static void 6789 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6790 { 6791 mdi_client_t *ct; 6792 6793 ct = i_devi_get_client(child); 6794 if (ct == NULL) 6795 return; 6796 6797 MDI_CLIENT_LOCK(ct); 6798 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6799 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6800 6801 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6802 MDI_DEBUG(4, (CE_NOTE, child, 6803 "i_mdi_pm_post_unconfig NOT held\n")); 6804 MDI_CLIENT_UNLOCK(ct); 6805 return; 6806 } 6807 6808 /* failure detaching or another thread just attached it */ 6809 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6810 i_ddi_devi_attached(ct->ct_dip)) || 6811 (!i_ddi_devi_attached(ct->ct_dip) && 6812 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6813 MDI_DEBUG(4, (CE_NOTE, child, 6814 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6815 i_mdi_pm_reset_client(ct); 6816 } else { 6817 mdi_pathinfo_t *pip, *next; 6818 int valid_path_count = 0; 6819 6820 MDI_DEBUG(4, (CE_NOTE, child, 6821 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6822 pip = ct->ct_path_head; 6823 while (pip != NULL) { 6824 MDI_PI_LOCK(pip); 6825 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6826 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6827 valid_path_count ++; 6828 MDI_PI_UNLOCK(pip); 6829 pip = next; 6830 } 6831 i_mdi_pm_rele_client(ct, valid_path_count); 6832 ct->ct_powercnt_unconfig = 0; 6833 } 6834 6835 MDI_CLIENT_UNLOCK(ct); 6836 } 6837 6838 static void 6839 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6840 { 6841 int circ; 6842 dev_info_t *cdip; 6843 6844 ASSERT(MDI_VHCI(vdip)); 6845 6846 if (!held) { 6847 MDI_DEBUG(4, (CE_NOTE, vdip, 6848 "i_mdi_pm_post_unconfig held = %d\n", held)); 6849 return; 6850 } 6851 6852 if (child) { 6853 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6854 i_mdi_pm_post_unconfig_one(child); 6855 return; 6856 } 6857 6858 ndi_devi_enter(vdip, &circ); 6859 cdip = ddi_get_child(vdip); 6860 while (cdip) { 6861 dev_info_t *next = ddi_get_next_sibling(cdip); 6862 6863 i_mdi_pm_post_unconfig_one(cdip); 6864 cdip = next; 6865 } 6866 ndi_devi_exit(vdip, circ); 6867 } 6868 6869 int 6870 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6871 { 6872 int circ, ret = MDI_SUCCESS; 6873 dev_info_t *client_dip = NULL; 6874 mdi_client_t *ct; 6875 6876 /* 6877 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6878 * Power up pHCI for the named client device. 6879 * Note: Before the client is enumerated under vhci by phci, 6880 * client_dip can be NULL. Then proceed to power up all the 6881 * pHCIs. 6882 */ 6883 if (devnm != NULL) { 6884 ndi_devi_enter(vdip, &circ); 6885 client_dip = ndi_devi_findchild(vdip, devnm); 6886 } 6887 6888 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6889 op, devnm ? 
devnm : "NULL", (void *)client_dip)); 6890 6891 switch (op) { 6892 case MDI_PM_PRE_CONFIG: 6893 ret = i_mdi_pm_pre_config(vdip, client_dip); 6894 break; 6895 6896 case MDI_PM_PRE_UNCONFIG: 6897 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6898 flags); 6899 break; 6900 6901 case MDI_PM_POST_CONFIG: 6902 i_mdi_pm_post_config(vdip, client_dip); 6903 break; 6904 6905 case MDI_PM_POST_UNCONFIG: 6906 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6907 break; 6908 6909 case MDI_PM_HOLD_POWER: 6910 case MDI_PM_RELE_POWER: 6911 ASSERT(args); 6912 6913 client_dip = (dev_info_t *)args; 6914 ASSERT(MDI_CLIENT(client_dip)); 6915 6916 ct = i_devi_get_client(client_dip); 6917 MDI_CLIENT_LOCK(ct); 6918 6919 if (op == MDI_PM_HOLD_POWER) { 6920 if (ct->ct_power_cnt == 0) { 6921 (void) i_mdi_power_all_phci(ct); 6922 MDI_DEBUG(4, (CE_NOTE, client_dip, 6923 "mdi_power i_mdi_pm_hold_client\n")); 6924 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6925 } 6926 } else { 6927 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6928 MDI_DEBUG(4, (CE_NOTE, client_dip, 6929 "mdi_power i_mdi_pm_rele_client\n")); 6930 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6931 } else { 6932 MDI_DEBUG(4, (CE_NOTE, client_dip, 6933 "mdi_power i_mdi_pm_reset_client\n")); 6934 i_mdi_pm_reset_client(ct); 6935 } 6936 } 6937 6938 MDI_CLIENT_UNLOCK(ct); 6939 break; 6940 6941 default: 6942 break; 6943 } 6944 6945 if (devnm) 6946 ndi_devi_exit(vdip, circ); 6947 6948 return (ret); 6949 } 6950 6951 int 6952 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6953 { 6954 mdi_vhci_t *vhci; 6955 6956 if (!MDI_VHCI(dip)) 6957 return (MDI_FAILURE); 6958 6959 if (mdi_class) { 6960 vhci = DEVI(dip)->devi_mdi_xhci; 6961 ASSERT(vhci); 6962 *mdi_class = vhci->vh_class; 6963 } 6964 6965 return (MDI_SUCCESS); 6966 } 6967 6968 int 6969 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6970 { 6971 mdi_phci_t *phci; 6972 6973 if (!MDI_PHCI(dip)) 6974 return (MDI_FAILURE); 6975 6976 if (mdi_class) { 
6977 phci = DEVI(dip)->devi_mdi_xhci; 6978 ASSERT(phci); 6979 *mdi_class = phci->ph_vhci->vh_class; 6980 } 6981 6982 return (MDI_SUCCESS); 6983 } 6984 6985 int 6986 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6987 { 6988 mdi_client_t *client; 6989 6990 if (!MDI_CLIENT(dip)) 6991 return (MDI_FAILURE); 6992 6993 if (mdi_class) { 6994 client = DEVI(dip)->devi_mdi_client; 6995 ASSERT(client); 6996 *mdi_class = client->ct_vhci->vh_class; 6997 } 6998 6999 return (MDI_SUCCESS); 7000 } 7001 7002 void * 7003 mdi_client_get_vhci_private(dev_info_t *dip) 7004 { 7005 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7006 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7007 mdi_client_t *ct; 7008 ct = i_devi_get_client(dip); 7009 return (ct->ct_vprivate); 7010 } 7011 return (NULL); 7012 } 7013 7014 void 7015 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7016 { 7017 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7018 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7019 mdi_client_t *ct; 7020 ct = i_devi_get_client(dip); 7021 ct->ct_vprivate = data; 7022 } 7023 } 7024 /* 7025 * mdi_pi_get_vhci_private(): 7026 * Get the vhci private information associated with the 7027 * mdi_pathinfo node 7028 */ 7029 void * 7030 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7031 { 7032 caddr_t vprivate = NULL; 7033 if (pip) { 7034 vprivate = MDI_PI(pip)->pi_vprivate; 7035 } 7036 return (vprivate); 7037 } 7038 7039 /* 7040 * mdi_pi_set_vhci_private(): 7041 * Set the vhci private information in the mdi_pathinfo node 7042 */ 7043 void 7044 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 7045 { 7046 if (pip) { 7047 MDI_PI(pip)->pi_vprivate = priv; 7048 } 7049 } 7050 7051 /* 7052 * mdi_phci_get_vhci_private(): 7053 * Get the vhci private information associated with the 7054 * mdi_phci node 7055 */ 7056 void * 7057 mdi_phci_get_vhci_private(dev_info_t *dip) 7058 { 7059 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 
7060 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7061 mdi_phci_t *ph; 7062 ph = i_devi_get_phci(dip); 7063 return (ph->ph_vprivate); 7064 } 7065 return (NULL); 7066 } 7067 7068 /* 7069 * mdi_phci_set_vhci_private(): 7070 * Set the vhci private information in the mdi_phci node 7071 */ 7072 void 7073 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7074 { 7075 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7076 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7077 mdi_phci_t *ph; 7078 ph = i_devi_get_phci(dip); 7079 ph->ph_vprivate = priv; 7080 } 7081 } 7082 7083 /* 7084 * List of vhci class names: 7085 * A vhci class name must be in this list only if the corresponding vhci 7086 * driver intends to use the mdi provided bus config implementation 7087 * (i.e., mdi_vhci_bus_config()). 7088 */ 7089 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7090 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7091 7092 /* 7093 * During boot time, the on-disk vhci cache for every vhci class is read 7094 * in the form of an nvlist and stored here. 7095 */ 7096 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7097 7098 /* nvpair names in vhci cache nvlist */ 7099 #define MDI_VHCI_CACHE_VERSION 1 7100 #define MDI_NVPNAME_VERSION "version" 7101 #define MDI_NVPNAME_PHCIS "phcis" 7102 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7103 7104 /* 7105 * Given vhci class name, return its on-disk vhci cache filename. 7106 * Memory for the returned filename which includes the full path is allocated 7107 * by this function. 7108 */ 7109 static char * 7110 vhclass2vhcache_filename(char *vhclass) 7111 { 7112 char *filename; 7113 int len; 7114 static char *fmt = "/etc/devices/mdi_%s_cache"; 7115 7116 /* 7117 * fmt contains the on-disk vhci cache file name format; 7118 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
7119 */ 7120 7121 /* the -1 below is to account for "%s" in the format string */ 7122 len = strlen(fmt) + strlen(vhclass) - 1; 7123 filename = kmem_alloc(len, KM_SLEEP); 7124 (void) snprintf(filename, len, fmt, vhclass); 7125 ASSERT(len == (strlen(filename) + 1)); 7126 return (filename); 7127 } 7128 7129 /* 7130 * initialize the vhci cache related data structures and read the on-disk 7131 * vhci cached data into memory. 7132 */ 7133 static void 7134 setup_vhci_cache(mdi_vhci_t *vh) 7135 { 7136 mdi_vhci_config_t *vhc; 7137 mdi_vhci_cache_t *vhcache; 7138 int i; 7139 nvlist_t *nvl = NULL; 7140 7141 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7142 vh->vh_config = vhc; 7143 vhcache = &vhc->vhc_vhcache; 7144 7145 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7146 7147 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7148 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7149 7150 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7151 7152 /* 7153 * Create string hash; same as mod_hash_create_strhash() except that 7154 * we use NULL key destructor. 7155 */ 7156 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7157 mdi_bus_config_cache_hash_size, 7158 mod_hash_null_keydtor, mod_hash_null_valdtor, 7159 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7160 7161 /* 7162 * The on-disk vhci cache is read during booting prior to the 7163 * lights-out period by mdi_read_devices_files(). 7164 */ 7165 for (i = 0; i < N_VHCI_CLASSES; i++) { 7166 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7167 nvl = vhcache_nvl[i]; 7168 vhcache_nvl[i] = NULL; 7169 break; 7170 } 7171 } 7172 7173 /* 7174 * this is to cover the case of some one manually causing unloading 7175 * (or detaching) and reloading (or attaching) of a vhci driver. 
7176 */ 7177 if (nvl == NULL && modrootloaded) 7178 nvl = read_on_disk_vhci_cache(vh->vh_class); 7179 7180 if (nvl != NULL) { 7181 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7182 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7183 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7184 else { 7185 cmn_err(CE_WARN, 7186 "%s: data file corrupted, will recreate\n", 7187 vhc->vhc_vhcache_filename); 7188 } 7189 rw_exit(&vhcache->vhcache_lock); 7190 nvlist_free(nvl); 7191 } 7192 7193 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7194 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7195 7196 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7197 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7198 } 7199 7200 /* 7201 * free all vhci cache related resources 7202 */ 7203 static int 7204 destroy_vhci_cache(mdi_vhci_t *vh) 7205 { 7206 mdi_vhci_config_t *vhc = vh->vh_config; 7207 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7208 mdi_vhcache_phci_t *cphci, *cphci_next; 7209 mdi_vhcache_client_t *cct, *cct_next; 7210 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7211 7212 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7213 return (MDI_FAILURE); 7214 7215 kmem_free(vhc->vhc_vhcache_filename, 7216 strlen(vhc->vhc_vhcache_filename) + 1); 7217 7218 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7219 7220 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7221 cphci = cphci_next) { 7222 cphci_next = cphci->cphci_next; 7223 free_vhcache_phci(cphci); 7224 } 7225 7226 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7227 cct_next = cct->cct_next; 7228 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7229 cpi_next = cpi->cpi_next; 7230 free_vhcache_pathinfo(cpi); 7231 } 7232 free_vhcache_client(cct); 7233 } 7234 7235 rw_destroy(&vhcache->vhcache_lock); 7236 7237 mutex_destroy(&vhc->vhc_lock); 7238 cv_destroy(&vhc->vhc_cv); 7239 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7240 return (MDI_SUCCESS); 
7241 } 7242 7243 /* 7244 * Stop all vhci cache related async threads and free their resources. 7245 */ 7246 static int 7247 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7248 { 7249 mdi_async_client_config_t *acc, *acc_next; 7250 7251 mutex_enter(&vhc->vhc_lock); 7252 vhc->vhc_flags |= MDI_VHC_EXIT; 7253 ASSERT(vhc->vhc_acc_thrcount >= 0); 7254 cv_broadcast(&vhc->vhc_cv); 7255 7256 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7257 vhc->vhc_acc_thrcount != 0) { 7258 mutex_exit(&vhc->vhc_lock); 7259 delay(1); 7260 mutex_enter(&vhc->vhc_lock); 7261 } 7262 7263 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7264 7265 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7266 acc_next = acc->acc_next; 7267 free_async_client_config(acc); 7268 } 7269 vhc->vhc_acc_list_head = NULL; 7270 vhc->vhc_acc_list_tail = NULL; 7271 vhc->vhc_acc_count = 0; 7272 7273 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7274 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7275 mutex_exit(&vhc->vhc_lock); 7276 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7277 vhcache_dirty(vhc); 7278 return (MDI_FAILURE); 7279 } 7280 } else 7281 mutex_exit(&vhc->vhc_lock); 7282 7283 if (callb_delete(vhc->vhc_cbid) != 0) 7284 return (MDI_FAILURE); 7285 7286 return (MDI_SUCCESS); 7287 } 7288 7289 /* 7290 * Stop vhci cache flush thread 7291 */ 7292 /* ARGSUSED */ 7293 static boolean_t 7294 stop_vhcache_flush_thread(void *arg, int code) 7295 { 7296 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7297 7298 mutex_enter(&vhc->vhc_lock); 7299 vhc->vhc_flags |= MDI_VHC_EXIT; 7300 cv_broadcast(&vhc->vhc_cv); 7301 7302 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7303 mutex_exit(&vhc->vhc_lock); 7304 delay(1); 7305 mutex_enter(&vhc->vhc_lock); 7306 } 7307 7308 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7309 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7310 mutex_exit(&vhc->vhc_lock); 7311 (void) flush_vhcache(vhc, 1); 7312 } else 7313 mutex_exit(&vhc->vhc_lock); 7314 7315 return (B_TRUE); 7316 } 
7317 7318 /* 7319 * Enqueue the vhcache phci (cphci) at the tail of the list 7320 */ 7321 static void 7322 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7323 { 7324 cphci->cphci_next = NULL; 7325 if (vhcache->vhcache_phci_head == NULL) 7326 vhcache->vhcache_phci_head = cphci; 7327 else 7328 vhcache->vhcache_phci_tail->cphci_next = cphci; 7329 vhcache->vhcache_phci_tail = cphci; 7330 } 7331 7332 /* 7333 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7334 */ 7335 static void 7336 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7337 mdi_vhcache_pathinfo_t *cpi) 7338 { 7339 cpi->cpi_next = NULL; 7340 if (cct->cct_cpi_head == NULL) 7341 cct->cct_cpi_head = cpi; 7342 else 7343 cct->cct_cpi_tail->cpi_next = cpi; 7344 cct->cct_cpi_tail = cpi; 7345 } 7346 7347 /* 7348 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7349 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7350 * flag set come at the beginning of the list. All cpis which have this 7351 * flag set come at the end of the list. 
7352 */ 7353 static void 7354 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7355 mdi_vhcache_pathinfo_t *newcpi) 7356 { 7357 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7358 7359 if (cct->cct_cpi_head == NULL || 7360 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7361 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7362 else { 7363 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7364 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7365 prev_cpi = cpi, cpi = cpi->cpi_next) 7366 ; 7367 7368 if (prev_cpi == NULL) 7369 cct->cct_cpi_head = newcpi; 7370 else 7371 prev_cpi->cpi_next = newcpi; 7372 7373 newcpi->cpi_next = cpi; 7374 7375 if (cpi == NULL) 7376 cct->cct_cpi_tail = newcpi; 7377 } 7378 } 7379 7380 /* 7381 * Enqueue the vhcache client (cct) at the tail of the list 7382 */ 7383 static void 7384 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7385 mdi_vhcache_client_t *cct) 7386 { 7387 cct->cct_next = NULL; 7388 if (vhcache->vhcache_client_head == NULL) 7389 vhcache->vhcache_client_head = cct; 7390 else 7391 vhcache->vhcache_client_tail->cct_next = cct; 7392 vhcache->vhcache_client_tail = cct; 7393 } 7394 7395 static void 7396 free_string_array(char **str, int nelem) 7397 { 7398 int i; 7399 7400 if (str) { 7401 for (i = 0; i < nelem; i++) { 7402 if (str[i]) 7403 kmem_free(str[i], strlen(str[i]) + 1); 7404 } 7405 kmem_free(str, sizeof (char *) * nelem); 7406 } 7407 } 7408 7409 static void 7410 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7411 { 7412 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7413 kmem_free(cphci, sizeof (*cphci)); 7414 } 7415 7416 static void 7417 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7418 { 7419 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7420 kmem_free(cpi, sizeof (*cpi)); 7421 } 7422 7423 static void 7424 free_vhcache_client(mdi_vhcache_client_t *cct) 7425 { 7426 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7427 kmem_free(cct, sizeof (*cct)); 7428 } 7429 7430 
static char * 7431 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7432 { 7433 char *name_addr; 7434 int len; 7435 7436 len = strlen(ct_name) + strlen(ct_addr) + 2; 7437 name_addr = kmem_alloc(len, KM_SLEEP); 7438 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7439 7440 if (ret_len) 7441 *ret_len = len; 7442 return (name_addr); 7443 } 7444 7445 /* 7446 * Copy the contents of paddrnvl to vhci cache. 7447 * paddrnvl nvlist contains path information for a vhci client. 7448 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7449 */ 7450 static void 7451 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7452 mdi_vhcache_client_t *cct) 7453 { 7454 nvpair_t *nvp = NULL; 7455 mdi_vhcache_pathinfo_t *cpi; 7456 uint_t nelem; 7457 uint32_t *val; 7458 7459 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7460 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7461 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7462 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7463 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7464 ASSERT(nelem == 2); 7465 cpi->cpi_cphci = cphci_list[val[0]]; 7466 cpi->cpi_flags = val[1]; 7467 enqueue_tail_vhcache_pathinfo(cct, cpi); 7468 } 7469 } 7470 7471 /* 7472 * Copy the contents of caddrmapnvl to vhci cache. 7473 * caddrmapnvl nvlist contains vhci client address to phci client address 7474 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7475 * this nvlist. 
7476 */ 7477 static void 7478 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7479 mdi_vhcache_phci_t *cphci_list[]) 7480 { 7481 nvpair_t *nvp = NULL; 7482 nvlist_t *paddrnvl; 7483 mdi_vhcache_client_t *cct; 7484 7485 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7486 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7487 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7488 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7489 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7490 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7491 /* the client must contain at least one path */ 7492 ASSERT(cct->cct_cpi_head != NULL); 7493 7494 enqueue_vhcache_client(vhcache, cct); 7495 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7496 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7497 } 7498 } 7499 7500 /* 7501 * Copy the contents of the main nvlist to vhci cache. 7502 * 7503 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7504 * The nvlist contains the mappings between the vhci client addresses and 7505 * their corresponding phci client addresses. 7506 * 7507 * The structure of the nvlist is as follows: 7508 * 7509 * Main nvlist: 7510 * NAME TYPE DATA 7511 * version int32 version number 7512 * phcis string array array of phci paths 7513 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7514 * 7515 * structure of c2paddrs_nvl: 7516 * NAME TYPE DATA 7517 * caddr1 nvlist_t paddrs_nvl1 7518 * caddr2 nvlist_t paddrs_nvl2 7519 * ... 7520 * where caddr1, caddr2, ... are vhci client name and addresses in the 7521 * form of "<clientname>@<clientaddress>". 7522 * (for example: "ssd@2000002037cd9f72"); 7523 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7524 * 7525 * structure of paddrs_nvl: 7526 * NAME TYPE DATA 7527 * pi_addr1 uint32_array (phci-id, cpi_flags) 7528 * pi_addr2 uint32_array (phci-id, cpi_flags) 7529 * ... 7530 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7531 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7532 * phci-ids are integers that identify PHCIs to which the 7533 * the bus specific address belongs to. These integers are used as an index 7534 * into to the phcis string array in the main nvlist to get the PHCI path. 7535 */ 7536 static int 7537 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7538 { 7539 char **phcis, **phci_namep; 7540 uint_t nphcis; 7541 mdi_vhcache_phci_t *cphci, **cphci_list; 7542 nvlist_t *caddrmapnvl; 7543 int32_t ver; 7544 int i; 7545 size_t cphci_list_size; 7546 7547 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7548 7549 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7550 ver != MDI_VHCI_CACHE_VERSION) 7551 return (MDI_FAILURE); 7552 7553 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7554 &nphcis) != 0) 7555 return (MDI_SUCCESS); 7556 7557 ASSERT(nphcis > 0); 7558 7559 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7560 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7561 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7562 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7563 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7564 enqueue_vhcache_phci(vhcache, cphci); 7565 cphci_list[i] = cphci; 7566 } 7567 7568 ASSERT(vhcache->vhcache_phci_head != NULL); 7569 7570 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7571 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7572 7573 kmem_free(cphci_list, cphci_list_size); 7574 return (MDI_SUCCESS); 7575 } 7576 7577 /* 7578 * Build paddrnvl for the specified client using the information in the 7579 * vhci cache and add it to the caddrmapnnvl. 7580 * Returns 0 on success, errno on failure. 
7581 */ 7582 static int 7583 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7584 nvlist_t *caddrmapnvl) 7585 { 7586 mdi_vhcache_pathinfo_t *cpi; 7587 nvlist_t *nvl; 7588 int err; 7589 uint32_t val[2]; 7590 7591 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7592 7593 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7594 return (err); 7595 7596 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7597 val[0] = cpi->cpi_cphci->cphci_id; 7598 val[1] = cpi->cpi_flags; 7599 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7600 != 0) 7601 goto out; 7602 } 7603 7604 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7605 out: 7606 nvlist_free(nvl); 7607 return (err); 7608 } 7609 7610 /* 7611 * Build caddrmapnvl using the information in the vhci cache 7612 * and add it to the mainnvl. 7613 * Returns 0 on success, errno on failure. 7614 */ 7615 static int 7616 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7617 { 7618 mdi_vhcache_client_t *cct; 7619 nvlist_t *nvl; 7620 int err; 7621 7622 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7623 7624 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7625 return (err); 7626 7627 for (cct = vhcache->vhcache_client_head; cct != NULL; 7628 cct = cct->cct_next) { 7629 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7630 goto out; 7631 } 7632 7633 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7634 out: 7635 nvlist_free(nvl); 7636 return (err); 7637 } 7638 7639 /* 7640 * Build nvlist using the information in the vhci cache. 7641 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7642 * Returns nvl on success, NULL on failure. 
7643 */ 7644 static nvlist_t * 7645 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7646 { 7647 mdi_vhcache_phci_t *cphci; 7648 uint_t phci_count; 7649 char **phcis; 7650 nvlist_t *nvl; 7651 int err, i; 7652 7653 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7654 nvl = NULL; 7655 goto out; 7656 } 7657 7658 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7659 MDI_VHCI_CACHE_VERSION)) != 0) 7660 goto out; 7661 7662 rw_enter(&vhcache->vhcache_lock, RW_READER); 7663 if (vhcache->vhcache_phci_head == NULL) { 7664 rw_exit(&vhcache->vhcache_lock); 7665 return (nvl); 7666 } 7667 7668 phci_count = 0; 7669 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7670 cphci = cphci->cphci_next) 7671 cphci->cphci_id = phci_count++; 7672 7673 /* build phci pathname list */ 7674 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7675 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7676 cphci = cphci->cphci_next, i++) 7677 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7678 7679 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7680 phci_count); 7681 free_string_array(phcis, phci_count); 7682 7683 if (err == 0 && 7684 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7685 rw_exit(&vhcache->vhcache_lock); 7686 return (nvl); 7687 } 7688 7689 rw_exit(&vhcache->vhcache_lock); 7690 out: 7691 if (nvl) 7692 nvlist_free(nvl); 7693 return (NULL); 7694 } 7695 7696 /* 7697 * Lookup vhcache phci structure for the specified phci path. 7698 */ 7699 static mdi_vhcache_phci_t * 7700 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7701 { 7702 mdi_vhcache_phci_t *cphci; 7703 7704 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7705 7706 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7707 cphci = cphci->cphci_next) { 7708 if (strcmp(cphci->cphci_path, phci_path) == 0) 7709 return (cphci); 7710 } 7711 7712 return (NULL); 7713 } 7714 7715 /* 7716 * Lookup vhcache phci structure for the specified phci. 
7717 */ 7718 static mdi_vhcache_phci_t * 7719 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7720 { 7721 mdi_vhcache_phci_t *cphci; 7722 7723 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7724 7725 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7726 cphci = cphci->cphci_next) { 7727 if (cphci->cphci_phci == ph) 7728 return (cphci); 7729 } 7730 7731 return (NULL); 7732 } 7733 7734 /* 7735 * Add the specified phci to the vhci cache if not already present. 7736 */ 7737 static void 7738 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7739 { 7740 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7741 mdi_vhcache_phci_t *cphci; 7742 char *pathname; 7743 int cache_updated; 7744 7745 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7746 7747 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7748 (void) ddi_pathname(ph->ph_dip, pathname); 7749 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7750 != NULL) { 7751 cphci->cphci_phci = ph; 7752 cache_updated = 0; 7753 } else { 7754 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7755 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7756 cphci->cphci_phci = ph; 7757 enqueue_vhcache_phci(vhcache, cphci); 7758 cache_updated = 1; 7759 } 7760 7761 rw_exit(&vhcache->vhcache_lock); 7762 7763 /* 7764 * Since a new phci has been added, reset 7765 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7766 * during next vhcache_discover_paths(). 7767 */ 7768 mutex_enter(&vhc->vhc_lock); 7769 vhc->vhc_path_discovery_cutoff_time = 0; 7770 mutex_exit(&vhc->vhc_lock); 7771 7772 kmem_free(pathname, MAXPATHLEN); 7773 if (cache_updated) 7774 vhcache_dirty(vhc); 7775 } 7776 7777 /* 7778 * Remove the reference to the specified phci from the vhci cache. 
7779 */ 7780 static void 7781 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7782 { 7783 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7784 mdi_vhcache_phci_t *cphci; 7785 7786 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7787 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7788 /* do not remove the actual mdi_vhcache_phci structure */ 7789 cphci->cphci_phci = NULL; 7790 } 7791 rw_exit(&vhcache->vhcache_lock); 7792 } 7793 7794 static void 7795 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7796 mdi_vhcache_lookup_token_t *src) 7797 { 7798 if (src == NULL) { 7799 dst->lt_cct = NULL; 7800 dst->lt_cct_lookup_time = 0; 7801 } else { 7802 dst->lt_cct = src->lt_cct; 7803 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7804 } 7805 } 7806 7807 /* 7808 * Look up vhcache client for the specified client. 7809 */ 7810 static mdi_vhcache_client_t * 7811 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7812 mdi_vhcache_lookup_token_t *token) 7813 { 7814 mod_hash_val_t hv; 7815 char *name_addr; 7816 int len; 7817 7818 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7819 7820 /* 7821 * If no vhcache clean occurred since the last lookup, we can 7822 * simply return the cct from the last lookup operation. 7823 * It works because ccts are never freed except during the vhcache 7824 * cleanup operation. 
7825 */ 7826 if (token != NULL && 7827 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7828 return (token->lt_cct); 7829 7830 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7831 if (mod_hash_find(vhcache->vhcache_client_hash, 7832 (mod_hash_key_t)name_addr, &hv) == 0) { 7833 if (token) { 7834 token->lt_cct = (mdi_vhcache_client_t *)hv; 7835 token->lt_cct_lookup_time = lbolt64; 7836 } 7837 } else { 7838 if (token) { 7839 token->lt_cct = NULL; 7840 token->lt_cct_lookup_time = 0; 7841 } 7842 hv = NULL; 7843 } 7844 kmem_free(name_addr, len); 7845 return ((mdi_vhcache_client_t *)hv); 7846 } 7847 7848 /* 7849 * Add the specified path to the vhci cache if not already present. 7850 * Also add the vhcache client for the client corresponding to this path 7851 * if it doesn't already exist. 7852 */ 7853 static void 7854 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7855 { 7856 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7857 mdi_vhcache_client_t *cct; 7858 mdi_vhcache_pathinfo_t *cpi; 7859 mdi_phci_t *ph = pip->pi_phci; 7860 mdi_client_t *ct = pip->pi_client; 7861 int cache_updated = 0; 7862 7863 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7864 7865 /* if vhcache client for this pip doesn't already exist, add it */ 7866 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7867 NULL)) == NULL) { 7868 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7869 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7870 ct->ct_guid, NULL); 7871 enqueue_vhcache_client(vhcache, cct); 7872 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7873 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7874 cache_updated = 1; 7875 } 7876 7877 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7878 if (cpi->cpi_cphci->cphci_phci == ph && 7879 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7880 cpi->cpi_pip = pip; 7881 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7882 cpi->cpi_flags &= 7883 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7884 sort_vhcache_paths(cct); 7885 cache_updated = 1; 7886 } 7887 break; 7888 } 7889 } 7890 7891 if (cpi == NULL) { 7892 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7893 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7894 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7895 ASSERT(cpi->cpi_cphci != NULL); 7896 cpi->cpi_pip = pip; 7897 enqueue_vhcache_pathinfo(cct, cpi); 7898 cache_updated = 1; 7899 } 7900 7901 rw_exit(&vhcache->vhcache_lock); 7902 7903 if (cache_updated) 7904 vhcache_dirty(vhc); 7905 } 7906 7907 /* 7908 * Remove the reference to the specified path from the vhci cache. 7909 */ 7910 static void 7911 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7912 { 7913 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7914 mdi_client_t *ct = pip->pi_client; 7915 mdi_vhcache_client_t *cct; 7916 mdi_vhcache_pathinfo_t *cpi; 7917 7918 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7919 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7920 NULL)) != NULL) { 7921 for (cpi = cct->cct_cpi_head; cpi != NULL; 7922 cpi = cpi->cpi_next) { 7923 if (cpi->cpi_pip == pip) { 7924 cpi->cpi_pip = NULL; 7925 break; 7926 } 7927 } 7928 } 7929 rw_exit(&vhcache->vhcache_lock); 7930 } 7931 7932 /* 7933 * Flush the vhci cache to disk. 7934 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7935 */ 7936 static int 7937 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7938 { 7939 nvlist_t *nvl; 7940 int err; 7941 int rv; 7942 7943 /* 7944 * It is possible that the system may shutdown before 7945 * i_ddi_io_initialized (during stmsboot for example). To allow for 7946 * flushing the cache in this case do not check for 7947 * i_ddi_io_initialized when force flag is set. 
7948 */ 7949 if (force_flag == 0 && !i_ddi_io_initialized()) 7950 return (MDI_FAILURE); 7951 7952 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7953 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7954 nvlist_free(nvl); 7955 } else 7956 err = EFAULT; 7957 7958 rv = MDI_SUCCESS; 7959 mutex_enter(&vhc->vhc_lock); 7960 if (err != 0) { 7961 if (err == EROFS) { 7962 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7963 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7964 MDI_VHC_VHCACHE_DIRTY); 7965 } else { 7966 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7967 cmn_err(CE_CONT, "%s: update failed\n", 7968 vhc->vhc_vhcache_filename); 7969 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7970 } 7971 rv = MDI_FAILURE; 7972 } 7973 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7974 cmn_err(CE_CONT, 7975 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7976 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7977 } 7978 mutex_exit(&vhc->vhc_lock); 7979 7980 return (rv); 7981 } 7982 7983 /* 7984 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7985 * Exits itself if left idle for the idle timeout period. 
7986 */ 7987 static void 7988 vhcache_flush_thread(void *arg) 7989 { 7990 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7991 clock_t idle_time, quit_at_ticks; 7992 callb_cpr_t cprinfo; 7993 7994 /* number of seconds to sleep idle before exiting */ 7995 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7996 7997 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7998 "mdi_vhcache_flush"); 7999 mutex_enter(&vhc->vhc_lock); 8000 for (; ; ) { 8001 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8002 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8003 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8004 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8005 (void) cv_timedwait(&vhc->vhc_cv, 8006 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8007 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8008 } else { 8009 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8010 mutex_exit(&vhc->vhc_lock); 8011 8012 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8013 vhcache_dirty(vhc); 8014 8015 mutex_enter(&vhc->vhc_lock); 8016 } 8017 } 8018 8019 quit_at_ticks = ddi_get_lbolt() + idle_time; 8020 8021 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8022 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8023 ddi_get_lbolt() < quit_at_ticks) { 8024 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8025 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8026 quit_at_ticks); 8027 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8028 } 8029 8030 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8031 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8032 goto out; 8033 } 8034 8035 out: 8036 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8037 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8038 CALLB_CPR_EXIT(&cprinfo); 8039 } 8040 8041 /* 8042 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
8043 */ 8044 static void 8045 vhcache_dirty(mdi_vhci_config_t *vhc) 8046 { 8047 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8048 int create_thread; 8049 8050 rw_enter(&vhcache->vhcache_lock, RW_READER); 8051 /* do not flush cache until the cache is fully built */ 8052 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8053 rw_exit(&vhcache->vhcache_lock); 8054 return; 8055 } 8056 rw_exit(&vhcache->vhcache_lock); 8057 8058 mutex_enter(&vhc->vhc_lock); 8059 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8060 mutex_exit(&vhc->vhc_lock); 8061 return; 8062 } 8063 8064 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8065 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8066 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8067 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8068 cv_broadcast(&vhc->vhc_cv); 8069 create_thread = 0; 8070 } else { 8071 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8072 create_thread = 1; 8073 } 8074 mutex_exit(&vhc->vhc_lock); 8075 8076 if (create_thread) 8077 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8078 0, &p0, TS_RUN, minclsyspri); 8079 } 8080 8081 /* 8082 * phci bus config structure - one for for each phci bus config operation that 8083 * we initiate on behalf of a vhci. 
8084 */ 8085 typedef struct mdi_phci_bus_config_s { 8086 char *phbc_phci_path; 8087 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8088 struct mdi_phci_bus_config_s *phbc_next; 8089 } mdi_phci_bus_config_t; 8090 8091 /* vhci bus config structure - one for each vhci bus config operation */ 8092 typedef struct mdi_vhci_bus_config_s { 8093 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8094 major_t vhbc_op_major; /* bus config op major */ 8095 uint_t vhbc_op_flags; /* bus config op flags */ 8096 kmutex_t vhbc_lock; 8097 kcondvar_t vhbc_cv; 8098 int vhbc_thr_count; 8099 } mdi_vhci_bus_config_t; 8100 8101 /* 8102 * bus config the specified phci 8103 */ 8104 static void 8105 bus_config_phci(void *arg) 8106 { 8107 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8108 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8109 dev_info_t *ph_dip; 8110 8111 /* 8112 * first configure all path components upto phci and then configure 8113 * the phci children. 8114 */ 8115 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8116 != NULL) { 8117 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8118 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8119 (void) ndi_devi_config_driver(ph_dip, 8120 vhbc->vhbc_op_flags, 8121 vhbc->vhbc_op_major); 8122 } else 8123 (void) ndi_devi_config(ph_dip, 8124 vhbc->vhbc_op_flags); 8125 8126 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8127 ndi_rele_devi(ph_dip); 8128 } 8129 8130 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8131 kmem_free(phbc, sizeof (*phbc)); 8132 8133 mutex_enter(&vhbc->vhbc_lock); 8134 vhbc->vhbc_thr_count--; 8135 if (vhbc->vhbc_thr_count == 0) 8136 cv_broadcast(&vhbc->vhbc_cv); 8137 mutex_exit(&vhbc->vhbc_lock); 8138 } 8139 8140 /* 8141 * Bus config all phcis associated with the vhci in parallel. 8142 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8143 */ 8144 static void 8145 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8146 ddi_bus_config_op_t op, major_t maj) 8147 { 8148 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8149 mdi_vhci_bus_config_t *vhbc; 8150 mdi_vhcache_phci_t *cphci; 8151 8152 rw_enter(&vhcache->vhcache_lock, RW_READER); 8153 if (vhcache->vhcache_phci_head == NULL) { 8154 rw_exit(&vhcache->vhcache_lock); 8155 return; 8156 } 8157 8158 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8159 8160 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8161 cphci = cphci->cphci_next) { 8162 /* skip phcis that haven't attached before root is available */ 8163 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8164 continue; 8165 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8166 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8167 KM_SLEEP); 8168 phbc->phbc_vhbusconfig = vhbc; 8169 phbc->phbc_next = phbc_head; 8170 phbc_head = phbc; 8171 vhbc->vhbc_thr_count++; 8172 } 8173 rw_exit(&vhcache->vhcache_lock); 8174 8175 vhbc->vhbc_op = op; 8176 vhbc->vhbc_op_major = maj; 8177 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8178 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8179 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8180 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8181 8182 /* now create threads to initiate bus config on all phcis in parallel */ 8183 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8184 phbc_next = phbc->phbc_next; 8185 if (mdi_mtc_off) 8186 bus_config_phci((void *)phbc); 8187 else 8188 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8189 0, &p0, TS_RUN, minclsyspri); 8190 } 8191 8192 mutex_enter(&vhbc->vhbc_lock); 8193 /* wait until all threads exit */ 8194 while (vhbc->vhbc_thr_count > 0) 8195 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8196 mutex_exit(&vhbc->vhbc_lock); 8197 8198 mutex_destroy(&vhbc->vhbc_lock); 8199 cv_destroy(&vhbc->vhbc_cv); 8200 kmem_free(vhbc, sizeof (*vhbc)); 8201 } 8202 8203 /* 8204 * Single 
threaded version of bus_config_all_phcis() 8205 */ 8206 static void 8207 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8208 ddi_bus_config_op_t op, major_t maj) 8209 { 8210 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8211 8212 single_threaded_vhconfig_enter(vhc); 8213 bus_config_all_phcis(vhcache, flags, op, maj); 8214 single_threaded_vhconfig_exit(vhc); 8215 } 8216 8217 /* 8218 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8219 * The path includes the child component in addition to the phci path. 8220 */ 8221 static int 8222 bus_config_one_phci_child(char *path) 8223 { 8224 dev_info_t *ph_dip, *child; 8225 char *devnm; 8226 int rv = MDI_FAILURE; 8227 8228 /* extract the child component of the phci */ 8229 devnm = strrchr(path, '/'); 8230 *devnm++ = '\0'; 8231 8232 /* 8233 * first configure all path components upto phci and then 8234 * configure the phci child. 8235 */ 8236 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8237 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8238 NDI_SUCCESS) { 8239 /* 8240 * release the hold that ndi_devi_config_one() placed 8241 */ 8242 ndi_rele_devi(child); 8243 rv = MDI_SUCCESS; 8244 } 8245 8246 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8247 ndi_rele_devi(ph_dip); 8248 } 8249 8250 devnm--; 8251 *devnm = '/'; 8252 return (rv); 8253 } 8254 8255 /* 8256 * Build a list of phci client paths for the specified vhci client. 8257 * The list includes only those phci client paths which aren't configured yet. 8258 */ 8259 static mdi_phys_path_t * 8260 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8261 { 8262 mdi_vhcache_pathinfo_t *cpi; 8263 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8264 int config_path, len; 8265 8266 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8267 /* 8268 * include only those paths that aren't configured. 
8269 */ 8270 config_path = 0; 8271 if (cpi->cpi_pip == NULL) 8272 config_path = 1; 8273 else { 8274 MDI_PI_LOCK(cpi->cpi_pip); 8275 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8276 config_path = 1; 8277 MDI_PI_UNLOCK(cpi->cpi_pip); 8278 } 8279 8280 if (config_path) { 8281 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8282 len = strlen(cpi->cpi_cphci->cphci_path) + 8283 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8284 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8285 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8286 cpi->cpi_cphci->cphci_path, ct_name, 8287 cpi->cpi_addr); 8288 pp->phys_path_next = NULL; 8289 8290 if (pp_head == NULL) 8291 pp_head = pp; 8292 else 8293 pp_tail->phys_path_next = pp; 8294 pp_tail = pp; 8295 } 8296 } 8297 8298 return (pp_head); 8299 } 8300 8301 /* 8302 * Free the memory allocated for phci client path list. 8303 */ 8304 static void 8305 free_phclient_path_list(mdi_phys_path_t *pp_head) 8306 { 8307 mdi_phys_path_t *pp, *pp_next; 8308 8309 for (pp = pp_head; pp != NULL; pp = pp_next) { 8310 pp_next = pp->phys_path_next; 8311 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8312 kmem_free(pp, sizeof (*pp)); 8313 } 8314 } 8315 8316 /* 8317 * Allocated async client structure and initialize with the specified values. 8318 */ 8319 static mdi_async_client_config_t * 8320 alloc_async_client_config(char *ct_name, char *ct_addr, 8321 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8322 { 8323 mdi_async_client_config_t *acc; 8324 8325 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8326 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8327 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8328 acc->acc_phclient_path_list_head = pp_head; 8329 init_vhcache_lookup_token(&acc->acc_token, tok); 8330 acc->acc_next = NULL; 8331 return (acc); 8332 } 8333 8334 /* 8335 * Free the memory allocated for the async client structure and their members. 
8336 */ 8337 static void 8338 free_async_client_config(mdi_async_client_config_t *acc) 8339 { 8340 if (acc->acc_phclient_path_list_head) 8341 free_phclient_path_list(acc->acc_phclient_path_list_head); 8342 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8343 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8344 kmem_free(acc, sizeof (*acc)); 8345 } 8346 8347 /* 8348 * Sort vhcache pathinfos (cpis) of the specified client. 8349 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8350 * flag set come at the beginning of the list. All cpis which have this 8351 * flag set come at the end of the list. 8352 */ 8353 static void 8354 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8355 { 8356 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8357 8358 cpi_head = cct->cct_cpi_head; 8359 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8360 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8361 cpi_next = cpi->cpi_next; 8362 enqueue_vhcache_pathinfo(cct, cpi); 8363 } 8364 } 8365 8366 /* 8367 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8368 * every vhcache pathinfo of the specified client. If not adjust the flag 8369 * setting appropriately. 8370 * 8371 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8372 * on-disk vhci cache. So every time this flag is updated the cache must be 8373 * flushed. 8374 */ 8375 static void 8376 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8377 mdi_vhcache_lookup_token_t *tok) 8378 { 8379 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8380 mdi_vhcache_client_t *cct; 8381 mdi_vhcache_pathinfo_t *cpi; 8382 8383 rw_enter(&vhcache->vhcache_lock, RW_READER); 8384 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8385 == NULL) { 8386 rw_exit(&vhcache->vhcache_lock); 8387 return; 8388 } 8389 8390 /* 8391 * to avoid unnecessary on-disk cache updates, first check if an 8392 * update is really needed. 
If no update is needed simply return. 8393 */ 8394 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8395 if ((cpi->cpi_pip != NULL && 8396 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8397 (cpi->cpi_pip == NULL && 8398 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8399 break; 8400 } 8401 } 8402 if (cpi == NULL) { 8403 rw_exit(&vhcache->vhcache_lock); 8404 return; 8405 } 8406 8407 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8408 rw_exit(&vhcache->vhcache_lock); 8409 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8410 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8411 tok)) == NULL) { 8412 rw_exit(&vhcache->vhcache_lock); 8413 return; 8414 } 8415 } 8416 8417 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8418 if (cpi->cpi_pip != NULL) 8419 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8420 else 8421 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8422 } 8423 sort_vhcache_paths(cct); 8424 8425 rw_exit(&vhcache->vhcache_lock); 8426 vhcache_dirty(vhc); 8427 } 8428 8429 /* 8430 * Configure all specified paths of the client. 8431 */ 8432 static void 8433 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8434 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8435 { 8436 mdi_phys_path_t *pp; 8437 8438 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8439 (void) bus_config_one_phci_child(pp->phys_path); 8440 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8441 } 8442 8443 /* 8444 * Dequeue elements from vhci async client config list and bus configure 8445 * their corresponding phci clients. 
8446 */ 8447 static void 8448 config_client_paths_thread(void *arg) 8449 { 8450 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8451 mdi_async_client_config_t *acc; 8452 clock_t quit_at_ticks; 8453 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8454 callb_cpr_t cprinfo; 8455 8456 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8457 "mdi_config_client_paths"); 8458 8459 for (; ; ) { 8460 quit_at_ticks = ddi_get_lbolt() + idle_time; 8461 8462 mutex_enter(&vhc->vhc_lock); 8463 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8464 vhc->vhc_acc_list_head == NULL && 8465 ddi_get_lbolt() < quit_at_ticks) { 8466 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8467 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8468 quit_at_ticks); 8469 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8470 } 8471 8472 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8473 vhc->vhc_acc_list_head == NULL) 8474 goto out; 8475 8476 acc = vhc->vhc_acc_list_head; 8477 vhc->vhc_acc_list_head = acc->acc_next; 8478 if (vhc->vhc_acc_list_head == NULL) 8479 vhc->vhc_acc_list_tail = NULL; 8480 vhc->vhc_acc_count--; 8481 mutex_exit(&vhc->vhc_lock); 8482 8483 config_client_paths_sync(vhc, acc->acc_ct_name, 8484 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8485 &acc->acc_token); 8486 8487 free_async_client_config(acc); 8488 } 8489 8490 out: 8491 vhc->vhc_acc_thrcount--; 8492 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8493 CALLB_CPR_EXIT(&cprinfo); 8494 } 8495 8496 /* 8497 * Arrange for all the phci client paths (pp_head) for the specified client 8498 * to be bus configured asynchronously by a thread. 
8499 */ 8500 static void 8501 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8502 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8503 { 8504 mdi_async_client_config_t *acc, *newacc; 8505 int create_thread; 8506 8507 if (pp_head == NULL) 8508 return; 8509 8510 if (mdi_mtc_off) { 8511 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8512 free_phclient_path_list(pp_head); 8513 return; 8514 } 8515 8516 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8517 ASSERT(newacc); 8518 8519 mutex_enter(&vhc->vhc_lock); 8520 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8521 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8522 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8523 free_async_client_config(newacc); 8524 mutex_exit(&vhc->vhc_lock); 8525 return; 8526 } 8527 } 8528 8529 if (vhc->vhc_acc_list_head == NULL) 8530 vhc->vhc_acc_list_head = newacc; 8531 else 8532 vhc->vhc_acc_list_tail->acc_next = newacc; 8533 vhc->vhc_acc_list_tail = newacc; 8534 vhc->vhc_acc_count++; 8535 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8536 cv_broadcast(&vhc->vhc_cv); 8537 create_thread = 0; 8538 } else { 8539 vhc->vhc_acc_thrcount++; 8540 create_thread = 1; 8541 } 8542 mutex_exit(&vhc->vhc_lock); 8543 8544 if (create_thread) 8545 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8546 0, &p0, TS_RUN, minclsyspri); 8547 } 8548 8549 /* 8550 * Return number of online paths for the specified client. 
8551 */ 8552 static int 8553 nonline_paths(mdi_vhcache_client_t *cct) 8554 { 8555 mdi_vhcache_pathinfo_t *cpi; 8556 int online_count = 0; 8557 8558 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8559 if (cpi->cpi_pip != NULL) { 8560 MDI_PI_LOCK(cpi->cpi_pip); 8561 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8562 online_count++; 8563 MDI_PI_UNLOCK(cpi->cpi_pip); 8564 } 8565 } 8566 8567 return (online_count); 8568 } 8569 8570 /* 8571 * Bus configure all paths for the specified vhci client. 8572 * If at least one path for the client is already online, the remaining paths 8573 * will be configured asynchronously. Otherwise, it synchronously configures 8574 * the paths until at least one path is online and then rest of the paths 8575 * will be configured asynchronously. 8576 */ 8577 static void 8578 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8579 { 8580 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8581 mdi_phys_path_t *pp_head, *pp; 8582 mdi_vhcache_client_t *cct; 8583 mdi_vhcache_lookup_token_t tok; 8584 8585 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8586 8587 init_vhcache_lookup_token(&tok, NULL); 8588 8589 if (ct_name == NULL || ct_addr == NULL || 8590 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8591 == NULL || 8592 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8593 rw_exit(&vhcache->vhcache_lock); 8594 return; 8595 } 8596 8597 /* if at least one path is online, configure the rest asynchronously */ 8598 if (nonline_paths(cct) > 0) { 8599 rw_exit(&vhcache->vhcache_lock); 8600 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8601 return; 8602 } 8603 8604 rw_exit(&vhcache->vhcache_lock); 8605 8606 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8607 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8608 rw_enter(&vhcache->vhcache_lock, RW_READER); 8609 8610 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8611 ct_addr, &tok)) 
== NULL) { 8612 rw_exit(&vhcache->vhcache_lock); 8613 goto out; 8614 } 8615 8616 if (nonline_paths(cct) > 0 && 8617 pp->phys_path_next != NULL) { 8618 rw_exit(&vhcache->vhcache_lock); 8619 config_client_paths_async(vhc, ct_name, ct_addr, 8620 pp->phys_path_next, &tok); 8621 pp->phys_path_next = NULL; 8622 goto out; 8623 } 8624 8625 rw_exit(&vhcache->vhcache_lock); 8626 } 8627 } 8628 8629 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8630 out: 8631 free_phclient_path_list(pp_head); 8632 } 8633 8634 static void 8635 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8636 { 8637 mutex_enter(&vhc->vhc_lock); 8638 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8639 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8640 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8641 mutex_exit(&vhc->vhc_lock); 8642 } 8643 8644 static void 8645 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8646 { 8647 mutex_enter(&vhc->vhc_lock); 8648 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8649 cv_broadcast(&vhc->vhc_cv); 8650 mutex_exit(&vhc->vhc_lock); 8651 } 8652 8653 typedef struct mdi_phci_driver_info { 8654 char *phdriver_name; /* name of the phci driver */ 8655 8656 /* set to non zero if the phci driver supports root device */ 8657 int phdriver_root_support; 8658 } mdi_phci_driver_info_t; 8659 8660 /* 8661 * vhci class and root support capability of a phci driver can be 8662 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8663 * phci driver.conf file. The built-in tables below contain this information 8664 * for those phci drivers whose driver.conf files don't yet contain this info. 8665 * 8666 * All phci drivers expect iscsi have root device support. 
8667 */ 8668 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8669 { "fp", 1 }, 8670 { "iscsi", 0 }, 8671 { "ibsrp", 1 } 8672 }; 8673 8674 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8675 8676 static void * 8677 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8678 { 8679 void *new_ptr; 8680 8681 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8682 if (old_ptr) { 8683 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8684 kmem_free(old_ptr, old_size); 8685 } 8686 return (new_ptr); 8687 } 8688 8689 static void 8690 add_to_phci_list(char ***driver_list, int **root_support_list, 8691 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8692 { 8693 ASSERT(*cur_elements <= *max_elements); 8694 if (*cur_elements == *max_elements) { 8695 *max_elements += 10; 8696 *driver_list = mdi_realloc(*driver_list, 8697 sizeof (char *) * (*cur_elements), 8698 sizeof (char *) * (*max_elements)); 8699 *root_support_list = mdi_realloc(*root_support_list, 8700 sizeof (int) * (*cur_elements), 8701 sizeof (int) * (*max_elements)); 8702 } 8703 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8704 (*root_support_list)[*cur_elements] = root_support; 8705 (*cur_elements)++; 8706 } 8707 8708 static void 8709 get_phci_driver_list(char *vhci_class, char ***driver_list, 8710 int **root_support_list, int *cur_elements, int *max_elements) 8711 { 8712 mdi_phci_driver_info_t *st_driver_list, *p; 8713 int st_ndrivers, root_support, i, j, driver_conf_count; 8714 major_t m; 8715 struct devnames *dnp; 8716 ddi_prop_t *propp; 8717 8718 *driver_list = NULL; 8719 *root_support_list = NULL; 8720 *cur_elements = 0; 8721 *max_elements = 0; 8722 8723 /* add the phci drivers derived from the phci driver.conf files */ 8724 for (m = 0; m < devcnt; m++) { 8725 dnp = &devnamesp[m]; 8726 8727 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8728 LOCK_DEV_OPS(&dnp->dn_lock); 8729 if (dnp->dn_global_prop_ptr != NULL && 8730 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8731 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8732 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8733 strcmp(propp->prop_val, vhci_class) == 0) { 8734 8735 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8736 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8737 &dnp->dn_global_prop_ptr->prop_list) 8738 == NULL) ? 1 : 0; 8739 8740 add_to_phci_list(driver_list, root_support_list, 8741 cur_elements, max_elements, dnp->dn_name, 8742 root_support); 8743 8744 UNLOCK_DEV_OPS(&dnp->dn_lock); 8745 } else 8746 UNLOCK_DEV_OPS(&dnp->dn_lock); 8747 } 8748 } 8749 8750 driver_conf_count = *cur_elements; 8751 8752 /* add the phci drivers specified in the built-in tables */ 8753 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8754 st_driver_list = scsi_phci_driver_list; 8755 st_ndrivers = sizeof (scsi_phci_driver_list) / 8756 sizeof (mdi_phci_driver_info_t); 8757 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8758 st_driver_list = ib_phci_driver_list; 8759 st_ndrivers = sizeof (ib_phci_driver_list) / 8760 sizeof (mdi_phci_driver_info_t); 8761 } else { 8762 st_driver_list = NULL; 8763 st_ndrivers = 0; 8764 } 8765 8766 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8767 /* add this phci driver if not already added before */ 8768 for (j = 0; j < driver_conf_count; j++) { 8769 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8770 break; 8771 } 8772 if (j == driver_conf_count) { 8773 add_to_phci_list(driver_list, root_support_list, 8774 cur_elements, max_elements, p->phdriver_name, 8775 p->phdriver_root_support); 8776 } 8777 } 8778 } 8779 8780 /* 8781 * Attach the phci driver instances associated with the specified vhci class. 8782 * If root is mounted attach all phci driver instances. 8783 * If root is not mounted, attach the instances of only those phci 8784 * drivers that have the root support. 
8785 */ 8786 static void 8787 attach_phci_drivers(char *vhci_class) 8788 { 8789 char **driver_list, **p; 8790 int *root_support_list; 8791 int cur_elements, max_elements, i; 8792 major_t m; 8793 8794 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8795 &cur_elements, &max_elements); 8796 8797 for (i = 0; i < cur_elements; i++) { 8798 if (modrootloaded || root_support_list[i]) { 8799 m = ddi_name_to_major(driver_list[i]); 8800 if (m != DDI_MAJOR_T_NONE && 8801 ddi_hold_installed_driver(m)) 8802 ddi_rele_driver(m); 8803 } 8804 } 8805 8806 if (driver_list) { 8807 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8808 kmem_free(*p, strlen(*p) + 1); 8809 kmem_free(driver_list, sizeof (char *) * max_elements); 8810 kmem_free(root_support_list, sizeof (int) * max_elements); 8811 } 8812 } 8813 8814 /* 8815 * Build vhci cache: 8816 * 8817 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8818 * the phci driver instances. During this process the cache gets built. 8819 * 8820 * Cache is built fully if the root is mounted. 8821 * If the root is not mounted, phci drivers that do not have root support 8822 * are not attached. As a result the cache is built partially. The entries 8823 * in the cache reflect only those phci drivers that have root support. 
8824 */ 8825 static int 8826 build_vhci_cache(mdi_vhci_t *vh) 8827 { 8828 mdi_vhci_config_t *vhc = vh->vh_config; 8829 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8830 8831 single_threaded_vhconfig_enter(vhc); 8832 8833 rw_enter(&vhcache->vhcache_lock, RW_READER); 8834 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8835 rw_exit(&vhcache->vhcache_lock); 8836 single_threaded_vhconfig_exit(vhc); 8837 return (0); 8838 } 8839 rw_exit(&vhcache->vhcache_lock); 8840 8841 attach_phci_drivers(vh->vh_class); 8842 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8843 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 8844 8845 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8846 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8847 rw_exit(&vhcache->vhcache_lock); 8848 8849 single_threaded_vhconfig_exit(vhc); 8850 vhcache_dirty(vhc); 8851 return (1); 8852 } 8853 8854 /* 8855 * Determine if discovery of paths is needed. 8856 */ 8857 static int 8858 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8859 { 8860 int rv = 1; 8861 8862 mutex_enter(&vhc->vhc_lock); 8863 if (i_ddi_io_initialized() == 0) { 8864 if (vhc->vhc_path_discovery_boot > 0) { 8865 vhc->vhc_path_discovery_boot--; 8866 goto out; 8867 } 8868 } else { 8869 if (vhc->vhc_path_discovery_postboot > 0) { 8870 vhc->vhc_path_discovery_postboot--; 8871 goto out; 8872 } 8873 } 8874 8875 /* 8876 * Do full path discovery at most once per mdi_path_discovery_interval. 8877 * This is to avoid a series of full path discoveries when opening 8878 * stale /dev/[r]dsk links. 8879 */ 8880 if (mdi_path_discovery_interval != -1 && 8881 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8882 goto out; 8883 8884 rv = 0; 8885 out: 8886 mutex_exit(&vhc->vhc_lock); 8887 return (rv); 8888 } 8889 8890 /* 8891 * Discover all paths: 8892 * 8893 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8894 * driver instances. During this process all paths will be discovered. 
8895 */ 8896 static int 8897 vhcache_discover_paths(mdi_vhci_t *vh) 8898 { 8899 mdi_vhci_config_t *vhc = vh->vh_config; 8900 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8901 int rv = 0; 8902 8903 single_threaded_vhconfig_enter(vhc); 8904 8905 if (vhcache_do_discovery(vhc)) { 8906 attach_phci_drivers(vh->vh_class); 8907 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8908 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 8909 8910 mutex_enter(&vhc->vhc_lock); 8911 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8912 mdi_path_discovery_interval * TICKS_PER_SECOND; 8913 mutex_exit(&vhc->vhc_lock); 8914 rv = 1; 8915 } 8916 8917 single_threaded_vhconfig_exit(vhc); 8918 return (rv); 8919 } 8920 8921 /* 8922 * Generic vhci bus config implementation: 8923 * 8924 * Parameters 8925 * vdip vhci dip 8926 * flags bus config flags 8927 * op bus config operation 8928 * The remaining parameters are bus config operation specific 8929 * 8930 * for BUS_CONFIG_ONE 8931 * arg pointer to name@addr 8932 * child upon successful return from this function, *child will be 8933 * set to the configured and held devinfo child node of vdip. 8934 * ct_addr pointer to client address (i.e. GUID) 8935 * 8936 * for BUS_CONFIG_DRIVER 8937 * arg major number of the driver 8938 * child and ct_addr parameters are ignored 8939 * 8940 * for BUS_CONFIG_ALL 8941 * arg, child, and ct_addr parameters are ignored 8942 * 8943 * Note that for the rest of the bus config operations, this function simply 8944 * calls the framework provided default bus config routine. 
8945 */ 8946 int 8947 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8948 void *arg, dev_info_t **child, char *ct_addr) 8949 { 8950 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8951 mdi_vhci_config_t *vhc = vh->vh_config; 8952 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8953 int rv = 0; 8954 int params_valid = 0; 8955 char *cp; 8956 8957 /* 8958 * To bus config vhcis we relay operation, possibly using another 8959 * thread, to phcis. The phci driver then interacts with MDI to cause 8960 * vhci child nodes to be enumerated under the vhci node. Adding a 8961 * vhci child requires an ndi_devi_enter of the vhci. Since another 8962 * thread may be adding the child, to avoid deadlock we can't wait 8963 * for the relayed operations to complete if we have already entered 8964 * the vhci node. 8965 */ 8966 if (DEVI_BUSY_OWNED(vdip)) { 8967 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8968 "vhci dip is busy owned %p\n", (void *)vdip)); 8969 goto default_bus_config; 8970 } 8971 8972 rw_enter(&vhcache->vhcache_lock, RW_READER); 8973 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8974 rw_exit(&vhcache->vhcache_lock); 8975 rv = build_vhci_cache(vh); 8976 rw_enter(&vhcache->vhcache_lock, RW_READER); 8977 } 8978 8979 switch (op) { 8980 case BUS_CONFIG_ONE: 8981 if (arg != NULL && ct_addr != NULL) { 8982 /* extract node name */ 8983 cp = (char *)arg; 8984 while (*cp != '\0' && *cp != '@') 8985 cp++; 8986 if (*cp == '@') { 8987 params_valid = 1; 8988 *cp = '\0'; 8989 config_client_paths(vhc, (char *)arg, ct_addr); 8990 /* config_client_paths() releases cache_lock */ 8991 *cp = '@'; 8992 break; 8993 } 8994 } 8995 8996 rw_exit(&vhcache->vhcache_lock); 8997 break; 8998 8999 case BUS_CONFIG_DRIVER: 9000 rw_exit(&vhcache->vhcache_lock); 9001 if (rv == 0) 9002 st_bus_config_all_phcis(vhc, flags, op, 9003 (major_t)(uintptr_t)arg); 9004 break; 9005 9006 case BUS_CONFIG_ALL: 9007 rw_exit(&vhcache->vhcache_lock); 9008 if (rv == 0) 9009 
st_bus_config_all_phcis(vhc, flags, op, -1); 9010 break; 9011 9012 default: 9013 rw_exit(&vhcache->vhcache_lock); 9014 break; 9015 } 9016 9017 9018 default_bus_config: 9019 /* 9020 * All requested child nodes are enumerated under the vhci. 9021 * Now configure them. 9022 */ 9023 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9024 NDI_SUCCESS) { 9025 return (MDI_SUCCESS); 9026 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 9027 /* discover all paths and try configuring again */ 9028 if (vhcache_discover_paths(vh) && 9029 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9030 NDI_SUCCESS) 9031 return (MDI_SUCCESS); 9032 } 9033 9034 return (MDI_FAILURE); 9035 } 9036 9037 /* 9038 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 9039 */ 9040 static nvlist_t * 9041 read_on_disk_vhci_cache(char *vhci_class) 9042 { 9043 nvlist_t *nvl; 9044 int err; 9045 char *filename; 9046 9047 filename = vhclass2vhcache_filename(vhci_class); 9048 9049 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9050 kmem_free(filename, strlen(filename) + 1); 9051 return (nvl); 9052 } else if (err == EIO) 9053 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 9054 else if (err == EINVAL) 9055 cmn_err(CE_WARN, 9056 "%s: data file corrupted, will recreate\n", filename); 9057 9058 kmem_free(filename, strlen(filename) + 1); 9059 return (NULL); 9060 } 9061 9062 /* 9063 * Read on-disk vhci cache into nvlists for all vhci classes. 9064 * Called during booting by i_ddi_read_devices_files(). 9065 */ 9066 void 9067 mdi_read_devices_files(void) 9068 { 9069 int i; 9070 9071 for (i = 0; i < N_VHCI_CLASSES; i++) 9072 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9073 } 9074 9075 /* 9076 * Remove all stale entries from vhci cache. 
9077 */ 9078 static void 9079 clean_vhcache(mdi_vhci_config_t *vhc) 9080 { 9081 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9082 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 9083 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 9084 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 9085 9086 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9087 9088 cct_head = vhcache->vhcache_client_head; 9089 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9090 for (cct = cct_head; cct != NULL; cct = cct_next) { 9091 cct_next = cct->cct_next; 9092 9093 cpi_head = cct->cct_cpi_head; 9094 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 9095 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 9096 cpi_next = cpi->cpi_next; 9097 if (cpi->cpi_pip != NULL) { 9098 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 9099 enqueue_tail_vhcache_pathinfo(cct, cpi); 9100 } else 9101 free_vhcache_pathinfo(cpi); 9102 } 9103 9104 if (cct->cct_cpi_head != NULL) 9105 enqueue_vhcache_client(vhcache, cct); 9106 else { 9107 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9108 (mod_hash_key_t)cct->cct_name_addr); 9109 free_vhcache_client(cct); 9110 } 9111 } 9112 9113 cphci_head = vhcache->vhcache_phci_head; 9114 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9115 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 9116 cphci_next = cphci->cphci_next; 9117 if (cphci->cphci_phci != NULL) 9118 enqueue_vhcache_phci(vhcache, cphci); 9119 else 9120 free_vhcache_phci(cphci); 9121 } 9122 9123 vhcache->vhcache_clean_time = lbolt64; 9124 rw_exit(&vhcache->vhcache_lock); 9125 vhcache_dirty(vhc); 9126 } 9127 9128 /* 9129 * Remove all stale entries from vhci cache. 
9130 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9131 */ 9132 void 9133 mdi_clean_vhcache(void) 9134 { 9135 mdi_vhci_t *vh; 9136 9137 mutex_enter(&mdi_mutex); 9138 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9139 vh->vh_refcnt++; 9140 mutex_exit(&mdi_mutex); 9141 clean_vhcache(vh->vh_config); 9142 mutex_enter(&mdi_mutex); 9143 vh->vh_refcnt--; 9144 } 9145 mutex_exit(&mdi_mutex); 9146 } 9147 9148 /* 9149 * mdi_vhci_walk_clients(): 9150 * Walker routine to traverse client dev_info nodes 9151 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9152 * below the client, including nexus devices, which we dont want. 9153 * So we just traverse the immediate siblings, starting from 1st client. 9154 */ 9155 void 9156 mdi_vhci_walk_clients(dev_info_t *vdip, 9157 int (*f)(dev_info_t *, void *), void *arg) 9158 { 9159 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9160 dev_info_t *cdip; 9161 mdi_client_t *ct; 9162 9163 MDI_VHCI_CLIENT_LOCK(vh); 9164 cdip = ddi_get_child(vdip); 9165 while (cdip) { 9166 ct = i_devi_get_client(cdip); 9167 MDI_CLIENT_LOCK(ct); 9168 9169 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9170 cdip = ddi_get_next_sibling(cdip); 9171 else 9172 cdip = NULL; 9173 9174 MDI_CLIENT_UNLOCK(ct); 9175 } 9176 MDI_VHCI_CLIENT_UNLOCK(vh); 9177 } 9178 9179 /* 9180 * mdi_vhci_walk_phcis(): 9181 * Walker routine to traverse phci dev_info nodes 9182 */ 9183 void 9184 mdi_vhci_walk_phcis(dev_info_t *vdip, 9185 int (*f)(dev_info_t *, void *), void *arg) 9186 { 9187 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9188 mdi_phci_t *ph, *next; 9189 9190 MDI_VHCI_PHCI_LOCK(vh); 9191 ph = vh->vh_phci_head; 9192 while (ph) { 9193 MDI_PHCI_LOCK(ph); 9194 9195 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9196 next = ph->ph_next; 9197 else 9198 next = NULL; 9199 9200 MDI_PHCI_UNLOCK(ph); 9201 ph = next; 9202 } 9203 MDI_VHCI_PHCI_UNLOCK(vh); 9204 } 9205 9206 9207 /* 9208 * mdi_walk_vhcis(): 9209 * Walker routine to traverse vhci 
dev_info nodes 9210 */ 9211 void 9212 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9213 { 9214 mdi_vhci_t *vh = NULL; 9215 9216 mutex_enter(&mdi_mutex); 9217 /* 9218 * Scan for already registered vhci 9219 */ 9220 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9221 vh->vh_refcnt++; 9222 mutex_exit(&mdi_mutex); 9223 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9224 mutex_enter(&mdi_mutex); 9225 vh->vh_refcnt--; 9226 break; 9227 } else { 9228 mutex_enter(&mdi_mutex); 9229 vh->vh_refcnt--; 9230 } 9231 } 9232 9233 mutex_exit(&mdi_mutex); 9234 } 9235 9236 /* 9237 * i_mdi_log_sysevent(): 9238 * Logs events for pickup by syseventd 9239 */ 9240 static void 9241 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9242 { 9243 char *path_name; 9244 nvlist_t *attr_list; 9245 9246 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9247 KM_SLEEP) != DDI_SUCCESS) { 9248 goto alloc_failed; 9249 } 9250 9251 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9252 (void) ddi_pathname(dip, path_name); 9253 9254 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9255 ddi_driver_name(dip)) != DDI_SUCCESS) { 9256 goto error; 9257 } 9258 9259 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9260 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9261 goto error; 9262 } 9263 9264 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9265 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9266 goto error; 9267 } 9268 9269 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9270 path_name) != DDI_SUCCESS) { 9271 goto error; 9272 } 9273 9274 if (nvlist_add_string(attr_list, DDI_CLASS, 9275 ph_vh_class) != DDI_SUCCESS) { 9276 goto error; 9277 } 9278 9279 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9280 attr_list, NULL, DDI_SLEEP); 9281 9282 error: 9283 kmem_free(path_name, MAXPATHLEN); 9284 nvlist_free(attr_list); 9285 return; 9286 9287 alloc_failed: 9288 MDI_DEBUG(1, (CE_WARN, dip, 9289 "!i_mdi_log_sysevent: Unable to send sysevent")); 9290 } 
9291 9292 char ** 9293 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9294 { 9295 char **driver_list, **ret_driver_list = NULL; 9296 int *root_support_list; 9297 int cur_elements, max_elements; 9298 9299 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9300 &cur_elements, &max_elements); 9301 9302 9303 if (driver_list) { 9304 kmem_free(root_support_list, sizeof (int) * max_elements); 9305 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9306 * max_elements, sizeof (char *) * cur_elements); 9307 } 9308 *ndrivers = cur_elements; 9309 9310 return (ret_driver_list); 9311 9312 } 9313 9314 void 9315 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9316 { 9317 char **p; 9318 int i; 9319 9320 if (driver_list) { 9321 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9322 kmem_free(*p, strlen(*p) + 1); 9323 kmem_free(driver_list, sizeof (char *) * ndrivers); 9324 } 9325 } 9326 9327 /* 9328 * mdi_is_dev_supported(): 9329 * function called by pHCI bus config operation to determine if a 9330 * device should be represented as a child of the vHCI or the 9331 * pHCI. This decision is made by the vHCI, using cinfo idenity 9332 * information passed by the pHCI - specifics of the cinfo 9333 * representation are by agreement between the pHCI and vHCI. 9334 * Return Values: 9335 * MDI_SUCCESS 9336 * MDI_FAILURE 9337 */ 9338 int 9339 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9340 { 9341 mdi_vhci_t *vh; 9342 9343 ASSERT(class && pdip); 9344 9345 /* 9346 * For dev_supported, mdi_phci_register() must have established pdip as 9347 * a pHCI. 9348 * 9349 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9350 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9351 */ 9352 if (!MDI_PHCI(pdip)) 9353 return (MDI_FAILURE); 9354 9355 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9356 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9357 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9358 return (MDI_FAILURE); 9359 } 9360 9361 /* Return vHCI answer */ 9362 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9363 } 9364 9365 int 9366 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9367 { 9368 uint_t devstate = 0; 9369 dev_info_t *cdip; 9370 9371 if ((pip == NULL) || (dcp == NULL)) 9372 return (MDI_FAILURE); 9373 9374 cdip = mdi_pi_get_client(pip); 9375 9376 switch (mdi_pi_get_state(pip)) { 9377 case MDI_PATHINFO_STATE_INIT: 9378 devstate = DEVICE_DOWN; 9379 break; 9380 case MDI_PATHINFO_STATE_ONLINE: 9381 devstate = DEVICE_ONLINE; 9382 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9383 devstate |= DEVICE_BUSY; 9384 break; 9385 case MDI_PATHINFO_STATE_STANDBY: 9386 devstate = DEVICE_ONLINE; 9387 break; 9388 case MDI_PATHINFO_STATE_FAULT: 9389 devstate = DEVICE_DOWN; 9390 break; 9391 case MDI_PATHINFO_STATE_OFFLINE: 9392 devstate = DEVICE_OFFLINE; 9393 break; 9394 default: 9395 ASSERT(MDI_PI(pip)->pi_state); 9396 } 9397 9398 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9399 return (MDI_FAILURE); 9400 9401 return (MDI_SUCCESS); 9402 } 9403