1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 #include <sys/sysmacros.h> 72 73 #ifdef DEBUG 74 #include <sys/debug.h> 75 int mdi_debug = 1; 76 int mdi_debug_logonly = 0; 77 #define MDI_DEBUG(level, stmnt) \ 78 if (mdi_debug >= (level)) i_mdi_log stmnt 79 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 80 #else /* !DEBUG */ 81 #define MDI_DEBUG(level, stmnt) 82 #endif /* DEBUG */ 83 84 extern pri_t minclsyspri; 85 extern int modrootloaded; 86 87 /* 88 * Global mutex: 89 * Protects vHCI list and structure members. 
90 */ 91 kmutex_t mdi_mutex; 92 93 /* 94 * Registered vHCI class driver lists 95 */ 96 int mdi_vhci_count; 97 mdi_vhci_t *mdi_vhci_head; 98 mdi_vhci_t *mdi_vhci_tail; 99 100 /* 101 * Client Hash Table size 102 */ 103 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 104 105 /* 106 * taskq interface definitions 107 */ 108 #define MDI_TASKQ_N_THREADS 8 109 #define MDI_TASKQ_PRI minclsyspri 110 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 111 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 112 113 taskq_t *mdi_taskq; 114 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 115 116 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 117 118 /* 119 * The data should be "quiet" for this interval (in seconds) before the 120 * vhci cached data is flushed to the disk. 121 */ 122 static int mdi_vhcache_flush_delay = 10; 123 124 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 125 static int mdi_vhcache_flush_daemon_idle_time = 60; 126 127 /* 128 * MDI falls back to discovery of all paths when a bus_config_one fails. 129 * The following parameters can be used to tune this operation. 130 * 131 * mdi_path_discovery_boot 132 * Number of times path discovery will be attempted during early boot. 133 * Probably there is no reason to ever set this value to greater than one. 134 * 135 * mdi_path_discovery_postboot 136 * Number of times path discovery will be attempted after early boot. 137 * Set it to a minimum of two to allow for discovery of iscsi paths which 138 * may happen very late during booting. 139 * 140 * mdi_path_discovery_interval 141 * Minimum number of seconds MDI will wait between successive discovery 142 * of all paths. Set it to -1 to disable discovery of all paths. 
143 */ 144 static int mdi_path_discovery_boot = 1; 145 static int mdi_path_discovery_postboot = 2; 146 static int mdi_path_discovery_interval = 10; 147 148 /* 149 * number of seconds the asynchronous configuration thread will sleep idle 150 * before exiting. 151 */ 152 static int mdi_async_config_idle_time = 600; 153 154 static int mdi_bus_config_cache_hash_size = 256; 155 156 /* turns off multithreaded configuration for certain operations */ 157 static int mdi_mtc_off = 0; 158 159 /* 160 * The "path" to a pathinfo node is identical to the /devices path to a 161 * devinfo node had the device been enumerated under a pHCI instead of 162 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 163 * This association persists across create/delete of the pathinfo nodes, 164 * but not across reboot. 165 */ 166 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 167 static int mdi_pathmap_hash_size = 256; 168 static kmutex_t mdi_pathmap_mutex; 169 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 170 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 171 172 /* 173 * MDI component property name/value string definitions 174 */ 175 const char *mdi_component_prop = "mpxio-component"; 176 const char *mdi_component_prop_vhci = "vhci"; 177 const char *mdi_component_prop_phci = "phci"; 178 const char *mdi_component_prop_client = "client"; 179 180 /* 181 * MDI client global unique identifier property name 182 */ 183 const char *mdi_client_guid_prop = "client-guid"; 184 185 /* 186 * MDI client load balancing property name/value string definitions 187 */ 188 const char *mdi_load_balance = "load-balance"; 189 const char *mdi_load_balance_none = "none"; 190 const char *mdi_load_balance_rr = "round-robin"; 191 const char *mdi_load_balance_lba = "logical-block"; 192 193 /* 194 * Obsolete vHCI class definition; to be removed after Leadville update 195 */ 196 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 197 198 static char 
vhci_greeting[] = 199 "\tThere already exists one vHCI driver for class %s\n" 200 "\tOnly one vHCI driver for each class is allowed\n"; 201 202 /* 203 * Static function prototypes 204 */ 205 static int i_mdi_phci_offline(dev_info_t *, uint_t); 206 static int i_mdi_client_offline(dev_info_t *, uint_t); 207 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 208 static void i_mdi_phci_post_detach(dev_info_t *, 209 ddi_detach_cmd_t, int); 210 static int i_mdi_client_pre_detach(dev_info_t *, 211 ddi_detach_cmd_t); 212 static void i_mdi_client_post_detach(dev_info_t *, 213 ddi_detach_cmd_t, int); 214 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 215 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 216 static int i_mdi_lba_lb(mdi_client_t *ct, 217 mdi_pathinfo_t **ret_pip, struct buf *buf); 218 static void i_mdi_pm_hold_client(mdi_client_t *, int); 219 static void i_mdi_pm_rele_client(mdi_client_t *, int); 220 static void i_mdi_pm_reset_client(mdi_client_t *); 221 static int i_mdi_power_all_phci(mdi_client_t *); 222 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 223 224 225 /* 226 * Internal mdi_pathinfo node functions 227 */ 228 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 229 230 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 231 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 232 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 233 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 234 static void i_mdi_phci_unlock(mdi_phci_t *); 235 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 236 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 237 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 238 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 239 mdi_client_t *); 240 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 241 static void i_mdi_client_remove_path(mdi_client_t *, 242 mdi_pathinfo_t *); 243 244 static int 
i_mdi_pi_state_change(mdi_pathinfo_t *, 245 mdi_pathinfo_state_t, int); 246 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 247 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 248 char **, int); 249 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 250 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 251 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 252 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 253 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 254 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 255 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 256 static void i_mdi_client_update_state(mdi_client_t *); 257 static int i_mdi_client_compute_state(mdi_client_t *, 258 mdi_phci_t *); 259 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 260 static void i_mdi_client_unlock(mdi_client_t *); 261 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 262 static mdi_client_t *i_devi_get_client(dev_info_t *); 263 /* 264 * NOTE: this will be removed once the NWS files are changed to use the new 265 * mdi_{enable,disable}_path interfaces 266 */ 267 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 268 int, int); 269 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 270 mdi_vhci_t *vh, int flags, int op); 271 /* 272 * Failover related function prototypes 273 */ 274 static int i_mdi_failover(void *); 275 276 /* 277 * misc internal functions 278 */ 279 static int i_mdi_get_hash_key(char *); 280 static int i_map_nvlist_error_to_mdi(int); 281 static void i_mdi_report_path_state(mdi_client_t *, 282 mdi_pathinfo_t *); 283 284 static void setup_vhci_cache(mdi_vhci_t *); 285 static int destroy_vhci_cache(mdi_vhci_t *); 286 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 287 static boolean_t stop_vhcache_flush_thread(void *, int); 288 static void 
free_string_array(char **, int); 289 static void free_vhcache_phci(mdi_vhcache_phci_t *); 290 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 291 static void free_vhcache_client(mdi_vhcache_client_t *); 292 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 293 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 294 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 295 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 296 static void vhcache_pi_add(mdi_vhci_config_t *, 297 struct mdi_pathinfo *); 298 static void vhcache_pi_remove(mdi_vhci_config_t *, 299 struct mdi_pathinfo *); 300 static void free_phclient_path_list(mdi_phys_path_t *); 301 static void sort_vhcache_paths(mdi_vhcache_client_t *); 302 static int flush_vhcache(mdi_vhci_config_t *, int); 303 static void vhcache_dirty(mdi_vhci_config_t *); 304 static void free_async_client_config(mdi_async_client_config_t *); 305 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 306 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 307 static nvlist_t *read_on_disk_vhci_cache(char *); 308 extern int fread_nvlist(char *, nvlist_t **); 309 extern int fwrite_nvlist(char *, nvlist_t *); 310 311 /* called once when first vhci registers with mdi */ 312 static void 313 i_mdi_init() 314 { 315 static int initialized = 0; 316 317 if (initialized) 318 return; 319 initialized = 1; 320 321 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 322 323 /* Create our taskq resources */ 324 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 325 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 326 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 327 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 328 329 /* Allocate ['path_instance' <-> "path"] maps */ 330 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 331 mdi_pathmap_bypath = mod_hash_create_strhash( 332 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 333 mod_hash_null_valdtor); 
334 mdi_pathmap_byinstance = mod_hash_create_idhash( 335 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 336 mod_hash_null_valdtor); 337 } 338 339 /* 340 * mdi_get_component_type(): 341 * Return mpxio component type 342 * Return Values: 343 * MDI_COMPONENT_NONE 344 * MDI_COMPONENT_VHCI 345 * MDI_COMPONENT_PHCI 346 * MDI_COMPONENT_CLIENT 347 * XXX This doesn't work under multi-level MPxIO and should be 348 * removed when clients migrate mdi_component_is_*() interfaces. 349 */ 350 int 351 mdi_get_component_type(dev_info_t *dip) 352 { 353 return (DEVI(dip)->devi_mdi_component); 354 } 355 356 /* 357 * mdi_vhci_register(): 358 * Register a vHCI module with the mpxio framework 359 * mdi_vhci_register() is called by vHCI drivers to register the 360 * 'class_driver' vHCI driver and its MDI entrypoints with the 361 * mpxio framework. The vHCI driver must call this interface as 362 * part of its attach(9e) handler. 363 * Competing threads may try to attach mdi_vhci_register() as 364 * the vHCI drivers are loaded and attached as a result of pHCI 365 * driver instance registration (mdi_phci_register()) with the 366 * framework. 367 * Return Values: 368 * MDI_SUCCESS 369 * MDI_FAILURE 370 */ 371 /*ARGSUSED*/ 372 int 373 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 374 int flags) 375 { 376 mdi_vhci_t *vh = NULL; 377 378 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 379 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 380 381 i_mdi_init(); 382 383 mutex_enter(&mdi_mutex); 384 /* 385 * Scan for already registered vhci 386 */ 387 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 388 if (strcmp(vh->vh_class, class) == 0) { 389 /* 390 * vHCI has already been created. Check for valid 391 * vHCI ops registration. 
We only support one vHCI 392 * module per class 393 */ 394 if (vh->vh_ops != NULL) { 395 mutex_exit(&mdi_mutex); 396 cmn_err(CE_NOTE, vhci_greeting, class); 397 return (MDI_FAILURE); 398 } 399 break; 400 } 401 } 402 403 /* 404 * if not yet created, create the vHCI component 405 */ 406 if (vh == NULL) { 407 struct client_hash *hash = NULL; 408 char *load_balance; 409 410 /* 411 * Allocate and initialize the mdi extensions 412 */ 413 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 414 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 415 KM_SLEEP); 416 vh->vh_client_table = hash; 417 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 418 (void) strcpy(vh->vh_class, class); 419 vh->vh_lb = LOAD_BALANCE_RR; 420 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 421 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 422 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 423 vh->vh_lb = LOAD_BALANCE_NONE; 424 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 425 == 0) { 426 vh->vh_lb = LOAD_BALANCE_LBA; 427 } 428 ddi_prop_free(load_balance); 429 } 430 431 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 432 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 433 434 /* 435 * Store the vHCI ops vectors 436 */ 437 vh->vh_dip = vdip; 438 vh->vh_ops = vops; 439 440 setup_vhci_cache(vh); 441 442 if (mdi_vhci_head == NULL) { 443 mdi_vhci_head = vh; 444 } 445 if (mdi_vhci_tail) { 446 mdi_vhci_tail->vh_next = vh; 447 } 448 mdi_vhci_tail = vh; 449 mdi_vhci_count++; 450 } 451 452 /* 453 * Claim the devfs node as a vhci component 454 */ 455 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 456 457 /* 458 * Initialize our back reference from dev_info node 459 */ 460 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 461 mutex_exit(&mdi_mutex); 462 return (MDI_SUCCESS); 463 } 464 465 /* 466 * mdi_vhci_unregister(): 467 * Unregister a vHCI module from mpxio framework 468 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 469 * of 
a vhci to unregister it from the framework. 470 * Return Values: 471 * MDI_SUCCESS 472 * MDI_FAILURE 473 */ 474 /*ARGSUSED*/ 475 int 476 mdi_vhci_unregister(dev_info_t *vdip, int flags) 477 { 478 mdi_vhci_t *found, *vh, *prev = NULL; 479 480 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 481 482 /* 483 * Check for invalid VHCI 484 */ 485 if ((vh = i_devi_get_vhci(vdip)) == NULL) 486 return (MDI_FAILURE); 487 488 /* 489 * Scan the list of registered vHCIs for a match 490 */ 491 mutex_enter(&mdi_mutex); 492 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 493 if (found == vh) 494 break; 495 prev = found; 496 } 497 498 if (found == NULL) { 499 mutex_exit(&mdi_mutex); 500 return (MDI_FAILURE); 501 } 502 503 /* 504 * Check the vHCI, pHCI and client count. All the pHCIs and clients 505 * should have been unregistered, before a vHCI can be 506 * unregistered. 507 */ 508 MDI_VHCI_PHCI_LOCK(vh); 509 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 510 MDI_VHCI_PHCI_UNLOCK(vh); 511 mutex_exit(&mdi_mutex); 512 return (MDI_FAILURE); 513 } 514 MDI_VHCI_PHCI_UNLOCK(vh); 515 516 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 517 mutex_exit(&mdi_mutex); 518 return (MDI_FAILURE); 519 } 520 521 /* 522 * Remove the vHCI from the global list 523 */ 524 if (vh == mdi_vhci_head) { 525 mdi_vhci_head = vh->vh_next; 526 } else { 527 prev->vh_next = vh->vh_next; 528 } 529 if (vh == mdi_vhci_tail) { 530 mdi_vhci_tail = prev; 531 } 532 mdi_vhci_count--; 533 mutex_exit(&mdi_mutex); 534 535 vh->vh_ops = NULL; 536 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 537 DEVI(vdip)->devi_mdi_xhci = NULL; 538 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 539 kmem_free(vh->vh_client_table, 540 mdi_client_table_size * sizeof (struct client_hash)); 541 mutex_destroy(&vh->vh_phci_mutex); 542 mutex_destroy(&vh->vh_client_mutex); 543 544 kmem_free(vh, sizeof (mdi_vhci_t)); 545 return (MDI_SUCCESS); 546 } 547 548 /* 549 * i_mdi_vhci_class2vhci(): 550 * Look for a 
matching vHCI module given a vHCI class name 551 * Return Values: 552 * Handle to a vHCI component 553 * NULL 554 */ 555 static mdi_vhci_t * 556 i_mdi_vhci_class2vhci(char *class) 557 { 558 mdi_vhci_t *vh = NULL; 559 560 ASSERT(!MUTEX_HELD(&mdi_mutex)); 561 562 mutex_enter(&mdi_mutex); 563 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 564 if (strcmp(vh->vh_class, class) == 0) { 565 break; 566 } 567 } 568 mutex_exit(&mdi_mutex); 569 return (vh); 570 } 571 572 /* 573 * i_devi_get_vhci(): 574 * Utility function to get the handle to a vHCI component 575 * Return Values: 576 * Handle to a vHCI component 577 * NULL 578 */ 579 mdi_vhci_t * 580 i_devi_get_vhci(dev_info_t *vdip) 581 { 582 mdi_vhci_t *vh = NULL; 583 if (MDI_VHCI(vdip)) { 584 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 585 } 586 return (vh); 587 } 588 589 /* 590 * mdi_phci_register(): 591 * Register a pHCI module with mpxio framework 592 * mdi_phci_register() is called by pHCI drivers to register with 593 * the mpxio framework and a specific 'class_driver' vHCI. The 594 * pHCI driver must call this interface as part of its attach(9e) 595 * handler. 596 * Return Values: 597 * MDI_SUCCESS 598 * MDI_FAILURE 599 */ 600 /*ARGSUSED*/ 601 int 602 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 603 { 604 mdi_phci_t *ph; 605 mdi_vhci_t *vh; 606 char *data; 607 char *pathname; 608 609 /* 610 * Some subsystems, like fcp, perform pHCI registration from a 611 * different thread than the one doing the pHCI attach(9E) - the 612 * driver attach code is waiting for this other thread to complete. 613 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 614 * (indicating that some thread has done an ndi_devi_enter of parent) 615 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 
616 */ 617 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 618 619 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 620 (void) ddi_pathname(pdip, pathname); 621 622 /* 623 * Check for mpxio-disable property. Enable mpxio if the property is 624 * missing or not set to "yes". 625 * If the property is set to "yes" then emit a brief message. 626 */ 627 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 628 &data) == DDI_SUCCESS)) { 629 if (strcmp(data, "yes") == 0) { 630 MDI_DEBUG(1, (CE_CONT, pdip, 631 "?%s (%s%d) multipath capabilities " 632 "disabled via %s.conf.\n", pathname, 633 ddi_driver_name(pdip), ddi_get_instance(pdip), 634 ddi_driver_name(pdip))); 635 ddi_prop_free(data); 636 kmem_free(pathname, MAXPATHLEN); 637 return (MDI_FAILURE); 638 } 639 ddi_prop_free(data); 640 } 641 642 kmem_free(pathname, MAXPATHLEN); 643 644 /* 645 * Search for a matching vHCI 646 */ 647 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 648 if (vh == NULL) { 649 return (MDI_FAILURE); 650 } 651 652 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 653 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 654 ph->ph_dip = pdip; 655 ph->ph_vhci = vh; 656 ph->ph_next = NULL; 657 ph->ph_unstable = 0; 658 ph->ph_vprivate = 0; 659 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 660 661 MDI_PHCI_LOCK(ph); 662 MDI_PHCI_SET_POWER_UP(ph); 663 MDI_PHCI_UNLOCK(ph); 664 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 665 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 666 667 vhcache_phci_add(vh->vh_config, ph); 668 669 MDI_VHCI_PHCI_LOCK(vh); 670 if (vh->vh_phci_head == NULL) { 671 vh->vh_phci_head = ph; 672 } 673 if (vh->vh_phci_tail) { 674 vh->vh_phci_tail->ph_next = ph; 675 } 676 vh->vh_phci_tail = ph; 677 vh->vh_phci_count++; 678 MDI_VHCI_PHCI_UNLOCK(vh); 679 680 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 681 return (MDI_SUCCESS); 682 } 683 684 /* 685 * mdi_phci_unregister(): 686 * Unregister a pHCI module from mpxio framework 687 * mdi_phci_unregister() 
is called by the pHCI drivers from their 688 * detach(9E) handler to unregister their instances from the 689 * framework. 690 * Return Values: 691 * MDI_SUCCESS 692 * MDI_FAILURE 693 */ 694 /*ARGSUSED*/ 695 int 696 mdi_phci_unregister(dev_info_t *pdip, int flags) 697 { 698 mdi_vhci_t *vh; 699 mdi_phci_t *ph; 700 mdi_phci_t *tmp; 701 mdi_phci_t *prev = NULL; 702 703 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 704 705 ph = i_devi_get_phci(pdip); 706 if (ph == NULL) { 707 MDI_DEBUG(1, (CE_WARN, pdip, 708 "!pHCI unregister: Not a valid pHCI")); 709 return (MDI_FAILURE); 710 } 711 712 vh = ph->ph_vhci; 713 ASSERT(vh != NULL); 714 if (vh == NULL) { 715 MDI_DEBUG(1, (CE_WARN, pdip, 716 "!pHCI unregister: Not a valid vHCI")); 717 return (MDI_FAILURE); 718 } 719 720 MDI_VHCI_PHCI_LOCK(vh); 721 tmp = vh->vh_phci_head; 722 while (tmp) { 723 if (tmp == ph) { 724 break; 725 } 726 prev = tmp; 727 tmp = tmp->ph_next; 728 } 729 730 if (ph == vh->vh_phci_head) { 731 vh->vh_phci_head = ph->ph_next; 732 } else { 733 prev->ph_next = ph->ph_next; 734 } 735 736 if (ph == vh->vh_phci_tail) { 737 vh->vh_phci_tail = prev; 738 } 739 740 vh->vh_phci_count--; 741 MDI_VHCI_PHCI_UNLOCK(vh); 742 743 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 744 ESC_DDI_INITIATOR_UNREGISTER); 745 vhcache_phci_remove(vh->vh_config, ph); 746 cv_destroy(&ph->ph_unstable_cv); 747 mutex_destroy(&ph->ph_mutex); 748 kmem_free(ph, sizeof (mdi_phci_t)); 749 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 750 DEVI(pdip)->devi_mdi_xhci = NULL; 751 return (MDI_SUCCESS); 752 } 753 754 /* 755 * i_devi_get_phci(): 756 * Utility function to return the phci extensions. 757 */ 758 static mdi_phci_t * 759 i_devi_get_phci(dev_info_t *pdip) 760 { 761 mdi_phci_t *ph = NULL; 762 if (MDI_PHCI(pdip)) { 763 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 764 } 765 return (ph); 766 } 767 768 /* 769 * Single thread mdi entry into devinfo node for modifying its children. 
770 * If necessary we perform an ndi_devi_enter of the vHCI before doing 771 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 772 * for the vHCI and one for the pHCI. 773 */ 774 void 775 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 776 { 777 dev_info_t *vdip; 778 int vcircular, pcircular; 779 780 /* Verify calling context */ 781 ASSERT(MDI_PHCI(phci_dip)); 782 vdip = mdi_devi_get_vdip(phci_dip); 783 ASSERT(vdip); /* A pHCI always has a vHCI */ 784 785 /* 786 * If pHCI is detaching then the framework has already entered the 787 * vHCI on a threads that went down the code path leading to 788 * detach_node(). This framework enter of the vHCI during pHCI 789 * detach is done to avoid deadlock with vHCI power management 790 * operations which enter the vHCI and the enter down the path 791 * to the pHCI. If pHCI is detaching then we piggyback this calls 792 * enter of the vHCI on frameworks vHCI enter that has already 793 * occurred - this is OK because we know that the framework thread 794 * doing detach is waiting for our completion. 795 * 796 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 797 * race with detach - but we can't do that because the framework has 798 * already entered the parent, so we have some complexity instead. 799 */ 800 for (;;) { 801 if (ndi_devi_tryenter(vdip, &vcircular)) { 802 ASSERT(vcircular != -1); 803 if (DEVI_IS_DETACHING(phci_dip)) { 804 ndi_devi_exit(vdip, vcircular); 805 vcircular = -1; 806 } 807 break; 808 } else if (DEVI_IS_DETACHING(phci_dip)) { 809 vcircular = -1; 810 break; 811 } else { 812 delay(1); 813 } 814 } 815 816 ndi_devi_enter(phci_dip, &pcircular); 817 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 818 } 819 820 /* 821 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
822 */ 823 void 824 mdi_devi_exit(dev_info_t *phci_dip, int circular) 825 { 826 dev_info_t *vdip; 827 int vcircular, pcircular; 828 829 /* Verify calling context */ 830 ASSERT(MDI_PHCI(phci_dip)); 831 vdip = mdi_devi_get_vdip(phci_dip); 832 ASSERT(vdip); /* A pHCI always has a vHCI */ 833 834 /* extract two circular recursion values from single int */ 835 pcircular = (short)(circular & 0xFFFF); 836 vcircular = (short)((circular >> 16) & 0xFFFF); 837 838 ndi_devi_exit(phci_dip, pcircular); 839 if (vcircular != -1) 840 ndi_devi_exit(vdip, vcircular); 841 } 842 843 /* 844 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 845 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 846 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 847 * with vHCI power management code during path online/offline. Each 848 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 849 * occur within the scope of an active mdi_devi_enter that establishes the 850 * circular value. 
851 */ 852 void 853 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 854 { 855 int pcircular; 856 857 /* Verify calling context */ 858 ASSERT(MDI_PHCI(phci_dip)); 859 860 pcircular = (short)(circular & 0xFFFF); 861 ndi_devi_exit(phci_dip, pcircular); 862 } 863 864 void 865 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 866 { 867 int pcircular; 868 869 /* Verify calling context */ 870 ASSERT(MDI_PHCI(phci_dip)); 871 872 ndi_devi_enter(phci_dip, &pcircular); 873 874 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 875 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 876 } 877 878 /* 879 * mdi_devi_get_vdip(): 880 * given a pHCI dip return vHCI dip 881 */ 882 dev_info_t * 883 mdi_devi_get_vdip(dev_info_t *pdip) 884 { 885 mdi_phci_t *ph; 886 887 ph = i_devi_get_phci(pdip); 888 if (ph && ph->ph_vhci) 889 return (ph->ph_vhci->vh_dip); 890 return (NULL); 891 } 892 893 /* 894 * mdi_devi_pdip_entered(): 895 * Return 1 if we are vHCI and have done an ndi_devi_enter 896 * of a pHCI 897 */ 898 int 899 mdi_devi_pdip_entered(dev_info_t *vdip) 900 { 901 mdi_vhci_t *vh; 902 mdi_phci_t *ph; 903 904 vh = i_devi_get_vhci(vdip); 905 if (vh == NULL) 906 return (0); 907 908 MDI_VHCI_PHCI_LOCK(vh); 909 ph = vh->vh_phci_head; 910 while (ph) { 911 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 912 MDI_VHCI_PHCI_UNLOCK(vh); 913 return (1); 914 } 915 ph = ph->ph_next; 916 } 917 MDI_VHCI_PHCI_UNLOCK(vh); 918 return (0); 919 } 920 921 /* 922 * mdi_phci_path2devinfo(): 923 * Utility function to search for a valid phci device given 924 * the devfs pathname. 
925 */ 926 dev_info_t * 927 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 928 { 929 char *temp_pathname; 930 mdi_vhci_t *vh; 931 mdi_phci_t *ph; 932 dev_info_t *pdip = NULL; 933 934 vh = i_devi_get_vhci(vdip); 935 ASSERT(vh != NULL); 936 937 if (vh == NULL) { 938 /* 939 * Invalid vHCI component, return failure 940 */ 941 return (NULL); 942 } 943 944 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 945 MDI_VHCI_PHCI_LOCK(vh); 946 ph = vh->vh_phci_head; 947 while (ph != NULL) { 948 pdip = ph->ph_dip; 949 ASSERT(pdip != NULL); 950 *temp_pathname = '\0'; 951 (void) ddi_pathname(pdip, temp_pathname); 952 if (strcmp(temp_pathname, pathname) == 0) { 953 break; 954 } 955 ph = ph->ph_next; 956 } 957 if (ph == NULL) { 958 pdip = NULL; 959 } 960 MDI_VHCI_PHCI_UNLOCK(vh); 961 kmem_free(temp_pathname, MAXPATHLEN); 962 return (pdip); 963 } 964 965 /* 966 * mdi_phci_get_path_count(): 967 * get number of path information nodes associated with a given 968 * pHCI device. 969 */ 970 int 971 mdi_phci_get_path_count(dev_info_t *pdip) 972 { 973 mdi_phci_t *ph; 974 int count = 0; 975 976 ph = i_devi_get_phci(pdip); 977 if (ph != NULL) { 978 count = ph->ph_path_count; 979 } 980 return (count); 981 } 982 983 /* 984 * i_mdi_phci_lock(): 985 * Lock a pHCI device 986 * Return Values: 987 * None 988 * Note: 989 * The default locking order is: 990 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 991 * But there are number of situations where locks need to be 992 * grabbed in reverse order. This routine implements try and lock 993 * mechanism depending on the requested parameter option. 994 */ 995 static void 996 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 997 { 998 if (pip) { 999 /* Reverse locking is requested. */ 1000 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1001 /* 1002 * tryenter failed. 
Try to grab again 1003 * after a small delay 1004 */ 1005 MDI_PI_HOLD(pip); 1006 MDI_PI_UNLOCK(pip); 1007 delay(1); 1008 MDI_PI_LOCK(pip); 1009 MDI_PI_RELE(pip); 1010 } 1011 } else { 1012 MDI_PHCI_LOCK(ph); 1013 } 1014 } 1015 1016 /* 1017 * i_mdi_phci_unlock(): 1018 * Unlock the pHCI component 1019 */ 1020 static void 1021 i_mdi_phci_unlock(mdi_phci_t *ph) 1022 { 1023 MDI_PHCI_UNLOCK(ph); 1024 } 1025 1026 /* 1027 * i_mdi_devinfo_create(): 1028 * create client device's devinfo node 1029 * Return Values: 1030 * dev_info 1031 * NULL 1032 * Notes: 1033 */ 1034 static dev_info_t * 1035 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1036 char **compatible, int ncompatible) 1037 { 1038 dev_info_t *cdip = NULL; 1039 1040 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1041 1042 /* Verify for duplicate entry */ 1043 cdip = i_mdi_devinfo_find(vh, name, guid); 1044 ASSERT(cdip == NULL); 1045 if (cdip) { 1046 cmn_err(CE_WARN, 1047 "i_mdi_devinfo_create: client dip %p already exists", 1048 (void *)cdip); 1049 } 1050 1051 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1052 if (cdip == NULL) 1053 goto fail; 1054 1055 /* 1056 * Create component type and Global unique identifier 1057 * properties 1058 */ 1059 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1060 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1061 goto fail; 1062 } 1063 1064 /* Decorate the node with compatible property */ 1065 if (compatible && 1066 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1067 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1068 goto fail; 1069 } 1070 1071 return (cdip); 1072 1073 fail: 1074 if (cdip) { 1075 (void) ndi_prop_remove_all(cdip); 1076 (void) ndi_devi_free(cdip); 1077 } 1078 return (NULL); 1079 } 1080 1081 /* 1082 * i_mdi_devinfo_find(): 1083 * Find a matching devinfo node for given client node name 1084 * and its guid. 
1085 * Return Values: 1086 * Handle to a dev_info node or NULL 1087 */ 1088 static dev_info_t * 1089 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1090 { 1091 char *data; 1092 dev_info_t *cdip = NULL; 1093 dev_info_t *ndip = NULL; 1094 int circular; 1095 1096 ndi_devi_enter(vh->vh_dip, &circular); 1097 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1098 while ((cdip = ndip) != NULL) { 1099 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1100 1101 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1102 continue; 1103 } 1104 1105 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1106 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1107 &data) != DDI_PROP_SUCCESS) { 1108 continue; 1109 } 1110 1111 if (strcmp(data, guid) != 0) { 1112 ddi_prop_free(data); 1113 continue; 1114 } 1115 ddi_prop_free(data); 1116 break; 1117 } 1118 ndi_devi_exit(vh->vh_dip, circular); 1119 return (cdip); 1120 } 1121 1122 /* 1123 * i_mdi_devinfo_remove(): 1124 * Remove a client device node 1125 */ 1126 static int 1127 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1128 { 1129 int rv = MDI_SUCCESS; 1130 1131 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1132 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1133 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1134 if (rv != NDI_SUCCESS) { 1135 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1136 " failed. 
cdip = %p\n", (void *)cdip)); 1137 } 1138 /* 1139 * Convert to MDI error code 1140 */ 1141 switch (rv) { 1142 case NDI_SUCCESS: 1143 rv = MDI_SUCCESS; 1144 break; 1145 case NDI_BUSY: 1146 rv = MDI_BUSY; 1147 break; 1148 default: 1149 rv = MDI_FAILURE; 1150 break; 1151 } 1152 } 1153 return (rv); 1154 } 1155 1156 /* 1157 * i_devi_get_client() 1158 * Utility function to get mpxio component extensions 1159 */ 1160 static mdi_client_t * 1161 i_devi_get_client(dev_info_t *cdip) 1162 { 1163 mdi_client_t *ct = NULL; 1164 1165 if (MDI_CLIENT(cdip)) { 1166 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1167 } 1168 return (ct); 1169 } 1170 1171 /* 1172 * i_mdi_is_child_present(): 1173 * Search for the presence of client device dev_info node 1174 */ 1175 static int 1176 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1177 { 1178 int rv = MDI_FAILURE; 1179 struct dev_info *dip; 1180 int circular; 1181 1182 ndi_devi_enter(vdip, &circular); 1183 dip = DEVI(vdip)->devi_child; 1184 while (dip) { 1185 if (dip == DEVI(cdip)) { 1186 rv = MDI_SUCCESS; 1187 break; 1188 } 1189 dip = dip->devi_sibling; 1190 } 1191 ndi_devi_exit(vdip, circular); 1192 return (rv); 1193 } 1194 1195 1196 /* 1197 * i_mdi_client_lock(): 1198 * Grab client component lock 1199 * Return Values: 1200 * None 1201 * Note: 1202 * The default locking order is: 1203 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1204 * But there are number of situations where locks need to be 1205 * grabbed in reverse order. This routine implements try and lock 1206 * mechanism depending on the requested parameter option. 1207 */ 1208 static void 1209 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1210 { 1211 if (pip) { 1212 /* 1213 * Reverse locking is requested. 1214 */ 1215 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1216 /* 1217 * tryenter failed. 
Try to grab again 1218 * after a small delay 1219 */ 1220 MDI_PI_HOLD(pip); 1221 MDI_PI_UNLOCK(pip); 1222 delay(1); 1223 MDI_PI_LOCK(pip); 1224 MDI_PI_RELE(pip); 1225 } 1226 } else { 1227 MDI_CLIENT_LOCK(ct); 1228 } 1229 } 1230 1231 /* 1232 * i_mdi_client_unlock(): 1233 * Unlock a client component 1234 */ 1235 static void 1236 i_mdi_client_unlock(mdi_client_t *ct) 1237 { 1238 MDI_CLIENT_UNLOCK(ct); 1239 } 1240 1241 /* 1242 * i_mdi_client_alloc(): 1243 * Allocate and initialize a client structure. Caller should 1244 * hold the vhci client lock. 1245 * Return Values: 1246 * Handle to a client component 1247 */ 1248 /*ARGSUSED*/ 1249 static mdi_client_t * 1250 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1251 { 1252 mdi_client_t *ct; 1253 1254 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1255 1256 /* 1257 * Allocate and initialize a component structure. 1258 */ 1259 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1260 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1261 ct->ct_hnext = NULL; 1262 ct->ct_hprev = NULL; 1263 ct->ct_dip = NULL; 1264 ct->ct_vhci = vh; 1265 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1266 (void) strcpy(ct->ct_drvname, name); 1267 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1268 (void) strcpy(ct->ct_guid, lguid); 1269 ct->ct_cprivate = NULL; 1270 ct->ct_vprivate = NULL; 1271 ct->ct_flags = 0; 1272 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1273 MDI_CLIENT_LOCK(ct); 1274 MDI_CLIENT_SET_OFFLINE(ct); 1275 MDI_CLIENT_SET_DETACH(ct); 1276 MDI_CLIENT_SET_POWER_UP(ct); 1277 MDI_CLIENT_UNLOCK(ct); 1278 ct->ct_failover_flags = 0; 1279 ct->ct_failover_status = 0; 1280 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1281 ct->ct_unstable = 0; 1282 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1283 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1284 ct->ct_lb = vh->vh_lb; 1285 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1286 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1287 ct->ct_path_count = 0; 1288 ct->ct_path_head = NULL; 1289 ct->ct_path_tail = NULL; 1290 ct->ct_path_last = NULL; 1291 1292 /* 1293 * Add this client component to our client hash queue 1294 */ 1295 i_mdi_client_enlist_table(vh, ct); 1296 return (ct); 1297 } 1298 1299 /* 1300 * i_mdi_client_enlist_table(): 1301 * Attach the client device to the client hash table. Caller 1302 * should hold the vhci client lock. 1303 */ 1304 static void 1305 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1306 { 1307 int index; 1308 struct client_hash *head; 1309 1310 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1311 1312 index = i_mdi_get_hash_key(ct->ct_guid); 1313 head = &vh->vh_client_table[index]; 1314 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1315 head->ct_hash_head = ct; 1316 head->ct_hash_count++; 1317 vh->vh_client_count++; 1318 } 1319 1320 /* 1321 * i_mdi_client_delist_table(): 1322 * Attach the client device to the client hash table. 1323 * Caller should hold the vhci client lock. 
1324 */ 1325 static void 1326 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1327 { 1328 int index; 1329 char *guid; 1330 struct client_hash *head; 1331 mdi_client_t *next; 1332 mdi_client_t *last; 1333 1334 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1335 1336 guid = ct->ct_guid; 1337 index = i_mdi_get_hash_key(guid); 1338 head = &vh->vh_client_table[index]; 1339 1340 last = NULL; 1341 next = (mdi_client_t *)head->ct_hash_head; 1342 while (next != NULL) { 1343 if (next == ct) { 1344 break; 1345 } 1346 last = next; 1347 next = next->ct_hnext; 1348 } 1349 1350 if (next) { 1351 head->ct_hash_count--; 1352 if (last == NULL) { 1353 head->ct_hash_head = ct->ct_hnext; 1354 } else { 1355 last->ct_hnext = ct->ct_hnext; 1356 } 1357 ct->ct_hnext = NULL; 1358 vh->vh_client_count--; 1359 } 1360 } 1361 1362 1363 /* 1364 * i_mdi_client_free(): 1365 * Free a client component 1366 */ 1367 static int 1368 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1369 { 1370 int rv = MDI_SUCCESS; 1371 int flags = ct->ct_flags; 1372 dev_info_t *cdip; 1373 dev_info_t *vdip; 1374 1375 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1376 1377 vdip = vh->vh_dip; 1378 cdip = ct->ct_dip; 1379 1380 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1381 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1382 DEVI(cdip)->devi_mdi_client = NULL; 1383 1384 /* 1385 * Clear out back ref. 
to dev_info_t node 1386 */ 1387 ct->ct_dip = NULL; 1388 1389 /* 1390 * Remove this client from our hash queue 1391 */ 1392 i_mdi_client_delist_table(vh, ct); 1393 1394 /* 1395 * Uninitialize and free the component 1396 */ 1397 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1398 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1399 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1400 cv_destroy(&ct->ct_failover_cv); 1401 cv_destroy(&ct->ct_unstable_cv); 1402 cv_destroy(&ct->ct_powerchange_cv); 1403 mutex_destroy(&ct->ct_mutex); 1404 kmem_free(ct, sizeof (*ct)); 1405 1406 if (cdip != NULL) { 1407 MDI_VHCI_CLIENT_UNLOCK(vh); 1408 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1409 MDI_VHCI_CLIENT_LOCK(vh); 1410 } 1411 return (rv); 1412 } 1413 1414 /* 1415 * i_mdi_client_find(): 1416 * Find the client structure corresponding to a given guid 1417 * Caller should hold the vhci client lock. 1418 */ 1419 static mdi_client_t * 1420 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1421 { 1422 int index; 1423 struct client_hash *head; 1424 mdi_client_t *ct; 1425 1426 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1427 1428 index = i_mdi_get_hash_key(guid); 1429 head = &vh->vh_client_table[index]; 1430 1431 ct = head->ct_hash_head; 1432 while (ct != NULL) { 1433 if (strcmp(ct->ct_guid, guid) == 0 && 1434 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1435 break; 1436 } 1437 ct = ct->ct_hnext; 1438 } 1439 return (ct); 1440 } 1441 1442 /* 1443 * i_mdi_client_update_state(): 1444 * Compute and update client device state 1445 * Notes: 1446 * A client device can be in any of three possible states: 1447 * 1448 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1449 * one online/standby paths. Can tolerate failures. 1450 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1451 * no alternate paths available as standby. A failure on the online 1452 * would result in loss of access to device data. 
1453 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1454 * no paths available to access the device. 1455 */ 1456 static void 1457 i_mdi_client_update_state(mdi_client_t *ct) 1458 { 1459 int state; 1460 1461 ASSERT(MDI_CLIENT_LOCKED(ct)); 1462 state = i_mdi_client_compute_state(ct, NULL); 1463 MDI_CLIENT_SET_STATE(ct, state); 1464 } 1465 1466 /* 1467 * i_mdi_client_compute_state(): 1468 * Compute client device state 1469 * 1470 * mdi_phci_t * Pointer to pHCI structure which should 1471 * while computing the new value. Used by 1472 * i_mdi_phci_offline() to find the new 1473 * client state after DR of a pHCI. 1474 */ 1475 static int 1476 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1477 { 1478 int state; 1479 int online_count = 0; 1480 int standby_count = 0; 1481 mdi_pathinfo_t *pip, *next; 1482 1483 ASSERT(MDI_CLIENT_LOCKED(ct)); 1484 pip = ct->ct_path_head; 1485 while (pip != NULL) { 1486 MDI_PI_LOCK(pip); 1487 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1488 if (MDI_PI(pip)->pi_phci == ph) { 1489 MDI_PI_UNLOCK(pip); 1490 pip = next; 1491 continue; 1492 } 1493 1494 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1495 == MDI_PATHINFO_STATE_ONLINE) 1496 online_count++; 1497 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1498 == MDI_PATHINFO_STATE_STANDBY) 1499 standby_count++; 1500 MDI_PI_UNLOCK(pip); 1501 pip = next; 1502 } 1503 1504 if (online_count == 0) { 1505 if (standby_count == 0) { 1506 state = MDI_CLIENT_STATE_FAILED; 1507 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1508 " ct = %p\n", (void *)ct)); 1509 } else if (standby_count == 1) { 1510 state = MDI_CLIENT_STATE_DEGRADED; 1511 } else { 1512 state = MDI_CLIENT_STATE_OPTIMAL; 1513 } 1514 } else if (online_count == 1) { 1515 if (standby_count == 0) { 1516 state = MDI_CLIENT_STATE_DEGRADED; 1517 } else { 1518 state = MDI_CLIENT_STATE_OPTIMAL; 1519 } 1520 } else { 1521 state = MDI_CLIENT_STATE_OPTIMAL; 1522 } 1523 return (state); 1524 } 1525 
1526 /* 1527 * i_mdi_client2devinfo(): 1528 * Utility function 1529 */ 1530 dev_info_t * 1531 i_mdi_client2devinfo(mdi_client_t *ct) 1532 { 1533 return (ct->ct_dip); 1534 } 1535 1536 /* 1537 * mdi_client_path2_devinfo(): 1538 * Given the parent devinfo and child devfs pathname, search for 1539 * a valid devfs node handle. 1540 */ 1541 dev_info_t * 1542 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1543 { 1544 dev_info_t *cdip = NULL; 1545 dev_info_t *ndip = NULL; 1546 char *temp_pathname; 1547 int circular; 1548 1549 /* 1550 * Allocate temp buffer 1551 */ 1552 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1553 1554 /* 1555 * Lock parent against changes 1556 */ 1557 ndi_devi_enter(vdip, &circular); 1558 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1559 while ((cdip = ndip) != NULL) { 1560 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1561 1562 *temp_pathname = '\0'; 1563 (void) ddi_pathname(cdip, temp_pathname); 1564 if (strcmp(temp_pathname, pathname) == 0) { 1565 break; 1566 } 1567 } 1568 /* 1569 * Release devinfo lock 1570 */ 1571 ndi_devi_exit(vdip, circular); 1572 1573 /* 1574 * Free the temp buffer 1575 */ 1576 kmem_free(temp_pathname, MAXPATHLEN); 1577 return (cdip); 1578 } 1579 1580 /* 1581 * mdi_client_get_path_count(): 1582 * Utility function to get number of path information nodes 1583 * associated with a given client device. 
1584 */ 1585 int 1586 mdi_client_get_path_count(dev_info_t *cdip) 1587 { 1588 mdi_client_t *ct; 1589 int count = 0; 1590 1591 ct = i_devi_get_client(cdip); 1592 if (ct != NULL) { 1593 count = ct->ct_path_count; 1594 } 1595 return (count); 1596 } 1597 1598 1599 /* 1600 * i_mdi_get_hash_key(): 1601 * Create a hash using strings as keys 1602 * 1603 */ 1604 static int 1605 i_mdi_get_hash_key(char *str) 1606 { 1607 uint32_t g, hash = 0; 1608 char *p; 1609 1610 for (p = str; *p != '\0'; p++) { 1611 g = *p; 1612 hash += g; 1613 } 1614 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1615 } 1616 1617 /* 1618 * mdi_get_lb_policy(): 1619 * Get current load balancing policy for a given client device 1620 */ 1621 client_lb_t 1622 mdi_get_lb_policy(dev_info_t *cdip) 1623 { 1624 client_lb_t lb = LOAD_BALANCE_NONE; 1625 mdi_client_t *ct; 1626 1627 ct = i_devi_get_client(cdip); 1628 if (ct != NULL) { 1629 lb = ct->ct_lb; 1630 } 1631 return (lb); 1632 } 1633 1634 /* 1635 * mdi_set_lb_region_size(): 1636 * Set current region size for the load-balance 1637 */ 1638 int 1639 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1640 { 1641 mdi_client_t *ct; 1642 int rv = MDI_FAILURE; 1643 1644 ct = i_devi_get_client(cdip); 1645 if (ct != NULL && ct->ct_lb_args != NULL) { 1646 ct->ct_lb_args->region_size = region_size; 1647 rv = MDI_SUCCESS; 1648 } 1649 return (rv); 1650 } 1651 1652 /* 1653 * mdi_Set_lb_policy(): 1654 * Set current load balancing policy for a given client device 1655 */ 1656 int 1657 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1658 { 1659 mdi_client_t *ct; 1660 int rv = MDI_FAILURE; 1661 1662 ct = i_devi_get_client(cdip); 1663 if (ct != NULL) { 1664 ct->ct_lb = lb; 1665 rv = MDI_SUCCESS; 1666 } 1667 return (rv); 1668 } 1669 1670 /* 1671 * mdi_failover(): 1672 * failover function called by the vHCI drivers to initiate 1673 * a failover operation. This is typically due to non-availability 1674 * of online paths to route I/O requests. 
Failover can be 1675 * triggered through user application also. 1676 * 1677 * The vHCI driver calls mdi_failover() to initiate a failover 1678 * operation. mdi_failover() calls back into the vHCI driver's 1679 * vo_failover() entry point to perform the actual failover 1680 * operation. The reason for requiring the vHCI driver to 1681 * initiate failover by calling mdi_failover(), instead of directly 1682 * executing vo_failover() itself, is to ensure that the mdi 1683 * framework can keep track of the client state properly. 1684 * Additionally, mdi_failover() provides as a convenience the 1685 * option of performing the failover operation synchronously or 1686 * asynchronously 1687 * 1688 * Upon successful completion of the failover operation, the 1689 * paths that were previously ONLINE will be in the STANDBY state, 1690 * and the newly activated paths will be in the ONLINE state. 1691 * 1692 * The flags modifier determines whether the activation is done 1693 * synchronously: MDI_FAILOVER_SYNC 1694 * Return Values: 1695 * MDI_SUCCESS 1696 * MDI_FAILURE 1697 * MDI_BUSY 1698 */ 1699 /*ARGSUSED*/ 1700 int 1701 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1702 { 1703 int rv; 1704 mdi_client_t *ct; 1705 1706 ct = i_devi_get_client(cdip); 1707 ASSERT(ct != NULL); 1708 if (ct == NULL) { 1709 /* cdip is not a valid client device. Nothing more to do. */ 1710 return (MDI_FAILURE); 1711 } 1712 1713 MDI_CLIENT_LOCK(ct); 1714 1715 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1716 /* A path to the client is being freed */ 1717 MDI_CLIENT_UNLOCK(ct); 1718 return (MDI_BUSY); 1719 } 1720 1721 1722 if (MDI_CLIENT_IS_FAILED(ct)) { 1723 /* 1724 * Client is in failed state. Nothing more to do. 
1725 */ 1726 MDI_CLIENT_UNLOCK(ct); 1727 return (MDI_FAILURE); 1728 } 1729 1730 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1731 /* 1732 * Failover is already in progress; return BUSY 1733 */ 1734 MDI_CLIENT_UNLOCK(ct); 1735 return (MDI_BUSY); 1736 } 1737 /* 1738 * Make sure that mdi_pathinfo node state changes are processed. 1739 * We do not allow failovers to progress while client path state 1740 * changes are in progress 1741 */ 1742 if (ct->ct_unstable) { 1743 if (flags == MDI_FAILOVER_ASYNC) { 1744 MDI_CLIENT_UNLOCK(ct); 1745 return (MDI_BUSY); 1746 } else { 1747 while (ct->ct_unstable) 1748 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1749 } 1750 } 1751 1752 /* 1753 * Client device is in stable state. Before proceeding, perform sanity 1754 * checks again. 1755 */ 1756 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1757 (!i_ddi_devi_attached(ct->ct_dip))) { 1758 /* 1759 * Client is in failed state. Nothing more to do. 1760 */ 1761 MDI_CLIENT_UNLOCK(ct); 1762 return (MDI_FAILURE); 1763 } 1764 1765 /* 1766 * Set the client state as failover in progress. 1767 */ 1768 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1769 ct->ct_failover_flags = flags; 1770 MDI_CLIENT_UNLOCK(ct); 1771 1772 if (flags == MDI_FAILOVER_ASYNC) { 1773 /* 1774 * Submit the initiate failover request via CPR safe 1775 * taskq threads. 1776 */ 1777 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1778 ct, KM_SLEEP); 1779 return (MDI_ACCEPT); 1780 } else { 1781 /* 1782 * Synchronous failover mode. Typically invoked from the user 1783 * land. 1784 */ 1785 rv = i_mdi_failover(ct); 1786 } 1787 return (rv); 1788 } 1789 1790 /* 1791 * i_mdi_failover(): 1792 * internal failover function. Invokes vHCI drivers failover 1793 * callback function and process the failover status 1794 * Return Values: 1795 * None 1796 * 1797 * Note: A client device in failover state can not be detached or freed. 
1798 */ 1799 static int 1800 i_mdi_failover(void *arg) 1801 { 1802 int rv = MDI_SUCCESS; 1803 mdi_client_t *ct = (mdi_client_t *)arg; 1804 mdi_vhci_t *vh = ct->ct_vhci; 1805 1806 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1807 1808 if (vh->vh_ops->vo_failover != NULL) { 1809 /* 1810 * Call vHCI drivers callback routine 1811 */ 1812 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1813 ct->ct_failover_flags); 1814 } 1815 1816 MDI_CLIENT_LOCK(ct); 1817 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1818 1819 /* 1820 * Save the failover return status 1821 */ 1822 ct->ct_failover_status = rv; 1823 1824 /* 1825 * As a result of failover, client status would have been changed. 1826 * Update the client state and wake up anyone waiting on this client 1827 * device. 1828 */ 1829 i_mdi_client_update_state(ct); 1830 1831 cv_broadcast(&ct->ct_failover_cv); 1832 MDI_CLIENT_UNLOCK(ct); 1833 return (rv); 1834 } 1835 1836 /* 1837 * Load balancing is logical block. 1838 * IOs within the range described by region_size 1839 * would go on the same path. This would improve the 1840 * performance by cache-hit on some of the RAID devices. 1841 * Search only for online paths(At some point we 1842 * may want to balance across target ports). 1843 * If no paths are found then default to round-robin. 
1844 */ 1845 static int 1846 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1847 { 1848 int path_index = -1; 1849 int online_path_count = 0; 1850 int online_nonpref_path_count = 0; 1851 int region_size = ct->ct_lb_args->region_size; 1852 mdi_pathinfo_t *pip; 1853 mdi_pathinfo_t *next; 1854 int preferred, path_cnt; 1855 1856 pip = ct->ct_path_head; 1857 while (pip) { 1858 MDI_PI_LOCK(pip); 1859 if (MDI_PI(pip)->pi_state == 1860 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1861 online_path_count++; 1862 } else if (MDI_PI(pip)->pi_state == 1863 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1864 online_nonpref_path_count++; 1865 } 1866 next = (mdi_pathinfo_t *) 1867 MDI_PI(pip)->pi_client_link; 1868 MDI_PI_UNLOCK(pip); 1869 pip = next; 1870 } 1871 /* if found any online/preferred then use this type */ 1872 if (online_path_count > 0) { 1873 path_cnt = online_path_count; 1874 preferred = 1; 1875 } else if (online_nonpref_path_count > 0) { 1876 path_cnt = online_nonpref_path_count; 1877 preferred = 0; 1878 } else { 1879 path_cnt = 0; 1880 } 1881 if (path_cnt) { 1882 path_index = (bp->b_blkno >> region_size) % path_cnt; 1883 pip = ct->ct_path_head; 1884 while (pip && path_index != -1) { 1885 MDI_PI_LOCK(pip); 1886 if (path_index == 0 && 1887 (MDI_PI(pip)->pi_state == 1888 MDI_PATHINFO_STATE_ONLINE) && 1889 MDI_PI(pip)->pi_preferred == preferred) { 1890 MDI_PI_HOLD(pip); 1891 MDI_PI_UNLOCK(pip); 1892 *ret_pip = pip; 1893 return (MDI_SUCCESS); 1894 } 1895 path_index --; 1896 next = (mdi_pathinfo_t *) 1897 MDI_PI(pip)->pi_client_link; 1898 MDI_PI_UNLOCK(pip); 1899 pip = next; 1900 } 1901 if (pip == NULL) { 1902 MDI_DEBUG(4, (CE_NOTE, NULL, 1903 "!lba %llx, no pip !!\n", 1904 bp->b_lblkno)); 1905 } else { 1906 MDI_DEBUG(4, (CE_NOTE, NULL, 1907 "!lba %llx, no pip for path_index, " 1908 "pip %p\n", bp->b_lblkno, (void *)pip)); 1909 } 1910 } 1911 return (MDI_FAILURE); 1912 } 1913 1914 /* 1915 * mdi_select_path(): 1916 * select a 
*	path to access a client device.
 *
 *	mdi_select_path() function is called by the vHCI drivers to
 *	select a path to route the I/O request to.  The caller passes
 *	the block I/O data transfer structure ("buf") as one of the
 *	parameters.  The mpxio framework uses the buf structure
 *	contents to maintain per path statistics (total I/O size /
 *	count pending).  If more than one online paths are available to
 *	select, the framework automatically selects a suitable path
 *	for routing I/O request. If a failover operation is active for
 *	this client device the call shall be failed with MDI_BUSY error
 *	code.
 *
 *	By default this function returns a suitable path in online
 *	state based on the current load balancing policy.  Currently
 *	we support LOAD_BALANCE_NONE (Previously selected online path
 *	will continue to be used till the path is usable) and
 *	LOAD_BALANCE_RR (Online paths will be selected in a round
 *	robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *	based on the logical block).  The load balancing policy is set
 *	through the vHCI driver's configuration file (driver.conf).
 *
 *	vHCI drivers may override this default behavior by specifying
 *	appropriate flags.  The meaning of the third argument depends
 *	on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
 *	then the argument is the "path instance" of the path to select.
 *	If MDI_SELECT_PATH_INSTANCE is not set then the argument is
 *	"start_pip". A non NULL "start_pip" is the starting point to
 *	walk and find the next appropriate path.  The following values
 *	are currently defined: MDI_SELECT_ONLINE_PATH (to select an
 *	ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
 *	STANDBY path).
 *
 *	The non-standard behavior is used by the scsi_vhci driver,
 *	whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *	attach of client devices (to avoid an unnecessary failover
 *	when the STANDBY path comes up first), during failover
 *	(to activate a STANDBY path as ONLINE).
 *
 *	The selected path is returned in a mdi_hold_path() state
 *	(pi_ref_cnt). Caller should release the hold by calling
 *	mdi_rele_path().
 *
 * Return Values:
 *	MDI_SUCCESS	- Completed successfully
 *	MDI_BUSY	- Client device is busy failing over, or no path
 *			  matched but a path in a transient state was seen
 *	MDI_NOPATH	- Client device is online, but no valid path are
 *			  available to access this client device
 *	MDI_FAILURE	- Invalid client device or state, or the requested
 *			  path instance does not exist or is not ONLINE
 *	MDI_DEVI_ONLINING
 *			- Client device (struct dev_info state) is in
 *			  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    void *arg, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;
	mdi_pathinfo_t	*start_pip;	/* request starting pathinfo */
	int		path_instance;	/* request specific path instance */

	/* determine type of arg based on flags */
	if (flags & MDI_SELECT_PATH_INSTANCE) {
		/* arg carries an integer path instance, not a pointer */
		flags &= ~MDI_SELECT_PATH_INSTANCE;
		path_instance = (int)(intptr_t)arg;
		start_pip = NULL;
	} else {
		path_instance = 0;
		start_pip = (mdi_pathinfo_t *)arg;
	}

	/* Any flag left over at this point selects non-standard behavior. */
	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	/* The client state checks only apply to the standard behavior. */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/* Caller is specifying a specific pathinfo path by path_instance */
	if (path_instance) {
		/* search for pathinfo with correct path_instance */
		for (pip = head;
		    pip && (mdi_pi_get_path_instance(pip) != path_instance);
		    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
			;

		/* If path can't be selected then MDI_FAILURE is returned. */
		if (pip == NULL) {
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/* verify state of path: only an ONLINE path is handed out */
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state != MDI_PATHINFO_STATE_ONLINE) {
			MDI_PI_UNLOCK(pip);
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		/*
		 * Return the path in hold state. Caller should release the
		 * hold by calling mdi_rele_path()
		 */
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
		ct->ct_path_last = pip;
		*ret_pip = pip;
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		/*
		 * The list is walked circularly from 'start', at most twice:
		 * once looking for preferred paths and, when the walk comes
		 * back around to 'start', once more for non-preferred paths.
		 */
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.  Remember that a transient path was
			 * seen so that MDI_BUSY rather than MDI_NOPATH is
			 * returned if nothing else matches.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
		    (ct->ct_lb_args->region_size) && bp &&
		    (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
			    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* No path picked by block number: use round robin instead. */
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * The search begins at the element after the reference
		 * point: the last selected path for the standard behavior,
		 * the caller's start_pip otherwise.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond is set to 1 when this path satisfies the
			 * request: its state must be one of those allowed by
			 * the flags (ONLINE only for the standard behavior)
			 * and its preference must equal the one of the
			 * current pass.  Unrecognized flag combinations
			 * never match.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
				    MDI_PI(pip)->pi_preferred ==
				    preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				/*
				 * Only the standard behavior records the
				 * selection as the next round robin start.
				 */
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.  Remember that a transient path was
			 * seen so that MDI_BUSY rather than MDI_NOPATH is
			 * returned if nothing else matches.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * This label is entered with pip locked, both when
			 * falling through from above and from the goto below.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report MDI_BUSY when a path in a
	 * transient state was seen during the search, MDI_NOPATH otherwise.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *	Caller should hold the branch's devinfo node to get a consistent
 *	snap shot of the mdi_pathinfo nodes.
 *
 *	Please note that even the list is stable the mdi_pathinfo
 *	node state and properties are volatile.  The caller should lock
 *	and unlock the nodes by calling mdi_pi_lock() and
 *	mdi_pi_unlock() functions to get a stable properties.
 *
 *	If there is a need to use the nodes beyond the hold of the
 *	devinfo node period (For ex. I/O), then mdi_pathinfo node
 *	need to be held against unexpected removal by calling
 *	mdi_hold_path() and should be released by calling
 *	mdi_rele_path() on completion.
2403 */ 2404 mdi_pathinfo_t * 2405 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2406 { 2407 mdi_client_t *ct; 2408 2409 if (!MDI_CLIENT(ct_dip)) 2410 return (NULL); 2411 2412 /* 2413 * Walk through client link 2414 */ 2415 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2416 ASSERT(ct != NULL); 2417 2418 if (pip == NULL) 2419 return ((mdi_pathinfo_t *)ct->ct_path_head); 2420 2421 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2422 } 2423 2424 /* 2425 * For a phci, return the next available path to any client 2426 * Note: ditto mdi_get_next_phci_path() 2427 */ 2428 mdi_pathinfo_t * 2429 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2430 { 2431 mdi_phci_t *ph; 2432 2433 if (!MDI_PHCI(ph_dip)) 2434 return (NULL); 2435 2436 /* 2437 * Walk through pHCI link 2438 */ 2439 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2440 ASSERT(ph != NULL); 2441 2442 if (pip == NULL) 2443 return ((mdi_pathinfo_t *)ph->ph_path_head); 2444 2445 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2446 } 2447 2448 /* 2449 * mdi_hold_path(): 2450 * Hold the mdi_pathinfo node against unwanted unexpected free. 2451 * Return Values: 2452 * None 2453 */ 2454 void 2455 mdi_hold_path(mdi_pathinfo_t *pip) 2456 { 2457 if (pip) { 2458 MDI_PI_LOCK(pip); 2459 MDI_PI_HOLD(pip); 2460 MDI_PI_UNLOCK(pip); 2461 } 2462 } 2463 2464 2465 /* 2466 * mdi_rele_path(): 2467 * Release the mdi_pathinfo node which was selected 2468 * through mdi_select_path() mechanism or manually held by 2469 * calling mdi_hold_path(). 2470 * Return Values: 2471 * None 2472 */ 2473 void 2474 mdi_rele_path(mdi_pathinfo_t *pip) 2475 { 2476 if (pip) { 2477 MDI_PI_LOCK(pip); 2478 MDI_PI_RELE(pip); 2479 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2480 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2481 } 2482 MDI_PI_UNLOCK(pip); 2483 } 2484 } 2485 2486 /* 2487 * mdi_pi_lock(): 2488 * Lock the mdi_pathinfo node. 
2489 * Note: 2490 * The caller should release the lock by calling mdi_pi_unlock() 2491 */ 2492 void 2493 mdi_pi_lock(mdi_pathinfo_t *pip) 2494 { 2495 ASSERT(pip != NULL); 2496 if (pip) { 2497 MDI_PI_LOCK(pip); 2498 } 2499 } 2500 2501 2502 /* 2503 * mdi_pi_unlock(): 2504 * Unlock the mdi_pathinfo node. 2505 * Note: 2506 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2507 */ 2508 void 2509 mdi_pi_unlock(mdi_pathinfo_t *pip) 2510 { 2511 ASSERT(pip != NULL); 2512 if (pip) { 2513 MDI_PI_UNLOCK(pip); 2514 } 2515 } 2516 2517 /* 2518 * mdi_pi_find(): 2519 * Search the list of mdi_pathinfo nodes attached to the 2520 * pHCI/Client device node whose path address matches "paddr". 2521 * Returns a pointer to the mdi_pathinfo node if a matching node is 2522 * found. 2523 * Return Values: 2524 * mdi_pathinfo node handle 2525 * NULL 2526 * Notes: 2527 * Caller need not hold any locks to call this function. 2528 */ 2529 mdi_pathinfo_t * 2530 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2531 { 2532 mdi_phci_t *ph; 2533 mdi_vhci_t *vh; 2534 mdi_client_t *ct; 2535 mdi_pathinfo_t *pip = NULL; 2536 2537 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s", 2538 caddr ? caddr : "NULL", paddr ? paddr : "NULL")); 2539 if ((pdip == NULL) || (paddr == NULL)) { 2540 return (NULL); 2541 } 2542 ph = i_devi_get_phci(pdip); 2543 if (ph == NULL) { 2544 /* 2545 * Invalid pHCI device, Nothing more to do. 2546 */ 2547 MDI_DEBUG(2, (CE_WARN, pdip, 2548 "!mdi_pi_find: invalid phci")); 2549 return (NULL); 2550 } 2551 2552 vh = ph->ph_vhci; 2553 if (vh == NULL) { 2554 /* 2555 * Invalid vHCI device, Nothing more to do. 2556 */ 2557 MDI_DEBUG(2, (CE_WARN, pdip, 2558 "!mdi_pi_find: invalid vhci")); 2559 return (NULL); 2560 } 2561 2562 /* 2563 * Look for pathinfo node identified by paddr. 2564 */ 2565 if (caddr == NULL) { 2566 /* 2567 * Find a mdi_pathinfo node under pHCI list for a matching 2568 * unit address. 
2569 */ 2570 MDI_PHCI_LOCK(ph); 2571 if (MDI_PHCI_IS_OFFLINE(ph)) { 2572 MDI_DEBUG(2, (CE_WARN, pdip, 2573 "!mdi_pi_find: offline phci %p", (void *)ph)); 2574 MDI_PHCI_UNLOCK(ph); 2575 return (NULL); 2576 } 2577 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2578 2579 while (pip != NULL) { 2580 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2581 break; 2582 } 2583 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2584 } 2585 MDI_PHCI_UNLOCK(ph); 2586 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p", 2587 (void *)pip)); 2588 return (pip); 2589 } 2590 2591 /* 2592 * XXX - Is the rest of the code in this function really necessary? 2593 * The consumers of mdi_pi_find() can search for the desired pathinfo 2594 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2595 * whether the search is based on the pathinfo nodes attached to 2596 * the pHCI or the client node, the result will be the same. 2597 */ 2598 2599 /* 2600 * Find the client device corresponding to 'caddr' 2601 */ 2602 MDI_VHCI_CLIENT_LOCK(vh); 2603 2604 /* 2605 * XXX - Passing NULL to the following function works as long as the 2606 * the client addresses (caddr) are unique per vhci basis. 2607 */ 2608 ct = i_mdi_client_find(vh, NULL, caddr); 2609 if (ct == NULL) { 2610 /* 2611 * Client not found, Obviously mdi_pathinfo node has not been 2612 * created yet. 2613 */ 2614 MDI_VHCI_CLIENT_UNLOCK(vh); 2615 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not " 2616 "found for caddr %s", caddr ? caddr : "NULL")); 2617 return (NULL); 2618 } 2619 2620 /* 2621 * Hold the client lock and look for a mdi_pathinfo node with matching 2622 * pHCI and paddr 2623 */ 2624 MDI_CLIENT_LOCK(ct); 2625 2626 /* 2627 * Release the global mutex as it is no more needed. Note: We always 2628 * respect the locking order while acquiring. 
2629 */ 2630 MDI_VHCI_CLIENT_UNLOCK(vh); 2631 2632 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2633 while (pip != NULL) { 2634 /* 2635 * Compare the unit address 2636 */ 2637 if ((MDI_PI(pip)->pi_phci == ph) && 2638 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2639 break; 2640 } 2641 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2642 } 2643 MDI_CLIENT_UNLOCK(ct); 2644 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip)); 2645 return (pip); 2646 } 2647 2648 /* 2649 * mdi_pi_alloc(): 2650 * Allocate and initialize a new instance of a mdi_pathinfo node. 2651 * The mdi_pathinfo node returned by this function identifies a 2652 * unique device path is capable of having properties attached 2653 * and passed to mdi_pi_online() to fully attach and online the 2654 * path and client device node. 2655 * The mdi_pathinfo node returned by this function must be 2656 * destroyed using mdi_pi_free() if the path is no longer 2657 * operational or if the caller fails to attach a client device 2658 * node when calling mdi_pi_online(). The framework will not free 2659 * the resources allocated. 2660 * This function can be called from both interrupt and kernel 2661 * contexts. DDI_NOSLEEP flag should be used while calling 2662 * from interrupt contexts. 2663 * Return Values: 2664 * MDI_SUCCESS 2665 * MDI_FAILURE 2666 * MDI_NOMEM 2667 */ 2668 /*ARGSUSED*/ 2669 int 2670 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2671 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2672 { 2673 mdi_vhci_t *vh; 2674 mdi_phci_t *ph; 2675 mdi_client_t *ct; 2676 mdi_pathinfo_t *pip = NULL; 2677 dev_info_t *cdip; 2678 int rv = MDI_NOMEM; 2679 int path_allocated = 0; 2680 2681 MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s", 2682 cname ? cname : "NULL", caddr ? caddr : "NULL", 2683 paddr ? 
paddr : "NULL")); 2684 2685 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2686 ret_pip == NULL) { 2687 /* Nothing more to do */ 2688 return (MDI_FAILURE); 2689 } 2690 2691 *ret_pip = NULL; 2692 2693 /* No allocations on detaching pHCI */ 2694 if (DEVI_IS_DETACHING(pdip)) { 2695 /* Invalid pHCI device, return failure */ 2696 MDI_DEBUG(1, (CE_WARN, pdip, 2697 "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip)); 2698 return (MDI_FAILURE); 2699 } 2700 2701 ph = i_devi_get_phci(pdip); 2702 ASSERT(ph != NULL); 2703 if (ph == NULL) { 2704 /* Invalid pHCI device, return failure */ 2705 MDI_DEBUG(1, (CE_WARN, pdip, 2706 "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip)); 2707 return (MDI_FAILURE); 2708 } 2709 2710 MDI_PHCI_LOCK(ph); 2711 vh = ph->ph_vhci; 2712 if (vh == NULL) { 2713 /* Invalid vHCI device, return failure */ 2714 MDI_DEBUG(1, (CE_WARN, pdip, 2715 "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip)); 2716 MDI_PHCI_UNLOCK(ph); 2717 return (MDI_FAILURE); 2718 } 2719 2720 if (MDI_PHCI_IS_READY(ph) == 0) { 2721 /* 2722 * Do not allow new node creation when pHCI is in 2723 * offline/suspended states 2724 */ 2725 MDI_DEBUG(1, (CE_WARN, pdip, 2726 "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph)); 2727 MDI_PHCI_UNLOCK(ph); 2728 return (MDI_BUSY); 2729 } 2730 MDI_PHCI_UNSTABLE(ph); 2731 MDI_PHCI_UNLOCK(ph); 2732 2733 /* look for a matching client, create one if not found */ 2734 MDI_VHCI_CLIENT_LOCK(vh); 2735 ct = i_mdi_client_find(vh, cname, caddr); 2736 if (ct == NULL) { 2737 ct = i_mdi_client_alloc(vh, cname, caddr); 2738 ASSERT(ct != NULL); 2739 } 2740 2741 if (ct->ct_dip == NULL) { 2742 /* 2743 * Allocate a devinfo node 2744 */ 2745 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2746 compatible, ncompatible); 2747 if (ct->ct_dip == NULL) { 2748 (void) i_mdi_client_free(vh, ct); 2749 goto fail; 2750 } 2751 } 2752 cdip = ct->ct_dip; 2753 2754 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2755 DEVI(cdip)->devi_mdi_client = 
(caddr_t)ct; 2756 2757 MDI_CLIENT_LOCK(ct); 2758 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2759 while (pip != NULL) { 2760 /* 2761 * Compare the unit address 2762 */ 2763 if ((MDI_PI(pip)->pi_phci == ph) && 2764 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2765 break; 2766 } 2767 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2768 } 2769 MDI_CLIENT_UNLOCK(ct); 2770 2771 if (pip == NULL) { 2772 /* 2773 * This is a new path for this client device. Allocate and 2774 * initialize a new pathinfo node 2775 */ 2776 pip = i_mdi_pi_alloc(ph, paddr, ct); 2777 ASSERT(pip != NULL); 2778 path_allocated = 1; 2779 } 2780 rv = MDI_SUCCESS; 2781 2782 fail: 2783 /* 2784 * Release the global mutex. 2785 */ 2786 MDI_VHCI_CLIENT_UNLOCK(vh); 2787 2788 /* 2789 * Mark the pHCI as stable 2790 */ 2791 MDI_PHCI_LOCK(ph); 2792 MDI_PHCI_STABLE(ph); 2793 MDI_PHCI_UNLOCK(ph); 2794 *ret_pip = pip; 2795 2796 MDI_DEBUG(2, (CE_NOTE, pdip, 2797 "!mdi_pi_alloc_compatible: alloc %p", (void *)pip)); 2798 2799 if (path_allocated) 2800 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2801 2802 return (rv); 2803 } 2804 2805 /*ARGSUSED*/ 2806 int 2807 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2808 int flags, mdi_pathinfo_t **ret_pip) 2809 { 2810 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2811 flags, ret_pip)); 2812 } 2813 2814 /* 2815 * i_mdi_pi_alloc(): 2816 * Allocate a mdi_pathinfo node and add to the pHCI path list 2817 * Return Values: 2818 * mdi_pathinfo 2819 */ 2820 /*ARGSUSED*/ 2821 static mdi_pathinfo_t * 2822 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2823 { 2824 mdi_pathinfo_t *pip; 2825 int ct_circular; 2826 int ph_circular; 2827 static char path[MAXPATHLEN]; 2828 char *path_persistent; 2829 int path_instance; 2830 int se_flag; 2831 int kmem_flag; 2832 mod_hash_val_t hv; 2833 2834 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2835 2836 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2837 
mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2838 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2839 MDI_PATHINFO_STATE_TRANSIENT; 2840 2841 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2842 MDI_PI_SET_USER_DISABLE(pip); 2843 2844 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2845 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2846 2847 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2848 MDI_PI_SET_DRV_DISABLE(pip); 2849 2850 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2851 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2852 MDI_PI(pip)->pi_client = ct; 2853 MDI_PI(pip)->pi_phci = ph; 2854 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2855 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2856 2857 /* 2858 * We form the "path" to the pathinfo node, and see if we have 2859 * already allocated a 'path_instance' for that "path". If so, 2860 * we use the already allocated 'path_instance'. If not, we 2861 * allocate a new 'path_instance' and associate it with a copy of 2862 * the "path" string (which is never freed). The association 2863 * between a 'path_instance' this "path" string persists until 2864 * reboot. 
2865 */ 2866 mutex_enter(&mdi_pathmap_mutex); 2867 (void) ddi_pathname(ph->ph_dip, path); 2868 (void) sprintf(path + strlen(path), "/%s@%s", 2869 ddi_node_name(ct->ct_dip), MDI_PI(pip)->pi_addr); 2870 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2871 path_instance = (uint_t)(intptr_t)hv; 2872 } else { 2873 /* allocate a new 'path_instance' and persistent "path" */ 2874 path_instance = mdi_pathmap_instance++; 2875 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2876 (void) mod_hash_insert(mdi_pathmap_bypath, 2877 (mod_hash_key_t)path_persistent, 2878 (mod_hash_val_t)(intptr_t)path_instance); 2879 (void) mod_hash_insert(mdi_pathmap_byinstance, 2880 (mod_hash_key_t)(intptr_t)path_instance, 2881 (mod_hash_val_t)path_persistent); 2882 } 2883 mutex_exit(&mdi_pathmap_mutex); 2884 MDI_PI(pip)->pi_path_instance = path_instance; 2885 2886 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 2887 ASSERT(MDI_PI(pip)->pi_prop != NULL); 2888 MDI_PI(pip)->pi_pprivate = NULL; 2889 MDI_PI(pip)->pi_cprivate = NULL; 2890 MDI_PI(pip)->pi_vprivate = NULL; 2891 MDI_PI(pip)->pi_client_link = NULL; 2892 MDI_PI(pip)->pi_phci_link = NULL; 2893 MDI_PI(pip)->pi_ref_cnt = 0; 2894 MDI_PI(pip)->pi_kstats = NULL; 2895 MDI_PI(pip)->pi_preferred = 1; 2896 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 2897 2898 /* 2899 * Lock both dev_info nodes against changes in parallel. 2900 * 2901 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 2902 * This atypical operation is done to synchronize pathinfo nodes 2903 * during devinfo snapshot (see di_register_pip) by 'pretending' that 2904 * the pathinfo nodes are children of the Client. 
2905 */ 2906 ndi_devi_enter(ct->ct_dip, &ct_circular); 2907 ndi_devi_enter(ph->ph_dip, &ph_circular); 2908 2909 i_mdi_phci_add_path(ph, pip); 2910 i_mdi_client_add_path(ct, pip); 2911 2912 ndi_devi_exit(ph->ph_dip, ph_circular); 2913 ndi_devi_exit(ct->ct_dip, ct_circular); 2914 2915 /* determine interrupt context */ 2916 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 2917 kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 2918 2919 i_ddi_di_cache_invalidate(kmem_flag); 2920 2921 return (pip); 2922 } 2923 2924 /* 2925 * mdi_pi_pathname_by_instance(): 2926 * Lookup of "path" by 'path_instance'. Return "path". 2927 * NOTE: returned "path" remains valid forever (until reboot). 2928 */ 2929 char * 2930 mdi_pi_pathname_by_instance(int path_instance) 2931 { 2932 char *path; 2933 mod_hash_val_t hv; 2934 2935 /* mdi_pathmap lookup of "path" by 'path_instance' */ 2936 mutex_enter(&mdi_pathmap_mutex); 2937 if (mod_hash_find(mdi_pathmap_byinstance, 2938 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 2939 path = (char *)hv; 2940 else 2941 path = NULL; 2942 mutex_exit(&mdi_pathmap_mutex); 2943 return (path); 2944 } 2945 2946 /* 2947 * i_mdi_phci_add_path(): 2948 * Add a mdi_pathinfo node to pHCI list. 
2949 * Notes: 2950 * Caller should per-pHCI mutex 2951 */ 2952 static void 2953 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 2954 { 2955 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 2956 2957 MDI_PHCI_LOCK(ph); 2958 if (ph->ph_path_head == NULL) { 2959 ph->ph_path_head = pip; 2960 } else { 2961 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 2962 } 2963 ph->ph_path_tail = pip; 2964 ph->ph_path_count++; 2965 MDI_PHCI_UNLOCK(ph); 2966 } 2967 2968 /* 2969 * i_mdi_client_add_path(): 2970 * Add mdi_pathinfo node to client list 2971 */ 2972 static void 2973 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 2974 { 2975 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 2976 2977 MDI_CLIENT_LOCK(ct); 2978 if (ct->ct_path_head == NULL) { 2979 ct->ct_path_head = pip; 2980 } else { 2981 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 2982 } 2983 ct->ct_path_tail = pip; 2984 ct->ct_path_count++; 2985 MDI_CLIENT_UNLOCK(ct); 2986 } 2987 2988 /* 2989 * mdi_pi_free(): 2990 * Free the mdi_pathinfo node and also client device node if this 2991 * is the last path to the device 2992 * Return Values: 2993 * MDI_SUCCESS 2994 * MDI_FAILURE 2995 * MDI_BUSY 2996 */ 2997 /*ARGSUSED*/ 2998 int 2999 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3000 { 3001 int rv = MDI_FAILURE; 3002 mdi_vhci_t *vh; 3003 mdi_phci_t *ph; 3004 mdi_client_t *ct; 3005 int (*f)(); 3006 int client_held = 0; 3007 3008 MDI_PI_LOCK(pip); 3009 ph = MDI_PI(pip)->pi_phci; 3010 ASSERT(ph != NULL); 3011 if (ph == NULL) { 3012 /* 3013 * Invalid pHCI device, return failure 3014 */ 3015 MDI_DEBUG(1, (CE_WARN, NULL, 3016 "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip)); 3017 MDI_PI_UNLOCK(pip); 3018 return (MDI_FAILURE); 3019 } 3020 3021 vh = ph->ph_vhci; 3022 ASSERT(vh != NULL); 3023 if (vh == NULL) { 3024 /* Invalid pHCI device, return failure */ 3025 MDI_DEBUG(1, (CE_WARN, NULL, 3026 "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip)); 3027 MDI_PI_UNLOCK(pip); 3028 return (MDI_FAILURE); 3029 } 3030 3031 ct = 
MDI_PI(pip)->pi_client; 3032 ASSERT(ct != NULL); 3033 if (ct == NULL) { 3034 /* 3035 * Invalid Client device, return failure 3036 */ 3037 MDI_DEBUG(1, (CE_WARN, NULL, 3038 "!mdi_pi_free: invalid client pip=%p", (void *)pip)); 3039 MDI_PI_UNLOCK(pip); 3040 return (MDI_FAILURE); 3041 } 3042 3043 /* 3044 * Check to see for busy condition. A mdi_pathinfo can only be freed 3045 * if the node state is either offline or init and the reference count 3046 * is zero. 3047 */ 3048 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3049 MDI_PI_IS_INITING(pip))) { 3050 /* 3051 * Node is busy 3052 */ 3053 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3054 "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip)); 3055 MDI_PI_UNLOCK(pip); 3056 return (MDI_BUSY); 3057 } 3058 3059 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3060 /* 3061 * Give a chance for pending I/Os to complete. 3062 */ 3063 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: " 3064 "%d cmds still pending on path: %p\n", 3065 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3066 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3067 &MDI_PI(pip)->pi_mutex, 3068 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3069 /* 3070 * The timeout time reached without ref_cnt being zero 3071 * being signaled. 3072 */ 3073 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3074 "!mdi_pi_free: " 3075 "Timeout reached on path %p without the cond\n", 3076 (void *)pip)); 3077 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, 3078 "!mdi_pi_free: " 3079 "%d cmds still pending on path: %p\n", 3080 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3081 MDI_PI_UNLOCK(pip); 3082 return (MDI_BUSY); 3083 } 3084 } 3085 if (MDI_PI(pip)->pi_pm_held) { 3086 client_held = 1; 3087 } 3088 MDI_PI_UNLOCK(pip); 3089 3090 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3091 3092 MDI_CLIENT_LOCK(ct); 3093 3094 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3095 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3096 3097 /* 3098 * Wait till failover is complete before removing this node. 
3099 */ 3100 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3101 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3102 3103 MDI_CLIENT_UNLOCK(ct); 3104 MDI_VHCI_CLIENT_LOCK(vh); 3105 MDI_CLIENT_LOCK(ct); 3106 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3107 3108 if (!MDI_PI_IS_INITING(pip)) { 3109 f = vh->vh_ops->vo_pi_uninit; 3110 if (f != NULL) { 3111 rv = (*f)(vh->vh_dip, pip, 0); 3112 } 3113 } 3114 /* 3115 * If vo_pi_uninit() completed successfully. 3116 */ 3117 if (rv == MDI_SUCCESS) { 3118 if (client_held) { 3119 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free " 3120 "i_mdi_pm_rele_client\n")); 3121 i_mdi_pm_rele_client(ct, 1); 3122 } 3123 i_mdi_pi_free(ph, pip, ct); 3124 if (ct->ct_path_count == 0) { 3125 /* 3126 * Client lost its last path. 3127 * Clean up the client device 3128 */ 3129 MDI_CLIENT_UNLOCK(ct); 3130 (void) i_mdi_client_free(ct->ct_vhci, ct); 3131 MDI_VHCI_CLIENT_UNLOCK(vh); 3132 return (rv); 3133 } 3134 } 3135 MDI_CLIENT_UNLOCK(ct); 3136 MDI_VHCI_CLIENT_UNLOCK(vh); 3137 3138 if (rv == MDI_FAILURE) 3139 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3140 3141 return (rv); 3142 } 3143 3144 /* 3145 * i_mdi_pi_free(): 3146 * Free the mdi_pathinfo node 3147 */ 3148 static void 3149 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3150 { 3151 int ct_circular; 3152 int ph_circular; 3153 int se_flag; 3154 int kmem_flag; 3155 3156 ASSERT(MDI_CLIENT_LOCKED(ct)); 3157 3158 /* 3159 * remove any per-path kstats 3160 */ 3161 i_mdi_pi_kstat_destroy(pip); 3162 3163 /* See comments in i_mdi_pi_alloc() */ 3164 ndi_devi_enter(ct->ct_dip, &ct_circular); 3165 ndi_devi_enter(ph->ph_dip, &ph_circular); 3166 3167 i_mdi_client_remove_path(ct, pip); 3168 i_mdi_phci_remove_path(ph, pip); 3169 3170 ndi_devi_exit(ph->ph_dip, ph_circular); 3171 ndi_devi_exit(ct->ct_dip, ct_circular); 3172 3173 /* determine interrupt context */ 3174 se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP; 3175 kmem_flag = (se_flag == SE_SLEEP) ? 
KM_SLEEP : KM_NOSLEEP; 3176 3177 i_ddi_di_cache_invalidate(kmem_flag); 3178 3179 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3180 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3181 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3182 if (MDI_PI(pip)->pi_addr) { 3183 kmem_free(MDI_PI(pip)->pi_addr, 3184 strlen(MDI_PI(pip)->pi_addr) + 1); 3185 MDI_PI(pip)->pi_addr = NULL; 3186 } 3187 3188 if (MDI_PI(pip)->pi_prop) { 3189 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3190 MDI_PI(pip)->pi_prop = NULL; 3191 } 3192 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3193 } 3194 3195 3196 /* 3197 * i_mdi_phci_remove_path(): 3198 * Remove a mdi_pathinfo node from pHCI list. 3199 * Notes: 3200 * Caller should hold per-pHCI mutex 3201 */ 3202 static void 3203 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3204 { 3205 mdi_pathinfo_t *prev = NULL; 3206 mdi_pathinfo_t *path = NULL; 3207 3208 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3209 3210 MDI_PHCI_LOCK(ph); 3211 path = ph->ph_path_head; 3212 while (path != NULL) { 3213 if (path == pip) { 3214 break; 3215 } 3216 prev = path; 3217 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3218 } 3219 3220 if (path) { 3221 ph->ph_path_count--; 3222 if (prev) { 3223 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3224 } else { 3225 ph->ph_path_head = 3226 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3227 } 3228 if (ph->ph_path_tail == path) { 3229 ph->ph_path_tail = prev; 3230 } 3231 } 3232 3233 /* 3234 * Clear the pHCI link 3235 */ 3236 MDI_PI(pip)->pi_phci_link = NULL; 3237 MDI_PI(pip)->pi_phci = NULL; 3238 MDI_PHCI_UNLOCK(ph); 3239 } 3240 3241 /* 3242 * i_mdi_client_remove_path(): 3243 * Remove a mdi_pathinfo node from client path list. 
3244 */ 3245 static void 3246 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3247 { 3248 mdi_pathinfo_t *prev = NULL; 3249 mdi_pathinfo_t *path; 3250 3251 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3252 3253 ASSERT(MDI_CLIENT_LOCKED(ct)); 3254 path = ct->ct_path_head; 3255 while (path != NULL) { 3256 if (path == pip) { 3257 break; 3258 } 3259 prev = path; 3260 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3261 } 3262 3263 if (path) { 3264 ct->ct_path_count--; 3265 if (prev) { 3266 MDI_PI(prev)->pi_client_link = 3267 MDI_PI(path)->pi_client_link; 3268 } else { 3269 ct->ct_path_head = 3270 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link; 3271 } 3272 if (ct->ct_path_tail == path) { 3273 ct->ct_path_tail = prev; 3274 } 3275 if (ct->ct_path_last == path) { 3276 ct->ct_path_last = ct->ct_path_head; 3277 } 3278 } 3279 MDI_PI(pip)->pi_client_link = NULL; 3280 MDI_PI(pip)->pi_client = NULL; 3281 } 3282 3283 /* 3284 * i_mdi_pi_state_change(): 3285 * online a mdi_pathinfo node 3286 * 3287 * Return Values: 3288 * MDI_SUCCESS 3289 * MDI_FAILURE 3290 */ 3291 /*ARGSUSED*/ 3292 static int 3293 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag) 3294 { 3295 int rv = MDI_SUCCESS; 3296 mdi_vhci_t *vh; 3297 mdi_phci_t *ph; 3298 mdi_client_t *ct; 3299 int (*f)(); 3300 dev_info_t *cdip; 3301 3302 MDI_PI_LOCK(pip); 3303 3304 ph = MDI_PI(pip)->pi_phci; 3305 ASSERT(ph); 3306 if (ph == NULL) { 3307 /* 3308 * Invalid pHCI device, fail the request 3309 */ 3310 MDI_PI_UNLOCK(pip); 3311 MDI_DEBUG(1, (CE_WARN, NULL, 3312 "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip)); 3313 return (MDI_FAILURE); 3314 } 3315 3316 vh = ph->ph_vhci; 3317 ASSERT(vh); 3318 if (vh == NULL) { 3319 /* 3320 * Invalid vHCI device, fail the request 3321 */ 3322 MDI_PI_UNLOCK(pip); 3323 MDI_DEBUG(1, (CE_WARN, NULL, 3324 "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip)); 3325 return (MDI_FAILURE); 3326 } 3327 3328 ct = MDI_PI(pip)->pi_client; 3329 ASSERT(ct 
!= NULL); 3330 if (ct == NULL) { 3331 /* 3332 * Invalid client device, fail the request 3333 */ 3334 MDI_PI_UNLOCK(pip); 3335 MDI_DEBUG(1, (CE_WARN, NULL, 3336 "!mdi_pi_state_change: invalid client pip=%p", 3337 (void *)pip)); 3338 return (MDI_FAILURE); 3339 } 3340 3341 /* 3342 * If this path has not been initialized yet, Callback vHCI driver's 3343 * pathinfo node initialize entry point 3344 */ 3345 3346 if (MDI_PI_IS_INITING(pip)) { 3347 MDI_PI_UNLOCK(pip); 3348 f = vh->vh_ops->vo_pi_init; 3349 if (f != NULL) { 3350 rv = (*f)(vh->vh_dip, pip, 0); 3351 if (rv != MDI_SUCCESS) { 3352 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3353 "!vo_pi_init: failed vHCI=0x%p, pip=0x%p", 3354 (void *)vh, (void *)pip)); 3355 return (MDI_FAILURE); 3356 } 3357 } 3358 MDI_PI_LOCK(pip); 3359 MDI_PI_CLEAR_TRANSIENT(pip); 3360 } 3361 3362 /* 3363 * Do not allow state transition when pHCI is in offline/suspended 3364 * states 3365 */ 3366 i_mdi_phci_lock(ph, pip); 3367 if (MDI_PHCI_IS_READY(ph) == 0) { 3368 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3369 "!mdi_pi_state_change: pHCI not ready, pHCI=%p", 3370 (void *)ph)); 3371 MDI_PI_UNLOCK(pip); 3372 i_mdi_phci_unlock(ph); 3373 return (MDI_BUSY); 3374 } 3375 MDI_PHCI_UNSTABLE(ph); 3376 i_mdi_phci_unlock(ph); 3377 3378 /* 3379 * Check if mdi_pathinfo state is in transient state. 3380 * If yes, offlining is in progress and wait till transient state is 3381 * cleared. 3382 */ 3383 if (MDI_PI_IS_TRANSIENT(pip)) { 3384 while (MDI_PI_IS_TRANSIENT(pip)) { 3385 cv_wait(&MDI_PI(pip)->pi_state_cv, 3386 &MDI_PI(pip)->pi_mutex); 3387 } 3388 } 3389 3390 /* 3391 * Grab the client lock in reverse order sequence and release the 3392 * mdi_pathinfo mutex. 
3393 */ 3394 i_mdi_client_lock(ct, pip); 3395 MDI_PI_UNLOCK(pip); 3396 3397 /* 3398 * Wait till failover state is cleared 3399 */ 3400 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3401 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3402 3403 /* 3404 * Mark the mdi_pathinfo node state as transient 3405 */ 3406 MDI_PI_LOCK(pip); 3407 switch (state) { 3408 case MDI_PATHINFO_STATE_ONLINE: 3409 MDI_PI_SET_ONLINING(pip); 3410 break; 3411 3412 case MDI_PATHINFO_STATE_STANDBY: 3413 MDI_PI_SET_STANDBYING(pip); 3414 break; 3415 3416 case MDI_PATHINFO_STATE_FAULT: 3417 /* 3418 * Mark the pathinfo state as FAULTED 3419 */ 3420 MDI_PI_SET_FAULTING(pip); 3421 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3422 break; 3423 3424 case MDI_PATHINFO_STATE_OFFLINE: 3425 /* 3426 * ndi_devi_offline() cannot hold pip or ct locks. 3427 */ 3428 MDI_PI_UNLOCK(pip); 3429 /* 3430 * Don't offline the client dev_info node unless we have 3431 * no available paths left at all. 3432 */ 3433 cdip = ct->ct_dip; 3434 if ((flag & NDI_DEVI_REMOVE) && 3435 (ct->ct_path_count == 1)) { 3436 i_mdi_client_unlock(ct); 3437 rv = ndi_devi_offline(cdip, 0); 3438 if (rv != NDI_SUCCESS) { 3439 /* 3440 * Convert to MDI error code 3441 */ 3442 switch (rv) { 3443 case NDI_BUSY: 3444 rv = MDI_BUSY; 3445 break; 3446 default: 3447 rv = MDI_FAILURE; 3448 break; 3449 } 3450 goto state_change_exit; 3451 } else { 3452 i_mdi_client_lock(ct, NULL); 3453 } 3454 } 3455 /* 3456 * Mark the mdi_pathinfo node state as transient 3457 */ 3458 MDI_PI_LOCK(pip); 3459 MDI_PI_SET_OFFLINING(pip); 3460 break; 3461 } 3462 MDI_PI_UNLOCK(pip); 3463 MDI_CLIENT_UNSTABLE(ct); 3464 i_mdi_client_unlock(ct); 3465 3466 f = vh->vh_ops->vo_pi_state_change; 3467 if (f != NULL) 3468 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3469 3470 MDI_CLIENT_LOCK(ct); 3471 MDI_PI_LOCK(pip); 3472 if (rv == MDI_NOT_SUPPORTED) { 3473 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3474 } 3475 if (rv != MDI_SUCCESS) { 3476 MDI_DEBUG(2, (CE_WARN, ct->ct_dip, 3477 "!vo_pi_state_change: 
failed rv = %x", rv)); 3478 } 3479 if (MDI_PI_IS_TRANSIENT(pip)) { 3480 if (rv == MDI_SUCCESS) { 3481 MDI_PI_CLEAR_TRANSIENT(pip); 3482 } else { 3483 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3484 } 3485 } 3486 3487 /* 3488 * Wake anyone waiting for this mdi_pathinfo node 3489 */ 3490 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3491 MDI_PI_UNLOCK(pip); 3492 3493 /* 3494 * Mark the client device as stable 3495 */ 3496 MDI_CLIENT_STABLE(ct); 3497 if (rv == MDI_SUCCESS) { 3498 if (ct->ct_unstable == 0) { 3499 cdip = ct->ct_dip; 3500 3501 /* 3502 * Onlining the mdi_pathinfo node will impact the 3503 * client state Update the client and dev_info node 3504 * state accordingly 3505 */ 3506 rv = NDI_SUCCESS; 3507 i_mdi_client_update_state(ct); 3508 switch (MDI_CLIENT_STATE(ct)) { 3509 case MDI_CLIENT_STATE_OPTIMAL: 3510 case MDI_CLIENT_STATE_DEGRADED: 3511 if (cdip && !i_ddi_devi_attached(cdip) && 3512 ((state == MDI_PATHINFO_STATE_ONLINE) || 3513 (state == MDI_PATHINFO_STATE_STANDBY))) { 3514 3515 /* 3516 * Must do ndi_devi_online() through 3517 * hotplug thread for deferred 3518 * attach mechanism to work 3519 */ 3520 MDI_CLIENT_UNLOCK(ct); 3521 rv = ndi_devi_online(cdip, 0); 3522 MDI_CLIENT_LOCK(ct); 3523 if ((rv != NDI_SUCCESS) && 3524 (MDI_CLIENT_STATE(ct) == 3525 MDI_CLIENT_STATE_DEGRADED)) { 3526 /* 3527 * ndi_devi_online failed. 3528 * Reset client flags to 3529 * offline. 3530 */ 3531 MDI_DEBUG(1, (CE_WARN, cdip, 3532 "!ndi_devi_online: failed " 3533 " Error: %x", rv)); 3534 MDI_CLIENT_SET_OFFLINE(ct); 3535 } 3536 if (rv != NDI_SUCCESS) { 3537 /* Reset the path state */ 3538 MDI_PI_LOCK(pip); 3539 MDI_PI(pip)->pi_state = 3540 MDI_PI_OLD_STATE(pip); 3541 MDI_PI_UNLOCK(pip); 3542 } 3543 } 3544 break; 3545 3546 case MDI_CLIENT_STATE_FAILED: 3547 /* 3548 * This is the last path case for 3549 * non-user initiated events. 
3550 */ 3551 if (((flag & NDI_DEVI_REMOVE) == 0) && 3552 cdip && (i_ddi_node_state(cdip) >= 3553 DS_INITIALIZED)) { 3554 MDI_CLIENT_UNLOCK(ct); 3555 rv = ndi_devi_offline(cdip, 0); 3556 MDI_CLIENT_LOCK(ct); 3557 3558 if (rv != NDI_SUCCESS) { 3559 /* 3560 * ndi_devi_offline failed. 3561 * Reset client flags to 3562 * online as the path could not 3563 * be offlined. 3564 */ 3565 MDI_DEBUG(1, (CE_WARN, cdip, 3566 "!ndi_devi_offline: failed " 3567 " Error: %x", rv)); 3568 MDI_CLIENT_SET_ONLINE(ct); 3569 } 3570 } 3571 break; 3572 } 3573 /* 3574 * Convert to MDI error code 3575 */ 3576 switch (rv) { 3577 case NDI_SUCCESS: 3578 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3579 i_mdi_report_path_state(ct, pip); 3580 rv = MDI_SUCCESS; 3581 break; 3582 case NDI_BUSY: 3583 rv = MDI_BUSY; 3584 break; 3585 default: 3586 rv = MDI_FAILURE; 3587 break; 3588 } 3589 } 3590 } 3591 MDI_CLIENT_UNLOCK(ct); 3592 3593 state_change_exit: 3594 /* 3595 * Mark the pHCI as stable again. 3596 */ 3597 MDI_PHCI_LOCK(ph); 3598 MDI_PHCI_STABLE(ph); 3599 MDI_PHCI_UNLOCK(ph); 3600 return (rv); 3601 } 3602 3603 /* 3604 * mdi_pi_online(): 3605 * Place the path_info node in the online state. The path is 3606 * now available to be selected by mdi_select_path() for 3607 * transporting I/O requests to client devices. 
3608 * Return Values: 3609 * MDI_SUCCESS 3610 * MDI_FAILURE 3611 */ 3612 int 3613 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3614 { 3615 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3616 int client_held = 0; 3617 int rv; 3618 3619 ASSERT(ct != NULL); 3620 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3621 if (rv != MDI_SUCCESS) 3622 return (rv); 3623 3624 MDI_PI_LOCK(pip); 3625 if (MDI_PI(pip)->pi_pm_held == 0) { 3626 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3627 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3628 i_mdi_pm_hold_pip(pip); 3629 client_held = 1; 3630 } 3631 MDI_PI_UNLOCK(pip); 3632 3633 if (client_held) { 3634 MDI_CLIENT_LOCK(ct); 3635 if (ct->ct_power_cnt == 0) { 3636 rv = i_mdi_power_all_phci(ct); 3637 } 3638 3639 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3640 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3641 i_mdi_pm_hold_client(ct, 1); 3642 MDI_CLIENT_UNLOCK(ct); 3643 } 3644 3645 return (rv); 3646 } 3647 3648 /* 3649 * mdi_pi_standby(): 3650 * Place the mdi_pathinfo node in standby state 3651 * 3652 * Return Values: 3653 * MDI_SUCCESS 3654 * MDI_FAILURE 3655 */ 3656 int 3657 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3658 { 3659 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3660 } 3661 3662 /* 3663 * mdi_pi_fault(): 3664 * Place the mdi_pathinfo node in fault'ed state 3665 * Return Values: 3666 * MDI_SUCCESS 3667 * MDI_FAILURE 3668 */ 3669 int 3670 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3671 { 3672 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3673 } 3674 3675 /* 3676 * mdi_pi_offline(): 3677 * Offline a mdi_pathinfo node. 
3678 * Return Values: 3679 * MDI_SUCCESS 3680 * MDI_FAILURE 3681 */ 3682 int 3683 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3684 { 3685 int ret, client_held = 0; 3686 mdi_client_t *ct; 3687 3688 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3689 3690 if (ret == MDI_SUCCESS) { 3691 MDI_PI_LOCK(pip); 3692 if (MDI_PI(pip)->pi_pm_held) { 3693 client_held = 1; 3694 } 3695 MDI_PI_UNLOCK(pip); 3696 3697 if (client_held) { 3698 ct = MDI_PI(pip)->pi_client; 3699 MDI_CLIENT_LOCK(ct); 3700 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3701 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3702 i_mdi_pm_rele_client(ct, 1); 3703 MDI_CLIENT_UNLOCK(ct); 3704 } 3705 } 3706 3707 return (ret); 3708 } 3709 3710 /* 3711 * i_mdi_pi_offline(): 3712 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3713 */ 3714 static int 3715 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3716 { 3717 dev_info_t *vdip = NULL; 3718 mdi_vhci_t *vh = NULL; 3719 mdi_client_t *ct = NULL; 3720 int (*f)(); 3721 int rv; 3722 3723 MDI_PI_LOCK(pip); 3724 ct = MDI_PI(pip)->pi_client; 3725 ASSERT(ct != NULL); 3726 3727 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3728 /* 3729 * Give a chance for pending I/Os to complete. 3730 */ 3731 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3732 "%d cmds still pending on path: %p\n", 3733 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3734 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3735 &MDI_PI(pip)->pi_mutex, 3736 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3737 /* 3738 * The timeout time reached without ref_cnt being zero 3739 * being signaled. 
3740 */ 3741 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3742 "Timeout reached on path %p without the cond\n", 3743 (void *)pip)); 3744 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3745 "%d cmds still pending on path: %p\n", 3746 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3747 } 3748 } 3749 vh = ct->ct_vhci; 3750 vdip = vh->vh_dip; 3751 3752 /* 3753 * Notify vHCI that has registered this event 3754 */ 3755 ASSERT(vh->vh_ops); 3756 f = vh->vh_ops->vo_pi_state_change; 3757 3758 if (f != NULL) { 3759 MDI_PI_UNLOCK(pip); 3760 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3761 flags)) != MDI_SUCCESS) { 3762 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3763 "!vo_path_offline failed " 3764 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3765 } 3766 MDI_PI_LOCK(pip); 3767 } 3768 3769 /* 3770 * Set the mdi_pathinfo node state and clear the transient condition 3771 */ 3772 MDI_PI_SET_OFFLINE(pip); 3773 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3774 MDI_PI_UNLOCK(pip); 3775 3776 MDI_CLIENT_LOCK(ct); 3777 if (rv == MDI_SUCCESS) { 3778 if (ct->ct_unstable == 0) { 3779 dev_info_t *cdip = ct->ct_dip; 3780 3781 /* 3782 * Onlining the mdi_pathinfo node will impact the 3783 * client state Update the client and dev_info node 3784 * state accordingly 3785 */ 3786 i_mdi_client_update_state(ct); 3787 rv = NDI_SUCCESS; 3788 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3789 if (cdip && 3790 (i_ddi_node_state(cdip) >= 3791 DS_INITIALIZED)) { 3792 MDI_CLIENT_UNLOCK(ct); 3793 rv = ndi_devi_offline(cdip, 0); 3794 MDI_CLIENT_LOCK(ct); 3795 if (rv != NDI_SUCCESS) { 3796 /* 3797 * ndi_devi_offline failed. 3798 * Reset client flags to 3799 * online. 
3800 */ 3801 MDI_DEBUG(4, (CE_WARN, cdip, 3802 "!ndi_devi_offline: failed " 3803 " Error: %x", rv)); 3804 MDI_CLIENT_SET_ONLINE(ct); 3805 } 3806 } 3807 } 3808 /* 3809 * Convert to MDI error code 3810 */ 3811 switch (rv) { 3812 case NDI_SUCCESS: 3813 rv = MDI_SUCCESS; 3814 break; 3815 case NDI_BUSY: 3816 rv = MDI_BUSY; 3817 break; 3818 default: 3819 rv = MDI_FAILURE; 3820 break; 3821 } 3822 } 3823 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3824 i_mdi_report_path_state(ct, pip); 3825 } 3826 3827 MDI_CLIENT_UNLOCK(ct); 3828 3829 /* 3830 * Change in the mdi_pathinfo node state will impact the client state 3831 */ 3832 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3833 (void *)ct, (void *)pip)); 3834 return (rv); 3835 } 3836 3837 3838 /* 3839 * mdi_pi_get_addr(): 3840 * Get the unit address associated with a mdi_pathinfo node 3841 * 3842 * Return Values: 3843 * char * 3844 */ 3845 char * 3846 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3847 { 3848 if (pip == NULL) 3849 return (NULL); 3850 3851 return (MDI_PI(pip)->pi_addr); 3852 } 3853 3854 /* 3855 * mdi_pi_get_path_instance(): 3856 * Get the 'path_instance' of a mdi_pathinfo node 3857 * 3858 * Return Values: 3859 * path_instance 3860 */ 3861 int 3862 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 3863 { 3864 if (pip == NULL) 3865 return (0); 3866 3867 return (MDI_PI(pip)->pi_path_instance); 3868 } 3869 3870 /* 3871 * mdi_pi_pathname(): 3872 * Return pointer to path to pathinfo node. 
3873 */ 3874 char * 3875 mdi_pi_pathname(mdi_pathinfo_t *pip) 3876 { 3877 if (pip == NULL) 3878 return (NULL); 3879 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 3880 } 3881 3882 /* 3883 * mdi_pi_get_client(): 3884 * Get the client devinfo associated with a mdi_pathinfo node 3885 * 3886 * Return Values: 3887 * Handle to client device dev_info node 3888 */ 3889 dev_info_t * 3890 mdi_pi_get_client(mdi_pathinfo_t *pip) 3891 { 3892 dev_info_t *dip = NULL; 3893 if (pip) { 3894 dip = MDI_PI(pip)->pi_client->ct_dip; 3895 } 3896 return (dip); 3897 } 3898 3899 /* 3900 * mdi_pi_get_phci(): 3901 * Get the pHCI devinfo associated with the mdi_pathinfo node 3902 * Return Values: 3903 * Handle to dev_info node 3904 */ 3905 dev_info_t * 3906 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3907 { 3908 dev_info_t *dip = NULL; 3909 if (pip) { 3910 dip = MDI_PI(pip)->pi_phci->ph_dip; 3911 } 3912 return (dip); 3913 } 3914 3915 /* 3916 * mdi_pi_get_client_private(): 3917 * Get the client private information associated with the 3918 * mdi_pathinfo node 3919 */ 3920 void * 3921 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3922 { 3923 void *cprivate = NULL; 3924 if (pip) { 3925 cprivate = MDI_PI(pip)->pi_cprivate; 3926 } 3927 return (cprivate); 3928 } 3929 3930 /* 3931 * mdi_pi_set_client_private(): 3932 * Set the client private information in the mdi_pathinfo node 3933 */ 3934 void 3935 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3936 { 3937 if (pip) { 3938 MDI_PI(pip)->pi_cprivate = priv; 3939 } 3940 } 3941 3942 /* 3943 * mdi_pi_get_phci_private(): 3944 * Get the pHCI private information associated with the 3945 * mdi_pathinfo node 3946 */ 3947 caddr_t 3948 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3949 { 3950 caddr_t pprivate = NULL; 3951 if (pip) { 3952 pprivate = MDI_PI(pip)->pi_pprivate; 3953 } 3954 return (pprivate); 3955 } 3956 3957 /* 3958 * mdi_pi_set_phci_private(): 3959 * Set the pHCI private information in the mdi_pathinfo node 3960 */ 3961 
void 3962 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3963 { 3964 if (pip) { 3965 MDI_PI(pip)->pi_pprivate = priv; 3966 } 3967 } 3968 3969 /* 3970 * mdi_pi_get_state(): 3971 * Get the mdi_pathinfo node state. Transient states are internal 3972 * and not provided to the users 3973 */ 3974 mdi_pathinfo_state_t 3975 mdi_pi_get_state(mdi_pathinfo_t *pip) 3976 { 3977 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3978 3979 if (pip) { 3980 if (MDI_PI_IS_TRANSIENT(pip)) { 3981 /* 3982 * mdi_pathinfo is in state transition. Return the 3983 * last good state. 3984 */ 3985 state = MDI_PI_OLD_STATE(pip); 3986 } else { 3987 state = MDI_PI_STATE(pip); 3988 } 3989 } 3990 return (state); 3991 } 3992 3993 /* 3994 * Note that the following function needs to be the new interface for 3995 * mdi_pi_get_state when mpxio gets integrated to ON. 3996 */ 3997 int 3998 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3999 uint32_t *ext_state) 4000 { 4001 *state = MDI_PATHINFO_STATE_INIT; 4002 4003 if (pip) { 4004 if (MDI_PI_IS_TRANSIENT(pip)) { 4005 /* 4006 * mdi_pathinfo is in state transition. Return the 4007 * last good state. 
4008 */ 4009 *state = MDI_PI_OLD_STATE(pip); 4010 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4011 } else { 4012 *state = MDI_PI_STATE(pip); 4013 *ext_state = MDI_PI_EXT_STATE(pip); 4014 } 4015 } 4016 return (MDI_SUCCESS); 4017 } 4018 4019 /* 4020 * mdi_pi_get_preferred: 4021 * Get the preferred path flag 4022 */ 4023 int 4024 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4025 { 4026 if (pip) { 4027 return (MDI_PI(pip)->pi_preferred); 4028 } 4029 return (0); 4030 } 4031 4032 /* 4033 * mdi_pi_set_preferred: 4034 * Set the preferred path flag 4035 */ 4036 void 4037 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4038 { 4039 if (pip) { 4040 MDI_PI(pip)->pi_preferred = preferred; 4041 } 4042 } 4043 4044 /* 4045 * mdi_pi_set_state(): 4046 * Set the mdi_pathinfo node state 4047 */ 4048 void 4049 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4050 { 4051 uint32_t ext_state; 4052 4053 if (pip) { 4054 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4055 MDI_PI(pip)->pi_state = state; 4056 MDI_PI(pip)->pi_state |= ext_state; 4057 } 4058 } 4059 4060 /* 4061 * Property functions: 4062 */ 4063 int 4064 i_map_nvlist_error_to_mdi(int val) 4065 { 4066 int rv; 4067 4068 switch (val) { 4069 case 0: 4070 rv = DDI_PROP_SUCCESS; 4071 break; 4072 case EINVAL: 4073 case ENOTSUP: 4074 rv = DDI_PROP_INVAL_ARG; 4075 break; 4076 case ENOMEM: 4077 rv = DDI_PROP_NO_MEMORY; 4078 break; 4079 default: 4080 rv = DDI_PROP_NOT_FOUND; 4081 break; 4082 } 4083 return (rv); 4084 } 4085 4086 /* 4087 * mdi_pi_get_next_prop(): 4088 * Property walk function. The caller should hold mdi_pi_lock() 4089 * and release by calling mdi_pi_unlock() at the end of walk to 4090 * get a consistent value. 
4091 */ 4092 nvpair_t * 4093 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4094 { 4095 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4096 return (NULL); 4097 } 4098 ASSERT(MDI_PI_LOCKED(pip)); 4099 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4100 } 4101 4102 /* 4103 * mdi_prop_remove(): 4104 * Remove the named property from the named list. 4105 */ 4106 int 4107 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4108 { 4109 if (pip == NULL) { 4110 return (DDI_PROP_NOT_FOUND); 4111 } 4112 ASSERT(!MDI_PI_LOCKED(pip)); 4113 MDI_PI_LOCK(pip); 4114 if (MDI_PI(pip)->pi_prop == NULL) { 4115 MDI_PI_UNLOCK(pip); 4116 return (DDI_PROP_NOT_FOUND); 4117 } 4118 if (name) { 4119 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4120 } else { 4121 char nvp_name[MAXNAMELEN]; 4122 nvpair_t *nvp; 4123 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4124 while (nvp) { 4125 nvpair_t *next; 4126 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4127 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 4128 nvpair_name(nvp)); 4129 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4130 nvp_name); 4131 nvp = next; 4132 } 4133 } 4134 MDI_PI_UNLOCK(pip); 4135 return (DDI_PROP_SUCCESS); 4136 } 4137 4138 /* 4139 * mdi_prop_size(): 4140 * Get buffer size needed to pack the property data. 4141 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4142 * buffer size. 4143 */ 4144 int 4145 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4146 { 4147 int rv; 4148 size_t bufsize; 4149 4150 *buflenp = 0; 4151 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4152 return (DDI_PROP_NOT_FOUND); 4153 } 4154 ASSERT(MDI_PI_LOCKED(pip)); 4155 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4156 &bufsize, NV_ENCODE_NATIVE); 4157 *buflenp = bufsize; 4158 return (i_map_nvlist_error_to_mdi(rv)); 4159 } 4160 4161 /* 4162 * mdi_prop_pack(): 4163 * pack the property list. 
The caller should hold the 4164 * mdi_pathinfo_t node to get a consistent data 4165 */ 4166 int 4167 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4168 { 4169 int rv; 4170 size_t bufsize; 4171 4172 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4173 return (DDI_PROP_NOT_FOUND); 4174 } 4175 4176 ASSERT(MDI_PI_LOCKED(pip)); 4177 4178 bufsize = buflen; 4179 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4180 NV_ENCODE_NATIVE, KM_SLEEP); 4181 4182 return (i_map_nvlist_error_to_mdi(rv)); 4183 } 4184 4185 /* 4186 * mdi_prop_update_byte(): 4187 * Create/Update a byte property 4188 */ 4189 int 4190 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4191 { 4192 int rv; 4193 4194 if (pip == NULL) { 4195 return (DDI_PROP_INVAL_ARG); 4196 } 4197 ASSERT(!MDI_PI_LOCKED(pip)); 4198 MDI_PI_LOCK(pip); 4199 if (MDI_PI(pip)->pi_prop == NULL) { 4200 MDI_PI_UNLOCK(pip); 4201 return (DDI_PROP_NOT_FOUND); 4202 } 4203 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4204 MDI_PI_UNLOCK(pip); 4205 return (i_map_nvlist_error_to_mdi(rv)); 4206 } 4207 4208 /* 4209 * mdi_prop_update_byte_array(): 4210 * Create/Update a byte array property 4211 */ 4212 int 4213 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4214 uint_t nelements) 4215 { 4216 int rv; 4217 4218 if (pip == NULL) { 4219 return (DDI_PROP_INVAL_ARG); 4220 } 4221 ASSERT(!MDI_PI_LOCKED(pip)); 4222 MDI_PI_LOCK(pip); 4223 if (MDI_PI(pip)->pi_prop == NULL) { 4224 MDI_PI_UNLOCK(pip); 4225 return (DDI_PROP_NOT_FOUND); 4226 } 4227 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4228 MDI_PI_UNLOCK(pip); 4229 return (i_map_nvlist_error_to_mdi(rv)); 4230 } 4231 4232 /* 4233 * mdi_prop_update_int(): 4234 * Create/Update a 32 bit integer property 4235 */ 4236 int 4237 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4238 { 4239 int rv; 4240 4241 if (pip == NULL) { 4242 return (DDI_PROP_INVAL_ARG); 4243 } 4244 
ASSERT(!MDI_PI_LOCKED(pip)); 4245 MDI_PI_LOCK(pip); 4246 if (MDI_PI(pip)->pi_prop == NULL) { 4247 MDI_PI_UNLOCK(pip); 4248 return (DDI_PROP_NOT_FOUND); 4249 } 4250 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4251 MDI_PI_UNLOCK(pip); 4252 return (i_map_nvlist_error_to_mdi(rv)); 4253 } 4254 4255 /* 4256 * mdi_prop_update_int64(): 4257 * Create/Update a 64 bit integer property 4258 */ 4259 int 4260 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4261 { 4262 int rv; 4263 4264 if (pip == NULL) { 4265 return (DDI_PROP_INVAL_ARG); 4266 } 4267 ASSERT(!MDI_PI_LOCKED(pip)); 4268 MDI_PI_LOCK(pip); 4269 if (MDI_PI(pip)->pi_prop == NULL) { 4270 MDI_PI_UNLOCK(pip); 4271 return (DDI_PROP_NOT_FOUND); 4272 } 4273 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4274 MDI_PI_UNLOCK(pip); 4275 return (i_map_nvlist_error_to_mdi(rv)); 4276 } 4277 4278 /* 4279 * mdi_prop_update_int_array(): 4280 * Create/Update a int array property 4281 */ 4282 int 4283 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4284 uint_t nelements) 4285 { 4286 int rv; 4287 4288 if (pip == NULL) { 4289 return (DDI_PROP_INVAL_ARG); 4290 } 4291 ASSERT(!MDI_PI_LOCKED(pip)); 4292 MDI_PI_LOCK(pip); 4293 if (MDI_PI(pip)->pi_prop == NULL) { 4294 MDI_PI_UNLOCK(pip); 4295 return (DDI_PROP_NOT_FOUND); 4296 } 4297 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4298 nelements); 4299 MDI_PI_UNLOCK(pip); 4300 return (i_map_nvlist_error_to_mdi(rv)); 4301 } 4302 4303 /* 4304 * mdi_prop_update_string(): 4305 * Create/Update a string property 4306 */ 4307 int 4308 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4309 { 4310 int rv; 4311 4312 if (pip == NULL) { 4313 return (DDI_PROP_INVAL_ARG); 4314 } 4315 ASSERT(!MDI_PI_LOCKED(pip)); 4316 MDI_PI_LOCK(pip); 4317 if (MDI_PI(pip)->pi_prop == NULL) { 4318 MDI_PI_UNLOCK(pip); 4319 return (DDI_PROP_NOT_FOUND); 4320 } 4321 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4322 MDI_PI_UNLOCK(pip); 4323 return (i_map_nvlist_error_to_mdi(rv)); 4324 } 4325 4326 /* 4327 * mdi_prop_update_string_array(): 4328 * Create/Update a string array property 4329 */ 4330 int 4331 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4332 uint_t nelements) 4333 { 4334 int rv; 4335 4336 if (pip == NULL) { 4337 return (DDI_PROP_INVAL_ARG); 4338 } 4339 ASSERT(!MDI_PI_LOCKED(pip)); 4340 MDI_PI_LOCK(pip); 4341 if (MDI_PI(pip)->pi_prop == NULL) { 4342 MDI_PI_UNLOCK(pip); 4343 return (DDI_PROP_NOT_FOUND); 4344 } 4345 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4346 nelements); 4347 MDI_PI_UNLOCK(pip); 4348 return (i_map_nvlist_error_to_mdi(rv)); 4349 } 4350 4351 /* 4352 * mdi_prop_lookup_byte(): 4353 * Look for byte property identified by name. The data returned 4354 * is the actual property and valid as long as mdi_pathinfo_t node 4355 * is alive. 4356 */ 4357 int 4358 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4359 { 4360 int rv; 4361 4362 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4363 return (DDI_PROP_NOT_FOUND); 4364 } 4365 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4366 return (i_map_nvlist_error_to_mdi(rv)); 4367 } 4368 4369 4370 /* 4371 * mdi_prop_lookup_byte_array(): 4372 * Look for byte array property identified by name. The data 4373 * returned is the actual property and valid as long as 4374 * mdi_pathinfo_t node is alive. 4375 */ 4376 int 4377 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4378 uint_t *nelements) 4379 { 4380 int rv; 4381 4382 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4383 return (DDI_PROP_NOT_FOUND); 4384 } 4385 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4386 nelements); 4387 return (i_map_nvlist_error_to_mdi(rv)); 4388 } 4389 4390 /* 4391 * mdi_prop_lookup_int(): 4392 * Look for int property identified by name. 
The data returned 4393 * is the actual property and valid as long as mdi_pathinfo_t 4394 * node is alive. 4395 */ 4396 int 4397 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4398 { 4399 int rv; 4400 4401 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4402 return (DDI_PROP_NOT_FOUND); 4403 } 4404 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4405 return (i_map_nvlist_error_to_mdi(rv)); 4406 } 4407 4408 /* 4409 * mdi_prop_lookup_int64(): 4410 * Look for int64 property identified by name. The data returned 4411 * is the actual property and valid as long as mdi_pathinfo_t node 4412 * is alive. 4413 */ 4414 int 4415 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4416 { 4417 int rv; 4418 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4419 return (DDI_PROP_NOT_FOUND); 4420 } 4421 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4422 return (i_map_nvlist_error_to_mdi(rv)); 4423 } 4424 4425 /* 4426 * mdi_prop_lookup_int_array(): 4427 * Look for int array property identified by name. The data 4428 * returned is the actual property and valid as long as 4429 * mdi_pathinfo_t node is alive. 4430 */ 4431 int 4432 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4433 uint_t *nelements) 4434 { 4435 int rv; 4436 4437 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4438 return (DDI_PROP_NOT_FOUND); 4439 } 4440 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4441 (int32_t **)data, nelements); 4442 return (i_map_nvlist_error_to_mdi(rv)); 4443 } 4444 4445 /* 4446 * mdi_prop_lookup_string(): 4447 * Look for string property identified by name. The data 4448 * returned is the actual property and valid as long as 4449 * mdi_pathinfo_t node is alive. 
4450 */ 4451 int 4452 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4453 { 4454 int rv; 4455 4456 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4457 return (DDI_PROP_NOT_FOUND); 4458 } 4459 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4460 return (i_map_nvlist_error_to_mdi(rv)); 4461 } 4462 4463 /* 4464 * mdi_prop_lookup_string_array(): 4465 * Look for string array property identified by name. The data 4466 * returned is the actual property and valid as long as 4467 * mdi_pathinfo_t node is alive. 4468 */ 4469 int 4470 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4471 uint_t *nelements) 4472 { 4473 int rv; 4474 4475 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4476 return (DDI_PROP_NOT_FOUND); 4477 } 4478 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4479 nelements); 4480 return (i_map_nvlist_error_to_mdi(rv)); 4481 } 4482 4483 /* 4484 * mdi_prop_free(): 4485 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4486 * functions return the pointer to actual property data and not a 4487 * copy of it. So the data returned is valid as long as 4488 * mdi_pathinfo_t node is valid. 
4489 */ 4490 /*ARGSUSED*/ 4491 int 4492 mdi_prop_free(void *data) 4493 { 4494 return (DDI_PROP_SUCCESS); 4495 } 4496 4497 /*ARGSUSED*/ 4498 static void 4499 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4500 { 4501 char *phci_path, *ct_path; 4502 char *ct_status; 4503 char *status; 4504 dev_info_t *dip = ct->ct_dip; 4505 char lb_buf[64]; 4506 4507 ASSERT(MDI_CLIENT_LOCKED(ct)); 4508 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4509 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4510 return; 4511 } 4512 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4513 ct_status = "optimal"; 4514 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4515 ct_status = "degraded"; 4516 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4517 ct_status = "failed"; 4518 } else { 4519 ct_status = "unknown"; 4520 } 4521 4522 if (MDI_PI_IS_OFFLINE(pip)) { 4523 status = "offline"; 4524 } else if (MDI_PI_IS_ONLINE(pip)) { 4525 status = "online"; 4526 } else if (MDI_PI_IS_STANDBY(pip)) { 4527 status = "standby"; 4528 } else if (MDI_PI_IS_FAULT(pip)) { 4529 status = "faulted"; 4530 } else { 4531 status = "unknown"; 4532 } 4533 4534 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4535 (void) snprintf(lb_buf, sizeof (lb_buf), 4536 "%s, region-size: %d", mdi_load_balance_lba, 4537 ct->ct_lb_args->region_size); 4538 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4539 (void) snprintf(lb_buf, sizeof (lb_buf), 4540 "%s", mdi_load_balance_none); 4541 } else { 4542 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4543 mdi_load_balance_rr); 4544 } 4545 4546 if (dip) { 4547 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4548 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4549 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4550 "path %s (%s%d) to target address: %s is %s" 4551 " Load balancing: %s\n", 4552 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4553 ddi_get_instance(dip), ct_status, 4554 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4555 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4556 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4557 MDI_PI(pip)->pi_addr, status, lb_buf); 4558 kmem_free(phci_path, MAXPATHLEN); 4559 kmem_free(ct_path, MAXPATHLEN); 4560 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4561 } 4562 } 4563 4564 #ifdef DEBUG 4565 /* 4566 * i_mdi_log(): 4567 * Utility function for error message management 4568 * 4569 */ 4570 /*PRINTFLIKE3*/ 4571 static void 4572 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4573 { 4574 char name[MAXNAMELEN]; 4575 char buf[MAXNAMELEN]; 4576 char *bp; 4577 va_list ap; 4578 int log_only = 0; 4579 int boot_only = 0; 4580 int console_only = 0; 4581 4582 if (dip) { 4583 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4584 ddi_node_name(dip), ddi_get_instance(dip)); 4585 } else { 4586 name[0] = 0; 4587 } 4588 4589 va_start(ap, fmt); 4590 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4591 va_end(ap); 4592 4593 switch (buf[0]) { 4594 case '!': 4595 bp = &buf[1]; 4596 log_only = 1; 4597 break; 4598 case '?': 4599 bp = &buf[1]; 4600 boot_only = 1; 4601 break; 4602 case '^': 4603 bp = &buf[1]; 4604 console_only = 1; 4605 break; 4606 default: 4607 bp = buf; 4608 break; 4609 } 4610 if (mdi_debug_logonly) { 4611 log_only = 1; 4612 boot_only = 0; 4613 console_only = 0; 4614 } 4615 4616 switch (level) { 4617 case CE_NOTE: 4618 level = CE_CONT; 4619 /* FALLTHROUGH */ 4620 case CE_CONT: 4621 case CE_WARN: 4622 case CE_PANIC: 4623 if (boot_only) { 4624 cmn_err(level, "?mdi: %s%s", name, bp); 4625 } else if (console_only) { 4626 cmn_err(level, "^mdi: %s%s", name, bp); 4627 } else if (log_only) { 4628 cmn_err(level, "!mdi: %s%s", name, bp); 4629 } else { 4630 cmn_err(level, "mdi: %s%s", name, bp); 4631 } 4632 break; 4633 default: 4634 cmn_err(level, "mdi: %s%s", name, bp); 4635 break; 4636 } 4637 } 4638 #endif /* DEBUG */ 4639 4640 void 4641 i_mdi_client_online(dev_info_t *ct_dip) 4642 { 4643 mdi_client_t *ct; 4644 4645 /* 4646 * Client online notification. 
Mark client state as online 4647 * restore our binding with dev_info node 4648 */ 4649 ct = i_devi_get_client(ct_dip); 4650 ASSERT(ct != NULL); 4651 MDI_CLIENT_LOCK(ct); 4652 MDI_CLIENT_SET_ONLINE(ct); 4653 /* catch for any memory leaks */ 4654 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4655 ct->ct_dip = ct_dip; 4656 4657 if (ct->ct_power_cnt == 0) 4658 (void) i_mdi_power_all_phci(ct); 4659 4660 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4661 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4662 i_mdi_pm_hold_client(ct, 1); 4663 4664 MDI_CLIENT_UNLOCK(ct); 4665 } 4666 4667 void 4668 i_mdi_phci_online(dev_info_t *ph_dip) 4669 { 4670 mdi_phci_t *ph; 4671 4672 /* pHCI online notification. Mark state accordingly */ 4673 ph = i_devi_get_phci(ph_dip); 4674 ASSERT(ph != NULL); 4675 MDI_PHCI_LOCK(ph); 4676 MDI_PHCI_SET_ONLINE(ph); 4677 MDI_PHCI_UNLOCK(ph); 4678 } 4679 4680 /* 4681 * mdi_devi_online(): 4682 * Online notification from NDI framework on pHCI/client 4683 * device online. 4684 * Return Values: 4685 * NDI_SUCCESS 4686 * MDI_FAILURE 4687 */ 4688 /*ARGSUSED*/ 4689 int 4690 mdi_devi_online(dev_info_t *dip, uint_t flags) 4691 { 4692 if (MDI_PHCI(dip)) { 4693 i_mdi_phci_online(dip); 4694 } 4695 4696 if (MDI_CLIENT(dip)) { 4697 i_mdi_client_online(dip); 4698 } 4699 return (NDI_SUCCESS); 4700 } 4701 4702 /* 4703 * mdi_devi_offline(): 4704 * Offline notification from NDI framework on pHCI/Client device 4705 * offline. 
/*
 * mdi_devi_offline():
 *		Offline notification from NDI framework on pHCI/Client device
 *		offline.
 *
 *		The client role (if any) is offlined first; if that fails the
 *		pHCI role is not attempted.  If the pHCI role then fails on a
 *		node that is also a client, the client is put back online.
 *
 * Return Values:
 *		NDI_SUCCESS
 *		otherwise the value returned by i_mdi_client_offline() or
 *		i_mdi_phci_offline()
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*
 * i_mdi_phci_offline():
 *		Offline the pHCI bound to dip together with its mdi_pathinfo
 *		nodes.
 *
 *		Phase 1 walks the pHCI path list.  Any client that is failing
 *		over or unstable aborts the request with NDI_BUSY.  When
 *		i_mdi_client_compute_state() reports that a client would be
 *		FAILED, that client's dev_info node is offlined with the pHCI
 *		lock dropped.
 *		Phase 2 (only if a client offline failed) re-walks the paths
 *		visited so far, calls ndi_devi_online()/ndi_devi_offline()
 *		according to each client's current state, and returns NDI_BUSY.
 *		Phase 3 marks the pHCI offline and every path OFFLINING, waits
 *		one tick, then offlines each path via i_mdi_pi_offline().
 *
 * Return Values:
 *		NDI_SUCCESS	offlined, already offline, or no pHCI on dip
 *		NDI_BUSY	pHCI/client in transient state, a client could
 *				not be offlined, or a path failed to go offline
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n",
	    (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined",
		    (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (CE_WARN, dip,
		    "!One or more target devices are in transient "
		    "state. This device can not be removed at "
		    "this moment. Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* capture the successor before any lock is dropped */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * Both the client and pHCI locks are released around
			 * ndi_devi_offline(); the pHCI lock is re-taken on
			 * either outcome.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (CE_WARN, dip,
				    "!pHCI device (%s%d) is Busy. %s",
				    ddi_driver_name(dip), ddi_get_instance(dip),
				    "This device can not be removed at "
				    "this moment. Please try again later."));
				/* remember where to stop the rollback walk */
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	if (failed_pip) {
		/*
		 * Rollback: revisit every path that precedes failed_pip and
		 * bring its client dev_info node in line with the client's
		 * current MDI state.  Results of the NDI calls are ignored.
		 */
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			/* no dev_info node or unhandled state: just move on */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not read again */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (CE_WARN, dip,
			    "!pHCI device (%s%d) is Busy. %s",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    "This device can not be removed at "
			    "this moment. Please try again later."));
			MDI_PI_UNLOCK(pip);
			/*
			 * The pHCI is flipped back to online; paths already
			 * processed by this loop are not reverted here.
			 */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
is Busy. %s", 4919 ddi_driver_name(dip), ddi_get_instance(dip), 4920 "This device can not be removed at " 4921 "this moment. Please try again later.")); 4922 MDI_PI_UNLOCK(pip); 4923 MDI_PHCI_SET_ONLINE(ph); 4924 MDI_PHCI_UNLOCK(ph); 4925 return (NDI_BUSY); 4926 } 4927 MDI_PI_UNLOCK(pip); 4928 pip = next; 4929 } 4930 MDI_PHCI_UNLOCK(ph); 4931 4932 return (rv); 4933 } 4934 4935 void 4936 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 4937 { 4938 mdi_phci_t *ph; 4939 mdi_client_t *ct; 4940 mdi_pathinfo_t *pip; 4941 mdi_pathinfo_t *next; 4942 dev_info_t *cdip; 4943 4944 if (!MDI_PHCI(dip)) 4945 return; 4946 4947 ph = i_devi_get_phci(dip); 4948 if (ph == NULL) { 4949 return; 4950 } 4951 4952 MDI_PHCI_LOCK(ph); 4953 4954 if (MDI_PHCI_IS_OFFLINE(ph)) { 4955 /* has no last path */ 4956 MDI_PHCI_UNLOCK(ph); 4957 return; 4958 } 4959 4960 pip = ph->ph_path_head; 4961 while (pip != NULL) { 4962 MDI_PI_LOCK(pip); 4963 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4964 4965 ct = MDI_PI(pip)->pi_client; 4966 i_mdi_client_lock(ct, pip); 4967 MDI_PI_UNLOCK(pip); 4968 4969 cdip = ct->ct_dip; 4970 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4971 (i_mdi_client_compute_state(ct, ph) == 4972 MDI_CLIENT_STATE_FAILED)) { 4973 /* Last path. 
Mark client dip as retiring */ 4974 i_mdi_client_unlock(ct); 4975 MDI_PHCI_UNLOCK(ph); 4976 (void) e_ddi_mark_retiring(cdip, cons_array); 4977 MDI_PHCI_LOCK(ph); 4978 pip = next; 4979 } else { 4980 i_mdi_client_unlock(ct); 4981 pip = next; 4982 } 4983 } 4984 4985 MDI_PHCI_UNLOCK(ph); 4986 4987 return; 4988 } 4989 4990 void 4991 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 4992 { 4993 mdi_phci_t *ph; 4994 mdi_client_t *ct; 4995 mdi_pathinfo_t *pip; 4996 mdi_pathinfo_t *next; 4997 dev_info_t *cdip; 4998 4999 if (!MDI_PHCI(dip)) 5000 return; 5001 5002 ph = i_devi_get_phci(dip); 5003 if (ph == NULL) 5004 return; 5005 5006 MDI_PHCI_LOCK(ph); 5007 5008 if (MDI_PHCI_IS_OFFLINE(ph)) { 5009 MDI_PHCI_UNLOCK(ph); 5010 /* not last path */ 5011 return; 5012 } 5013 5014 if (ph->ph_unstable) { 5015 MDI_PHCI_UNLOCK(ph); 5016 /* can't check for constraints */ 5017 *constraint = 0; 5018 return; 5019 } 5020 5021 pip = ph->ph_path_head; 5022 while (pip != NULL) { 5023 MDI_PI_LOCK(pip); 5024 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5025 5026 /* 5027 * The mdi_pathinfo state is OK. Check the client state. 5028 * If failover in progress fail the pHCI from offlining 5029 */ 5030 ct = MDI_PI(pip)->pi_client; 5031 i_mdi_client_lock(ct, pip); 5032 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5033 (ct->ct_unstable)) { 5034 /* 5035 * Failover is in progress, can't check for constraints 5036 */ 5037 MDI_PI_UNLOCK(pip); 5038 i_mdi_client_unlock(ct); 5039 MDI_PHCI_UNLOCK(ph); 5040 *constraint = 0; 5041 return; 5042 } 5043 MDI_PI_UNLOCK(pip); 5044 5045 /* 5046 * Check to see of we are retiring the last path of this 5047 * client device... 
/*
 * mdi_phci_retire_finalize():
 *		Offline the path(s) hanging off the pHCI.  If this is the
 *		last path to any client, check that constraints have been
 *		applied.
 *
 *		dip		pHCI dev_info node; ignored if not a pHCI
 *		phci_only	when non-zero, last-path clients are not
 *				passed to e_ddi_retire_finalize()
 *
 *		The first walk records whether the pHCI or any client is in a
 *		transient state and finalizes last-path clients.  If anything
 *		was transient the routine warns and returns without offlining.
 *		Otherwise the pHCI is marked offline, every path is marked
 *		OFFLINING, and after a one tick delay each path is offlined
 *		with i_mdi_pi_offline().
 */
void
mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		unstable = 0;
	int		constraint;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* no last path and no pips */
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* no last path and no pips */
		return;
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		unstable = 1;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		/* capture the successor before any lock is dropped */
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * if failover in progress fail the pHCI from offlining
		 * (only recorded here; the walk continues so that every
		 * last-path client is still finalized)
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			unstable = 1;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see if we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (!phci_only && cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			/*
			 * We don't retire clients we just retire the
			 * path to a client. If it is the last path
			 * to a client, constraints are checked and
			 * if we pass the last path is offlined. MPXIO will
			 * then fail all I/Os to the client. Since we don't
			 * want to retire the client on a path error
			 * set constraint = 0 so that the client dip
			 * is not retired.
			 */
			constraint = 0;
			(void) e_ddi_retire_finalize(cdip, &constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Cannot offline pip(s)
	 */
	if (unstable) {
		cmn_err(CE_WARN, "PHCI in transient state, cannot "
		    "retire, dip = %p", (void *)dip);
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay(1);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, 0);
		MDI_PI_LOCK(pip);
		/* NOTE(review): ct is assigned here but not read again */
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
			    "PHCI dip = %p", (void *)dip);
			MDI_PI_UNLOCK(pip);
			/* a path refused to go offline: pHCI back to online */
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return;
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return;
}
5202 MDI_PI_UNLOCK(pip); 5203 pip = next; 5204 } 5205 MDI_PHCI_UNLOCK(ph); 5206 5207 return; 5208 } 5209 5210 void 5211 mdi_phci_unretire(dev_info_t *dip) 5212 { 5213 ASSERT(MDI_PHCI(dip)); 5214 5215 /* 5216 * Online the phci 5217 */ 5218 i_mdi_phci_online(dip); 5219 } 5220 5221 /*ARGSUSED*/ 5222 static int 5223 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5224 { 5225 int rv = NDI_SUCCESS; 5226 mdi_client_t *ct; 5227 5228 /* 5229 * Client component to go offline. Make sure that we are 5230 * not in failing over state and update client state 5231 * accordingly 5232 */ 5233 ct = i_devi_get_client(dip); 5234 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 5235 (void *)dip, (void *)ct)); 5236 if (ct != NULL) { 5237 MDI_CLIENT_LOCK(ct); 5238 if (ct->ct_unstable) { 5239 /* 5240 * One or more paths are in transient state, 5241 * Dont allow offline of a client device 5242 */ 5243 MDI_DEBUG(1, (CE_WARN, dip, 5244 "!One or more paths to this device is " 5245 "in transient state. This device can not " 5246 "be removed at this moment. " 5247 "Please try again later.")); 5248 MDI_CLIENT_UNLOCK(ct); 5249 return (NDI_BUSY); 5250 } 5251 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5252 /* 5253 * Failover is in progress, Dont allow DR of 5254 * a client device 5255 */ 5256 MDI_DEBUG(1, (CE_WARN, dip, 5257 "!Client device (%s%d) is Busy. %s", 5258 ddi_driver_name(dip), ddi_get_instance(dip), 5259 "This device can not be removed at " 5260 "this moment. 
Please try again later.")); 5261 MDI_CLIENT_UNLOCK(ct); 5262 return (NDI_BUSY); 5263 } 5264 MDI_CLIENT_SET_OFFLINE(ct); 5265 5266 /* 5267 * Unbind our relationship with the dev_info node 5268 */ 5269 if (flags & NDI_DEVI_REMOVE) { 5270 ct->ct_dip = NULL; 5271 } 5272 MDI_CLIENT_UNLOCK(ct); 5273 } 5274 return (rv); 5275 } 5276 5277 /* 5278 * mdi_pre_attach(): 5279 * Pre attach() notification handler 5280 */ 5281 /*ARGSUSED*/ 5282 int 5283 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5284 { 5285 /* don't support old DDI_PM_RESUME */ 5286 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5287 (cmd == DDI_PM_RESUME)) 5288 return (DDI_FAILURE); 5289 5290 return (DDI_SUCCESS); 5291 } 5292 5293 /* 5294 * mdi_post_attach(): 5295 * Post attach() notification handler 5296 */ 5297 /*ARGSUSED*/ 5298 void 5299 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5300 { 5301 mdi_phci_t *ph; 5302 mdi_client_t *ct; 5303 mdi_vhci_t *vh; 5304 5305 if (MDI_PHCI(dip)) { 5306 ph = i_devi_get_phci(dip); 5307 ASSERT(ph != NULL); 5308 5309 MDI_PHCI_LOCK(ph); 5310 switch (cmd) { 5311 case DDI_ATTACH: 5312 MDI_DEBUG(2, (CE_NOTE, dip, 5313 "!pHCI post_attach: called %p\n", (void *)ph)); 5314 if (error == DDI_SUCCESS) { 5315 MDI_PHCI_SET_ATTACH(ph); 5316 } else { 5317 MDI_DEBUG(1, (CE_NOTE, dip, 5318 "!pHCI post_attach: failed error=%d\n", 5319 error)); 5320 MDI_PHCI_SET_DETACH(ph); 5321 } 5322 break; 5323 5324 case DDI_RESUME: 5325 MDI_DEBUG(2, (CE_NOTE, dip, 5326 "!pHCI post_resume: called %p\n", (void *)ph)); 5327 if (error == DDI_SUCCESS) { 5328 MDI_PHCI_SET_RESUME(ph); 5329 } else { 5330 MDI_DEBUG(1, (CE_NOTE, dip, 5331 "!pHCI post_resume: failed error=%d\n", 5332 error)); 5333 MDI_PHCI_SET_SUSPEND(ph); 5334 } 5335 break; 5336 } 5337 MDI_PHCI_UNLOCK(ph); 5338 } 5339 5340 if (MDI_CLIENT(dip)) { 5341 ct = i_devi_get_client(dip); 5342 ASSERT(ct != NULL); 5343 5344 MDI_CLIENT_LOCK(ct); 5345 switch (cmd) { 5346 case DDI_ATTACH: 5347 MDI_DEBUG(2, (CE_NOTE, dip, 
/*
 * mdi_pre_detach():
 *		Pre detach notification handler.  The client role is notified
 *		first and its result is deliberately discarded; only the pHCI
 *		role can veto the operation.
 *
 * Return Values:
 *		DDI_SUCCESS
 *		otherwise the value returned by i_mdi_phci_pre_detach()
 */
/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*
 * i_mdi_phci_pre_detach():
 *		pHCI side of the pre detach notification.
 *
 *		DDI_DETACH	fails unless the pHCI is already offline;
 *				otherwise the pHCI is marked detached.
 *		DDI_SUSPEND	suspends every client reachable through this
 *				pHCI that is neither detached nor already
 *				suspended; if one of them fails, the clients
 *				visited before it that are suspended are
 *				resumed again.
 *		other		rejected.
 *
 *		The pHCI lock is held for the whole switch, including across
 *		the devi_detach()/devi_attach() calls.
 *
 * Return Values:
 *		DDI_SUCCESS	also when dip has no pHCI structure
 *		DDI_FAILURE	pHCI not offline (DDI_DETACH) or unknown cmd
 *		otherwise the failing devi_detach() result (DDI_SUSPEND)
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_detach: called %p\n", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended.  Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			/* snapshot ct_dip while the client lock is held */
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					/* rollback stops at this path */
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert the client devices suspended so far back
			 * to the resumed state.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
5500 */ 5501 pip = ph->ph_path_head; 5502 while (pip != failed_pip) { 5503 dev_info_t *cdip; 5504 MDI_PI_LOCK(pip); 5505 next = 5506 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5507 ct = MDI_PI(pip)->pi_client; 5508 i_mdi_client_lock(ct, pip); 5509 cdip = ct->ct_dip; 5510 MDI_PI_UNLOCK(pip); 5511 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5512 i_mdi_client_unlock(ct); 5513 (void) devi_attach(cdip, DDI_RESUME); 5514 } else { 5515 i_mdi_client_unlock(ct); 5516 } 5517 pip = next; 5518 } 5519 } 5520 break; 5521 5522 default: 5523 rv = DDI_FAILURE; 5524 break; 5525 } 5526 MDI_PHCI_UNLOCK(ph); 5527 return (rv); 5528 } 5529 5530 /*ARGSUSED*/ 5531 static int 5532 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5533 { 5534 int rv = DDI_SUCCESS; 5535 mdi_client_t *ct; 5536 5537 ct = i_devi_get_client(dip); 5538 if (ct == NULL) { 5539 return (rv); 5540 } 5541 5542 MDI_CLIENT_LOCK(ct); 5543 switch (cmd) { 5544 case DDI_DETACH: 5545 MDI_DEBUG(2, (CE_NOTE, dip, 5546 "!Client pre_detach: called %p\n", (void *)ct)); 5547 MDI_CLIENT_SET_DETACH(ct); 5548 break; 5549 5550 case DDI_SUSPEND: 5551 MDI_DEBUG(2, (CE_NOTE, dip, 5552 "!Client pre_suspend: called %p\n", (void *)ct)); 5553 MDI_CLIENT_SET_SUSPEND(ct); 5554 break; 5555 5556 default: 5557 rv = DDI_FAILURE; 5558 break; 5559 } 5560 MDI_CLIENT_UNLOCK(ct); 5561 return (rv); 5562 } 5563 5564 /* 5565 * mdi_post_detach(): 5566 * Post detach notification handler 5567 */ 5568 /*ARGSUSED*/ 5569 void 5570 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5571 { 5572 /* 5573 * Detach/Suspend of mpxio component failed. Update our state 5574 * too 5575 */ 5576 if (MDI_PHCI(dip)) 5577 i_mdi_phci_post_detach(dip, cmd, error); 5578 5579 if (MDI_CLIENT(dip)) 5580 i_mdi_client_post_detach(dip, cmd, error); 5581 } 5582 5583 /*ARGSUSED*/ 5584 static void 5585 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5586 { 5587 mdi_phci_t *ph; 5588 5589 /* 5590 * Detach/Suspend of phci component failed. 
Update our state 5591 * too 5592 */ 5593 ph = i_devi_get_phci(dip); 5594 if (ph == NULL) { 5595 return; 5596 } 5597 5598 MDI_PHCI_LOCK(ph); 5599 /* 5600 * Detach of pHCI failed. Restore back converse 5601 * state 5602 */ 5603 switch (cmd) { 5604 case DDI_DETACH: 5605 MDI_DEBUG(2, (CE_NOTE, dip, 5606 "!pHCI post_detach: called %p\n", (void *)ph)); 5607 if (error != DDI_SUCCESS) 5608 MDI_PHCI_SET_ATTACH(ph); 5609 break; 5610 5611 case DDI_SUSPEND: 5612 MDI_DEBUG(2, (CE_NOTE, dip, 5613 "!pHCI post_suspend: called %p\n", (void *)ph)); 5614 if (error != DDI_SUCCESS) 5615 MDI_PHCI_SET_RESUME(ph); 5616 break; 5617 } 5618 MDI_PHCI_UNLOCK(ph); 5619 } 5620 5621 /*ARGSUSED*/ 5622 static void 5623 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5624 { 5625 mdi_client_t *ct; 5626 5627 ct = i_devi_get_client(dip); 5628 if (ct == NULL) { 5629 return; 5630 } 5631 MDI_CLIENT_LOCK(ct); 5632 /* 5633 * Detach of Client failed. Restore back converse 5634 * state 5635 */ 5636 switch (cmd) { 5637 case DDI_DETACH: 5638 MDI_DEBUG(2, (CE_NOTE, dip, 5639 "!Client post_detach: called %p\n", (void *)ct)); 5640 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5641 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5642 "i_mdi_pm_rele_client\n")); 5643 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5644 } else { 5645 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5646 "i_mdi_pm_reset_client\n")); 5647 i_mdi_pm_reset_client(ct); 5648 } 5649 if (error != DDI_SUCCESS) 5650 MDI_CLIENT_SET_ATTACH(ct); 5651 break; 5652 5653 case DDI_SUSPEND: 5654 MDI_DEBUG(2, (CE_NOTE, dip, 5655 "!Client post_suspend: called %p\n", (void *)ct)); 5656 if (error != DDI_SUCCESS) 5657 MDI_CLIENT_SET_RESUME(ct); 5658 break; 5659 } 5660 MDI_CLIENT_UNLOCK(ct); 5661 } 5662 5663 int 5664 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5665 { 5666 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5667 } 5668 5669 /* 5670 * create and install per-path (client - pHCI) statistics 5671 * I/O stats supported: nread, nwritten, reads, and writes 5672 * Error stats - hard errors, soft errors, & transport errors 5673 */ 5674 int 5675 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5676 { 5677 kstat_t *kiosp, *kerrsp; 5678 struct pi_errs *nsp; 5679 struct mdi_pi_kstats *mdi_statp; 5680 5681 if (MDI_PI(pip)->pi_kstats != NULL) 5682 return (MDI_SUCCESS); 5683 5684 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5685 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5686 return (MDI_FAILURE); 5687 } 5688 5689 (void) strcat(ksname, ",err"); 5690 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5691 KSTAT_TYPE_NAMED, 5692 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5693 if (kerrsp == NULL) { 5694 kstat_delete(kiosp); 5695 return (MDI_FAILURE); 5696 } 5697 5698 nsp = (struct pi_errs *)kerrsp->ks_data; 5699 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5700 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5701 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5702 KSTAT_DATA_UINT32); 5703 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5704 KSTAT_DATA_UINT32); 5705 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5706 KSTAT_DATA_UINT32); 5707 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5708 KSTAT_DATA_UINT32); 5709 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5710 KSTAT_DATA_UINT32); 5711 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5712 KSTAT_DATA_UINT32); 5713 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5714 KSTAT_DATA_UINT32); 5715 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5716 5717 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5718 mdi_statp->pi_kstat_ref = 1; 5719 mdi_statp->pi_kstat_iostats = kiosp; 5720 mdi_statp->pi_kstat_errstats = kerrsp; 5721 
kstat_install(kiosp); 5722 kstat_install(kerrsp); 5723 MDI_PI(pip)->pi_kstats = mdi_statp; 5724 return (MDI_SUCCESS); 5725 } 5726 5727 /* 5728 * destroy per-path properties 5729 */ 5730 static void 5731 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5732 { 5733 5734 struct mdi_pi_kstats *mdi_statp; 5735 5736 if (MDI_PI(pip)->pi_kstats == NULL) 5737 return; 5738 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5739 return; 5740 5741 MDI_PI(pip)->pi_kstats = NULL; 5742 5743 /* 5744 * the kstat may be shared between multiple pathinfo nodes 5745 * decrement this pathinfo's usage, removing the kstats 5746 * themselves when the last pathinfo reference is removed. 5747 */ 5748 ASSERT(mdi_statp->pi_kstat_ref > 0); 5749 if (--mdi_statp->pi_kstat_ref != 0) 5750 return; 5751 5752 kstat_delete(mdi_statp->pi_kstat_iostats); 5753 kstat_delete(mdi_statp->pi_kstat_errstats); 5754 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5755 } 5756 5757 /* 5758 * update I/O paths KSTATS 5759 */ 5760 void 5761 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5762 { 5763 kstat_t *iostatp; 5764 size_t xfer_cnt; 5765 5766 ASSERT(pip != NULL); 5767 5768 /* 5769 * I/O can be driven across a path prior to having path 5770 * statistics available, i.e. probe(9e). 5771 */ 5772 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5773 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5774 xfer_cnt = bp->b_bcount - bp->b_resid; 5775 if (bp->b_flags & B_READ) { 5776 KSTAT_IO_PTR(iostatp)->reads++; 5777 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5778 } else { 5779 KSTAT_IO_PTR(iostatp)->writes++; 5780 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5781 } 5782 } 5783 } 5784 5785 /* 5786 * Enable the path(specific client/target/initiator) 5787 * Enabling a path means that MPxIO may select the enabled path for routing 5788 * future I/O requests, subject to other path state constraints. 
5789 */ 5790 int 5791 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5792 { 5793 mdi_phci_t *ph; 5794 5795 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5796 if (ph == NULL) { 5797 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5798 " failed. pip: %p ph = NULL\n", (void *)pip)); 5799 return (MDI_FAILURE); 5800 } 5801 5802 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5803 MDI_ENABLE_OP); 5804 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5805 " Returning success pip = %p. ph = %p\n", 5806 (void *)pip, (void *)ph)); 5807 return (MDI_SUCCESS); 5808 5809 } 5810 5811 /* 5812 * Disable the path (specific client/target/initiator) 5813 * Disabling a path means that MPxIO will not select the disabled path for 5814 * routing any new I/O requests. 5815 */ 5816 int 5817 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5818 { 5819 mdi_phci_t *ph; 5820 5821 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5822 if (ph == NULL) { 5823 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5824 " failed. pip: %p ph = NULL\n", (void *)pip)); 5825 return (MDI_FAILURE); 5826 } 5827 5828 (void) i_mdi_enable_disable_path(pip, 5829 ph->ph_vhci, flags, MDI_DISABLE_OP); 5830 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5831 "Returning success pip = %p. ph = %p", 5832 (void *)pip, (void *)ph)); 5833 return (MDI_SUCCESS); 5834 } 5835 5836 /* 5837 * disable the path to a particular pHCI (pHCI specified in the phci_path 5838 * argument) for a particular client (specified in the client_path argument). 5839 * Disabling a path means that MPxIO will not select the disabled path for 5840 * routing any new I/O requests. 
5841 * NOTE: this will be removed once the NWS files are changed to use the new 5842 * mdi_{enable,disable}_path interfaces 5843 */ 5844 int 5845 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5846 { 5847 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5848 } 5849 5850 /* 5851 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5852 * argument) for a particular client (specified in the client_path argument). 5853 * Enabling a path means that MPxIO may select the enabled path for routing 5854 * future I/O requests, subject to other path state constraints. 5855 * NOTE: this will be removed once the NWS files are changed to use the new 5856 * mdi_{enable,disable}_path interfaces 5857 */ 5858 5859 int 5860 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5861 { 5862 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5863 } 5864 5865 /* 5866 * Common routine for doing enable/disable. 5867 */ 5868 static mdi_pathinfo_t * 5869 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5870 int op) 5871 { 5872 int sync_flag = 0; 5873 int rv; 5874 mdi_pathinfo_t *next; 5875 int (*f)() = NULL; 5876 5877 f = vh->vh_ops->vo_pi_state_change; 5878 5879 sync_flag = (flags << 8) & 0xf00; 5880 5881 /* 5882 * Do a callback into the mdi consumer to let it 5883 * know that path is about to get enabled/disabled. 
5884 */ 5885 if (f != NULL) { 5886 rv = (*f)(vh->vh_dip, pip, 0, 5887 MDI_PI_EXT_STATE(pip), 5888 MDI_EXT_STATE_CHANGE | sync_flag | 5889 op | MDI_BEFORE_STATE_CHANGE); 5890 if (rv != MDI_SUCCESS) { 5891 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5892 "!vo_pi_state_change: failed rv = %x", rv)); 5893 } 5894 } 5895 MDI_PI_LOCK(pip); 5896 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5897 5898 switch (flags) { 5899 case USER_DISABLE: 5900 if (op == MDI_DISABLE_OP) { 5901 MDI_PI_SET_USER_DISABLE(pip); 5902 } else { 5903 MDI_PI_SET_USER_ENABLE(pip); 5904 } 5905 break; 5906 case DRIVER_DISABLE: 5907 if (op == MDI_DISABLE_OP) { 5908 MDI_PI_SET_DRV_DISABLE(pip); 5909 } else { 5910 MDI_PI_SET_DRV_ENABLE(pip); 5911 } 5912 break; 5913 case DRIVER_DISABLE_TRANSIENT: 5914 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5915 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5916 } else { 5917 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5918 } 5919 break; 5920 } 5921 MDI_PI_UNLOCK(pip); 5922 /* 5923 * Do a callback into the mdi consumer to let it 5924 * know that path is now enabled/disabled. 5925 */ 5926 if (f != NULL) { 5927 rv = (*f)(vh->vh_dip, pip, 0, 5928 MDI_PI_EXT_STATE(pip), 5929 MDI_EXT_STATE_CHANGE | sync_flag | 5930 op | MDI_AFTER_STATE_CHANGE); 5931 if (rv != MDI_SUCCESS) { 5932 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5933 "!vo_pi_state_change: failed rv = %x", rv)); 5934 } 5935 } 5936 return (next); 5937 } 5938 5939 /* 5940 * Common routine for doing enable/disable. 
5941 * NOTE: this will be removed once the NWS files are changed to use the new 5942 * mdi_{enable,disable}_path has been putback 5943 */ 5944 int 5945 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5946 { 5947 5948 mdi_phci_t *ph; 5949 mdi_vhci_t *vh = NULL; 5950 mdi_client_t *ct; 5951 mdi_pathinfo_t *next, *pip; 5952 int found_it; 5953 5954 ph = i_devi_get_phci(pdip); 5955 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5956 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5957 (void *)cdip)); 5958 if (ph == NULL) { 5959 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5960 "Op %d failed. ph = NULL\n", op)); 5961 return (MDI_FAILURE); 5962 } 5963 5964 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5965 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5966 "Op Invalid operation = %d\n", op)); 5967 return (MDI_FAILURE); 5968 } 5969 5970 vh = ph->ph_vhci; 5971 5972 if (cdip == NULL) { 5973 /* 5974 * Need to mark the Phci as enabled/disabled. 5975 */ 5976 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5977 "Op %d for the phci\n", op)); 5978 MDI_PHCI_LOCK(ph); 5979 switch (flags) { 5980 case USER_DISABLE: 5981 if (op == MDI_DISABLE_OP) { 5982 MDI_PHCI_SET_USER_DISABLE(ph); 5983 } else { 5984 MDI_PHCI_SET_USER_ENABLE(ph); 5985 } 5986 break; 5987 case DRIVER_DISABLE: 5988 if (op == MDI_DISABLE_OP) { 5989 MDI_PHCI_SET_DRV_DISABLE(ph); 5990 } else { 5991 MDI_PHCI_SET_DRV_ENABLE(ph); 5992 } 5993 break; 5994 case DRIVER_DISABLE_TRANSIENT: 5995 if (op == MDI_DISABLE_OP) { 5996 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5997 } else { 5998 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5999 } 6000 break; 6001 default: 6002 MDI_PHCI_UNLOCK(ph); 6003 MDI_DEBUG(1, (CE_NOTE, NULL, 6004 "!i_mdi_pi_enable_disable:" 6005 " Invalid flag argument= %d\n", flags)); 6006 } 6007 6008 /* 6009 * Phci has been disabled. Now try to enable/disable 6010 * path info's to each client. 
6011 */ 6012 pip = ph->ph_path_head; 6013 while (pip != NULL) { 6014 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6015 } 6016 MDI_PHCI_UNLOCK(ph); 6017 } else { 6018 6019 /* 6020 * Disable a specific client. 6021 */ 6022 ct = i_devi_get_client(cdip); 6023 if (ct == NULL) { 6024 MDI_DEBUG(1, (CE_NOTE, NULL, 6025 "!i_mdi_pi_enable_disable:" 6026 " failed. ct = NULL operation = %d\n", op)); 6027 return (MDI_FAILURE); 6028 } 6029 6030 MDI_CLIENT_LOCK(ct); 6031 pip = ct->ct_path_head; 6032 found_it = 0; 6033 while (pip != NULL) { 6034 MDI_PI_LOCK(pip); 6035 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6036 if (MDI_PI(pip)->pi_phci == ph) { 6037 MDI_PI_UNLOCK(pip); 6038 found_it = 1; 6039 break; 6040 } 6041 MDI_PI_UNLOCK(pip); 6042 pip = next; 6043 } 6044 6045 6046 MDI_CLIENT_UNLOCK(ct); 6047 if (found_it == 0) { 6048 MDI_DEBUG(1, (CE_NOTE, NULL, 6049 "!i_mdi_pi_enable_disable:" 6050 " failed. Could not find corresponding pip\n")); 6051 return (MDI_FAILURE); 6052 } 6053 6054 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6055 } 6056 6057 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 6058 "Op %d Returning success pdip = %p cdip = %p\n", 6059 op, (void *)pdip, (void *)cdip)); 6060 return (MDI_SUCCESS); 6061 } 6062 6063 /* 6064 * Ensure phci powered up 6065 */ 6066 static void 6067 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6068 { 6069 dev_info_t *ph_dip; 6070 6071 ASSERT(pip != NULL); 6072 ASSERT(MDI_PI_LOCKED(pip)); 6073 6074 if (MDI_PI(pip)->pi_pm_held) { 6075 return; 6076 } 6077 6078 ph_dip = mdi_pi_get_phci(pip); 6079 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 6080 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6081 if (ph_dip == NULL) { 6082 return; 6083 } 6084 6085 MDI_PI_UNLOCK(pip); 6086 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6087 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6088 6089 pm_hold_power(ph_dip); 6090 6091 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6092 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 6093 MDI_PI_LOCK(pip); 6094 6095 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6096 if (DEVI(ph_dip)->devi_pm_info) 6097 MDI_PI(pip)->pi_pm_held = 1; 6098 } 6099 6100 /* 6101 * Allow phci powered down 6102 */ 6103 static void 6104 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6105 { 6106 dev_info_t *ph_dip = NULL; 6107 6108 ASSERT(pip != NULL); 6109 ASSERT(MDI_PI_LOCKED(pip)); 6110 6111 if (MDI_PI(pip)->pi_pm_held == 0) { 6112 return; 6113 } 6114 6115 ph_dip = mdi_pi_get_phci(pip); 6116 ASSERT(ph_dip != NULL); 6117 6118 MDI_PI_UNLOCK(pip); 6119 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 6120 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 6121 6122 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 6123 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6124 pm_rele_power(ph_dip); 6125 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 6126 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6127 6128 MDI_PI_LOCK(pip); 6129 MDI_PI(pip)->pi_pm_held = 0; 6130 } 6131 6132 static void 6133 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6134 { 6135 ASSERT(MDI_CLIENT_LOCKED(ct)); 6136 6137 ct->ct_power_cnt += incr; 6138 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 6139 "ct_power_cnt = %d incr = %d\n", (void *)ct, 6140 ct->ct_power_cnt, incr)); 6141 ASSERT(ct->ct_power_cnt >= 0); 6142 } 6143 6144 static void 6145 i_mdi_rele_all_phci(mdi_client_t *ct) 6146 { 6147 mdi_pathinfo_t *pip; 6148 6149 ASSERT(MDI_CLIENT_LOCKED(ct)); 6150 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6151 while (pip != NULL) { 6152 mdi_hold_path(pip); 6153 MDI_PI_LOCK(pip); 6154 i_mdi_pm_rele_pip(pip); 6155 MDI_PI_UNLOCK(pip); 6156 mdi_rele_path(pip); 6157 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6158 } 6159 } 6160 6161 static void 6162 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6163 { 6164 ASSERT(MDI_CLIENT_LOCKED(ct)); 6165 6166 if (i_ddi_devi_attached(ct->ct_dip)) { 6167 ct->ct_power_cnt -= decr; 6168 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 6169 "ct_power_cnt = %d decr = %d\n", 6170 (void *)ct, ct->ct_power_cnt, decr)); 6171 } 6172 6173 ASSERT(ct->ct_power_cnt >= 0); 6174 if (ct->ct_power_cnt == 0) { 6175 i_mdi_rele_all_phci(ct); 6176 return; 6177 } 6178 } 6179 6180 static void 6181 i_mdi_pm_reset_client(mdi_client_t *ct) 6182 { 6183 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 6184 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 6185 ASSERT(MDI_CLIENT_LOCKED(ct)); 6186 ct->ct_power_cnt = 0; 6187 i_mdi_rele_all_phci(ct); 6188 ct->ct_powercnt_config = 0; 6189 ct->ct_powercnt_unconfig = 0; 6190 ct->ct_powercnt_reset = 1; 6191 } 6192 6193 static int 6194 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6195 { 6196 int ret; 6197 dev_info_t *ph_dip; 6198 6199 MDI_PI_LOCK(pip); 6200 i_mdi_pm_hold_pip(pip); 6201 6202 ph_dip = mdi_pi_get_phci(pip); 6203 MDI_PI_UNLOCK(pip); 6204 6205 /* bring all components of phci to full power */ 6206 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6207 "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip), 6208 ddi_get_instance(ph_dip), (void *)pip)); 6209 6210 ret = pm_powerup(ph_dip); 6211 6212 if (ret == DDI_FAILURE) { 6213 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 6214 "pm_powerup FAILED for %s%d %p\n", 6215 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), 6216 (void *)pip)); 6217 6218 MDI_PI_LOCK(pip); 6219 i_mdi_pm_rele_pip(pip); 6220 MDI_PI_UNLOCK(pip); 6221 return (MDI_FAILURE); 6222 } 6223 6224 return (MDI_SUCCESS); 6225 } 6226 6227 static int 6228 i_mdi_power_all_phci(mdi_client_t *ct) 6229 { 6230 mdi_pathinfo_t *pip; 6231 int succeeded = 0; 6232 6233 ASSERT(MDI_CLIENT_LOCKED(ct)); 6234 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6235 while (pip != NULL) { 6236 /* 6237 * Don't power if MDI_PATHINFO_STATE_FAULT 6238 * or MDI_PATHINFO_STATE_OFFLINE. 
6239 */ 6240 if (MDI_PI_IS_INIT(pip) || 6241 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6242 mdi_hold_path(pip); 6243 MDI_CLIENT_UNLOCK(ct); 6244 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6245 succeeded = 1; 6246 6247 ASSERT(ct == MDI_PI(pip)->pi_client); 6248 MDI_CLIENT_LOCK(ct); 6249 mdi_rele_path(pip); 6250 } 6251 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6252 } 6253 6254 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6255 } 6256 6257 /* 6258 * mdi_bus_power(): 6259 * 1. Place the phci(s) into powered up state so that 6260 * client can do power management 6261 * 2. Ensure phci powered up as client power managing 6262 * Return Values: 6263 * MDI_SUCCESS 6264 * MDI_FAILURE 6265 */ 6266 int 6267 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6268 void *arg, void *result) 6269 { 6270 int ret = MDI_SUCCESS; 6271 pm_bp_child_pwrchg_t *bpc; 6272 mdi_client_t *ct; 6273 dev_info_t *cdip; 6274 pm_bp_has_changed_t *bphc; 6275 6276 /* 6277 * BUS_POWER_NOINVOL not supported 6278 */ 6279 if (op == BUS_POWER_NOINVOL) 6280 return (MDI_FAILURE); 6281 6282 /* 6283 * ignore other OPs. 
6284 * return quickly to save cou cycles on the ct processing 6285 */ 6286 switch (op) { 6287 case BUS_POWER_PRE_NOTIFICATION: 6288 case BUS_POWER_POST_NOTIFICATION: 6289 bpc = (pm_bp_child_pwrchg_t *)arg; 6290 cdip = bpc->bpc_dip; 6291 break; 6292 case BUS_POWER_HAS_CHANGED: 6293 bphc = (pm_bp_has_changed_t *)arg; 6294 cdip = bphc->bphc_dip; 6295 break; 6296 default: 6297 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6298 } 6299 6300 ASSERT(MDI_CLIENT(cdip)); 6301 6302 ct = i_devi_get_client(cdip); 6303 if (ct == NULL) 6304 return (MDI_FAILURE); 6305 6306 /* 6307 * wait till the mdi_pathinfo node state change are processed 6308 */ 6309 MDI_CLIENT_LOCK(ct); 6310 switch (op) { 6311 case BUS_POWER_PRE_NOTIFICATION: 6312 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6313 "BUS_POWER_PRE_NOTIFICATION:" 6314 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6315 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6316 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6317 6318 /* serialize power level change per client */ 6319 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6320 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6321 6322 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6323 6324 if (ct->ct_power_cnt == 0) { 6325 ret = i_mdi_power_all_phci(ct); 6326 } 6327 6328 /* 6329 * if new_level > 0: 6330 * - hold phci(s) 6331 * - power up phci(s) if not already 6332 * ignore power down 6333 */ 6334 if (bpc->bpc_nlevel > 0) { 6335 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 6336 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6337 "mdi_bus_power i_mdi_pm_hold_client\n")); 6338 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6339 } 6340 } 6341 break; 6342 case BUS_POWER_POST_NOTIFICATION: 6343 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 6344 "BUS_POWER_POST_NOTIFICATION:" 6345 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 6346 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6347 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6348 *(int *)result)); 6349 6350 if (*(int *)result 
== DDI_SUCCESS) { 6351 if (bpc->bpc_nlevel > 0) { 6352 MDI_CLIENT_SET_POWER_UP(ct); 6353 } else { 6354 MDI_CLIENT_SET_POWER_DOWN(ct); 6355 } 6356 } 6357 6358 /* release the hold we did in pre-notification */ 6359 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6360 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6361 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6362 "mdi_bus_power i_mdi_pm_rele_client\n")); 6363 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6364 } 6365 6366 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6367 /* another thread might started attaching */ 6368 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6369 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6370 "mdi_bus_power i_mdi_pm_rele_client\n")); 6371 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6372 /* detaching has been taken care in pm_post_unconfig */ 6373 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6374 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 6375 "mdi_bus_power i_mdi_pm_reset_client\n")); 6376 i_mdi_pm_reset_client(ct); 6377 } 6378 } 6379 6380 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6381 cv_broadcast(&ct->ct_powerchange_cv); 6382 6383 break; 6384 6385 /* need to do more */ 6386 case BUS_POWER_HAS_CHANGED: 6387 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 6388 "BUS_POWER_HAS_CHANGED:" 6389 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 6390 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6391 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6392 6393 if (bphc->bphc_nlevel > 0 && 6394 bphc->bphc_nlevel > bphc->bphc_olevel) { 6395 if (ct->ct_power_cnt == 0) { 6396 ret = i_mdi_power_all_phci(ct); 6397 } 6398 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6399 "mdi_bus_power i_mdi_pm_hold_client\n")); 6400 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6401 } 6402 6403 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6404 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 6405 "mdi_bus_power i_mdi_pm_rele_client\n")); 6406 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6407 } 6408 break; 6409 } 6410 6411 
MDI_CLIENT_UNLOCK(ct); 6412 return (ret); 6413 } 6414 6415 static int 6416 i_mdi_pm_pre_config_one(dev_info_t *child) 6417 { 6418 int ret = MDI_SUCCESS; 6419 mdi_client_t *ct; 6420 6421 ct = i_devi_get_client(child); 6422 if (ct == NULL) 6423 return (MDI_FAILURE); 6424 6425 MDI_CLIENT_LOCK(ct); 6426 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6427 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6428 6429 if (!MDI_CLIENT_IS_FAILED(ct)) { 6430 MDI_CLIENT_UNLOCK(ct); 6431 MDI_DEBUG(4, (CE_NOTE, child, 6432 "i_mdi_pm_pre_config_one already configured\n")); 6433 return (MDI_SUCCESS); 6434 } 6435 6436 if (ct->ct_powercnt_config) { 6437 MDI_CLIENT_UNLOCK(ct); 6438 MDI_DEBUG(4, (CE_NOTE, child, 6439 "i_mdi_pm_pre_config_one ALREADY held\n")); 6440 return (MDI_SUCCESS); 6441 } 6442 6443 if (ct->ct_power_cnt == 0) { 6444 ret = i_mdi_power_all_phci(ct); 6445 } 6446 MDI_DEBUG(4, (CE_NOTE, child, 6447 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 6448 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6449 ct->ct_powercnt_config = 1; 6450 ct->ct_powercnt_reset = 0; 6451 MDI_CLIENT_UNLOCK(ct); 6452 return (ret); 6453 } 6454 6455 static int 6456 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6457 { 6458 int ret = MDI_SUCCESS; 6459 dev_info_t *cdip; 6460 int circ; 6461 6462 ASSERT(MDI_VHCI(vdip)); 6463 6464 /* ndi_devi_config_one */ 6465 if (child) { 6466 ASSERT(DEVI_BUSY_OWNED(vdip)); 6467 return (i_mdi_pm_pre_config_one(child)); 6468 } 6469 6470 /* devi_config_common */ 6471 ndi_devi_enter(vdip, &circ); 6472 cdip = ddi_get_child(vdip); 6473 while (cdip) { 6474 dev_info_t *next = ddi_get_next_sibling(cdip); 6475 6476 ret = i_mdi_pm_pre_config_one(cdip); 6477 if (ret != MDI_SUCCESS) 6478 break; 6479 cdip = next; 6480 } 6481 ndi_devi_exit(vdip, circ); 6482 return (ret); 6483 } 6484 6485 static int 6486 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6487 { 6488 int ret = MDI_SUCCESS; 6489 mdi_client_t *ct; 6490 6491 ct = i_devi_get_client(child); 6492 if 
(ct == NULL) 6493 return (MDI_FAILURE); 6494 6495 MDI_CLIENT_LOCK(ct); 6496 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6497 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6498 6499 if (!i_ddi_devi_attached(ct->ct_dip)) { 6500 MDI_DEBUG(4, (CE_NOTE, child, 6501 "i_mdi_pm_pre_unconfig node detached already\n")); 6502 MDI_CLIENT_UNLOCK(ct); 6503 return (MDI_SUCCESS); 6504 } 6505 6506 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6507 (flags & NDI_AUTODETACH)) { 6508 MDI_DEBUG(4, (CE_NOTE, child, 6509 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6510 MDI_CLIENT_UNLOCK(ct); 6511 return (MDI_FAILURE); 6512 } 6513 6514 if (ct->ct_powercnt_unconfig) { 6515 MDI_DEBUG(4, (CE_NOTE, child, 6516 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6517 MDI_CLIENT_UNLOCK(ct); 6518 *held = 1; 6519 return (MDI_SUCCESS); 6520 } 6521 6522 if (ct->ct_power_cnt == 0) { 6523 ret = i_mdi_power_all_phci(ct); 6524 } 6525 MDI_DEBUG(4, (CE_NOTE, child, 6526 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6527 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6528 ct->ct_powercnt_unconfig = 1; 6529 ct->ct_powercnt_reset = 0; 6530 MDI_CLIENT_UNLOCK(ct); 6531 if (ret == MDI_SUCCESS) 6532 *held = 1; 6533 return (ret); 6534 } 6535 6536 static int 6537 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6538 int flags) 6539 { 6540 int ret = MDI_SUCCESS; 6541 dev_info_t *cdip; 6542 int circ; 6543 6544 ASSERT(MDI_VHCI(vdip)); 6545 *held = 0; 6546 6547 /* ndi_devi_unconfig_one */ 6548 if (child) { 6549 ASSERT(DEVI_BUSY_OWNED(vdip)); 6550 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6551 } 6552 6553 /* devi_unconfig_common */ 6554 ndi_devi_enter(vdip, &circ); 6555 cdip = ddi_get_child(vdip); 6556 while (cdip) { 6557 dev_info_t *next = ddi_get_next_sibling(cdip); 6558 6559 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6560 cdip = next; 6561 } 6562 ndi_devi_exit(vdip, circ); 6563 6564 if (*held) 6565 ret = MDI_SUCCESS; 6566 6567 return (ret); 6568 } 6569 6570 static void 6571 
i_mdi_pm_post_config_one(dev_info_t *child) 6572 { 6573 mdi_client_t *ct; 6574 6575 ct = i_devi_get_client(child); 6576 if (ct == NULL) 6577 return; 6578 6579 MDI_CLIENT_LOCK(ct); 6580 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6581 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6582 6583 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6584 MDI_DEBUG(4, (CE_NOTE, child, 6585 "i_mdi_pm_post_config_one NOT configured\n")); 6586 MDI_CLIENT_UNLOCK(ct); 6587 return; 6588 } 6589 6590 /* client has not been updated */ 6591 if (MDI_CLIENT_IS_FAILED(ct)) { 6592 MDI_DEBUG(4, (CE_NOTE, child, 6593 "i_mdi_pm_post_config_one NOT configured\n")); 6594 MDI_CLIENT_UNLOCK(ct); 6595 return; 6596 } 6597 6598 /* another thread might have powered it down or detached it */ 6599 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6600 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6601 (!i_ddi_devi_attached(ct->ct_dip) && 6602 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6603 MDI_DEBUG(4, (CE_NOTE, child, 6604 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6605 i_mdi_pm_reset_client(ct); 6606 } else { 6607 mdi_pathinfo_t *pip, *next; 6608 int valid_path_count = 0; 6609 6610 MDI_DEBUG(4, (CE_NOTE, child, 6611 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6612 pip = ct->ct_path_head; 6613 while (pip != NULL) { 6614 MDI_PI_LOCK(pip); 6615 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6616 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6617 valid_path_count ++; 6618 MDI_PI_UNLOCK(pip); 6619 pip = next; 6620 } 6621 i_mdi_pm_rele_client(ct, valid_path_count); 6622 } 6623 ct->ct_powercnt_config = 0; 6624 MDI_CLIENT_UNLOCK(ct); 6625 } 6626 6627 static void 6628 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6629 { 6630 int circ; 6631 dev_info_t *cdip; 6632 6633 ASSERT(MDI_VHCI(vdip)); 6634 6635 /* ndi_devi_config_one */ 6636 if (child) { 6637 ASSERT(DEVI_BUSY_OWNED(vdip)); 6638 i_mdi_pm_post_config_one(child); 6639 return; 6640 } 6641 6642 /* devi_config_common */ 6643 ndi_devi_enter(vdip, 
&circ); 6644 cdip = ddi_get_child(vdip); 6645 while (cdip) { 6646 dev_info_t *next = ddi_get_next_sibling(cdip); 6647 6648 i_mdi_pm_post_config_one(cdip); 6649 cdip = next; 6650 } 6651 ndi_devi_exit(vdip, circ); 6652 } 6653 6654 static void 6655 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6656 { 6657 mdi_client_t *ct; 6658 6659 ct = i_devi_get_client(child); 6660 if (ct == NULL) 6661 return; 6662 6663 MDI_CLIENT_LOCK(ct); 6664 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6665 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6666 6667 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6668 MDI_DEBUG(4, (CE_NOTE, child, 6669 "i_mdi_pm_post_unconfig NOT held\n")); 6670 MDI_CLIENT_UNLOCK(ct); 6671 return; 6672 } 6673 6674 /* failure detaching or another thread just attached it */ 6675 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6676 i_ddi_devi_attached(ct->ct_dip)) || 6677 (!i_ddi_devi_attached(ct->ct_dip) && 6678 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6679 MDI_DEBUG(4, (CE_NOTE, child, 6680 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6681 i_mdi_pm_reset_client(ct); 6682 } else { 6683 mdi_pathinfo_t *pip, *next; 6684 int valid_path_count = 0; 6685 6686 MDI_DEBUG(4, (CE_NOTE, child, 6687 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6688 pip = ct->ct_path_head; 6689 while (pip != NULL) { 6690 MDI_PI_LOCK(pip); 6691 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6692 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6693 valid_path_count ++; 6694 MDI_PI_UNLOCK(pip); 6695 pip = next; 6696 } 6697 i_mdi_pm_rele_client(ct, valid_path_count); 6698 ct->ct_powercnt_unconfig = 0; 6699 } 6700 6701 MDI_CLIENT_UNLOCK(ct); 6702 } 6703 6704 static void 6705 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6706 { 6707 int circ; 6708 dev_info_t *cdip; 6709 6710 ASSERT(MDI_VHCI(vdip)); 6711 6712 if (!held) { 6713 MDI_DEBUG(4, (CE_NOTE, vdip, 6714 "i_mdi_pm_post_unconfig held = %d\n", held)); 6715 return; 6716 } 6717 6718 if (child) { 6719 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6720 i_mdi_pm_post_unconfig_one(child); 6721 return; 6722 } 6723 6724 ndi_devi_enter(vdip, &circ); 6725 cdip = ddi_get_child(vdip); 6726 while (cdip) { 6727 dev_info_t *next = ddi_get_next_sibling(cdip); 6728 6729 i_mdi_pm_post_unconfig_one(cdip); 6730 cdip = next; 6731 } 6732 ndi_devi_exit(vdip, circ); 6733 } 6734 6735 int 6736 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6737 { 6738 int circ, ret = MDI_SUCCESS; 6739 dev_info_t *client_dip = NULL; 6740 mdi_client_t *ct; 6741 6742 /* 6743 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6744 * Power up pHCI for the named client device. 6745 * Note: Before the client is enumerated under vhci by phci, 6746 * client_dip can be NULL. Then proceed to power up all the 6747 * pHCIs. 6748 */ 6749 if (devnm != NULL) { 6750 ndi_devi_enter(vdip, &circ); 6751 client_dip = ndi_devi_findchild(vdip, devnm); 6752 } 6753 6754 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6755 op, devnm ? 
devnm : "NULL", (void *)client_dip)); 6756 6757 switch (op) { 6758 case MDI_PM_PRE_CONFIG: 6759 ret = i_mdi_pm_pre_config(vdip, client_dip); 6760 break; 6761 6762 case MDI_PM_PRE_UNCONFIG: 6763 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6764 flags); 6765 break; 6766 6767 case MDI_PM_POST_CONFIG: 6768 i_mdi_pm_post_config(vdip, client_dip); 6769 break; 6770 6771 case MDI_PM_POST_UNCONFIG: 6772 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6773 break; 6774 6775 case MDI_PM_HOLD_POWER: 6776 case MDI_PM_RELE_POWER: 6777 ASSERT(args); 6778 6779 client_dip = (dev_info_t *)args; 6780 ASSERT(MDI_CLIENT(client_dip)); 6781 6782 ct = i_devi_get_client(client_dip); 6783 MDI_CLIENT_LOCK(ct); 6784 6785 if (op == MDI_PM_HOLD_POWER) { 6786 if (ct->ct_power_cnt == 0) { 6787 (void) i_mdi_power_all_phci(ct); 6788 MDI_DEBUG(4, (CE_NOTE, client_dip, 6789 "mdi_power i_mdi_pm_hold_client\n")); 6790 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6791 } 6792 } else { 6793 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6794 MDI_DEBUG(4, (CE_NOTE, client_dip, 6795 "mdi_power i_mdi_pm_rele_client\n")); 6796 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6797 } else { 6798 MDI_DEBUG(4, (CE_NOTE, client_dip, 6799 "mdi_power i_mdi_pm_reset_client\n")); 6800 i_mdi_pm_reset_client(ct); 6801 } 6802 } 6803 6804 MDI_CLIENT_UNLOCK(ct); 6805 break; 6806 6807 default: 6808 break; 6809 } 6810 6811 if (devnm) 6812 ndi_devi_exit(vdip, circ); 6813 6814 return (ret); 6815 } 6816 6817 int 6818 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6819 { 6820 mdi_vhci_t *vhci; 6821 6822 if (!MDI_VHCI(dip)) 6823 return (MDI_FAILURE); 6824 6825 if (mdi_class) { 6826 vhci = DEVI(dip)->devi_mdi_xhci; 6827 ASSERT(vhci); 6828 *mdi_class = vhci->vh_class; 6829 } 6830 6831 return (MDI_SUCCESS); 6832 } 6833 6834 int 6835 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6836 { 6837 mdi_phci_t *phci; 6838 6839 if (!MDI_PHCI(dip)) 6840 return (MDI_FAILURE); 6841 6842 if (mdi_class) { 
6843 phci = DEVI(dip)->devi_mdi_xhci; 6844 ASSERT(phci); 6845 *mdi_class = phci->ph_vhci->vh_class; 6846 } 6847 6848 return (MDI_SUCCESS); 6849 } 6850 6851 int 6852 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6853 { 6854 mdi_client_t *client; 6855 6856 if (!MDI_CLIENT(dip)) 6857 return (MDI_FAILURE); 6858 6859 if (mdi_class) { 6860 client = DEVI(dip)->devi_mdi_client; 6861 ASSERT(client); 6862 *mdi_class = client->ct_vhci->vh_class; 6863 } 6864 6865 return (MDI_SUCCESS); 6866 } 6867 6868 void * 6869 mdi_client_get_vhci_private(dev_info_t *dip) 6870 { 6871 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6872 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6873 mdi_client_t *ct; 6874 ct = i_devi_get_client(dip); 6875 return (ct->ct_vprivate); 6876 } 6877 return (NULL); 6878 } 6879 6880 void 6881 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6882 { 6883 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6884 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6885 mdi_client_t *ct; 6886 ct = i_devi_get_client(dip); 6887 ct->ct_vprivate = data; 6888 } 6889 } 6890 /* 6891 * mdi_pi_get_vhci_private(): 6892 * Get the vhci private information associated with the 6893 * mdi_pathinfo node 6894 */ 6895 void * 6896 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6897 { 6898 caddr_t vprivate = NULL; 6899 if (pip) { 6900 vprivate = MDI_PI(pip)->pi_vprivate; 6901 } 6902 return (vprivate); 6903 } 6904 6905 /* 6906 * mdi_pi_set_vhci_private(): 6907 * Set the vhci private information in the mdi_pathinfo node 6908 */ 6909 void 6910 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6911 { 6912 if (pip) { 6913 MDI_PI(pip)->pi_vprivate = priv; 6914 } 6915 } 6916 6917 /* 6918 * mdi_phci_get_vhci_private(): 6919 * Get the vhci private information associated with the 6920 * mdi_phci node 6921 */ 6922 void * 6923 mdi_phci_get_vhci_private(dev_info_t *dip) 6924 { 6925 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 
6926 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6927 mdi_phci_t *ph; 6928 ph = i_devi_get_phci(dip); 6929 return (ph->ph_vprivate); 6930 } 6931 return (NULL); 6932 } 6933 6934 /* 6935 * mdi_phci_set_vhci_private(): 6936 * Set the vhci private information in the mdi_phci node 6937 */ 6938 void 6939 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6940 { 6941 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6942 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6943 mdi_phci_t *ph; 6944 ph = i_devi_get_phci(dip); 6945 ph->ph_vprivate = priv; 6946 } 6947 } 6948 6949 /* 6950 * List of vhci class names: 6951 * A vhci class name must be in this list only if the corresponding vhci 6952 * driver intends to use the mdi provided bus config implementation 6953 * (i.e., mdi_vhci_bus_config()). 6954 */ 6955 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6956 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6957 6958 /* 6959 * During boot time, the on-disk vhci cache for every vhci class is read 6960 * in the form of an nvlist and stored here. 6961 */ 6962 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6963 6964 /* nvpair names in vhci cache nvlist */ 6965 #define MDI_VHCI_CACHE_VERSION 1 6966 #define MDI_NVPNAME_VERSION "version" 6967 #define MDI_NVPNAME_PHCIS "phcis" 6968 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6969 6970 /* 6971 * Given vhci class name, return its on-disk vhci cache filename. 6972 * Memory for the returned filename which includes the full path is allocated 6973 * by this function. 6974 */ 6975 static char * 6976 vhclass2vhcache_filename(char *vhclass) 6977 { 6978 char *filename; 6979 int len; 6980 static char *fmt = "/etc/devices/mdi_%s_cache"; 6981 6982 /* 6983 * fmt contains the on-disk vhci cache file name format; 6984 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
6985 */ 6986 6987 /* the -1 below is to account for "%s" in the format string */ 6988 len = strlen(fmt) + strlen(vhclass) - 1; 6989 filename = kmem_alloc(len, KM_SLEEP); 6990 (void) snprintf(filename, len, fmt, vhclass); 6991 ASSERT(len == (strlen(filename) + 1)); 6992 return (filename); 6993 } 6994 6995 /* 6996 * initialize the vhci cache related data structures and read the on-disk 6997 * vhci cached data into memory. 6998 */ 6999 static void 7000 setup_vhci_cache(mdi_vhci_t *vh) 7001 { 7002 mdi_vhci_config_t *vhc; 7003 mdi_vhci_cache_t *vhcache; 7004 int i; 7005 nvlist_t *nvl = NULL; 7006 7007 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7008 vh->vh_config = vhc; 7009 vhcache = &vhc->vhc_vhcache; 7010 7011 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7012 7013 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7014 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7015 7016 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7017 7018 /* 7019 * Create string hash; same as mod_hash_create_strhash() except that 7020 * we use NULL key destructor. 7021 */ 7022 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7023 mdi_bus_config_cache_hash_size, 7024 mod_hash_null_keydtor, mod_hash_null_valdtor, 7025 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7026 7027 /* 7028 * The on-disk vhci cache is read during booting prior to the 7029 * lights-out period by mdi_read_devices_files(). 7030 */ 7031 for (i = 0; i < N_VHCI_CLASSES; i++) { 7032 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7033 nvl = vhcache_nvl[i]; 7034 vhcache_nvl[i] = NULL; 7035 break; 7036 } 7037 } 7038 7039 /* 7040 * this is to cover the case of some one manually causing unloading 7041 * (or detaching) and reloading (or attaching) of a vhci driver. 
7042 */ 7043 if (nvl == NULL && modrootloaded) 7044 nvl = read_on_disk_vhci_cache(vh->vh_class); 7045 7046 if (nvl != NULL) { 7047 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7048 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7049 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7050 else { 7051 cmn_err(CE_WARN, 7052 "%s: data file corrupted, will recreate\n", 7053 vhc->vhc_vhcache_filename); 7054 } 7055 rw_exit(&vhcache->vhcache_lock); 7056 nvlist_free(nvl); 7057 } 7058 7059 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7060 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7061 7062 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7063 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7064 } 7065 7066 /* 7067 * free all vhci cache related resources 7068 */ 7069 static int 7070 destroy_vhci_cache(mdi_vhci_t *vh) 7071 { 7072 mdi_vhci_config_t *vhc = vh->vh_config; 7073 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7074 mdi_vhcache_phci_t *cphci, *cphci_next; 7075 mdi_vhcache_client_t *cct, *cct_next; 7076 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7077 7078 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7079 return (MDI_FAILURE); 7080 7081 kmem_free(vhc->vhc_vhcache_filename, 7082 strlen(vhc->vhc_vhcache_filename) + 1); 7083 7084 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7085 7086 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7087 cphci = cphci_next) { 7088 cphci_next = cphci->cphci_next; 7089 free_vhcache_phci(cphci); 7090 } 7091 7092 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7093 cct_next = cct->cct_next; 7094 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7095 cpi_next = cpi->cpi_next; 7096 free_vhcache_pathinfo(cpi); 7097 } 7098 free_vhcache_client(cct); 7099 } 7100 7101 rw_destroy(&vhcache->vhcache_lock); 7102 7103 mutex_destroy(&vhc->vhc_lock); 7104 cv_destroy(&vhc->vhc_cv); 7105 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7106 return (MDI_SUCCESS); 
7107 } 7108 7109 /* 7110 * Stop all vhci cache related async threads and free their resources. 7111 */ 7112 static int 7113 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7114 { 7115 mdi_async_client_config_t *acc, *acc_next; 7116 7117 mutex_enter(&vhc->vhc_lock); 7118 vhc->vhc_flags |= MDI_VHC_EXIT; 7119 ASSERT(vhc->vhc_acc_thrcount >= 0); 7120 cv_broadcast(&vhc->vhc_cv); 7121 7122 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7123 vhc->vhc_acc_thrcount != 0) { 7124 mutex_exit(&vhc->vhc_lock); 7125 delay(1); 7126 mutex_enter(&vhc->vhc_lock); 7127 } 7128 7129 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7130 7131 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7132 acc_next = acc->acc_next; 7133 free_async_client_config(acc); 7134 } 7135 vhc->vhc_acc_list_head = NULL; 7136 vhc->vhc_acc_list_tail = NULL; 7137 vhc->vhc_acc_count = 0; 7138 7139 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7140 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7141 mutex_exit(&vhc->vhc_lock); 7142 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7143 vhcache_dirty(vhc); 7144 return (MDI_FAILURE); 7145 } 7146 } else 7147 mutex_exit(&vhc->vhc_lock); 7148 7149 if (callb_delete(vhc->vhc_cbid) != 0) 7150 return (MDI_FAILURE); 7151 7152 return (MDI_SUCCESS); 7153 } 7154 7155 /* 7156 * Stop vhci cache flush thread 7157 */ 7158 /* ARGSUSED */ 7159 static boolean_t 7160 stop_vhcache_flush_thread(void *arg, int code) 7161 { 7162 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7163 7164 mutex_enter(&vhc->vhc_lock); 7165 vhc->vhc_flags |= MDI_VHC_EXIT; 7166 cv_broadcast(&vhc->vhc_cv); 7167 7168 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7169 mutex_exit(&vhc->vhc_lock); 7170 delay(1); 7171 mutex_enter(&vhc->vhc_lock); 7172 } 7173 7174 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7175 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7176 mutex_exit(&vhc->vhc_lock); 7177 (void) flush_vhcache(vhc, 1); 7178 } else 7179 mutex_exit(&vhc->vhc_lock); 7180 7181 return (B_TRUE); 7182 } 
7183 7184 /* 7185 * Enqueue the vhcache phci (cphci) at the tail of the list 7186 */ 7187 static void 7188 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7189 { 7190 cphci->cphci_next = NULL; 7191 if (vhcache->vhcache_phci_head == NULL) 7192 vhcache->vhcache_phci_head = cphci; 7193 else 7194 vhcache->vhcache_phci_tail->cphci_next = cphci; 7195 vhcache->vhcache_phci_tail = cphci; 7196 } 7197 7198 /* 7199 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7200 */ 7201 static void 7202 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7203 mdi_vhcache_pathinfo_t *cpi) 7204 { 7205 cpi->cpi_next = NULL; 7206 if (cct->cct_cpi_head == NULL) 7207 cct->cct_cpi_head = cpi; 7208 else 7209 cct->cct_cpi_tail->cpi_next = cpi; 7210 cct->cct_cpi_tail = cpi; 7211 } 7212 7213 /* 7214 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7215 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7216 * flag set come at the beginning of the list. All cpis which have this 7217 * flag set come at the end of the list. 
7218 */ 7219 static void 7220 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7221 mdi_vhcache_pathinfo_t *newcpi) 7222 { 7223 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7224 7225 if (cct->cct_cpi_head == NULL || 7226 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7227 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7228 else { 7229 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7230 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7231 prev_cpi = cpi, cpi = cpi->cpi_next) 7232 ; 7233 7234 if (prev_cpi == NULL) 7235 cct->cct_cpi_head = newcpi; 7236 else 7237 prev_cpi->cpi_next = newcpi; 7238 7239 newcpi->cpi_next = cpi; 7240 7241 if (cpi == NULL) 7242 cct->cct_cpi_tail = newcpi; 7243 } 7244 } 7245 7246 /* 7247 * Enqueue the vhcache client (cct) at the tail of the list 7248 */ 7249 static void 7250 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7251 mdi_vhcache_client_t *cct) 7252 { 7253 cct->cct_next = NULL; 7254 if (vhcache->vhcache_client_head == NULL) 7255 vhcache->vhcache_client_head = cct; 7256 else 7257 vhcache->vhcache_client_tail->cct_next = cct; 7258 vhcache->vhcache_client_tail = cct; 7259 } 7260 7261 static void 7262 free_string_array(char **str, int nelem) 7263 { 7264 int i; 7265 7266 if (str) { 7267 for (i = 0; i < nelem; i++) { 7268 if (str[i]) 7269 kmem_free(str[i], strlen(str[i]) + 1); 7270 } 7271 kmem_free(str, sizeof (char *) * nelem); 7272 } 7273 } 7274 7275 static void 7276 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7277 { 7278 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7279 kmem_free(cphci, sizeof (*cphci)); 7280 } 7281 7282 static void 7283 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7284 { 7285 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7286 kmem_free(cpi, sizeof (*cpi)); 7287 } 7288 7289 static void 7290 free_vhcache_client(mdi_vhcache_client_t *cct) 7291 { 7292 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7293 kmem_free(cct, sizeof (*cct)); 7294 } 7295 7296 
static char * 7297 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7298 { 7299 char *name_addr; 7300 int len; 7301 7302 len = strlen(ct_name) + strlen(ct_addr) + 2; 7303 name_addr = kmem_alloc(len, KM_SLEEP); 7304 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7305 7306 if (ret_len) 7307 *ret_len = len; 7308 return (name_addr); 7309 } 7310 7311 /* 7312 * Copy the contents of paddrnvl to vhci cache. 7313 * paddrnvl nvlist contains path information for a vhci client. 7314 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7315 */ 7316 static void 7317 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7318 mdi_vhcache_client_t *cct) 7319 { 7320 nvpair_t *nvp = NULL; 7321 mdi_vhcache_pathinfo_t *cpi; 7322 uint_t nelem; 7323 uint32_t *val; 7324 7325 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7326 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7327 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7328 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7329 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7330 ASSERT(nelem == 2); 7331 cpi->cpi_cphci = cphci_list[val[0]]; 7332 cpi->cpi_flags = val[1]; 7333 enqueue_tail_vhcache_pathinfo(cct, cpi); 7334 } 7335 } 7336 7337 /* 7338 * Copy the contents of caddrmapnvl to vhci cache. 7339 * caddrmapnvl nvlist contains vhci client address to phci client address 7340 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7341 * this nvlist. 
7342 */ 7343 static void 7344 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7345 mdi_vhcache_phci_t *cphci_list[]) 7346 { 7347 nvpair_t *nvp = NULL; 7348 nvlist_t *paddrnvl; 7349 mdi_vhcache_client_t *cct; 7350 7351 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7352 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7353 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7354 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7355 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7356 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7357 /* the client must contain at least one path */ 7358 ASSERT(cct->cct_cpi_head != NULL); 7359 7360 enqueue_vhcache_client(vhcache, cct); 7361 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7362 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7363 } 7364 } 7365 7366 /* 7367 * Copy the contents of the main nvlist to vhci cache. 7368 * 7369 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7370 * The nvlist contains the mappings between the vhci client addresses and 7371 * their corresponding phci client addresses. 7372 * 7373 * The structure of the nvlist is as follows: 7374 * 7375 * Main nvlist: 7376 * NAME TYPE DATA 7377 * version int32 version number 7378 * phcis string array array of phci paths 7379 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7380 * 7381 * structure of c2paddrs_nvl: 7382 * NAME TYPE DATA 7383 * caddr1 nvlist_t paddrs_nvl1 7384 * caddr2 nvlist_t paddrs_nvl2 7385 * ... 7386 * where caddr1, caddr2, ... are vhci client name and addresses in the 7387 * form of "<clientname>@<clientaddress>". 7388 * (for example: "ssd@2000002037cd9f72"); 7389 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7390 * 7391 * structure of paddrs_nvl: 7392 * NAME TYPE DATA 7393 * pi_addr1 uint32_array (phci-id, cpi_flags) 7394 * pi_addr2 uint32_array (phci-id, cpi_flags) 7395 * ... 7396 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7397 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7398 * phci-ids are integers that identify PHCIs to which the 7399 * the bus specific address belongs to. These integers are used as an index 7400 * into to the phcis string array in the main nvlist to get the PHCI path. 7401 */ 7402 static int 7403 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7404 { 7405 char **phcis, **phci_namep; 7406 uint_t nphcis; 7407 mdi_vhcache_phci_t *cphci, **cphci_list; 7408 nvlist_t *caddrmapnvl; 7409 int32_t ver; 7410 int i; 7411 size_t cphci_list_size; 7412 7413 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7414 7415 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7416 ver != MDI_VHCI_CACHE_VERSION) 7417 return (MDI_FAILURE); 7418 7419 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7420 &nphcis) != 0) 7421 return (MDI_SUCCESS); 7422 7423 ASSERT(nphcis > 0); 7424 7425 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7426 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7427 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7428 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7429 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7430 enqueue_vhcache_phci(vhcache, cphci); 7431 cphci_list[i] = cphci; 7432 } 7433 7434 ASSERT(vhcache->vhcache_phci_head != NULL); 7435 7436 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7437 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7438 7439 kmem_free(cphci_list, cphci_list_size); 7440 return (MDI_SUCCESS); 7441 } 7442 7443 /* 7444 * Build paddrnvl for the specified client using the information in the 7445 * vhci cache and add it to the caddrmapnnvl. 7446 * Returns 0 on success, errno on failure. 
7447 */ 7448 static int 7449 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7450 nvlist_t *caddrmapnvl) 7451 { 7452 mdi_vhcache_pathinfo_t *cpi; 7453 nvlist_t *nvl; 7454 int err; 7455 uint32_t val[2]; 7456 7457 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7458 7459 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7460 return (err); 7461 7462 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7463 val[0] = cpi->cpi_cphci->cphci_id; 7464 val[1] = cpi->cpi_flags; 7465 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7466 != 0) 7467 goto out; 7468 } 7469 7470 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7471 out: 7472 nvlist_free(nvl); 7473 return (err); 7474 } 7475 7476 /* 7477 * Build caddrmapnvl using the information in the vhci cache 7478 * and add it to the mainnvl. 7479 * Returns 0 on success, errno on failure. 7480 */ 7481 static int 7482 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7483 { 7484 mdi_vhcache_client_t *cct; 7485 nvlist_t *nvl; 7486 int err; 7487 7488 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7489 7490 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7491 return (err); 7492 7493 for (cct = vhcache->vhcache_client_head; cct != NULL; 7494 cct = cct->cct_next) { 7495 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7496 goto out; 7497 } 7498 7499 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7500 out: 7501 nvlist_free(nvl); 7502 return (err); 7503 } 7504 7505 /* 7506 * Build nvlist using the information in the vhci cache. 7507 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7508 * Returns nvl on success, NULL on failure. 
7509 */ 7510 static nvlist_t * 7511 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7512 { 7513 mdi_vhcache_phci_t *cphci; 7514 uint_t phci_count; 7515 char **phcis; 7516 nvlist_t *nvl; 7517 int err, i; 7518 7519 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7520 nvl = NULL; 7521 goto out; 7522 } 7523 7524 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7525 MDI_VHCI_CACHE_VERSION)) != 0) 7526 goto out; 7527 7528 rw_enter(&vhcache->vhcache_lock, RW_READER); 7529 if (vhcache->vhcache_phci_head == NULL) { 7530 rw_exit(&vhcache->vhcache_lock); 7531 return (nvl); 7532 } 7533 7534 phci_count = 0; 7535 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7536 cphci = cphci->cphci_next) 7537 cphci->cphci_id = phci_count++; 7538 7539 /* build phci pathname list */ 7540 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7541 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7542 cphci = cphci->cphci_next, i++) 7543 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7544 7545 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7546 phci_count); 7547 free_string_array(phcis, phci_count); 7548 7549 if (err == 0 && 7550 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7551 rw_exit(&vhcache->vhcache_lock); 7552 return (nvl); 7553 } 7554 7555 rw_exit(&vhcache->vhcache_lock); 7556 out: 7557 if (nvl) 7558 nvlist_free(nvl); 7559 return (NULL); 7560 } 7561 7562 /* 7563 * Lookup vhcache phci structure for the specified phci path. 7564 */ 7565 static mdi_vhcache_phci_t * 7566 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7567 { 7568 mdi_vhcache_phci_t *cphci; 7569 7570 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7571 7572 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7573 cphci = cphci->cphci_next) { 7574 if (strcmp(cphci->cphci_path, phci_path) == 0) 7575 return (cphci); 7576 } 7577 7578 return (NULL); 7579 } 7580 7581 /* 7582 * Lookup vhcache phci structure for the specified phci. 
7583 */ 7584 static mdi_vhcache_phci_t * 7585 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7586 { 7587 mdi_vhcache_phci_t *cphci; 7588 7589 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7590 7591 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7592 cphci = cphci->cphci_next) { 7593 if (cphci->cphci_phci == ph) 7594 return (cphci); 7595 } 7596 7597 return (NULL); 7598 } 7599 7600 /* 7601 * Add the specified phci to the vhci cache if not already present. 7602 */ 7603 static void 7604 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7605 { 7606 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7607 mdi_vhcache_phci_t *cphci; 7608 char *pathname; 7609 int cache_updated; 7610 7611 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7612 7613 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7614 (void) ddi_pathname(ph->ph_dip, pathname); 7615 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7616 != NULL) { 7617 cphci->cphci_phci = ph; 7618 cache_updated = 0; 7619 } else { 7620 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7621 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7622 cphci->cphci_phci = ph; 7623 enqueue_vhcache_phci(vhcache, cphci); 7624 cache_updated = 1; 7625 } 7626 7627 rw_exit(&vhcache->vhcache_lock); 7628 7629 /* 7630 * Since a new phci has been added, reset 7631 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7632 * during next vhcache_discover_paths(). 7633 */ 7634 mutex_enter(&vhc->vhc_lock); 7635 vhc->vhc_path_discovery_cutoff_time = 0; 7636 mutex_exit(&vhc->vhc_lock); 7637 7638 kmem_free(pathname, MAXPATHLEN); 7639 if (cache_updated) 7640 vhcache_dirty(vhc); 7641 } 7642 7643 /* 7644 * Remove the reference to the specified phci from the vhci cache. 
7645 */ 7646 static void 7647 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7648 { 7649 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7650 mdi_vhcache_phci_t *cphci; 7651 7652 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7653 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7654 /* do not remove the actual mdi_vhcache_phci structure */ 7655 cphci->cphci_phci = NULL; 7656 } 7657 rw_exit(&vhcache->vhcache_lock); 7658 } 7659 7660 static void 7661 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7662 mdi_vhcache_lookup_token_t *src) 7663 { 7664 if (src == NULL) { 7665 dst->lt_cct = NULL; 7666 dst->lt_cct_lookup_time = 0; 7667 } else { 7668 dst->lt_cct = src->lt_cct; 7669 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7670 } 7671 } 7672 7673 /* 7674 * Look up vhcache client for the specified client. 7675 */ 7676 static mdi_vhcache_client_t * 7677 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7678 mdi_vhcache_lookup_token_t *token) 7679 { 7680 mod_hash_val_t hv; 7681 char *name_addr; 7682 int len; 7683 7684 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7685 7686 /* 7687 * If no vhcache clean occurred since the last lookup, we can 7688 * simply return the cct from the last lookup operation. 7689 * It works because ccts are never freed except during the vhcache 7690 * cleanup operation. 
7691 */ 7692 if (token != NULL && 7693 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7694 return (token->lt_cct); 7695 7696 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7697 if (mod_hash_find(vhcache->vhcache_client_hash, 7698 (mod_hash_key_t)name_addr, &hv) == 0) { 7699 if (token) { 7700 token->lt_cct = (mdi_vhcache_client_t *)hv; 7701 token->lt_cct_lookup_time = lbolt64; 7702 } 7703 } else { 7704 if (token) { 7705 token->lt_cct = NULL; 7706 token->lt_cct_lookup_time = 0; 7707 } 7708 hv = NULL; 7709 } 7710 kmem_free(name_addr, len); 7711 return ((mdi_vhcache_client_t *)hv); 7712 } 7713 7714 /* 7715 * Add the specified path to the vhci cache if not already present. 7716 * Also add the vhcache client for the client corresponding to this path 7717 * if it doesn't already exist. 7718 */ 7719 static void 7720 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7721 { 7722 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7723 mdi_vhcache_client_t *cct; 7724 mdi_vhcache_pathinfo_t *cpi; 7725 mdi_phci_t *ph = pip->pi_phci; 7726 mdi_client_t *ct = pip->pi_client; 7727 int cache_updated = 0; 7728 7729 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7730 7731 /* if vhcache client for this pip doesn't already exist, add it */ 7732 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7733 NULL)) == NULL) { 7734 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7735 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7736 ct->ct_guid, NULL); 7737 enqueue_vhcache_client(vhcache, cct); 7738 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7739 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7740 cache_updated = 1; 7741 } 7742 7743 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7744 if (cpi->cpi_cphci->cphci_phci == ph && 7745 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7746 cpi->cpi_pip = pip; 7747 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7748 cpi->cpi_flags &= 7749 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7750 sort_vhcache_paths(cct); 7751 cache_updated = 1; 7752 } 7753 break; 7754 } 7755 } 7756 7757 if (cpi == NULL) { 7758 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7759 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7760 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7761 ASSERT(cpi->cpi_cphci != NULL); 7762 cpi->cpi_pip = pip; 7763 enqueue_vhcache_pathinfo(cct, cpi); 7764 cache_updated = 1; 7765 } 7766 7767 rw_exit(&vhcache->vhcache_lock); 7768 7769 if (cache_updated) 7770 vhcache_dirty(vhc); 7771 } 7772 7773 /* 7774 * Remove the reference to the specified path from the vhci cache. 7775 */ 7776 static void 7777 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7778 { 7779 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7780 mdi_client_t *ct = pip->pi_client; 7781 mdi_vhcache_client_t *cct; 7782 mdi_vhcache_pathinfo_t *cpi; 7783 7784 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7785 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7786 NULL)) != NULL) { 7787 for (cpi = cct->cct_cpi_head; cpi != NULL; 7788 cpi = cpi->cpi_next) { 7789 if (cpi->cpi_pip == pip) { 7790 cpi->cpi_pip = NULL; 7791 break; 7792 } 7793 } 7794 } 7795 rw_exit(&vhcache->vhcache_lock); 7796 } 7797 7798 /* 7799 * Flush the vhci cache to disk. 7800 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7801 */ 7802 static int 7803 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7804 { 7805 nvlist_t *nvl; 7806 int err; 7807 int rv; 7808 7809 /* 7810 * It is possible that the system may shutdown before 7811 * i_ddi_io_initialized (during stmsboot for example). To allow for 7812 * flushing the cache in this case do not check for 7813 * i_ddi_io_initialized when force flag is set. 
7814 */ 7815 if (force_flag == 0 && !i_ddi_io_initialized()) 7816 return (MDI_FAILURE); 7817 7818 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7819 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7820 nvlist_free(nvl); 7821 } else 7822 err = EFAULT; 7823 7824 rv = MDI_SUCCESS; 7825 mutex_enter(&vhc->vhc_lock); 7826 if (err != 0) { 7827 if (err == EROFS) { 7828 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7829 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7830 MDI_VHC_VHCACHE_DIRTY); 7831 } else { 7832 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7833 cmn_err(CE_CONT, "%s: update failed\n", 7834 vhc->vhc_vhcache_filename); 7835 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7836 } 7837 rv = MDI_FAILURE; 7838 } 7839 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7840 cmn_err(CE_CONT, 7841 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7842 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7843 } 7844 mutex_exit(&vhc->vhc_lock); 7845 7846 return (rv); 7847 } 7848 7849 /* 7850 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7851 * Exits itself if left idle for the idle timeout period. 
7852 */ 7853 static void 7854 vhcache_flush_thread(void *arg) 7855 { 7856 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7857 clock_t idle_time, quit_at_ticks; 7858 callb_cpr_t cprinfo; 7859 7860 /* number of seconds to sleep idle before exiting */ 7861 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 7862 7863 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 7864 "mdi_vhcache_flush"); 7865 mutex_enter(&vhc->vhc_lock); 7866 for (; ; ) { 7867 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7868 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 7869 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 7870 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7871 (void) cv_timedwait(&vhc->vhc_cv, 7872 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 7873 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7874 } else { 7875 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7876 mutex_exit(&vhc->vhc_lock); 7877 7878 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 7879 vhcache_dirty(vhc); 7880 7881 mutex_enter(&vhc->vhc_lock); 7882 } 7883 } 7884 7885 quit_at_ticks = ddi_get_lbolt() + idle_time; 7886 7887 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 7888 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 7889 ddi_get_lbolt() < quit_at_ticks) { 7890 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7891 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 7892 quit_at_ticks); 7893 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 7894 } 7895 7896 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 7897 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 7898 goto out; 7899 } 7900 7901 out: 7902 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 7903 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 7904 CALLB_CPR_EXIT(&cprinfo); 7905 } 7906 7907 /* 7908 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
7909 */ 7910 static void 7911 vhcache_dirty(mdi_vhci_config_t *vhc) 7912 { 7913 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7914 int create_thread; 7915 7916 rw_enter(&vhcache->vhcache_lock, RW_READER); 7917 /* do not flush cache until the cache is fully built */ 7918 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7919 rw_exit(&vhcache->vhcache_lock); 7920 return; 7921 } 7922 rw_exit(&vhcache->vhcache_lock); 7923 7924 mutex_enter(&vhc->vhc_lock); 7925 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7926 mutex_exit(&vhc->vhc_lock); 7927 return; 7928 } 7929 7930 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7931 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7932 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7933 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7934 cv_broadcast(&vhc->vhc_cv); 7935 create_thread = 0; 7936 } else { 7937 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7938 create_thread = 1; 7939 } 7940 mutex_exit(&vhc->vhc_lock); 7941 7942 if (create_thread) 7943 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7944 0, &p0, TS_RUN, minclsyspri); 7945 } 7946 7947 /* 7948 * phci bus config structure - one for for each phci bus config operation that 7949 * we initiate on behalf of a vhci. 
7950 */ 7951 typedef struct mdi_phci_bus_config_s { 7952 char *phbc_phci_path; 7953 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7954 struct mdi_phci_bus_config_s *phbc_next; 7955 } mdi_phci_bus_config_t; 7956 7957 /* vhci bus config structure - one for each vhci bus config operation */ 7958 typedef struct mdi_vhci_bus_config_s { 7959 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7960 major_t vhbc_op_major; /* bus config op major */ 7961 uint_t vhbc_op_flags; /* bus config op flags */ 7962 kmutex_t vhbc_lock; 7963 kcondvar_t vhbc_cv; 7964 int vhbc_thr_count; 7965 } mdi_vhci_bus_config_t; 7966 7967 /* 7968 * bus config the specified phci 7969 */ 7970 static void 7971 bus_config_phci(void *arg) 7972 { 7973 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7974 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7975 dev_info_t *ph_dip; 7976 7977 /* 7978 * first configure all path components upto phci and then configure 7979 * the phci children. 7980 */ 7981 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7982 != NULL) { 7983 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7984 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7985 (void) ndi_devi_config_driver(ph_dip, 7986 vhbc->vhbc_op_flags, 7987 vhbc->vhbc_op_major); 7988 } else 7989 (void) ndi_devi_config(ph_dip, 7990 vhbc->vhbc_op_flags); 7991 7992 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7993 ndi_rele_devi(ph_dip); 7994 } 7995 7996 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7997 kmem_free(phbc, sizeof (*phbc)); 7998 7999 mutex_enter(&vhbc->vhbc_lock); 8000 vhbc->vhbc_thr_count--; 8001 if (vhbc->vhbc_thr_count == 0) 8002 cv_broadcast(&vhbc->vhbc_cv); 8003 mutex_exit(&vhbc->vhbc_lock); 8004 } 8005 8006 /* 8007 * Bus config all phcis associated with the vhci in parallel. 8008 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8009 */ 8010 static void 8011 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8012 ddi_bus_config_op_t op, major_t maj) 8013 { 8014 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8015 mdi_vhci_bus_config_t *vhbc; 8016 mdi_vhcache_phci_t *cphci; 8017 8018 rw_enter(&vhcache->vhcache_lock, RW_READER); 8019 if (vhcache->vhcache_phci_head == NULL) { 8020 rw_exit(&vhcache->vhcache_lock); 8021 return; 8022 } 8023 8024 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8025 8026 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8027 cphci = cphci->cphci_next) { 8028 /* skip phcis that haven't attached before root is available */ 8029 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8030 continue; 8031 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8032 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8033 KM_SLEEP); 8034 phbc->phbc_vhbusconfig = vhbc; 8035 phbc->phbc_next = phbc_head; 8036 phbc_head = phbc; 8037 vhbc->vhbc_thr_count++; 8038 } 8039 rw_exit(&vhcache->vhcache_lock); 8040 8041 vhbc->vhbc_op = op; 8042 vhbc->vhbc_op_major = maj; 8043 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8044 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8045 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8046 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8047 8048 /* now create threads to initiate bus config on all phcis in parallel */ 8049 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8050 phbc_next = phbc->phbc_next; 8051 if (mdi_mtc_off) 8052 bus_config_phci((void *)phbc); 8053 else 8054 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8055 0, &p0, TS_RUN, minclsyspri); 8056 } 8057 8058 mutex_enter(&vhbc->vhbc_lock); 8059 /* wait until all threads exit */ 8060 while (vhbc->vhbc_thr_count > 0) 8061 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8062 mutex_exit(&vhbc->vhbc_lock); 8063 8064 mutex_destroy(&vhbc->vhbc_lock); 8065 cv_destroy(&vhbc->vhbc_cv); 8066 kmem_free(vhbc, sizeof (*vhbc)); 8067 } 8068 8069 /* 8070 * Single 
threaded version of bus_config_all_phcis() 8071 */ 8072 static void 8073 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8074 ddi_bus_config_op_t op, major_t maj) 8075 { 8076 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8077 8078 single_threaded_vhconfig_enter(vhc); 8079 bus_config_all_phcis(vhcache, flags, op, maj); 8080 single_threaded_vhconfig_exit(vhc); 8081 } 8082 8083 /* 8084 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8085 * The path includes the child component in addition to the phci path. 8086 */ 8087 static int 8088 bus_config_one_phci_child(char *path) 8089 { 8090 dev_info_t *ph_dip, *child; 8091 char *devnm; 8092 int rv = MDI_FAILURE; 8093 8094 /* extract the child component of the phci */ 8095 devnm = strrchr(path, '/'); 8096 *devnm++ = '\0'; 8097 8098 /* 8099 * first configure all path components upto phci and then 8100 * configure the phci child. 8101 */ 8102 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8103 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8104 NDI_SUCCESS) { 8105 /* 8106 * release the hold that ndi_devi_config_one() placed 8107 */ 8108 ndi_rele_devi(child); 8109 rv = MDI_SUCCESS; 8110 } 8111 8112 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8113 ndi_rele_devi(ph_dip); 8114 } 8115 8116 devnm--; 8117 *devnm = '/'; 8118 return (rv); 8119 } 8120 8121 /* 8122 * Build a list of phci client paths for the specified vhci client. 8123 * The list includes only those phci client paths which aren't configured yet. 8124 */ 8125 static mdi_phys_path_t * 8126 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8127 { 8128 mdi_vhcache_pathinfo_t *cpi; 8129 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8130 int config_path, len; 8131 8132 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8133 /* 8134 * include only those paths that aren't configured. 
8135 */ 8136 config_path = 0; 8137 if (cpi->cpi_pip == NULL) 8138 config_path = 1; 8139 else { 8140 MDI_PI_LOCK(cpi->cpi_pip); 8141 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8142 config_path = 1; 8143 MDI_PI_UNLOCK(cpi->cpi_pip); 8144 } 8145 8146 if (config_path) { 8147 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8148 len = strlen(cpi->cpi_cphci->cphci_path) + 8149 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8150 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8151 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8152 cpi->cpi_cphci->cphci_path, ct_name, 8153 cpi->cpi_addr); 8154 pp->phys_path_next = NULL; 8155 8156 if (pp_head == NULL) 8157 pp_head = pp; 8158 else 8159 pp_tail->phys_path_next = pp; 8160 pp_tail = pp; 8161 } 8162 } 8163 8164 return (pp_head); 8165 } 8166 8167 /* 8168 * Free the memory allocated for phci client path list. 8169 */ 8170 static void 8171 free_phclient_path_list(mdi_phys_path_t *pp_head) 8172 { 8173 mdi_phys_path_t *pp, *pp_next; 8174 8175 for (pp = pp_head; pp != NULL; pp = pp_next) { 8176 pp_next = pp->phys_path_next; 8177 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8178 kmem_free(pp, sizeof (*pp)); 8179 } 8180 } 8181 8182 /* 8183 * Allocated async client structure and initialize with the specified values. 8184 */ 8185 static mdi_async_client_config_t * 8186 alloc_async_client_config(char *ct_name, char *ct_addr, 8187 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8188 { 8189 mdi_async_client_config_t *acc; 8190 8191 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8192 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8193 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8194 acc->acc_phclient_path_list_head = pp_head; 8195 init_vhcache_lookup_token(&acc->acc_token, tok); 8196 acc->acc_next = NULL; 8197 return (acc); 8198 } 8199 8200 /* 8201 * Free the memory allocated for the async client structure and their members. 
8202 */ 8203 static void 8204 free_async_client_config(mdi_async_client_config_t *acc) 8205 { 8206 if (acc->acc_phclient_path_list_head) 8207 free_phclient_path_list(acc->acc_phclient_path_list_head); 8208 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8209 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8210 kmem_free(acc, sizeof (*acc)); 8211 } 8212 8213 /* 8214 * Sort vhcache pathinfos (cpis) of the specified client. 8215 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8216 * flag set come at the beginning of the list. All cpis which have this 8217 * flag set come at the end of the list. 8218 */ 8219 static void 8220 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8221 { 8222 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8223 8224 cpi_head = cct->cct_cpi_head; 8225 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8226 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8227 cpi_next = cpi->cpi_next; 8228 enqueue_vhcache_pathinfo(cct, cpi); 8229 } 8230 } 8231 8232 /* 8233 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8234 * every vhcache pathinfo of the specified client. If not adjust the flag 8235 * setting appropriately. 8236 * 8237 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8238 * on-disk vhci cache. So every time this flag is updated the cache must be 8239 * flushed. 8240 */ 8241 static void 8242 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8243 mdi_vhcache_lookup_token_t *tok) 8244 { 8245 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8246 mdi_vhcache_client_t *cct; 8247 mdi_vhcache_pathinfo_t *cpi; 8248 8249 rw_enter(&vhcache->vhcache_lock, RW_READER); 8250 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8251 == NULL) { 8252 rw_exit(&vhcache->vhcache_lock); 8253 return; 8254 } 8255 8256 /* 8257 * to avoid unnecessary on-disk cache updates, first check if an 8258 * update is really needed. 
If no update is needed simply return. 8259 */ 8260 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8261 if ((cpi->cpi_pip != NULL && 8262 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8263 (cpi->cpi_pip == NULL && 8264 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8265 break; 8266 } 8267 } 8268 if (cpi == NULL) { 8269 rw_exit(&vhcache->vhcache_lock); 8270 return; 8271 } 8272 8273 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8274 rw_exit(&vhcache->vhcache_lock); 8275 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8276 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8277 tok)) == NULL) { 8278 rw_exit(&vhcache->vhcache_lock); 8279 return; 8280 } 8281 } 8282 8283 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8284 if (cpi->cpi_pip != NULL) 8285 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8286 else 8287 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8288 } 8289 sort_vhcache_paths(cct); 8290 8291 rw_exit(&vhcache->vhcache_lock); 8292 vhcache_dirty(vhc); 8293 } 8294 8295 /* 8296 * Configure all specified paths of the client. 8297 */ 8298 static void 8299 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8300 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8301 { 8302 mdi_phys_path_t *pp; 8303 8304 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8305 (void) bus_config_one_phci_child(pp->phys_path); 8306 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8307 } 8308 8309 /* 8310 * Dequeue elements from vhci async client config list and bus configure 8311 * their corresponding phci clients. 
8312 */ 8313 static void 8314 config_client_paths_thread(void *arg) 8315 { 8316 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8317 mdi_async_client_config_t *acc; 8318 clock_t quit_at_ticks; 8319 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8320 callb_cpr_t cprinfo; 8321 8322 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8323 "mdi_config_client_paths"); 8324 8325 for (; ; ) { 8326 quit_at_ticks = ddi_get_lbolt() + idle_time; 8327 8328 mutex_enter(&vhc->vhc_lock); 8329 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8330 vhc->vhc_acc_list_head == NULL && 8331 ddi_get_lbolt() < quit_at_ticks) { 8332 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8333 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8334 quit_at_ticks); 8335 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8336 } 8337 8338 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8339 vhc->vhc_acc_list_head == NULL) 8340 goto out; 8341 8342 acc = vhc->vhc_acc_list_head; 8343 vhc->vhc_acc_list_head = acc->acc_next; 8344 if (vhc->vhc_acc_list_head == NULL) 8345 vhc->vhc_acc_list_tail = NULL; 8346 vhc->vhc_acc_count--; 8347 mutex_exit(&vhc->vhc_lock); 8348 8349 config_client_paths_sync(vhc, acc->acc_ct_name, 8350 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8351 &acc->acc_token); 8352 8353 free_async_client_config(acc); 8354 } 8355 8356 out: 8357 vhc->vhc_acc_thrcount--; 8358 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8359 CALLB_CPR_EXIT(&cprinfo); 8360 } 8361 8362 /* 8363 * Arrange for all the phci client paths (pp_head) for the specified client 8364 * to be bus configured asynchronously by a thread. 
8365 */ 8366 static void 8367 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8368 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8369 { 8370 mdi_async_client_config_t *acc, *newacc; 8371 int create_thread; 8372 8373 if (pp_head == NULL) 8374 return; 8375 8376 if (mdi_mtc_off) { 8377 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8378 free_phclient_path_list(pp_head); 8379 return; 8380 } 8381 8382 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8383 ASSERT(newacc); 8384 8385 mutex_enter(&vhc->vhc_lock); 8386 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8387 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8388 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8389 free_async_client_config(newacc); 8390 mutex_exit(&vhc->vhc_lock); 8391 return; 8392 } 8393 } 8394 8395 if (vhc->vhc_acc_list_head == NULL) 8396 vhc->vhc_acc_list_head = newacc; 8397 else 8398 vhc->vhc_acc_list_tail->acc_next = newacc; 8399 vhc->vhc_acc_list_tail = newacc; 8400 vhc->vhc_acc_count++; 8401 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8402 cv_broadcast(&vhc->vhc_cv); 8403 create_thread = 0; 8404 } else { 8405 vhc->vhc_acc_thrcount++; 8406 create_thread = 1; 8407 } 8408 mutex_exit(&vhc->vhc_lock); 8409 8410 if (create_thread) 8411 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8412 0, &p0, TS_RUN, minclsyspri); 8413 } 8414 8415 /* 8416 * Return number of online paths for the specified client. 
8417 */ 8418 static int 8419 nonline_paths(mdi_vhcache_client_t *cct) 8420 { 8421 mdi_vhcache_pathinfo_t *cpi; 8422 int online_count = 0; 8423 8424 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8425 if (cpi->cpi_pip != NULL) { 8426 MDI_PI_LOCK(cpi->cpi_pip); 8427 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8428 online_count++; 8429 MDI_PI_UNLOCK(cpi->cpi_pip); 8430 } 8431 } 8432 8433 return (online_count); 8434 } 8435 8436 /* 8437 * Bus configure all paths for the specified vhci client. 8438 * If at least one path for the client is already online, the remaining paths 8439 * will be configured asynchronously. Otherwise, it synchronously configures 8440 * the paths until at least one path is online and then rest of the paths 8441 * will be configured asynchronously. 8442 */ 8443 static void 8444 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8445 { 8446 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8447 mdi_phys_path_t *pp_head, *pp; 8448 mdi_vhcache_client_t *cct; 8449 mdi_vhcache_lookup_token_t tok; 8450 8451 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8452 8453 init_vhcache_lookup_token(&tok, NULL); 8454 8455 if (ct_name == NULL || ct_addr == NULL || 8456 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8457 == NULL || 8458 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8459 rw_exit(&vhcache->vhcache_lock); 8460 return; 8461 } 8462 8463 /* if at least one path is online, configure the rest asynchronously */ 8464 if (nonline_paths(cct) > 0) { 8465 rw_exit(&vhcache->vhcache_lock); 8466 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8467 return; 8468 } 8469 8470 rw_exit(&vhcache->vhcache_lock); 8471 8472 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8473 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8474 rw_enter(&vhcache->vhcache_lock, RW_READER); 8475 8476 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8477 ct_addr, &tok)) 
== NULL) { 8478 rw_exit(&vhcache->vhcache_lock); 8479 goto out; 8480 } 8481 8482 if (nonline_paths(cct) > 0 && 8483 pp->phys_path_next != NULL) { 8484 rw_exit(&vhcache->vhcache_lock); 8485 config_client_paths_async(vhc, ct_name, ct_addr, 8486 pp->phys_path_next, &tok); 8487 pp->phys_path_next = NULL; 8488 goto out; 8489 } 8490 8491 rw_exit(&vhcache->vhcache_lock); 8492 } 8493 } 8494 8495 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8496 out: 8497 free_phclient_path_list(pp_head); 8498 } 8499 8500 static void 8501 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8502 { 8503 mutex_enter(&vhc->vhc_lock); 8504 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8505 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8506 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8507 mutex_exit(&vhc->vhc_lock); 8508 } 8509 8510 static void 8511 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8512 { 8513 mutex_enter(&vhc->vhc_lock); 8514 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8515 cv_broadcast(&vhc->vhc_cv); 8516 mutex_exit(&vhc->vhc_lock); 8517 } 8518 8519 typedef struct mdi_phci_driver_info { 8520 char *phdriver_name; /* name of the phci driver */ 8521 8522 /* set to non zero if the phci driver supports root device */ 8523 int phdriver_root_support; 8524 } mdi_phci_driver_info_t; 8525 8526 /* 8527 * vhci class and root support capability of a phci driver can be 8528 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8529 * phci driver.conf file. The built-in tables below contain this information 8530 * for those phci drivers whose driver.conf files don't yet contain this info. 8531 * 8532 * All phci drivers expect iscsi have root device support. 
8533 */ 8534 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8535 { "fp", 1 }, 8536 { "iscsi", 0 }, 8537 { "ibsrp", 1 } 8538 }; 8539 8540 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8541 8542 static void * 8543 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8544 { 8545 void *new_ptr; 8546 8547 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8548 if (old_ptr) { 8549 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8550 kmem_free(old_ptr, old_size); 8551 } 8552 return (new_ptr); 8553 } 8554 8555 static void 8556 add_to_phci_list(char ***driver_list, int **root_support_list, 8557 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8558 { 8559 ASSERT(*cur_elements <= *max_elements); 8560 if (*cur_elements == *max_elements) { 8561 *max_elements += 10; 8562 *driver_list = mdi_realloc(*driver_list, 8563 sizeof (char *) * (*cur_elements), 8564 sizeof (char *) * (*max_elements)); 8565 *root_support_list = mdi_realloc(*root_support_list, 8566 sizeof (int) * (*cur_elements), 8567 sizeof (int) * (*max_elements)); 8568 } 8569 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8570 (*root_support_list)[*cur_elements] = root_support; 8571 (*cur_elements)++; 8572 } 8573 8574 static void 8575 get_phci_driver_list(char *vhci_class, char ***driver_list, 8576 int **root_support_list, int *cur_elements, int *max_elements) 8577 { 8578 mdi_phci_driver_info_t *st_driver_list, *p; 8579 int st_ndrivers, root_support, i, j, driver_conf_count; 8580 major_t m; 8581 struct devnames *dnp; 8582 ddi_prop_t *propp; 8583 8584 *driver_list = NULL; 8585 *root_support_list = NULL; 8586 *cur_elements = 0; 8587 *max_elements = 0; 8588 8589 /* add the phci drivers derived from the phci driver.conf files */ 8590 for (m = 0; m < devcnt; m++) { 8591 dnp = &devnamesp[m]; 8592 8593 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8594 LOCK_DEV_OPS(&dnp->dn_lock); 8595 if (dnp->dn_global_prop_ptr != NULL && 8596 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8597 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8598 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8599 strcmp(propp->prop_val, vhci_class) == 0) { 8600 8601 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8602 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8603 &dnp->dn_global_prop_ptr->prop_list) 8604 == NULL) ? 1 : 0; 8605 8606 add_to_phci_list(driver_list, root_support_list, 8607 cur_elements, max_elements, dnp->dn_name, 8608 root_support); 8609 8610 UNLOCK_DEV_OPS(&dnp->dn_lock); 8611 } else 8612 UNLOCK_DEV_OPS(&dnp->dn_lock); 8613 } 8614 } 8615 8616 driver_conf_count = *cur_elements; 8617 8618 /* add the phci drivers specified in the built-in tables */ 8619 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8620 st_driver_list = scsi_phci_driver_list; 8621 st_ndrivers = sizeof (scsi_phci_driver_list) / 8622 sizeof (mdi_phci_driver_info_t); 8623 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8624 st_driver_list = ib_phci_driver_list; 8625 st_ndrivers = sizeof (ib_phci_driver_list) / 8626 sizeof (mdi_phci_driver_info_t); 8627 } else { 8628 st_driver_list = NULL; 8629 st_ndrivers = 0; 8630 } 8631 8632 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8633 /* add this phci driver if not already added before */ 8634 for (j = 0; j < driver_conf_count; j++) { 8635 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8636 break; 8637 } 8638 if (j == driver_conf_count) { 8639 add_to_phci_list(driver_list, root_support_list, 8640 cur_elements, max_elements, p->phdriver_name, 8641 p->phdriver_root_support); 8642 } 8643 } 8644 } 8645 8646 /* 8647 * Attach the phci driver instances associated with the specified vhci class. 8648 * If root is mounted attach all phci driver instances. 8649 * If root is not mounted, attach the instances of only those phci 8650 * drivers that have the root support. 
8651 */ 8652 static void 8653 attach_phci_drivers(char *vhci_class) 8654 { 8655 char **driver_list, **p; 8656 int *root_support_list; 8657 int cur_elements, max_elements, i; 8658 major_t m; 8659 8660 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8661 &cur_elements, &max_elements); 8662 8663 for (i = 0; i < cur_elements; i++) { 8664 if (modrootloaded || root_support_list[i]) { 8665 m = ddi_name_to_major(driver_list[i]); 8666 if (m != (major_t)-1 && ddi_hold_installed_driver(m)) 8667 ddi_rele_driver(m); 8668 } 8669 } 8670 8671 if (driver_list) { 8672 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8673 kmem_free(*p, strlen(*p) + 1); 8674 kmem_free(driver_list, sizeof (char *) * max_elements); 8675 kmem_free(root_support_list, sizeof (int) * max_elements); 8676 } 8677 } 8678 8679 /* 8680 * Build vhci cache: 8681 * 8682 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8683 * the phci driver instances. During this process the cache gets built. 8684 * 8685 * Cache is built fully if the root is mounted. 8686 * If the root is not mounted, phci drivers that do not have root support 8687 * are not attached. As a result the cache is built partially. The entries 8688 * in the cache reflect only those phci drivers that have root support. 
8689 */ 8690 static int 8691 build_vhci_cache(mdi_vhci_t *vh) 8692 { 8693 mdi_vhci_config_t *vhc = vh->vh_config; 8694 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8695 8696 single_threaded_vhconfig_enter(vhc); 8697 8698 rw_enter(&vhcache->vhcache_lock, RW_READER); 8699 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8700 rw_exit(&vhcache->vhcache_lock); 8701 single_threaded_vhconfig_exit(vhc); 8702 return (0); 8703 } 8704 rw_exit(&vhcache->vhcache_lock); 8705 8706 attach_phci_drivers(vh->vh_class); 8707 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8708 BUS_CONFIG_ALL, (major_t)-1); 8709 8710 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8711 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8712 rw_exit(&vhcache->vhcache_lock); 8713 8714 single_threaded_vhconfig_exit(vhc); 8715 vhcache_dirty(vhc); 8716 return (1); 8717 } 8718 8719 /* 8720 * Determine if discovery of paths is needed. 8721 */ 8722 static int 8723 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8724 { 8725 int rv = 1; 8726 8727 mutex_enter(&vhc->vhc_lock); 8728 if (i_ddi_io_initialized() == 0) { 8729 if (vhc->vhc_path_discovery_boot > 0) { 8730 vhc->vhc_path_discovery_boot--; 8731 goto out; 8732 } 8733 } else { 8734 if (vhc->vhc_path_discovery_postboot > 0) { 8735 vhc->vhc_path_discovery_postboot--; 8736 goto out; 8737 } 8738 } 8739 8740 /* 8741 * Do full path discovery at most once per mdi_path_discovery_interval. 8742 * This is to avoid a series of full path discoveries when opening 8743 * stale /dev/[r]dsk links. 8744 */ 8745 if (mdi_path_discovery_interval != -1 && 8746 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8747 goto out; 8748 8749 rv = 0; 8750 out: 8751 mutex_exit(&vhc->vhc_lock); 8752 return (rv); 8753 } 8754 8755 /* 8756 * Discover all paths: 8757 * 8758 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8759 * driver instances. During this process all paths will be discovered. 
8760 */ 8761 static int 8762 vhcache_discover_paths(mdi_vhci_t *vh) 8763 { 8764 mdi_vhci_config_t *vhc = vh->vh_config; 8765 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8766 int rv = 0; 8767 8768 single_threaded_vhconfig_enter(vhc); 8769 8770 if (vhcache_do_discovery(vhc)) { 8771 attach_phci_drivers(vh->vh_class); 8772 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8773 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8774 8775 mutex_enter(&vhc->vhc_lock); 8776 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8777 mdi_path_discovery_interval * TICKS_PER_SECOND; 8778 mutex_exit(&vhc->vhc_lock); 8779 rv = 1; 8780 } 8781 8782 single_threaded_vhconfig_exit(vhc); 8783 return (rv); 8784 } 8785 8786 /* 8787 * Generic vhci bus config implementation: 8788 * 8789 * Parameters 8790 * vdip vhci dip 8791 * flags bus config flags 8792 * op bus config operation 8793 * The remaining parameters are bus config operation specific 8794 * 8795 * for BUS_CONFIG_ONE 8796 * arg pointer to name@addr 8797 * child upon successful return from this function, *child will be 8798 * set to the configured and held devinfo child node of vdip. 8799 * ct_addr pointer to client address (i.e. GUID) 8800 * 8801 * for BUS_CONFIG_DRIVER 8802 * arg major number of the driver 8803 * child and ct_addr parameters are ignored 8804 * 8805 * for BUS_CONFIG_ALL 8806 * arg, child, and ct_addr parameters are ignored 8807 * 8808 * Note that for the rest of the bus config operations, this function simply 8809 * calls the framework provided default bus config routine. 
8810 */ 8811 int 8812 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8813 void *arg, dev_info_t **child, char *ct_addr) 8814 { 8815 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8816 mdi_vhci_config_t *vhc = vh->vh_config; 8817 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8818 int rv = 0; 8819 int params_valid = 0; 8820 char *cp; 8821 8822 /* 8823 * To bus config vhcis we relay operation, possibly using another 8824 * thread, to phcis. The phci driver then interacts with MDI to cause 8825 * vhci child nodes to be enumerated under the vhci node. Adding a 8826 * vhci child requires an ndi_devi_enter of the vhci. Since another 8827 * thread may be adding the child, to avoid deadlock we can't wait 8828 * for the relayed operations to complete if we have already entered 8829 * the vhci node. 8830 */ 8831 if (DEVI_BUSY_OWNED(vdip)) { 8832 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8833 "vhci dip is busy owned %p\n", (void *)vdip)); 8834 goto default_bus_config; 8835 } 8836 8837 rw_enter(&vhcache->vhcache_lock, RW_READER); 8838 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8839 rw_exit(&vhcache->vhcache_lock); 8840 rv = build_vhci_cache(vh); 8841 rw_enter(&vhcache->vhcache_lock, RW_READER); 8842 } 8843 8844 switch (op) { 8845 case BUS_CONFIG_ONE: 8846 if (arg != NULL && ct_addr != NULL) { 8847 /* extract node name */ 8848 cp = (char *)arg; 8849 while (*cp != '\0' && *cp != '@') 8850 cp++; 8851 if (*cp == '@') { 8852 params_valid = 1; 8853 *cp = '\0'; 8854 config_client_paths(vhc, (char *)arg, ct_addr); 8855 /* config_client_paths() releases cache_lock */ 8856 *cp = '@'; 8857 break; 8858 } 8859 } 8860 8861 rw_exit(&vhcache->vhcache_lock); 8862 break; 8863 8864 case BUS_CONFIG_DRIVER: 8865 rw_exit(&vhcache->vhcache_lock); 8866 if (rv == 0) 8867 st_bus_config_all_phcis(vhc, flags, op, 8868 (major_t)(uintptr_t)arg); 8869 break; 8870 8871 case BUS_CONFIG_ALL: 8872 rw_exit(&vhcache->vhcache_lock); 8873 if (rv == 0) 8874 
st_bus_config_all_phcis(vhc, flags, op, -1); 8875 break; 8876 8877 default: 8878 rw_exit(&vhcache->vhcache_lock); 8879 break; 8880 } 8881 8882 8883 default_bus_config: 8884 /* 8885 * All requested child nodes are enumerated under the vhci. 8886 * Now configure them. 8887 */ 8888 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8889 NDI_SUCCESS) { 8890 return (MDI_SUCCESS); 8891 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8892 /* discover all paths and try configuring again */ 8893 if (vhcache_discover_paths(vh) && 8894 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8895 NDI_SUCCESS) 8896 return (MDI_SUCCESS); 8897 } 8898 8899 return (MDI_FAILURE); 8900 } 8901 8902 /* 8903 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8904 */ 8905 static nvlist_t * 8906 read_on_disk_vhci_cache(char *vhci_class) 8907 { 8908 nvlist_t *nvl; 8909 int err; 8910 char *filename; 8911 8912 filename = vhclass2vhcache_filename(vhci_class); 8913 8914 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8915 kmem_free(filename, strlen(filename) + 1); 8916 return (nvl); 8917 } else if (err == EIO) 8918 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8919 else if (err == EINVAL) 8920 cmn_err(CE_WARN, 8921 "%s: data file corrupted, will recreate\n", filename); 8922 8923 kmem_free(filename, strlen(filename) + 1); 8924 return (NULL); 8925 } 8926 8927 /* 8928 * Read on-disk vhci cache into nvlists for all vhci classes. 8929 * Called during booting by i_ddi_read_devices_files(). 8930 */ 8931 void 8932 mdi_read_devices_files(void) 8933 { 8934 int i; 8935 8936 for (i = 0; i < N_VHCI_CLASSES; i++) 8937 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8938 } 8939 8940 /* 8941 * Remove all stale entries from vhci cache. 
8942 */ 8943 static void 8944 clean_vhcache(mdi_vhci_config_t *vhc) 8945 { 8946 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8947 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8948 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8949 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8950 8951 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8952 8953 cct_head = vhcache->vhcache_client_head; 8954 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8955 for (cct = cct_head; cct != NULL; cct = cct_next) { 8956 cct_next = cct->cct_next; 8957 8958 cpi_head = cct->cct_cpi_head; 8959 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8960 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8961 cpi_next = cpi->cpi_next; 8962 if (cpi->cpi_pip != NULL) { 8963 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8964 enqueue_tail_vhcache_pathinfo(cct, cpi); 8965 } else 8966 free_vhcache_pathinfo(cpi); 8967 } 8968 8969 if (cct->cct_cpi_head != NULL) 8970 enqueue_vhcache_client(vhcache, cct); 8971 else { 8972 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8973 (mod_hash_key_t)cct->cct_name_addr); 8974 free_vhcache_client(cct); 8975 } 8976 } 8977 8978 cphci_head = vhcache->vhcache_phci_head; 8979 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8980 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8981 cphci_next = cphci->cphci_next; 8982 if (cphci->cphci_phci != NULL) 8983 enqueue_vhcache_phci(vhcache, cphci); 8984 else 8985 free_vhcache_phci(cphci); 8986 } 8987 8988 vhcache->vhcache_clean_time = lbolt64; 8989 rw_exit(&vhcache->vhcache_lock); 8990 vhcache_dirty(vhc); 8991 } 8992 8993 /* 8994 * Remove all stale entries from vhci cache. 
8995 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8996 */ 8997 void 8998 mdi_clean_vhcache(void) 8999 { 9000 mdi_vhci_t *vh; 9001 9002 mutex_enter(&mdi_mutex); 9003 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9004 vh->vh_refcnt++; 9005 mutex_exit(&mdi_mutex); 9006 clean_vhcache(vh->vh_config); 9007 mutex_enter(&mdi_mutex); 9008 vh->vh_refcnt--; 9009 } 9010 mutex_exit(&mdi_mutex); 9011 } 9012 9013 /* 9014 * mdi_vhci_walk_clients(): 9015 * Walker routine to traverse client dev_info nodes 9016 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9017 * below the client, including nexus devices, which we dont want. 9018 * So we just traverse the immediate siblings, starting from 1st client. 9019 */ 9020 void 9021 mdi_vhci_walk_clients(dev_info_t *vdip, 9022 int (*f)(dev_info_t *, void *), void *arg) 9023 { 9024 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9025 dev_info_t *cdip; 9026 mdi_client_t *ct; 9027 9028 MDI_VHCI_CLIENT_LOCK(vh); 9029 cdip = ddi_get_child(vdip); 9030 while (cdip) { 9031 ct = i_devi_get_client(cdip); 9032 MDI_CLIENT_LOCK(ct); 9033 9034 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9035 cdip = ddi_get_next_sibling(cdip); 9036 else 9037 cdip = NULL; 9038 9039 MDI_CLIENT_UNLOCK(ct); 9040 } 9041 MDI_VHCI_CLIENT_UNLOCK(vh); 9042 } 9043 9044 /* 9045 * mdi_vhci_walk_phcis(): 9046 * Walker routine to traverse phci dev_info nodes 9047 */ 9048 void 9049 mdi_vhci_walk_phcis(dev_info_t *vdip, 9050 int (*f)(dev_info_t *, void *), void *arg) 9051 { 9052 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9053 mdi_phci_t *ph, *next; 9054 9055 MDI_VHCI_PHCI_LOCK(vh); 9056 ph = vh->vh_phci_head; 9057 while (ph) { 9058 MDI_PHCI_LOCK(ph); 9059 9060 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9061 next = ph->ph_next; 9062 else 9063 next = NULL; 9064 9065 MDI_PHCI_UNLOCK(ph); 9066 ph = next; 9067 } 9068 MDI_VHCI_PHCI_UNLOCK(vh); 9069 } 9070 9071 9072 /* 9073 * mdi_walk_vhcis(): 9074 * Walker routine to traverse vhci 
dev_info nodes 9075 */ 9076 void 9077 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9078 { 9079 mdi_vhci_t *vh = NULL; 9080 9081 mutex_enter(&mdi_mutex); 9082 /* 9083 * Scan for already registered vhci 9084 */ 9085 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9086 vh->vh_refcnt++; 9087 mutex_exit(&mdi_mutex); 9088 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9089 mutex_enter(&mdi_mutex); 9090 vh->vh_refcnt--; 9091 break; 9092 } else { 9093 mutex_enter(&mdi_mutex); 9094 vh->vh_refcnt--; 9095 } 9096 } 9097 9098 mutex_exit(&mdi_mutex); 9099 } 9100 9101 /* 9102 * i_mdi_log_sysevent(): 9103 * Logs events for pickup by syseventd 9104 */ 9105 static void 9106 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9107 { 9108 char *path_name; 9109 nvlist_t *attr_list; 9110 9111 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9112 KM_SLEEP) != DDI_SUCCESS) { 9113 goto alloc_failed; 9114 } 9115 9116 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9117 (void) ddi_pathname(dip, path_name); 9118 9119 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9120 ddi_driver_name(dip)) != DDI_SUCCESS) { 9121 goto error; 9122 } 9123 9124 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9125 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9126 goto error; 9127 } 9128 9129 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9130 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9131 goto error; 9132 } 9133 9134 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9135 path_name) != DDI_SUCCESS) { 9136 goto error; 9137 } 9138 9139 if (nvlist_add_string(attr_list, DDI_CLASS, 9140 ph_vh_class) != DDI_SUCCESS) { 9141 goto error; 9142 } 9143 9144 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9145 attr_list, NULL, DDI_SLEEP); 9146 9147 error: 9148 kmem_free(path_name, MAXPATHLEN); 9149 nvlist_free(attr_list); 9150 return; 9151 9152 alloc_failed: 9153 MDI_DEBUG(1, (CE_WARN, dip, 9154 "!i_mdi_log_sysevent: Unable to send sysevent")); 9155 } 
9156 9157 char ** 9158 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9159 { 9160 char **driver_list, **ret_driver_list = NULL; 9161 int *root_support_list; 9162 int cur_elements, max_elements; 9163 9164 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9165 &cur_elements, &max_elements); 9166 9167 9168 if (driver_list) { 9169 kmem_free(root_support_list, sizeof (int) * max_elements); 9170 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9171 * max_elements, sizeof (char *) * cur_elements); 9172 } 9173 *ndrivers = cur_elements; 9174 9175 return (ret_driver_list); 9176 9177 } 9178 9179 void 9180 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9181 { 9182 char **p; 9183 int i; 9184 9185 if (driver_list) { 9186 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9187 kmem_free(*p, strlen(*p) + 1); 9188 kmem_free(driver_list, sizeof (char *) * ndrivers); 9189 } 9190 } 9191