1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 #include <sys/sysmacros.h> 72 73 #ifdef DEBUG 74 #include <sys/debug.h> 75 int mdi_debug = 1; 76 int mdi_debug_logonly = 0; 77 #define MDI_DEBUG(level, stmnt) \ 78 if (mdi_debug >= (level)) i_mdi_log stmnt 79 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 80 #else /* !DEBUG */ 81 #define MDI_DEBUG(level, stmnt) 82 #endif /* DEBUG */ 83 84 extern pri_t minclsyspri; 85 extern int modrootloaded; 86 87 /* 88 * Global mutex: 89 * Protects vHCI list and structure members. 
90 */ 91 kmutex_t mdi_mutex; 92 93 /* 94 * Registered vHCI class driver lists 95 */ 96 int mdi_vhci_count; 97 mdi_vhci_t *mdi_vhci_head; 98 mdi_vhci_t *mdi_vhci_tail; 99 100 /* 101 * Client Hash Table size 102 */ 103 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 104 105 /* 106 * taskq interface definitions 107 */ 108 #define MDI_TASKQ_N_THREADS 8 109 #define MDI_TASKQ_PRI minclsyspri 110 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 111 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 112 113 taskq_t *mdi_taskq; 114 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 115 116 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 117 118 /* 119 * The data should be "quiet" for this interval (in seconds) before the 120 * vhci cached data is flushed to the disk. 121 */ 122 static int mdi_vhcache_flush_delay = 10; 123 124 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 125 static int mdi_vhcache_flush_daemon_idle_time = 60; 126 127 /* 128 * MDI falls back to discovery of all paths when a bus_config_one fails. 129 * The following parameters can be used to tune this operation. 130 * 131 * mdi_path_discovery_boot 132 * Number of times path discovery will be attempted during early boot. 133 * Probably there is no reason to ever set this value to greater than one. 134 * 135 * mdi_path_discovery_postboot 136 * Number of times path discovery will be attempted after early boot. 137 * Set it to a minimum of two to allow for discovery of iscsi paths which 138 * may happen very late during booting. 139 * 140 * mdi_path_discovery_interval 141 * Minimum number of seconds MDI will wait between successive discovery 142 * of all paths. Set it to -1 to disable discovery of all paths. 
143 */ 144 static int mdi_path_discovery_boot = 1; 145 static int mdi_path_discovery_postboot = 2; 146 static int mdi_path_discovery_interval = 10; 147 148 /* 149 * number of seconds the asynchronous configuration thread will sleep idle 150 * before exiting. 151 */ 152 static int mdi_async_config_idle_time = 600; 153 154 static int mdi_bus_config_cache_hash_size = 256; 155 156 /* turns off multithreaded configuration for certain operations */ 157 static int mdi_mtc_off = 0; 158 159 /* 160 * MDI component property name/value string definitions 161 */ 162 const char *mdi_component_prop = "mpxio-component"; 163 const char *mdi_component_prop_vhci = "vhci"; 164 const char *mdi_component_prop_phci = "phci"; 165 const char *mdi_component_prop_client = "client"; 166 167 /* 168 * MDI client global unique identifier property name 169 */ 170 const char *mdi_client_guid_prop = "client-guid"; 171 172 /* 173 * MDI client load balancing property name/value string definitions 174 */ 175 const char *mdi_load_balance = "load-balance"; 176 const char *mdi_load_balance_none = "none"; 177 const char *mdi_load_balance_rr = "round-robin"; 178 const char *mdi_load_balance_lba = "logical-block"; 179 180 /* 181 * Obsolete vHCI class definition; to be removed after Leadville update 182 */ 183 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 184 185 static char vhci_greeting[] = 186 "\tThere already exists one vHCI driver for class %s\n" 187 "\tOnly one vHCI driver for each class is allowed\n"; 188 189 /* 190 * Static function prototypes 191 */ 192 static int i_mdi_phci_offline(dev_info_t *, uint_t); 193 static int i_mdi_client_offline(dev_info_t *, uint_t); 194 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 195 static void i_mdi_phci_post_detach(dev_info_t *, 196 ddi_detach_cmd_t, int); 197 static int i_mdi_client_pre_detach(dev_info_t *, 198 ddi_detach_cmd_t); 199 static void i_mdi_client_post_detach(dev_info_t *, 200 ddi_detach_cmd_t, int); 201 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 202 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 203 static int i_mdi_lba_lb(mdi_client_t *ct, 204 mdi_pathinfo_t **ret_pip, struct buf *buf); 205 static void i_mdi_pm_hold_client(mdi_client_t *, int); 206 static void i_mdi_pm_rele_client(mdi_client_t *, int); 207 static void i_mdi_pm_reset_client(mdi_client_t *); 208 static int i_mdi_power_all_phci(mdi_client_t *); 209 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 210 211 212 /* 213 * Internal mdi_pathinfo node functions 214 */ 215 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 216 217 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 218 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 219 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 220 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 221 static void i_mdi_phci_unlock(mdi_phci_t *); 222 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 223 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 224 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 225 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 226 mdi_client_t *); 227 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 228 static void i_mdi_client_remove_path(mdi_client_t *, 229 mdi_pathinfo_t *); 230 231 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 232 mdi_pathinfo_state_t, int); 233 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 234 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 235 char **, int); 236 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 237 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 238 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 239 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 240 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 241 static void i_mdi_client_delist_table(mdi_vhci_t *, 
mdi_client_t *); 242 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 243 static void i_mdi_client_update_state(mdi_client_t *); 244 static int i_mdi_client_compute_state(mdi_client_t *, 245 mdi_phci_t *); 246 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 247 static void i_mdi_client_unlock(mdi_client_t *); 248 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 249 static mdi_client_t *i_devi_get_client(dev_info_t *); 250 /* 251 * NOTE: this will be removed once the NWS files are changed to use the new 252 * mdi_{enable,disable}_path interfaces 253 */ 254 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 255 int, int); 256 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 257 mdi_vhci_t *vh, int flags, int op); 258 /* 259 * Failover related function prototypes 260 */ 261 static int i_mdi_failover(void *); 262 263 /* 264 * misc internal functions 265 */ 266 static int i_mdi_get_hash_key(char *); 267 static int i_map_nvlist_error_to_mdi(int); 268 static void i_mdi_report_path_state(mdi_client_t *, 269 mdi_pathinfo_t *); 270 271 static void setup_vhci_cache(mdi_vhci_t *); 272 static int destroy_vhci_cache(mdi_vhci_t *); 273 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 274 static boolean_t stop_vhcache_flush_thread(void *, int); 275 static void free_string_array(char **, int); 276 static void free_vhcache_phci(mdi_vhcache_phci_t *); 277 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 278 static void free_vhcache_client(mdi_vhcache_client_t *); 279 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 280 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 281 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 282 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 283 static void vhcache_pi_add(mdi_vhci_config_t *, 284 struct mdi_pathinfo *); 285 static void vhcache_pi_remove(mdi_vhci_config_t *, 286 struct 
mdi_pathinfo *); 287 static void free_phclient_path_list(mdi_phys_path_t *); 288 static void sort_vhcache_paths(mdi_vhcache_client_t *); 289 static int flush_vhcache(mdi_vhci_config_t *, int); 290 static void vhcache_dirty(mdi_vhci_config_t *); 291 static void free_async_client_config(mdi_async_client_config_t *); 292 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 293 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 294 static nvlist_t *read_on_disk_vhci_cache(char *); 295 extern int fread_nvlist(char *, nvlist_t **); 296 extern int fwrite_nvlist(char *, nvlist_t *); 297 298 /* called once when first vhci registers with mdi */ 299 static void 300 i_mdi_init() 301 { 302 static int initialized = 0; 303 304 if (initialized) 305 return; 306 initialized = 1; 307 308 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 309 /* 310 * Create our taskq resources 311 */ 312 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 313 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 314 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 315 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 316 } 317 318 /* 319 * mdi_get_component_type(): 320 * Return mpxio component type 321 * Return Values: 322 * MDI_COMPONENT_NONE 323 * MDI_COMPONENT_VHCI 324 * MDI_COMPONENT_PHCI 325 * MDI_COMPONENT_CLIENT 326 * XXX This doesn't work under multi-level MPxIO and should be 327 * removed when clients migrate mdi_component_is_*() interfaces. 328 */ 329 int 330 mdi_get_component_type(dev_info_t *dip) 331 { 332 return (DEVI(dip)->devi_mdi_component); 333 } 334 335 /* 336 * mdi_vhci_register(): 337 * Register a vHCI module with the mpxio framework 338 * mdi_vhci_register() is called by vHCI drivers to register the 339 * 'class_driver' vHCI driver and its MDI entrypoints with the 340 * mpxio framework. The vHCI driver must call this interface as 341 * part of its attach(9e) handler. 
342 * Competing threads may try to attach mdi_vhci_register() as 343 * the vHCI drivers are loaded and attached as a result of pHCI 344 * driver instance registration (mdi_phci_register()) with the 345 * framework. 346 * Return Values: 347 * MDI_SUCCESS 348 * MDI_FAILURE 349 */ 350 /*ARGSUSED*/ 351 int 352 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 353 int flags) 354 { 355 mdi_vhci_t *vh = NULL; 356 357 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 358 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 359 360 i_mdi_init(); 361 362 mutex_enter(&mdi_mutex); 363 /* 364 * Scan for already registered vhci 365 */ 366 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 367 if (strcmp(vh->vh_class, class) == 0) { 368 /* 369 * vHCI has already been created. Check for valid 370 * vHCI ops registration. We only support one vHCI 371 * module per class 372 */ 373 if (vh->vh_ops != NULL) { 374 mutex_exit(&mdi_mutex); 375 cmn_err(CE_NOTE, vhci_greeting, class); 376 return (MDI_FAILURE); 377 } 378 break; 379 } 380 } 381 382 /* 383 * if not yet created, create the vHCI component 384 */ 385 if (vh == NULL) { 386 struct client_hash *hash = NULL; 387 char *load_balance; 388 389 /* 390 * Allocate and initialize the mdi extensions 391 */ 392 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 393 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 394 KM_SLEEP); 395 vh->vh_client_table = hash; 396 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 397 (void) strcpy(vh->vh_class, class); 398 vh->vh_lb = LOAD_BALANCE_RR; 399 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 400 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 401 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 402 vh->vh_lb = LOAD_BALANCE_NONE; 403 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 404 == 0) { 405 vh->vh_lb = LOAD_BALANCE_LBA; 406 } 407 ddi_prop_free(load_balance); 408 } 409 410 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 411 
mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 412 413 /* 414 * Store the vHCI ops vectors 415 */ 416 vh->vh_dip = vdip; 417 vh->vh_ops = vops; 418 419 setup_vhci_cache(vh); 420 421 if (mdi_vhci_head == NULL) { 422 mdi_vhci_head = vh; 423 } 424 if (mdi_vhci_tail) { 425 mdi_vhci_tail->vh_next = vh; 426 } 427 mdi_vhci_tail = vh; 428 mdi_vhci_count++; 429 } 430 431 /* 432 * Claim the devfs node as a vhci component 433 */ 434 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 435 436 /* 437 * Initialize our back reference from dev_info node 438 */ 439 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 440 mutex_exit(&mdi_mutex); 441 return (MDI_SUCCESS); 442 } 443 444 /* 445 * mdi_vhci_unregister(): 446 * Unregister a vHCI module from mpxio framework 447 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 448 * of a vhci to unregister it from the framework. 449 * Return Values: 450 * MDI_SUCCESS 451 * MDI_FAILURE 452 */ 453 /*ARGSUSED*/ 454 int 455 mdi_vhci_unregister(dev_info_t *vdip, int flags) 456 { 457 mdi_vhci_t *found, *vh, *prev = NULL; 458 459 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 460 461 /* 462 * Check for invalid VHCI 463 */ 464 if ((vh = i_devi_get_vhci(vdip)) == NULL) 465 return (MDI_FAILURE); 466 467 /* 468 * Scan the list of registered vHCIs for a match 469 */ 470 mutex_enter(&mdi_mutex); 471 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 472 if (found == vh) 473 break; 474 prev = found; 475 } 476 477 if (found == NULL) { 478 mutex_exit(&mdi_mutex); 479 return (MDI_FAILURE); 480 } 481 482 /* 483 * Check the vHCI, pHCI and client count. All the pHCIs and clients 484 * should have been unregistered, before a vHCI can be 485 * unregistered. 
486 */ 487 MDI_VHCI_PHCI_LOCK(vh); 488 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 489 MDI_VHCI_PHCI_UNLOCK(vh); 490 mutex_exit(&mdi_mutex); 491 return (MDI_FAILURE); 492 } 493 MDI_VHCI_PHCI_UNLOCK(vh); 494 495 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 496 mutex_exit(&mdi_mutex); 497 return (MDI_FAILURE); 498 } 499 500 /* 501 * Remove the vHCI from the global list 502 */ 503 if (vh == mdi_vhci_head) { 504 mdi_vhci_head = vh->vh_next; 505 } else { 506 prev->vh_next = vh->vh_next; 507 } 508 if (vh == mdi_vhci_tail) { 509 mdi_vhci_tail = prev; 510 } 511 mdi_vhci_count--; 512 mutex_exit(&mdi_mutex); 513 514 vh->vh_ops = NULL; 515 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 516 DEVI(vdip)->devi_mdi_xhci = NULL; 517 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 518 kmem_free(vh->vh_client_table, 519 mdi_client_table_size * sizeof (struct client_hash)); 520 mutex_destroy(&vh->vh_phci_mutex); 521 mutex_destroy(&vh->vh_client_mutex); 522 523 kmem_free(vh, sizeof (mdi_vhci_t)); 524 return (MDI_SUCCESS); 525 } 526 527 /* 528 * i_mdi_vhci_class2vhci(): 529 * Look for a matching vHCI module given a vHCI class name 530 * Return Values: 531 * Handle to a vHCI component 532 * NULL 533 */ 534 static mdi_vhci_t * 535 i_mdi_vhci_class2vhci(char *class) 536 { 537 mdi_vhci_t *vh = NULL; 538 539 ASSERT(!MUTEX_HELD(&mdi_mutex)); 540 541 mutex_enter(&mdi_mutex); 542 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 543 if (strcmp(vh->vh_class, class) == 0) { 544 break; 545 } 546 } 547 mutex_exit(&mdi_mutex); 548 return (vh); 549 } 550 551 /* 552 * i_devi_get_vhci(): 553 * Utility function to get the handle to a vHCI component 554 * Return Values: 555 * Handle to a vHCI component 556 * NULL 557 */ 558 mdi_vhci_t * 559 i_devi_get_vhci(dev_info_t *vdip) 560 { 561 mdi_vhci_t *vh = NULL; 562 if (MDI_VHCI(vdip)) { 563 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 564 } 565 return (vh); 566 } 567 568 /* 569 * mdi_phci_register(): 570 * Register a pHCI 
module with mpxio framework 571 * mdi_phci_register() is called by pHCI drivers to register with 572 * the mpxio framework and a specific 'class_driver' vHCI. The 573 * pHCI driver must call this interface as part of its attach(9e) 574 * handler. 575 * Return Values: 576 * MDI_SUCCESS 577 * MDI_FAILURE 578 */ 579 /*ARGSUSED*/ 580 int 581 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 582 { 583 mdi_phci_t *ph; 584 mdi_vhci_t *vh; 585 char *data; 586 char *pathname; 587 588 /* 589 * Some subsystems, like fcp, perform pHCI registration from a 590 * different thread than the one doing the pHCI attach(9E) - the 591 * driver attach code is waiting for this other thread to complete. 592 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 593 * (indicating that some thread has done an ndi_devi_enter of parent) 594 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 595 */ 596 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 597 598 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 599 (void) ddi_pathname(pdip, pathname); 600 601 /* 602 * Check for mpxio-disable property. Enable mpxio if the property is 603 * missing or not set to "yes". 604 * If the property is set to "yes" then emit a brief message. 
605 */ 606 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 607 &data) == DDI_SUCCESS)) { 608 if (strcmp(data, "yes") == 0) { 609 MDI_DEBUG(1, (CE_CONT, pdip, 610 "?%s (%s%d) multipath capabilities " 611 "disabled via %s.conf.\n", pathname, 612 ddi_driver_name(pdip), ddi_get_instance(pdip), 613 ddi_driver_name(pdip))); 614 ddi_prop_free(data); 615 kmem_free(pathname, MAXPATHLEN); 616 return (MDI_FAILURE); 617 } 618 ddi_prop_free(data); 619 } 620 621 kmem_free(pathname, MAXPATHLEN); 622 623 /* 624 * Search for a matching vHCI 625 */ 626 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 627 if (vh == NULL) { 628 return (MDI_FAILURE); 629 } 630 631 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 632 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 633 ph->ph_dip = pdip; 634 ph->ph_vhci = vh; 635 ph->ph_next = NULL; 636 ph->ph_unstable = 0; 637 ph->ph_vprivate = 0; 638 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 639 640 MDI_PHCI_LOCK(ph); 641 MDI_PHCI_SET_POWER_UP(ph); 642 MDI_PHCI_UNLOCK(ph); 643 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 644 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 645 646 vhcache_phci_add(vh->vh_config, ph); 647 648 MDI_VHCI_PHCI_LOCK(vh); 649 if (vh->vh_phci_head == NULL) { 650 vh->vh_phci_head = ph; 651 } 652 if (vh->vh_phci_tail) { 653 vh->vh_phci_tail->ph_next = ph; 654 } 655 vh->vh_phci_tail = ph; 656 vh->vh_phci_count++; 657 MDI_VHCI_PHCI_UNLOCK(vh); 658 659 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 660 return (MDI_SUCCESS); 661 } 662 663 /* 664 * mdi_phci_unregister(): 665 * Unregister a pHCI module from mpxio framework 666 * mdi_phci_unregister() is called by the pHCI drivers from their 667 * detach(9E) handler to unregister their instances from the 668 * framework. 
669 * Return Values: 670 * MDI_SUCCESS 671 * MDI_FAILURE 672 */ 673 /*ARGSUSED*/ 674 int 675 mdi_phci_unregister(dev_info_t *pdip, int flags) 676 { 677 mdi_vhci_t *vh; 678 mdi_phci_t *ph; 679 mdi_phci_t *tmp; 680 mdi_phci_t *prev = NULL; 681 682 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 683 684 ph = i_devi_get_phci(pdip); 685 if (ph == NULL) { 686 MDI_DEBUG(1, (CE_WARN, pdip, 687 "!pHCI unregister: Not a valid pHCI")); 688 return (MDI_FAILURE); 689 } 690 691 vh = ph->ph_vhci; 692 ASSERT(vh != NULL); 693 if (vh == NULL) { 694 MDI_DEBUG(1, (CE_WARN, pdip, 695 "!pHCI unregister: Not a valid vHCI")); 696 return (MDI_FAILURE); 697 } 698 699 MDI_VHCI_PHCI_LOCK(vh); 700 tmp = vh->vh_phci_head; 701 while (tmp) { 702 if (tmp == ph) { 703 break; 704 } 705 prev = tmp; 706 tmp = tmp->ph_next; 707 } 708 709 if (ph == vh->vh_phci_head) { 710 vh->vh_phci_head = ph->ph_next; 711 } else { 712 prev->ph_next = ph->ph_next; 713 } 714 715 if (ph == vh->vh_phci_tail) { 716 vh->vh_phci_tail = prev; 717 } 718 719 vh->vh_phci_count--; 720 MDI_VHCI_PHCI_UNLOCK(vh); 721 722 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 723 ESC_DDI_INITIATOR_UNREGISTER); 724 vhcache_phci_remove(vh->vh_config, ph); 725 cv_destroy(&ph->ph_unstable_cv); 726 mutex_destroy(&ph->ph_mutex); 727 kmem_free(ph, sizeof (mdi_phci_t)); 728 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 729 DEVI(pdip)->devi_mdi_xhci = NULL; 730 return (MDI_SUCCESS); 731 } 732 733 /* 734 * i_devi_get_phci(): 735 * Utility function to return the phci extensions. 736 */ 737 static mdi_phci_t * 738 i_devi_get_phci(dev_info_t *pdip) 739 { 740 mdi_phci_t *ph = NULL; 741 if (MDI_PHCI(pdip)) { 742 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 743 } 744 return (ph); 745 } 746 747 /* 748 * Single thread mdi entry into devinfo node for modifying its children. 749 * If necessary we perform an ndi_devi_enter of the vHCI before doing 750 * an ndi_devi_enter of 'dip'. 
We maintain circular in two parts: one 751 * for the vHCI and one for the pHCI. 752 */ 753 void 754 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 755 { 756 dev_info_t *vdip; 757 int vcircular, pcircular; 758 759 /* Verify calling context */ 760 ASSERT(MDI_PHCI(phci_dip)); 761 vdip = mdi_devi_get_vdip(phci_dip); 762 ASSERT(vdip); /* A pHCI always has a vHCI */ 763 764 /* 765 * If pHCI is detaching then the framework has already entered the 766 * vHCI on a threads that went down the code path leading to 767 * detach_node(). This framework enter of the vHCI during pHCI 768 * detach is done to avoid deadlock with vHCI power management 769 * operations which enter the vHCI and the enter down the path 770 * to the pHCI. If pHCI is detaching then we piggyback this calls 771 * enter of the vHCI on frameworks vHCI enter that has already 772 * occurred - this is OK because we know that the framework thread 773 * doing detach is waiting for our completion. 774 * 775 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 776 * race with detach - but we can't do that because the framework has 777 * already entered the parent, so we have some complexity instead. 778 */ 779 for (;;) { 780 if (ndi_devi_tryenter(vdip, &vcircular)) { 781 ASSERT(vcircular != -1); 782 if (DEVI_IS_DETACHING(phci_dip)) { 783 ndi_devi_exit(vdip, vcircular); 784 vcircular = -1; 785 } 786 break; 787 } else if (DEVI_IS_DETACHING(phci_dip)) { 788 vcircular = -1; 789 break; 790 } else { 791 delay(1); 792 } 793 } 794 795 ndi_devi_enter(phci_dip, &pcircular); 796 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 797 } 798 799 /* 800 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
801 */ 802 void 803 mdi_devi_exit(dev_info_t *phci_dip, int circular) 804 { 805 dev_info_t *vdip; 806 int vcircular, pcircular; 807 808 /* Verify calling context */ 809 ASSERT(MDI_PHCI(phci_dip)); 810 vdip = mdi_devi_get_vdip(phci_dip); 811 ASSERT(vdip); /* A pHCI always has a vHCI */ 812 813 /* extract two circular recursion values from single int */ 814 pcircular = (short)(circular & 0xFFFF); 815 vcircular = (short)((circular >> 16) & 0xFFFF); 816 817 ndi_devi_exit(phci_dip, pcircular); 818 if (vcircular != -1) 819 ndi_devi_exit(vdip, vcircular); 820 } 821 822 /* 823 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 824 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 825 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 826 * with vHCI power management code during path online/offline. Each 827 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 828 * occur within the scope of an active mdi_devi_enter that establishes the 829 * circular value. 
830 */ 831 void 832 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 833 { 834 int pcircular; 835 836 /* Verify calling context */ 837 ASSERT(MDI_PHCI(phci_dip)); 838 839 pcircular = (short)(circular & 0xFFFF); 840 ndi_devi_exit(phci_dip, pcircular); 841 } 842 843 void 844 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 845 { 846 int pcircular; 847 848 /* Verify calling context */ 849 ASSERT(MDI_PHCI(phci_dip)); 850 851 ndi_devi_enter(phci_dip, &pcircular); 852 853 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 854 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 855 } 856 857 /* 858 * mdi_devi_get_vdip(): 859 * given a pHCI dip return vHCI dip 860 */ 861 dev_info_t * 862 mdi_devi_get_vdip(dev_info_t *pdip) 863 { 864 mdi_phci_t *ph; 865 866 ph = i_devi_get_phci(pdip); 867 if (ph && ph->ph_vhci) 868 return (ph->ph_vhci->vh_dip); 869 return (NULL); 870 } 871 872 /* 873 * mdi_devi_pdip_entered(): 874 * Return 1 if we are vHCI and have done an ndi_devi_enter 875 * of a pHCI 876 */ 877 int 878 mdi_devi_pdip_entered(dev_info_t *vdip) 879 { 880 mdi_vhci_t *vh; 881 mdi_phci_t *ph; 882 883 vh = i_devi_get_vhci(vdip); 884 if (vh == NULL) 885 return (0); 886 887 MDI_VHCI_PHCI_LOCK(vh); 888 ph = vh->vh_phci_head; 889 while (ph) { 890 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 891 MDI_VHCI_PHCI_UNLOCK(vh); 892 return (1); 893 } 894 ph = ph->ph_next; 895 } 896 MDI_VHCI_PHCI_UNLOCK(vh); 897 return (0); 898 } 899 900 /* 901 * mdi_phci_path2devinfo(): 902 * Utility function to search for a valid phci device given 903 * the devfs pathname. 
904 */ 905 dev_info_t * 906 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 907 { 908 char *temp_pathname; 909 mdi_vhci_t *vh; 910 mdi_phci_t *ph; 911 dev_info_t *pdip = NULL; 912 913 vh = i_devi_get_vhci(vdip); 914 ASSERT(vh != NULL); 915 916 if (vh == NULL) { 917 /* 918 * Invalid vHCI component, return failure 919 */ 920 return (NULL); 921 } 922 923 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 924 MDI_VHCI_PHCI_LOCK(vh); 925 ph = vh->vh_phci_head; 926 while (ph != NULL) { 927 pdip = ph->ph_dip; 928 ASSERT(pdip != NULL); 929 *temp_pathname = '\0'; 930 (void) ddi_pathname(pdip, temp_pathname); 931 if (strcmp(temp_pathname, pathname) == 0) { 932 break; 933 } 934 ph = ph->ph_next; 935 } 936 if (ph == NULL) { 937 pdip = NULL; 938 } 939 MDI_VHCI_PHCI_UNLOCK(vh); 940 kmem_free(temp_pathname, MAXPATHLEN); 941 return (pdip); 942 } 943 944 /* 945 * mdi_phci_get_path_count(): 946 * get number of path information nodes associated with a given 947 * pHCI device. 948 */ 949 int 950 mdi_phci_get_path_count(dev_info_t *pdip) 951 { 952 mdi_phci_t *ph; 953 int count = 0; 954 955 ph = i_devi_get_phci(pdip); 956 if (ph != NULL) { 957 count = ph->ph_path_count; 958 } 959 return (count); 960 } 961 962 /* 963 * i_mdi_phci_lock(): 964 * Lock a pHCI device 965 * Return Values: 966 * None 967 * Note: 968 * The default locking order is: 969 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 970 * But there are number of situations where locks need to be 971 * grabbed in reverse order. This routine implements try and lock 972 * mechanism depending on the requested parameter option. 973 */ 974 static void 975 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 976 { 977 if (pip) { 978 /* Reverse locking is requested. */ 979 while (MDI_PHCI_TRYLOCK(ph) == 0) { 980 /* 981 * tryenter failed. 
Try to grab again 982 * after a small delay 983 */ 984 MDI_PI_HOLD(pip); 985 MDI_PI_UNLOCK(pip); 986 delay(1); 987 MDI_PI_LOCK(pip); 988 MDI_PI_RELE(pip); 989 } 990 } else { 991 MDI_PHCI_LOCK(ph); 992 } 993 } 994 995 /* 996 * i_mdi_phci_unlock(): 997 * Unlock the pHCI component 998 */ 999 static void 1000 i_mdi_phci_unlock(mdi_phci_t *ph) 1001 { 1002 MDI_PHCI_UNLOCK(ph); 1003 } 1004 1005 /* 1006 * i_mdi_devinfo_create(): 1007 * create client device's devinfo node 1008 * Return Values: 1009 * dev_info 1010 * NULL 1011 * Notes: 1012 */ 1013 static dev_info_t * 1014 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1015 char **compatible, int ncompatible) 1016 { 1017 dev_info_t *cdip = NULL; 1018 1019 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1020 1021 /* Verify for duplicate entry */ 1022 cdip = i_mdi_devinfo_find(vh, name, guid); 1023 ASSERT(cdip == NULL); 1024 if (cdip) { 1025 cmn_err(CE_WARN, 1026 "i_mdi_devinfo_create: client dip %p already exists", 1027 (void *)cdip); 1028 } 1029 1030 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1031 if (cdip == NULL) 1032 goto fail; 1033 1034 /* 1035 * Create component type and Global unique identifier 1036 * properties 1037 */ 1038 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1039 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1040 goto fail; 1041 } 1042 1043 /* Decorate the node with compatible property */ 1044 if (compatible && 1045 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1046 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1047 goto fail; 1048 } 1049 1050 return (cdip); 1051 1052 fail: 1053 if (cdip) { 1054 (void) ndi_prop_remove_all(cdip); 1055 (void) ndi_devi_free(cdip); 1056 } 1057 return (NULL); 1058 } 1059 1060 /* 1061 * i_mdi_devinfo_find(): 1062 * Find a matching devinfo node for given client node name 1063 * and its guid. 
1064 * Return Values: 1065 * Handle to a dev_info node or NULL 1066 */ 1067 static dev_info_t * 1068 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1069 { 1070 char *data; 1071 dev_info_t *cdip = NULL; 1072 dev_info_t *ndip = NULL; 1073 int circular; 1074 1075 ndi_devi_enter(vh->vh_dip, &circular); 1076 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1077 while ((cdip = ndip) != NULL) { 1078 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1079 1080 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1081 continue; 1082 } 1083 1084 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1085 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1086 &data) != DDI_PROP_SUCCESS) { 1087 continue; 1088 } 1089 1090 if (strcmp(data, guid) != 0) { 1091 ddi_prop_free(data); 1092 continue; 1093 } 1094 ddi_prop_free(data); 1095 break; 1096 } 1097 ndi_devi_exit(vh->vh_dip, circular); 1098 return (cdip); 1099 } 1100 1101 /* 1102 * i_mdi_devinfo_remove(): 1103 * Remove a client device node 1104 */ 1105 static int 1106 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1107 { 1108 int rv = MDI_SUCCESS; 1109 1110 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1111 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1112 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1113 if (rv != NDI_SUCCESS) { 1114 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1115 " failed. 
cdip = %p\n", (void *)cdip)); 1116 } 1117 /* 1118 * Convert to MDI error code 1119 */ 1120 switch (rv) { 1121 case NDI_SUCCESS: 1122 rv = MDI_SUCCESS; 1123 break; 1124 case NDI_BUSY: 1125 rv = MDI_BUSY; 1126 break; 1127 default: 1128 rv = MDI_FAILURE; 1129 break; 1130 } 1131 } 1132 return (rv); 1133 } 1134 1135 /* 1136 * i_devi_get_client() 1137 * Utility function to get mpxio component extensions 1138 */ 1139 static mdi_client_t * 1140 i_devi_get_client(dev_info_t *cdip) 1141 { 1142 mdi_client_t *ct = NULL; 1143 1144 if (MDI_CLIENT(cdip)) { 1145 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1146 } 1147 return (ct); 1148 } 1149 1150 /* 1151 * i_mdi_is_child_present(): 1152 * Search for the presence of client device dev_info node 1153 */ 1154 static int 1155 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1156 { 1157 int rv = MDI_FAILURE; 1158 struct dev_info *dip; 1159 int circular; 1160 1161 ndi_devi_enter(vdip, &circular); 1162 dip = DEVI(vdip)->devi_child; 1163 while (dip) { 1164 if (dip == DEVI(cdip)) { 1165 rv = MDI_SUCCESS; 1166 break; 1167 } 1168 dip = dip->devi_sibling; 1169 } 1170 ndi_devi_exit(vdip, circular); 1171 return (rv); 1172 } 1173 1174 1175 /* 1176 * i_mdi_client_lock(): 1177 * Grab client component lock 1178 * Return Values: 1179 * None 1180 * Note: 1181 * The default locking order is: 1182 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1183 * But there are number of situations where locks need to be 1184 * grabbed in reverse order. This routine implements try and lock 1185 * mechanism depending on the requested parameter option. 1186 */ 1187 static void 1188 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1189 { 1190 if (pip) { 1191 /* 1192 * Reverse locking is requested. 1193 */ 1194 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1195 /* 1196 * tryenter failed. 
Try to grab again 1197 * after a small delay 1198 */ 1199 MDI_PI_HOLD(pip); 1200 MDI_PI_UNLOCK(pip); 1201 delay(1); 1202 MDI_PI_LOCK(pip); 1203 MDI_PI_RELE(pip); 1204 } 1205 } else { 1206 MDI_CLIENT_LOCK(ct); 1207 } 1208 } 1209 1210 /* 1211 * i_mdi_client_unlock(): 1212 * Unlock a client component 1213 */ 1214 static void 1215 i_mdi_client_unlock(mdi_client_t *ct) 1216 { 1217 MDI_CLIENT_UNLOCK(ct); 1218 } 1219 1220 /* 1221 * i_mdi_client_alloc(): 1222 * Allocate and initialize a client structure. Caller should 1223 * hold the vhci client lock. 1224 * Return Values: 1225 * Handle to a client component 1226 */ 1227 /*ARGSUSED*/ 1228 static mdi_client_t * 1229 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1230 { 1231 mdi_client_t *ct; 1232 1233 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1234 1235 /* 1236 * Allocate and initialize a component structure. 1237 */ 1238 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1239 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1240 ct->ct_hnext = NULL; 1241 ct->ct_hprev = NULL; 1242 ct->ct_dip = NULL; 1243 ct->ct_vhci = vh; 1244 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1245 (void) strcpy(ct->ct_drvname, name); 1246 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1247 (void) strcpy(ct->ct_guid, lguid); 1248 ct->ct_cprivate = NULL; 1249 ct->ct_vprivate = NULL; 1250 ct->ct_flags = 0; 1251 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1252 MDI_CLIENT_LOCK(ct); 1253 MDI_CLIENT_SET_OFFLINE(ct); 1254 MDI_CLIENT_SET_DETACH(ct); 1255 MDI_CLIENT_SET_POWER_UP(ct); 1256 MDI_CLIENT_UNLOCK(ct); 1257 ct->ct_failover_flags = 0; 1258 ct->ct_failover_status = 0; 1259 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1260 ct->ct_unstable = 0; 1261 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1262 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1263 ct->ct_lb = vh->vh_lb; 1264 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1265 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1266 ct->ct_path_count = 0; 1267 ct->ct_path_head = NULL; 1268 ct->ct_path_tail = NULL; 1269 ct->ct_path_last = NULL; 1270 1271 /* 1272 * Add this client component to our client hash queue 1273 */ 1274 i_mdi_client_enlist_table(vh, ct); 1275 return (ct); 1276 } 1277 1278 /* 1279 * i_mdi_client_enlist_table(): 1280 * Attach the client device to the client hash table. Caller 1281 * should hold the vhci client lock. 1282 */ 1283 static void 1284 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1285 { 1286 int index; 1287 struct client_hash *head; 1288 1289 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1290 1291 index = i_mdi_get_hash_key(ct->ct_guid); 1292 head = &vh->vh_client_table[index]; 1293 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1294 head->ct_hash_head = ct; 1295 head->ct_hash_count++; 1296 vh->vh_client_count++; 1297 } 1298 1299 /* 1300 * i_mdi_client_delist_table(): 1301 * Attach the client device to the client hash table. 1302 * Caller should hold the vhci client lock. 
1303 */ 1304 static void 1305 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1306 { 1307 int index; 1308 char *guid; 1309 struct client_hash *head; 1310 mdi_client_t *next; 1311 mdi_client_t *last; 1312 1313 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1314 1315 guid = ct->ct_guid; 1316 index = i_mdi_get_hash_key(guid); 1317 head = &vh->vh_client_table[index]; 1318 1319 last = NULL; 1320 next = (mdi_client_t *)head->ct_hash_head; 1321 while (next != NULL) { 1322 if (next == ct) { 1323 break; 1324 } 1325 last = next; 1326 next = next->ct_hnext; 1327 } 1328 1329 if (next) { 1330 head->ct_hash_count--; 1331 if (last == NULL) { 1332 head->ct_hash_head = ct->ct_hnext; 1333 } else { 1334 last->ct_hnext = ct->ct_hnext; 1335 } 1336 ct->ct_hnext = NULL; 1337 vh->vh_client_count--; 1338 } 1339 } 1340 1341 1342 /* 1343 * i_mdi_client_free(): 1344 * Free a client component 1345 */ 1346 static int 1347 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1348 { 1349 int rv = MDI_SUCCESS; 1350 int flags = ct->ct_flags; 1351 dev_info_t *cdip; 1352 dev_info_t *vdip; 1353 1354 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1355 1356 vdip = vh->vh_dip; 1357 cdip = ct->ct_dip; 1358 1359 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1360 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1361 DEVI(cdip)->devi_mdi_client = NULL; 1362 1363 /* 1364 * Clear out back ref. 
to dev_info_t node 1365 */ 1366 ct->ct_dip = NULL; 1367 1368 /* 1369 * Remove this client from our hash queue 1370 */ 1371 i_mdi_client_delist_table(vh, ct); 1372 1373 /* 1374 * Uninitialize and free the component 1375 */ 1376 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1377 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1378 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1379 cv_destroy(&ct->ct_failover_cv); 1380 cv_destroy(&ct->ct_unstable_cv); 1381 cv_destroy(&ct->ct_powerchange_cv); 1382 mutex_destroy(&ct->ct_mutex); 1383 kmem_free(ct, sizeof (*ct)); 1384 1385 if (cdip != NULL) { 1386 MDI_VHCI_CLIENT_UNLOCK(vh); 1387 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1388 MDI_VHCI_CLIENT_LOCK(vh); 1389 } 1390 return (rv); 1391 } 1392 1393 /* 1394 * i_mdi_client_find(): 1395 * Find the client structure corresponding to a given guid 1396 * Caller should hold the vhci client lock. 1397 */ 1398 static mdi_client_t * 1399 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1400 { 1401 int index; 1402 struct client_hash *head; 1403 mdi_client_t *ct; 1404 1405 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1406 1407 index = i_mdi_get_hash_key(guid); 1408 head = &vh->vh_client_table[index]; 1409 1410 ct = head->ct_hash_head; 1411 while (ct != NULL) { 1412 if (strcmp(ct->ct_guid, guid) == 0 && 1413 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1414 break; 1415 } 1416 ct = ct->ct_hnext; 1417 } 1418 return (ct); 1419 } 1420 1421 /* 1422 * i_mdi_client_update_state(): 1423 * Compute and update client device state 1424 * Notes: 1425 * A client device can be in any of three possible states: 1426 * 1427 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1428 * one online/standby paths. Can tolerate failures. 1429 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1430 * no alternate paths available as standby. A failure on the online 1431 * would result in loss of access to device data. 
1432 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1433 * no paths available to access the device. 1434 */ 1435 static void 1436 i_mdi_client_update_state(mdi_client_t *ct) 1437 { 1438 int state; 1439 1440 ASSERT(MDI_CLIENT_LOCKED(ct)); 1441 state = i_mdi_client_compute_state(ct, NULL); 1442 MDI_CLIENT_SET_STATE(ct, state); 1443 } 1444 1445 /* 1446 * i_mdi_client_compute_state(): 1447 * Compute client device state 1448 * 1449 * mdi_phci_t * Pointer to pHCI structure which should 1450 * while computing the new value. Used by 1451 * i_mdi_phci_offline() to find the new 1452 * client state after DR of a pHCI. 1453 */ 1454 static int 1455 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1456 { 1457 int state; 1458 int online_count = 0; 1459 int standby_count = 0; 1460 mdi_pathinfo_t *pip, *next; 1461 1462 ASSERT(MDI_CLIENT_LOCKED(ct)); 1463 pip = ct->ct_path_head; 1464 while (pip != NULL) { 1465 MDI_PI_LOCK(pip); 1466 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1467 if (MDI_PI(pip)->pi_phci == ph) { 1468 MDI_PI_UNLOCK(pip); 1469 pip = next; 1470 continue; 1471 } 1472 1473 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1474 == MDI_PATHINFO_STATE_ONLINE) 1475 online_count++; 1476 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1477 == MDI_PATHINFO_STATE_STANDBY) 1478 standby_count++; 1479 MDI_PI_UNLOCK(pip); 1480 pip = next; 1481 } 1482 1483 if (online_count == 0) { 1484 if (standby_count == 0) { 1485 state = MDI_CLIENT_STATE_FAILED; 1486 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1487 " ct = %p\n", (void *)ct)); 1488 } else if (standby_count == 1) { 1489 state = MDI_CLIENT_STATE_DEGRADED; 1490 } else { 1491 state = MDI_CLIENT_STATE_OPTIMAL; 1492 } 1493 } else if (online_count == 1) { 1494 if (standby_count == 0) { 1495 state = MDI_CLIENT_STATE_DEGRADED; 1496 } else { 1497 state = MDI_CLIENT_STATE_OPTIMAL; 1498 } 1499 } else { 1500 state = MDI_CLIENT_STATE_OPTIMAL; 1501 } 1502 return (state); 1503 } 1504 
1505 /* 1506 * i_mdi_client2devinfo(): 1507 * Utility function 1508 */ 1509 dev_info_t * 1510 i_mdi_client2devinfo(mdi_client_t *ct) 1511 { 1512 return (ct->ct_dip); 1513 } 1514 1515 /* 1516 * mdi_client_path2_devinfo(): 1517 * Given the parent devinfo and child devfs pathname, search for 1518 * a valid devfs node handle. 1519 */ 1520 dev_info_t * 1521 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1522 { 1523 dev_info_t *cdip = NULL; 1524 dev_info_t *ndip = NULL; 1525 char *temp_pathname; 1526 int circular; 1527 1528 /* 1529 * Allocate temp buffer 1530 */ 1531 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1532 1533 /* 1534 * Lock parent against changes 1535 */ 1536 ndi_devi_enter(vdip, &circular); 1537 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1538 while ((cdip = ndip) != NULL) { 1539 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1540 1541 *temp_pathname = '\0'; 1542 (void) ddi_pathname(cdip, temp_pathname); 1543 if (strcmp(temp_pathname, pathname) == 0) { 1544 break; 1545 } 1546 } 1547 /* 1548 * Release devinfo lock 1549 */ 1550 ndi_devi_exit(vdip, circular); 1551 1552 /* 1553 * Free the temp buffer 1554 */ 1555 kmem_free(temp_pathname, MAXPATHLEN); 1556 return (cdip); 1557 } 1558 1559 /* 1560 * mdi_client_get_path_count(): 1561 * Utility function to get number of path information nodes 1562 * associated with a given client device. 
1563 */ 1564 int 1565 mdi_client_get_path_count(dev_info_t *cdip) 1566 { 1567 mdi_client_t *ct; 1568 int count = 0; 1569 1570 ct = i_devi_get_client(cdip); 1571 if (ct != NULL) { 1572 count = ct->ct_path_count; 1573 } 1574 return (count); 1575 } 1576 1577 1578 /* 1579 * i_mdi_get_hash_key(): 1580 * Create a hash using strings as keys 1581 * 1582 */ 1583 static int 1584 i_mdi_get_hash_key(char *str) 1585 { 1586 uint32_t g, hash = 0; 1587 char *p; 1588 1589 for (p = str; *p != '\0'; p++) { 1590 g = *p; 1591 hash += g; 1592 } 1593 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1594 } 1595 1596 /* 1597 * mdi_get_lb_policy(): 1598 * Get current load balancing policy for a given client device 1599 */ 1600 client_lb_t 1601 mdi_get_lb_policy(dev_info_t *cdip) 1602 { 1603 client_lb_t lb = LOAD_BALANCE_NONE; 1604 mdi_client_t *ct; 1605 1606 ct = i_devi_get_client(cdip); 1607 if (ct != NULL) { 1608 lb = ct->ct_lb; 1609 } 1610 return (lb); 1611 } 1612 1613 /* 1614 * mdi_set_lb_region_size(): 1615 * Set current region size for the load-balance 1616 */ 1617 int 1618 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1619 { 1620 mdi_client_t *ct; 1621 int rv = MDI_FAILURE; 1622 1623 ct = i_devi_get_client(cdip); 1624 if (ct != NULL && ct->ct_lb_args != NULL) { 1625 ct->ct_lb_args->region_size = region_size; 1626 rv = MDI_SUCCESS; 1627 } 1628 return (rv); 1629 } 1630 1631 /* 1632 * mdi_Set_lb_policy(): 1633 * Set current load balancing policy for a given client device 1634 */ 1635 int 1636 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1637 { 1638 mdi_client_t *ct; 1639 int rv = MDI_FAILURE; 1640 1641 ct = i_devi_get_client(cdip); 1642 if (ct != NULL) { 1643 ct->ct_lb = lb; 1644 rv = MDI_SUCCESS; 1645 } 1646 return (rv); 1647 } 1648 1649 /* 1650 * mdi_failover(): 1651 * failover function called by the vHCI drivers to initiate 1652 * a failover operation. This is typically due to non-availability 1653 * of online paths to route I/O requests. 
Failover can be 1654 * triggered through user application also. 1655 * 1656 * The vHCI driver calls mdi_failover() to initiate a failover 1657 * operation. mdi_failover() calls back into the vHCI driver's 1658 * vo_failover() entry point to perform the actual failover 1659 * operation. The reason for requiring the vHCI driver to 1660 * initiate failover by calling mdi_failover(), instead of directly 1661 * executing vo_failover() itself, is to ensure that the mdi 1662 * framework can keep track of the client state properly. 1663 * Additionally, mdi_failover() provides as a convenience the 1664 * option of performing the failover operation synchronously or 1665 * asynchronously 1666 * 1667 * Upon successful completion of the failover operation, the 1668 * paths that were previously ONLINE will be in the STANDBY state, 1669 * and the newly activated paths will be in the ONLINE state. 1670 * 1671 * The flags modifier determines whether the activation is done 1672 * synchronously: MDI_FAILOVER_SYNC 1673 * Return Values: 1674 * MDI_SUCCESS 1675 * MDI_FAILURE 1676 * MDI_BUSY 1677 */ 1678 /*ARGSUSED*/ 1679 int 1680 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1681 { 1682 int rv; 1683 mdi_client_t *ct; 1684 1685 ct = i_devi_get_client(cdip); 1686 ASSERT(ct != NULL); 1687 if (ct == NULL) { 1688 /* cdip is not a valid client device. Nothing more to do. */ 1689 return (MDI_FAILURE); 1690 } 1691 1692 MDI_CLIENT_LOCK(ct); 1693 1694 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1695 /* A path to the client is being freed */ 1696 MDI_CLIENT_UNLOCK(ct); 1697 return (MDI_BUSY); 1698 } 1699 1700 1701 if (MDI_CLIENT_IS_FAILED(ct)) { 1702 /* 1703 * Client is in failed state. Nothing more to do. 
1704 */ 1705 MDI_CLIENT_UNLOCK(ct); 1706 return (MDI_FAILURE); 1707 } 1708 1709 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1710 /* 1711 * Failover is already in progress; return BUSY 1712 */ 1713 MDI_CLIENT_UNLOCK(ct); 1714 return (MDI_BUSY); 1715 } 1716 /* 1717 * Make sure that mdi_pathinfo node state changes are processed. 1718 * We do not allow failovers to progress while client path state 1719 * changes are in progress 1720 */ 1721 if (ct->ct_unstable) { 1722 if (flags == MDI_FAILOVER_ASYNC) { 1723 MDI_CLIENT_UNLOCK(ct); 1724 return (MDI_BUSY); 1725 } else { 1726 while (ct->ct_unstable) 1727 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1728 } 1729 } 1730 1731 /* 1732 * Client device is in stable state. Before proceeding, perform sanity 1733 * checks again. 1734 */ 1735 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1736 (!i_ddi_devi_attached(ct->ct_dip))) { 1737 /* 1738 * Client is in failed state. Nothing more to do. 1739 */ 1740 MDI_CLIENT_UNLOCK(ct); 1741 return (MDI_FAILURE); 1742 } 1743 1744 /* 1745 * Set the client state as failover in progress. 1746 */ 1747 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1748 ct->ct_failover_flags = flags; 1749 MDI_CLIENT_UNLOCK(ct); 1750 1751 if (flags == MDI_FAILOVER_ASYNC) { 1752 /* 1753 * Submit the initiate failover request via CPR safe 1754 * taskq threads. 1755 */ 1756 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1757 ct, KM_SLEEP); 1758 return (MDI_ACCEPT); 1759 } else { 1760 /* 1761 * Synchronous failover mode. Typically invoked from the user 1762 * land. 1763 */ 1764 rv = i_mdi_failover(ct); 1765 } 1766 return (rv); 1767 } 1768 1769 /* 1770 * i_mdi_failover(): 1771 * internal failover function. Invokes vHCI drivers failover 1772 * callback function and process the failover status 1773 * Return Values: 1774 * None 1775 * 1776 * Note: A client device in failover state can not be detached or freed. 
1777 */ 1778 static int 1779 i_mdi_failover(void *arg) 1780 { 1781 int rv = MDI_SUCCESS; 1782 mdi_client_t *ct = (mdi_client_t *)arg; 1783 mdi_vhci_t *vh = ct->ct_vhci; 1784 1785 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1786 1787 if (vh->vh_ops->vo_failover != NULL) { 1788 /* 1789 * Call vHCI drivers callback routine 1790 */ 1791 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1792 ct->ct_failover_flags); 1793 } 1794 1795 MDI_CLIENT_LOCK(ct); 1796 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1797 1798 /* 1799 * Save the failover return status 1800 */ 1801 ct->ct_failover_status = rv; 1802 1803 /* 1804 * As a result of failover, client status would have been changed. 1805 * Update the client state and wake up anyone waiting on this client 1806 * device. 1807 */ 1808 i_mdi_client_update_state(ct); 1809 1810 cv_broadcast(&ct->ct_failover_cv); 1811 MDI_CLIENT_UNLOCK(ct); 1812 return (rv); 1813 } 1814 1815 /* 1816 * Load balancing is logical block. 1817 * IOs within the range described by region_size 1818 * would go on the same path. This would improve the 1819 * performance by cache-hit on some of the RAID devices. 1820 * Search only for online paths(At some point we 1821 * may want to balance across target ports). 1822 * If no paths are found then default to round-robin. 
1823 */ 1824 static int 1825 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1826 { 1827 int path_index = -1; 1828 int online_path_count = 0; 1829 int online_nonpref_path_count = 0; 1830 int region_size = ct->ct_lb_args->region_size; 1831 mdi_pathinfo_t *pip; 1832 mdi_pathinfo_t *next; 1833 int preferred, path_cnt; 1834 1835 pip = ct->ct_path_head; 1836 while (pip) { 1837 MDI_PI_LOCK(pip); 1838 if (MDI_PI(pip)->pi_state == 1839 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1840 online_path_count++; 1841 } else if (MDI_PI(pip)->pi_state == 1842 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1843 online_nonpref_path_count++; 1844 } 1845 next = (mdi_pathinfo_t *) 1846 MDI_PI(pip)->pi_client_link; 1847 MDI_PI_UNLOCK(pip); 1848 pip = next; 1849 } 1850 /* if found any online/preferred then use this type */ 1851 if (online_path_count > 0) { 1852 path_cnt = online_path_count; 1853 preferred = 1; 1854 } else if (online_nonpref_path_count > 0) { 1855 path_cnt = online_nonpref_path_count; 1856 preferred = 0; 1857 } else { 1858 path_cnt = 0; 1859 } 1860 if (path_cnt) { 1861 path_index = (bp->b_blkno >> region_size) % path_cnt; 1862 pip = ct->ct_path_head; 1863 while (pip && path_index != -1) { 1864 MDI_PI_LOCK(pip); 1865 if (path_index == 0 && 1866 (MDI_PI(pip)->pi_state == 1867 MDI_PATHINFO_STATE_ONLINE) && 1868 MDI_PI(pip)->pi_preferred == preferred) { 1869 MDI_PI_HOLD(pip); 1870 MDI_PI_UNLOCK(pip); 1871 *ret_pip = pip; 1872 return (MDI_SUCCESS); 1873 } 1874 path_index --; 1875 next = (mdi_pathinfo_t *) 1876 MDI_PI(pip)->pi_client_link; 1877 MDI_PI_UNLOCK(pip); 1878 pip = next; 1879 } 1880 if (pip == NULL) { 1881 MDI_DEBUG(4, (CE_NOTE, NULL, 1882 "!lba %llx, no pip !!\n", 1883 bp->b_lblkno)); 1884 } else { 1885 MDI_DEBUG(4, (CE_NOTE, NULL, 1886 "!lba %llx, no pip for path_index, " 1887 "pip %p\n", bp->b_lblkno, (void *)pip)); 1888 } 1889 } 1890 return (MDI_FAILURE); 1891 } 1892 1893 /* 1894 * mdi_select_path(): 1895 * select a 
path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to.  The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters.  The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending).  If more than one online path is available to
 *		select, the framework automatically selects a suitable path
 *		for routing the I/O request.  If a failover operation is active
 *		for this client device the call shall be failed with MDI_BUSY
 *		error code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy.  Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable),
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion) and LOAD_BALANCE_LBA (Online paths will be
 *		selected based on the logical block).  The load balancing
 *		policy is specified through the vHCI driver's configuration
 *		file (driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags.  Any non-zero flags value bypasses the
 *		client's policy and walks the path list round-robin style.
 *		If start_pip is specified (non NULL) it is used as the start
 *		point to walk and find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select a STANDBY path); both may
 *		additionally be combined with MDI_SELECT_USER_DISABLE_PATH to
 *		accept user-disabled ONLINE/STANDBY paths as well.
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path.  Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt).
Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY 	- Client device is busy failing over, or no
 *				  path was selected while at least one path
 *				  examined was in a transient state
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				  available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				  onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*head;
	mdi_pathinfo_t	*start;
	client_lb_t	lbp;	/* load balancing policy */
	int		sb = 1;	/* standard behavior */
	int		preferred = 1;	/* preferred path */
	int		cond, cont = 1;
	int		retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	/* The client state checks below apply to the default behavior only */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head.  If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None  or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state  ==
				MDI_PATHINFO_STATE_ONLINE) &&
				preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.  Remember that a transient path was
			 * seen so the caller is told MDI_BUSY rather than
			 * MDI_NOPATH if nothing gets selected.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * Back at the start point: the first wrap switches
			 * to non-preferred paths, the second ends the search.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
			    (ct->ct_lb_args->region_size) && bp &&
				(sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
				    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/*  FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point.  If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		/*
		 * Default behavior resumes after the last selected path;
		 * override behavior resumes after the caller's start_pip.
		 */
		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * cond becomes 1 when this path has the wanted
			 * preference and a state acceptable for the request.
			 */
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
					MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
			} else {
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else if (flags ==
					(MDI_SELECT_STANDBY_PATH |
					MDI_SELECT_ONLINE_PATH |
					MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
						(MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
						MDI_PI(pip)->pi_preferred ==
						preferred) ? 1 : 0);
				} else {
					/* Unrecognized flag combination */
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the hold by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.  Remember that a transient path was
			 * seen so the caller is told MDI_BUSY rather than
			 * MDI_NOPATH if nothing gets selected.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path.
			 * pip is locked whenever control reaches do_again.
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	/*
	 * No path was selected.  Report busy if some path was in a
	 * transient state during the walk, otherwise report no path.
	 */
	MDI_CLIENT_UNLOCK(ct);
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even when the list is stable the mdi_pathinfo
 *		node state and properties are volatile.  The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
2332 */ 2333 mdi_pathinfo_t * 2334 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2335 { 2336 mdi_client_t *ct; 2337 2338 if (!MDI_CLIENT(ct_dip)) 2339 return (NULL); 2340 2341 /* 2342 * Walk through client link 2343 */ 2344 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2345 ASSERT(ct != NULL); 2346 2347 if (pip == NULL) 2348 return ((mdi_pathinfo_t *)ct->ct_path_head); 2349 2350 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2351 } 2352 2353 /* 2354 * For a phci, return the next available path to any client 2355 * Note: ditto mdi_get_next_phci_path() 2356 */ 2357 mdi_pathinfo_t * 2358 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2359 { 2360 mdi_phci_t *ph; 2361 2362 if (!MDI_PHCI(ph_dip)) 2363 return (NULL); 2364 2365 /* 2366 * Walk through pHCI link 2367 */ 2368 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2369 ASSERT(ph != NULL); 2370 2371 if (pip == NULL) 2372 return ((mdi_pathinfo_t *)ph->ph_path_head); 2373 2374 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2375 } 2376 2377 /* 2378 * mdi_hold_path(): 2379 * Hold the mdi_pathinfo node against unwanted unexpected free. 2380 * Return Values: 2381 * None 2382 */ 2383 void 2384 mdi_hold_path(mdi_pathinfo_t *pip) 2385 { 2386 if (pip) { 2387 MDI_PI_LOCK(pip); 2388 MDI_PI_HOLD(pip); 2389 MDI_PI_UNLOCK(pip); 2390 } 2391 } 2392 2393 2394 /* 2395 * mdi_rele_path(): 2396 * Release the mdi_pathinfo node which was selected 2397 * through mdi_select_path() mechanism or manually held by 2398 * calling mdi_hold_path(). 2399 * Return Values: 2400 * None 2401 */ 2402 void 2403 mdi_rele_path(mdi_pathinfo_t *pip) 2404 { 2405 if (pip) { 2406 MDI_PI_LOCK(pip); 2407 MDI_PI_RELE(pip); 2408 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2409 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2410 } 2411 MDI_PI_UNLOCK(pip); 2412 } 2413 } 2414 2415 /* 2416 * mdi_pi_lock(): 2417 * Lock the mdi_pathinfo node. 
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	/* NULL is asserted against above but still tolerated here */
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.
 * Note:
 *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	/* NULL is asserted against above but still tolerated here */
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *		Returns a pointer to the mdi_pathinfo node if a matching node is
 *		found.
 *
 *		pdip  - pHCI devinfo node; NULL yields NULL.
 *		caddr - client unit address; when NULL the pHCI's own path
 *			list is searched, otherwise the list of the client
 *			found for caddr is searched.
 *		paddr - path unit address to match; NULL yields NULL.
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL
 * Notes:
 *		Caller need not hold any locks to call this function.
 *		No hold is placed on the returned node, and the list lock
 *		used for the search is dropped before returning.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	mdi_client_t		*ct;
	mdi_pathinfo_t		*pip = NULL;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (CE_WARN, pdip,
			    "!mdi_pi_find: offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* pip is left NULL if the walk falls off the end */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
		    (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
		    "found for caddr %s", caddr ? caddr : "NULL"));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the vHCI client mutex as it is no more needed. Note: We
	 * always respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the owning pHCI and the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc():
 *		Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path is capable of having properties attached
 *		and passed to mdi_pi_online() to fully attach and online the
 *		path and client device node.
 *		The mdi_pathinfo node returned by this function must be
 *		destroyed using mdi_pi_free() if the path is no longer
 *		operational or if the caller fails to attach a client device
 *		node when calling mdi_pi_online(). The framework will not free
 *		the resources allocated.
 *		This function can be called from both interrupt and kernel
 *		contexts.  DDI_NOSLEEP flag should be used while calling
 *		from interrupt contexts.
 *
 *		mdi_pi_alloc_compatible() is the worker; it additionally
 *		accepts a 'compatible' list that is handed to
 *		i_mdi_devinfo_create() when a client devinfo node has to be
 *		created.  If a path with the same pHCI and paddr already
 *		exists on the client, that node is returned instead of a
 *		new one.  'flags' is not referenced by this function.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_NOMEM
 *		MDI_BUSY	(pHCI is not in the ready state)
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;
	int		path_allocated = 0;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
	    cname ? cname : "NULL", caddr ? caddr : "NULL",
	    paddr ? paddr : "NULL"));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/*
	 * Mark the pHCI unstable for the duration of the allocation; the
	 * matching MDI_PHCI_STABLE() is done after the 'fail' label below.
	 */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* rv is still MDI_NOMEM and pip still NULL here */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	/* tag the devinfo node as an MDI client and link it to ct */
	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* reuse an existing path with the same pHCI and unit address */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device.  Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the vHCI client mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (CE_NOTE, pdip,
	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));

	/* only freshly created paths are recorded in the vhci cache */
	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * Convenience wrapper around mdi_pi_alloc_compatible() that supplies no
 * 'compatible' list (NULL, 0).
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node and add to the pHCI path list
 *		and to the client path list.  The node starts out in the
 *		INIT state with the TRANSIENT flag set and inherits the
 *		pHCI's disable flags.  The caller must hold the vHCI client
 *		lock (asserted).
 * Return Values:
 *		mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	int		se_flag;
	int		kmem_flag;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* propagate the pHCI's disable state to the new path */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	/* private copy of the unit address; freed in i_mdi_pi_free() */
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	return (pip);
}

/*
 * i_mdi_phci_add_path():
 *		Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *		The caller must own the pHCI devinfo node (asserted through
 *		DEVI_BUSY_OWNED); the per-pHCI mutex is acquired and
 *		released inside this function.
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	MDI_PHCI_LOCK(ph);
	/* append at the tail; an empty list also gets a new head */
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 * Notes:
 *		The caller must own the client devinfo node (asserted through
 *		DEVI_BUSY_OWNED); the client mutex is acquired and released
 *		inside this function.
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	MDI_CLIENT_LOCK(ct);
	/* append at the tail; an empty list also gets a new head */
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 *
 *		The node must be in the offline, init or initing state.
 *		If references are still outstanding the function waits on
 *		pi_ref_cv, giving up with MDI_BUSY when a 60 second wait
 *		times out.  'flags' is not referenced by this function.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    (void *)pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	/* remember whether this path holds a PM reference on the client */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	/* re-added below if vo_pi_uninit() reports MDI_FAILURE */
	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock so that the vHCI client lock can be taken
	 * first, then take the client lock again.
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/* vo_pi_uninit() is skipped for nodes still in the initing state */
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip is released here and must not be touched afterwards */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/* the node was not freed: put it back into the vhci cache */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node
 *
 *		Destroys the per-path kstats, unlinks the node from the
 *		client and pHCI lists, invalidates the devinfo snapshot
 *		cache and releases the node's mutex, condition variables,
 *		address string, property list and memory.  The caller must
 *		hold the client lock (asserted).
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;
	int	se_flag;
	int	kmem_flag;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		/* size matches the strlen() + 1 used in i_mdi_pi_alloc() */
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
3072 * Notes: 3073 * Caller should hold per-pHCI mutex 3074 */ 3075 static void 3076 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3077 { 3078 mdi_pathinfo_t *prev = NULL; 3079 mdi_pathinfo_t *path = NULL; 3080 3081 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3082 3083 MDI_PHCI_LOCK(ph); 3084 path = ph->ph_path_head; 3085 while (path != NULL) { 3086 if (path == pip) { 3087 break; 3088 } 3089 prev = path; 3090 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3091 } 3092 3093 if (path) { 3094 ph->ph_path_count--; 3095 if (prev) { 3096 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3097 } else { 3098 ph->ph_path_head = 3099 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3100 } 3101 if (ph->ph_path_tail == path) { 3102 ph->ph_path_tail = prev; 3103 } 3104 } 3105 3106 /* 3107 * Clear the pHCI link 3108 */ 3109 MDI_PI(pip)->pi_phci_link = NULL; 3110 MDI_PI(pip)->pi_phci = NULL; 3111 MDI_PHCI_UNLOCK(ph); 3112 } 3113 3114 /* 3115 * i_mdi_client_remove_path(): 3116 * Remove a mdi_pathinfo node from client path list. 
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* unlike the pHCI variant, the client lock is held by the caller */
	ASSERT(MDI_CLIENT_LOCKED(ct));
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* do not leave ct_path_last pointing at the removed node */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	/* cleared even if pip was not found on the list */
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Move a mdi_pathinfo node to the requested state (online,
 *		standby, fault or offline) and call the vHCI driver's
 *		vo_pi_state_change() entry point, if one is registered.
 *
 *		pip   - pathinfo node to transition
 *		state - requested MDI_PATHINFO_STATE_* value
 *		flag  - passed through to vo_pi_state_change(); the
 *			NDI_DEVI_REMOVE bit is also examined here
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY	(pHCI not ready, or an NDI call returned
 *				NDI_BUSY)
 *		any other value returned by vo_pi_state_change() is
 *		returned unchanged
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* the callback is made without the pathinfo lock held */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* undone by MDI_PHCI_STABLE() at state_change_exit */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Don't offline the client dev_info node unless we have
		 * no available paths left at all.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (ct->ct_path_count == 1)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/* neither ct nor pip is locked at this point */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* the vHCI callback runs with neither the ct nor pip lock held */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	/* commit the new state on success, roll back to the old one if not */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3481 * Return Values: 3482 * MDI_SUCCESS 3483 * MDI_FAILURE 3484 */ 3485 int 3486 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3487 { 3488 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3489 int client_held = 0; 3490 int rv; 3491 3492 ASSERT(ct != NULL); 3493 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3494 if (rv != MDI_SUCCESS) 3495 return (rv); 3496 3497 MDI_PI_LOCK(pip); 3498 if (MDI_PI(pip)->pi_pm_held == 0) { 3499 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3500 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3501 i_mdi_pm_hold_pip(pip); 3502 client_held = 1; 3503 } 3504 MDI_PI_UNLOCK(pip); 3505 3506 if (client_held) { 3507 MDI_CLIENT_LOCK(ct); 3508 if (ct->ct_power_cnt == 0) { 3509 rv = i_mdi_power_all_phci(ct); 3510 } 3511 3512 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3513 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3514 i_mdi_pm_hold_client(ct, 1); 3515 MDI_CLIENT_UNLOCK(ct); 3516 } 3517 3518 return (rv); 3519 } 3520 3521 /* 3522 * mdi_pi_standby(): 3523 * Place the mdi_pathinfo node in standby state 3524 * 3525 * Return Values: 3526 * MDI_SUCCESS 3527 * MDI_FAILURE 3528 */ 3529 int 3530 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3531 { 3532 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3533 } 3534 3535 /* 3536 * mdi_pi_fault(): 3537 * Place the mdi_pathinfo node in fault'ed state 3538 * Return Values: 3539 * MDI_SUCCESS 3540 * MDI_FAILURE 3541 */ 3542 int 3543 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3544 { 3545 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3546 } 3547 3548 /* 3549 * mdi_pi_offline(): 3550 * Offline a mdi_pathinfo node. 
3551 * Return Values: 3552 * MDI_SUCCESS 3553 * MDI_FAILURE 3554 */ 3555 int 3556 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3557 { 3558 int ret, client_held = 0; 3559 mdi_client_t *ct; 3560 3561 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3562 3563 if (ret == MDI_SUCCESS) { 3564 MDI_PI_LOCK(pip); 3565 if (MDI_PI(pip)->pi_pm_held) { 3566 client_held = 1; 3567 } 3568 MDI_PI_UNLOCK(pip); 3569 3570 if (client_held) { 3571 ct = MDI_PI(pip)->pi_client; 3572 MDI_CLIENT_LOCK(ct); 3573 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3574 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3575 i_mdi_pm_rele_client(ct, 1); 3576 MDI_CLIENT_UNLOCK(ct); 3577 } 3578 } 3579 3580 return (ret); 3581 } 3582 3583 /* 3584 * i_mdi_pi_offline(): 3585 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3586 */ 3587 static int 3588 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3589 { 3590 dev_info_t *vdip = NULL; 3591 mdi_vhci_t *vh = NULL; 3592 mdi_client_t *ct = NULL; 3593 int (*f)(); 3594 int rv; 3595 3596 MDI_PI_LOCK(pip); 3597 ct = MDI_PI(pip)->pi_client; 3598 ASSERT(ct != NULL); 3599 3600 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3601 /* 3602 * Give a chance for pending I/Os to complete. 3603 */ 3604 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3605 "%d cmds still pending on path: %p\n", 3606 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3607 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3608 &MDI_PI(pip)->pi_mutex, 3609 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3610 /* 3611 * The timeout time reached without ref_cnt being zero 3612 * being signaled. 
3613 */ 3614 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3615 "Timeout reached on path %p without the cond\n", 3616 (void *)pip)); 3617 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3618 "%d cmds still pending on path: %p\n", 3619 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3620 } 3621 } 3622 vh = ct->ct_vhci; 3623 vdip = vh->vh_dip; 3624 3625 /* 3626 * Notify vHCI that has registered this event 3627 */ 3628 ASSERT(vh->vh_ops); 3629 f = vh->vh_ops->vo_pi_state_change; 3630 3631 if (f != NULL) { 3632 MDI_PI_UNLOCK(pip); 3633 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3634 flags)) != MDI_SUCCESS) { 3635 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3636 "!vo_path_offline failed " 3637 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3638 } 3639 MDI_PI_LOCK(pip); 3640 } 3641 3642 /* 3643 * Set the mdi_pathinfo node state and clear the transient condition 3644 */ 3645 MDI_PI_SET_OFFLINE(pip); 3646 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3647 MDI_PI_UNLOCK(pip); 3648 3649 MDI_CLIENT_LOCK(ct); 3650 if (rv == MDI_SUCCESS) { 3651 if (ct->ct_unstable == 0) { 3652 dev_info_t *cdip = ct->ct_dip; 3653 3654 /* 3655 * Onlining the mdi_pathinfo node will impact the 3656 * client state Update the client and dev_info node 3657 * state accordingly 3658 */ 3659 i_mdi_client_update_state(ct); 3660 rv = NDI_SUCCESS; 3661 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3662 if (cdip && 3663 (i_ddi_node_state(cdip) >= 3664 DS_INITIALIZED)) { 3665 MDI_CLIENT_UNLOCK(ct); 3666 rv = ndi_devi_offline(cdip, 0); 3667 MDI_CLIENT_LOCK(ct); 3668 if (rv != NDI_SUCCESS) { 3669 /* 3670 * ndi_devi_offline failed. 3671 * Reset client flags to 3672 * online. 
3673 */ 3674 MDI_DEBUG(4, (CE_WARN, cdip, 3675 "!ndi_devi_offline: failed " 3676 " Error: %x", rv)); 3677 MDI_CLIENT_SET_ONLINE(ct); 3678 } 3679 } 3680 } 3681 /* 3682 * Convert to MDI error code 3683 */ 3684 switch (rv) { 3685 case NDI_SUCCESS: 3686 rv = MDI_SUCCESS; 3687 break; 3688 case NDI_BUSY: 3689 rv = MDI_BUSY; 3690 break; 3691 default: 3692 rv = MDI_FAILURE; 3693 break; 3694 } 3695 } 3696 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3697 i_mdi_report_path_state(ct, pip); 3698 } 3699 3700 MDI_CLIENT_UNLOCK(ct); 3701 3702 /* 3703 * Change in the mdi_pathinfo node state will impact the client state 3704 */ 3705 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3706 (void *)ct, (void *)pip)); 3707 return (rv); 3708 } 3709 3710 3711 /* 3712 * mdi_pi_get_addr(): 3713 * Get the unit address associated with a mdi_pathinfo node 3714 * 3715 * Return Values: 3716 * char * 3717 */ 3718 char * 3719 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3720 { 3721 if (pip == NULL) 3722 return (NULL); 3723 3724 return (MDI_PI(pip)->pi_addr); 3725 } 3726 3727 /* 3728 * mdi_pi_get_client(): 3729 * Get the client devinfo associated with a mdi_pathinfo node 3730 * 3731 * Return Values: 3732 * Handle to client device dev_info node 3733 */ 3734 dev_info_t * 3735 mdi_pi_get_client(mdi_pathinfo_t *pip) 3736 { 3737 dev_info_t *dip = NULL; 3738 if (pip) { 3739 dip = MDI_PI(pip)->pi_client->ct_dip; 3740 } 3741 return (dip); 3742 } 3743 3744 /* 3745 * mdi_pi_get_phci(): 3746 * Get the pHCI devinfo associated with the mdi_pathinfo node 3747 * Return Values: 3748 * Handle to dev_info node 3749 */ 3750 dev_info_t * 3751 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3752 { 3753 dev_info_t *dip = NULL; 3754 if (pip) { 3755 dip = MDI_PI(pip)->pi_phci->ph_dip; 3756 } 3757 return (dip); 3758 } 3759 3760 /* 3761 * mdi_pi_get_client_private(): 3762 * Get the client private information associated with the 3763 * mdi_pathinfo node 3764 */ 3765 void * 3766 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3767 { 
3768 void *cprivate = NULL; 3769 if (pip) { 3770 cprivate = MDI_PI(pip)->pi_cprivate; 3771 } 3772 return (cprivate); 3773 } 3774 3775 /* 3776 * mdi_pi_set_client_private(): 3777 * Set the client private information in the mdi_pathinfo node 3778 */ 3779 void 3780 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3781 { 3782 if (pip) { 3783 MDI_PI(pip)->pi_cprivate = priv; 3784 } 3785 } 3786 3787 /* 3788 * mdi_pi_get_phci_private(): 3789 * Get the pHCI private information associated with the 3790 * mdi_pathinfo node 3791 */ 3792 caddr_t 3793 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3794 { 3795 caddr_t pprivate = NULL; 3796 if (pip) { 3797 pprivate = MDI_PI(pip)->pi_pprivate; 3798 } 3799 return (pprivate); 3800 } 3801 3802 /* 3803 * mdi_pi_set_phci_private(): 3804 * Set the pHCI private information in the mdi_pathinfo node 3805 */ 3806 void 3807 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3808 { 3809 if (pip) { 3810 MDI_PI(pip)->pi_pprivate = priv; 3811 } 3812 } 3813 3814 /* 3815 * mdi_pi_get_state(): 3816 * Get the mdi_pathinfo node state. Transient states are internal 3817 * and not provided to the users 3818 */ 3819 mdi_pathinfo_state_t 3820 mdi_pi_get_state(mdi_pathinfo_t *pip) 3821 { 3822 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3823 3824 if (pip) { 3825 if (MDI_PI_IS_TRANSIENT(pip)) { 3826 /* 3827 * mdi_pathinfo is in state transition. Return the 3828 * last good state. 3829 */ 3830 state = MDI_PI_OLD_STATE(pip); 3831 } else { 3832 state = MDI_PI_STATE(pip); 3833 } 3834 } 3835 return (state); 3836 } 3837 3838 /* 3839 * Note that the following function needs to be the new interface for 3840 * mdi_pi_get_state when mpxio gets integrated to ON. 3841 */ 3842 int 3843 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3844 uint32_t *ext_state) 3845 { 3846 *state = MDI_PATHINFO_STATE_INIT; 3847 3848 if (pip) { 3849 if (MDI_PI_IS_TRANSIENT(pip)) { 3850 /* 3851 * mdi_pathinfo is in state transition. 
Return the 3852 * last good state. 3853 */ 3854 *state = MDI_PI_OLD_STATE(pip); 3855 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3856 } else { 3857 *state = MDI_PI_STATE(pip); 3858 *ext_state = MDI_PI_EXT_STATE(pip); 3859 } 3860 } 3861 return (MDI_SUCCESS); 3862 } 3863 3864 /* 3865 * mdi_pi_get_preferred: 3866 * Get the preferred path flag 3867 */ 3868 int 3869 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3870 { 3871 if (pip) { 3872 return (MDI_PI(pip)->pi_preferred); 3873 } 3874 return (0); 3875 } 3876 3877 /* 3878 * mdi_pi_set_preferred: 3879 * Set the preferred path flag 3880 */ 3881 void 3882 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3883 { 3884 if (pip) { 3885 MDI_PI(pip)->pi_preferred = preferred; 3886 } 3887 } 3888 3889 /* 3890 * mdi_pi_set_state(): 3891 * Set the mdi_pathinfo node state 3892 */ 3893 void 3894 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3895 { 3896 uint32_t ext_state; 3897 3898 if (pip) { 3899 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3900 MDI_PI(pip)->pi_state = state; 3901 MDI_PI(pip)->pi_state |= ext_state; 3902 } 3903 } 3904 3905 /* 3906 * Property functions: 3907 */ 3908 int 3909 i_map_nvlist_error_to_mdi(int val) 3910 { 3911 int rv; 3912 3913 switch (val) { 3914 case 0: 3915 rv = DDI_PROP_SUCCESS; 3916 break; 3917 case EINVAL: 3918 case ENOTSUP: 3919 rv = DDI_PROP_INVAL_ARG; 3920 break; 3921 case ENOMEM: 3922 rv = DDI_PROP_NO_MEMORY; 3923 break; 3924 default: 3925 rv = DDI_PROP_NOT_FOUND; 3926 break; 3927 } 3928 return (rv); 3929 } 3930 3931 /* 3932 * mdi_pi_get_next_prop(): 3933 * Property walk function. The caller should hold mdi_pi_lock() 3934 * and release by calling mdi_pi_unlock() at the end of walk to 3935 * get a consistent value. 
3936 */ 3937 nvpair_t * 3938 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3939 { 3940 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3941 return (NULL); 3942 } 3943 ASSERT(MDI_PI_LOCKED(pip)); 3944 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3945 } 3946 3947 /* 3948 * mdi_prop_remove(): 3949 * Remove the named property from the named list. 3950 */ 3951 int 3952 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3953 { 3954 if (pip == NULL) { 3955 return (DDI_PROP_NOT_FOUND); 3956 } 3957 ASSERT(!MDI_PI_LOCKED(pip)); 3958 MDI_PI_LOCK(pip); 3959 if (MDI_PI(pip)->pi_prop == NULL) { 3960 MDI_PI_UNLOCK(pip); 3961 return (DDI_PROP_NOT_FOUND); 3962 } 3963 if (name) { 3964 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3965 } else { 3966 char nvp_name[MAXNAMELEN]; 3967 nvpair_t *nvp; 3968 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3969 while (nvp) { 3970 nvpair_t *next; 3971 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3972 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3973 nvpair_name(nvp)); 3974 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3975 nvp_name); 3976 nvp = next; 3977 } 3978 } 3979 MDI_PI_UNLOCK(pip); 3980 return (DDI_PROP_SUCCESS); 3981 } 3982 3983 /* 3984 * mdi_prop_size(): 3985 * Get buffer size needed to pack the property data. 3986 * Caller should hold the mdi_pathinfo_t lock to get a consistent 3987 * buffer size. 3988 */ 3989 int 3990 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3991 { 3992 int rv; 3993 size_t bufsize; 3994 3995 *buflenp = 0; 3996 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3997 return (DDI_PROP_NOT_FOUND); 3998 } 3999 ASSERT(MDI_PI_LOCKED(pip)); 4000 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4001 &bufsize, NV_ENCODE_NATIVE); 4002 *buflenp = bufsize; 4003 return (i_map_nvlist_error_to_mdi(rv)); 4004 } 4005 4006 /* 4007 * mdi_prop_pack(): 4008 * pack the property list. 
The caller should hold the 4009 * mdi_pathinfo_t node to get a consistent data 4010 */ 4011 int 4012 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4013 { 4014 int rv; 4015 size_t bufsize; 4016 4017 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4018 return (DDI_PROP_NOT_FOUND); 4019 } 4020 4021 ASSERT(MDI_PI_LOCKED(pip)); 4022 4023 bufsize = buflen; 4024 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4025 NV_ENCODE_NATIVE, KM_SLEEP); 4026 4027 return (i_map_nvlist_error_to_mdi(rv)); 4028 } 4029 4030 /* 4031 * mdi_prop_update_byte(): 4032 * Create/Update a byte property 4033 */ 4034 int 4035 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4036 { 4037 int rv; 4038 4039 if (pip == NULL) { 4040 return (DDI_PROP_INVAL_ARG); 4041 } 4042 ASSERT(!MDI_PI_LOCKED(pip)); 4043 MDI_PI_LOCK(pip); 4044 if (MDI_PI(pip)->pi_prop == NULL) { 4045 MDI_PI_UNLOCK(pip); 4046 return (DDI_PROP_NOT_FOUND); 4047 } 4048 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4049 MDI_PI_UNLOCK(pip); 4050 return (i_map_nvlist_error_to_mdi(rv)); 4051 } 4052 4053 /* 4054 * mdi_prop_update_byte_array(): 4055 * Create/Update a byte array property 4056 */ 4057 int 4058 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4059 uint_t nelements) 4060 { 4061 int rv; 4062 4063 if (pip == NULL) { 4064 return (DDI_PROP_INVAL_ARG); 4065 } 4066 ASSERT(!MDI_PI_LOCKED(pip)); 4067 MDI_PI_LOCK(pip); 4068 if (MDI_PI(pip)->pi_prop == NULL) { 4069 MDI_PI_UNLOCK(pip); 4070 return (DDI_PROP_NOT_FOUND); 4071 } 4072 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4073 MDI_PI_UNLOCK(pip); 4074 return (i_map_nvlist_error_to_mdi(rv)); 4075 } 4076 4077 /* 4078 * mdi_prop_update_int(): 4079 * Create/Update a 32 bit integer property 4080 */ 4081 int 4082 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4083 { 4084 int rv; 4085 4086 if (pip == NULL) { 4087 return (DDI_PROP_INVAL_ARG); 4088 } 4089 
ASSERT(!MDI_PI_LOCKED(pip)); 4090 MDI_PI_LOCK(pip); 4091 if (MDI_PI(pip)->pi_prop == NULL) { 4092 MDI_PI_UNLOCK(pip); 4093 return (DDI_PROP_NOT_FOUND); 4094 } 4095 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4096 MDI_PI_UNLOCK(pip); 4097 return (i_map_nvlist_error_to_mdi(rv)); 4098 } 4099 4100 /* 4101 * mdi_prop_update_int64(): 4102 * Create/Update a 64 bit integer property 4103 */ 4104 int 4105 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4106 { 4107 int rv; 4108 4109 if (pip == NULL) { 4110 return (DDI_PROP_INVAL_ARG); 4111 } 4112 ASSERT(!MDI_PI_LOCKED(pip)); 4113 MDI_PI_LOCK(pip); 4114 if (MDI_PI(pip)->pi_prop == NULL) { 4115 MDI_PI_UNLOCK(pip); 4116 return (DDI_PROP_NOT_FOUND); 4117 } 4118 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4119 MDI_PI_UNLOCK(pip); 4120 return (i_map_nvlist_error_to_mdi(rv)); 4121 } 4122 4123 /* 4124 * mdi_prop_update_int_array(): 4125 * Create/Update a int array property 4126 */ 4127 int 4128 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4129 uint_t nelements) 4130 { 4131 int rv; 4132 4133 if (pip == NULL) { 4134 return (DDI_PROP_INVAL_ARG); 4135 } 4136 ASSERT(!MDI_PI_LOCKED(pip)); 4137 MDI_PI_LOCK(pip); 4138 if (MDI_PI(pip)->pi_prop == NULL) { 4139 MDI_PI_UNLOCK(pip); 4140 return (DDI_PROP_NOT_FOUND); 4141 } 4142 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4143 nelements); 4144 MDI_PI_UNLOCK(pip); 4145 return (i_map_nvlist_error_to_mdi(rv)); 4146 } 4147 4148 /* 4149 * mdi_prop_update_string(): 4150 * Create/Update a string property 4151 */ 4152 int 4153 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4154 { 4155 int rv; 4156 4157 if (pip == NULL) { 4158 return (DDI_PROP_INVAL_ARG); 4159 } 4160 ASSERT(!MDI_PI_LOCKED(pip)); 4161 MDI_PI_LOCK(pip); 4162 if (MDI_PI(pip)->pi_prop == NULL) { 4163 MDI_PI_UNLOCK(pip); 4164 return (DDI_PROP_NOT_FOUND); 4165 } 4166 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4167 MDI_PI_UNLOCK(pip); 4168 return (i_map_nvlist_error_to_mdi(rv)); 4169 } 4170 4171 /* 4172 * mdi_prop_update_string_array(): 4173 * Create/Update a string array property 4174 */ 4175 int 4176 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4177 uint_t nelements) 4178 { 4179 int rv; 4180 4181 if (pip == NULL) { 4182 return (DDI_PROP_INVAL_ARG); 4183 } 4184 ASSERT(!MDI_PI_LOCKED(pip)); 4185 MDI_PI_LOCK(pip); 4186 if (MDI_PI(pip)->pi_prop == NULL) { 4187 MDI_PI_UNLOCK(pip); 4188 return (DDI_PROP_NOT_FOUND); 4189 } 4190 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4191 nelements); 4192 MDI_PI_UNLOCK(pip); 4193 return (i_map_nvlist_error_to_mdi(rv)); 4194 } 4195 4196 /* 4197 * mdi_prop_lookup_byte(): 4198 * Look for byte property identified by name. The data returned 4199 * is the actual property and valid as long as mdi_pathinfo_t node 4200 * is alive. 4201 */ 4202 int 4203 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4204 { 4205 int rv; 4206 4207 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4208 return (DDI_PROP_NOT_FOUND); 4209 } 4210 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4211 return (i_map_nvlist_error_to_mdi(rv)); 4212 } 4213 4214 4215 /* 4216 * mdi_prop_lookup_byte_array(): 4217 * Look for byte array property identified by name. The data 4218 * returned is the actual property and valid as long as 4219 * mdi_pathinfo_t node is alive. 4220 */ 4221 int 4222 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4223 uint_t *nelements) 4224 { 4225 int rv; 4226 4227 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4228 return (DDI_PROP_NOT_FOUND); 4229 } 4230 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4231 nelements); 4232 return (i_map_nvlist_error_to_mdi(rv)); 4233 } 4234 4235 /* 4236 * mdi_prop_lookup_int(): 4237 * Look for int property identified by name. 
The data returned 4238 * is the actual property and valid as long as mdi_pathinfo_t 4239 * node is alive. 4240 */ 4241 int 4242 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4243 { 4244 int rv; 4245 4246 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4247 return (DDI_PROP_NOT_FOUND); 4248 } 4249 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4250 return (i_map_nvlist_error_to_mdi(rv)); 4251 } 4252 4253 /* 4254 * mdi_prop_lookup_int64(): 4255 * Look for int64 property identified by name. The data returned 4256 * is the actual property and valid as long as mdi_pathinfo_t node 4257 * is alive. 4258 */ 4259 int 4260 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4261 { 4262 int rv; 4263 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4264 return (DDI_PROP_NOT_FOUND); 4265 } 4266 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4267 return (i_map_nvlist_error_to_mdi(rv)); 4268 } 4269 4270 /* 4271 * mdi_prop_lookup_int_array(): 4272 * Look for int array property identified by name. The data 4273 * returned is the actual property and valid as long as 4274 * mdi_pathinfo_t node is alive. 4275 */ 4276 int 4277 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4278 uint_t *nelements) 4279 { 4280 int rv; 4281 4282 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4283 return (DDI_PROP_NOT_FOUND); 4284 } 4285 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4286 (int32_t **)data, nelements); 4287 return (i_map_nvlist_error_to_mdi(rv)); 4288 } 4289 4290 /* 4291 * mdi_prop_lookup_string(): 4292 * Look for string property identified by name. The data 4293 * returned is the actual property and valid as long as 4294 * mdi_pathinfo_t node is alive. 
4295 */ 4296 int 4297 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4298 { 4299 int rv; 4300 4301 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4302 return (DDI_PROP_NOT_FOUND); 4303 } 4304 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4305 return (i_map_nvlist_error_to_mdi(rv)); 4306 } 4307 4308 /* 4309 * mdi_prop_lookup_string_array(): 4310 * Look for string array property identified by name. The data 4311 * returned is the actual property and valid as long as 4312 * mdi_pathinfo_t node is alive. 4313 */ 4314 int 4315 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4316 uint_t *nelements) 4317 { 4318 int rv; 4319 4320 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4321 return (DDI_PROP_NOT_FOUND); 4322 } 4323 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4324 nelements); 4325 return (i_map_nvlist_error_to_mdi(rv)); 4326 } 4327 4328 /* 4329 * mdi_prop_free(): 4330 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4331 * functions return the pointer to actual property data and not a 4332 * copy of it. So the data returned is valid as long as 4333 * mdi_pathinfo_t node is valid. 
4334 */ 4335 /*ARGSUSED*/ 4336 int 4337 mdi_prop_free(void *data) 4338 { 4339 return (DDI_PROP_SUCCESS); 4340 } 4341 4342 /*ARGSUSED*/ 4343 static void 4344 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4345 { 4346 char *phci_path, *ct_path; 4347 char *ct_status; 4348 char *status; 4349 dev_info_t *dip = ct->ct_dip; 4350 char lb_buf[64]; 4351 4352 ASSERT(MDI_CLIENT_LOCKED(ct)); 4353 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4354 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4355 return; 4356 } 4357 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4358 ct_status = "optimal"; 4359 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4360 ct_status = "degraded"; 4361 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4362 ct_status = "failed"; 4363 } else { 4364 ct_status = "unknown"; 4365 } 4366 4367 if (MDI_PI_IS_OFFLINE(pip)) { 4368 status = "offline"; 4369 } else if (MDI_PI_IS_ONLINE(pip)) { 4370 status = "online"; 4371 } else if (MDI_PI_IS_STANDBY(pip)) { 4372 status = "standby"; 4373 } else if (MDI_PI_IS_FAULT(pip)) { 4374 status = "faulted"; 4375 } else { 4376 status = "unknown"; 4377 } 4378 4379 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4380 (void) snprintf(lb_buf, sizeof (lb_buf), 4381 "%s, region-size: %d", mdi_load_balance_lba, 4382 ct->ct_lb_args->region_size); 4383 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4384 (void) snprintf(lb_buf, sizeof (lb_buf), 4385 "%s", mdi_load_balance_none); 4386 } else { 4387 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4388 mdi_load_balance_rr); 4389 } 4390 4391 if (dip) { 4392 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4393 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4394 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4395 "path %s (%s%d) to target address: %s is %s" 4396 " Load balancing: %s\n", 4397 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4398 ddi_get_instance(dip), ct_status, 4399 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4400 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4401 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4402 MDI_PI(pip)->pi_addr, status, lb_buf); 4403 kmem_free(phci_path, MAXPATHLEN); 4404 kmem_free(ct_path, MAXPATHLEN); 4405 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4406 } 4407 } 4408 4409 #ifdef DEBUG 4410 /* 4411 * i_mdi_log(): 4412 * Utility function for error message management 4413 * 4414 */ 4415 /*PRINTFLIKE3*/ 4416 static void 4417 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4418 { 4419 char name[MAXNAMELEN]; 4420 char buf[MAXNAMELEN]; 4421 char *bp; 4422 va_list ap; 4423 int log_only = 0; 4424 int boot_only = 0; 4425 int console_only = 0; 4426 4427 if (dip) { 4428 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4429 ddi_node_name(dip), ddi_get_instance(dip)); 4430 } else { 4431 name[0] = 0; 4432 } 4433 4434 va_start(ap, fmt); 4435 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4436 va_end(ap); 4437 4438 switch (buf[0]) { 4439 case '!': 4440 bp = &buf[1]; 4441 log_only = 1; 4442 break; 4443 case '?': 4444 bp = &buf[1]; 4445 boot_only = 1; 4446 break; 4447 case '^': 4448 bp = &buf[1]; 4449 console_only = 1; 4450 break; 4451 default: 4452 bp = buf; 4453 break; 4454 } 4455 if (mdi_debug_logonly) { 4456 log_only = 1; 4457 boot_only = 0; 4458 console_only = 0; 4459 } 4460 4461 switch (level) { 4462 case CE_NOTE: 4463 level = CE_CONT; 4464 /* FALLTHROUGH */ 4465 case CE_CONT: 4466 case CE_WARN: 4467 case CE_PANIC: 4468 if (boot_only) { 4469 cmn_err(level, "?mdi: %s%s", name, bp); 4470 } else if (console_only) { 4471 cmn_err(level, "^mdi: %s%s", name, bp); 4472 } else if (log_only) { 4473 cmn_err(level, "!mdi: %s%s", name, bp); 4474 } else { 4475 cmn_err(level, "mdi: %s%s", name, bp); 4476 } 4477 break; 4478 default: 4479 cmn_err(level, "mdi: %s%s", name, bp); 4480 break; 4481 } 4482 } 4483 #endif /* DEBUG */ 4484 4485 void 4486 i_mdi_client_online(dev_info_t *ct_dip) 4487 { 4488 mdi_client_t *ct; 4489 4490 /* 4491 * Client online notification. 
Mark client state as online 4492 * restore our binding with dev_info node 4493 */ 4494 ct = i_devi_get_client(ct_dip); 4495 ASSERT(ct != NULL); 4496 MDI_CLIENT_LOCK(ct); 4497 MDI_CLIENT_SET_ONLINE(ct); 4498 /* catch for any memory leaks */ 4499 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4500 ct->ct_dip = ct_dip; 4501 4502 if (ct->ct_power_cnt == 0) 4503 (void) i_mdi_power_all_phci(ct); 4504 4505 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4506 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4507 i_mdi_pm_hold_client(ct, 1); 4508 4509 MDI_CLIENT_UNLOCK(ct); 4510 } 4511 4512 void 4513 i_mdi_phci_online(dev_info_t *ph_dip) 4514 { 4515 mdi_phci_t *ph; 4516 4517 /* pHCI online notification. Mark state accordingly */ 4518 ph = i_devi_get_phci(ph_dip); 4519 ASSERT(ph != NULL); 4520 MDI_PHCI_LOCK(ph); 4521 MDI_PHCI_SET_ONLINE(ph); 4522 MDI_PHCI_UNLOCK(ph); 4523 } 4524 4525 /* 4526 * mdi_devi_online(): 4527 * Online notification from NDI framework on pHCI/client 4528 * device online. 4529 * Return Values: 4530 * NDI_SUCCESS 4531 * MDI_FAILURE 4532 */ 4533 /*ARGSUSED*/ 4534 int 4535 mdi_devi_online(dev_info_t *dip, uint_t flags) 4536 { 4537 if (MDI_PHCI(dip)) { 4538 i_mdi_phci_online(dip); 4539 } 4540 4541 if (MDI_CLIENT(dip)) { 4542 i_mdi_client_online(dip); 4543 } 4544 return (NDI_SUCCESS); 4545 } 4546 4547 /* 4548 * mdi_devi_offline(): 4549 * Offline notification from NDI framework on pHCI/Client device 4550 * offline. 
4551 * 4552 * Return Values: 4553 * NDI_SUCCESS 4554 * NDI_FAILURE 4555 */ 4556 /*ARGSUSED*/ 4557 int 4558 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4559 { 4560 int rv = NDI_SUCCESS; 4561 4562 if (MDI_CLIENT(dip)) { 4563 rv = i_mdi_client_offline(dip, flags); 4564 if (rv != NDI_SUCCESS) 4565 return (rv); 4566 } 4567 4568 if (MDI_PHCI(dip)) { 4569 rv = i_mdi_phci_offline(dip, flags); 4570 4571 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4572 /* set client back online */ 4573 i_mdi_client_online(dip); 4574 } 4575 } 4576 4577 return (rv); 4578 } 4579 4580 /*ARGSUSED*/ 4581 static int 4582 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4583 { 4584 int rv = NDI_SUCCESS; 4585 mdi_phci_t *ph; 4586 mdi_client_t *ct; 4587 mdi_pathinfo_t *pip; 4588 mdi_pathinfo_t *next; 4589 mdi_pathinfo_t *failed_pip = NULL; 4590 dev_info_t *cdip; 4591 4592 /* 4593 * pHCI component offline notification 4594 * Make sure that this pHCI instance is free to be offlined. 4595 * If it is OK to proceed, Offline and remove all the child 4596 * mdi_pathinfo nodes. This process automatically offlines 4597 * corresponding client devices, for which this pHCI provides 4598 * critical services. 4599 */ 4600 ph = i_devi_get_phci(dip); 4601 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n", 4602 (void *)dip, (void *)ph)); 4603 if (ph == NULL) { 4604 return (rv); 4605 } 4606 4607 MDI_PHCI_LOCK(ph); 4608 4609 if (MDI_PHCI_IS_OFFLINE(ph)) { 4610 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", 4611 (void *)ph)); 4612 MDI_PHCI_UNLOCK(ph); 4613 return (NDI_SUCCESS); 4614 } 4615 4616 /* 4617 * Check to see if the pHCI can be offlined 4618 */ 4619 if (ph->ph_unstable) { 4620 MDI_DEBUG(1, (CE_WARN, dip, 4621 "!One or more target devices are in transient " 4622 "state. This device can not be removed at " 4623 "this moment. 
Please try again later.")); 4624 MDI_PHCI_UNLOCK(ph); 4625 return (NDI_BUSY); 4626 } 4627 4628 pip = ph->ph_path_head; 4629 while (pip != NULL) { 4630 MDI_PI_LOCK(pip); 4631 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4632 4633 /* 4634 * The mdi_pathinfo state is OK. Check the client state. 4635 * If failover in progress fail the pHCI from offlining 4636 */ 4637 ct = MDI_PI(pip)->pi_client; 4638 i_mdi_client_lock(ct, pip); 4639 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4640 (ct->ct_unstable)) { 4641 /* 4642 * Failover is in progress, Fail the DR 4643 */ 4644 MDI_DEBUG(1, (CE_WARN, dip, 4645 "!pHCI device (%s%d) is Busy. %s", 4646 ddi_driver_name(dip), ddi_get_instance(dip), 4647 "This device can not be removed at " 4648 "this moment. Please try again later.")); 4649 MDI_PI_UNLOCK(pip); 4650 i_mdi_client_unlock(ct); 4651 MDI_PHCI_UNLOCK(ph); 4652 return (NDI_BUSY); 4653 } 4654 MDI_PI_UNLOCK(pip); 4655 4656 /* 4657 * Check to see of we are removing the last path of this 4658 * client device... 4659 */ 4660 cdip = ct->ct_dip; 4661 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4662 (i_mdi_client_compute_state(ct, ph) == 4663 MDI_CLIENT_STATE_FAILED)) { 4664 i_mdi_client_unlock(ct); 4665 MDI_PHCI_UNLOCK(ph); 4666 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4667 /* 4668 * ndi_devi_offline() failed. 4669 * This pHCI provides the critical path 4670 * to one or more client devices. 4671 * Return busy. 4672 */ 4673 MDI_PHCI_LOCK(ph); 4674 MDI_DEBUG(1, (CE_WARN, dip, 4675 "!pHCI device (%s%d) is Busy. %s", 4676 ddi_driver_name(dip), ddi_get_instance(dip), 4677 "This device can not be removed at " 4678 "this moment. 
Please try again later.")); 4679 failed_pip = pip; 4680 break; 4681 } else { 4682 MDI_PHCI_LOCK(ph); 4683 pip = next; 4684 } 4685 } else { 4686 i_mdi_client_unlock(ct); 4687 pip = next; 4688 } 4689 } 4690 4691 if (failed_pip) { 4692 pip = ph->ph_path_head; 4693 while (pip != failed_pip) { 4694 MDI_PI_LOCK(pip); 4695 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4696 ct = MDI_PI(pip)->pi_client; 4697 i_mdi_client_lock(ct, pip); 4698 cdip = ct->ct_dip; 4699 switch (MDI_CLIENT_STATE(ct)) { 4700 case MDI_CLIENT_STATE_OPTIMAL: 4701 case MDI_CLIENT_STATE_DEGRADED: 4702 if (cdip) { 4703 MDI_PI_UNLOCK(pip); 4704 i_mdi_client_unlock(ct); 4705 MDI_PHCI_UNLOCK(ph); 4706 (void) ndi_devi_online(cdip, 0); 4707 MDI_PHCI_LOCK(ph); 4708 pip = next; 4709 continue; 4710 } 4711 break; 4712 4713 case MDI_CLIENT_STATE_FAILED: 4714 if (cdip) { 4715 MDI_PI_UNLOCK(pip); 4716 i_mdi_client_unlock(ct); 4717 MDI_PHCI_UNLOCK(ph); 4718 (void) ndi_devi_offline(cdip, 0); 4719 MDI_PHCI_LOCK(ph); 4720 pip = next; 4721 continue; 4722 } 4723 break; 4724 } 4725 MDI_PI_UNLOCK(pip); 4726 i_mdi_client_unlock(ct); 4727 pip = next; 4728 } 4729 MDI_PHCI_UNLOCK(ph); 4730 return (NDI_BUSY); 4731 } 4732 4733 /* 4734 * Mark the pHCI as offline 4735 */ 4736 MDI_PHCI_SET_OFFLINE(ph); 4737 4738 /* 4739 * Mark the child mdi_pathinfo nodes as transient 4740 */ 4741 pip = ph->ph_path_head; 4742 while (pip != NULL) { 4743 MDI_PI_LOCK(pip); 4744 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4745 MDI_PI_SET_OFFLINING(pip); 4746 MDI_PI_UNLOCK(pip); 4747 pip = next; 4748 } 4749 MDI_PHCI_UNLOCK(ph); 4750 /* 4751 * Give a chance for any pending commands to execute 4752 */ 4753 delay(1); 4754 MDI_PHCI_LOCK(ph); 4755 pip = ph->ph_path_head; 4756 while (pip != NULL) { 4757 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4758 (void) i_mdi_pi_offline(pip, flags); 4759 MDI_PI_LOCK(pip); 4760 ct = MDI_PI(pip)->pi_client; 4761 if (!MDI_PI_IS_OFFLINE(pip)) { 4762 MDI_DEBUG(1, (CE_WARN, dip, 4763 "!pHCI device (%s%d) 
is Busy. %s", 4764 ddi_driver_name(dip), ddi_get_instance(dip), 4765 "This device can not be removed at " 4766 "this moment. Please try again later.")); 4767 MDI_PI_UNLOCK(pip); 4768 MDI_PHCI_SET_ONLINE(ph); 4769 MDI_PHCI_UNLOCK(ph); 4770 return (NDI_BUSY); 4771 } 4772 MDI_PI_UNLOCK(pip); 4773 pip = next; 4774 } 4775 MDI_PHCI_UNLOCK(ph); 4776 4777 return (rv); 4778 } 4779 4780 /*ARGSUSED*/ 4781 static int 4782 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4783 { 4784 int rv = NDI_SUCCESS; 4785 mdi_client_t *ct; 4786 4787 /* 4788 * Client component to go offline. Make sure that we are 4789 * not in failing over state and update client state 4790 * accordingly 4791 */ 4792 ct = i_devi_get_client(dip); 4793 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 4794 (void *)dip, (void *)ct)); 4795 if (ct != NULL) { 4796 MDI_CLIENT_LOCK(ct); 4797 if (ct->ct_unstable) { 4798 /* 4799 * One or more paths are in transient state, 4800 * Dont allow offline of a client device 4801 */ 4802 MDI_DEBUG(1, (CE_WARN, dip, 4803 "!One or more paths to this device is " 4804 "in transient state. This device can not " 4805 "be removed at this moment. " 4806 "Please try again later.")); 4807 MDI_CLIENT_UNLOCK(ct); 4808 return (NDI_BUSY); 4809 } 4810 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4811 /* 4812 * Failover is in progress, Dont allow DR of 4813 * a client device 4814 */ 4815 MDI_DEBUG(1, (CE_WARN, dip, 4816 "!Client device (%s%d) is Busy. %s", 4817 ddi_driver_name(dip), ddi_get_instance(dip), 4818 "This device can not be removed at " 4819 "this moment. 
Please try again later.")); 4820 MDI_CLIENT_UNLOCK(ct); 4821 return (NDI_BUSY); 4822 } 4823 MDI_CLIENT_SET_OFFLINE(ct); 4824 4825 /* 4826 * Unbind our relationship with the dev_info node 4827 */ 4828 if (flags & NDI_DEVI_REMOVE) { 4829 ct->ct_dip = NULL; 4830 } 4831 MDI_CLIENT_UNLOCK(ct); 4832 } 4833 return (rv); 4834 } 4835 4836 /* 4837 * mdi_pre_attach(): 4838 * Pre attach() notification handler 4839 */ 4840 /*ARGSUSED*/ 4841 int 4842 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4843 { 4844 /* don't support old DDI_PM_RESUME */ 4845 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4846 (cmd == DDI_PM_RESUME)) 4847 return (DDI_FAILURE); 4848 4849 return (DDI_SUCCESS); 4850 } 4851 4852 /* 4853 * mdi_post_attach(): 4854 * Post attach() notification handler 4855 */ 4856 /*ARGSUSED*/ 4857 void 4858 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4859 { 4860 mdi_phci_t *ph; 4861 mdi_client_t *ct; 4862 mdi_vhci_t *vh; 4863 4864 if (MDI_PHCI(dip)) { 4865 ph = i_devi_get_phci(dip); 4866 ASSERT(ph != NULL); 4867 4868 MDI_PHCI_LOCK(ph); 4869 switch (cmd) { 4870 case DDI_ATTACH: 4871 MDI_DEBUG(2, (CE_NOTE, dip, 4872 "!pHCI post_attach: called %p\n", (void *)ph)); 4873 if (error == DDI_SUCCESS) { 4874 MDI_PHCI_SET_ATTACH(ph); 4875 } else { 4876 MDI_DEBUG(1, (CE_NOTE, dip, 4877 "!pHCI post_attach: failed error=%d\n", 4878 error)); 4879 MDI_PHCI_SET_DETACH(ph); 4880 } 4881 break; 4882 4883 case DDI_RESUME: 4884 MDI_DEBUG(2, (CE_NOTE, dip, 4885 "!pHCI post_resume: called %p\n", (void *)ph)); 4886 if (error == DDI_SUCCESS) { 4887 MDI_PHCI_SET_RESUME(ph); 4888 } else { 4889 MDI_DEBUG(1, (CE_NOTE, dip, 4890 "!pHCI post_resume: failed error=%d\n", 4891 error)); 4892 MDI_PHCI_SET_SUSPEND(ph); 4893 } 4894 break; 4895 } 4896 MDI_PHCI_UNLOCK(ph); 4897 } 4898 4899 if (MDI_CLIENT(dip)) { 4900 ct = i_devi_get_client(dip); 4901 ASSERT(ct != NULL); 4902 4903 MDI_CLIENT_LOCK(ct); 4904 switch (cmd) { 4905 case DDI_ATTACH: 4906 MDI_DEBUG(2, (CE_NOTE, dip, 
4907 "!Client post_attach: called %p\n", (void *)ct)); 4908 if (error != DDI_SUCCESS) { 4909 MDI_DEBUG(1, (CE_NOTE, dip, 4910 "!Client post_attach: failed error=%d\n", 4911 error)); 4912 MDI_CLIENT_SET_DETACH(ct); 4913 MDI_DEBUG(4, (CE_WARN, dip, 4914 "mdi_post_attach i_mdi_pm_reset_client\n")); 4915 i_mdi_pm_reset_client(ct); 4916 break; 4917 } 4918 4919 /* 4920 * Client device has successfully attached, inform 4921 * the vhci. 4922 */ 4923 vh = ct->ct_vhci; 4924 if (vh->vh_ops->vo_client_attached) 4925 (*vh->vh_ops->vo_client_attached)(dip); 4926 4927 MDI_CLIENT_SET_ATTACH(ct); 4928 break; 4929 4930 case DDI_RESUME: 4931 MDI_DEBUG(2, (CE_NOTE, dip, 4932 "!Client post_attach: called %p\n", (void *)ct)); 4933 if (error == DDI_SUCCESS) { 4934 MDI_CLIENT_SET_RESUME(ct); 4935 } else { 4936 MDI_DEBUG(1, (CE_NOTE, dip, 4937 "!Client post_resume: failed error=%d\n", 4938 error)); 4939 MDI_CLIENT_SET_SUSPEND(ct); 4940 } 4941 break; 4942 } 4943 MDI_CLIENT_UNLOCK(ct); 4944 } 4945 } 4946 4947 /* 4948 * mdi_pre_detach(): 4949 * Pre detach notification handler 4950 */ 4951 /*ARGSUSED*/ 4952 int 4953 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4954 { 4955 int rv = DDI_SUCCESS; 4956 4957 if (MDI_CLIENT(dip)) { 4958 (void) i_mdi_client_pre_detach(dip, cmd); 4959 } 4960 4961 if (MDI_PHCI(dip)) { 4962 rv = i_mdi_phci_pre_detach(dip, cmd); 4963 } 4964 4965 return (rv); 4966 } 4967 4968 /*ARGSUSED*/ 4969 static int 4970 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4971 { 4972 int rv = DDI_SUCCESS; 4973 mdi_phci_t *ph; 4974 mdi_client_t *ct; 4975 mdi_pathinfo_t *pip; 4976 mdi_pathinfo_t *failed_pip = NULL; 4977 mdi_pathinfo_t *next; 4978 4979 ph = i_devi_get_phci(dip); 4980 if (ph == NULL) { 4981 return (rv); 4982 } 4983 4984 MDI_PHCI_LOCK(ph); 4985 switch (cmd) { 4986 case DDI_DETACH: 4987 MDI_DEBUG(2, (CE_NOTE, dip, 4988 "!pHCI pre_detach: called %p\n", (void *)ph)); 4989 if (!MDI_PHCI_IS_OFFLINE(ph)) { 4990 /* 4991 * mdi_pathinfo nodes are still attached 
to 4992 * this pHCI. Fail the detach for this pHCI. 4993 */ 4994 MDI_DEBUG(2, (CE_WARN, dip, 4995 "!pHCI pre_detach: " 4996 "mdi_pathinfo nodes are still attached " 4997 "%p\n", (void *)ph)); 4998 rv = DDI_FAILURE; 4999 break; 5000 } 5001 MDI_PHCI_SET_DETACH(ph); 5002 break; 5003 5004 case DDI_SUSPEND: 5005 /* 5006 * pHCI is getting suspended. Since mpxio client 5007 * devices may not be suspended at this point, to avoid 5008 * a potential stack overflow, it is important to suspend 5009 * client devices before pHCI can be suspended. 5010 */ 5011 5012 MDI_DEBUG(2, (CE_NOTE, dip, 5013 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5014 /* 5015 * Suspend all the client devices accessible through this pHCI 5016 */ 5017 pip = ph->ph_path_head; 5018 while (pip != NULL && rv == DDI_SUCCESS) { 5019 dev_info_t *cdip; 5020 MDI_PI_LOCK(pip); 5021 next = 5022 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5023 ct = MDI_PI(pip)->pi_client; 5024 i_mdi_client_lock(ct, pip); 5025 cdip = ct->ct_dip; 5026 MDI_PI_UNLOCK(pip); 5027 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5028 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5029 i_mdi_client_unlock(ct); 5030 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5031 DDI_SUCCESS) { 5032 /* 5033 * Suspend of one of the client 5034 * device has failed. 5035 */ 5036 MDI_DEBUG(1, (CE_WARN, dip, 5037 "!Suspend of device (%s%d) failed.", 5038 ddi_driver_name(cdip), 5039 ddi_get_instance(cdip))); 5040 failed_pip = pip; 5041 break; 5042 } 5043 } else { 5044 i_mdi_client_unlock(ct); 5045 } 5046 pip = next; 5047 } 5048 5049 if (rv == DDI_SUCCESS) { 5050 /* 5051 * Suspend of client devices is complete. Proceed 5052 * with pHCI suspend. 5053 */ 5054 MDI_PHCI_SET_SUSPEND(ph); 5055 } else { 5056 /* 5057 * Revert back all the suspended client device states 5058 * to converse. 
5059 */ 5060 pip = ph->ph_path_head; 5061 while (pip != failed_pip) { 5062 dev_info_t *cdip; 5063 MDI_PI_LOCK(pip); 5064 next = 5065 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5066 ct = MDI_PI(pip)->pi_client; 5067 i_mdi_client_lock(ct, pip); 5068 cdip = ct->ct_dip; 5069 MDI_PI_UNLOCK(pip); 5070 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5071 i_mdi_client_unlock(ct); 5072 (void) devi_attach(cdip, DDI_RESUME); 5073 } else { 5074 i_mdi_client_unlock(ct); 5075 } 5076 pip = next; 5077 } 5078 } 5079 break; 5080 5081 default: 5082 rv = DDI_FAILURE; 5083 break; 5084 } 5085 MDI_PHCI_UNLOCK(ph); 5086 return (rv); 5087 } 5088 5089 /*ARGSUSED*/ 5090 static int 5091 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5092 { 5093 int rv = DDI_SUCCESS; 5094 mdi_client_t *ct; 5095 5096 ct = i_devi_get_client(dip); 5097 if (ct == NULL) { 5098 return (rv); 5099 } 5100 5101 MDI_CLIENT_LOCK(ct); 5102 switch (cmd) { 5103 case DDI_DETACH: 5104 MDI_DEBUG(2, (CE_NOTE, dip, 5105 "!Client pre_detach: called %p\n", (void *)ct)); 5106 MDI_CLIENT_SET_DETACH(ct); 5107 break; 5108 5109 case DDI_SUSPEND: 5110 MDI_DEBUG(2, (CE_NOTE, dip, 5111 "!Client pre_suspend: called %p\n", (void *)ct)); 5112 MDI_CLIENT_SET_SUSPEND(ct); 5113 break; 5114 5115 default: 5116 rv = DDI_FAILURE; 5117 break; 5118 } 5119 MDI_CLIENT_UNLOCK(ct); 5120 return (rv); 5121 } 5122 5123 /* 5124 * mdi_post_detach(): 5125 * Post detach notification handler 5126 */ 5127 /*ARGSUSED*/ 5128 void 5129 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5130 { 5131 /* 5132 * Detach/Suspend of mpxio component failed. Update our state 5133 * too 5134 */ 5135 if (MDI_PHCI(dip)) 5136 i_mdi_phci_post_detach(dip, cmd, error); 5137 5138 if (MDI_CLIENT(dip)) 5139 i_mdi_client_post_detach(dip, cmd, error); 5140 } 5141 5142 /*ARGSUSED*/ 5143 static void 5144 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5145 { 5146 mdi_phci_t *ph; 5147 5148 /* 5149 * Detach/Suspend of phci component failed. 
Update our state 5150 * too 5151 */ 5152 ph = i_devi_get_phci(dip); 5153 if (ph == NULL) { 5154 return; 5155 } 5156 5157 MDI_PHCI_LOCK(ph); 5158 /* 5159 * Detach of pHCI failed. Restore back converse 5160 * state 5161 */ 5162 switch (cmd) { 5163 case DDI_DETACH: 5164 MDI_DEBUG(2, (CE_NOTE, dip, 5165 "!pHCI post_detach: called %p\n", (void *)ph)); 5166 if (error != DDI_SUCCESS) 5167 MDI_PHCI_SET_ATTACH(ph); 5168 break; 5169 5170 case DDI_SUSPEND: 5171 MDI_DEBUG(2, (CE_NOTE, dip, 5172 "!pHCI post_suspend: called %p\n", (void *)ph)); 5173 if (error != DDI_SUCCESS) 5174 MDI_PHCI_SET_RESUME(ph); 5175 break; 5176 } 5177 MDI_PHCI_UNLOCK(ph); 5178 } 5179 5180 /*ARGSUSED*/ 5181 static void 5182 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5183 { 5184 mdi_client_t *ct; 5185 5186 ct = i_devi_get_client(dip); 5187 if (ct == NULL) { 5188 return; 5189 } 5190 MDI_CLIENT_LOCK(ct); 5191 /* 5192 * Detach of Client failed. Restore back converse 5193 * state 5194 */ 5195 switch (cmd) { 5196 case DDI_DETACH: 5197 MDI_DEBUG(2, (CE_NOTE, dip, 5198 "!Client post_detach: called %p\n", (void *)ct)); 5199 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5200 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5201 "i_mdi_pm_rele_client\n")); 5202 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5203 } else { 5204 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5205 "i_mdi_pm_reset_client\n")); 5206 i_mdi_pm_reset_client(ct); 5207 } 5208 if (error != DDI_SUCCESS) 5209 MDI_CLIENT_SET_ATTACH(ct); 5210 break; 5211 5212 case DDI_SUSPEND: 5213 MDI_DEBUG(2, (CE_NOTE, dip, 5214 "!Client post_suspend: called %p\n", (void *)ct)); 5215 if (error != DDI_SUCCESS) 5216 MDI_CLIENT_SET_RESUME(ct); 5217 break; 5218 } 5219 MDI_CLIENT_UNLOCK(ct); 5220 } 5221 5222 int 5223 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5224 { 5225 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5226 } 5227 5228 /* 5229 * create and install per-path (client - pHCI) statistics 5230 * I/O stats supported: nread, nwritten, reads, and writes 5231 * Error stats - hard errors, soft errors, & transport errors 5232 */ 5233 int 5234 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5235 { 5236 kstat_t *kiosp, *kerrsp; 5237 struct pi_errs *nsp; 5238 struct mdi_pi_kstats *mdi_statp; 5239 5240 if (MDI_PI(pip)->pi_kstats != NULL) 5241 return (MDI_SUCCESS); 5242 5243 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5244 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5245 return (MDI_FAILURE); 5246 } 5247 5248 (void) strcat(ksname, ",err"); 5249 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5250 KSTAT_TYPE_NAMED, 5251 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5252 if (kerrsp == NULL) { 5253 kstat_delete(kiosp); 5254 return (MDI_FAILURE); 5255 } 5256 5257 nsp = (struct pi_errs *)kerrsp->ks_data; 5258 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5259 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5260 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5261 KSTAT_DATA_UINT32); 5262 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5263 KSTAT_DATA_UINT32); 5264 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5265 KSTAT_DATA_UINT32); 5266 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5267 KSTAT_DATA_UINT32); 5268 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5269 KSTAT_DATA_UINT32); 5270 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5271 KSTAT_DATA_UINT32); 5272 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5273 KSTAT_DATA_UINT32); 5274 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5275 5276 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5277 mdi_statp->pi_kstat_ref = 1; 5278 mdi_statp->pi_kstat_iostats = kiosp; 5279 mdi_statp->pi_kstat_errstats = kerrsp; 5280 
kstat_install(kiosp); 5281 kstat_install(kerrsp); 5282 MDI_PI(pip)->pi_kstats = mdi_statp; 5283 return (MDI_SUCCESS); 5284 } 5285 5286 /* 5287 * destroy per-path properties 5288 */ 5289 static void 5290 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5291 { 5292 5293 struct mdi_pi_kstats *mdi_statp; 5294 5295 if (MDI_PI(pip)->pi_kstats == NULL) 5296 return; 5297 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5298 return; 5299 5300 MDI_PI(pip)->pi_kstats = NULL; 5301 5302 /* 5303 * the kstat may be shared between multiple pathinfo nodes 5304 * decrement this pathinfo's usage, removing the kstats 5305 * themselves when the last pathinfo reference is removed. 5306 */ 5307 ASSERT(mdi_statp->pi_kstat_ref > 0); 5308 if (--mdi_statp->pi_kstat_ref != 0) 5309 return; 5310 5311 kstat_delete(mdi_statp->pi_kstat_iostats); 5312 kstat_delete(mdi_statp->pi_kstat_errstats); 5313 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5314 } 5315 5316 /* 5317 * update I/O paths KSTATS 5318 */ 5319 void 5320 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5321 { 5322 kstat_t *iostatp; 5323 size_t xfer_cnt; 5324 5325 ASSERT(pip != NULL); 5326 5327 /* 5328 * I/O can be driven across a path prior to having path 5329 * statistics available, i.e. probe(9e). 5330 */ 5331 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5332 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5333 xfer_cnt = bp->b_bcount - bp->b_resid; 5334 if (bp->b_flags & B_READ) { 5335 KSTAT_IO_PTR(iostatp)->reads++; 5336 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5337 } else { 5338 KSTAT_IO_PTR(iostatp)->writes++; 5339 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5340 } 5341 } 5342 } 5343 5344 /* 5345 * Enable the path(specific client/target/initiator) 5346 * Enabling a path means that MPxIO may select the enabled path for routing 5347 * future I/O requests, subject to other path state constraints. 
5348 */ 5349 int 5350 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5351 { 5352 mdi_phci_t *ph; 5353 5354 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5355 if (ph == NULL) { 5356 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5357 " failed. pip: %p ph = NULL\n", (void *)pip)); 5358 return (MDI_FAILURE); 5359 } 5360 5361 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5362 MDI_ENABLE_OP); 5363 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5364 " Returning success pip = %p. ph = %p\n", 5365 (void *)pip, (void *)ph)); 5366 return (MDI_SUCCESS); 5367 5368 } 5369 5370 /* 5371 * Disable the path (specific client/target/initiator) 5372 * Disabling a path means that MPxIO will not select the disabled path for 5373 * routing any new I/O requests. 5374 */ 5375 int 5376 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5377 { 5378 mdi_phci_t *ph; 5379 5380 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5381 if (ph == NULL) { 5382 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5383 " failed. pip: %p ph = NULL\n", (void *)pip)); 5384 return (MDI_FAILURE); 5385 } 5386 5387 (void) i_mdi_enable_disable_path(pip, 5388 ph->ph_vhci, flags, MDI_DISABLE_OP); 5389 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5390 "Returning success pip = %p. ph = %p", 5391 (void *)pip, (void *)ph)); 5392 return (MDI_SUCCESS); 5393 } 5394 5395 /* 5396 * disable the path to a particular pHCI (pHCI specified in the phci_path 5397 * argument) for a particular client (specified in the client_path argument). 5398 * Disabling a path means that MPxIO will not select the disabled path for 5399 * routing any new I/O requests. 
5400 * NOTE: this will be removed once the NWS files are changed to use the new 5401 * mdi_{enable,disable}_path interfaces 5402 */ 5403 int 5404 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5405 { 5406 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5407 } 5408 5409 /* 5410 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5411 * argument) for a particular client (specified in the client_path argument). 5412 * Enabling a path means that MPxIO may select the enabled path for routing 5413 * future I/O requests, subject to other path state constraints. 5414 * NOTE: this will be removed once the NWS files are changed to use the new 5415 * mdi_{enable,disable}_path interfaces 5416 */ 5417 5418 int 5419 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5420 { 5421 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5422 } 5423 5424 /* 5425 * Common routine for doing enable/disable. 5426 */ 5427 static mdi_pathinfo_t * 5428 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5429 int op) 5430 { 5431 int sync_flag = 0; 5432 int rv; 5433 mdi_pathinfo_t *next; 5434 int (*f)() = NULL; 5435 5436 f = vh->vh_ops->vo_pi_state_change; 5437 5438 sync_flag = (flags << 8) & 0xf00; 5439 5440 /* 5441 * Do a callback into the mdi consumer to let it 5442 * know that path is about to get enabled/disabled. 
5443 */ 5444 if (f != NULL) { 5445 rv = (*f)(vh->vh_dip, pip, 0, 5446 MDI_PI_EXT_STATE(pip), 5447 MDI_EXT_STATE_CHANGE | sync_flag | 5448 op | MDI_BEFORE_STATE_CHANGE); 5449 if (rv != MDI_SUCCESS) { 5450 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5451 "!vo_pi_state_change: failed rv = %x", rv)); 5452 } 5453 } 5454 MDI_PI_LOCK(pip); 5455 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5456 5457 switch (flags) { 5458 case USER_DISABLE: 5459 if (op == MDI_DISABLE_OP) { 5460 MDI_PI_SET_USER_DISABLE(pip); 5461 } else { 5462 MDI_PI_SET_USER_ENABLE(pip); 5463 } 5464 break; 5465 case DRIVER_DISABLE: 5466 if (op == MDI_DISABLE_OP) { 5467 MDI_PI_SET_DRV_DISABLE(pip); 5468 } else { 5469 MDI_PI_SET_DRV_ENABLE(pip); 5470 } 5471 break; 5472 case DRIVER_DISABLE_TRANSIENT: 5473 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5474 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5475 } else { 5476 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5477 } 5478 break; 5479 } 5480 MDI_PI_UNLOCK(pip); 5481 /* 5482 * Do a callback into the mdi consumer to let it 5483 * know that path is now enabled/disabled. 5484 */ 5485 if (f != NULL) { 5486 rv = (*f)(vh->vh_dip, pip, 0, 5487 MDI_PI_EXT_STATE(pip), 5488 MDI_EXT_STATE_CHANGE | sync_flag | 5489 op | MDI_AFTER_STATE_CHANGE); 5490 if (rv != MDI_SUCCESS) { 5491 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5492 "!vo_pi_state_change: failed rv = %x", rv)); 5493 } 5494 } 5495 return (next); 5496 } 5497 5498 /* 5499 * Common routine for doing enable/disable. 
5500 * NOTE: this will be removed once the NWS files are changed to use the new 5501 * mdi_{enable,disable}_path has been putback 5502 */ 5503 int 5504 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5505 { 5506 5507 mdi_phci_t *ph; 5508 mdi_vhci_t *vh = NULL; 5509 mdi_client_t *ct; 5510 mdi_pathinfo_t *next, *pip; 5511 int found_it; 5512 5513 ph = i_devi_get_phci(pdip); 5514 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5515 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5516 (void *)cdip)); 5517 if (ph == NULL) { 5518 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5519 "Op %d failed. ph = NULL\n", op)); 5520 return (MDI_FAILURE); 5521 } 5522 5523 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5524 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5525 "Op Invalid operation = %d\n", op)); 5526 return (MDI_FAILURE); 5527 } 5528 5529 vh = ph->ph_vhci; 5530 5531 if (cdip == NULL) { 5532 /* 5533 * Need to mark the Phci as enabled/disabled. 5534 */ 5535 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5536 "Op %d for the phci\n", op)); 5537 MDI_PHCI_LOCK(ph); 5538 switch (flags) { 5539 case USER_DISABLE: 5540 if (op == MDI_DISABLE_OP) { 5541 MDI_PHCI_SET_USER_DISABLE(ph); 5542 } else { 5543 MDI_PHCI_SET_USER_ENABLE(ph); 5544 } 5545 break; 5546 case DRIVER_DISABLE: 5547 if (op == MDI_DISABLE_OP) { 5548 MDI_PHCI_SET_DRV_DISABLE(ph); 5549 } else { 5550 MDI_PHCI_SET_DRV_ENABLE(ph); 5551 } 5552 break; 5553 case DRIVER_DISABLE_TRANSIENT: 5554 if (op == MDI_DISABLE_OP) { 5555 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5556 } else { 5557 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5558 } 5559 break; 5560 default: 5561 MDI_PHCI_UNLOCK(ph); 5562 MDI_DEBUG(1, (CE_NOTE, NULL, 5563 "!i_mdi_pi_enable_disable:" 5564 " Invalid flag argument= %d\n", flags)); 5565 } 5566 5567 /* 5568 * Phci has been disabled. Now try to enable/disable 5569 * path info's to each client. 
5570 */ 5571 pip = ph->ph_path_head; 5572 while (pip != NULL) { 5573 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5574 } 5575 MDI_PHCI_UNLOCK(ph); 5576 } else { 5577 5578 /* 5579 * Disable a specific client. 5580 */ 5581 ct = i_devi_get_client(cdip); 5582 if (ct == NULL) { 5583 MDI_DEBUG(1, (CE_NOTE, NULL, 5584 "!i_mdi_pi_enable_disable:" 5585 " failed. ct = NULL operation = %d\n", op)); 5586 return (MDI_FAILURE); 5587 } 5588 5589 MDI_CLIENT_LOCK(ct); 5590 pip = ct->ct_path_head; 5591 found_it = 0; 5592 while (pip != NULL) { 5593 MDI_PI_LOCK(pip); 5594 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5595 if (MDI_PI(pip)->pi_phci == ph) { 5596 MDI_PI_UNLOCK(pip); 5597 found_it = 1; 5598 break; 5599 } 5600 MDI_PI_UNLOCK(pip); 5601 pip = next; 5602 } 5603 5604 5605 MDI_CLIENT_UNLOCK(ct); 5606 if (found_it == 0) { 5607 MDI_DEBUG(1, (CE_NOTE, NULL, 5608 "!i_mdi_pi_enable_disable:" 5609 " failed. Could not find corresponding pip\n")); 5610 return (MDI_FAILURE); 5611 } 5612 5613 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5614 } 5615 5616 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5617 "Op %d Returning success pdip = %p cdip = %p\n", 5618 op, (void *)pdip, (void *)cdip)); 5619 return (MDI_SUCCESS); 5620 } 5621 5622 /* 5623 * Ensure phci powered up 5624 */ 5625 static void 5626 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5627 { 5628 dev_info_t *ph_dip; 5629 5630 ASSERT(pip != NULL); 5631 ASSERT(MDI_PI_LOCKED(pip)); 5632 5633 if (MDI_PI(pip)->pi_pm_held) { 5634 return; 5635 } 5636 5637 ph_dip = mdi_pi_get_phci(pip); 5638 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 5639 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5640 if (ph_dip == NULL) { 5641 return; 5642 } 5643 5644 MDI_PI_UNLOCK(pip); 5645 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5646 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5647 5648 pm_hold_power(ph_dip); 5649 5650 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5651 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 5652 MDI_PI_LOCK(pip); 5653 5654 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 5655 if (DEVI(ph_dip)->devi_pm_info) 5656 MDI_PI(pip)->pi_pm_held = 1; 5657 } 5658 5659 /* 5660 * Allow phci powered down 5661 */ 5662 static void 5663 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5664 { 5665 dev_info_t *ph_dip = NULL; 5666 5667 ASSERT(pip != NULL); 5668 ASSERT(MDI_PI_LOCKED(pip)); 5669 5670 if (MDI_PI(pip)->pi_pm_held == 0) { 5671 return; 5672 } 5673 5674 ph_dip = mdi_pi_get_phci(pip); 5675 ASSERT(ph_dip != NULL); 5676 5677 MDI_PI_UNLOCK(pip); 5678 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 5679 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5680 5681 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5682 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5683 pm_rele_power(ph_dip); 5684 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5685 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5686 5687 MDI_PI_LOCK(pip); 5688 MDI_PI(pip)->pi_pm_held = 0; 5689 } 5690 5691 static void 5692 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5693 { 5694 ASSERT(MDI_CLIENT_LOCKED(ct)); 5695 5696 ct->ct_power_cnt += incr; 5697 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 5698 "ct_power_cnt = %d incr = %d\n", (void *)ct, 5699 ct->ct_power_cnt, incr)); 5700 ASSERT(ct->ct_power_cnt >= 0); 5701 } 5702 5703 static void 5704 i_mdi_rele_all_phci(mdi_client_t *ct) 5705 { 5706 mdi_pathinfo_t *pip; 5707 5708 ASSERT(MDI_CLIENT_LOCKED(ct)); 5709 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5710 while (pip != NULL) { 5711 mdi_hold_path(pip); 5712 MDI_PI_LOCK(pip); 5713 i_mdi_pm_rele_pip(pip); 5714 MDI_PI_UNLOCK(pip); 5715 mdi_rele_path(pip); 5716 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5717 } 5718 } 5719 5720 static void 5721 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5722 { 5723 ASSERT(MDI_CLIENT_LOCKED(ct)); 5724 5725 if (i_ddi_devi_attached(ct->ct_dip)) { 5726 ct->ct_power_cnt -= decr; 5727 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 5728 "ct_power_cnt = %d decr = %d\n", 5729 (void *)ct, ct->ct_power_cnt, decr)); 5730 } 5731 5732 ASSERT(ct->ct_power_cnt >= 0); 5733 if (ct->ct_power_cnt == 0) { 5734 i_mdi_rele_all_phci(ct); 5735 return; 5736 } 5737 } 5738 5739 static void 5740 i_mdi_pm_reset_client(mdi_client_t *ct) 5741 { 5742 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 5743 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 5744 ASSERT(MDI_CLIENT_LOCKED(ct)); 5745 ct->ct_power_cnt = 0; 5746 i_mdi_rele_all_phci(ct); 5747 ct->ct_powercnt_config = 0; 5748 ct->ct_powercnt_unconfig = 0; 5749 ct->ct_powercnt_reset = 1; 5750 } 5751 5752 static int 5753 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5754 { 5755 int ret; 5756 dev_info_t *ph_dip; 5757 5758 MDI_PI_LOCK(pip); 5759 i_mdi_pm_hold_pip(pip); 5760 5761 ph_dip = mdi_pi_get_phci(pip); 5762 MDI_PI_UNLOCK(pip); 5763 5764 /* bring all components of phci to full power */ 5765 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5766 "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip), 5767 ddi_get_instance(ph_dip), (void *)pip)); 5768 5769 ret = pm_powerup(ph_dip); 5770 5771 if (ret == DDI_FAILURE) { 5772 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5773 "pm_powerup FAILED for %s%d %p\n", 5774 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), 5775 (void *)pip)); 5776 5777 MDI_PI_LOCK(pip); 5778 i_mdi_pm_rele_pip(pip); 5779 MDI_PI_UNLOCK(pip); 5780 return (MDI_FAILURE); 5781 } 5782 5783 return (MDI_SUCCESS); 5784 } 5785 5786 static int 5787 i_mdi_power_all_phci(mdi_client_t *ct) 5788 { 5789 mdi_pathinfo_t *pip; 5790 int succeeded = 0; 5791 5792 ASSERT(MDI_CLIENT_LOCKED(ct)); 5793 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5794 while (pip != NULL) { 5795 /* 5796 * Don't power if MDI_PATHINFO_STATE_FAULT 5797 * or MDI_PATHINFO_STATE_OFFLINE. 
5798 */ 5799 if (MDI_PI_IS_INIT(pip) || 5800 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 5801 mdi_hold_path(pip); 5802 MDI_CLIENT_UNLOCK(ct); 5803 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 5804 succeeded = 1; 5805 5806 ASSERT(ct == MDI_PI(pip)->pi_client); 5807 MDI_CLIENT_LOCK(ct); 5808 mdi_rele_path(pip); 5809 } 5810 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5811 } 5812 5813 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 5814 } 5815 5816 /* 5817 * mdi_bus_power(): 5818 * 1. Place the phci(s) into powered up state so that 5819 * client can do power management 5820 * 2. Ensure phci powered up as client power managing 5821 * Return Values: 5822 * MDI_SUCCESS 5823 * MDI_FAILURE 5824 */ 5825 int 5826 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 5827 void *arg, void *result) 5828 { 5829 int ret = MDI_SUCCESS; 5830 pm_bp_child_pwrchg_t *bpc; 5831 mdi_client_t *ct; 5832 dev_info_t *cdip; 5833 pm_bp_has_changed_t *bphc; 5834 5835 /* 5836 * BUS_POWER_NOINVOL not supported 5837 */ 5838 if (op == BUS_POWER_NOINVOL) 5839 return (MDI_FAILURE); 5840 5841 /* 5842 * ignore other OPs. 
5843 * return quickly to save cou cycles on the ct processing 5844 */ 5845 switch (op) { 5846 case BUS_POWER_PRE_NOTIFICATION: 5847 case BUS_POWER_POST_NOTIFICATION: 5848 bpc = (pm_bp_child_pwrchg_t *)arg; 5849 cdip = bpc->bpc_dip; 5850 break; 5851 case BUS_POWER_HAS_CHANGED: 5852 bphc = (pm_bp_has_changed_t *)arg; 5853 cdip = bphc->bphc_dip; 5854 break; 5855 default: 5856 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 5857 } 5858 5859 ASSERT(MDI_CLIENT(cdip)); 5860 5861 ct = i_devi_get_client(cdip); 5862 if (ct == NULL) 5863 return (MDI_FAILURE); 5864 5865 /* 5866 * wait till the mdi_pathinfo node state change are processed 5867 */ 5868 MDI_CLIENT_LOCK(ct); 5869 switch (op) { 5870 case BUS_POWER_PRE_NOTIFICATION: 5871 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5872 "BUS_POWER_PRE_NOTIFICATION:" 5873 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5874 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5875 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 5876 5877 /* serialize power level change per client */ 5878 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5879 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5880 5881 MDI_CLIENT_SET_POWER_TRANSITION(ct); 5882 5883 if (ct->ct_power_cnt == 0) { 5884 ret = i_mdi_power_all_phci(ct); 5885 } 5886 5887 /* 5888 * if new_level > 0: 5889 * - hold phci(s) 5890 * - power up phci(s) if not already 5891 * ignore power down 5892 */ 5893 if (bpc->bpc_nlevel > 0) { 5894 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 5895 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5896 "mdi_bus_power i_mdi_pm_hold_client\n")); 5897 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5898 } 5899 } 5900 break; 5901 case BUS_POWER_POST_NOTIFICATION: 5902 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power " 5903 "BUS_POWER_POST_NOTIFICATION:" 5904 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n", 5905 PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 5906 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 5907 *(int *)result)); 5908 5909 if (*(int *)result 
== DDI_SUCCESS) { 5910 if (bpc->bpc_nlevel > 0) { 5911 MDI_CLIENT_SET_POWER_UP(ct); 5912 } else { 5913 MDI_CLIENT_SET_POWER_DOWN(ct); 5914 } 5915 } 5916 5917 /* release the hold we did in pre-notification */ 5918 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 5919 !DEVI_IS_ATTACHING(ct->ct_dip)) { 5920 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5921 "mdi_bus_power i_mdi_pm_rele_client\n")); 5922 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5923 } 5924 5925 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 5926 /* another thread might started attaching */ 5927 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5928 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5929 "mdi_bus_power i_mdi_pm_rele_client\n")); 5930 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5931 /* detaching has been taken care in pm_post_unconfig */ 5932 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 5933 MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, 5934 "mdi_bus_power i_mdi_pm_reset_client\n")); 5935 i_mdi_pm_reset_client(ct); 5936 } 5937 } 5938 5939 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 5940 cv_broadcast(&ct->ct_powerchange_cv); 5941 5942 break; 5943 5944 /* need to do more */ 5945 case BUS_POWER_HAS_CHANGED: 5946 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power " 5947 "BUS_POWER_HAS_CHANGED:" 5948 "%s@%s, olevel=%d, nlevel=%d, comp=%d\n", 5949 PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 5950 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 5951 5952 if (bphc->bphc_nlevel > 0 && 5953 bphc->bphc_nlevel > bphc->bphc_olevel) { 5954 if (ct->ct_power_cnt == 0) { 5955 ret = i_mdi_power_all_phci(ct); 5956 } 5957 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5958 "mdi_bus_power i_mdi_pm_hold_client\n")); 5959 i_mdi_pm_hold_client(ct, ct->ct_path_count); 5960 } 5961 5962 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 5963 MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, 5964 "mdi_bus_power i_mdi_pm_rele_client\n")); 5965 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5966 } 5967 break; 5968 } 5969 5970 
MDI_CLIENT_UNLOCK(ct); 5971 return (ret); 5972 } 5973 5974 static int 5975 i_mdi_pm_pre_config_one(dev_info_t *child) 5976 { 5977 int ret = MDI_SUCCESS; 5978 mdi_client_t *ct; 5979 5980 ct = i_devi_get_client(child); 5981 if (ct == NULL) 5982 return (MDI_FAILURE); 5983 5984 MDI_CLIENT_LOCK(ct); 5985 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 5986 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 5987 5988 if (!MDI_CLIENT_IS_FAILED(ct)) { 5989 MDI_CLIENT_UNLOCK(ct); 5990 MDI_DEBUG(4, (CE_NOTE, child, 5991 "i_mdi_pm_pre_config_one already configured\n")); 5992 return (MDI_SUCCESS); 5993 } 5994 5995 if (ct->ct_powercnt_config) { 5996 MDI_CLIENT_UNLOCK(ct); 5997 MDI_DEBUG(4, (CE_NOTE, child, 5998 "i_mdi_pm_pre_config_one ALREADY held\n")); 5999 return (MDI_SUCCESS); 6000 } 6001 6002 if (ct->ct_power_cnt == 0) { 6003 ret = i_mdi_power_all_phci(ct); 6004 } 6005 MDI_DEBUG(4, (CE_NOTE, child, 6006 "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n")); 6007 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6008 ct->ct_powercnt_config = 1; 6009 ct->ct_powercnt_reset = 0; 6010 MDI_CLIENT_UNLOCK(ct); 6011 return (ret); 6012 } 6013 6014 static int 6015 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6016 { 6017 int ret = MDI_SUCCESS; 6018 dev_info_t *cdip; 6019 int circ; 6020 6021 ASSERT(MDI_VHCI(vdip)); 6022 6023 /* ndi_devi_config_one */ 6024 if (child) { 6025 ASSERT(DEVI_BUSY_OWNED(vdip)); 6026 return (i_mdi_pm_pre_config_one(child)); 6027 } 6028 6029 /* devi_config_common */ 6030 ndi_devi_enter(vdip, &circ); 6031 cdip = ddi_get_child(vdip); 6032 while (cdip) { 6033 dev_info_t *next = ddi_get_next_sibling(cdip); 6034 6035 ret = i_mdi_pm_pre_config_one(cdip); 6036 if (ret != MDI_SUCCESS) 6037 break; 6038 cdip = next; 6039 } 6040 ndi_devi_exit(vdip, circ); 6041 return (ret); 6042 } 6043 6044 static int 6045 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6046 { 6047 int ret = MDI_SUCCESS; 6048 mdi_client_t *ct; 6049 6050 ct = i_devi_get_client(child); 6051 if 
(ct == NULL) 6052 return (MDI_FAILURE); 6053 6054 MDI_CLIENT_LOCK(ct); 6055 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6056 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6057 6058 if (!i_ddi_devi_attached(ct->ct_dip)) { 6059 MDI_DEBUG(4, (CE_NOTE, child, 6060 "i_mdi_pm_pre_unconfig node detached already\n")); 6061 MDI_CLIENT_UNLOCK(ct); 6062 return (MDI_SUCCESS); 6063 } 6064 6065 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6066 (flags & NDI_AUTODETACH)) { 6067 MDI_DEBUG(4, (CE_NOTE, child, 6068 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6069 MDI_CLIENT_UNLOCK(ct); 6070 return (MDI_FAILURE); 6071 } 6072 6073 if (ct->ct_powercnt_unconfig) { 6074 MDI_DEBUG(4, (CE_NOTE, child, 6075 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6076 MDI_CLIENT_UNLOCK(ct); 6077 *held = 1; 6078 return (MDI_SUCCESS); 6079 } 6080 6081 if (ct->ct_power_cnt == 0) { 6082 ret = i_mdi_power_all_phci(ct); 6083 } 6084 MDI_DEBUG(4, (CE_NOTE, child, 6085 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6086 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6087 ct->ct_powercnt_unconfig = 1; 6088 ct->ct_powercnt_reset = 0; 6089 MDI_CLIENT_UNLOCK(ct); 6090 if (ret == MDI_SUCCESS) 6091 *held = 1; 6092 return (ret); 6093 } 6094 6095 static int 6096 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6097 int flags) 6098 { 6099 int ret = MDI_SUCCESS; 6100 dev_info_t *cdip; 6101 int circ; 6102 6103 ASSERT(MDI_VHCI(vdip)); 6104 *held = 0; 6105 6106 /* ndi_devi_unconfig_one */ 6107 if (child) { 6108 ASSERT(DEVI_BUSY_OWNED(vdip)); 6109 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6110 } 6111 6112 /* devi_unconfig_common */ 6113 ndi_devi_enter(vdip, &circ); 6114 cdip = ddi_get_child(vdip); 6115 while (cdip) { 6116 dev_info_t *next = ddi_get_next_sibling(cdip); 6117 6118 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6119 cdip = next; 6120 } 6121 ndi_devi_exit(vdip, circ); 6122 6123 if (*held) 6124 ret = MDI_SUCCESS; 6125 6126 return (ret); 6127 } 6128 6129 static void 6130 
i_mdi_pm_post_config_one(dev_info_t *child) 6131 { 6132 mdi_client_t *ct; 6133 6134 ct = i_devi_get_client(child); 6135 if (ct == NULL) 6136 return; 6137 6138 MDI_CLIENT_LOCK(ct); 6139 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6140 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6141 6142 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6143 MDI_DEBUG(4, (CE_NOTE, child, 6144 "i_mdi_pm_post_config_one NOT configured\n")); 6145 MDI_CLIENT_UNLOCK(ct); 6146 return; 6147 } 6148 6149 /* client has not been updated */ 6150 if (MDI_CLIENT_IS_FAILED(ct)) { 6151 MDI_DEBUG(4, (CE_NOTE, child, 6152 "i_mdi_pm_post_config_one NOT configured\n")); 6153 MDI_CLIENT_UNLOCK(ct); 6154 return; 6155 } 6156 6157 /* another thread might have powered it down or detached it */ 6158 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6159 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6160 (!i_ddi_devi_attached(ct->ct_dip) && 6161 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6162 MDI_DEBUG(4, (CE_NOTE, child, 6163 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6164 i_mdi_pm_reset_client(ct); 6165 } else { 6166 mdi_pathinfo_t *pip, *next; 6167 int valid_path_count = 0; 6168 6169 MDI_DEBUG(4, (CE_NOTE, child, 6170 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6171 pip = ct->ct_path_head; 6172 while (pip != NULL) { 6173 MDI_PI_LOCK(pip); 6174 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6175 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6176 valid_path_count ++; 6177 MDI_PI_UNLOCK(pip); 6178 pip = next; 6179 } 6180 i_mdi_pm_rele_client(ct, valid_path_count); 6181 } 6182 ct->ct_powercnt_config = 0; 6183 MDI_CLIENT_UNLOCK(ct); 6184 } 6185 6186 static void 6187 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6188 { 6189 int circ; 6190 dev_info_t *cdip; 6191 6192 ASSERT(MDI_VHCI(vdip)); 6193 6194 /* ndi_devi_config_one */ 6195 if (child) { 6196 ASSERT(DEVI_BUSY_OWNED(vdip)); 6197 i_mdi_pm_post_config_one(child); 6198 return; 6199 } 6200 6201 /* devi_config_common */ 6202 ndi_devi_enter(vdip, 
&circ); 6203 cdip = ddi_get_child(vdip); 6204 while (cdip) { 6205 dev_info_t *next = ddi_get_next_sibling(cdip); 6206 6207 i_mdi_pm_post_config_one(cdip); 6208 cdip = next; 6209 } 6210 ndi_devi_exit(vdip, circ); 6211 } 6212 6213 static void 6214 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6215 { 6216 mdi_client_t *ct; 6217 6218 ct = i_devi_get_client(child); 6219 if (ct == NULL) 6220 return; 6221 6222 MDI_CLIENT_LOCK(ct); 6223 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6224 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6225 6226 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6227 MDI_DEBUG(4, (CE_NOTE, child, 6228 "i_mdi_pm_post_unconfig NOT held\n")); 6229 MDI_CLIENT_UNLOCK(ct); 6230 return; 6231 } 6232 6233 /* failure detaching or another thread just attached it */ 6234 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6235 i_ddi_devi_attached(ct->ct_dip)) || 6236 (!i_ddi_devi_attached(ct->ct_dip) && 6237 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6238 MDI_DEBUG(4, (CE_NOTE, child, 6239 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6240 i_mdi_pm_reset_client(ct); 6241 } else { 6242 mdi_pathinfo_t *pip, *next; 6243 int valid_path_count = 0; 6244 6245 MDI_DEBUG(4, (CE_NOTE, child, 6246 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6247 pip = ct->ct_path_head; 6248 while (pip != NULL) { 6249 MDI_PI_LOCK(pip); 6250 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6251 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6252 valid_path_count ++; 6253 MDI_PI_UNLOCK(pip); 6254 pip = next; 6255 } 6256 i_mdi_pm_rele_client(ct, valid_path_count); 6257 ct->ct_powercnt_unconfig = 0; 6258 } 6259 6260 MDI_CLIENT_UNLOCK(ct); 6261 } 6262 6263 static void 6264 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6265 { 6266 int circ; 6267 dev_info_t *cdip; 6268 6269 ASSERT(MDI_VHCI(vdip)); 6270 6271 if (!held) { 6272 MDI_DEBUG(4, (CE_NOTE, vdip, 6273 "i_mdi_pm_post_unconfig held = %d\n", held)); 6274 return; 6275 } 6276 6277 if (child) { 6278 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6279 i_mdi_pm_post_unconfig_one(child); 6280 return; 6281 } 6282 6283 ndi_devi_enter(vdip, &circ); 6284 cdip = ddi_get_child(vdip); 6285 while (cdip) { 6286 dev_info_t *next = ddi_get_next_sibling(cdip); 6287 6288 i_mdi_pm_post_unconfig_one(cdip); 6289 cdip = next; 6290 } 6291 ndi_devi_exit(vdip, circ); 6292 } 6293 6294 int 6295 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6296 { 6297 int circ, ret = MDI_SUCCESS; 6298 dev_info_t *client_dip = NULL; 6299 mdi_client_t *ct; 6300 6301 /* 6302 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6303 * Power up pHCI for the named client device. 6304 * Note: Before the client is enumerated under vhci by phci, 6305 * client_dip can be NULL. Then proceed to power up all the 6306 * pHCIs. 6307 */ 6308 if (devnm != NULL) { 6309 ndi_devi_enter(vdip, &circ); 6310 client_dip = ndi_devi_findchild(vdip, devnm); 6311 } 6312 6313 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6314 op, devnm ? 
devnm : "NULL", (void *)client_dip)); 6315 6316 switch (op) { 6317 case MDI_PM_PRE_CONFIG: 6318 ret = i_mdi_pm_pre_config(vdip, client_dip); 6319 break; 6320 6321 case MDI_PM_PRE_UNCONFIG: 6322 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6323 flags); 6324 break; 6325 6326 case MDI_PM_POST_CONFIG: 6327 i_mdi_pm_post_config(vdip, client_dip); 6328 break; 6329 6330 case MDI_PM_POST_UNCONFIG: 6331 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6332 break; 6333 6334 case MDI_PM_HOLD_POWER: 6335 case MDI_PM_RELE_POWER: 6336 ASSERT(args); 6337 6338 client_dip = (dev_info_t *)args; 6339 ASSERT(MDI_CLIENT(client_dip)); 6340 6341 ct = i_devi_get_client(client_dip); 6342 MDI_CLIENT_LOCK(ct); 6343 6344 if (op == MDI_PM_HOLD_POWER) { 6345 if (ct->ct_power_cnt == 0) { 6346 (void) i_mdi_power_all_phci(ct); 6347 MDI_DEBUG(4, (CE_NOTE, client_dip, 6348 "mdi_power i_mdi_pm_hold_client\n")); 6349 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6350 } 6351 } else { 6352 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6353 MDI_DEBUG(4, (CE_NOTE, client_dip, 6354 "mdi_power i_mdi_pm_rele_client\n")); 6355 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6356 } else { 6357 MDI_DEBUG(4, (CE_NOTE, client_dip, 6358 "mdi_power i_mdi_pm_reset_client\n")); 6359 i_mdi_pm_reset_client(ct); 6360 } 6361 } 6362 6363 MDI_CLIENT_UNLOCK(ct); 6364 break; 6365 6366 default: 6367 break; 6368 } 6369 6370 if (devnm) 6371 ndi_devi_exit(vdip, circ); 6372 6373 return (ret); 6374 } 6375 6376 int 6377 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6378 { 6379 mdi_vhci_t *vhci; 6380 6381 if (!MDI_VHCI(dip)) 6382 return (MDI_FAILURE); 6383 6384 if (mdi_class) { 6385 vhci = DEVI(dip)->devi_mdi_xhci; 6386 ASSERT(vhci); 6387 *mdi_class = vhci->vh_class; 6388 } 6389 6390 return (MDI_SUCCESS); 6391 } 6392 6393 int 6394 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6395 { 6396 mdi_phci_t *phci; 6397 6398 if (!MDI_PHCI(dip)) 6399 return (MDI_FAILURE); 6400 6401 if (mdi_class) { 
6402 phci = DEVI(dip)->devi_mdi_xhci; 6403 ASSERT(phci); 6404 *mdi_class = phci->ph_vhci->vh_class; 6405 } 6406 6407 return (MDI_SUCCESS); 6408 } 6409 6410 int 6411 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6412 { 6413 mdi_client_t *client; 6414 6415 if (!MDI_CLIENT(dip)) 6416 return (MDI_FAILURE); 6417 6418 if (mdi_class) { 6419 client = DEVI(dip)->devi_mdi_client; 6420 ASSERT(client); 6421 *mdi_class = client->ct_vhci->vh_class; 6422 } 6423 6424 return (MDI_SUCCESS); 6425 } 6426 6427 void * 6428 mdi_client_get_vhci_private(dev_info_t *dip) 6429 { 6430 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6431 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6432 mdi_client_t *ct; 6433 ct = i_devi_get_client(dip); 6434 return (ct->ct_vprivate); 6435 } 6436 return (NULL); 6437 } 6438 6439 void 6440 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6441 { 6442 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6443 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6444 mdi_client_t *ct; 6445 ct = i_devi_get_client(dip); 6446 ct->ct_vprivate = data; 6447 } 6448 } 6449 /* 6450 * mdi_pi_get_vhci_private(): 6451 * Get the vhci private information associated with the 6452 * mdi_pathinfo node 6453 */ 6454 void * 6455 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6456 { 6457 caddr_t vprivate = NULL; 6458 if (pip) { 6459 vprivate = MDI_PI(pip)->pi_vprivate; 6460 } 6461 return (vprivate); 6462 } 6463 6464 /* 6465 * mdi_pi_set_vhci_private(): 6466 * Set the vhci private information in the mdi_pathinfo node 6467 */ 6468 void 6469 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6470 { 6471 if (pip) { 6472 MDI_PI(pip)->pi_vprivate = priv; 6473 } 6474 } 6475 6476 /* 6477 * mdi_phci_get_vhci_private(): 6478 * Get the vhci private information associated with the 6479 * mdi_phci node 6480 */ 6481 void * 6482 mdi_phci_get_vhci_private(dev_info_t *dip) 6483 { 6484 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 
6485 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6486 mdi_phci_t *ph; 6487 ph = i_devi_get_phci(dip); 6488 return (ph->ph_vprivate); 6489 } 6490 return (NULL); 6491 } 6492 6493 /* 6494 * mdi_phci_set_vhci_private(): 6495 * Set the vhci private information in the mdi_phci node 6496 */ 6497 void 6498 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6499 { 6500 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6501 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6502 mdi_phci_t *ph; 6503 ph = i_devi_get_phci(dip); 6504 ph->ph_vprivate = priv; 6505 } 6506 } 6507 6508 /* 6509 * List of vhci class names: 6510 * A vhci class name must be in this list only if the corresponding vhci 6511 * driver intends to use the mdi provided bus config implementation 6512 * (i.e., mdi_vhci_bus_config()). 6513 */ 6514 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6515 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6516 6517 /* 6518 * During boot time, the on-disk vhci cache for every vhci class is read 6519 * in the form of an nvlist and stored here. 6520 */ 6521 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6522 6523 /* nvpair names in vhci cache nvlist */ 6524 #define MDI_VHCI_CACHE_VERSION 1 6525 #define MDI_NVPNAME_VERSION "version" 6526 #define MDI_NVPNAME_PHCIS "phcis" 6527 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6528 6529 /* 6530 * Given vhci class name, return its on-disk vhci cache filename. 6531 * Memory for the returned filename which includes the full path is allocated 6532 * by this function. 6533 */ 6534 static char * 6535 vhclass2vhcache_filename(char *vhclass) 6536 { 6537 char *filename; 6538 int len; 6539 static char *fmt = "/etc/devices/mdi_%s_cache"; 6540 6541 /* 6542 * fmt contains the on-disk vhci cache file name format; 6543 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
6544 */ 6545 6546 /* the -1 below is to account for "%s" in the format string */ 6547 len = strlen(fmt) + strlen(vhclass) - 1; 6548 filename = kmem_alloc(len, KM_SLEEP); 6549 (void) snprintf(filename, len, fmt, vhclass); 6550 ASSERT(len == (strlen(filename) + 1)); 6551 return (filename); 6552 } 6553 6554 /* 6555 * initialize the vhci cache related data structures and read the on-disk 6556 * vhci cached data into memory. 6557 */ 6558 static void 6559 setup_vhci_cache(mdi_vhci_t *vh) 6560 { 6561 mdi_vhci_config_t *vhc; 6562 mdi_vhci_cache_t *vhcache; 6563 int i; 6564 nvlist_t *nvl = NULL; 6565 6566 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6567 vh->vh_config = vhc; 6568 vhcache = &vhc->vhc_vhcache; 6569 6570 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6571 6572 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6573 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6574 6575 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6576 6577 /* 6578 * Create string hash; same as mod_hash_create_strhash() except that 6579 * we use NULL key destructor. 6580 */ 6581 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6582 mdi_bus_config_cache_hash_size, 6583 mod_hash_null_keydtor, mod_hash_null_valdtor, 6584 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6585 6586 /* 6587 * The on-disk vhci cache is read during booting prior to the 6588 * lights-out period by mdi_read_devices_files(). 6589 */ 6590 for (i = 0; i < N_VHCI_CLASSES; i++) { 6591 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6592 nvl = vhcache_nvl[i]; 6593 vhcache_nvl[i] = NULL; 6594 break; 6595 } 6596 } 6597 6598 /* 6599 * this is to cover the case of some one manually causing unloading 6600 * (or detaching) and reloading (or attaching) of a vhci driver. 
6601 */ 6602 if (nvl == NULL && modrootloaded) 6603 nvl = read_on_disk_vhci_cache(vh->vh_class); 6604 6605 if (nvl != NULL) { 6606 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6607 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6608 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6609 else { 6610 cmn_err(CE_WARN, 6611 "%s: data file corrupted, will recreate\n", 6612 vhc->vhc_vhcache_filename); 6613 } 6614 rw_exit(&vhcache->vhcache_lock); 6615 nvlist_free(nvl); 6616 } 6617 6618 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6619 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6620 6621 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 6622 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 6623 } 6624 6625 /* 6626 * free all vhci cache related resources 6627 */ 6628 static int 6629 destroy_vhci_cache(mdi_vhci_t *vh) 6630 { 6631 mdi_vhci_config_t *vhc = vh->vh_config; 6632 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6633 mdi_vhcache_phci_t *cphci, *cphci_next; 6634 mdi_vhcache_client_t *cct, *cct_next; 6635 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6636 6637 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6638 return (MDI_FAILURE); 6639 6640 kmem_free(vhc->vhc_vhcache_filename, 6641 strlen(vhc->vhc_vhcache_filename) + 1); 6642 6643 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6644 6645 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6646 cphci = cphci_next) { 6647 cphci_next = cphci->cphci_next; 6648 free_vhcache_phci(cphci); 6649 } 6650 6651 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 6652 cct_next = cct->cct_next; 6653 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6654 cpi_next = cpi->cpi_next; 6655 free_vhcache_pathinfo(cpi); 6656 } 6657 free_vhcache_client(cct); 6658 } 6659 6660 rw_destroy(&vhcache->vhcache_lock); 6661 6662 mutex_destroy(&vhc->vhc_lock); 6663 cv_destroy(&vhc->vhc_cv); 6664 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6665 return (MDI_SUCCESS); 
6666 } 6667 6668 /* 6669 * Stop all vhci cache related async threads and free their resources. 6670 */ 6671 static int 6672 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6673 { 6674 mdi_async_client_config_t *acc, *acc_next; 6675 6676 mutex_enter(&vhc->vhc_lock); 6677 vhc->vhc_flags |= MDI_VHC_EXIT; 6678 ASSERT(vhc->vhc_acc_thrcount >= 0); 6679 cv_broadcast(&vhc->vhc_cv); 6680 6681 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6682 vhc->vhc_acc_thrcount != 0) { 6683 mutex_exit(&vhc->vhc_lock); 6684 delay(1); 6685 mutex_enter(&vhc->vhc_lock); 6686 } 6687 6688 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6689 6690 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6691 acc_next = acc->acc_next; 6692 free_async_client_config(acc); 6693 } 6694 vhc->vhc_acc_list_head = NULL; 6695 vhc->vhc_acc_list_tail = NULL; 6696 vhc->vhc_acc_count = 0; 6697 6698 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6699 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6700 mutex_exit(&vhc->vhc_lock); 6701 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6702 vhcache_dirty(vhc); 6703 return (MDI_FAILURE); 6704 } 6705 } else 6706 mutex_exit(&vhc->vhc_lock); 6707 6708 if (callb_delete(vhc->vhc_cbid) != 0) 6709 return (MDI_FAILURE); 6710 6711 return (MDI_SUCCESS); 6712 } 6713 6714 /* 6715 * Stop vhci cache flush thread 6716 */ 6717 /* ARGSUSED */ 6718 static boolean_t 6719 stop_vhcache_flush_thread(void *arg, int code) 6720 { 6721 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 6722 6723 mutex_enter(&vhc->vhc_lock); 6724 vhc->vhc_flags |= MDI_VHC_EXIT; 6725 cv_broadcast(&vhc->vhc_cv); 6726 6727 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6728 mutex_exit(&vhc->vhc_lock); 6729 delay(1); 6730 mutex_enter(&vhc->vhc_lock); 6731 } 6732 6733 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6734 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6735 mutex_exit(&vhc->vhc_lock); 6736 (void) flush_vhcache(vhc, 1); 6737 } else 6738 mutex_exit(&vhc->vhc_lock); 6739 6740 return (B_TRUE); 6741 } 
6742 6743 /* 6744 * Enqueue the vhcache phci (cphci) at the tail of the list 6745 */ 6746 static void 6747 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6748 { 6749 cphci->cphci_next = NULL; 6750 if (vhcache->vhcache_phci_head == NULL) 6751 vhcache->vhcache_phci_head = cphci; 6752 else 6753 vhcache->vhcache_phci_tail->cphci_next = cphci; 6754 vhcache->vhcache_phci_tail = cphci; 6755 } 6756 6757 /* 6758 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6759 */ 6760 static void 6761 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6762 mdi_vhcache_pathinfo_t *cpi) 6763 { 6764 cpi->cpi_next = NULL; 6765 if (cct->cct_cpi_head == NULL) 6766 cct->cct_cpi_head = cpi; 6767 else 6768 cct->cct_cpi_tail->cpi_next = cpi; 6769 cct->cct_cpi_tail = cpi; 6770 } 6771 6772 /* 6773 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6774 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6775 * flag set come at the beginning of the list. All cpis which have this 6776 * flag set come at the end of the list. 
6777 */ 6778 static void 6779 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6780 mdi_vhcache_pathinfo_t *newcpi) 6781 { 6782 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6783 6784 if (cct->cct_cpi_head == NULL || 6785 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6786 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6787 else { 6788 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6789 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6790 prev_cpi = cpi, cpi = cpi->cpi_next) 6791 ; 6792 6793 if (prev_cpi == NULL) 6794 cct->cct_cpi_head = newcpi; 6795 else 6796 prev_cpi->cpi_next = newcpi; 6797 6798 newcpi->cpi_next = cpi; 6799 6800 if (cpi == NULL) 6801 cct->cct_cpi_tail = newcpi; 6802 } 6803 } 6804 6805 /* 6806 * Enqueue the vhcache client (cct) at the tail of the list 6807 */ 6808 static void 6809 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6810 mdi_vhcache_client_t *cct) 6811 { 6812 cct->cct_next = NULL; 6813 if (vhcache->vhcache_client_head == NULL) 6814 vhcache->vhcache_client_head = cct; 6815 else 6816 vhcache->vhcache_client_tail->cct_next = cct; 6817 vhcache->vhcache_client_tail = cct; 6818 } 6819 6820 static void 6821 free_string_array(char **str, int nelem) 6822 { 6823 int i; 6824 6825 if (str) { 6826 for (i = 0; i < nelem; i++) { 6827 if (str[i]) 6828 kmem_free(str[i], strlen(str[i]) + 1); 6829 } 6830 kmem_free(str, sizeof (char *) * nelem); 6831 } 6832 } 6833 6834 static void 6835 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6836 { 6837 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6838 kmem_free(cphci, sizeof (*cphci)); 6839 } 6840 6841 static void 6842 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6843 { 6844 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6845 kmem_free(cpi, sizeof (*cpi)); 6846 } 6847 6848 static void 6849 free_vhcache_client(mdi_vhcache_client_t *cct) 6850 { 6851 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6852 kmem_free(cct, sizeof (*cct)); 6853 } 6854 6855 
static char * 6856 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6857 { 6858 char *name_addr; 6859 int len; 6860 6861 len = strlen(ct_name) + strlen(ct_addr) + 2; 6862 name_addr = kmem_alloc(len, KM_SLEEP); 6863 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6864 6865 if (ret_len) 6866 *ret_len = len; 6867 return (name_addr); 6868 } 6869 6870 /* 6871 * Copy the contents of paddrnvl to vhci cache. 6872 * paddrnvl nvlist contains path information for a vhci client. 6873 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 6874 */ 6875 static void 6876 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6877 mdi_vhcache_client_t *cct) 6878 { 6879 nvpair_t *nvp = NULL; 6880 mdi_vhcache_pathinfo_t *cpi; 6881 uint_t nelem; 6882 uint32_t *val; 6883 6884 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6885 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6886 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6887 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6888 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6889 ASSERT(nelem == 2); 6890 cpi->cpi_cphci = cphci_list[val[0]]; 6891 cpi->cpi_flags = val[1]; 6892 enqueue_tail_vhcache_pathinfo(cct, cpi); 6893 } 6894 } 6895 6896 /* 6897 * Copy the contents of caddrmapnvl to vhci cache. 6898 * caddrmapnvl nvlist contains vhci client address to phci client address 6899 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6900 * this nvlist. 
6901 */ 6902 static void 6903 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6904 mdi_vhcache_phci_t *cphci_list[]) 6905 { 6906 nvpair_t *nvp = NULL; 6907 nvlist_t *paddrnvl; 6908 mdi_vhcache_client_t *cct; 6909 6910 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6911 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6912 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6913 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6914 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6915 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6916 /* the client must contain at least one path */ 6917 ASSERT(cct->cct_cpi_head != NULL); 6918 6919 enqueue_vhcache_client(vhcache, cct); 6920 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6921 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6922 } 6923 } 6924 6925 /* 6926 * Copy the contents of the main nvlist to vhci cache. 6927 * 6928 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 6929 * The nvlist contains the mappings between the vhci client addresses and 6930 * their corresponding phci client addresses. 6931 * 6932 * The structure of the nvlist is as follows: 6933 * 6934 * Main nvlist: 6935 * NAME TYPE DATA 6936 * version int32 version number 6937 * phcis string array array of phci paths 6938 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 6939 * 6940 * structure of c2paddrs_nvl: 6941 * NAME TYPE DATA 6942 * caddr1 nvlist_t paddrs_nvl1 6943 * caddr2 nvlist_t paddrs_nvl2 6944 * ... 6945 * where caddr1, caddr2, ... are vhci client name and addresses in the 6946 * form of "<clientname>@<clientaddress>". 6947 * (for example: "ssd@2000002037cd9f72"); 6948 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 6949 * 6950 * structure of paddrs_nvl: 6951 * NAME TYPE DATA 6952 * pi_addr1 uint32_array (phci-id, cpi_flags) 6953 * pi_addr2 uint32_array (phci-id, cpi_flags) 6954 * ... 6955 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 6956 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 6957 * phci-ids are integers that identify PHCIs to which the 6958 * the bus specific address belongs to. These integers are used as an index 6959 * into to the phcis string array in the main nvlist to get the PHCI path. 6960 */ 6961 static int 6962 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 6963 { 6964 char **phcis, **phci_namep; 6965 uint_t nphcis; 6966 mdi_vhcache_phci_t *cphci, **cphci_list; 6967 nvlist_t *caddrmapnvl; 6968 int32_t ver; 6969 int i; 6970 size_t cphci_list_size; 6971 6972 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 6973 6974 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 6975 ver != MDI_VHCI_CACHE_VERSION) 6976 return (MDI_FAILURE); 6977 6978 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 6979 &nphcis) != 0) 6980 return (MDI_SUCCESS); 6981 6982 ASSERT(nphcis > 0); 6983 6984 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 6985 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 6986 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 6987 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 6988 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 6989 enqueue_vhcache_phci(vhcache, cphci); 6990 cphci_list[i] = cphci; 6991 } 6992 6993 ASSERT(vhcache->vhcache_phci_head != NULL); 6994 6995 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 6996 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 6997 6998 kmem_free(cphci_list, cphci_list_size); 6999 return (MDI_SUCCESS); 7000 } 7001 7002 /* 7003 * Build paddrnvl for the specified client using the information in the 7004 * vhci cache and add it to the caddrmapnnvl. 7005 * Returns 0 on success, errno on failure. 
7006 */ 7007 static int 7008 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7009 nvlist_t *caddrmapnvl) 7010 { 7011 mdi_vhcache_pathinfo_t *cpi; 7012 nvlist_t *nvl; 7013 int err; 7014 uint32_t val[2]; 7015 7016 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7017 7018 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7019 return (err); 7020 7021 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7022 val[0] = cpi->cpi_cphci->cphci_id; 7023 val[1] = cpi->cpi_flags; 7024 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7025 != 0) 7026 goto out; 7027 } 7028 7029 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7030 out: 7031 nvlist_free(nvl); 7032 return (err); 7033 } 7034 7035 /* 7036 * Build caddrmapnvl using the information in the vhci cache 7037 * and add it to the mainnvl. 7038 * Returns 0 on success, errno on failure. 7039 */ 7040 static int 7041 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7042 { 7043 mdi_vhcache_client_t *cct; 7044 nvlist_t *nvl; 7045 int err; 7046 7047 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7048 7049 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7050 return (err); 7051 7052 for (cct = vhcache->vhcache_client_head; cct != NULL; 7053 cct = cct->cct_next) { 7054 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7055 goto out; 7056 } 7057 7058 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7059 out: 7060 nvlist_free(nvl); 7061 return (err); 7062 } 7063 7064 /* 7065 * Build nvlist using the information in the vhci cache. 7066 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7067 * Returns nvl on success, NULL on failure. 
7068 */ 7069 static nvlist_t * 7070 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7071 { 7072 mdi_vhcache_phci_t *cphci; 7073 uint_t phci_count; 7074 char **phcis; 7075 nvlist_t *nvl; 7076 int err, i; 7077 7078 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7079 nvl = NULL; 7080 goto out; 7081 } 7082 7083 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7084 MDI_VHCI_CACHE_VERSION)) != 0) 7085 goto out; 7086 7087 rw_enter(&vhcache->vhcache_lock, RW_READER); 7088 if (vhcache->vhcache_phci_head == NULL) { 7089 rw_exit(&vhcache->vhcache_lock); 7090 return (nvl); 7091 } 7092 7093 phci_count = 0; 7094 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7095 cphci = cphci->cphci_next) 7096 cphci->cphci_id = phci_count++; 7097 7098 /* build phci pathname list */ 7099 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7100 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7101 cphci = cphci->cphci_next, i++) 7102 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7103 7104 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7105 phci_count); 7106 free_string_array(phcis, phci_count); 7107 7108 if (err == 0 && 7109 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7110 rw_exit(&vhcache->vhcache_lock); 7111 return (nvl); 7112 } 7113 7114 rw_exit(&vhcache->vhcache_lock); 7115 out: 7116 if (nvl) 7117 nvlist_free(nvl); 7118 return (NULL); 7119 } 7120 7121 /* 7122 * Lookup vhcache phci structure for the specified phci path. 7123 */ 7124 static mdi_vhcache_phci_t * 7125 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7126 { 7127 mdi_vhcache_phci_t *cphci; 7128 7129 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7130 7131 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7132 cphci = cphci->cphci_next) { 7133 if (strcmp(cphci->cphci_path, phci_path) == 0) 7134 return (cphci); 7135 } 7136 7137 return (NULL); 7138 } 7139 7140 /* 7141 * Lookup vhcache phci structure for the specified phci. 
7142 */ 7143 static mdi_vhcache_phci_t * 7144 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7145 { 7146 mdi_vhcache_phci_t *cphci; 7147 7148 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7149 7150 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7151 cphci = cphci->cphci_next) { 7152 if (cphci->cphci_phci == ph) 7153 return (cphci); 7154 } 7155 7156 return (NULL); 7157 } 7158 7159 /* 7160 * Add the specified phci to the vhci cache if not already present. 7161 */ 7162 static void 7163 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7164 { 7165 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7166 mdi_vhcache_phci_t *cphci; 7167 char *pathname; 7168 int cache_updated; 7169 7170 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7171 7172 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7173 (void) ddi_pathname(ph->ph_dip, pathname); 7174 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7175 != NULL) { 7176 cphci->cphci_phci = ph; 7177 cache_updated = 0; 7178 } else { 7179 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7180 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7181 cphci->cphci_phci = ph; 7182 enqueue_vhcache_phci(vhcache, cphci); 7183 cache_updated = 1; 7184 } 7185 7186 rw_exit(&vhcache->vhcache_lock); 7187 7188 /* 7189 * Since a new phci has been added, reset 7190 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7191 * during next vhcache_discover_paths(). 7192 */ 7193 mutex_enter(&vhc->vhc_lock); 7194 vhc->vhc_path_discovery_cutoff_time = 0; 7195 mutex_exit(&vhc->vhc_lock); 7196 7197 kmem_free(pathname, MAXPATHLEN); 7198 if (cache_updated) 7199 vhcache_dirty(vhc); 7200 } 7201 7202 /* 7203 * Remove the reference to the specified phci from the vhci cache. 
7204 */ 7205 static void 7206 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7207 { 7208 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7209 mdi_vhcache_phci_t *cphci; 7210 7211 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7212 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7213 /* do not remove the actual mdi_vhcache_phci structure */ 7214 cphci->cphci_phci = NULL; 7215 } 7216 rw_exit(&vhcache->vhcache_lock); 7217 } 7218 7219 static void 7220 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7221 mdi_vhcache_lookup_token_t *src) 7222 { 7223 if (src == NULL) { 7224 dst->lt_cct = NULL; 7225 dst->lt_cct_lookup_time = 0; 7226 } else { 7227 dst->lt_cct = src->lt_cct; 7228 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7229 } 7230 } 7231 7232 /* 7233 * Look up vhcache client for the specified client. 7234 */ 7235 static mdi_vhcache_client_t * 7236 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7237 mdi_vhcache_lookup_token_t *token) 7238 { 7239 mod_hash_val_t hv; 7240 char *name_addr; 7241 int len; 7242 7243 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7244 7245 /* 7246 * If no vhcache clean occurred since the last lookup, we can 7247 * simply return the cct from the last lookup operation. 7248 * It works because ccts are never freed except during the vhcache 7249 * cleanup operation. 
7250 */ 7251 if (token != NULL && 7252 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7253 return (token->lt_cct); 7254 7255 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7256 if (mod_hash_find(vhcache->vhcache_client_hash, 7257 (mod_hash_key_t)name_addr, &hv) == 0) { 7258 if (token) { 7259 token->lt_cct = (mdi_vhcache_client_t *)hv; 7260 token->lt_cct_lookup_time = lbolt64; 7261 } 7262 } else { 7263 if (token) { 7264 token->lt_cct = NULL; 7265 token->lt_cct_lookup_time = 0; 7266 } 7267 hv = NULL; 7268 } 7269 kmem_free(name_addr, len); 7270 return ((mdi_vhcache_client_t *)hv); 7271 } 7272 7273 /* 7274 * Add the specified path to the vhci cache if not already present. 7275 * Also add the vhcache client for the client corresponding to this path 7276 * if it doesn't already exist. 7277 */ 7278 static void 7279 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7280 { 7281 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7282 mdi_vhcache_client_t *cct; 7283 mdi_vhcache_pathinfo_t *cpi; 7284 mdi_phci_t *ph = pip->pi_phci; 7285 mdi_client_t *ct = pip->pi_client; 7286 int cache_updated = 0; 7287 7288 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7289 7290 /* if vhcache client for this pip doesn't already exist, add it */ 7291 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7292 NULL)) == NULL) { 7293 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7294 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7295 ct->ct_guid, NULL); 7296 enqueue_vhcache_client(vhcache, cct); 7297 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7298 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7299 cache_updated = 1; 7300 } 7301 7302 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7303 if (cpi->cpi_cphci->cphci_phci == ph && 7304 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7305 cpi->cpi_pip = pip; 7306 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7307 cpi->cpi_flags &= 7308 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7309 sort_vhcache_paths(cct); 7310 cache_updated = 1; 7311 } 7312 break; 7313 } 7314 } 7315 7316 if (cpi == NULL) { 7317 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7318 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7319 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7320 ASSERT(cpi->cpi_cphci != NULL); 7321 cpi->cpi_pip = pip; 7322 enqueue_vhcache_pathinfo(cct, cpi); 7323 cache_updated = 1; 7324 } 7325 7326 rw_exit(&vhcache->vhcache_lock); 7327 7328 if (cache_updated) 7329 vhcache_dirty(vhc); 7330 } 7331 7332 /* 7333 * Remove the reference to the specified path from the vhci cache. 7334 */ 7335 static void 7336 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7337 { 7338 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7339 mdi_client_t *ct = pip->pi_client; 7340 mdi_vhcache_client_t *cct; 7341 mdi_vhcache_pathinfo_t *cpi; 7342 7343 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7344 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7345 NULL)) != NULL) { 7346 for (cpi = cct->cct_cpi_head; cpi != NULL; 7347 cpi = cpi->cpi_next) { 7348 if (cpi->cpi_pip == pip) { 7349 cpi->cpi_pip = NULL; 7350 break; 7351 } 7352 } 7353 } 7354 rw_exit(&vhcache->vhcache_lock); 7355 } 7356 7357 /* 7358 * Flush the vhci cache to disk. 7359 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7360 */ 7361 static int 7362 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7363 { 7364 nvlist_t *nvl; 7365 int err; 7366 int rv; 7367 7368 /* 7369 * It is possible that the system may shutdown before 7370 * i_ddi_io_initialized (during stmsboot for example). To allow for 7371 * flushing the cache in this case do not check for 7372 * i_ddi_io_initialized when force flag is set. 
7373 */ 7374 if (force_flag == 0 && !i_ddi_io_initialized()) 7375 return (MDI_FAILURE); 7376 7377 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7378 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7379 nvlist_free(nvl); 7380 } else 7381 err = EFAULT; 7382 7383 rv = MDI_SUCCESS; 7384 mutex_enter(&vhc->vhc_lock); 7385 if (err != 0) { 7386 if (err == EROFS) { 7387 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7388 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7389 MDI_VHC_VHCACHE_DIRTY); 7390 } else { 7391 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7392 cmn_err(CE_CONT, "%s: update failed\n", 7393 vhc->vhc_vhcache_filename); 7394 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7395 } 7396 rv = MDI_FAILURE; 7397 } 7398 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7399 cmn_err(CE_CONT, 7400 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7401 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7402 } 7403 mutex_exit(&vhc->vhc_lock); 7404 7405 return (rv); 7406 } 7407 7408 /* 7409 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7410 * Exits itself if left idle for the idle timeout period. 
*/
/*
 * arg is the mdi_vhci_config_t whose cache this thread flushes.
 * All vhc_flags tests and updates below are made with vhc_lock held; the
 * lock is dropped only around the flush_vhcache() call.
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/*
	 * idle period before exiting: the tunable is scaled by
	 * TICKS_PER_SECOND, i.e. converted from seconds to clock ticks
	 */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/* phase 1: the cache is dirty - wait for the flush time */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				/* not due yet; sleep until the flush time */
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				/*
				 * Clear the dirty flag before dropping the
				 * lock and flushing; a failed flush marks
				 * the cache dirty again via vhcache_dirty().
				 */
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/* phase 2: idle until dirtied, told to exit, or timed out */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* leave unless the cache became dirty again while idling */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	/* vhc_lock is still held here, so the flag update is protected */
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7468 */ 7469 static void 7470 vhcache_dirty(mdi_vhci_config_t *vhc) 7471 { 7472 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7473 int create_thread; 7474 7475 rw_enter(&vhcache->vhcache_lock, RW_READER); 7476 /* do not flush cache until the cache is fully built */ 7477 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7478 rw_exit(&vhcache->vhcache_lock); 7479 return; 7480 } 7481 rw_exit(&vhcache->vhcache_lock); 7482 7483 mutex_enter(&vhc->vhc_lock); 7484 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7485 mutex_exit(&vhc->vhc_lock); 7486 return; 7487 } 7488 7489 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7490 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7491 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7492 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7493 cv_broadcast(&vhc->vhc_cv); 7494 create_thread = 0; 7495 } else { 7496 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7497 create_thread = 1; 7498 } 7499 mutex_exit(&vhc->vhc_lock); 7500 7501 if (create_thread) 7502 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7503 0, &p0, TS_RUN, minclsyspri); 7504 } 7505 7506 /* 7507 * phci bus config structure - one for for each phci bus config operation that 7508 * we initiate on behalf of a vhci. 
7509 */ 7510 typedef struct mdi_phci_bus_config_s { 7511 char *phbc_phci_path; 7512 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7513 struct mdi_phci_bus_config_s *phbc_next; 7514 } mdi_phci_bus_config_t; 7515 7516 /* vhci bus config structure - one for each vhci bus config operation */ 7517 typedef struct mdi_vhci_bus_config_s { 7518 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7519 major_t vhbc_op_major; /* bus config op major */ 7520 uint_t vhbc_op_flags; /* bus config op flags */ 7521 kmutex_t vhbc_lock; 7522 kcondvar_t vhbc_cv; 7523 int vhbc_thr_count; 7524 } mdi_vhci_bus_config_t; 7525 7526 /* 7527 * bus config the specified phci 7528 */ 7529 static void 7530 bus_config_phci(void *arg) 7531 { 7532 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7533 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7534 dev_info_t *ph_dip; 7535 7536 /* 7537 * first configure all path components upto phci and then configure 7538 * the phci children. 7539 */ 7540 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7541 != NULL) { 7542 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7543 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7544 (void) ndi_devi_config_driver(ph_dip, 7545 vhbc->vhbc_op_flags, 7546 vhbc->vhbc_op_major); 7547 } else 7548 (void) ndi_devi_config(ph_dip, 7549 vhbc->vhbc_op_flags); 7550 7551 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7552 ndi_rele_devi(ph_dip); 7553 } 7554 7555 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7556 kmem_free(phbc, sizeof (*phbc)); 7557 7558 mutex_enter(&vhbc->vhbc_lock); 7559 vhbc->vhbc_thr_count--; 7560 if (vhbc->vhbc_thr_count == 0) 7561 cv_broadcast(&vhbc->vhbc_cv); 7562 mutex_exit(&vhbc->vhbc_lock); 7563 } 7564 7565 /* 7566 * Bus config all phcis associated with the vhci in parallel. 7567 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7568 */ 7569 static void 7570 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7571 ddi_bus_config_op_t op, major_t maj) 7572 { 7573 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7574 mdi_vhci_bus_config_t *vhbc; 7575 mdi_vhcache_phci_t *cphci; 7576 7577 rw_enter(&vhcache->vhcache_lock, RW_READER); 7578 if (vhcache->vhcache_phci_head == NULL) { 7579 rw_exit(&vhcache->vhcache_lock); 7580 return; 7581 } 7582 7583 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7584 7585 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7586 cphci = cphci->cphci_next) { 7587 /* skip phcis that haven't attached before root is available */ 7588 if (!modrootloaded && (cphci->cphci_phci == NULL)) 7589 continue; 7590 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7591 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7592 KM_SLEEP); 7593 phbc->phbc_vhbusconfig = vhbc; 7594 phbc->phbc_next = phbc_head; 7595 phbc_head = phbc; 7596 vhbc->vhbc_thr_count++; 7597 } 7598 rw_exit(&vhcache->vhcache_lock); 7599 7600 vhbc->vhbc_op = op; 7601 vhbc->vhbc_op_major = maj; 7602 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7603 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7604 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7605 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7606 7607 /* now create threads to initiate bus config on all phcis in parallel */ 7608 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7609 phbc_next = phbc->phbc_next; 7610 if (mdi_mtc_off) 7611 bus_config_phci((void *)phbc); 7612 else 7613 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7614 0, &p0, TS_RUN, minclsyspri); 7615 } 7616 7617 mutex_enter(&vhbc->vhbc_lock); 7618 /* wait until all threads exit */ 7619 while (vhbc->vhbc_thr_count > 0) 7620 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7621 mutex_exit(&vhbc->vhbc_lock); 7622 7623 mutex_destroy(&vhbc->vhbc_lock); 7624 cv_destroy(&vhbc->vhbc_cv); 7625 kmem_free(vhbc, sizeof (*vhbc)); 7626 } 7627 7628 /* 7629 * Single 
threaded version of bus_config_all_phcis() 7630 */ 7631 static void 7632 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7633 ddi_bus_config_op_t op, major_t maj) 7634 { 7635 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7636 7637 single_threaded_vhconfig_enter(vhc); 7638 bus_config_all_phcis(vhcache, flags, op, maj); 7639 single_threaded_vhconfig_exit(vhc); 7640 } 7641 7642 /* 7643 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7644 * The path includes the child component in addition to the phci path. 7645 */ 7646 static int 7647 bus_config_one_phci_child(char *path) 7648 { 7649 dev_info_t *ph_dip, *child; 7650 char *devnm; 7651 int rv = MDI_FAILURE; 7652 7653 /* extract the child component of the phci */ 7654 devnm = strrchr(path, '/'); 7655 *devnm++ = '\0'; 7656 7657 /* 7658 * first configure all path components upto phci and then 7659 * configure the phci child. 7660 */ 7661 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7662 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7663 NDI_SUCCESS) { 7664 /* 7665 * release the hold that ndi_devi_config_one() placed 7666 */ 7667 ndi_rele_devi(child); 7668 rv = MDI_SUCCESS; 7669 } 7670 7671 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7672 ndi_rele_devi(ph_dip); 7673 } 7674 7675 devnm--; 7676 *devnm = '/'; 7677 return (rv); 7678 } 7679 7680 /* 7681 * Build a list of phci client paths for the specified vhci client. 7682 * The list includes only those phci client paths which aren't configured yet. 7683 */ 7684 static mdi_phys_path_t * 7685 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7686 { 7687 mdi_vhcache_pathinfo_t *cpi; 7688 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7689 int config_path, len; 7690 7691 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7692 /* 7693 * include only those paths that aren't configured. 
7694 */ 7695 config_path = 0; 7696 if (cpi->cpi_pip == NULL) 7697 config_path = 1; 7698 else { 7699 MDI_PI_LOCK(cpi->cpi_pip); 7700 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7701 config_path = 1; 7702 MDI_PI_UNLOCK(cpi->cpi_pip); 7703 } 7704 7705 if (config_path) { 7706 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7707 len = strlen(cpi->cpi_cphci->cphci_path) + 7708 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7709 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7710 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7711 cpi->cpi_cphci->cphci_path, ct_name, 7712 cpi->cpi_addr); 7713 pp->phys_path_next = NULL; 7714 7715 if (pp_head == NULL) 7716 pp_head = pp; 7717 else 7718 pp_tail->phys_path_next = pp; 7719 pp_tail = pp; 7720 } 7721 } 7722 7723 return (pp_head); 7724 } 7725 7726 /* 7727 * Free the memory allocated for phci client path list. 7728 */ 7729 static void 7730 free_phclient_path_list(mdi_phys_path_t *pp_head) 7731 { 7732 mdi_phys_path_t *pp, *pp_next; 7733 7734 for (pp = pp_head; pp != NULL; pp = pp_next) { 7735 pp_next = pp->phys_path_next; 7736 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7737 kmem_free(pp, sizeof (*pp)); 7738 } 7739 } 7740 7741 /* 7742 * Allocated async client structure and initialize with the specified values. 7743 */ 7744 static mdi_async_client_config_t * 7745 alloc_async_client_config(char *ct_name, char *ct_addr, 7746 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7747 { 7748 mdi_async_client_config_t *acc; 7749 7750 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7751 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7752 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7753 acc->acc_phclient_path_list_head = pp_head; 7754 init_vhcache_lookup_token(&acc->acc_token, tok); 7755 acc->acc_next = NULL; 7756 return (acc); 7757 } 7758 7759 /* 7760 * Free the memory allocated for the async client structure and their members. 
7761 */ 7762 static void 7763 free_async_client_config(mdi_async_client_config_t *acc) 7764 { 7765 if (acc->acc_phclient_path_list_head) 7766 free_phclient_path_list(acc->acc_phclient_path_list_head); 7767 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7768 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7769 kmem_free(acc, sizeof (*acc)); 7770 } 7771 7772 /* 7773 * Sort vhcache pathinfos (cpis) of the specified client. 7774 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7775 * flag set come at the beginning of the list. All cpis which have this 7776 * flag set come at the end of the list. 7777 */ 7778 static void 7779 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7780 { 7781 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7782 7783 cpi_head = cct->cct_cpi_head; 7784 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7785 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7786 cpi_next = cpi->cpi_next; 7787 enqueue_vhcache_pathinfo(cct, cpi); 7788 } 7789 } 7790 7791 /* 7792 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7793 * every vhcache pathinfo of the specified client. If not adjust the flag 7794 * setting appropriately. 7795 * 7796 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7797 * on-disk vhci cache. So every time this flag is updated the cache must be 7798 * flushed. 7799 */ 7800 static void 7801 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7802 mdi_vhcache_lookup_token_t *tok) 7803 { 7804 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7805 mdi_vhcache_client_t *cct; 7806 mdi_vhcache_pathinfo_t *cpi; 7807 7808 rw_enter(&vhcache->vhcache_lock, RW_READER); 7809 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7810 == NULL) { 7811 rw_exit(&vhcache->vhcache_lock); 7812 return; 7813 } 7814 7815 /* 7816 * to avoid unnecessary on-disk cache updates, first check if an 7817 * update is really needed. 
If no update is needed simply return. 7818 */ 7819 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7820 if ((cpi->cpi_pip != NULL && 7821 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7822 (cpi->cpi_pip == NULL && 7823 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7824 break; 7825 } 7826 } 7827 if (cpi == NULL) { 7828 rw_exit(&vhcache->vhcache_lock); 7829 return; 7830 } 7831 7832 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7833 rw_exit(&vhcache->vhcache_lock); 7834 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7835 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7836 tok)) == NULL) { 7837 rw_exit(&vhcache->vhcache_lock); 7838 return; 7839 } 7840 } 7841 7842 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7843 if (cpi->cpi_pip != NULL) 7844 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7845 else 7846 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7847 } 7848 sort_vhcache_paths(cct); 7849 7850 rw_exit(&vhcache->vhcache_lock); 7851 vhcache_dirty(vhc); 7852 } 7853 7854 /* 7855 * Configure all specified paths of the client. 7856 */ 7857 static void 7858 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7859 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7860 { 7861 mdi_phys_path_t *pp; 7862 7863 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 7864 (void) bus_config_one_phci_child(pp->phys_path); 7865 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 7866 } 7867 7868 /* 7869 * Dequeue elements from vhci async client config list and bus configure 7870 * their corresponding phci clients. 
*/
/*
 * arg is the mdi_vhci_config_t whose async client config list is served.
 * The thread leaves when MDI_VHC_EXIT is set or when the list is still
 * empty once the wait loop below ends.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	/* idle period: the tunable scaled by TICKS_PER_SECOND, i.e. in ticks */
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		/* the idle deadline is restarted for every element served */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work, an exit request, or the idle deadline */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* note: "out" is reached with vhc_lock still held */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* unlink the first element of the list */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* the bus config itself runs without vhc_lock */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		/* frees the element together with its path list */
		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
7924 */ 7925 static void 7926 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7927 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7928 { 7929 mdi_async_client_config_t *acc, *newacc; 7930 int create_thread; 7931 7932 if (pp_head == NULL) 7933 return; 7934 7935 if (mdi_mtc_off) { 7936 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7937 free_phclient_path_list(pp_head); 7938 return; 7939 } 7940 7941 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7942 ASSERT(newacc); 7943 7944 mutex_enter(&vhc->vhc_lock); 7945 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7946 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 7947 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 7948 free_async_client_config(newacc); 7949 mutex_exit(&vhc->vhc_lock); 7950 return; 7951 } 7952 } 7953 7954 if (vhc->vhc_acc_list_head == NULL) 7955 vhc->vhc_acc_list_head = newacc; 7956 else 7957 vhc->vhc_acc_list_tail->acc_next = newacc; 7958 vhc->vhc_acc_list_tail = newacc; 7959 vhc->vhc_acc_count++; 7960 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 7961 cv_broadcast(&vhc->vhc_cv); 7962 create_thread = 0; 7963 } else { 7964 vhc->vhc_acc_thrcount++; 7965 create_thread = 1; 7966 } 7967 mutex_exit(&vhc->vhc_lock); 7968 7969 if (create_thread) 7970 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 7971 0, &p0, TS_RUN, minclsyspri); 7972 } 7973 7974 /* 7975 * Return number of online paths for the specified client. 
7976 */ 7977 static int 7978 nonline_paths(mdi_vhcache_client_t *cct) 7979 { 7980 mdi_vhcache_pathinfo_t *cpi; 7981 int online_count = 0; 7982 7983 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7984 if (cpi->cpi_pip != NULL) { 7985 MDI_PI_LOCK(cpi->cpi_pip); 7986 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 7987 online_count++; 7988 MDI_PI_UNLOCK(cpi->cpi_pip); 7989 } 7990 } 7991 7992 return (online_count); 7993 } 7994 7995 /* 7996 * Bus configure all paths for the specified vhci client. 7997 * If at least one path for the client is already online, the remaining paths 7998 * will be configured asynchronously. Otherwise, it synchronously configures 7999 * the paths until at least one path is online and then rest of the paths 8000 * will be configured asynchronously. 8001 */ 8002 static void 8003 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8004 { 8005 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8006 mdi_phys_path_t *pp_head, *pp; 8007 mdi_vhcache_client_t *cct; 8008 mdi_vhcache_lookup_token_t tok; 8009 8010 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8011 8012 init_vhcache_lookup_token(&tok, NULL); 8013 8014 if (ct_name == NULL || ct_addr == NULL || 8015 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8016 == NULL || 8017 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8018 rw_exit(&vhcache->vhcache_lock); 8019 return; 8020 } 8021 8022 /* if at least one path is online, configure the rest asynchronously */ 8023 if (nonline_paths(cct) > 0) { 8024 rw_exit(&vhcache->vhcache_lock); 8025 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8026 return; 8027 } 8028 8029 rw_exit(&vhcache->vhcache_lock); 8030 8031 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8032 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8033 rw_enter(&vhcache->vhcache_lock, RW_READER); 8034 8035 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8036 ct_addr, &tok)) 
== NULL) { 8037 rw_exit(&vhcache->vhcache_lock); 8038 goto out; 8039 } 8040 8041 if (nonline_paths(cct) > 0 && 8042 pp->phys_path_next != NULL) { 8043 rw_exit(&vhcache->vhcache_lock); 8044 config_client_paths_async(vhc, ct_name, ct_addr, 8045 pp->phys_path_next, &tok); 8046 pp->phys_path_next = NULL; 8047 goto out; 8048 } 8049 8050 rw_exit(&vhcache->vhcache_lock); 8051 } 8052 } 8053 8054 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8055 out: 8056 free_phclient_path_list(pp_head); 8057 } 8058 8059 static void 8060 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8061 { 8062 mutex_enter(&vhc->vhc_lock); 8063 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8064 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8065 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8066 mutex_exit(&vhc->vhc_lock); 8067 } 8068 8069 static void 8070 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8071 { 8072 mutex_enter(&vhc->vhc_lock); 8073 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8074 cv_broadcast(&vhc->vhc_cv); 8075 mutex_exit(&vhc->vhc_lock); 8076 } 8077 8078 typedef struct mdi_phci_driver_info { 8079 char *phdriver_name; /* name of the phci driver */ 8080 8081 /* set to non zero if the phci driver supports root device */ 8082 int phdriver_root_support; 8083 } mdi_phci_driver_info_t; 8084 8085 /* 8086 * vhci class and root support capability of a phci driver can be 8087 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8088 * phci driver.conf file. The built-in tables below contain this information 8089 * for those phci drivers whose driver.conf files don't yet contain this info. 8090 * 8091 * All phci drivers expect iscsi have root device support. 
8092 */ 8093 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8094 { "fp", 1 }, 8095 { "iscsi", 0 }, 8096 { "ibsrp", 1 } 8097 }; 8098 8099 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8100 8101 static void * 8102 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8103 { 8104 void *new_ptr; 8105 8106 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8107 if (old_ptr) { 8108 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8109 kmem_free(old_ptr, old_size); 8110 } 8111 return (new_ptr); 8112 } 8113 8114 static void 8115 add_to_phci_list(char ***driver_list, int **root_support_list, 8116 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8117 { 8118 ASSERT(*cur_elements <= *max_elements); 8119 if (*cur_elements == *max_elements) { 8120 *max_elements += 10; 8121 *driver_list = mdi_realloc(*driver_list, 8122 sizeof (char *) * (*cur_elements), 8123 sizeof (char *) * (*max_elements)); 8124 *root_support_list = mdi_realloc(*root_support_list, 8125 sizeof (int) * (*cur_elements), 8126 sizeof (int) * (*max_elements)); 8127 } 8128 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8129 (*root_support_list)[*cur_elements] = root_support; 8130 (*cur_elements)++; 8131 } 8132 8133 static void 8134 get_phci_driver_list(char *vhci_class, char ***driver_list, 8135 int **root_support_list, int *cur_elements, int *max_elements) 8136 { 8137 mdi_phci_driver_info_t *st_driver_list, *p; 8138 int st_ndrivers, root_support, i, j, driver_conf_count; 8139 major_t m; 8140 struct devnames *dnp; 8141 ddi_prop_t *propp; 8142 8143 *driver_list = NULL; 8144 *root_support_list = NULL; 8145 *cur_elements = 0; 8146 *max_elements = 0; 8147 8148 /* add the phci drivers derived from the phci driver.conf files */ 8149 for (m = 0; m < devcnt; m++) { 8150 dnp = &devnamesp[m]; 8151 8152 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8153 LOCK_DEV_OPS(&dnp->dn_lock); 8154 if (dnp->dn_global_prop_ptr != NULL && 8155 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8156 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8157 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8158 strcmp(propp->prop_val, vhci_class) == 0) { 8159 8160 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8161 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8162 &dnp->dn_global_prop_ptr->prop_list) 8163 == NULL) ? 1 : 0; 8164 8165 add_to_phci_list(driver_list, root_support_list, 8166 cur_elements, max_elements, dnp->dn_name, 8167 root_support); 8168 8169 UNLOCK_DEV_OPS(&dnp->dn_lock); 8170 } else 8171 UNLOCK_DEV_OPS(&dnp->dn_lock); 8172 } 8173 } 8174 8175 driver_conf_count = *cur_elements; 8176 8177 /* add the phci drivers specified in the built-in tables */ 8178 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8179 st_driver_list = scsi_phci_driver_list; 8180 st_ndrivers = sizeof (scsi_phci_driver_list) / 8181 sizeof (mdi_phci_driver_info_t); 8182 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8183 st_driver_list = ib_phci_driver_list; 8184 st_ndrivers = sizeof (ib_phci_driver_list) / 8185 sizeof (mdi_phci_driver_info_t); 8186 } else { 8187 st_driver_list = NULL; 8188 st_ndrivers = 0; 8189 } 8190 8191 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8192 /* add this phci driver if not already added before */ 8193 for (j = 0; j < driver_conf_count; j++) { 8194 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8195 break; 8196 } 8197 if (j == driver_conf_count) { 8198 add_to_phci_list(driver_list, root_support_list, 8199 cur_elements, max_elements, p->phdriver_name, 8200 p->phdriver_root_support); 8201 } 8202 } 8203 } 8204 8205 /* 8206 * Attach the phci driver instances associated with the specified vhci class. 8207 * If root is mounted attach all phci driver instances. 8208 * If root is not mounted, attach the instances of only those phci 8209 * drivers that have the root support. 
8210 */ 8211 static void 8212 attach_phci_drivers(char *vhci_class) 8213 { 8214 char **driver_list, **p; 8215 int *root_support_list; 8216 int cur_elements, max_elements, i; 8217 major_t m; 8218 8219 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8220 &cur_elements, &max_elements); 8221 8222 for (i = 0; i < cur_elements; i++) { 8223 if (modrootloaded || root_support_list[i]) { 8224 m = ddi_name_to_major(driver_list[i]); 8225 if (m != (major_t)-1 && ddi_hold_installed_driver(m)) 8226 ddi_rele_driver(m); 8227 } 8228 } 8229 8230 if (driver_list) { 8231 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8232 kmem_free(*p, strlen(*p) + 1); 8233 kmem_free(driver_list, sizeof (char *) * max_elements); 8234 kmem_free(root_support_list, sizeof (int) * max_elements); 8235 } 8236 } 8237 8238 /* 8239 * Build vhci cache: 8240 * 8241 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8242 * the phci driver instances. During this process the cache gets built. 8243 * 8244 * Cache is built fully if the root is mounted. 8245 * If the root is not mounted, phci drivers that do not have root support 8246 * are not attached. As a result the cache is built partially. The entries 8247 * in the cache reflect only those phci drivers that have root support. 
8248 */ 8249 static int 8250 build_vhci_cache(mdi_vhci_t *vh) 8251 { 8252 mdi_vhci_config_t *vhc = vh->vh_config; 8253 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8254 8255 single_threaded_vhconfig_enter(vhc); 8256 8257 rw_enter(&vhcache->vhcache_lock, RW_READER); 8258 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8259 rw_exit(&vhcache->vhcache_lock); 8260 single_threaded_vhconfig_exit(vhc); 8261 return (0); 8262 } 8263 rw_exit(&vhcache->vhcache_lock); 8264 8265 attach_phci_drivers(vh->vh_class); 8266 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8267 BUS_CONFIG_ALL, (major_t)-1); 8268 8269 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8270 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8271 rw_exit(&vhcache->vhcache_lock); 8272 8273 single_threaded_vhconfig_exit(vhc); 8274 vhcache_dirty(vhc); 8275 return (1); 8276 } 8277 8278 /* 8279 * Determine if discovery of paths is needed. 8280 */ 8281 static int 8282 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8283 { 8284 int rv = 1; 8285 8286 mutex_enter(&vhc->vhc_lock); 8287 if (i_ddi_io_initialized() == 0) { 8288 if (vhc->vhc_path_discovery_boot > 0) { 8289 vhc->vhc_path_discovery_boot--; 8290 goto out; 8291 } 8292 } else { 8293 if (vhc->vhc_path_discovery_postboot > 0) { 8294 vhc->vhc_path_discovery_postboot--; 8295 goto out; 8296 } 8297 } 8298 8299 /* 8300 * Do full path discovery at most once per mdi_path_discovery_interval. 8301 * This is to avoid a series of full path discoveries when opening 8302 * stale /dev/[r]dsk links. 8303 */ 8304 if (mdi_path_discovery_interval != -1 && 8305 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8306 goto out; 8307 8308 rv = 0; 8309 out: 8310 mutex_exit(&vhc->vhc_lock); 8311 return (rv); 8312 } 8313 8314 /* 8315 * Discover all paths: 8316 * 8317 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8318 * driver instances. During this process all paths will be discovered. 
8319 */ 8320 static int 8321 vhcache_discover_paths(mdi_vhci_t *vh) 8322 { 8323 mdi_vhci_config_t *vhc = vh->vh_config; 8324 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8325 int rv = 0; 8326 8327 single_threaded_vhconfig_enter(vhc); 8328 8329 if (vhcache_do_discovery(vhc)) { 8330 attach_phci_drivers(vh->vh_class); 8331 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8332 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8333 8334 mutex_enter(&vhc->vhc_lock); 8335 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8336 mdi_path_discovery_interval * TICKS_PER_SECOND; 8337 mutex_exit(&vhc->vhc_lock); 8338 rv = 1; 8339 } 8340 8341 single_threaded_vhconfig_exit(vhc); 8342 return (rv); 8343 } 8344 8345 /* 8346 * Generic vhci bus config implementation: 8347 * 8348 * Parameters 8349 * vdip vhci dip 8350 * flags bus config flags 8351 * op bus config operation 8352 * The remaining parameters are bus config operation specific 8353 * 8354 * for BUS_CONFIG_ONE 8355 * arg pointer to name@addr 8356 * child upon successful return from this function, *child will be 8357 * set to the configured and held devinfo child node of vdip. 8358 * ct_addr pointer to client address (i.e. GUID) 8359 * 8360 * for BUS_CONFIG_DRIVER 8361 * arg major number of the driver 8362 * child and ct_addr parameters are ignored 8363 * 8364 * for BUS_CONFIG_ALL 8365 * arg, child, and ct_addr parameters are ignored 8366 * 8367 * Note that for the rest of the bus config operations, this function simply 8368 * calls the framework provided default bus config routine. 
8369 */ 8370 int 8371 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8372 void *arg, dev_info_t **child, char *ct_addr) 8373 { 8374 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8375 mdi_vhci_config_t *vhc = vh->vh_config; 8376 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8377 int rv = 0; 8378 int params_valid = 0; 8379 char *cp; 8380 8381 /* 8382 * To bus config vhcis we relay operation, possibly using another 8383 * thread, to phcis. The phci driver then interacts with MDI to cause 8384 * vhci child nodes to be enumerated under the vhci node. Adding a 8385 * vhci child requires an ndi_devi_enter of the vhci. Since another 8386 * thread may be adding the child, to avoid deadlock we can't wait 8387 * for the relayed operations to complete if we have already entered 8388 * the vhci node. 8389 */ 8390 if (DEVI_BUSY_OWNED(vdip)) { 8391 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8392 "vhci dip is busy owned %p\n", (void *)vdip)); 8393 goto default_bus_config; 8394 } 8395 8396 rw_enter(&vhcache->vhcache_lock, RW_READER); 8397 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8398 rw_exit(&vhcache->vhcache_lock); 8399 rv = build_vhci_cache(vh); 8400 rw_enter(&vhcache->vhcache_lock, RW_READER); 8401 } 8402 8403 switch (op) { 8404 case BUS_CONFIG_ONE: 8405 if (arg != NULL && ct_addr != NULL) { 8406 /* extract node name */ 8407 cp = (char *)arg; 8408 while (*cp != '\0' && *cp != '@') 8409 cp++; 8410 if (*cp == '@') { 8411 params_valid = 1; 8412 *cp = '\0'; 8413 config_client_paths(vhc, (char *)arg, ct_addr); 8414 /* config_client_paths() releases cache_lock */ 8415 *cp = '@'; 8416 break; 8417 } 8418 } 8419 8420 rw_exit(&vhcache->vhcache_lock); 8421 break; 8422 8423 case BUS_CONFIG_DRIVER: 8424 rw_exit(&vhcache->vhcache_lock); 8425 if (rv == 0) 8426 st_bus_config_all_phcis(vhc, flags, op, 8427 (major_t)(uintptr_t)arg); 8428 break; 8429 8430 case BUS_CONFIG_ALL: 8431 rw_exit(&vhcache->vhcache_lock); 8432 if (rv == 0) 8433 
st_bus_config_all_phcis(vhc, flags, op, -1); 8434 break; 8435 8436 default: 8437 rw_exit(&vhcache->vhcache_lock); 8438 break; 8439 } 8440 8441 8442 default_bus_config: 8443 /* 8444 * All requested child nodes are enumerated under the vhci. 8445 * Now configure them. 8446 */ 8447 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8448 NDI_SUCCESS) { 8449 return (MDI_SUCCESS); 8450 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8451 /* discover all paths and try configuring again */ 8452 if (vhcache_discover_paths(vh) && 8453 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8454 NDI_SUCCESS) 8455 return (MDI_SUCCESS); 8456 } 8457 8458 return (MDI_FAILURE); 8459 } 8460 8461 /* 8462 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8463 */ 8464 static nvlist_t * 8465 read_on_disk_vhci_cache(char *vhci_class) 8466 { 8467 nvlist_t *nvl; 8468 int err; 8469 char *filename; 8470 8471 filename = vhclass2vhcache_filename(vhci_class); 8472 8473 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8474 kmem_free(filename, strlen(filename) + 1); 8475 return (nvl); 8476 } else if (err == EIO) 8477 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8478 else if (err == EINVAL) 8479 cmn_err(CE_WARN, 8480 "%s: data file corrupted, will recreate\n", filename); 8481 8482 kmem_free(filename, strlen(filename) + 1); 8483 return (NULL); 8484 } 8485 8486 /* 8487 * Read on-disk vhci cache into nvlists for all vhci classes. 8488 * Called during booting by i_ddi_read_devices_files(). 8489 */ 8490 void 8491 mdi_read_devices_files(void) 8492 { 8493 int i; 8494 8495 for (i = 0; i < N_VHCI_CLASSES; i++) 8496 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8497 } 8498 8499 /* 8500 * Remove all stale entries from vhci cache. 
8501 */ 8502 static void 8503 clean_vhcache(mdi_vhci_config_t *vhc) 8504 { 8505 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8506 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8507 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8508 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8509 8510 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8511 8512 cct_head = vhcache->vhcache_client_head; 8513 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8514 for (cct = cct_head; cct != NULL; cct = cct_next) { 8515 cct_next = cct->cct_next; 8516 8517 cpi_head = cct->cct_cpi_head; 8518 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8519 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8520 cpi_next = cpi->cpi_next; 8521 if (cpi->cpi_pip != NULL) { 8522 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8523 enqueue_tail_vhcache_pathinfo(cct, cpi); 8524 } else 8525 free_vhcache_pathinfo(cpi); 8526 } 8527 8528 if (cct->cct_cpi_head != NULL) 8529 enqueue_vhcache_client(vhcache, cct); 8530 else { 8531 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8532 (mod_hash_key_t)cct->cct_name_addr); 8533 free_vhcache_client(cct); 8534 } 8535 } 8536 8537 cphci_head = vhcache->vhcache_phci_head; 8538 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8539 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8540 cphci_next = cphci->cphci_next; 8541 if (cphci->cphci_phci != NULL) 8542 enqueue_vhcache_phci(vhcache, cphci); 8543 else 8544 free_vhcache_phci(cphci); 8545 } 8546 8547 vhcache->vhcache_clean_time = lbolt64; 8548 rw_exit(&vhcache->vhcache_lock); 8549 vhcache_dirty(vhc); 8550 } 8551 8552 /* 8553 * Remove all stale entries from vhci cache. 
8554 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8555 */ 8556 void 8557 mdi_clean_vhcache(void) 8558 { 8559 mdi_vhci_t *vh; 8560 8561 mutex_enter(&mdi_mutex); 8562 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8563 vh->vh_refcnt++; 8564 mutex_exit(&mdi_mutex); 8565 clean_vhcache(vh->vh_config); 8566 mutex_enter(&mdi_mutex); 8567 vh->vh_refcnt--; 8568 } 8569 mutex_exit(&mdi_mutex); 8570 } 8571 8572 /* 8573 * mdi_vhci_walk_clients(): 8574 * Walker routine to traverse client dev_info nodes 8575 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8576 * below the client, including nexus devices, which we dont want. 8577 * So we just traverse the immediate siblings, starting from 1st client. 8578 */ 8579 void 8580 mdi_vhci_walk_clients(dev_info_t *vdip, 8581 int (*f)(dev_info_t *, void *), void *arg) 8582 { 8583 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8584 dev_info_t *cdip; 8585 mdi_client_t *ct; 8586 8587 MDI_VHCI_CLIENT_LOCK(vh); 8588 cdip = ddi_get_child(vdip); 8589 while (cdip) { 8590 ct = i_devi_get_client(cdip); 8591 MDI_CLIENT_LOCK(ct); 8592 8593 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 8594 cdip = ddi_get_next_sibling(cdip); 8595 else 8596 cdip = NULL; 8597 8598 MDI_CLIENT_UNLOCK(ct); 8599 } 8600 MDI_VHCI_CLIENT_UNLOCK(vh); 8601 } 8602 8603 /* 8604 * mdi_vhci_walk_phcis(): 8605 * Walker routine to traverse phci dev_info nodes 8606 */ 8607 void 8608 mdi_vhci_walk_phcis(dev_info_t *vdip, 8609 int (*f)(dev_info_t *, void *), void *arg) 8610 { 8611 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8612 mdi_phci_t *ph, *next; 8613 8614 MDI_VHCI_PHCI_LOCK(vh); 8615 ph = vh->vh_phci_head; 8616 while (ph) { 8617 MDI_PHCI_LOCK(ph); 8618 8619 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 8620 next = ph->ph_next; 8621 else 8622 next = NULL; 8623 8624 MDI_PHCI_UNLOCK(ph); 8625 ph = next; 8626 } 8627 MDI_VHCI_PHCI_UNLOCK(vh); 8628 } 8629 8630 8631 /* 8632 * mdi_walk_vhcis(): 8633 * Walker routine to traverse vhci 
dev_info nodes 8634 */ 8635 void 8636 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8637 { 8638 mdi_vhci_t *vh = NULL; 8639 8640 mutex_enter(&mdi_mutex); 8641 /* 8642 * Scan for already registered vhci 8643 */ 8644 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8645 vh->vh_refcnt++; 8646 mutex_exit(&mdi_mutex); 8647 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8648 mutex_enter(&mdi_mutex); 8649 vh->vh_refcnt--; 8650 break; 8651 } else { 8652 mutex_enter(&mdi_mutex); 8653 vh->vh_refcnt--; 8654 } 8655 } 8656 8657 mutex_exit(&mdi_mutex); 8658 } 8659 8660 /* 8661 * i_mdi_log_sysevent(): 8662 * Logs events for pickup by syseventd 8663 */ 8664 static void 8665 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8666 { 8667 char *path_name; 8668 nvlist_t *attr_list; 8669 8670 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8671 KM_SLEEP) != DDI_SUCCESS) { 8672 goto alloc_failed; 8673 } 8674 8675 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8676 (void) ddi_pathname(dip, path_name); 8677 8678 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8679 ddi_driver_name(dip)) != DDI_SUCCESS) { 8680 goto error; 8681 } 8682 8683 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8684 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8685 goto error; 8686 } 8687 8688 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8689 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8690 goto error; 8691 } 8692 8693 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8694 path_name) != DDI_SUCCESS) { 8695 goto error; 8696 } 8697 8698 if (nvlist_add_string(attr_list, DDI_CLASS, 8699 ph_vh_class) != DDI_SUCCESS) { 8700 goto error; 8701 } 8702 8703 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8704 attr_list, NULL, DDI_SLEEP); 8705 8706 error: 8707 kmem_free(path_name, MAXPATHLEN); 8708 nvlist_free(attr_list); 8709 return; 8710 8711 alloc_failed: 8712 MDI_DEBUG(1, (CE_WARN, dip, 8713 "!i_mdi_log_sysevent: Unable to send sysevent")); 8714 } 
8715 8716 char ** 8717 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 8718 { 8719 char **driver_list, **ret_driver_list = NULL; 8720 int *root_support_list; 8721 int cur_elements, max_elements; 8722 8723 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8724 &cur_elements, &max_elements); 8725 8726 8727 if (driver_list) { 8728 kmem_free(root_support_list, sizeof (int) * max_elements); 8729 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 8730 * max_elements, sizeof (char *) * cur_elements); 8731 } 8732 *ndrivers = cur_elements; 8733 8734 return (ret_driver_list); 8735 8736 } 8737 8738 void 8739 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 8740 { 8741 char **p; 8742 int i; 8743 8744 if (driver_list) { 8745 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 8746 kmem_free(*p, strlen(*p) + 1); 8747 kmem_free(driver_list, sizeof (char *) * ndrivers); 8748 } 8749 } 8750