1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 #include <sys/sysmacros.h> 72 73 #ifdef DEBUG 74 #include <sys/debug.h> 75 int mdi_debug = 1; 76 int mdi_debug_logonly = 0; 77 #define MDI_DEBUG(level, stmnt) \ 78 if (mdi_debug >= (level)) i_mdi_log stmnt 79 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 80 #else /* !DEBUG */ 81 #define MDI_DEBUG(level, stmnt) 82 #endif /* DEBUG */ 83 84 extern pri_t minclsyspri; 85 extern int modrootloaded; 86 87 /* 88 * Global mutex: 89 * Protects vHCI list and structure members. 
90 */ 91 kmutex_t mdi_mutex; 92 93 /* 94 * Registered vHCI class driver lists 95 */ 96 int mdi_vhci_count; 97 mdi_vhci_t *mdi_vhci_head; 98 mdi_vhci_t *mdi_vhci_tail; 99 100 /* 101 * Client Hash Table size 102 */ 103 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 104 105 /* 106 * taskq interface definitions 107 */ 108 #define MDI_TASKQ_N_THREADS 8 109 #define MDI_TASKQ_PRI minclsyspri 110 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 111 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 112 113 taskq_t *mdi_taskq; 114 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 115 116 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 117 118 /* 119 * The data should be "quiet" for this interval (in seconds) before the 120 * vhci cached data is flushed to the disk. 121 */ 122 static int mdi_vhcache_flush_delay = 10; 123 124 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 125 static int mdi_vhcache_flush_daemon_idle_time = 60; 126 127 /* 128 * MDI falls back to discovery of all paths when a bus_config_one fails. 129 * The following parameters can be used to tune this operation. 130 * 131 * mdi_path_discovery_boot 132 * Number of times path discovery will be attempted during early boot. 133 * Probably there is no reason to ever set this value to greater than one. 134 * 135 * mdi_path_discovery_postboot 136 * Number of times path discovery will be attempted after early boot. 137 * Set it to a minimum of two to allow for discovery of iscsi paths which 138 * may happen very late during booting. 139 * 140 * mdi_path_discovery_interval 141 * Minimum number of seconds MDI will wait between successive discovery 142 * of all paths. Set it to -1 to disable discovery of all paths. 
143 */ 144 static int mdi_path_discovery_boot = 1; 145 static int mdi_path_discovery_postboot = 2; 146 static int mdi_path_discovery_interval = 10; 147 148 /* 149 * number of seconds the asynchronous configuration thread will sleep idle 150 * before exiting. 151 */ 152 static int mdi_async_config_idle_time = 600; 153 154 static int mdi_bus_config_cache_hash_size = 256; 155 156 /* turns off multithreaded configuration for certain operations */ 157 static int mdi_mtc_off = 0; 158 159 /* 160 * MDI component property name/value string definitions 161 */ 162 const char *mdi_component_prop = "mpxio-component"; 163 const char *mdi_component_prop_vhci = "vhci"; 164 const char *mdi_component_prop_phci = "phci"; 165 const char *mdi_component_prop_client = "client"; 166 167 /* 168 * MDI client global unique identifier property name 169 */ 170 const char *mdi_client_guid_prop = "client-guid"; 171 172 /* 173 * MDI client load balancing property name/value string definitions 174 */ 175 const char *mdi_load_balance = "load-balance"; 176 const char *mdi_load_balance_none = "none"; 177 const char *mdi_load_balance_rr = "round-robin"; 178 const char *mdi_load_balance_lba = "logical-block"; 179 180 /* 181 * Obsolete vHCI class definition; to be removed after Leadville update 182 */ 183 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 184 185 static char vhci_greeting[] = 186 "\tThere already exists one vHCI driver for class %s\n" 187 "\tOnly one vHCI driver for each class is allowed\n"; 188 189 /* 190 * Static function prototypes 191 */ 192 static int i_mdi_phci_offline(dev_info_t *, uint_t); 193 static int i_mdi_client_offline(dev_info_t *, uint_t); 194 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 195 static void i_mdi_phci_post_detach(dev_info_t *, 196 ddi_detach_cmd_t, int); 197 static int i_mdi_client_pre_detach(dev_info_t *, 198 ddi_detach_cmd_t); 199 static void i_mdi_client_post_detach(dev_info_t *, 200 ddi_detach_cmd_t, int); 201 static void 
i_mdi_pm_hold_pip(mdi_pathinfo_t *); 202 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 203 static int i_mdi_lba_lb(mdi_client_t *ct, 204 mdi_pathinfo_t **ret_pip, struct buf *buf); 205 static void i_mdi_pm_hold_client(mdi_client_t *, int); 206 static void i_mdi_pm_rele_client(mdi_client_t *, int); 207 static void i_mdi_pm_reset_client(mdi_client_t *); 208 static int i_mdi_power_all_phci(mdi_client_t *); 209 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 210 211 212 /* 213 * Internal mdi_pathinfo node functions 214 */ 215 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 216 217 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 218 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 219 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 220 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 221 static void i_mdi_phci_unlock(mdi_phci_t *); 222 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 223 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 224 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 225 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 226 mdi_client_t *); 227 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 228 static void i_mdi_client_remove_path(mdi_client_t *, 229 mdi_pathinfo_t *); 230 231 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 232 mdi_pathinfo_state_t, int); 233 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 234 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 235 char **, int); 236 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 237 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 238 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 239 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 240 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 241 static void i_mdi_client_delist_table(mdi_vhci_t *, 
mdi_client_t *); 242 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 243 static void i_mdi_client_update_state(mdi_client_t *); 244 static int i_mdi_client_compute_state(mdi_client_t *, 245 mdi_phci_t *); 246 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 247 static void i_mdi_client_unlock(mdi_client_t *); 248 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 249 static mdi_client_t *i_devi_get_client(dev_info_t *); 250 /* 251 * NOTE: this will be removed once the NWS files are changed to use the new 252 * mdi_{enable,disable}_path interfaces 253 */ 254 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 255 int, int); 256 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 257 mdi_vhci_t *vh, int flags, int op); 258 /* 259 * Failover related function prototypes 260 */ 261 static int i_mdi_failover(void *); 262 263 /* 264 * misc internal functions 265 */ 266 static int i_mdi_get_hash_key(char *); 267 static int i_map_nvlist_error_to_mdi(int); 268 static void i_mdi_report_path_state(mdi_client_t *, 269 mdi_pathinfo_t *); 270 271 static void setup_vhci_cache(mdi_vhci_t *); 272 static int destroy_vhci_cache(mdi_vhci_t *); 273 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 274 static boolean_t stop_vhcache_flush_thread(void *, int); 275 static void free_string_array(char **, int); 276 static void free_vhcache_phci(mdi_vhcache_phci_t *); 277 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 278 static void free_vhcache_client(mdi_vhcache_client_t *); 279 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 280 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 281 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 282 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 283 static void vhcache_pi_add(mdi_vhci_config_t *, 284 struct mdi_pathinfo *); 285 static void vhcache_pi_remove(mdi_vhci_config_t *, 286 struct 
mdi_pathinfo *); 287 static void free_phclient_path_list(mdi_phys_path_t *); 288 static void sort_vhcache_paths(mdi_vhcache_client_t *); 289 static int flush_vhcache(mdi_vhci_config_t *, int); 290 static void vhcache_dirty(mdi_vhci_config_t *); 291 static void free_async_client_config(mdi_async_client_config_t *); 292 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 293 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 294 static nvlist_t *read_on_disk_vhci_cache(char *); 295 extern int fread_nvlist(char *, nvlist_t **); 296 extern int fwrite_nvlist(char *, nvlist_t *); 297 298 /* called once when first vhci registers with mdi */ 299 static void 300 i_mdi_init() 301 { 302 static int initialized = 0; 303 304 if (initialized) 305 return; 306 initialized = 1; 307 308 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 309 /* 310 * Create our taskq resources 311 */ 312 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 313 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 314 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 315 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 316 } 317 318 /* 319 * mdi_get_component_type(): 320 * Return mpxio component type 321 * Return Values: 322 * MDI_COMPONENT_NONE 323 * MDI_COMPONENT_VHCI 324 * MDI_COMPONENT_PHCI 325 * MDI_COMPONENT_CLIENT 326 * XXX This doesn't work under multi-level MPxIO and should be 327 * removed when clients migrate mdi_component_is_*() interfaces. 328 */ 329 int 330 mdi_get_component_type(dev_info_t *dip) 331 { 332 return (DEVI(dip)->devi_mdi_component); 333 } 334 335 /* 336 * mdi_vhci_register(): 337 * Register a vHCI module with the mpxio framework 338 * mdi_vhci_register() is called by vHCI drivers to register the 339 * 'class_driver' vHCI driver and its MDI entrypoints with the 340 * mpxio framework. The vHCI driver must call this interface as 341 * part of its attach(9e) handler. 
342 * Competing threads may try to attach mdi_vhci_register() as 343 * the vHCI drivers are loaded and attached as a result of pHCI 344 * driver instance registration (mdi_phci_register()) with the 345 * framework. 346 * Return Values: 347 * MDI_SUCCESS 348 * MDI_FAILURE 349 */ 350 /*ARGSUSED*/ 351 int 352 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 353 int flags) 354 { 355 mdi_vhci_t *vh = NULL; 356 357 ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV); 358 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 359 360 i_mdi_init(); 361 362 mutex_enter(&mdi_mutex); 363 /* 364 * Scan for already registered vhci 365 */ 366 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 367 if (strcmp(vh->vh_class, class) == 0) { 368 /* 369 * vHCI has already been created. Check for valid 370 * vHCI ops registration. We only support one vHCI 371 * module per class 372 */ 373 if (vh->vh_ops != NULL) { 374 mutex_exit(&mdi_mutex); 375 cmn_err(CE_NOTE, vhci_greeting, class); 376 return (MDI_FAILURE); 377 } 378 break; 379 } 380 } 381 382 /* 383 * if not yet created, create the vHCI component 384 */ 385 if (vh == NULL) { 386 struct client_hash *hash = NULL; 387 char *load_balance; 388 389 /* 390 * Allocate and initialize the mdi extensions 391 */ 392 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 393 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 394 KM_SLEEP); 395 vh->vh_client_table = hash; 396 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 397 (void) strcpy(vh->vh_class, class); 398 vh->vh_lb = LOAD_BALANCE_RR; 399 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 400 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 401 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 402 vh->vh_lb = LOAD_BALANCE_NONE; 403 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 404 == 0) { 405 vh->vh_lb = LOAD_BALANCE_LBA; 406 } 407 ddi_prop_free(load_balance); 408 } 409 410 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 411 
mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 412 413 /* 414 * Store the vHCI ops vectors 415 */ 416 vh->vh_dip = vdip; 417 vh->vh_ops = vops; 418 419 setup_vhci_cache(vh); 420 421 if (mdi_vhci_head == NULL) { 422 mdi_vhci_head = vh; 423 } 424 if (mdi_vhci_tail) { 425 mdi_vhci_tail->vh_next = vh; 426 } 427 mdi_vhci_tail = vh; 428 mdi_vhci_count++; 429 } 430 431 /* 432 * Claim the devfs node as a vhci component 433 */ 434 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 435 436 /* 437 * Initialize our back reference from dev_info node 438 */ 439 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 440 mutex_exit(&mdi_mutex); 441 return (MDI_SUCCESS); 442 } 443 444 /* 445 * mdi_vhci_unregister(): 446 * Unregister a vHCI module from mpxio framework 447 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 448 * of a vhci to unregister it from the framework. 449 * Return Values: 450 * MDI_SUCCESS 451 * MDI_FAILURE 452 */ 453 /*ARGSUSED*/ 454 int 455 mdi_vhci_unregister(dev_info_t *vdip, int flags) 456 { 457 mdi_vhci_t *found, *vh, *prev = NULL; 458 459 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 460 461 /* 462 * Check for invalid VHCI 463 */ 464 if ((vh = i_devi_get_vhci(vdip)) == NULL) 465 return (MDI_FAILURE); 466 467 /* 468 * Scan the list of registered vHCIs for a match 469 */ 470 mutex_enter(&mdi_mutex); 471 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 472 if (found == vh) 473 break; 474 prev = found; 475 } 476 477 if (found == NULL) { 478 mutex_exit(&mdi_mutex); 479 return (MDI_FAILURE); 480 } 481 482 /* 483 * Check the vHCI, pHCI and client count. All the pHCIs and clients 484 * should have been unregistered, before a vHCI can be 485 * unregistered. 
486 */ 487 MDI_VHCI_PHCI_LOCK(vh); 488 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 489 MDI_VHCI_PHCI_UNLOCK(vh); 490 mutex_exit(&mdi_mutex); 491 return (MDI_FAILURE); 492 } 493 MDI_VHCI_PHCI_UNLOCK(vh); 494 495 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 496 mutex_exit(&mdi_mutex); 497 return (MDI_FAILURE); 498 } 499 500 /* 501 * Remove the vHCI from the global list 502 */ 503 if (vh == mdi_vhci_head) { 504 mdi_vhci_head = vh->vh_next; 505 } else { 506 prev->vh_next = vh->vh_next; 507 } 508 if (vh == mdi_vhci_tail) { 509 mdi_vhci_tail = prev; 510 } 511 mdi_vhci_count--; 512 mutex_exit(&mdi_mutex); 513 514 vh->vh_ops = NULL; 515 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 516 DEVI(vdip)->devi_mdi_xhci = NULL; 517 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 518 kmem_free(vh->vh_client_table, 519 mdi_client_table_size * sizeof (struct client_hash)); 520 mutex_destroy(&vh->vh_phci_mutex); 521 mutex_destroy(&vh->vh_client_mutex); 522 523 kmem_free(vh, sizeof (mdi_vhci_t)); 524 return (MDI_SUCCESS); 525 } 526 527 /* 528 * i_mdi_vhci_class2vhci(): 529 * Look for a matching vHCI module given a vHCI class name 530 * Return Values: 531 * Handle to a vHCI component 532 * NULL 533 */ 534 static mdi_vhci_t * 535 i_mdi_vhci_class2vhci(char *class) 536 { 537 mdi_vhci_t *vh = NULL; 538 539 ASSERT(!MUTEX_HELD(&mdi_mutex)); 540 541 mutex_enter(&mdi_mutex); 542 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 543 if (strcmp(vh->vh_class, class) == 0) { 544 break; 545 } 546 } 547 mutex_exit(&mdi_mutex); 548 return (vh); 549 } 550 551 /* 552 * i_devi_get_vhci(): 553 * Utility function to get the handle to a vHCI component 554 * Return Values: 555 * Handle to a vHCI component 556 * NULL 557 */ 558 mdi_vhci_t * 559 i_devi_get_vhci(dev_info_t *vdip) 560 { 561 mdi_vhci_t *vh = NULL; 562 if (MDI_VHCI(vdip)) { 563 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 564 } 565 return (vh); 566 } 567 568 /* 569 * mdi_phci_register(): 570 * Register a pHCI 
module with mpxio framework 571 * mdi_phci_register() is called by pHCI drivers to register with 572 * the mpxio framework and a specific 'class_driver' vHCI. The 573 * pHCI driver must call this interface as part of its attach(9e) 574 * handler. 575 * Return Values: 576 * MDI_SUCCESS 577 * MDI_FAILURE 578 */ 579 /*ARGSUSED*/ 580 int 581 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 582 { 583 mdi_phci_t *ph; 584 mdi_vhci_t *vh; 585 char *data; 586 char *pathname; 587 588 /* 589 * Some subsystems, like fcp, perform pHCI registration from a 590 * different thread than the one doing the pHCI attach(9E) - the 591 * driver attach code is waiting for this other thread to complete. 592 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 593 * (indicating that some thread has done an ndi_devi_enter of parent) 594 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 595 */ 596 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 597 598 pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 599 (void) ddi_pathname(pdip, pathname); 600 601 /* 602 * Check for mpxio-disable property. Enable mpxio if the property is 603 * missing or not set to "yes". 604 * If the property is set to "yes" then emit a brief message. 
605 */ 606 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 607 &data) == DDI_SUCCESS)) { 608 if (strcmp(data, "yes") == 0) { 609 MDI_DEBUG(1, (CE_CONT, pdip, 610 "?%s (%s%d) multipath capabilities " 611 "disabled via %s.conf.\n", pathname, 612 ddi_driver_name(pdip), ddi_get_instance(pdip), 613 ddi_driver_name(pdip))); 614 ddi_prop_free(data); 615 kmem_free(pathname, MAXPATHLEN); 616 return (MDI_FAILURE); 617 } 618 ddi_prop_free(data); 619 } 620 621 kmem_free(pathname, MAXPATHLEN); 622 623 /* 624 * Search for a matching vHCI 625 */ 626 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 627 if (vh == NULL) { 628 return (MDI_FAILURE); 629 } 630 631 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 632 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 633 ph->ph_dip = pdip; 634 ph->ph_vhci = vh; 635 ph->ph_next = NULL; 636 ph->ph_unstable = 0; 637 ph->ph_vprivate = 0; 638 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 639 640 MDI_PHCI_LOCK(ph); 641 MDI_PHCI_SET_POWER_UP(ph); 642 MDI_PHCI_UNLOCK(ph); 643 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 644 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 645 646 vhcache_phci_add(vh->vh_config, ph); 647 648 MDI_VHCI_PHCI_LOCK(vh); 649 if (vh->vh_phci_head == NULL) { 650 vh->vh_phci_head = ph; 651 } 652 if (vh->vh_phci_tail) { 653 vh->vh_phci_tail->ph_next = ph; 654 } 655 vh->vh_phci_tail = ph; 656 vh->vh_phci_count++; 657 MDI_VHCI_PHCI_UNLOCK(vh); 658 659 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 660 return (MDI_SUCCESS); 661 } 662 663 /* 664 * mdi_phci_unregister(): 665 * Unregister a pHCI module from mpxio framework 666 * mdi_phci_unregister() is called by the pHCI drivers from their 667 * detach(9E) handler to unregister their instances from the 668 * framework. 
669 * Return Values: 670 * MDI_SUCCESS 671 * MDI_FAILURE 672 */ 673 /*ARGSUSED*/ 674 int 675 mdi_phci_unregister(dev_info_t *pdip, int flags) 676 { 677 mdi_vhci_t *vh; 678 mdi_phci_t *ph; 679 mdi_phci_t *tmp; 680 mdi_phci_t *prev = NULL; 681 682 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 683 684 ph = i_devi_get_phci(pdip); 685 if (ph == NULL) { 686 MDI_DEBUG(1, (CE_WARN, pdip, 687 "!pHCI unregister: Not a valid pHCI")); 688 return (MDI_FAILURE); 689 } 690 691 vh = ph->ph_vhci; 692 ASSERT(vh != NULL); 693 if (vh == NULL) { 694 MDI_DEBUG(1, (CE_WARN, pdip, 695 "!pHCI unregister: Not a valid vHCI")); 696 return (MDI_FAILURE); 697 } 698 699 MDI_VHCI_PHCI_LOCK(vh); 700 tmp = vh->vh_phci_head; 701 while (tmp) { 702 if (tmp == ph) { 703 break; 704 } 705 prev = tmp; 706 tmp = tmp->ph_next; 707 } 708 709 if (ph == vh->vh_phci_head) { 710 vh->vh_phci_head = ph->ph_next; 711 } else { 712 prev->ph_next = ph->ph_next; 713 } 714 715 if (ph == vh->vh_phci_tail) { 716 vh->vh_phci_tail = prev; 717 } 718 719 vh->vh_phci_count--; 720 MDI_VHCI_PHCI_UNLOCK(vh); 721 722 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 723 ESC_DDI_INITIATOR_UNREGISTER); 724 vhcache_phci_remove(vh->vh_config, ph); 725 cv_destroy(&ph->ph_unstable_cv); 726 mutex_destroy(&ph->ph_mutex); 727 kmem_free(ph, sizeof (mdi_phci_t)); 728 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 729 DEVI(pdip)->devi_mdi_xhci = NULL; 730 return (MDI_SUCCESS); 731 } 732 733 /* 734 * i_devi_get_phci(): 735 * Utility function to return the phci extensions. 736 */ 737 static mdi_phci_t * 738 i_devi_get_phci(dev_info_t *pdip) 739 { 740 mdi_phci_t *ph = NULL; 741 if (MDI_PHCI(pdip)) { 742 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 743 } 744 return (ph); 745 } 746 747 /* 748 * Single thread mdi entry into devinfo node for modifying its children. 749 * If necessary we perform an ndi_devi_enter of the vHCI before doing 750 * an ndi_devi_enter of 'dip'. 
We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 *
 * The two values are packed into the single int returned through 'circular':
 * the vHCI value in the upper 16 bits and the pHCI value in the lower 16
 * bits.  A vHCI value of -1 records that this call did not enter the vHCI.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a thread that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and then enter down the path
	 * to the pHCI. If pHCI is detaching then we piggyback this call's
	 * enter of the vHCI on the framework's vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should check DEVI_IS_DETACHING under an enter of the parent to
	 * avoid race with detach - but we can't do that because the
	 * framework has already entered the parent, so we have some
	 * complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/* piggyback on the framework's enter */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else {
			/* vHCI busy and no detach in progress: retry */
			delay(1);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);

	/*
	 * Pack through an unsigned type: vcircular may be -1 here, and
	 * left-shifting a negative int is undefined behavior in C.  The
	 * resulting bit pattern is what mdi_devi_exit() unpacks.
	 */
	*circular = (int)(((uint_t)vcircular << 16) |
	    ((uint_t)pcircular & 0xFFFF));
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
801 */ 802 void 803 mdi_devi_exit(dev_info_t *phci_dip, int circular) 804 { 805 dev_info_t *vdip; 806 int vcircular, pcircular; 807 808 /* Verify calling context */ 809 ASSERT(MDI_PHCI(phci_dip)); 810 vdip = mdi_devi_get_vdip(phci_dip); 811 ASSERT(vdip); /* A pHCI always has a vHCI */ 812 813 /* extract two circular recursion values from single int */ 814 pcircular = (short)(circular & 0xFFFF); 815 vcircular = (short)((circular >> 16) & 0xFFFF); 816 817 ndi_devi_exit(phci_dip, pcircular); 818 if (vcircular != -1) 819 ndi_devi_exit(vdip, vcircular); 820 } 821 822 /* 823 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 824 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 825 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 826 * with vHCI power management code during path online/offline. Each 827 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 828 * occur within the scope of an active mdi_devi_enter that establishes the 829 * circular value. 
830 */ 831 void 832 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 833 { 834 int pcircular; 835 836 /* Verify calling context */ 837 ASSERT(MDI_PHCI(phci_dip)); 838 839 pcircular = (short)(circular & 0xFFFF); 840 ndi_devi_exit(phci_dip, pcircular); 841 } 842 843 void 844 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 845 { 846 int pcircular; 847 848 /* Verify calling context */ 849 ASSERT(MDI_PHCI(phci_dip)); 850 851 ndi_devi_enter(phci_dip, &pcircular); 852 853 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 854 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 855 } 856 857 /* 858 * mdi_devi_get_vdip(): 859 * given a pHCI dip return vHCI dip 860 */ 861 dev_info_t * 862 mdi_devi_get_vdip(dev_info_t *pdip) 863 { 864 mdi_phci_t *ph; 865 866 ph = i_devi_get_phci(pdip); 867 if (ph && ph->ph_vhci) 868 return (ph->ph_vhci->vh_dip); 869 return (NULL); 870 } 871 872 /* 873 * mdi_devi_pdip_entered(): 874 * Return 1 if we are vHCI and have done an ndi_devi_enter 875 * of a pHCI 876 */ 877 int 878 mdi_devi_pdip_entered(dev_info_t *vdip) 879 { 880 mdi_vhci_t *vh; 881 mdi_phci_t *ph; 882 883 vh = i_devi_get_vhci(vdip); 884 if (vh == NULL) 885 return (0); 886 887 MDI_VHCI_PHCI_LOCK(vh); 888 ph = vh->vh_phci_head; 889 while (ph) { 890 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 891 MDI_VHCI_PHCI_UNLOCK(vh); 892 return (1); 893 } 894 ph = ph->ph_next; 895 } 896 MDI_VHCI_PHCI_UNLOCK(vh); 897 return (0); 898 } 899 900 /* 901 * mdi_phci_path2devinfo(): 902 * Utility function to search for a valid phci device given 903 * the devfs pathname. 
904 */ 905 dev_info_t * 906 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 907 { 908 char *temp_pathname; 909 mdi_vhci_t *vh; 910 mdi_phci_t *ph; 911 dev_info_t *pdip = NULL; 912 913 vh = i_devi_get_vhci(vdip); 914 ASSERT(vh != NULL); 915 916 if (vh == NULL) { 917 /* 918 * Invalid vHCI component, return failure 919 */ 920 return (NULL); 921 } 922 923 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 924 MDI_VHCI_PHCI_LOCK(vh); 925 ph = vh->vh_phci_head; 926 while (ph != NULL) { 927 pdip = ph->ph_dip; 928 ASSERT(pdip != NULL); 929 *temp_pathname = '\0'; 930 (void) ddi_pathname(pdip, temp_pathname); 931 if (strcmp(temp_pathname, pathname) == 0) { 932 break; 933 } 934 ph = ph->ph_next; 935 } 936 if (ph == NULL) { 937 pdip = NULL; 938 } 939 MDI_VHCI_PHCI_UNLOCK(vh); 940 kmem_free(temp_pathname, MAXPATHLEN); 941 return (pdip); 942 } 943 944 /* 945 * mdi_phci_get_path_count(): 946 * get number of path information nodes associated with a given 947 * pHCI device. 948 */ 949 int 950 mdi_phci_get_path_count(dev_info_t *pdip) 951 { 952 mdi_phci_t *ph; 953 int count = 0; 954 955 ph = i_devi_get_phci(pdip); 956 if (ph != NULL) { 957 count = ph->ph_path_count; 958 } 959 return (count); 960 } 961 962 /* 963 * i_mdi_phci_lock(): 964 * Lock a pHCI device 965 * Return Values: 966 * None 967 * Note: 968 * The default locking order is: 969 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 970 * But there are number of situations where locks need to be 971 * grabbed in reverse order. This routine implements try and lock 972 * mechanism depending on the requested parameter option. 973 */ 974 static void 975 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 976 { 977 if (pip) { 978 /* Reverse locking is requested. */ 979 while (MDI_PHCI_TRYLOCK(ph) == 0) { 980 /* 981 * tryenter failed. 
Try to grab again 982 * after a small delay 983 */ 984 MDI_PI_HOLD(pip); 985 MDI_PI_UNLOCK(pip); 986 delay(1); 987 MDI_PI_LOCK(pip); 988 MDI_PI_RELE(pip); 989 } 990 } else { 991 MDI_PHCI_LOCK(ph); 992 } 993 } 994 995 /* 996 * i_mdi_phci_unlock(): 997 * Unlock the pHCI component 998 */ 999 static void 1000 i_mdi_phci_unlock(mdi_phci_t *ph) 1001 { 1002 MDI_PHCI_UNLOCK(ph); 1003 } 1004 1005 /* 1006 * i_mdi_devinfo_create(): 1007 * create client device's devinfo node 1008 * Return Values: 1009 * dev_info 1010 * NULL 1011 * Notes: 1012 */ 1013 static dev_info_t * 1014 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1015 char **compatible, int ncompatible) 1016 { 1017 dev_info_t *cdip = NULL; 1018 1019 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1020 1021 /* Verify for duplicate entry */ 1022 cdip = i_mdi_devinfo_find(vh, name, guid); 1023 ASSERT(cdip == NULL); 1024 if (cdip) { 1025 cmn_err(CE_WARN, 1026 "i_mdi_devinfo_create: client dip %p already exists", 1027 (void *)cdip); 1028 } 1029 1030 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1031 if (cdip == NULL) 1032 goto fail; 1033 1034 /* 1035 * Create component type and Global unique identifier 1036 * properties 1037 */ 1038 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1039 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1040 goto fail; 1041 } 1042 1043 /* Decorate the node with compatible property */ 1044 if (compatible && 1045 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1046 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1047 goto fail; 1048 } 1049 1050 return (cdip); 1051 1052 fail: 1053 if (cdip) { 1054 (void) ndi_prop_remove_all(cdip); 1055 (void) ndi_devi_free(cdip); 1056 } 1057 return (NULL); 1058 } 1059 1060 /* 1061 * i_mdi_devinfo_find(): 1062 * Find a matching devinfo node for given client node name 1063 * and its guid. 
1064 * Return Values: 1065 * Handle to a dev_info node or NULL 1066 */ 1067 static dev_info_t * 1068 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1069 { 1070 char *data; 1071 dev_info_t *cdip = NULL; 1072 dev_info_t *ndip = NULL; 1073 int circular; 1074 1075 ndi_devi_enter(vh->vh_dip, &circular); 1076 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1077 while ((cdip = ndip) != NULL) { 1078 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1079 1080 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1081 continue; 1082 } 1083 1084 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1085 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1086 &data) != DDI_PROP_SUCCESS) { 1087 continue; 1088 } 1089 1090 if (strcmp(data, guid) != 0) { 1091 ddi_prop_free(data); 1092 continue; 1093 } 1094 ddi_prop_free(data); 1095 break; 1096 } 1097 ndi_devi_exit(vh->vh_dip, circular); 1098 return (cdip); 1099 } 1100 1101 /* 1102 * i_mdi_devinfo_remove(): 1103 * Remove a client device node 1104 */ 1105 static int 1106 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1107 { 1108 int rv = MDI_SUCCESS; 1109 1110 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1111 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1112 rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE); 1113 if (rv != NDI_SUCCESS) { 1114 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:" 1115 " failed. 
cdip = %p\n", (void *)cdip)); 1116 } 1117 /* 1118 * Convert to MDI error code 1119 */ 1120 switch (rv) { 1121 case NDI_SUCCESS: 1122 rv = MDI_SUCCESS; 1123 break; 1124 case NDI_BUSY: 1125 rv = MDI_BUSY; 1126 break; 1127 default: 1128 rv = MDI_FAILURE; 1129 break; 1130 } 1131 } 1132 return (rv); 1133 } 1134 1135 /* 1136 * i_devi_get_client() 1137 * Utility function to get mpxio component extensions 1138 */ 1139 static mdi_client_t * 1140 i_devi_get_client(dev_info_t *cdip) 1141 { 1142 mdi_client_t *ct = NULL; 1143 1144 if (MDI_CLIENT(cdip)) { 1145 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1146 } 1147 return (ct); 1148 } 1149 1150 /* 1151 * i_mdi_is_child_present(): 1152 * Search for the presence of client device dev_info node 1153 */ 1154 static int 1155 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1156 { 1157 int rv = MDI_FAILURE; 1158 struct dev_info *dip; 1159 int circular; 1160 1161 ndi_devi_enter(vdip, &circular); 1162 dip = DEVI(vdip)->devi_child; 1163 while (dip) { 1164 if (dip == DEVI(cdip)) { 1165 rv = MDI_SUCCESS; 1166 break; 1167 } 1168 dip = dip->devi_sibling; 1169 } 1170 ndi_devi_exit(vdip, circular); 1171 return (rv); 1172 } 1173 1174 1175 /* 1176 * i_mdi_client_lock(): 1177 * Grab client component lock 1178 * Return Values: 1179 * None 1180 * Note: 1181 * The default locking order is: 1182 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1183 * But there are number of situations where locks need to be 1184 * grabbed in reverse order. This routine implements try and lock 1185 * mechanism depending on the requested parameter option. 1186 */ 1187 static void 1188 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1189 { 1190 if (pip) { 1191 /* 1192 * Reverse locking is requested. 1193 */ 1194 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1195 /* 1196 * tryenter failed. 
Try to grab again 1197 * after a small delay 1198 */ 1199 MDI_PI_HOLD(pip); 1200 MDI_PI_UNLOCK(pip); 1201 delay(1); 1202 MDI_PI_LOCK(pip); 1203 MDI_PI_RELE(pip); 1204 } 1205 } else { 1206 MDI_CLIENT_LOCK(ct); 1207 } 1208 } 1209 1210 /* 1211 * i_mdi_client_unlock(): 1212 * Unlock a client component 1213 */ 1214 static void 1215 i_mdi_client_unlock(mdi_client_t *ct) 1216 { 1217 MDI_CLIENT_UNLOCK(ct); 1218 } 1219 1220 /* 1221 * i_mdi_client_alloc(): 1222 * Allocate and initialize a client structure. Caller should 1223 * hold the vhci client lock. 1224 * Return Values: 1225 * Handle to a client component 1226 */ 1227 /*ARGSUSED*/ 1228 static mdi_client_t * 1229 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1230 { 1231 mdi_client_t *ct; 1232 1233 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1234 1235 /* 1236 * Allocate and initialize a component structure. 1237 */ 1238 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1239 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1240 ct->ct_hnext = NULL; 1241 ct->ct_hprev = NULL; 1242 ct->ct_dip = NULL; 1243 ct->ct_vhci = vh; 1244 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1245 (void) strcpy(ct->ct_drvname, name); 1246 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1247 (void) strcpy(ct->ct_guid, lguid); 1248 ct->ct_cprivate = NULL; 1249 ct->ct_vprivate = NULL; 1250 ct->ct_flags = 0; 1251 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1252 MDI_CLIENT_LOCK(ct); 1253 MDI_CLIENT_SET_OFFLINE(ct); 1254 MDI_CLIENT_SET_DETACH(ct); 1255 MDI_CLIENT_SET_POWER_UP(ct); 1256 MDI_CLIENT_UNLOCK(ct); 1257 ct->ct_failover_flags = 0; 1258 ct->ct_failover_status = 0; 1259 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1260 ct->ct_unstable = 0; 1261 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1262 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1263 ct->ct_lb = vh->vh_lb; 1264 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1265 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1266 ct->ct_path_count = 0; 1267 ct->ct_path_head = NULL; 1268 ct->ct_path_tail = NULL; 1269 ct->ct_path_last = NULL; 1270 1271 /* 1272 * Add this client component to our client hash queue 1273 */ 1274 i_mdi_client_enlist_table(vh, ct); 1275 return (ct); 1276 } 1277 1278 /* 1279 * i_mdi_client_enlist_table(): 1280 * Attach the client device to the client hash table. Caller 1281 * should hold the vhci client lock. 1282 */ 1283 static void 1284 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1285 { 1286 int index; 1287 struct client_hash *head; 1288 1289 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1290 1291 index = i_mdi_get_hash_key(ct->ct_guid); 1292 head = &vh->vh_client_table[index]; 1293 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1294 head->ct_hash_head = ct; 1295 head->ct_hash_count++; 1296 vh->vh_client_count++; 1297 } 1298 1299 /* 1300 * i_mdi_client_delist_table(): 1301 * Attach the client device to the client hash table. 1302 * Caller should hold the vhci client lock. 
1303 */ 1304 static void 1305 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1306 { 1307 int index; 1308 char *guid; 1309 struct client_hash *head; 1310 mdi_client_t *next; 1311 mdi_client_t *last; 1312 1313 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1314 1315 guid = ct->ct_guid; 1316 index = i_mdi_get_hash_key(guid); 1317 head = &vh->vh_client_table[index]; 1318 1319 last = NULL; 1320 next = (mdi_client_t *)head->ct_hash_head; 1321 while (next != NULL) { 1322 if (next == ct) { 1323 break; 1324 } 1325 last = next; 1326 next = next->ct_hnext; 1327 } 1328 1329 if (next) { 1330 head->ct_hash_count--; 1331 if (last == NULL) { 1332 head->ct_hash_head = ct->ct_hnext; 1333 } else { 1334 last->ct_hnext = ct->ct_hnext; 1335 } 1336 ct->ct_hnext = NULL; 1337 vh->vh_client_count--; 1338 } 1339 } 1340 1341 1342 /* 1343 * i_mdi_client_free(): 1344 * Free a client component 1345 */ 1346 static int 1347 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1348 { 1349 int rv = MDI_SUCCESS; 1350 int flags = ct->ct_flags; 1351 dev_info_t *cdip; 1352 dev_info_t *vdip; 1353 1354 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1355 1356 vdip = vh->vh_dip; 1357 cdip = ct->ct_dip; 1358 1359 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1360 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1361 DEVI(cdip)->devi_mdi_client = NULL; 1362 1363 /* 1364 * Clear out back ref. 
to dev_info_t node 1365 */ 1366 ct->ct_dip = NULL; 1367 1368 /* 1369 * Remove this client from our hash queue 1370 */ 1371 i_mdi_client_delist_table(vh, ct); 1372 1373 /* 1374 * Uninitialize and free the component 1375 */ 1376 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1377 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1378 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1379 cv_destroy(&ct->ct_failover_cv); 1380 cv_destroy(&ct->ct_unstable_cv); 1381 cv_destroy(&ct->ct_powerchange_cv); 1382 mutex_destroy(&ct->ct_mutex); 1383 kmem_free(ct, sizeof (*ct)); 1384 1385 if (cdip != NULL) { 1386 MDI_VHCI_CLIENT_UNLOCK(vh); 1387 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1388 MDI_VHCI_CLIENT_LOCK(vh); 1389 } 1390 return (rv); 1391 } 1392 1393 /* 1394 * i_mdi_client_find(): 1395 * Find the client structure corresponding to a given guid 1396 * Caller should hold the vhci client lock. 1397 */ 1398 static mdi_client_t * 1399 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1400 { 1401 int index; 1402 struct client_hash *head; 1403 mdi_client_t *ct; 1404 1405 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1406 1407 index = i_mdi_get_hash_key(guid); 1408 head = &vh->vh_client_table[index]; 1409 1410 ct = head->ct_hash_head; 1411 while (ct != NULL) { 1412 if (strcmp(ct->ct_guid, guid) == 0 && 1413 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1414 break; 1415 } 1416 ct = ct->ct_hnext; 1417 } 1418 return (ct); 1419 } 1420 1421 /* 1422 * i_mdi_client_update_state(): 1423 * Compute and update client device state 1424 * Notes: 1425 * A client device can be in any of three possible states: 1426 * 1427 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1428 * one online/standby paths. Can tolerate failures. 1429 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1430 * no alternate paths available as standby. A failure on the online 1431 * would result in loss of access to device data. 
1432 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1433 * no paths available to access the device. 1434 */ 1435 static void 1436 i_mdi_client_update_state(mdi_client_t *ct) 1437 { 1438 int state; 1439 1440 ASSERT(MDI_CLIENT_LOCKED(ct)); 1441 state = i_mdi_client_compute_state(ct, NULL); 1442 MDI_CLIENT_SET_STATE(ct, state); 1443 } 1444 1445 /* 1446 * i_mdi_client_compute_state(): 1447 * Compute client device state 1448 * 1449 * mdi_phci_t * Pointer to pHCI structure which should 1450 * while computing the new value. Used by 1451 * i_mdi_phci_offline() to find the new 1452 * client state after DR of a pHCI. 1453 */ 1454 static int 1455 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1456 { 1457 int state; 1458 int online_count = 0; 1459 int standby_count = 0; 1460 mdi_pathinfo_t *pip, *next; 1461 1462 ASSERT(MDI_CLIENT_LOCKED(ct)); 1463 pip = ct->ct_path_head; 1464 while (pip != NULL) { 1465 MDI_PI_LOCK(pip); 1466 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1467 if (MDI_PI(pip)->pi_phci == ph) { 1468 MDI_PI_UNLOCK(pip); 1469 pip = next; 1470 continue; 1471 } 1472 1473 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1474 == MDI_PATHINFO_STATE_ONLINE) 1475 online_count++; 1476 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1477 == MDI_PATHINFO_STATE_STANDBY) 1478 standby_count++; 1479 MDI_PI_UNLOCK(pip); 1480 pip = next; 1481 } 1482 1483 if (online_count == 0) { 1484 if (standby_count == 0) { 1485 state = MDI_CLIENT_STATE_FAILED; 1486 MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed" 1487 " ct = %p\n", (void *)ct)); 1488 } else if (standby_count == 1) { 1489 state = MDI_CLIENT_STATE_DEGRADED; 1490 } else { 1491 state = MDI_CLIENT_STATE_OPTIMAL; 1492 } 1493 } else if (online_count == 1) { 1494 if (standby_count == 0) { 1495 state = MDI_CLIENT_STATE_DEGRADED; 1496 } else { 1497 state = MDI_CLIENT_STATE_OPTIMAL; 1498 } 1499 } else { 1500 state = MDI_CLIENT_STATE_OPTIMAL; 1501 } 1502 return (state); 1503 } 1504 
1505 /* 1506 * i_mdi_client2devinfo(): 1507 * Utility function 1508 */ 1509 dev_info_t * 1510 i_mdi_client2devinfo(mdi_client_t *ct) 1511 { 1512 return (ct->ct_dip); 1513 } 1514 1515 /* 1516 * mdi_client_path2_devinfo(): 1517 * Given the parent devinfo and child devfs pathname, search for 1518 * a valid devfs node handle. 1519 */ 1520 dev_info_t * 1521 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1522 { 1523 dev_info_t *cdip = NULL; 1524 dev_info_t *ndip = NULL; 1525 char *temp_pathname; 1526 int circular; 1527 1528 /* 1529 * Allocate temp buffer 1530 */ 1531 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1532 1533 /* 1534 * Lock parent against changes 1535 */ 1536 ndi_devi_enter(vdip, &circular); 1537 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1538 while ((cdip = ndip) != NULL) { 1539 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1540 1541 *temp_pathname = '\0'; 1542 (void) ddi_pathname(cdip, temp_pathname); 1543 if (strcmp(temp_pathname, pathname) == 0) { 1544 break; 1545 } 1546 } 1547 /* 1548 * Release devinfo lock 1549 */ 1550 ndi_devi_exit(vdip, circular); 1551 1552 /* 1553 * Free the temp buffer 1554 */ 1555 kmem_free(temp_pathname, MAXPATHLEN); 1556 return (cdip); 1557 } 1558 1559 /* 1560 * mdi_client_get_path_count(): 1561 * Utility function to get number of path information nodes 1562 * associated with a given client device. 
1563 */ 1564 int 1565 mdi_client_get_path_count(dev_info_t *cdip) 1566 { 1567 mdi_client_t *ct; 1568 int count = 0; 1569 1570 ct = i_devi_get_client(cdip); 1571 if (ct != NULL) { 1572 count = ct->ct_path_count; 1573 } 1574 return (count); 1575 } 1576 1577 1578 /* 1579 * i_mdi_get_hash_key(): 1580 * Create a hash using strings as keys 1581 * 1582 */ 1583 static int 1584 i_mdi_get_hash_key(char *str) 1585 { 1586 uint32_t g, hash = 0; 1587 char *p; 1588 1589 for (p = str; *p != '\0'; p++) { 1590 g = *p; 1591 hash += g; 1592 } 1593 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1594 } 1595 1596 /* 1597 * mdi_get_lb_policy(): 1598 * Get current load balancing policy for a given client device 1599 */ 1600 client_lb_t 1601 mdi_get_lb_policy(dev_info_t *cdip) 1602 { 1603 client_lb_t lb = LOAD_BALANCE_NONE; 1604 mdi_client_t *ct; 1605 1606 ct = i_devi_get_client(cdip); 1607 if (ct != NULL) { 1608 lb = ct->ct_lb; 1609 } 1610 return (lb); 1611 } 1612 1613 /* 1614 * mdi_set_lb_region_size(): 1615 * Set current region size for the load-balance 1616 */ 1617 int 1618 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1619 { 1620 mdi_client_t *ct; 1621 int rv = MDI_FAILURE; 1622 1623 ct = i_devi_get_client(cdip); 1624 if (ct != NULL && ct->ct_lb_args != NULL) { 1625 ct->ct_lb_args->region_size = region_size; 1626 rv = MDI_SUCCESS; 1627 } 1628 return (rv); 1629 } 1630 1631 /* 1632 * mdi_Set_lb_policy(): 1633 * Set current load balancing policy for a given client device 1634 */ 1635 int 1636 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1637 { 1638 mdi_client_t *ct; 1639 int rv = MDI_FAILURE; 1640 1641 ct = i_devi_get_client(cdip); 1642 if (ct != NULL) { 1643 ct->ct_lb = lb; 1644 rv = MDI_SUCCESS; 1645 } 1646 return (rv); 1647 } 1648 1649 /* 1650 * mdi_failover(): 1651 * failover function called by the vHCI drivers to initiate 1652 * a failover operation. This is typically due to non-availability 1653 * of online paths to route I/O requests. 
Failover can be 1654 * triggered through user application also. 1655 * 1656 * The vHCI driver calls mdi_failover() to initiate a failover 1657 * operation. mdi_failover() calls back into the vHCI driver's 1658 * vo_failover() entry point to perform the actual failover 1659 * operation. The reason for requiring the vHCI driver to 1660 * initiate failover by calling mdi_failover(), instead of directly 1661 * executing vo_failover() itself, is to ensure that the mdi 1662 * framework can keep track of the client state properly. 1663 * Additionally, mdi_failover() provides as a convenience the 1664 * option of performing the failover operation synchronously or 1665 * asynchronously 1666 * 1667 * Upon successful completion of the failover operation, the 1668 * paths that were previously ONLINE will be in the STANDBY state, 1669 * and the newly activated paths will be in the ONLINE state. 1670 * 1671 * The flags modifier determines whether the activation is done 1672 * synchronously: MDI_FAILOVER_SYNC 1673 * Return Values: 1674 * MDI_SUCCESS 1675 * MDI_FAILURE 1676 * MDI_BUSY 1677 */ 1678 /*ARGSUSED*/ 1679 int 1680 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1681 { 1682 int rv; 1683 mdi_client_t *ct; 1684 1685 ct = i_devi_get_client(cdip); 1686 ASSERT(ct != NULL); 1687 if (ct == NULL) { 1688 /* cdip is not a valid client device. Nothing more to do. */ 1689 return (MDI_FAILURE); 1690 } 1691 1692 MDI_CLIENT_LOCK(ct); 1693 1694 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1695 /* A path to the client is being freed */ 1696 MDI_CLIENT_UNLOCK(ct); 1697 return (MDI_BUSY); 1698 } 1699 1700 1701 if (MDI_CLIENT_IS_FAILED(ct)) { 1702 /* 1703 * Client is in failed state. Nothing more to do. 
1704 */ 1705 MDI_CLIENT_UNLOCK(ct); 1706 return (MDI_FAILURE); 1707 } 1708 1709 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1710 /* 1711 * Failover is already in progress; return BUSY 1712 */ 1713 MDI_CLIENT_UNLOCK(ct); 1714 return (MDI_BUSY); 1715 } 1716 /* 1717 * Make sure that mdi_pathinfo node state changes are processed. 1718 * We do not allow failovers to progress while client path state 1719 * changes are in progress 1720 */ 1721 if (ct->ct_unstable) { 1722 if (flags == MDI_FAILOVER_ASYNC) { 1723 MDI_CLIENT_UNLOCK(ct); 1724 return (MDI_BUSY); 1725 } else { 1726 while (ct->ct_unstable) 1727 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1728 } 1729 } 1730 1731 /* 1732 * Client device is in stable state. Before proceeding, perform sanity 1733 * checks again. 1734 */ 1735 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1736 (!i_ddi_devi_attached(ct->ct_dip))) { 1737 /* 1738 * Client is in failed state. Nothing more to do. 1739 */ 1740 MDI_CLIENT_UNLOCK(ct); 1741 return (MDI_FAILURE); 1742 } 1743 1744 /* 1745 * Set the client state as failover in progress. 1746 */ 1747 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1748 ct->ct_failover_flags = flags; 1749 MDI_CLIENT_UNLOCK(ct); 1750 1751 if (flags == MDI_FAILOVER_ASYNC) { 1752 /* 1753 * Submit the initiate failover request via CPR safe 1754 * taskq threads. 1755 */ 1756 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1757 ct, KM_SLEEP); 1758 return (MDI_ACCEPT); 1759 } else { 1760 /* 1761 * Synchronous failover mode. Typically invoked from the user 1762 * land. 1763 */ 1764 rv = i_mdi_failover(ct); 1765 } 1766 return (rv); 1767 } 1768 1769 /* 1770 * i_mdi_failover(): 1771 * internal failover function. Invokes vHCI drivers failover 1772 * callback function and process the failover status 1773 * Return Values: 1774 * None 1775 * 1776 * Note: A client device in failover state can not be detached or freed. 
1777 */ 1778 static int 1779 i_mdi_failover(void *arg) 1780 { 1781 int rv = MDI_SUCCESS; 1782 mdi_client_t *ct = (mdi_client_t *)arg; 1783 mdi_vhci_t *vh = ct->ct_vhci; 1784 1785 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1786 1787 if (vh->vh_ops->vo_failover != NULL) { 1788 /* 1789 * Call vHCI drivers callback routine 1790 */ 1791 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1792 ct->ct_failover_flags); 1793 } 1794 1795 MDI_CLIENT_LOCK(ct); 1796 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1797 1798 /* 1799 * Save the failover return status 1800 */ 1801 ct->ct_failover_status = rv; 1802 1803 /* 1804 * As a result of failover, client status would have been changed. 1805 * Update the client state and wake up anyone waiting on this client 1806 * device. 1807 */ 1808 i_mdi_client_update_state(ct); 1809 1810 cv_broadcast(&ct->ct_failover_cv); 1811 MDI_CLIENT_UNLOCK(ct); 1812 return (rv); 1813 } 1814 1815 /* 1816 * Load balancing is logical block. 1817 * IOs within the range described by region_size 1818 * would go on the same path. This would improve the 1819 * performance by cache-hit on some of the RAID devices. 1820 * Search only for online paths(At some point we 1821 * may want to balance across target ports). 1822 * If no paths are found then default to round-robin. 
1823 */ 1824 static int 1825 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1826 { 1827 int path_index = -1; 1828 int online_path_count = 0; 1829 int online_nonpref_path_count = 0; 1830 int region_size = ct->ct_lb_args->region_size; 1831 mdi_pathinfo_t *pip; 1832 mdi_pathinfo_t *next; 1833 int preferred, path_cnt; 1834 1835 pip = ct->ct_path_head; 1836 while (pip) { 1837 MDI_PI_LOCK(pip); 1838 if (MDI_PI(pip)->pi_state == 1839 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1840 online_path_count++; 1841 } else if (MDI_PI(pip)->pi_state == 1842 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1843 online_nonpref_path_count++; 1844 } 1845 next = (mdi_pathinfo_t *) 1846 MDI_PI(pip)->pi_client_link; 1847 MDI_PI_UNLOCK(pip); 1848 pip = next; 1849 } 1850 /* if found any online/preferred then use this type */ 1851 if (online_path_count > 0) { 1852 path_cnt = online_path_count; 1853 preferred = 1; 1854 } else if (online_nonpref_path_count > 0) { 1855 path_cnt = online_nonpref_path_count; 1856 preferred = 0; 1857 } else { 1858 path_cnt = 0; 1859 } 1860 if (path_cnt) { 1861 path_index = (bp->b_blkno >> region_size) % path_cnt; 1862 pip = ct->ct_path_head; 1863 while (pip && path_index != -1) { 1864 MDI_PI_LOCK(pip); 1865 if (path_index == 0 && 1866 (MDI_PI(pip)->pi_state == 1867 MDI_PATHINFO_STATE_ONLINE) && 1868 MDI_PI(pip)->pi_preferred == preferred) { 1869 MDI_PI_HOLD(pip); 1870 MDI_PI_UNLOCK(pip); 1871 *ret_pip = pip; 1872 return (MDI_SUCCESS); 1873 } 1874 path_index --; 1875 next = (mdi_pathinfo_t *) 1876 MDI_PI(pip)->pi_client_link; 1877 MDI_PI_UNLOCK(pip); 1878 pip = next; 1879 } 1880 if (pip == NULL) { 1881 MDI_DEBUG(4, (CE_NOTE, NULL, 1882 "!lba %llx, no pip !!\n", 1883 bp->b_lblkno)); 1884 } else { 1885 MDI_DEBUG(4, (CE_NOTE, NULL, 1886 "!lba %llx, no pip for path_index, " 1887 "pip %p\n", bp->b_lblkno, (void *)pip)); 1888 } 1889 } 1890 return (MDI_FAILURE); 1891 } 1892 1893 /* 1894 * mdi_select_path(): 1895 * select a 
path to access a client device. 1896 * 1897 * mdi_select_path() function is called by the vHCI drivers to 1898 * select a path to route the I/O request to. The caller passes 1899 * the block I/O data transfer structure ("buf") as one of the 1900 * parameters. The mpxio framework uses the buf structure 1901 * contents to maintain per path statistics (total I/O size / 1902 * count pending). If more than one online paths are available to 1903 * select, the framework automatically selects a suitable path 1904 * for routing I/O request. If a failover operation is active for 1905 * this client device the call shall be failed with MDI_BUSY error 1906 * code. 1907 * 1908 * By default this function returns a suitable path in online 1909 * state based on the current load balancing policy. Currently 1910 * we support LOAD_BALANCE_NONE (Previously selected online path 1911 * will continue to be used till the path is usable) and 1912 * LOAD_BALANCE_RR (Online paths will be selected in a round 1913 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 1914 * based on the logical block). The load balancing 1915 * through vHCI drivers configuration file (driver.conf). 1916 * 1917 * vHCI drivers may override this default behavior by specifying 1918 * appropriate flags. If start_pip is specified (non NULL) is 1919 * used as start point to walk and find the next appropriate path. 1920 * The following values are currently defined: 1921 * MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or 1922 * MDI_SELECT_STANDBY_PATH (to select an STANDBY path). 1923 * 1924 * The non-standard behavior is used by the scsi_vhci driver, 1925 * whenever it has to use a STANDBY/FAULTED path. Eg. during 1926 * attach of client devices (to avoid an unnecessary failover 1927 * when the STANDBY path comes up first), during failover 1928 * (to activate a STANDBY path as ONLINE). 1929 * 1930 * The selected path is returned in a a mdi_hold_path() state 1931 * (pi_ref_cnt). 
Caller should release the hold by calling 1932 * mdi_rele_path(). 1933 * 1934 * Return Values: 1935 * MDI_SUCCESS - Completed successfully 1936 * MDI_BUSY - Client device is busy failing over 1937 * MDI_NOPATH - Client device is online, but no valid path are 1938 * available to access this client device 1939 * MDI_FAILURE - Invalid client device or state 1940 * MDI_DEVI_ONLINING 1941 * - Client device (struct dev_info state) is in 1942 * onlining state. 1943 */ 1944 1945 /*ARGSUSED*/ 1946 int 1947 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 1948 mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip) 1949 { 1950 mdi_client_t *ct; 1951 mdi_pathinfo_t *pip; 1952 mdi_pathinfo_t *next; 1953 mdi_pathinfo_t *head; 1954 mdi_pathinfo_t *start; 1955 client_lb_t lbp; /* load balancing policy */ 1956 int sb = 1; /* standard behavior */ 1957 int preferred = 1; /* preferred path */ 1958 int cond, cont = 1; 1959 int retry = 0; 1960 1961 if (flags != 0) { 1962 /* 1963 * disable default behavior 1964 */ 1965 sb = 0; 1966 } 1967 1968 *ret_pip = NULL; 1969 ct = i_devi_get_client(cdip); 1970 if (ct == NULL) { 1971 /* mdi extensions are NULL, Nothing more to do */ 1972 return (MDI_FAILURE); 1973 } 1974 1975 MDI_CLIENT_LOCK(ct); 1976 1977 if (sb) { 1978 if (MDI_CLIENT_IS_FAILED(ct)) { 1979 /* 1980 * Client is not ready to accept any I/O requests. 1981 * Fail this request. 1982 */ 1983 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 1984 "client state offline ct = %p\n", (void *)ct)); 1985 MDI_CLIENT_UNLOCK(ct); 1986 return (MDI_FAILURE); 1987 } 1988 1989 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1990 /* 1991 * Check for Failover is in progress. If so tell the 1992 * caller that this device is busy. 1993 */ 1994 MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: " 1995 "client failover in progress ct = %p\n", 1996 (void *)ct)); 1997 MDI_CLIENT_UNLOCK(ct); 1998 return (MDI_BUSY); 1999 } 2000 2001 /* 2002 * Check to see whether the client device is attached. 
2003 * If not so, let the vHCI driver manually select a path 2004 * (standby) and let the probe/attach process to continue. 2005 */ 2006 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2007 MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining " 2008 "ct = %p\n", (void *)ct)); 2009 MDI_CLIENT_UNLOCK(ct); 2010 return (MDI_DEVI_ONLINING); 2011 } 2012 } 2013 2014 /* 2015 * Cache in the client list head. If head of the list is NULL 2016 * return MDI_NOPATH 2017 */ 2018 head = ct->ct_path_head; 2019 if (head == NULL) { 2020 MDI_CLIENT_UNLOCK(ct); 2021 return (MDI_NOPATH); 2022 } 2023 2024 /* 2025 * for non default behavior, bypass current 2026 * load balancing policy and always use LOAD_BALANCE_RR 2027 * except that the start point will be adjusted based 2028 * on the provided start_pip 2029 */ 2030 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2031 2032 switch (lbp) { 2033 case LOAD_BALANCE_NONE: 2034 /* 2035 * Load balancing is None or Alternate path mode 2036 * Start looking for a online mdi_pathinfo node starting from 2037 * last known selected path 2038 */ 2039 preferred = 1; 2040 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2041 if (pip == NULL) { 2042 pip = head; 2043 } 2044 start = pip; 2045 do { 2046 MDI_PI_LOCK(pip); 2047 /* 2048 * No need to explicitly check if the path is disabled. 2049 * Since we are checking for state == ONLINE and the 2050 * same veriable is used for DISABLE/ENABLE information. 2051 */ 2052 if ((MDI_PI(pip)->pi_state == 2053 MDI_PATHINFO_STATE_ONLINE) && 2054 preferred == MDI_PI(pip)->pi_preferred) { 2055 /* 2056 * Return the path in hold state. Caller should 2057 * release the lock by calling mdi_rele_path() 2058 */ 2059 MDI_PI_HOLD(pip); 2060 MDI_PI_UNLOCK(pip); 2061 ct->ct_path_last = pip; 2062 *ret_pip = pip; 2063 MDI_CLIENT_UNLOCK(ct); 2064 return (MDI_SUCCESS); 2065 } 2066 2067 /* 2068 * Path is busy. 
2069 */ 2070 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2071 MDI_PI_IS_TRANSIENT(pip)) 2072 retry = 1; 2073 /* 2074 * Keep looking for a next available online path 2075 */ 2076 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2077 if (next == NULL) { 2078 next = head; 2079 } 2080 MDI_PI_UNLOCK(pip); 2081 pip = next; 2082 if (start == pip && preferred) { 2083 preferred = 0; 2084 } else if (start == pip && !preferred) { 2085 cont = 0; 2086 } 2087 } while (cont); 2088 break; 2089 2090 case LOAD_BALANCE_LBA: 2091 /* 2092 * Make sure we are looking 2093 * for an online path. Otherwise, if it is for a STANDBY 2094 * path request, it will go through and fetch an ONLINE 2095 * path which is not desirable. 2096 */ 2097 if ((ct->ct_lb_args != NULL) && 2098 (ct->ct_lb_args->region_size) && bp && 2099 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2100 if (i_mdi_lba_lb(ct, ret_pip, bp) 2101 == MDI_SUCCESS) { 2102 MDI_CLIENT_UNLOCK(ct); 2103 return (MDI_SUCCESS); 2104 } 2105 } 2106 /* FALLTHROUGH */ 2107 case LOAD_BALANCE_RR: 2108 /* 2109 * Load balancing is Round Robin. Start looking for a online 2110 * mdi_pathinfo node starting from last known selected path 2111 * as the start point. If override flags are specified, 2112 * process accordingly. 2113 * If the search is already in effect(start_pip not null), 2114 * then lets just use the same path preference to continue the 2115 * traversal. 2116 */ 2117 2118 if (start_pip != NULL) { 2119 preferred = MDI_PI(start_pip)->pi_preferred; 2120 } else { 2121 preferred = 1; 2122 } 2123 2124 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2125 if (start == NULL) { 2126 pip = head; 2127 } else { 2128 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2129 if (pip == NULL) { 2130 if (!sb) { 2131 if (preferred == 0) { 2132 /* 2133 * Looks like we have completed 2134 * the traversal as preferred 2135 * value is 0. Time to bail out. 
2136 */ 2137 *ret_pip = NULL; 2138 MDI_CLIENT_UNLOCK(ct); 2139 return (MDI_NOPATH); 2140 } else { 2141 /* 2142 * Looks like we reached the 2143 * end of the list. Lets enable 2144 * traversal of non preferred 2145 * paths. 2146 */ 2147 preferred = 0; 2148 } 2149 } 2150 pip = head; 2151 } 2152 } 2153 start = pip; 2154 do { 2155 MDI_PI_LOCK(pip); 2156 if (sb) { 2157 cond = ((MDI_PI(pip)->pi_state == 2158 MDI_PATHINFO_STATE_ONLINE && 2159 MDI_PI(pip)->pi_preferred == 2160 preferred) ? 1 : 0); 2161 } else { 2162 if (flags == MDI_SELECT_ONLINE_PATH) { 2163 cond = ((MDI_PI(pip)->pi_state == 2164 MDI_PATHINFO_STATE_ONLINE && 2165 MDI_PI(pip)->pi_preferred == 2166 preferred) ? 1 : 0); 2167 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2168 cond = ((MDI_PI(pip)->pi_state == 2169 MDI_PATHINFO_STATE_STANDBY && 2170 MDI_PI(pip)->pi_preferred == 2171 preferred) ? 1 : 0); 2172 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2173 MDI_SELECT_STANDBY_PATH)) { 2174 cond = (((MDI_PI(pip)->pi_state == 2175 MDI_PATHINFO_STATE_ONLINE || 2176 (MDI_PI(pip)->pi_state == 2177 MDI_PATHINFO_STATE_STANDBY)) && 2178 MDI_PI(pip)->pi_preferred == 2179 preferred) ? 1 : 0); 2180 } else if (flags == 2181 (MDI_SELECT_STANDBY_PATH | 2182 MDI_SELECT_ONLINE_PATH | 2183 MDI_SELECT_USER_DISABLE_PATH)) { 2184 cond = (((MDI_PI(pip)->pi_state == 2185 MDI_PATHINFO_STATE_ONLINE || 2186 (MDI_PI(pip)->pi_state == 2187 MDI_PATHINFO_STATE_STANDBY) || 2188 (MDI_PI(pip)->pi_state == 2189 (MDI_PATHINFO_STATE_ONLINE| 2190 MDI_PATHINFO_STATE_USER_DISABLE)) || 2191 (MDI_PI(pip)->pi_state == 2192 (MDI_PATHINFO_STATE_STANDBY | 2193 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2194 MDI_PI(pip)->pi_preferred == 2195 preferred) ? 1 : 0); 2196 } else { 2197 cond = 0; 2198 } 2199 } 2200 /* 2201 * No need to explicitly check if the path is disabled. 2202 * Since we are checking for state == ONLINE and the 2203 * same veriable is used for DISABLE/ENABLE information. 2204 */ 2205 if (cond) { 2206 /* 2207 * Return the path in hold state. 
Caller should 2208 * release the lock by calling mdi_rele_path() 2209 */ 2210 MDI_PI_HOLD(pip); 2211 MDI_PI_UNLOCK(pip); 2212 if (sb) 2213 ct->ct_path_last = pip; 2214 *ret_pip = pip; 2215 MDI_CLIENT_UNLOCK(ct); 2216 return (MDI_SUCCESS); 2217 } 2218 /* 2219 * Path is busy. 2220 */ 2221 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2222 MDI_PI_IS_TRANSIENT(pip)) 2223 retry = 1; 2224 2225 /* 2226 * Keep looking for a next available online path 2227 */ 2228 do_again: 2229 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2230 if (next == NULL) { 2231 if (!sb) { 2232 if (preferred == 1) { 2233 /* 2234 * Looks like we reached the 2235 * end of the list. Lets enable 2236 * traversal of non preferred 2237 * paths. 2238 */ 2239 preferred = 0; 2240 next = head; 2241 } else { 2242 /* 2243 * We have done both the passes 2244 * Preferred as well as for 2245 * Non-preferred. Bail out now. 2246 */ 2247 cont = 0; 2248 } 2249 } else { 2250 /* 2251 * Standard behavior case. 2252 */ 2253 next = head; 2254 } 2255 } 2256 MDI_PI_UNLOCK(pip); 2257 if (cont == 0) { 2258 break; 2259 } 2260 pip = next; 2261 2262 if (!sb) { 2263 /* 2264 * We need to handle the selection of 2265 * non-preferred path in the following 2266 * case: 2267 * 2268 * +------+ +------+ +------+ +-----+ 2269 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2270 * +------+ +------+ +------+ +-----+ 2271 * 2272 * If we start the search with B, we need to 2273 * skip beyond B to pick C which is non - 2274 * preferred in the second pass. The following 2275 * test, if true, will allow us to skip over 2276 * the 'start'(B in the example) to select 2277 * other non preferred elements. 
2278 */ 2279 if ((start_pip != NULL) && (start_pip == pip) && 2280 (MDI_PI(start_pip)->pi_preferred 2281 != preferred)) { 2282 /* 2283 * try again after going past the start 2284 * pip 2285 */ 2286 MDI_PI_LOCK(pip); 2287 goto do_again; 2288 } 2289 } else { 2290 /* 2291 * Standard behavior case 2292 */ 2293 if (start == pip && preferred) { 2294 /* look for nonpreferred paths */ 2295 preferred = 0; 2296 } else if (start == pip && !preferred) { 2297 /* 2298 * Exit condition 2299 */ 2300 cont = 0; 2301 } 2302 } 2303 } while (cont); 2304 break; 2305 } 2306 2307 MDI_CLIENT_UNLOCK(ct); 2308 if (retry == 1) { 2309 return (MDI_BUSY); 2310 } else { 2311 return (MDI_NOPATH); 2312 } 2313 } 2314 2315 /* 2316 * For a client, return the next available path to any phci 2317 * 2318 * Note: 2319 * Caller should hold the branch's devinfo node to get a consistent 2320 * snap shot of the mdi_pathinfo nodes. 2321 * 2322 * Please note that even the list is stable the mdi_pathinfo 2323 * node state and properties are volatile. The caller should lock 2324 * and unlock the nodes by calling mdi_pi_lock() and 2325 * mdi_pi_unlock() functions to get a stable properties. 2326 * 2327 * If there is a need to use the nodes beyond the hold of the 2328 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2329 * need to be held against unexpected removal by calling 2330 * mdi_hold_path() and should be released by calling 2331 * mdi_rele_path() on completion. 
2332 */ 2333 mdi_pathinfo_t * 2334 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2335 { 2336 mdi_client_t *ct; 2337 2338 if (!MDI_CLIENT(ct_dip)) 2339 return (NULL); 2340 2341 /* 2342 * Walk through client link 2343 */ 2344 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2345 ASSERT(ct != NULL); 2346 2347 if (pip == NULL) 2348 return ((mdi_pathinfo_t *)ct->ct_path_head); 2349 2350 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2351 } 2352 2353 /* 2354 * For a phci, return the next available path to any client 2355 * Note: ditto mdi_get_next_phci_path() 2356 */ 2357 mdi_pathinfo_t * 2358 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2359 { 2360 mdi_phci_t *ph; 2361 2362 if (!MDI_PHCI(ph_dip)) 2363 return (NULL); 2364 2365 /* 2366 * Walk through pHCI link 2367 */ 2368 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2369 ASSERT(ph != NULL); 2370 2371 if (pip == NULL) 2372 return ((mdi_pathinfo_t *)ph->ph_path_head); 2373 2374 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2375 } 2376 2377 /* 2378 * mdi_hold_path(): 2379 * Hold the mdi_pathinfo node against unwanted unexpected free. 2380 * Return Values: 2381 * None 2382 */ 2383 void 2384 mdi_hold_path(mdi_pathinfo_t *pip) 2385 { 2386 if (pip) { 2387 MDI_PI_LOCK(pip); 2388 MDI_PI_HOLD(pip); 2389 MDI_PI_UNLOCK(pip); 2390 } 2391 } 2392 2393 2394 /* 2395 * mdi_rele_path(): 2396 * Release the mdi_pathinfo node which was selected 2397 * through mdi_select_path() mechanism or manually held by 2398 * calling mdi_hold_path(). 2399 * Return Values: 2400 * None 2401 */ 2402 void 2403 mdi_rele_path(mdi_pathinfo_t *pip) 2404 { 2405 if (pip) { 2406 MDI_PI_LOCK(pip); 2407 MDI_PI_RELE(pip); 2408 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2409 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2410 } 2411 MDI_PI_UNLOCK(pip); 2412 } 2413 } 2414 2415 /* 2416 * mdi_pi_lock(): 2417 * Lock the mdi_pathinfo node. 
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	/* Runtime guard in addition to the ASSERT: a NULL pip is a no-op */
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.
 * Note:
 *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	/* Runtime guard in addition to the ASSERT: a NULL pip is a no-op */
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *		Returns a pointer to the mdi_pathinfo node if a matching node is
 *		found.
 *
 *		pdip	devinfo node of the pHCI; must resolve to a pHCI
 *			via i_devi_get_phci().
 *		caddr	client unit address.  If NULL, the pHCI's own path
 *			list is searched; otherwise the client identified by
 *			caddr is looked up and its path list is searched for
 *			a node that belongs to this pHCI.
 *		paddr	path unit address to match (compared with strcmp()).
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL (bad arguments, invalid/offline pHCI, unknown client,
 *		or no matching path)
 * Notes:
 *		Caller need not hold any locks to call this function.
 *		NOTE(review): no hold is taken on the returned node here;
 *		the list lock is dropped before returning.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	mdi_client_t		*ct;
	mdi_pathinfo_t		*pip = NULL;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (CE_WARN, pdip,
			    "!mdi_pi_find: offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* Linear walk of the pHCI link; pip ends NULL on no match */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
		    (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
		    "found for caddr %s", caddr ? caddr : "NULL"));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc_compatible():
 *		Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path is capable of having properties attached
 *		and passed to mdi_pi_online() to fully attach and online the
 *		path and client device node.
 *		The mdi_pathinfo node returned by this function must be
 *		destroyed using mdi_pi_free() if the path is no longer
 *		operational or if the caller fails to attach a client device
 *		node when calling mdi_pi_online(). The framework will not free
 *		the resources allocated.
 *		This function can be called from both interrupt and kernel
 *		contexts.  DDI_NOSLEEP flag should be used while calling
 *		from interrupt contexts.
 *		NOTE(review): "flags" is not consulted in this function
 *		(hence ARGSUSED) and i_mdi_pi_alloc() allocates with
 *		KM_SLEEP; confirm the interrupt-context claim above.
 *
 *		pdip		pHCI devinfo node
 *		cname, caddr	client name and unit address; used to find
 *				the client, or to create it if not found
 *		paddr		path unit address
 *		compatible, ncompatible
 *				passed through to i_mdi_devinfo_create() when
 *				the client has no devinfo node yet
 *		ret_pip		set to the existing or newly allocated
 *				pathinfo node, or to NULL on failure
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE	(NULL argument, detaching or invalid pHCI,
 *				no vHCI)
 *		MDI_BUSY	(pHCI not ready)
 *		MDI_NOMEM	(client devinfo node could not be created)
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;
	int		path_allocated = 0;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
	    cname ? cname : "NULL", caddr ? caddr : "NULL",
	    paddr ? paddr : "NULL"));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/* Paired with MDI_PHCI_STABLE() on the common exit path below */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* rv is still MDI_NOMEM and pip is still NULL here */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	/* Tag the devinfo node as an MDI client and link it to ct */
	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* Reuse an existing path for this (pHCI, paddr) pair if present */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device.  Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (CE_NOTE, pdip,
	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));

	/* Only a freshly allocated path is added to the vhci cache */
	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *		Convenience wrapper around mdi_pi_alloc_compatible() that
 *		supplies no compatible list (NULL, 0).
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node and add to the pHCI path list
 *		and to the client path list.  The new node starts in the
 *		INIT|TRANSIENT state and inherits the pHCI's user/driver
 *		disable flags.  The caller must hold the vHCI client lock
 *		(asserted below).
 * Return Values:
 *		mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	int		se_flag;
	int		kmem_flag;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Propagate the pHCI's disable flags to the new path */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	/* Private copy of the path address; freed in i_mdi_pi_free() */
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	return (pip);
}

/*
 * i_mdi_phci_add_path():
 *		Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *		The caller must have entered the pHCI devinfo node (asserted
 *		with DEVI_BUSY_OWNED).  The per-pHCI lock is acquired and
 *		released here around the list update.
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	MDI_PHCI_LOCK(ph);
	/* Append at the tail; an empty list also sets the head */
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 * Notes:
 *		The caller must have entered the client devinfo node
 *		(asserted with DEVI_BUSY_OWNED).  The client lock is acquired
 *		and released here around the list update.
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	MDI_CLIENT_LOCK(ct);
	/* Append at the tail; an empty list also sets the head */
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 *
 *		The node must be in the offline, init or initing state.  If
 *		references are outstanding this function waits (in 60 second
 *		timed waits on pi_ref_cv) for them to drain and gives up with
 *		MDI_BUSY when a wait times out.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		(or whatever non-success value the vHCI's vo_pi_uninit()
 *		callback returns)
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
		/* cv_timedwait() returning -1 means the 60s timeout expired */
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    (void *)pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	/* Remember the PM hold now; pip is freed before it is acted upon */
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop the client lock so that the vHCI client lock can be taken
	 * first, then re-take the client lock.
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	/* vo_pi_uninit() is skipped for a path still in the initing state */
	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		/* pip must not be touched after this call */
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/* Path was not freed: restore the vhci cache entry removed above */
	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node: destroy its kstats, unlink it
 *		from the client and pHCI path lists, invalidate the devinfo
 *		snapshot cache and release the node's mutex, condition
 *		variables, address string, property list and memory.
 *		The caller must hold the client lock (asserted below).
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;
	int	se_flag;
	int	kmem_flag;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		/* Size matches the strlen() + 1 allocation in i_mdi_pi_alloc */
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
3072 * Notes: 3073 * Caller should hold per-pHCI mutex 3074 */ 3075 static void 3076 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3077 { 3078 mdi_pathinfo_t *prev = NULL; 3079 mdi_pathinfo_t *path = NULL; 3080 3081 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3082 3083 MDI_PHCI_LOCK(ph); 3084 path = ph->ph_path_head; 3085 while (path != NULL) { 3086 if (path == pip) { 3087 break; 3088 } 3089 prev = path; 3090 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3091 } 3092 3093 if (path) { 3094 ph->ph_path_count--; 3095 if (prev) { 3096 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3097 } else { 3098 ph->ph_path_head = 3099 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3100 } 3101 if (ph->ph_path_tail == path) { 3102 ph->ph_path_tail = prev; 3103 } 3104 } 3105 3106 /* 3107 * Clear the pHCI link 3108 */ 3109 MDI_PI(pip)->pi_phci_link = NULL; 3110 MDI_PI(pip)->pi_phci = NULL; 3111 MDI_PHCI_UNLOCK(ph); 3112 } 3113 3114 /* 3115 * i_mdi_client_remove_path(): 3116 * Remove a mdi_pathinfo node from client path list. 
 * Notes:
 *		The caller must have entered the client devinfo node and
 *		must hold the client lock (both asserted below).  The node's
 *		client link and client back pointer are cleared even if the
 *		node was not found on the list.
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Locate pip on the singly linked list, tracking its predecessor */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* Do not leave ct_path_last pointing at the removed node */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Common worker for mdi_pi_online(), mdi_pi_standby(),
 *		mdi_pi_fault() and mdi_pi_offline(): move a mdi_pathinfo
 *		node to the requested state.
 *
 *		Outline: run the vHCI's vo_pi_init() callback if the path is
 *		still initing; refuse if the pHCI is not ready; wait out any
 *		transient path state and any client failover; mark the path
 *		transient for the requested state; call the vHCI's
 *		vo_pi_state_change() callback; then commit or roll back the
 *		path state, update the client state and online/offline the
 *		client devinfo node as needed.
 *
 *		pip	pathinfo node to change
 *		state	target MDI_PATHINFO_STATE_* value
 *		flag	NDI flags; NDI_DEVI_REMOVE is examined here and the
 *			value is passed on to vo_pi_state_change()
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY (pHCI not ready, or an ndi_devi_online()/
 *		ndi_devi_offline() call returned NDI_BUSY)
 *		(or the non-success value returned by vo_pi_state_change())
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* The callback is made with the pathinfo lock dropped */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Paired with MDI_PHCI_STABLE() at state_change_exit */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Don't offline the client dev_info node unless we have
		 * no available paths left at all.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) &&
		    (ct->ct_path_count == 1)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/* Neither ct nor pip is locked at this point */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* Notify the vHCI with no MDI locks held; no callback means success */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	/* Commit the new state on success, otherwise restore the old one */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			/* From here until the switch below rv is an NDI code */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3482 * Return Values: 3483 * MDI_SUCCESS 3484 * MDI_FAILURE 3485 */ 3486 int 3487 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3488 { 3489 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3490 int client_held = 0; 3491 int rv; 3492 3493 ASSERT(ct != NULL); 3494 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3495 if (rv != MDI_SUCCESS) 3496 return (rv); 3497 3498 MDI_PI_LOCK(pip); 3499 if (MDI_PI(pip)->pi_pm_held == 0) { 3500 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3501 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3502 i_mdi_pm_hold_pip(pip); 3503 client_held = 1; 3504 } 3505 MDI_PI_UNLOCK(pip); 3506 3507 if (client_held) { 3508 MDI_CLIENT_LOCK(ct); 3509 if (ct->ct_power_cnt == 0) { 3510 rv = i_mdi_power_all_phci(ct); 3511 } 3512 3513 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3514 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3515 i_mdi_pm_hold_client(ct, 1); 3516 MDI_CLIENT_UNLOCK(ct); 3517 } 3518 3519 return (rv); 3520 } 3521 3522 /* 3523 * mdi_pi_standby(): 3524 * Place the mdi_pathinfo node in standby state 3525 * 3526 * Return Values: 3527 * MDI_SUCCESS 3528 * MDI_FAILURE 3529 */ 3530 int 3531 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3532 { 3533 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3534 } 3535 3536 /* 3537 * mdi_pi_fault(): 3538 * Place the mdi_pathinfo node in fault'ed state 3539 * Return Values: 3540 * MDI_SUCCESS 3541 * MDI_FAILURE 3542 */ 3543 int 3544 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3545 { 3546 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3547 } 3548 3549 /* 3550 * mdi_pi_offline(): 3551 * Offline a mdi_pathinfo node. 
3552 * Return Values: 3553 * MDI_SUCCESS 3554 * MDI_FAILURE 3555 */ 3556 int 3557 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3558 { 3559 int ret, client_held = 0; 3560 mdi_client_t *ct; 3561 3562 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3563 3564 if (ret == MDI_SUCCESS) { 3565 MDI_PI_LOCK(pip); 3566 if (MDI_PI(pip)->pi_pm_held) { 3567 client_held = 1; 3568 } 3569 MDI_PI_UNLOCK(pip); 3570 3571 if (client_held) { 3572 ct = MDI_PI(pip)->pi_client; 3573 MDI_CLIENT_LOCK(ct); 3574 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3575 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3576 i_mdi_pm_rele_client(ct, 1); 3577 MDI_CLIENT_UNLOCK(ct); 3578 } 3579 } 3580 3581 return (ret); 3582 } 3583 3584 /* 3585 * i_mdi_pi_offline(): 3586 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3587 */ 3588 static int 3589 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3590 { 3591 dev_info_t *vdip = NULL; 3592 mdi_vhci_t *vh = NULL; 3593 mdi_client_t *ct = NULL; 3594 int (*f)(); 3595 int rv; 3596 3597 MDI_PI_LOCK(pip); 3598 ct = MDI_PI(pip)->pi_client; 3599 ASSERT(ct != NULL); 3600 3601 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3602 /* 3603 * Give a chance for pending I/Os to complete. 3604 */ 3605 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3606 "%d cmds still pending on path: %p\n", 3607 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3608 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3609 &MDI_PI(pip)->pi_mutex, 3610 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3611 /* 3612 * The timeout time reached without ref_cnt being zero 3613 * being signaled. 
3614 */ 3615 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3616 "Timeout reached on path %p without the cond\n", 3617 (void *)pip)); 3618 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3619 "%d cmds still pending on path: %p\n", 3620 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3621 } 3622 } 3623 vh = ct->ct_vhci; 3624 vdip = vh->vh_dip; 3625 3626 /* 3627 * Notify vHCI that has registered this event 3628 */ 3629 ASSERT(vh->vh_ops); 3630 f = vh->vh_ops->vo_pi_state_change; 3631 3632 if (f != NULL) { 3633 MDI_PI_UNLOCK(pip); 3634 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3635 flags)) != MDI_SUCCESS) { 3636 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3637 "!vo_path_offline failed " 3638 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3639 } 3640 MDI_PI_LOCK(pip); 3641 } 3642 3643 /* 3644 * Set the mdi_pathinfo node state and clear the transient condition 3645 */ 3646 MDI_PI_SET_OFFLINE(pip); 3647 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3648 MDI_PI_UNLOCK(pip); 3649 3650 MDI_CLIENT_LOCK(ct); 3651 if (rv == MDI_SUCCESS) { 3652 if (ct->ct_unstable == 0) { 3653 dev_info_t *cdip = ct->ct_dip; 3654 3655 /* 3656 * Onlining the mdi_pathinfo node will impact the 3657 * client state Update the client and dev_info node 3658 * state accordingly 3659 */ 3660 i_mdi_client_update_state(ct); 3661 rv = NDI_SUCCESS; 3662 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3663 if (cdip && 3664 (i_ddi_node_state(cdip) >= 3665 DS_INITIALIZED)) { 3666 MDI_CLIENT_UNLOCK(ct); 3667 rv = ndi_devi_offline(cdip, 0); 3668 MDI_CLIENT_LOCK(ct); 3669 if (rv != NDI_SUCCESS) { 3670 /* 3671 * ndi_devi_offline failed. 3672 * Reset client flags to 3673 * online. 
3674 */ 3675 MDI_DEBUG(4, (CE_WARN, cdip, 3676 "!ndi_devi_offline: failed " 3677 " Error: %x", rv)); 3678 MDI_CLIENT_SET_ONLINE(ct); 3679 } 3680 } 3681 } 3682 /* 3683 * Convert to MDI error code 3684 */ 3685 switch (rv) { 3686 case NDI_SUCCESS: 3687 rv = MDI_SUCCESS; 3688 break; 3689 case NDI_BUSY: 3690 rv = MDI_BUSY; 3691 break; 3692 default: 3693 rv = MDI_FAILURE; 3694 break; 3695 } 3696 } 3697 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3698 i_mdi_report_path_state(ct, pip); 3699 } 3700 3701 MDI_CLIENT_UNLOCK(ct); 3702 3703 /* 3704 * Change in the mdi_pathinfo node state will impact the client state 3705 */ 3706 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3707 (void *)ct, (void *)pip)); 3708 return (rv); 3709 } 3710 3711 3712 /* 3713 * mdi_pi_get_addr(): 3714 * Get the unit address associated with a mdi_pathinfo node 3715 * 3716 * Return Values: 3717 * char * 3718 */ 3719 char * 3720 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3721 { 3722 if (pip == NULL) 3723 return (NULL); 3724 3725 return (MDI_PI(pip)->pi_addr); 3726 } 3727 3728 /* 3729 * mdi_pi_get_client(): 3730 * Get the client devinfo associated with a mdi_pathinfo node 3731 * 3732 * Return Values: 3733 * Handle to client device dev_info node 3734 */ 3735 dev_info_t * 3736 mdi_pi_get_client(mdi_pathinfo_t *pip) 3737 { 3738 dev_info_t *dip = NULL; 3739 if (pip) { 3740 dip = MDI_PI(pip)->pi_client->ct_dip; 3741 } 3742 return (dip); 3743 } 3744 3745 /* 3746 * mdi_pi_get_phci(): 3747 * Get the pHCI devinfo associated with the mdi_pathinfo node 3748 * Return Values: 3749 * Handle to dev_info node 3750 */ 3751 dev_info_t * 3752 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3753 { 3754 dev_info_t *dip = NULL; 3755 if (pip) { 3756 dip = MDI_PI(pip)->pi_phci->ph_dip; 3757 } 3758 return (dip); 3759 } 3760 3761 /* 3762 * mdi_pi_get_client_private(): 3763 * Get the client private information associated with the 3764 * mdi_pathinfo node 3765 */ 3766 void * 3767 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3768 { 
3769 void *cprivate = NULL; 3770 if (pip) { 3771 cprivate = MDI_PI(pip)->pi_cprivate; 3772 } 3773 return (cprivate); 3774 } 3775 3776 /* 3777 * mdi_pi_set_client_private(): 3778 * Set the client private information in the mdi_pathinfo node 3779 */ 3780 void 3781 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3782 { 3783 if (pip) { 3784 MDI_PI(pip)->pi_cprivate = priv; 3785 } 3786 } 3787 3788 /* 3789 * mdi_pi_get_phci_private(): 3790 * Get the pHCI private information associated with the 3791 * mdi_pathinfo node 3792 */ 3793 caddr_t 3794 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3795 { 3796 caddr_t pprivate = NULL; 3797 if (pip) { 3798 pprivate = MDI_PI(pip)->pi_pprivate; 3799 } 3800 return (pprivate); 3801 } 3802 3803 /* 3804 * mdi_pi_set_phci_private(): 3805 * Set the pHCI private information in the mdi_pathinfo node 3806 */ 3807 void 3808 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3809 { 3810 if (pip) { 3811 MDI_PI(pip)->pi_pprivate = priv; 3812 } 3813 } 3814 3815 /* 3816 * mdi_pi_get_state(): 3817 * Get the mdi_pathinfo node state. Transient states are internal 3818 * and not provided to the users 3819 */ 3820 mdi_pathinfo_state_t 3821 mdi_pi_get_state(mdi_pathinfo_t *pip) 3822 { 3823 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3824 3825 if (pip) { 3826 if (MDI_PI_IS_TRANSIENT(pip)) { 3827 /* 3828 * mdi_pathinfo is in state transition. Return the 3829 * last good state. 3830 */ 3831 state = MDI_PI_OLD_STATE(pip); 3832 } else { 3833 state = MDI_PI_STATE(pip); 3834 } 3835 } 3836 return (state); 3837 } 3838 3839 /* 3840 * Note that the following function needs to be the new interface for 3841 * mdi_pi_get_state when mpxio gets integrated to ON. 3842 */ 3843 int 3844 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3845 uint32_t *ext_state) 3846 { 3847 *state = MDI_PATHINFO_STATE_INIT; 3848 3849 if (pip) { 3850 if (MDI_PI_IS_TRANSIENT(pip)) { 3851 /* 3852 * mdi_pathinfo is in state transition. 
Return the 3853 * last good state. 3854 */ 3855 *state = MDI_PI_OLD_STATE(pip); 3856 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3857 } else { 3858 *state = MDI_PI_STATE(pip); 3859 *ext_state = MDI_PI_EXT_STATE(pip); 3860 } 3861 } 3862 return (MDI_SUCCESS); 3863 } 3864 3865 /* 3866 * mdi_pi_get_preferred: 3867 * Get the preferred path flag 3868 */ 3869 int 3870 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3871 { 3872 if (pip) { 3873 return (MDI_PI(pip)->pi_preferred); 3874 } 3875 return (0); 3876 } 3877 3878 /* 3879 * mdi_pi_set_preferred: 3880 * Set the preferred path flag 3881 */ 3882 void 3883 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3884 { 3885 if (pip) { 3886 MDI_PI(pip)->pi_preferred = preferred; 3887 } 3888 } 3889 3890 /* 3891 * mdi_pi_set_state(): 3892 * Set the mdi_pathinfo node state 3893 */ 3894 void 3895 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3896 { 3897 uint32_t ext_state; 3898 3899 if (pip) { 3900 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3901 MDI_PI(pip)->pi_state = state; 3902 MDI_PI(pip)->pi_state |= ext_state; 3903 } 3904 } 3905 3906 /* 3907 * Property functions: 3908 */ 3909 int 3910 i_map_nvlist_error_to_mdi(int val) 3911 { 3912 int rv; 3913 3914 switch (val) { 3915 case 0: 3916 rv = DDI_PROP_SUCCESS; 3917 break; 3918 case EINVAL: 3919 case ENOTSUP: 3920 rv = DDI_PROP_INVAL_ARG; 3921 break; 3922 case ENOMEM: 3923 rv = DDI_PROP_NO_MEMORY; 3924 break; 3925 default: 3926 rv = DDI_PROP_NOT_FOUND; 3927 break; 3928 } 3929 return (rv); 3930 } 3931 3932 /* 3933 * mdi_pi_get_next_prop(): 3934 * Property walk function. The caller should hold mdi_pi_lock() 3935 * and release by calling mdi_pi_unlock() at the end of walk to 3936 * get a consistent value. 
3937 */ 3938 nvpair_t * 3939 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3940 { 3941 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3942 return (NULL); 3943 } 3944 ASSERT(MDI_PI_LOCKED(pip)); 3945 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3946 } 3947 3948 /* 3949 * mdi_prop_remove(): 3950 * Remove the named property from the named list. 3951 */ 3952 int 3953 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3954 { 3955 if (pip == NULL) { 3956 return (DDI_PROP_NOT_FOUND); 3957 } 3958 ASSERT(!MDI_PI_LOCKED(pip)); 3959 MDI_PI_LOCK(pip); 3960 if (MDI_PI(pip)->pi_prop == NULL) { 3961 MDI_PI_UNLOCK(pip); 3962 return (DDI_PROP_NOT_FOUND); 3963 } 3964 if (name) { 3965 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3966 } else { 3967 char nvp_name[MAXNAMELEN]; 3968 nvpair_t *nvp; 3969 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3970 while (nvp) { 3971 nvpair_t *next; 3972 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3973 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3974 nvpair_name(nvp)); 3975 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3976 nvp_name); 3977 nvp = next; 3978 } 3979 } 3980 MDI_PI_UNLOCK(pip); 3981 return (DDI_PROP_SUCCESS); 3982 } 3983 3984 /* 3985 * mdi_prop_size(): 3986 * Get buffer size needed to pack the property data. 3987 * Caller should hold the mdi_pathinfo_t lock to get a consistent 3988 * buffer size. 3989 */ 3990 int 3991 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 3992 { 3993 int rv; 3994 size_t bufsize; 3995 3996 *buflenp = 0; 3997 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3998 return (DDI_PROP_NOT_FOUND); 3999 } 4000 ASSERT(MDI_PI_LOCKED(pip)); 4001 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4002 &bufsize, NV_ENCODE_NATIVE); 4003 *buflenp = bufsize; 4004 return (i_map_nvlist_error_to_mdi(rv)); 4005 } 4006 4007 /* 4008 * mdi_prop_pack(): 4009 * pack the property list. 
The caller should hold the 4010 * mdi_pathinfo_t node to get a consistent data 4011 */ 4012 int 4013 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4014 { 4015 int rv; 4016 size_t bufsize; 4017 4018 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4019 return (DDI_PROP_NOT_FOUND); 4020 } 4021 4022 ASSERT(MDI_PI_LOCKED(pip)); 4023 4024 bufsize = buflen; 4025 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4026 NV_ENCODE_NATIVE, KM_SLEEP); 4027 4028 return (i_map_nvlist_error_to_mdi(rv)); 4029 } 4030 4031 /* 4032 * mdi_prop_update_byte(): 4033 * Create/Update a byte property 4034 */ 4035 int 4036 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4037 { 4038 int rv; 4039 4040 if (pip == NULL) { 4041 return (DDI_PROP_INVAL_ARG); 4042 } 4043 ASSERT(!MDI_PI_LOCKED(pip)); 4044 MDI_PI_LOCK(pip); 4045 if (MDI_PI(pip)->pi_prop == NULL) { 4046 MDI_PI_UNLOCK(pip); 4047 return (DDI_PROP_NOT_FOUND); 4048 } 4049 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4050 MDI_PI_UNLOCK(pip); 4051 return (i_map_nvlist_error_to_mdi(rv)); 4052 } 4053 4054 /* 4055 * mdi_prop_update_byte_array(): 4056 * Create/Update a byte array property 4057 */ 4058 int 4059 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4060 uint_t nelements) 4061 { 4062 int rv; 4063 4064 if (pip == NULL) { 4065 return (DDI_PROP_INVAL_ARG); 4066 } 4067 ASSERT(!MDI_PI_LOCKED(pip)); 4068 MDI_PI_LOCK(pip); 4069 if (MDI_PI(pip)->pi_prop == NULL) { 4070 MDI_PI_UNLOCK(pip); 4071 return (DDI_PROP_NOT_FOUND); 4072 } 4073 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4074 MDI_PI_UNLOCK(pip); 4075 return (i_map_nvlist_error_to_mdi(rv)); 4076 } 4077 4078 /* 4079 * mdi_prop_update_int(): 4080 * Create/Update a 32 bit integer property 4081 */ 4082 int 4083 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4084 { 4085 int rv; 4086 4087 if (pip == NULL) { 4088 return (DDI_PROP_INVAL_ARG); 4089 } 4090 
ASSERT(!MDI_PI_LOCKED(pip)); 4091 MDI_PI_LOCK(pip); 4092 if (MDI_PI(pip)->pi_prop == NULL) { 4093 MDI_PI_UNLOCK(pip); 4094 return (DDI_PROP_NOT_FOUND); 4095 } 4096 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4097 MDI_PI_UNLOCK(pip); 4098 return (i_map_nvlist_error_to_mdi(rv)); 4099 } 4100 4101 /* 4102 * mdi_prop_update_int64(): 4103 * Create/Update a 64 bit integer property 4104 */ 4105 int 4106 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4107 { 4108 int rv; 4109 4110 if (pip == NULL) { 4111 return (DDI_PROP_INVAL_ARG); 4112 } 4113 ASSERT(!MDI_PI_LOCKED(pip)); 4114 MDI_PI_LOCK(pip); 4115 if (MDI_PI(pip)->pi_prop == NULL) { 4116 MDI_PI_UNLOCK(pip); 4117 return (DDI_PROP_NOT_FOUND); 4118 } 4119 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4120 MDI_PI_UNLOCK(pip); 4121 return (i_map_nvlist_error_to_mdi(rv)); 4122 } 4123 4124 /* 4125 * mdi_prop_update_int_array(): 4126 * Create/Update a int array property 4127 */ 4128 int 4129 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4130 uint_t nelements) 4131 { 4132 int rv; 4133 4134 if (pip == NULL) { 4135 return (DDI_PROP_INVAL_ARG); 4136 } 4137 ASSERT(!MDI_PI_LOCKED(pip)); 4138 MDI_PI_LOCK(pip); 4139 if (MDI_PI(pip)->pi_prop == NULL) { 4140 MDI_PI_UNLOCK(pip); 4141 return (DDI_PROP_NOT_FOUND); 4142 } 4143 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4144 nelements); 4145 MDI_PI_UNLOCK(pip); 4146 return (i_map_nvlist_error_to_mdi(rv)); 4147 } 4148 4149 /* 4150 * mdi_prop_update_string(): 4151 * Create/Update a string property 4152 */ 4153 int 4154 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4155 { 4156 int rv; 4157 4158 if (pip == NULL) { 4159 return (DDI_PROP_INVAL_ARG); 4160 } 4161 ASSERT(!MDI_PI_LOCKED(pip)); 4162 MDI_PI_LOCK(pip); 4163 if (MDI_PI(pip)->pi_prop == NULL) { 4164 MDI_PI_UNLOCK(pip); 4165 return (DDI_PROP_NOT_FOUND); 4166 } 4167 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4168 MDI_PI_UNLOCK(pip); 4169 return (i_map_nvlist_error_to_mdi(rv)); 4170 } 4171 4172 /* 4173 * mdi_prop_update_string_array(): 4174 * Create/Update a string array property 4175 */ 4176 int 4177 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4178 uint_t nelements) 4179 { 4180 int rv; 4181 4182 if (pip == NULL) { 4183 return (DDI_PROP_INVAL_ARG); 4184 } 4185 ASSERT(!MDI_PI_LOCKED(pip)); 4186 MDI_PI_LOCK(pip); 4187 if (MDI_PI(pip)->pi_prop == NULL) { 4188 MDI_PI_UNLOCK(pip); 4189 return (DDI_PROP_NOT_FOUND); 4190 } 4191 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4192 nelements); 4193 MDI_PI_UNLOCK(pip); 4194 return (i_map_nvlist_error_to_mdi(rv)); 4195 } 4196 4197 /* 4198 * mdi_prop_lookup_byte(): 4199 * Look for byte property identified by name. The data returned 4200 * is the actual property and valid as long as mdi_pathinfo_t node 4201 * is alive. 4202 */ 4203 int 4204 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4205 { 4206 int rv; 4207 4208 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4209 return (DDI_PROP_NOT_FOUND); 4210 } 4211 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4212 return (i_map_nvlist_error_to_mdi(rv)); 4213 } 4214 4215 4216 /* 4217 * mdi_prop_lookup_byte_array(): 4218 * Look for byte array property identified by name. The data 4219 * returned is the actual property and valid as long as 4220 * mdi_pathinfo_t node is alive. 4221 */ 4222 int 4223 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4224 uint_t *nelements) 4225 { 4226 int rv; 4227 4228 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4229 return (DDI_PROP_NOT_FOUND); 4230 } 4231 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4232 nelements); 4233 return (i_map_nvlist_error_to_mdi(rv)); 4234 } 4235 4236 /* 4237 * mdi_prop_lookup_int(): 4238 * Look for int property identified by name. 
The data returned 4239 * is the actual property and valid as long as mdi_pathinfo_t 4240 * node is alive. 4241 */ 4242 int 4243 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4244 { 4245 int rv; 4246 4247 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4248 return (DDI_PROP_NOT_FOUND); 4249 } 4250 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4251 return (i_map_nvlist_error_to_mdi(rv)); 4252 } 4253 4254 /* 4255 * mdi_prop_lookup_int64(): 4256 * Look for int64 property identified by name. The data returned 4257 * is the actual property and valid as long as mdi_pathinfo_t node 4258 * is alive. 4259 */ 4260 int 4261 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4262 { 4263 int rv; 4264 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4265 return (DDI_PROP_NOT_FOUND); 4266 } 4267 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4268 return (i_map_nvlist_error_to_mdi(rv)); 4269 } 4270 4271 /* 4272 * mdi_prop_lookup_int_array(): 4273 * Look for int array property identified by name. The data 4274 * returned is the actual property and valid as long as 4275 * mdi_pathinfo_t node is alive. 4276 */ 4277 int 4278 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4279 uint_t *nelements) 4280 { 4281 int rv; 4282 4283 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4284 return (DDI_PROP_NOT_FOUND); 4285 } 4286 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4287 (int32_t **)data, nelements); 4288 return (i_map_nvlist_error_to_mdi(rv)); 4289 } 4290 4291 /* 4292 * mdi_prop_lookup_string(): 4293 * Look for string property identified by name. The data 4294 * returned is the actual property and valid as long as 4295 * mdi_pathinfo_t node is alive. 
4296 */ 4297 int 4298 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4299 { 4300 int rv; 4301 4302 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4303 return (DDI_PROP_NOT_FOUND); 4304 } 4305 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4306 return (i_map_nvlist_error_to_mdi(rv)); 4307 } 4308 4309 /* 4310 * mdi_prop_lookup_string_array(): 4311 * Look for string array property identified by name. The data 4312 * returned is the actual property and valid as long as 4313 * mdi_pathinfo_t node is alive. 4314 */ 4315 int 4316 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4317 uint_t *nelements) 4318 { 4319 int rv; 4320 4321 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4322 return (DDI_PROP_NOT_FOUND); 4323 } 4324 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4325 nelements); 4326 return (i_map_nvlist_error_to_mdi(rv)); 4327 } 4328 4329 /* 4330 * mdi_prop_free(): 4331 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4332 * functions return the pointer to actual property data and not a 4333 * copy of it. So the data returned is valid as long as 4334 * mdi_pathinfo_t node is valid. 
4335 */ 4336 /*ARGSUSED*/ 4337 int 4338 mdi_prop_free(void *data) 4339 { 4340 return (DDI_PROP_SUCCESS); 4341 } 4342 4343 /*ARGSUSED*/ 4344 static void 4345 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4346 { 4347 char *phci_path, *ct_path; 4348 char *ct_status; 4349 char *status; 4350 dev_info_t *dip = ct->ct_dip; 4351 char lb_buf[64]; 4352 4353 ASSERT(MDI_CLIENT_LOCKED(ct)); 4354 if ((dip == NULL) || (ddi_get_instance(dip) == -1) || 4355 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4356 return; 4357 } 4358 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4359 ct_status = "optimal"; 4360 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4361 ct_status = "degraded"; 4362 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4363 ct_status = "failed"; 4364 } else { 4365 ct_status = "unknown"; 4366 } 4367 4368 if (MDI_PI_IS_OFFLINE(pip)) { 4369 status = "offline"; 4370 } else if (MDI_PI_IS_ONLINE(pip)) { 4371 status = "online"; 4372 } else if (MDI_PI_IS_STANDBY(pip)) { 4373 status = "standby"; 4374 } else if (MDI_PI_IS_FAULT(pip)) { 4375 status = "faulted"; 4376 } else { 4377 status = "unknown"; 4378 } 4379 4380 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4381 (void) snprintf(lb_buf, sizeof (lb_buf), 4382 "%s, region-size: %d", mdi_load_balance_lba, 4383 ct->ct_lb_args->region_size); 4384 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4385 (void) snprintf(lb_buf, sizeof (lb_buf), 4386 "%s", mdi_load_balance_none); 4387 } else { 4388 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4389 mdi_load_balance_rr); 4390 } 4391 4392 if (dip) { 4393 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4394 phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4395 cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, " 4396 "path %s (%s%d) to target address: %s is %s" 4397 " Load balancing: %s\n", 4398 ddi_pathname(dip, ct_path), ddi_driver_name(dip), 4399 ddi_get_instance(dip), ct_status, 4400 ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path), 4401 
ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip), 4402 ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip), 4403 MDI_PI(pip)->pi_addr, status, lb_buf); 4404 kmem_free(phci_path, MAXPATHLEN); 4405 kmem_free(ct_path, MAXPATHLEN); 4406 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4407 } 4408 } 4409 4410 #ifdef DEBUG 4411 /* 4412 * i_mdi_log(): 4413 * Utility function for error message management 4414 * 4415 */ 4416 /*PRINTFLIKE3*/ 4417 static void 4418 i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...) 4419 { 4420 char name[MAXNAMELEN]; 4421 char buf[MAXNAMELEN]; 4422 char *bp; 4423 va_list ap; 4424 int log_only = 0; 4425 int boot_only = 0; 4426 int console_only = 0; 4427 4428 if (dip) { 4429 (void) snprintf(name, MAXNAMELEN, "%s%d: ", 4430 ddi_node_name(dip), ddi_get_instance(dip)); 4431 } else { 4432 name[0] = 0; 4433 } 4434 4435 va_start(ap, fmt); 4436 (void) vsnprintf(buf, MAXNAMELEN, fmt, ap); 4437 va_end(ap); 4438 4439 switch (buf[0]) { 4440 case '!': 4441 bp = &buf[1]; 4442 log_only = 1; 4443 break; 4444 case '?': 4445 bp = &buf[1]; 4446 boot_only = 1; 4447 break; 4448 case '^': 4449 bp = &buf[1]; 4450 console_only = 1; 4451 break; 4452 default: 4453 bp = buf; 4454 break; 4455 } 4456 if (mdi_debug_logonly) { 4457 log_only = 1; 4458 boot_only = 0; 4459 console_only = 0; 4460 } 4461 4462 switch (level) { 4463 case CE_NOTE: 4464 level = CE_CONT; 4465 /* FALLTHROUGH */ 4466 case CE_CONT: 4467 case CE_WARN: 4468 case CE_PANIC: 4469 if (boot_only) { 4470 cmn_err(level, "?mdi: %s%s", name, bp); 4471 } else if (console_only) { 4472 cmn_err(level, "^mdi: %s%s", name, bp); 4473 } else if (log_only) { 4474 cmn_err(level, "!mdi: %s%s", name, bp); 4475 } else { 4476 cmn_err(level, "mdi: %s%s", name, bp); 4477 } 4478 break; 4479 default: 4480 cmn_err(level, "mdi: %s%s", name, bp); 4481 break; 4482 } 4483 } 4484 #endif /* DEBUG */ 4485 4486 void 4487 i_mdi_client_online(dev_info_t *ct_dip) 4488 { 4489 mdi_client_t *ct; 4490 4491 /* 4492 * Client online notification. 
Mark client state as online 4493 * restore our binding with dev_info node 4494 */ 4495 ct = i_devi_get_client(ct_dip); 4496 ASSERT(ct != NULL); 4497 MDI_CLIENT_LOCK(ct); 4498 MDI_CLIENT_SET_ONLINE(ct); 4499 /* catch for any memory leaks */ 4500 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 4501 ct->ct_dip = ct_dip; 4502 4503 if (ct->ct_power_cnt == 0) 4504 (void) i_mdi_power_all_phci(ct); 4505 4506 MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online " 4507 "i_mdi_pm_hold_client %p\n", (void *)ct)); 4508 i_mdi_pm_hold_client(ct, 1); 4509 4510 MDI_CLIENT_UNLOCK(ct); 4511 } 4512 4513 void 4514 i_mdi_phci_online(dev_info_t *ph_dip) 4515 { 4516 mdi_phci_t *ph; 4517 4518 /* pHCI online notification. Mark state accordingly */ 4519 ph = i_devi_get_phci(ph_dip); 4520 ASSERT(ph != NULL); 4521 MDI_PHCI_LOCK(ph); 4522 MDI_PHCI_SET_ONLINE(ph); 4523 MDI_PHCI_UNLOCK(ph); 4524 } 4525 4526 /* 4527 * mdi_devi_online(): 4528 * Online notification from NDI framework on pHCI/client 4529 * device online. 4530 * Return Values: 4531 * NDI_SUCCESS 4532 * MDI_FAILURE 4533 */ 4534 /*ARGSUSED*/ 4535 int 4536 mdi_devi_online(dev_info_t *dip, uint_t flags) 4537 { 4538 if (MDI_PHCI(dip)) { 4539 i_mdi_phci_online(dip); 4540 } 4541 4542 if (MDI_CLIENT(dip)) { 4543 i_mdi_client_online(dip); 4544 } 4545 return (NDI_SUCCESS); 4546 } 4547 4548 /* 4549 * mdi_devi_offline(): 4550 * Offline notification from NDI framework on pHCI/Client device 4551 * offline. 
4552 * 4553 * Return Values: 4554 * NDI_SUCCESS 4555 * NDI_FAILURE 4556 */ 4557 /*ARGSUSED*/ 4558 int 4559 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4560 { 4561 int rv = NDI_SUCCESS; 4562 4563 if (MDI_CLIENT(dip)) { 4564 rv = i_mdi_client_offline(dip, flags); 4565 if (rv != NDI_SUCCESS) 4566 return (rv); 4567 } 4568 4569 if (MDI_PHCI(dip)) { 4570 rv = i_mdi_phci_offline(dip, flags); 4571 4572 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4573 /* set client back online */ 4574 i_mdi_client_online(dip); 4575 } 4576 } 4577 4578 return (rv); 4579 } 4580 4581 /*ARGSUSED*/ 4582 static int 4583 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4584 { 4585 int rv = NDI_SUCCESS; 4586 mdi_phci_t *ph; 4587 mdi_client_t *ct; 4588 mdi_pathinfo_t *pip; 4589 mdi_pathinfo_t *next; 4590 mdi_pathinfo_t *failed_pip = NULL; 4591 dev_info_t *cdip; 4592 4593 /* 4594 * pHCI component offline notification 4595 * Make sure that this pHCI instance is free to be offlined. 4596 * If it is OK to proceed, Offline and remove all the child 4597 * mdi_pathinfo nodes. This process automatically offlines 4598 * corresponding client devices, for which this pHCI provides 4599 * critical services. 4600 */ 4601 ph = i_devi_get_phci(dip); 4602 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n", 4603 (void *)dip, (void *)ph)); 4604 if (ph == NULL) { 4605 return (rv); 4606 } 4607 4608 MDI_PHCI_LOCK(ph); 4609 4610 if (MDI_PHCI_IS_OFFLINE(ph)) { 4611 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", 4612 (void *)ph)); 4613 MDI_PHCI_UNLOCK(ph); 4614 return (NDI_SUCCESS); 4615 } 4616 4617 /* 4618 * Check to see if the pHCI can be offlined 4619 */ 4620 if (ph->ph_unstable) { 4621 MDI_DEBUG(1, (CE_WARN, dip, 4622 "!One or more target devices are in transient " 4623 "state. This device can not be removed at " 4624 "this moment. 
Please try again later.")); 4625 MDI_PHCI_UNLOCK(ph); 4626 return (NDI_BUSY); 4627 } 4628 4629 pip = ph->ph_path_head; 4630 while (pip != NULL) { 4631 MDI_PI_LOCK(pip); 4632 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4633 4634 /* 4635 * The mdi_pathinfo state is OK. Check the client state. 4636 * If failover in progress fail the pHCI from offlining 4637 */ 4638 ct = MDI_PI(pip)->pi_client; 4639 i_mdi_client_lock(ct, pip); 4640 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4641 (ct->ct_unstable)) { 4642 /* 4643 * Failover is in progress, Fail the DR 4644 */ 4645 MDI_DEBUG(1, (CE_WARN, dip, 4646 "!pHCI device (%s%d) is Busy. %s", 4647 ddi_driver_name(dip), ddi_get_instance(dip), 4648 "This device can not be removed at " 4649 "this moment. Please try again later.")); 4650 MDI_PI_UNLOCK(pip); 4651 i_mdi_client_unlock(ct); 4652 MDI_PHCI_UNLOCK(ph); 4653 return (NDI_BUSY); 4654 } 4655 MDI_PI_UNLOCK(pip); 4656 4657 /* 4658 * Check to see of we are removing the last path of this 4659 * client device... 4660 */ 4661 cdip = ct->ct_dip; 4662 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4663 (i_mdi_client_compute_state(ct, ph) == 4664 MDI_CLIENT_STATE_FAILED)) { 4665 i_mdi_client_unlock(ct); 4666 MDI_PHCI_UNLOCK(ph); 4667 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4668 /* 4669 * ndi_devi_offline() failed. 4670 * This pHCI provides the critical path 4671 * to one or more client devices. 4672 * Return busy. 4673 */ 4674 MDI_PHCI_LOCK(ph); 4675 MDI_DEBUG(1, (CE_WARN, dip, 4676 "!pHCI device (%s%d) is Busy. %s", 4677 ddi_driver_name(dip), ddi_get_instance(dip), 4678 "This device can not be removed at " 4679 "this moment. 
Please try again later.")); 4680 failed_pip = pip; 4681 break; 4682 } else { 4683 MDI_PHCI_LOCK(ph); 4684 pip = next; 4685 } 4686 } else { 4687 i_mdi_client_unlock(ct); 4688 pip = next; 4689 } 4690 } 4691 4692 if (failed_pip) { 4693 pip = ph->ph_path_head; 4694 while (pip != failed_pip) { 4695 MDI_PI_LOCK(pip); 4696 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4697 ct = MDI_PI(pip)->pi_client; 4698 i_mdi_client_lock(ct, pip); 4699 cdip = ct->ct_dip; 4700 switch (MDI_CLIENT_STATE(ct)) { 4701 case MDI_CLIENT_STATE_OPTIMAL: 4702 case MDI_CLIENT_STATE_DEGRADED: 4703 if (cdip) { 4704 MDI_PI_UNLOCK(pip); 4705 i_mdi_client_unlock(ct); 4706 MDI_PHCI_UNLOCK(ph); 4707 (void) ndi_devi_online(cdip, 0); 4708 MDI_PHCI_LOCK(ph); 4709 pip = next; 4710 continue; 4711 } 4712 break; 4713 4714 case MDI_CLIENT_STATE_FAILED: 4715 if (cdip) { 4716 MDI_PI_UNLOCK(pip); 4717 i_mdi_client_unlock(ct); 4718 MDI_PHCI_UNLOCK(ph); 4719 (void) ndi_devi_offline(cdip, 0); 4720 MDI_PHCI_LOCK(ph); 4721 pip = next; 4722 continue; 4723 } 4724 break; 4725 } 4726 MDI_PI_UNLOCK(pip); 4727 i_mdi_client_unlock(ct); 4728 pip = next; 4729 } 4730 MDI_PHCI_UNLOCK(ph); 4731 return (NDI_BUSY); 4732 } 4733 4734 /* 4735 * Mark the pHCI as offline 4736 */ 4737 MDI_PHCI_SET_OFFLINE(ph); 4738 4739 /* 4740 * Mark the child mdi_pathinfo nodes as transient 4741 */ 4742 pip = ph->ph_path_head; 4743 while (pip != NULL) { 4744 MDI_PI_LOCK(pip); 4745 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4746 MDI_PI_SET_OFFLINING(pip); 4747 MDI_PI_UNLOCK(pip); 4748 pip = next; 4749 } 4750 MDI_PHCI_UNLOCK(ph); 4751 /* 4752 * Give a chance for any pending commands to execute 4753 */ 4754 delay(1); 4755 MDI_PHCI_LOCK(ph); 4756 pip = ph->ph_path_head; 4757 while (pip != NULL) { 4758 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4759 (void) i_mdi_pi_offline(pip, flags); 4760 MDI_PI_LOCK(pip); 4761 ct = MDI_PI(pip)->pi_client; 4762 if (!MDI_PI_IS_OFFLINE(pip)) { 4763 MDI_DEBUG(1, (CE_WARN, dip, 4764 "!pHCI device (%s%d) 
is Busy. %s", 4765 ddi_driver_name(dip), ddi_get_instance(dip), 4766 "This device can not be removed at " 4767 "this moment. Please try again later.")); 4768 MDI_PI_UNLOCK(pip); 4769 MDI_PHCI_SET_ONLINE(ph); 4770 MDI_PHCI_UNLOCK(ph); 4771 return (NDI_BUSY); 4772 } 4773 MDI_PI_UNLOCK(pip); 4774 pip = next; 4775 } 4776 MDI_PHCI_UNLOCK(ph); 4777 4778 return (rv); 4779 } 4780 4781 /*ARGSUSED*/ 4782 static int 4783 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4784 { 4785 int rv = NDI_SUCCESS; 4786 mdi_client_t *ct; 4787 4788 /* 4789 * Client component to go offline. Make sure that we are 4790 * not in failing over state and update client state 4791 * accordingly 4792 */ 4793 ct = i_devi_get_client(dip); 4794 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 4795 (void *)dip, (void *)ct)); 4796 if (ct != NULL) { 4797 MDI_CLIENT_LOCK(ct); 4798 if (ct->ct_unstable) { 4799 /* 4800 * One or more paths are in transient state, 4801 * Dont allow offline of a client device 4802 */ 4803 MDI_DEBUG(1, (CE_WARN, dip, 4804 "!One or more paths to this device is " 4805 "in transient state. This device can not " 4806 "be removed at this moment. " 4807 "Please try again later.")); 4808 MDI_CLIENT_UNLOCK(ct); 4809 return (NDI_BUSY); 4810 } 4811 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4812 /* 4813 * Failover is in progress, Dont allow DR of 4814 * a client device 4815 */ 4816 MDI_DEBUG(1, (CE_WARN, dip, 4817 "!Client device (%s%d) is Busy. %s", 4818 ddi_driver_name(dip), ddi_get_instance(dip), 4819 "This device can not be removed at " 4820 "this moment. 
Please try again later.")); 4821 MDI_CLIENT_UNLOCK(ct); 4822 return (NDI_BUSY); 4823 } 4824 MDI_CLIENT_SET_OFFLINE(ct); 4825 4826 /* 4827 * Unbind our relationship with the dev_info node 4828 */ 4829 if (flags & NDI_DEVI_REMOVE) { 4830 ct->ct_dip = NULL; 4831 } 4832 MDI_CLIENT_UNLOCK(ct); 4833 } 4834 return (rv); 4835 } 4836 4837 /* 4838 * mdi_pre_attach(): 4839 * Pre attach() notification handler 4840 */ 4841 /*ARGSUSED*/ 4842 int 4843 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4844 { 4845 /* don't support old DDI_PM_RESUME */ 4846 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4847 (cmd == DDI_PM_RESUME)) 4848 return (DDI_FAILURE); 4849 4850 return (DDI_SUCCESS); 4851 } 4852 4853 /* 4854 * mdi_post_attach(): 4855 * Post attach() notification handler 4856 */ 4857 /*ARGSUSED*/ 4858 void 4859 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4860 { 4861 mdi_phci_t *ph; 4862 mdi_client_t *ct; 4863 mdi_vhci_t *vh; 4864 4865 if (MDI_PHCI(dip)) { 4866 ph = i_devi_get_phci(dip); 4867 ASSERT(ph != NULL); 4868 4869 MDI_PHCI_LOCK(ph); 4870 switch (cmd) { 4871 case DDI_ATTACH: 4872 MDI_DEBUG(2, (CE_NOTE, dip, 4873 "!pHCI post_attach: called %p\n", (void *)ph)); 4874 if (error == DDI_SUCCESS) { 4875 MDI_PHCI_SET_ATTACH(ph); 4876 } else { 4877 MDI_DEBUG(1, (CE_NOTE, dip, 4878 "!pHCI post_attach: failed error=%d\n", 4879 error)); 4880 MDI_PHCI_SET_DETACH(ph); 4881 } 4882 break; 4883 4884 case DDI_RESUME: 4885 MDI_DEBUG(2, (CE_NOTE, dip, 4886 "!pHCI post_resume: called %p\n", (void *)ph)); 4887 if (error == DDI_SUCCESS) { 4888 MDI_PHCI_SET_RESUME(ph); 4889 } else { 4890 MDI_DEBUG(1, (CE_NOTE, dip, 4891 "!pHCI post_resume: failed error=%d\n", 4892 error)); 4893 MDI_PHCI_SET_SUSPEND(ph); 4894 } 4895 break; 4896 } 4897 MDI_PHCI_UNLOCK(ph); 4898 } 4899 4900 if (MDI_CLIENT(dip)) { 4901 ct = i_devi_get_client(dip); 4902 ASSERT(ct != NULL); 4903 4904 MDI_CLIENT_LOCK(ct); 4905 switch (cmd) { 4906 case DDI_ATTACH: 4907 MDI_DEBUG(2, (CE_NOTE, dip, 
4908 "!Client post_attach: called %p\n", (void *)ct)); 4909 if (error != DDI_SUCCESS) { 4910 MDI_DEBUG(1, (CE_NOTE, dip, 4911 "!Client post_attach: failed error=%d\n", 4912 error)); 4913 MDI_CLIENT_SET_DETACH(ct); 4914 MDI_DEBUG(4, (CE_WARN, dip, 4915 "mdi_post_attach i_mdi_pm_reset_client\n")); 4916 i_mdi_pm_reset_client(ct); 4917 break; 4918 } 4919 4920 /* 4921 * Client device has successfully attached, inform 4922 * the vhci. 4923 */ 4924 vh = ct->ct_vhci; 4925 if (vh->vh_ops->vo_client_attached) 4926 (*vh->vh_ops->vo_client_attached)(dip); 4927 4928 MDI_CLIENT_SET_ATTACH(ct); 4929 break; 4930 4931 case DDI_RESUME: 4932 MDI_DEBUG(2, (CE_NOTE, dip, 4933 "!Client post_attach: called %p\n", (void *)ct)); 4934 if (error == DDI_SUCCESS) { 4935 MDI_CLIENT_SET_RESUME(ct); 4936 } else { 4937 MDI_DEBUG(1, (CE_NOTE, dip, 4938 "!Client post_resume: failed error=%d\n", 4939 error)); 4940 MDI_CLIENT_SET_SUSPEND(ct); 4941 } 4942 break; 4943 } 4944 MDI_CLIENT_UNLOCK(ct); 4945 } 4946 } 4947 4948 /* 4949 * mdi_pre_detach(): 4950 * Pre detach notification handler 4951 */ 4952 /*ARGSUSED*/ 4953 int 4954 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4955 { 4956 int rv = DDI_SUCCESS; 4957 4958 if (MDI_CLIENT(dip)) { 4959 (void) i_mdi_client_pre_detach(dip, cmd); 4960 } 4961 4962 if (MDI_PHCI(dip)) { 4963 rv = i_mdi_phci_pre_detach(dip, cmd); 4964 } 4965 4966 return (rv); 4967 } 4968 4969 /*ARGSUSED*/ 4970 static int 4971 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4972 { 4973 int rv = DDI_SUCCESS; 4974 mdi_phci_t *ph; 4975 mdi_client_t *ct; 4976 mdi_pathinfo_t *pip; 4977 mdi_pathinfo_t *failed_pip = NULL; 4978 mdi_pathinfo_t *next; 4979 4980 ph = i_devi_get_phci(dip); 4981 if (ph == NULL) { 4982 return (rv); 4983 } 4984 4985 MDI_PHCI_LOCK(ph); 4986 switch (cmd) { 4987 case DDI_DETACH: 4988 MDI_DEBUG(2, (CE_NOTE, dip, 4989 "!pHCI pre_detach: called %p\n", (void *)ph)); 4990 if (!MDI_PHCI_IS_OFFLINE(ph)) { 4991 /* 4992 * mdi_pathinfo nodes are still attached 
to 4993 * this pHCI. Fail the detach for this pHCI. 4994 */ 4995 MDI_DEBUG(2, (CE_WARN, dip, 4996 "!pHCI pre_detach: " 4997 "mdi_pathinfo nodes are still attached " 4998 "%p\n", (void *)ph)); 4999 rv = DDI_FAILURE; 5000 break; 5001 } 5002 MDI_PHCI_SET_DETACH(ph); 5003 break; 5004 5005 case DDI_SUSPEND: 5006 /* 5007 * pHCI is getting suspended. Since mpxio client 5008 * devices may not be suspended at this point, to avoid 5009 * a potential stack overflow, it is important to suspend 5010 * client devices before pHCI can be suspended. 5011 */ 5012 5013 MDI_DEBUG(2, (CE_NOTE, dip, 5014 "!pHCI pre_suspend: called %p\n", (void *)ph)); 5015 /* 5016 * Suspend all the client devices accessible through this pHCI 5017 */ 5018 pip = ph->ph_path_head; 5019 while (pip != NULL && rv == DDI_SUCCESS) { 5020 dev_info_t *cdip; 5021 MDI_PI_LOCK(pip); 5022 next = 5023 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5024 ct = MDI_PI(pip)->pi_client; 5025 i_mdi_client_lock(ct, pip); 5026 cdip = ct->ct_dip; 5027 MDI_PI_UNLOCK(pip); 5028 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5029 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5030 i_mdi_client_unlock(ct); 5031 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5032 DDI_SUCCESS) { 5033 /* 5034 * Suspend of one of the client 5035 * device has failed. 5036 */ 5037 MDI_DEBUG(1, (CE_WARN, dip, 5038 "!Suspend of device (%s%d) failed.", 5039 ddi_driver_name(cdip), 5040 ddi_get_instance(cdip))); 5041 failed_pip = pip; 5042 break; 5043 } 5044 } else { 5045 i_mdi_client_unlock(ct); 5046 } 5047 pip = next; 5048 } 5049 5050 if (rv == DDI_SUCCESS) { 5051 /* 5052 * Suspend of client devices is complete. Proceed 5053 * with pHCI suspend. 5054 */ 5055 MDI_PHCI_SET_SUSPEND(ph); 5056 } else { 5057 /* 5058 * Revert back all the suspended client device states 5059 * to converse. 
5060 */ 5061 pip = ph->ph_path_head; 5062 while (pip != failed_pip) { 5063 dev_info_t *cdip; 5064 MDI_PI_LOCK(pip); 5065 next = 5066 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5067 ct = MDI_PI(pip)->pi_client; 5068 i_mdi_client_lock(ct, pip); 5069 cdip = ct->ct_dip; 5070 MDI_PI_UNLOCK(pip); 5071 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5072 i_mdi_client_unlock(ct); 5073 (void) devi_attach(cdip, DDI_RESUME); 5074 } else { 5075 i_mdi_client_unlock(ct); 5076 } 5077 pip = next; 5078 } 5079 } 5080 break; 5081 5082 default: 5083 rv = DDI_FAILURE; 5084 break; 5085 } 5086 MDI_PHCI_UNLOCK(ph); 5087 return (rv); 5088 } 5089 5090 /*ARGSUSED*/ 5091 static int 5092 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5093 { 5094 int rv = DDI_SUCCESS; 5095 mdi_client_t *ct; 5096 5097 ct = i_devi_get_client(dip); 5098 if (ct == NULL) { 5099 return (rv); 5100 } 5101 5102 MDI_CLIENT_LOCK(ct); 5103 switch (cmd) { 5104 case DDI_DETACH: 5105 MDI_DEBUG(2, (CE_NOTE, dip, 5106 "!Client pre_detach: called %p\n", (void *)ct)); 5107 MDI_CLIENT_SET_DETACH(ct); 5108 break; 5109 5110 case DDI_SUSPEND: 5111 MDI_DEBUG(2, (CE_NOTE, dip, 5112 "!Client pre_suspend: called %p\n", (void *)ct)); 5113 MDI_CLIENT_SET_SUSPEND(ct); 5114 break; 5115 5116 default: 5117 rv = DDI_FAILURE; 5118 break; 5119 } 5120 MDI_CLIENT_UNLOCK(ct); 5121 return (rv); 5122 } 5123 5124 /* 5125 * mdi_post_detach(): 5126 * Post detach notification handler 5127 */ 5128 /*ARGSUSED*/ 5129 void 5130 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5131 { 5132 /* 5133 * Detach/Suspend of mpxio component failed. Update our state 5134 * too 5135 */ 5136 if (MDI_PHCI(dip)) 5137 i_mdi_phci_post_detach(dip, cmd, error); 5138 5139 if (MDI_CLIENT(dip)) 5140 i_mdi_client_post_detach(dip, cmd, error); 5141 } 5142 5143 /*ARGSUSED*/ 5144 static void 5145 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5146 { 5147 mdi_phci_t *ph; 5148 5149 /* 5150 * Detach/Suspend of phci component failed. 
Update our state 5151 * too 5152 */ 5153 ph = i_devi_get_phci(dip); 5154 if (ph == NULL) { 5155 return; 5156 } 5157 5158 MDI_PHCI_LOCK(ph); 5159 /* 5160 * Detach of pHCI failed. Restore back converse 5161 * state 5162 */ 5163 switch (cmd) { 5164 case DDI_DETACH: 5165 MDI_DEBUG(2, (CE_NOTE, dip, 5166 "!pHCI post_detach: called %p\n", (void *)ph)); 5167 if (error != DDI_SUCCESS) 5168 MDI_PHCI_SET_ATTACH(ph); 5169 break; 5170 5171 case DDI_SUSPEND: 5172 MDI_DEBUG(2, (CE_NOTE, dip, 5173 "!pHCI post_suspend: called %p\n", (void *)ph)); 5174 if (error != DDI_SUCCESS) 5175 MDI_PHCI_SET_RESUME(ph); 5176 break; 5177 } 5178 MDI_PHCI_UNLOCK(ph); 5179 } 5180 5181 /*ARGSUSED*/ 5182 static void 5183 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5184 { 5185 mdi_client_t *ct; 5186 5187 ct = i_devi_get_client(dip); 5188 if (ct == NULL) { 5189 return; 5190 } 5191 MDI_CLIENT_LOCK(ct); 5192 /* 5193 * Detach of Client failed. Restore back converse 5194 * state 5195 */ 5196 switch (cmd) { 5197 case DDI_DETACH: 5198 MDI_DEBUG(2, (CE_NOTE, dip, 5199 "!Client post_detach: called %p\n", (void *)ct)); 5200 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 5201 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5202 "i_mdi_pm_rele_client\n")); 5203 i_mdi_pm_rele_client(ct, ct->ct_path_count); 5204 } else { 5205 MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach " 5206 "i_mdi_pm_reset_client\n")); 5207 i_mdi_pm_reset_client(ct); 5208 } 5209 if (error != DDI_SUCCESS) 5210 MDI_CLIENT_SET_ATTACH(ct); 5211 break; 5212 5213 case DDI_SUSPEND: 5214 MDI_DEBUG(2, (CE_NOTE, dip, 5215 "!Client post_suspend: called %p\n", (void *)ct)); 5216 if (error != DDI_SUCCESS) 5217 MDI_CLIENT_SET_RESUME(ct); 5218 break; 5219 } 5220 MDI_CLIENT_UNLOCK(ct); 5221 } 5222 5223 int 5224 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 5225 { 5226 return (MDI_PI(pip)->pi_kstats ? 
1 : 0); 5227 } 5228 5229 /* 5230 * create and install per-path (client - pHCI) statistics 5231 * I/O stats supported: nread, nwritten, reads, and writes 5232 * Error stats - hard errors, soft errors, & transport errors 5233 */ 5234 int 5235 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 5236 { 5237 kstat_t *kiosp, *kerrsp; 5238 struct pi_errs *nsp; 5239 struct mdi_pi_kstats *mdi_statp; 5240 5241 if (MDI_PI(pip)->pi_kstats != NULL) 5242 return (MDI_SUCCESS); 5243 5244 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 5245 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 5246 return (MDI_FAILURE); 5247 } 5248 5249 (void) strcat(ksname, ",err"); 5250 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 5251 KSTAT_TYPE_NAMED, 5252 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 5253 if (kerrsp == NULL) { 5254 kstat_delete(kiosp); 5255 return (MDI_FAILURE); 5256 } 5257 5258 nsp = (struct pi_errs *)kerrsp->ks_data; 5259 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 5260 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 5261 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 5262 KSTAT_DATA_UINT32); 5263 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 5264 KSTAT_DATA_UINT32); 5265 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 5266 KSTAT_DATA_UINT32); 5267 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 5268 KSTAT_DATA_UINT32); 5269 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 5270 KSTAT_DATA_UINT32); 5271 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 5272 KSTAT_DATA_UINT32); 5273 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 5274 KSTAT_DATA_UINT32); 5275 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 5276 5277 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 5278 mdi_statp->pi_kstat_ref = 1; 5279 mdi_statp->pi_kstat_iostats = kiosp; 5280 mdi_statp->pi_kstat_errstats = kerrsp; 5281 
kstat_install(kiosp); 5282 kstat_install(kerrsp); 5283 MDI_PI(pip)->pi_kstats = mdi_statp; 5284 return (MDI_SUCCESS); 5285 } 5286 5287 /* 5288 * destroy per-path properties 5289 */ 5290 static void 5291 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 5292 { 5293 5294 struct mdi_pi_kstats *mdi_statp; 5295 5296 if (MDI_PI(pip)->pi_kstats == NULL) 5297 return; 5298 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 5299 return; 5300 5301 MDI_PI(pip)->pi_kstats = NULL; 5302 5303 /* 5304 * the kstat may be shared between multiple pathinfo nodes 5305 * decrement this pathinfo's usage, removing the kstats 5306 * themselves when the last pathinfo reference is removed. 5307 */ 5308 ASSERT(mdi_statp->pi_kstat_ref > 0); 5309 if (--mdi_statp->pi_kstat_ref != 0) 5310 return; 5311 5312 kstat_delete(mdi_statp->pi_kstat_iostats); 5313 kstat_delete(mdi_statp->pi_kstat_errstats); 5314 kmem_free(mdi_statp, sizeof (*mdi_statp)); 5315 } 5316 5317 /* 5318 * update I/O paths KSTATS 5319 */ 5320 void 5321 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 5322 { 5323 kstat_t *iostatp; 5324 size_t xfer_cnt; 5325 5326 ASSERT(pip != NULL); 5327 5328 /* 5329 * I/O can be driven across a path prior to having path 5330 * statistics available, i.e. probe(9e). 5331 */ 5332 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 5333 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 5334 xfer_cnt = bp->b_bcount - bp->b_resid; 5335 if (bp->b_flags & B_READ) { 5336 KSTAT_IO_PTR(iostatp)->reads++; 5337 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 5338 } else { 5339 KSTAT_IO_PTR(iostatp)->writes++; 5340 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 5341 } 5342 } 5343 } 5344 5345 /* 5346 * Enable the path(specific client/target/initiator) 5347 * Enabling a path means that MPxIO may select the enabled path for routing 5348 * future I/O requests, subject to other path state constraints. 
5349 */ 5350 int 5351 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5352 { 5353 mdi_phci_t *ph; 5354 5355 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5356 if (ph == NULL) { 5357 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5358 " failed. pip: %p ph = NULL\n", (void *)pip)); 5359 return (MDI_FAILURE); 5360 } 5361 5362 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5363 MDI_ENABLE_OP); 5364 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5365 " Returning success pip = %p. ph = %p\n", 5366 (void *)pip, (void *)ph)); 5367 return (MDI_SUCCESS); 5368 5369 } 5370 5371 /* 5372 * Disable the path (specific client/target/initiator) 5373 * Disabling a path means that MPxIO will not select the disabled path for 5374 * routing any new I/O requests. 5375 */ 5376 int 5377 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5378 { 5379 mdi_phci_t *ph; 5380 5381 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5382 if (ph == NULL) { 5383 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5384 " failed. pip: %p ph = NULL\n", (void *)pip)); 5385 return (MDI_FAILURE); 5386 } 5387 5388 (void) i_mdi_enable_disable_path(pip, 5389 ph->ph_vhci, flags, MDI_DISABLE_OP); 5390 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5391 "Returning success pip = %p. ph = %p", 5392 (void *)pip, (void *)ph)); 5393 return (MDI_SUCCESS); 5394 } 5395 5396 /* 5397 * disable the path to a particular pHCI (pHCI specified in the phci_path 5398 * argument) for a particular client (specified in the client_path argument). 5399 * Disabling a path means that MPxIO will not select the disabled path for 5400 * routing any new I/O requests. 
5401 * NOTE: this will be removed once the NWS files are changed to use the new 5402 * mdi_{enable,disable}_path interfaces 5403 */ 5404 int 5405 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5406 { 5407 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5408 } 5409 5410 /* 5411 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5412 * argument) for a particular client (specified in the client_path argument). 5413 * Enabling a path means that MPxIO may select the enabled path for routing 5414 * future I/O requests, subject to other path state constraints. 5415 * NOTE: this will be removed once the NWS files are changed to use the new 5416 * mdi_{enable,disable}_path interfaces 5417 */ 5418 5419 int 5420 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5421 { 5422 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5423 } 5424 5425 /* 5426 * Common routine for doing enable/disable. 5427 */ 5428 static mdi_pathinfo_t * 5429 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5430 int op) 5431 { 5432 int sync_flag = 0; 5433 int rv; 5434 mdi_pathinfo_t *next; 5435 int (*f)() = NULL; 5436 5437 f = vh->vh_ops->vo_pi_state_change; 5438 5439 sync_flag = (flags << 8) & 0xf00; 5440 5441 /* 5442 * Do a callback into the mdi consumer to let it 5443 * know that path is about to get enabled/disabled. 
5444 */ 5445 if (f != NULL) { 5446 rv = (*f)(vh->vh_dip, pip, 0, 5447 MDI_PI_EXT_STATE(pip), 5448 MDI_EXT_STATE_CHANGE | sync_flag | 5449 op | MDI_BEFORE_STATE_CHANGE); 5450 if (rv != MDI_SUCCESS) { 5451 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5452 "!vo_pi_state_change: failed rv = %x", rv)); 5453 } 5454 } 5455 MDI_PI_LOCK(pip); 5456 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5457 5458 switch (flags) { 5459 case USER_DISABLE: 5460 if (op == MDI_DISABLE_OP) { 5461 MDI_PI_SET_USER_DISABLE(pip); 5462 } else { 5463 MDI_PI_SET_USER_ENABLE(pip); 5464 } 5465 break; 5466 case DRIVER_DISABLE: 5467 if (op == MDI_DISABLE_OP) { 5468 MDI_PI_SET_DRV_DISABLE(pip); 5469 } else { 5470 MDI_PI_SET_DRV_ENABLE(pip); 5471 } 5472 break; 5473 case DRIVER_DISABLE_TRANSIENT: 5474 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5475 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5476 } else { 5477 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5478 } 5479 break; 5480 } 5481 MDI_PI_UNLOCK(pip); 5482 /* 5483 * Do a callback into the mdi consumer to let it 5484 * know that path is now enabled/disabled. 5485 */ 5486 if (f != NULL) { 5487 rv = (*f)(vh->vh_dip, pip, 0, 5488 MDI_PI_EXT_STATE(pip), 5489 MDI_EXT_STATE_CHANGE | sync_flag | 5490 op | MDI_AFTER_STATE_CHANGE); 5491 if (rv != MDI_SUCCESS) { 5492 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5493 "!vo_pi_state_change: failed rv = %x", rv)); 5494 } 5495 } 5496 return (next); 5497 } 5498 5499 /* 5500 * Common routine for doing enable/disable. 
5501 * NOTE: this will be removed once the NWS files are changed to use the new 5502 * mdi_{enable,disable}_path has been putback 5503 */ 5504 int 5505 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5506 { 5507 5508 mdi_phci_t *ph; 5509 mdi_vhci_t *vh = NULL; 5510 mdi_client_t *ct; 5511 mdi_pathinfo_t *next, *pip; 5512 int found_it; 5513 5514 ph = i_devi_get_phci(pdip); 5515 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5516 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5517 (void *)cdip)); 5518 if (ph == NULL) { 5519 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5520 "Op %d failed. ph = NULL\n", op)); 5521 return (MDI_FAILURE); 5522 } 5523 5524 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5525 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5526 "Op Invalid operation = %d\n", op)); 5527 return (MDI_FAILURE); 5528 } 5529 5530 vh = ph->ph_vhci; 5531 5532 if (cdip == NULL) { 5533 /* 5534 * Need to mark the Phci as enabled/disabled. 5535 */ 5536 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5537 "Op %d for the phci\n", op)); 5538 MDI_PHCI_LOCK(ph); 5539 switch (flags) { 5540 case USER_DISABLE: 5541 if (op == MDI_DISABLE_OP) { 5542 MDI_PHCI_SET_USER_DISABLE(ph); 5543 } else { 5544 MDI_PHCI_SET_USER_ENABLE(ph); 5545 } 5546 break; 5547 case DRIVER_DISABLE: 5548 if (op == MDI_DISABLE_OP) { 5549 MDI_PHCI_SET_DRV_DISABLE(ph); 5550 } else { 5551 MDI_PHCI_SET_DRV_ENABLE(ph); 5552 } 5553 break; 5554 case DRIVER_DISABLE_TRANSIENT: 5555 if (op == MDI_DISABLE_OP) { 5556 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5557 } else { 5558 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5559 } 5560 break; 5561 default: 5562 MDI_PHCI_UNLOCK(ph); 5563 MDI_DEBUG(1, (CE_NOTE, NULL, 5564 "!i_mdi_pi_enable_disable:" 5565 " Invalid flag argument= %d\n", flags)); 5566 } 5567 5568 /* 5569 * Phci has been disabled. Now try to enable/disable 5570 * path info's to each client. 
5571 */ 5572 pip = ph->ph_path_head; 5573 while (pip != NULL) { 5574 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5575 } 5576 MDI_PHCI_UNLOCK(ph); 5577 } else { 5578 5579 /* 5580 * Disable a specific client. 5581 */ 5582 ct = i_devi_get_client(cdip); 5583 if (ct == NULL) { 5584 MDI_DEBUG(1, (CE_NOTE, NULL, 5585 "!i_mdi_pi_enable_disable:" 5586 " failed. ct = NULL operation = %d\n", op)); 5587 return (MDI_FAILURE); 5588 } 5589 5590 MDI_CLIENT_LOCK(ct); 5591 pip = ct->ct_path_head; 5592 found_it = 0; 5593 while (pip != NULL) { 5594 MDI_PI_LOCK(pip); 5595 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5596 if (MDI_PI(pip)->pi_phci == ph) { 5597 MDI_PI_UNLOCK(pip); 5598 found_it = 1; 5599 break; 5600 } 5601 MDI_PI_UNLOCK(pip); 5602 pip = next; 5603 } 5604 5605 5606 MDI_CLIENT_UNLOCK(ct); 5607 if (found_it == 0) { 5608 MDI_DEBUG(1, (CE_NOTE, NULL, 5609 "!i_mdi_pi_enable_disable:" 5610 " failed. Could not find corresponding pip\n")); 5611 return (MDI_FAILURE); 5612 } 5613 5614 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5615 } 5616 5617 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5618 "Op %d Returning success pdip = %p cdip = %p\n", 5619 op, (void *)pdip, (void *)cdip)); 5620 return (MDI_SUCCESS); 5621 } 5622 5623 /* 5624 * Ensure phci powered up 5625 */ 5626 static void 5627 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5628 { 5629 dev_info_t *ph_dip; 5630 5631 ASSERT(pip != NULL); 5632 ASSERT(MDI_PI_LOCKED(pip)); 5633 5634 if (MDI_PI(pip)->pi_pm_held) { 5635 return; 5636 } 5637 5638 ph_dip = mdi_pi_get_phci(pip); 5639 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 5640 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5641 if (ph_dip == NULL) { 5642 return; 5643 } 5644 5645 MDI_PI_UNLOCK(pip); 5646 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5647 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5648 5649 pm_hold_power(ph_dip); 5650 5651 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5652 
DEVI(ph_dip)->devi_pm_kidsupcnt)); 5653 MDI_PI_LOCK(pip); 5654 5655 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 5656 if (DEVI(ph_dip)->devi_pm_info) 5657 MDI_PI(pip)->pi_pm_held = 1; 5658 } 5659 5660 /* 5661 * Allow phci powered down 5662 */ 5663 static void 5664 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 5665 { 5666 dev_info_t *ph_dip = NULL; 5667 5668 ASSERT(pip != NULL); 5669 ASSERT(MDI_PI_LOCKED(pip)); 5670 5671 if (MDI_PI(pip)->pi_pm_held == 0) { 5672 return; 5673 } 5674 5675 ph_dip = mdi_pi_get_phci(pip); 5676 ASSERT(ph_dip != NULL); 5677 5678 MDI_PI_UNLOCK(pip); 5679 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n", 5680 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5681 5682 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5683 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5684 pm_rele_power(ph_dip); 5685 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5686 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5687 5688 MDI_PI_LOCK(pip); 5689 MDI_PI(pip)->pi_pm_held = 0; 5690 } 5691 5692 static void 5693 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 5694 { 5695 ASSERT(MDI_CLIENT_LOCKED(ct)); 5696 5697 ct->ct_power_cnt += incr; 5698 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p " 5699 "ct_power_cnt = %d incr = %d\n", (void *)ct, 5700 ct->ct_power_cnt, incr)); 5701 ASSERT(ct->ct_power_cnt >= 0); 5702 } 5703 5704 static void 5705 i_mdi_rele_all_phci(mdi_client_t *ct) 5706 { 5707 mdi_pathinfo_t *pip; 5708 5709 ASSERT(MDI_CLIENT_LOCKED(ct)); 5710 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5711 while (pip != NULL) { 5712 mdi_hold_path(pip); 5713 MDI_PI_LOCK(pip); 5714 i_mdi_pm_rele_pip(pip); 5715 MDI_PI_UNLOCK(pip); 5716 mdi_rele_path(pip); 5717 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5718 } 5719 } 5720 5721 static void 5722 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 5723 { 5724 ASSERT(MDI_CLIENT_LOCKED(ct)); 5725 5726 if (i_ddi_devi_attached(ct->ct_dip)) { 5727 ct->ct_power_cnt -= decr; 5728 
MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p " 5729 "ct_power_cnt = %d decr = %d\n", 5730 (void *)ct, ct->ct_power_cnt, decr)); 5731 } 5732 5733 ASSERT(ct->ct_power_cnt >= 0); 5734 if (ct->ct_power_cnt == 0) { 5735 i_mdi_rele_all_phci(ct); 5736 return; 5737 } 5738 } 5739 5740 static void 5741 i_mdi_pm_reset_client(mdi_client_t *ct) 5742 { 5743 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p " 5744 "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt)); 5745 ASSERT(MDI_CLIENT_LOCKED(ct)); 5746 ct->ct_power_cnt = 0; 5747 i_mdi_rele_all_phci(ct); 5748 ct->ct_powercnt_config = 0; 5749 ct->ct_powercnt_unconfig = 0; 5750 ct->ct_powercnt_reset = 1; 5751 } 5752 5753 static int 5754 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 5755 { 5756 int ret; 5757 dev_info_t *ph_dip; 5758 5759 MDI_PI_LOCK(pip); 5760 i_mdi_pm_hold_pip(pip); 5761 5762 ph_dip = mdi_pi_get_phci(pip); 5763 MDI_PI_UNLOCK(pip); 5764 5765 /* bring all components of phci to full power */ 5766 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5767 "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip), 5768 ddi_get_instance(ph_dip), (void *)pip)); 5769 5770 ret = pm_powerup(ph_dip); 5771 5772 if (ret == DDI_FAILURE) { 5773 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci " 5774 "pm_powerup FAILED for %s%d %p\n", 5775 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), 5776 (void *)pip)); 5777 5778 MDI_PI_LOCK(pip); 5779 i_mdi_pm_rele_pip(pip); 5780 MDI_PI_UNLOCK(pip); 5781 return (MDI_FAILURE); 5782 } 5783 5784 return (MDI_SUCCESS); 5785 } 5786 5787 static int 5788 i_mdi_power_all_phci(mdi_client_t *ct) 5789 { 5790 mdi_pathinfo_t *pip; 5791 int succeeded = 0; 5792 5793 ASSERT(MDI_CLIENT_LOCKED(ct)); 5794 pip = (mdi_pathinfo_t *)ct->ct_path_head; 5795 while (pip != NULL) { 5796 /* 5797 * Don't power if MDI_PATHINFO_STATE_FAULT 5798 * or MDI_PATHINFO_STATE_OFFLINE. 
/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 *
 *		Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *		and BUS_POWER_HAS_CHANGED are processed here.
 *		BUS_POWER_NOINVOL is rejected and every other op is passed
 *		straight to pm_busop_bus_power().
 *
 *	parent, impl_arg	passed through to pm_busop_bus_power() only
 *	op			bus power operation
 *	arg			pm_bp_child_pwrchg_t for the PRE/POST
 *				notifications, pm_bp_has_changed_t for
 *				BUS_POWER_HAS_CHANGED
 *	result			read as an int status in the POST
 *				notification; otherwise only passed through
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		or, for ops not handled here, whatever pm_busop_bus_power()
 *		returns
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		/*
		 * The transition flag set here is cleared, and waiters are
		 * woken, in the BUS_POWER_POST_NOTIFICATION case below.
		 */
		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		/* no holds outstanding: power the phci(s) before counting */
		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		/* record the client's new power state on success */
		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		/* successful power down: drop or reset the client's holds */
		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		/* end of the transition started in pre-notification */
		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* level was raised: power the phci(s) if needed and hold */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		/* level dropped to 0 from an old level other than -1 */
		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
(ct == NULL) 6053 return (MDI_FAILURE); 6054 6055 MDI_CLIENT_LOCK(ct); 6056 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6057 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6058 6059 if (!i_ddi_devi_attached(ct->ct_dip)) { 6060 MDI_DEBUG(4, (CE_NOTE, child, 6061 "i_mdi_pm_pre_unconfig node detached already\n")); 6062 MDI_CLIENT_UNLOCK(ct); 6063 return (MDI_SUCCESS); 6064 } 6065 6066 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6067 (flags & NDI_AUTODETACH)) { 6068 MDI_DEBUG(4, (CE_NOTE, child, 6069 "i_mdi_pm_pre_unconfig auto-modunload\n")); 6070 MDI_CLIENT_UNLOCK(ct); 6071 return (MDI_FAILURE); 6072 } 6073 6074 if (ct->ct_powercnt_unconfig) { 6075 MDI_DEBUG(4, (CE_NOTE, child, 6076 "i_mdi_pm_pre_unconfig ct_powercnt_held\n")); 6077 MDI_CLIENT_UNLOCK(ct); 6078 *held = 1; 6079 return (MDI_SUCCESS); 6080 } 6081 6082 if (ct->ct_power_cnt == 0) { 6083 ret = i_mdi_power_all_phci(ct); 6084 } 6085 MDI_DEBUG(4, (CE_NOTE, child, 6086 "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n")); 6087 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6088 ct->ct_powercnt_unconfig = 1; 6089 ct->ct_powercnt_reset = 0; 6090 MDI_CLIENT_UNLOCK(ct); 6091 if (ret == MDI_SUCCESS) 6092 *held = 1; 6093 return (ret); 6094 } 6095 6096 static int 6097 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6098 int flags) 6099 { 6100 int ret = MDI_SUCCESS; 6101 dev_info_t *cdip; 6102 int circ; 6103 6104 ASSERT(MDI_VHCI(vdip)); 6105 *held = 0; 6106 6107 /* ndi_devi_unconfig_one */ 6108 if (child) { 6109 ASSERT(DEVI_BUSY_OWNED(vdip)); 6110 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6111 } 6112 6113 /* devi_unconfig_common */ 6114 ndi_devi_enter(vdip, &circ); 6115 cdip = ddi_get_child(vdip); 6116 while (cdip) { 6117 dev_info_t *next = ddi_get_next_sibling(cdip); 6118 6119 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6120 cdip = next; 6121 } 6122 ndi_devi_exit(vdip, circ); 6123 6124 if (*held) 6125 ret = MDI_SUCCESS; 6126 6127 return (ret); 6128 } 6129 6130 static void 6131 
i_mdi_pm_post_config_one(dev_info_t *child) 6132 { 6133 mdi_client_t *ct; 6134 6135 ct = i_devi_get_client(child); 6136 if (ct == NULL) 6137 return; 6138 6139 MDI_CLIENT_LOCK(ct); 6140 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6141 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6142 6143 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6144 MDI_DEBUG(4, (CE_NOTE, child, 6145 "i_mdi_pm_post_config_one NOT configured\n")); 6146 MDI_CLIENT_UNLOCK(ct); 6147 return; 6148 } 6149 6150 /* client has not been updated */ 6151 if (MDI_CLIENT_IS_FAILED(ct)) { 6152 MDI_DEBUG(4, (CE_NOTE, child, 6153 "i_mdi_pm_post_config_one NOT configured\n")); 6154 MDI_CLIENT_UNLOCK(ct); 6155 return; 6156 } 6157 6158 /* another thread might have powered it down or detached it */ 6159 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6160 !DEVI_IS_ATTACHING(ct->ct_dip)) || 6161 (!i_ddi_devi_attached(ct->ct_dip) && 6162 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6163 MDI_DEBUG(4, (CE_NOTE, child, 6164 "i_mdi_pm_post_config i_mdi_pm_reset_client\n")); 6165 i_mdi_pm_reset_client(ct); 6166 } else { 6167 mdi_pathinfo_t *pip, *next; 6168 int valid_path_count = 0; 6169 6170 MDI_DEBUG(4, (CE_NOTE, child, 6171 "i_mdi_pm_post_config i_mdi_pm_rele_client\n")); 6172 pip = ct->ct_path_head; 6173 while (pip != NULL) { 6174 MDI_PI_LOCK(pip); 6175 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6176 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6177 valid_path_count ++; 6178 MDI_PI_UNLOCK(pip); 6179 pip = next; 6180 } 6181 i_mdi_pm_rele_client(ct, valid_path_count); 6182 } 6183 ct->ct_powercnt_config = 0; 6184 MDI_CLIENT_UNLOCK(ct); 6185 } 6186 6187 static void 6188 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 6189 { 6190 int circ; 6191 dev_info_t *cdip; 6192 6193 ASSERT(MDI_VHCI(vdip)); 6194 6195 /* ndi_devi_config_one */ 6196 if (child) { 6197 ASSERT(DEVI_BUSY_OWNED(vdip)); 6198 i_mdi_pm_post_config_one(child); 6199 return; 6200 } 6201 6202 /* devi_config_common */ 6203 ndi_devi_enter(vdip, 
&circ); 6204 cdip = ddi_get_child(vdip); 6205 while (cdip) { 6206 dev_info_t *next = ddi_get_next_sibling(cdip); 6207 6208 i_mdi_pm_post_config_one(cdip); 6209 cdip = next; 6210 } 6211 ndi_devi_exit(vdip, circ); 6212 } 6213 6214 static void 6215 i_mdi_pm_post_unconfig_one(dev_info_t *child) 6216 { 6217 mdi_client_t *ct; 6218 6219 ct = i_devi_get_client(child); 6220 if (ct == NULL) 6221 return; 6222 6223 MDI_CLIENT_LOCK(ct); 6224 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6225 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6226 6227 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 6228 MDI_DEBUG(4, (CE_NOTE, child, 6229 "i_mdi_pm_post_unconfig NOT held\n")); 6230 MDI_CLIENT_UNLOCK(ct); 6231 return; 6232 } 6233 6234 /* failure detaching or another thread just attached it */ 6235 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6236 i_ddi_devi_attached(ct->ct_dip)) || 6237 (!i_ddi_devi_attached(ct->ct_dip) && 6238 !DEVI_IS_ATTACHING(ct->ct_dip))) { 6239 MDI_DEBUG(4, (CE_NOTE, child, 6240 "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n")); 6241 i_mdi_pm_reset_client(ct); 6242 } else { 6243 mdi_pathinfo_t *pip, *next; 6244 int valid_path_count = 0; 6245 6246 MDI_DEBUG(4, (CE_NOTE, child, 6247 "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n")); 6248 pip = ct->ct_path_head; 6249 while (pip != NULL) { 6250 MDI_PI_LOCK(pip); 6251 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6252 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 6253 valid_path_count ++; 6254 MDI_PI_UNLOCK(pip); 6255 pip = next; 6256 } 6257 i_mdi_pm_rele_client(ct, valid_path_count); 6258 ct->ct_powercnt_unconfig = 0; 6259 } 6260 6261 MDI_CLIENT_UNLOCK(ct); 6262 } 6263 6264 static void 6265 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 6266 { 6267 int circ; 6268 dev_info_t *cdip; 6269 6270 ASSERT(MDI_VHCI(vdip)); 6271 6272 if (!held) { 6273 MDI_DEBUG(4, (CE_NOTE, vdip, 6274 "i_mdi_pm_post_unconfig held = %d\n", held)); 6275 return; 6276 } 6277 6278 if (child) { 6279 
ASSERT(DEVI_BUSY_OWNED(vdip)); 6280 i_mdi_pm_post_unconfig_one(child); 6281 return; 6282 } 6283 6284 ndi_devi_enter(vdip, &circ); 6285 cdip = ddi_get_child(vdip); 6286 while (cdip) { 6287 dev_info_t *next = ddi_get_next_sibling(cdip); 6288 6289 i_mdi_pm_post_unconfig_one(cdip); 6290 cdip = next; 6291 } 6292 ndi_devi_exit(vdip, circ); 6293 } 6294 6295 int 6296 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 6297 { 6298 int circ, ret = MDI_SUCCESS; 6299 dev_info_t *client_dip = NULL; 6300 mdi_client_t *ct; 6301 6302 /* 6303 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 6304 * Power up pHCI for the named client device. 6305 * Note: Before the client is enumerated under vhci by phci, 6306 * client_dip can be NULL. Then proceed to power up all the 6307 * pHCIs. 6308 */ 6309 if (devnm != NULL) { 6310 ndi_devi_enter(vdip, &circ); 6311 client_dip = ndi_devi_findchild(vdip, devnm); 6312 } 6313 6314 MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n", 6315 op, devnm ? 
devnm : "NULL", (void *)client_dip)); 6316 6317 switch (op) { 6318 case MDI_PM_PRE_CONFIG: 6319 ret = i_mdi_pm_pre_config(vdip, client_dip); 6320 break; 6321 6322 case MDI_PM_PRE_UNCONFIG: 6323 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 6324 flags); 6325 break; 6326 6327 case MDI_PM_POST_CONFIG: 6328 i_mdi_pm_post_config(vdip, client_dip); 6329 break; 6330 6331 case MDI_PM_POST_UNCONFIG: 6332 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 6333 break; 6334 6335 case MDI_PM_HOLD_POWER: 6336 case MDI_PM_RELE_POWER: 6337 ASSERT(args); 6338 6339 client_dip = (dev_info_t *)args; 6340 ASSERT(MDI_CLIENT(client_dip)); 6341 6342 ct = i_devi_get_client(client_dip); 6343 MDI_CLIENT_LOCK(ct); 6344 6345 if (op == MDI_PM_HOLD_POWER) { 6346 if (ct->ct_power_cnt == 0) { 6347 (void) i_mdi_power_all_phci(ct); 6348 MDI_DEBUG(4, (CE_NOTE, client_dip, 6349 "mdi_power i_mdi_pm_hold_client\n")); 6350 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6351 } 6352 } else { 6353 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6354 MDI_DEBUG(4, (CE_NOTE, client_dip, 6355 "mdi_power i_mdi_pm_rele_client\n")); 6356 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6357 } else { 6358 MDI_DEBUG(4, (CE_NOTE, client_dip, 6359 "mdi_power i_mdi_pm_reset_client\n")); 6360 i_mdi_pm_reset_client(ct); 6361 } 6362 } 6363 6364 MDI_CLIENT_UNLOCK(ct); 6365 break; 6366 6367 default: 6368 break; 6369 } 6370 6371 if (devnm) 6372 ndi_devi_exit(vdip, circ); 6373 6374 return (ret); 6375 } 6376 6377 int 6378 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class) 6379 { 6380 mdi_vhci_t *vhci; 6381 6382 if (!MDI_VHCI(dip)) 6383 return (MDI_FAILURE); 6384 6385 if (mdi_class) { 6386 vhci = DEVI(dip)->devi_mdi_xhci; 6387 ASSERT(vhci); 6388 *mdi_class = vhci->vh_class; 6389 } 6390 6391 return (MDI_SUCCESS); 6392 } 6393 6394 int 6395 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 6396 { 6397 mdi_phci_t *phci; 6398 6399 if (!MDI_PHCI(dip)) 6400 return (MDI_FAILURE); 6401 6402 if (mdi_class) { 
6403 phci = DEVI(dip)->devi_mdi_xhci; 6404 ASSERT(phci); 6405 *mdi_class = phci->ph_vhci->vh_class; 6406 } 6407 6408 return (MDI_SUCCESS); 6409 } 6410 6411 int 6412 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 6413 { 6414 mdi_client_t *client; 6415 6416 if (!MDI_CLIENT(dip)) 6417 return (MDI_FAILURE); 6418 6419 if (mdi_class) { 6420 client = DEVI(dip)->devi_mdi_client; 6421 ASSERT(client); 6422 *mdi_class = client->ct_vhci->vh_class; 6423 } 6424 6425 return (MDI_SUCCESS); 6426 } 6427 6428 void * 6429 mdi_client_get_vhci_private(dev_info_t *dip) 6430 { 6431 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6432 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6433 mdi_client_t *ct; 6434 ct = i_devi_get_client(dip); 6435 return (ct->ct_vprivate); 6436 } 6437 return (NULL); 6438 } 6439 6440 void 6441 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 6442 { 6443 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 6444 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 6445 mdi_client_t *ct; 6446 ct = i_devi_get_client(dip); 6447 ct->ct_vprivate = data; 6448 } 6449 } 6450 /* 6451 * mdi_pi_get_vhci_private(): 6452 * Get the vhci private information associated with the 6453 * mdi_pathinfo node 6454 */ 6455 void * 6456 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 6457 { 6458 caddr_t vprivate = NULL; 6459 if (pip) { 6460 vprivate = MDI_PI(pip)->pi_vprivate; 6461 } 6462 return (vprivate); 6463 } 6464 6465 /* 6466 * mdi_pi_set_vhci_private(): 6467 * Set the vhci private information in the mdi_pathinfo node 6468 */ 6469 void 6470 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 6471 { 6472 if (pip) { 6473 MDI_PI(pip)->pi_vprivate = priv; 6474 } 6475 } 6476 6477 /* 6478 * mdi_phci_get_vhci_private(): 6479 * Get the vhci private information associated with the 6480 * mdi_phci node 6481 */ 6482 void * 6483 mdi_phci_get_vhci_private(dev_info_t *dip) 6484 { 6485 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 
6486 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6487 mdi_phci_t *ph; 6488 ph = i_devi_get_phci(dip); 6489 return (ph->ph_vprivate); 6490 } 6491 return (NULL); 6492 } 6493 6494 /* 6495 * mdi_phci_set_vhci_private(): 6496 * Set the vhci private information in the mdi_phci node 6497 */ 6498 void 6499 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 6500 { 6501 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 6502 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 6503 mdi_phci_t *ph; 6504 ph = i_devi_get_phci(dip); 6505 ph->ph_vprivate = priv; 6506 } 6507 } 6508 6509 /* 6510 * List of vhci class names: 6511 * A vhci class name must be in this list only if the corresponding vhci 6512 * driver intends to use the mdi provided bus config implementation 6513 * (i.e., mdi_vhci_bus_config()). 6514 */ 6515 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 6516 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 6517 6518 /* 6519 * During boot time, the on-disk vhci cache for every vhci class is read 6520 * in the form of an nvlist and stored here. 6521 */ 6522 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 6523 6524 /* nvpair names in vhci cache nvlist */ 6525 #define MDI_VHCI_CACHE_VERSION 1 6526 #define MDI_NVPNAME_VERSION "version" 6527 #define MDI_NVPNAME_PHCIS "phcis" 6528 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 6529 6530 /* 6531 * Given vhci class name, return its on-disk vhci cache filename. 6532 * Memory for the returned filename which includes the full path is allocated 6533 * by this function. 6534 */ 6535 static char * 6536 vhclass2vhcache_filename(char *vhclass) 6537 { 6538 char *filename; 6539 int len; 6540 static char *fmt = "/etc/devices/mdi_%s_cache"; 6541 6542 /* 6543 * fmt contains the on-disk vhci cache file name format; 6544 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 
6545 */ 6546 6547 /* the -1 below is to account for "%s" in the format string */ 6548 len = strlen(fmt) + strlen(vhclass) - 1; 6549 filename = kmem_alloc(len, KM_SLEEP); 6550 (void) snprintf(filename, len, fmt, vhclass); 6551 ASSERT(len == (strlen(filename) + 1)); 6552 return (filename); 6553 } 6554 6555 /* 6556 * initialize the vhci cache related data structures and read the on-disk 6557 * vhci cached data into memory. 6558 */ 6559 static void 6560 setup_vhci_cache(mdi_vhci_t *vh) 6561 { 6562 mdi_vhci_config_t *vhc; 6563 mdi_vhci_cache_t *vhcache; 6564 int i; 6565 nvlist_t *nvl = NULL; 6566 6567 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 6568 vh->vh_config = vhc; 6569 vhcache = &vhc->vhc_vhcache; 6570 6571 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 6572 6573 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 6574 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 6575 6576 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 6577 6578 /* 6579 * Create string hash; same as mod_hash_create_strhash() except that 6580 * we use NULL key destructor. 6581 */ 6582 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 6583 mdi_bus_config_cache_hash_size, 6584 mod_hash_null_keydtor, mod_hash_null_valdtor, 6585 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 6586 6587 /* 6588 * The on-disk vhci cache is read during booting prior to the 6589 * lights-out period by mdi_read_devices_files(). 6590 */ 6591 for (i = 0; i < N_VHCI_CLASSES; i++) { 6592 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 6593 nvl = vhcache_nvl[i]; 6594 vhcache_nvl[i] = NULL; 6595 break; 6596 } 6597 } 6598 6599 /* 6600 * this is to cover the case of some one manually causing unloading 6601 * (or detaching) and reloading (or attaching) of a vhci driver. 
6602 */ 6603 if (nvl == NULL && modrootloaded) 6604 nvl = read_on_disk_vhci_cache(vh->vh_class); 6605 6606 if (nvl != NULL) { 6607 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 6608 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 6609 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 6610 else { 6611 cmn_err(CE_WARN, 6612 "%s: data file corrupted, will recreate\n", 6613 vhc->vhc_vhcache_filename); 6614 } 6615 rw_exit(&vhcache->vhcache_lock); 6616 nvlist_free(nvl); 6617 } 6618 6619 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 6620 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 6621 6622 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 6623 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 6624 } 6625 6626 /* 6627 * free all vhci cache related resources 6628 */ 6629 static int 6630 destroy_vhci_cache(mdi_vhci_t *vh) 6631 { 6632 mdi_vhci_config_t *vhc = vh->vh_config; 6633 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 6634 mdi_vhcache_phci_t *cphci, *cphci_next; 6635 mdi_vhcache_client_t *cct, *cct_next; 6636 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 6637 6638 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 6639 return (MDI_FAILURE); 6640 6641 kmem_free(vhc->vhc_vhcache_filename, 6642 strlen(vhc->vhc_vhcache_filename) + 1); 6643 6644 mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 6645 6646 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 6647 cphci = cphci_next) { 6648 cphci_next = cphci->cphci_next; 6649 free_vhcache_phci(cphci); 6650 } 6651 6652 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 6653 cct_next = cct->cct_next; 6654 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 6655 cpi_next = cpi->cpi_next; 6656 free_vhcache_pathinfo(cpi); 6657 } 6658 free_vhcache_client(cct); 6659 } 6660 6661 rw_destroy(&vhcache->vhcache_lock); 6662 6663 mutex_destroy(&vhc->vhc_lock); 6664 cv_destroy(&vhc->vhc_cv); 6665 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 6666 return (MDI_SUCCESS); 
6667 } 6668 6669 /* 6670 * Stop all vhci cache related async threads and free their resources. 6671 */ 6672 static int 6673 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 6674 { 6675 mdi_async_client_config_t *acc, *acc_next; 6676 6677 mutex_enter(&vhc->vhc_lock); 6678 vhc->vhc_flags |= MDI_VHC_EXIT; 6679 ASSERT(vhc->vhc_acc_thrcount >= 0); 6680 cv_broadcast(&vhc->vhc_cv); 6681 6682 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 6683 vhc->vhc_acc_thrcount != 0) { 6684 mutex_exit(&vhc->vhc_lock); 6685 delay(1); 6686 mutex_enter(&vhc->vhc_lock); 6687 } 6688 6689 vhc->vhc_flags &= ~MDI_VHC_EXIT; 6690 6691 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 6692 acc_next = acc->acc_next; 6693 free_async_client_config(acc); 6694 } 6695 vhc->vhc_acc_list_head = NULL; 6696 vhc->vhc_acc_list_tail = NULL; 6697 vhc->vhc_acc_count = 0; 6698 6699 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6700 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6701 mutex_exit(&vhc->vhc_lock); 6702 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 6703 vhcache_dirty(vhc); 6704 return (MDI_FAILURE); 6705 } 6706 } else 6707 mutex_exit(&vhc->vhc_lock); 6708 6709 if (callb_delete(vhc->vhc_cbid) != 0) 6710 return (MDI_FAILURE); 6711 6712 return (MDI_SUCCESS); 6713 } 6714 6715 /* 6716 * Stop vhci cache flush thread 6717 */ 6718 /* ARGSUSED */ 6719 static boolean_t 6720 stop_vhcache_flush_thread(void *arg, int code) 6721 { 6722 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 6723 6724 mutex_enter(&vhc->vhc_lock); 6725 vhc->vhc_flags |= MDI_VHC_EXIT; 6726 cv_broadcast(&vhc->vhc_cv); 6727 6728 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 6729 mutex_exit(&vhc->vhc_lock); 6730 delay(1); 6731 mutex_enter(&vhc->vhc_lock); 6732 } 6733 6734 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 6735 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 6736 mutex_exit(&vhc->vhc_lock); 6737 (void) flush_vhcache(vhc, 1); 6738 } else 6739 mutex_exit(&vhc->vhc_lock); 6740 6741 return (B_TRUE); 6742 } 
6743 6744 /* 6745 * Enqueue the vhcache phci (cphci) at the tail of the list 6746 */ 6747 static void 6748 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6749 { 6750 cphci->cphci_next = NULL; 6751 if (vhcache->vhcache_phci_head == NULL) 6752 vhcache->vhcache_phci_head = cphci; 6753 else 6754 vhcache->vhcache_phci_tail->cphci_next = cphci; 6755 vhcache->vhcache_phci_tail = cphci; 6756 } 6757 6758 /* 6759 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6760 */ 6761 static void 6762 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6763 mdi_vhcache_pathinfo_t *cpi) 6764 { 6765 cpi->cpi_next = NULL; 6766 if (cct->cct_cpi_head == NULL) 6767 cct->cct_cpi_head = cpi; 6768 else 6769 cct->cct_cpi_tail->cpi_next = cpi; 6770 cct->cct_cpi_tail = cpi; 6771 } 6772 6773 /* 6774 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6775 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6776 * flag set come at the beginning of the list. All cpis which have this 6777 * flag set come at the end of the list. 
6778 */ 6779 static void 6780 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6781 mdi_vhcache_pathinfo_t *newcpi) 6782 { 6783 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6784 6785 if (cct->cct_cpi_head == NULL || 6786 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6787 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6788 else { 6789 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6790 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6791 prev_cpi = cpi, cpi = cpi->cpi_next) 6792 ; 6793 6794 if (prev_cpi == NULL) 6795 cct->cct_cpi_head = newcpi; 6796 else 6797 prev_cpi->cpi_next = newcpi; 6798 6799 newcpi->cpi_next = cpi; 6800 6801 if (cpi == NULL) 6802 cct->cct_cpi_tail = newcpi; 6803 } 6804 } 6805 6806 /* 6807 * Enqueue the vhcache client (cct) at the tail of the list 6808 */ 6809 static void 6810 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6811 mdi_vhcache_client_t *cct) 6812 { 6813 cct->cct_next = NULL; 6814 if (vhcache->vhcache_client_head == NULL) 6815 vhcache->vhcache_client_head = cct; 6816 else 6817 vhcache->vhcache_client_tail->cct_next = cct; 6818 vhcache->vhcache_client_tail = cct; 6819 } 6820 6821 static void 6822 free_string_array(char **str, int nelem) 6823 { 6824 int i; 6825 6826 if (str) { 6827 for (i = 0; i < nelem; i++) { 6828 if (str[i]) 6829 kmem_free(str[i], strlen(str[i]) + 1); 6830 } 6831 kmem_free(str, sizeof (char *) * nelem); 6832 } 6833 } 6834 6835 static void 6836 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6837 { 6838 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6839 kmem_free(cphci, sizeof (*cphci)); 6840 } 6841 6842 static void 6843 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6844 { 6845 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6846 kmem_free(cpi, sizeof (*cpi)); 6847 } 6848 6849 static void 6850 free_vhcache_client(mdi_vhcache_client_t *cct) 6851 { 6852 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6853 kmem_free(cct, sizeof (*cct)); 6854 } 6855 6856 
static char * 6857 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6858 { 6859 char *name_addr; 6860 int len; 6861 6862 len = strlen(ct_name) + strlen(ct_addr) + 2; 6863 name_addr = kmem_alloc(len, KM_SLEEP); 6864 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6865 6866 if (ret_len) 6867 *ret_len = len; 6868 return (name_addr); 6869 } 6870 6871 /* 6872 * Copy the contents of paddrnvl to vhci cache. 6873 * paddrnvl nvlist contains path information for a vhci client. 6874 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 6875 */ 6876 static void 6877 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6878 mdi_vhcache_client_t *cct) 6879 { 6880 nvpair_t *nvp = NULL; 6881 mdi_vhcache_pathinfo_t *cpi; 6882 uint_t nelem; 6883 uint32_t *val; 6884 6885 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6886 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6887 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6888 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6889 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6890 ASSERT(nelem == 2); 6891 cpi->cpi_cphci = cphci_list[val[0]]; 6892 cpi->cpi_flags = val[1]; 6893 enqueue_tail_vhcache_pathinfo(cct, cpi); 6894 } 6895 } 6896 6897 /* 6898 * Copy the contents of caddrmapnvl to vhci cache. 6899 * caddrmapnvl nvlist contains vhci client address to phci client address 6900 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6901 * this nvlist. 
6902 */ 6903 static void 6904 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 6905 mdi_vhcache_phci_t *cphci_list[]) 6906 { 6907 nvpair_t *nvp = NULL; 6908 nvlist_t *paddrnvl; 6909 mdi_vhcache_client_t *cct; 6910 6911 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6912 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 6913 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 6914 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6915 (void) nvpair_value_nvlist(nvp, &paddrnvl); 6916 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 6917 /* the client must contain at least one path */ 6918 ASSERT(cct->cct_cpi_head != NULL); 6919 6920 enqueue_vhcache_client(vhcache, cct); 6921 (void) mod_hash_insert(vhcache->vhcache_client_hash, 6922 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 6923 } 6924 } 6925 6926 /* 6927 * Copy the contents of the main nvlist to vhci cache. 6928 * 6929 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 6930 * The nvlist contains the mappings between the vhci client addresses and 6931 * their corresponding phci client addresses. 6932 * 6933 * The structure of the nvlist is as follows: 6934 * 6935 * Main nvlist: 6936 * NAME TYPE DATA 6937 * version int32 version number 6938 * phcis string array array of phci paths 6939 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 6940 * 6941 * structure of c2paddrs_nvl: 6942 * NAME TYPE DATA 6943 * caddr1 nvlist_t paddrs_nvl1 6944 * caddr2 nvlist_t paddrs_nvl2 6945 * ... 6946 * where caddr1, caddr2, ... are vhci client name and addresses in the 6947 * form of "<clientname>@<clientaddress>". 6948 * (for example: "ssd@2000002037cd9f72"); 6949 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 6950 * 6951 * structure of paddrs_nvl: 6952 * NAME TYPE DATA 6953 * pi_addr1 uint32_array (phci-id, cpi_flags) 6954 * pi_addr2 uint32_array (phci-id, cpi_flags) 6955 * ... 6956 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 6957 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 6958 * phci-ids are integers that identify PHCIs to which the 6959 * the bus specific address belongs to. These integers are used as an index 6960 * into to the phcis string array in the main nvlist to get the PHCI path. 6961 */ 6962 static int 6963 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 6964 { 6965 char **phcis, **phci_namep; 6966 uint_t nphcis; 6967 mdi_vhcache_phci_t *cphci, **cphci_list; 6968 nvlist_t *caddrmapnvl; 6969 int32_t ver; 6970 int i; 6971 size_t cphci_list_size; 6972 6973 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 6974 6975 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 6976 ver != MDI_VHCI_CACHE_VERSION) 6977 return (MDI_FAILURE); 6978 6979 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 6980 &nphcis) != 0) 6981 return (MDI_SUCCESS); 6982 6983 ASSERT(nphcis > 0); 6984 6985 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 6986 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 6987 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 6988 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 6989 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 6990 enqueue_vhcache_phci(vhcache, cphci); 6991 cphci_list[i] = cphci; 6992 } 6993 6994 ASSERT(vhcache->vhcache_phci_head != NULL); 6995 6996 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 6997 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 6998 6999 kmem_free(cphci_list, cphci_list_size); 7000 return (MDI_SUCCESS); 7001 } 7002 7003 /* 7004 * Build paddrnvl for the specified client using the information in the 7005 * vhci cache and add it to the caddrmapnnvl. 7006 * Returns 0 on success, errno on failure. 
7007 */ 7008 static int 7009 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7010 nvlist_t *caddrmapnvl) 7011 { 7012 mdi_vhcache_pathinfo_t *cpi; 7013 nvlist_t *nvl; 7014 int err; 7015 uint32_t val[2]; 7016 7017 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7018 7019 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7020 return (err); 7021 7022 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7023 val[0] = cpi->cpi_cphci->cphci_id; 7024 val[1] = cpi->cpi_flags; 7025 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7026 != 0) 7027 goto out; 7028 } 7029 7030 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7031 out: 7032 nvlist_free(nvl); 7033 return (err); 7034 } 7035 7036 /* 7037 * Build caddrmapnvl using the information in the vhci cache 7038 * and add it to the mainnvl. 7039 * Returns 0 on success, errno on failure. 7040 */ 7041 static int 7042 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7043 { 7044 mdi_vhcache_client_t *cct; 7045 nvlist_t *nvl; 7046 int err; 7047 7048 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7049 7050 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7051 return (err); 7052 7053 for (cct = vhcache->vhcache_client_head; cct != NULL; 7054 cct = cct->cct_next) { 7055 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7056 goto out; 7057 } 7058 7059 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7060 out: 7061 nvlist_free(nvl); 7062 return (err); 7063 } 7064 7065 /* 7066 * Build nvlist using the information in the vhci cache. 7067 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7068 * Returns nvl on success, NULL on failure. 
7069 */ 7070 static nvlist_t * 7071 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7072 { 7073 mdi_vhcache_phci_t *cphci; 7074 uint_t phci_count; 7075 char **phcis; 7076 nvlist_t *nvl; 7077 int err, i; 7078 7079 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7080 nvl = NULL; 7081 goto out; 7082 } 7083 7084 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 7085 MDI_VHCI_CACHE_VERSION)) != 0) 7086 goto out; 7087 7088 rw_enter(&vhcache->vhcache_lock, RW_READER); 7089 if (vhcache->vhcache_phci_head == NULL) { 7090 rw_exit(&vhcache->vhcache_lock); 7091 return (nvl); 7092 } 7093 7094 phci_count = 0; 7095 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7096 cphci = cphci->cphci_next) 7097 cphci->cphci_id = phci_count++; 7098 7099 /* build phci pathname list */ 7100 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 7101 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 7102 cphci = cphci->cphci_next, i++) 7103 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 7104 7105 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 7106 phci_count); 7107 free_string_array(phcis, phci_count); 7108 7109 if (err == 0 && 7110 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 7111 rw_exit(&vhcache->vhcache_lock); 7112 return (nvl); 7113 } 7114 7115 rw_exit(&vhcache->vhcache_lock); 7116 out: 7117 if (nvl) 7118 nvlist_free(nvl); 7119 return (NULL); 7120 } 7121 7122 /* 7123 * Lookup vhcache phci structure for the specified phci path. 7124 */ 7125 static mdi_vhcache_phci_t * 7126 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 7127 { 7128 mdi_vhcache_phci_t *cphci; 7129 7130 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7131 7132 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7133 cphci = cphci->cphci_next) { 7134 if (strcmp(cphci->cphci_path, phci_path) == 0) 7135 return (cphci); 7136 } 7137 7138 return (NULL); 7139 } 7140 7141 /* 7142 * Lookup vhcache phci structure for the specified phci. 
7143 */ 7144 static mdi_vhcache_phci_t * 7145 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 7146 { 7147 mdi_vhcache_phci_t *cphci; 7148 7149 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7150 7151 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7152 cphci = cphci->cphci_next) { 7153 if (cphci->cphci_phci == ph) 7154 return (cphci); 7155 } 7156 7157 return (NULL); 7158 } 7159 7160 /* 7161 * Add the specified phci to the vhci cache if not already present. 7162 */ 7163 static void 7164 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7165 { 7166 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7167 mdi_vhcache_phci_t *cphci; 7168 char *pathname; 7169 int cache_updated; 7170 7171 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7172 7173 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 7174 (void) ddi_pathname(ph->ph_dip, pathname); 7175 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 7176 != NULL) { 7177 cphci->cphci_phci = ph; 7178 cache_updated = 0; 7179 } else { 7180 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 7181 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 7182 cphci->cphci_phci = ph; 7183 enqueue_vhcache_phci(vhcache, cphci); 7184 cache_updated = 1; 7185 } 7186 7187 rw_exit(&vhcache->vhcache_lock); 7188 7189 /* 7190 * Since a new phci has been added, reset 7191 * vhc_path_discovery_cutoff_time to allow for discovery of paths 7192 * during next vhcache_discover_paths(). 7193 */ 7194 mutex_enter(&vhc->vhc_lock); 7195 vhc->vhc_path_discovery_cutoff_time = 0; 7196 mutex_exit(&vhc->vhc_lock); 7197 7198 kmem_free(pathname, MAXPATHLEN); 7199 if (cache_updated) 7200 vhcache_dirty(vhc); 7201 } 7202 7203 /* 7204 * Remove the reference to the specified phci from the vhci cache. 
7205 */ 7206 static void 7207 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 7208 { 7209 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7210 mdi_vhcache_phci_t *cphci; 7211 7212 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7213 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 7214 /* do not remove the actual mdi_vhcache_phci structure */ 7215 cphci->cphci_phci = NULL; 7216 } 7217 rw_exit(&vhcache->vhcache_lock); 7218 } 7219 7220 static void 7221 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 7222 mdi_vhcache_lookup_token_t *src) 7223 { 7224 if (src == NULL) { 7225 dst->lt_cct = NULL; 7226 dst->lt_cct_lookup_time = 0; 7227 } else { 7228 dst->lt_cct = src->lt_cct; 7229 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 7230 } 7231 } 7232 7233 /* 7234 * Look up vhcache client for the specified client. 7235 */ 7236 static mdi_vhcache_client_t * 7237 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 7238 mdi_vhcache_lookup_token_t *token) 7239 { 7240 mod_hash_val_t hv; 7241 char *name_addr; 7242 int len; 7243 7244 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7245 7246 /* 7247 * If no vhcache clean occurred since the last lookup, we can 7248 * simply return the cct from the last lookup operation. 7249 * It works because ccts are never freed except during the vhcache 7250 * cleanup operation. 
7251 */ 7252 if (token != NULL && 7253 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 7254 return (token->lt_cct); 7255 7256 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 7257 if (mod_hash_find(vhcache->vhcache_client_hash, 7258 (mod_hash_key_t)name_addr, &hv) == 0) { 7259 if (token) { 7260 token->lt_cct = (mdi_vhcache_client_t *)hv; 7261 token->lt_cct_lookup_time = lbolt64; 7262 } 7263 } else { 7264 if (token) { 7265 token->lt_cct = NULL; 7266 token->lt_cct_lookup_time = 0; 7267 } 7268 hv = NULL; 7269 } 7270 kmem_free(name_addr, len); 7271 return ((mdi_vhcache_client_t *)hv); 7272 } 7273 7274 /* 7275 * Add the specified path to the vhci cache if not already present. 7276 * Also add the vhcache client for the client corresponding to this path 7277 * if it doesn't already exist. 7278 */ 7279 static void 7280 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7281 { 7282 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7283 mdi_vhcache_client_t *cct; 7284 mdi_vhcache_pathinfo_t *cpi; 7285 mdi_phci_t *ph = pip->pi_phci; 7286 mdi_client_t *ct = pip->pi_client; 7287 int cache_updated = 0; 7288 7289 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7290 7291 /* if vhcache client for this pip doesn't already exist, add it */ 7292 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7293 NULL)) == NULL) { 7294 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7295 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 7296 ct->ct_guid, NULL); 7297 enqueue_vhcache_client(vhcache, cct); 7298 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7299 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7300 cache_updated = 1; 7301 } 7302 7303 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7304 if (cpi->cpi_cphci->cphci_phci == ph && 7305 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 7306 cpi->cpi_pip = pip; 7307 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 7308 cpi->cpi_flags &= 7309 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7310 sort_vhcache_paths(cct); 7311 cache_updated = 1; 7312 } 7313 break; 7314 } 7315 } 7316 7317 if (cpi == NULL) { 7318 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7319 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 7320 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 7321 ASSERT(cpi->cpi_cphci != NULL); 7322 cpi->cpi_pip = pip; 7323 enqueue_vhcache_pathinfo(cct, cpi); 7324 cache_updated = 1; 7325 } 7326 7327 rw_exit(&vhcache->vhcache_lock); 7328 7329 if (cache_updated) 7330 vhcache_dirty(vhc); 7331 } 7332 7333 /* 7334 * Remove the reference to the specified path from the vhci cache. 7335 */ 7336 static void 7337 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 7338 { 7339 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7340 mdi_client_t *ct = pip->pi_client; 7341 mdi_vhcache_client_t *cct; 7342 mdi_vhcache_pathinfo_t *cpi; 7343 7344 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7345 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 7346 NULL)) != NULL) { 7347 for (cpi = cct->cct_cpi_head; cpi != NULL; 7348 cpi = cpi->cpi_next) { 7349 if (cpi->cpi_pip == pip) { 7350 cpi->cpi_pip = NULL; 7351 break; 7352 } 7353 } 7354 } 7355 rw_exit(&vhcache->vhcache_lock); 7356 } 7357 7358 /* 7359 * Flush the vhci cache to disk. 7360 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 7361 */ 7362 static int 7363 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 7364 { 7365 nvlist_t *nvl; 7366 int err; 7367 int rv; 7368 7369 /* 7370 * It is possible that the system may shutdown before 7371 * i_ddi_io_initialized (during stmsboot for example). To allow for 7372 * flushing the cache in this case do not check for 7373 * i_ddi_io_initialized when force flag is set. 
7374 */ 7375 if (force_flag == 0 && !i_ddi_io_initialized()) 7376 return (MDI_FAILURE); 7377 7378 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 7379 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 7380 nvlist_free(nvl); 7381 } else 7382 err = EFAULT; 7383 7384 rv = MDI_SUCCESS; 7385 mutex_enter(&vhc->vhc_lock); 7386 if (err != 0) { 7387 if (err == EROFS) { 7388 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 7389 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 7390 MDI_VHC_VHCACHE_DIRTY); 7391 } else { 7392 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 7393 cmn_err(CE_CONT, "%s: update failed\n", 7394 vhc->vhc_vhcache_filename); 7395 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 7396 } 7397 rv = MDI_FAILURE; 7398 } 7399 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 7400 cmn_err(CE_CONT, 7401 "%s: update now ok\n", vhc->vhc_vhcache_filename); 7402 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 7403 } 7404 mutex_exit(&vhc->vhc_lock); 7405 7406 return (rv); 7407 } 7408 7409 /* 7410 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 7411 * Exits itself if left idle for the idle timeout period. 
 */
static void
vhcache_flush_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	clock_t idle_time, quit_at_ticks;
	callb_cpr_t cprinfo;

	/* number of seconds to sleep idle before exiting */
	idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_vhcache_flush");
	mutex_enter(&vhc->vhc_lock);
	for (; ; ) {
		/*
		 * Phase 1: while the cache is dirty, either sleep until the
		 * scheduled flush time or perform the flush.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
			if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
				CALLB_CPR_SAFE_BEGIN(&cprinfo);
				(void) cv_timedwait(&vhc->vhc_cv,
				    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
				CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
			} else {
				/*
				 * Clear the dirty flag before dropping the
				 * lock so that an update arriving during the
				 * flush marks the cache dirty again.
				 */
				vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
				mutex_exit(&vhc->vhc_lock);

				/* a failed flush is rescheduled */
				if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
					vhcache_dirty(vhc);

				mutex_enter(&vhc->vhc_lock);
			}
		}

		quit_at_ticks = ddi_get_lbolt() + idle_time;

		/*
		 * Phase 2: idle until the cache becomes dirty again, exit is
		 * requested, or the idle timeout expires.
		 */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* leave unless there is new dirty data to flush */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
			goto out;
	}

out:
	/* vhc_lock is still held here; let vhcache_dirty() start a new thread */
	vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
7469 */ 7470 static void 7471 vhcache_dirty(mdi_vhci_config_t *vhc) 7472 { 7473 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7474 int create_thread; 7475 7476 rw_enter(&vhcache->vhcache_lock, RW_READER); 7477 /* do not flush cache until the cache is fully built */ 7478 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 7479 rw_exit(&vhcache->vhcache_lock); 7480 return; 7481 } 7482 rw_exit(&vhcache->vhcache_lock); 7483 7484 mutex_enter(&vhc->vhc_lock); 7485 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 7486 mutex_exit(&vhc->vhc_lock); 7487 return; 7488 } 7489 7490 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 7491 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 7492 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 7493 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7494 cv_broadcast(&vhc->vhc_cv); 7495 create_thread = 0; 7496 } else { 7497 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 7498 create_thread = 1; 7499 } 7500 mutex_exit(&vhc->vhc_lock); 7501 7502 if (create_thread) 7503 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 7504 0, &p0, TS_RUN, minclsyspri); 7505 } 7506 7507 /* 7508 * phci bus config structure - one for for each phci bus config operation that 7509 * we initiate on behalf of a vhci. 
7510 */ 7511 typedef struct mdi_phci_bus_config_s { 7512 char *phbc_phci_path; 7513 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 7514 struct mdi_phci_bus_config_s *phbc_next; 7515 } mdi_phci_bus_config_t; 7516 7517 /* vhci bus config structure - one for each vhci bus config operation */ 7518 typedef struct mdi_vhci_bus_config_s { 7519 ddi_bus_config_op_t vhbc_op; /* bus config op */ 7520 major_t vhbc_op_major; /* bus config op major */ 7521 uint_t vhbc_op_flags; /* bus config op flags */ 7522 kmutex_t vhbc_lock; 7523 kcondvar_t vhbc_cv; 7524 int vhbc_thr_count; 7525 } mdi_vhci_bus_config_t; 7526 7527 /* 7528 * bus config the specified phci 7529 */ 7530 static void 7531 bus_config_phci(void *arg) 7532 { 7533 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 7534 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 7535 dev_info_t *ph_dip; 7536 7537 /* 7538 * first configure all path components upto phci and then configure 7539 * the phci children. 7540 */ 7541 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 7542 != NULL) { 7543 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 7544 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 7545 (void) ndi_devi_config_driver(ph_dip, 7546 vhbc->vhbc_op_flags, 7547 vhbc->vhbc_op_major); 7548 } else 7549 (void) ndi_devi_config(ph_dip, 7550 vhbc->vhbc_op_flags); 7551 7552 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7553 ndi_rele_devi(ph_dip); 7554 } 7555 7556 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 7557 kmem_free(phbc, sizeof (*phbc)); 7558 7559 mutex_enter(&vhbc->vhbc_lock); 7560 vhbc->vhbc_thr_count--; 7561 if (vhbc->vhbc_thr_count == 0) 7562 cv_broadcast(&vhbc->vhbc_cv); 7563 mutex_exit(&vhbc->vhbc_lock); 7564 } 7565 7566 /* 7567 * Bus config all phcis associated with the vhci in parallel. 7568 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
7569 */ 7570 static void 7571 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 7572 ddi_bus_config_op_t op, major_t maj) 7573 { 7574 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 7575 mdi_vhci_bus_config_t *vhbc; 7576 mdi_vhcache_phci_t *cphci; 7577 7578 rw_enter(&vhcache->vhcache_lock, RW_READER); 7579 if (vhcache->vhcache_phci_head == NULL) { 7580 rw_exit(&vhcache->vhcache_lock); 7581 return; 7582 } 7583 7584 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 7585 7586 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7587 cphci = cphci->cphci_next) { 7588 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 7589 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 7590 KM_SLEEP); 7591 phbc->phbc_vhbusconfig = vhbc; 7592 phbc->phbc_next = phbc_head; 7593 phbc_head = phbc; 7594 vhbc->vhbc_thr_count++; 7595 } 7596 rw_exit(&vhcache->vhcache_lock); 7597 7598 vhbc->vhbc_op = op; 7599 vhbc->vhbc_op_major = maj; 7600 vhbc->vhbc_op_flags = NDI_NO_EVENT | 7601 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 7602 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 7603 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 7604 7605 /* now create threads to initiate bus config on all phcis in parallel */ 7606 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 7607 phbc_next = phbc->phbc_next; 7608 if (mdi_mtc_off) 7609 bus_config_phci((void *)phbc); 7610 else 7611 (void) thread_create(NULL, 0, bus_config_phci, phbc, 7612 0, &p0, TS_RUN, minclsyspri); 7613 } 7614 7615 mutex_enter(&vhbc->vhbc_lock); 7616 /* wait until all threads exit */ 7617 while (vhbc->vhbc_thr_count > 0) 7618 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 7619 mutex_exit(&vhbc->vhbc_lock); 7620 7621 mutex_destroy(&vhbc->vhbc_lock); 7622 cv_destroy(&vhbc->vhbc_cv); 7623 kmem_free(vhbc, sizeof (*vhbc)); 7624 } 7625 7626 /* 7627 * Single threaded version of bus_config_all_phcis() 7628 */ 7629 static void 7630 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 7631 
ddi_bus_config_op_t op, major_t maj) 7632 { 7633 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7634 7635 single_threaded_vhconfig_enter(vhc); 7636 bus_config_all_phcis(vhcache, flags, op, maj); 7637 single_threaded_vhconfig_exit(vhc); 7638 } 7639 7640 /* 7641 * Perform BUS_CONFIG_ONE on the specified child of the phci. 7642 * The path includes the child component in addition to the phci path. 7643 */ 7644 static int 7645 bus_config_one_phci_child(char *path) 7646 { 7647 dev_info_t *ph_dip, *child; 7648 char *devnm; 7649 int rv = MDI_FAILURE; 7650 7651 /* extract the child component of the phci */ 7652 devnm = strrchr(path, '/'); 7653 *devnm++ = '\0'; 7654 7655 /* 7656 * first configure all path components upto phci and then 7657 * configure the phci child. 7658 */ 7659 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 7660 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 7661 NDI_SUCCESS) { 7662 /* 7663 * release the hold that ndi_devi_config_one() placed 7664 */ 7665 ndi_rele_devi(child); 7666 rv = MDI_SUCCESS; 7667 } 7668 7669 /* release the hold that e_ddi_hold_devi_by_path() placed */ 7670 ndi_rele_devi(ph_dip); 7671 } 7672 7673 devnm--; 7674 *devnm = '/'; 7675 return (rv); 7676 } 7677 7678 /* 7679 * Build a list of phci client paths for the specified vhci client. 7680 * The list includes only those phci client paths which aren't configured yet. 7681 */ 7682 static mdi_phys_path_t * 7683 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 7684 { 7685 mdi_vhcache_pathinfo_t *cpi; 7686 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 7687 int config_path, len; 7688 7689 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7690 /* 7691 * include only those paths that aren't configured. 
7692 */ 7693 config_path = 0; 7694 if (cpi->cpi_pip == NULL) 7695 config_path = 1; 7696 else { 7697 MDI_PI_LOCK(cpi->cpi_pip); 7698 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 7699 config_path = 1; 7700 MDI_PI_UNLOCK(cpi->cpi_pip); 7701 } 7702 7703 if (config_path) { 7704 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 7705 len = strlen(cpi->cpi_cphci->cphci_path) + 7706 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 7707 pp->phys_path = kmem_alloc(len, KM_SLEEP); 7708 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 7709 cpi->cpi_cphci->cphci_path, ct_name, 7710 cpi->cpi_addr); 7711 pp->phys_path_next = NULL; 7712 7713 if (pp_head == NULL) 7714 pp_head = pp; 7715 else 7716 pp_tail->phys_path_next = pp; 7717 pp_tail = pp; 7718 } 7719 } 7720 7721 return (pp_head); 7722 } 7723 7724 /* 7725 * Free the memory allocated for phci client path list. 7726 */ 7727 static void 7728 free_phclient_path_list(mdi_phys_path_t *pp_head) 7729 { 7730 mdi_phys_path_t *pp, *pp_next; 7731 7732 for (pp = pp_head; pp != NULL; pp = pp_next) { 7733 pp_next = pp->phys_path_next; 7734 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 7735 kmem_free(pp, sizeof (*pp)); 7736 } 7737 } 7738 7739 /* 7740 * Allocated async client structure and initialize with the specified values. 7741 */ 7742 static mdi_async_client_config_t * 7743 alloc_async_client_config(char *ct_name, char *ct_addr, 7744 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7745 { 7746 mdi_async_client_config_t *acc; 7747 7748 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 7749 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 7750 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 7751 acc->acc_phclient_path_list_head = pp_head; 7752 init_vhcache_lookup_token(&acc->acc_token, tok); 7753 acc->acc_next = NULL; 7754 return (acc); 7755 } 7756 7757 /* 7758 * Free the memory allocated for the async client structure and their members. 
7759 */ 7760 static void 7761 free_async_client_config(mdi_async_client_config_t *acc) 7762 { 7763 if (acc->acc_phclient_path_list_head) 7764 free_phclient_path_list(acc->acc_phclient_path_list_head); 7765 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 7766 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 7767 kmem_free(acc, sizeof (*acc)); 7768 } 7769 7770 /* 7771 * Sort vhcache pathinfos (cpis) of the specified client. 7772 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7773 * flag set come at the beginning of the list. All cpis which have this 7774 * flag set come at the end of the list. 7775 */ 7776 static void 7777 sort_vhcache_paths(mdi_vhcache_client_t *cct) 7778 { 7779 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 7780 7781 cpi_head = cct->cct_cpi_head; 7782 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 7783 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 7784 cpi_next = cpi->cpi_next; 7785 enqueue_vhcache_pathinfo(cct, cpi); 7786 } 7787 } 7788 7789 /* 7790 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 7791 * every vhcache pathinfo of the specified client. If not adjust the flag 7792 * setting appropriately. 7793 * 7794 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 7795 * on-disk vhci cache. So every time this flag is updated the cache must be 7796 * flushed. 7797 */ 7798 static void 7799 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7800 mdi_vhcache_lookup_token_t *tok) 7801 { 7802 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7803 mdi_vhcache_client_t *cct; 7804 mdi_vhcache_pathinfo_t *cpi; 7805 7806 rw_enter(&vhcache->vhcache_lock, RW_READER); 7807 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 7808 == NULL) { 7809 rw_exit(&vhcache->vhcache_lock); 7810 return; 7811 } 7812 7813 /* 7814 * to avoid unnecessary on-disk cache updates, first check if an 7815 * update is really needed. 
If no update is needed simply return. 7816 */ 7817 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7818 if ((cpi->cpi_pip != NULL && 7819 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 7820 (cpi->cpi_pip == NULL && 7821 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 7822 break; 7823 } 7824 } 7825 if (cpi == NULL) { 7826 rw_exit(&vhcache->vhcache_lock); 7827 return; 7828 } 7829 7830 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 7831 rw_exit(&vhcache->vhcache_lock); 7832 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7833 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 7834 tok)) == NULL) { 7835 rw_exit(&vhcache->vhcache_lock); 7836 return; 7837 } 7838 } 7839 7840 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7841 if (cpi->cpi_pip != NULL) 7842 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7843 else 7844 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 7845 } 7846 sort_vhcache_paths(cct); 7847 7848 rw_exit(&vhcache->vhcache_lock); 7849 vhcache_dirty(vhc); 7850 } 7851 7852 /* 7853 * Configure all specified paths of the client. 7854 */ 7855 static void 7856 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7857 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7858 { 7859 mdi_phys_path_t *pp; 7860 7861 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 7862 (void) bus_config_one_phci_child(pp->phys_path); 7863 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 7864 } 7865 7866 /* 7867 * Dequeue elements from vhci async client config list and bus configure 7868 * their corresponding phci clients. 
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		/* the idle timeout restarts for every request processed */
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work, an exit request, or the idle timeout */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		/* exit with vhc_lock held; it is released at "out" */
		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the request at the head of the list */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* the bus config work is done without holding vhc_lock */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		/* also frees the path list owned by the request */
		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
7922 */ 7923 static void 7924 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 7925 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 7926 { 7927 mdi_async_client_config_t *acc, *newacc; 7928 int create_thread; 7929 7930 if (pp_head == NULL) 7931 return; 7932 7933 if (mdi_mtc_off) { 7934 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 7935 free_phclient_path_list(pp_head); 7936 return; 7937 } 7938 7939 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 7940 ASSERT(newacc); 7941 7942 mutex_enter(&vhc->vhc_lock); 7943 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 7944 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 7945 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 7946 free_async_client_config(newacc); 7947 mutex_exit(&vhc->vhc_lock); 7948 return; 7949 } 7950 } 7951 7952 if (vhc->vhc_acc_list_head == NULL) 7953 vhc->vhc_acc_list_head = newacc; 7954 else 7955 vhc->vhc_acc_list_tail->acc_next = newacc; 7956 vhc->vhc_acc_list_tail = newacc; 7957 vhc->vhc_acc_count++; 7958 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 7959 cv_broadcast(&vhc->vhc_cv); 7960 create_thread = 0; 7961 } else { 7962 vhc->vhc_acc_thrcount++; 7963 create_thread = 1; 7964 } 7965 mutex_exit(&vhc->vhc_lock); 7966 7967 if (create_thread) 7968 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 7969 0, &p0, TS_RUN, minclsyspri); 7970 } 7971 7972 /* 7973 * Return number of online paths for the specified client. 
7974 */ 7975 static int 7976 nonline_paths(mdi_vhcache_client_t *cct) 7977 { 7978 mdi_vhcache_pathinfo_t *cpi; 7979 int online_count = 0; 7980 7981 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7982 if (cpi->cpi_pip != NULL) { 7983 MDI_PI_LOCK(cpi->cpi_pip); 7984 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 7985 online_count++; 7986 MDI_PI_UNLOCK(cpi->cpi_pip); 7987 } 7988 } 7989 7990 return (online_count); 7991 } 7992 7993 /* 7994 * Bus configure all paths for the specified vhci client. 7995 * If at least one path for the client is already online, the remaining paths 7996 * will be configured asynchronously. Otherwise, it synchronously configures 7997 * the paths until at least one path is online and then rest of the paths 7998 * will be configured asynchronously. 7999 */ 8000 static void 8001 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8002 { 8003 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8004 mdi_phys_path_t *pp_head, *pp; 8005 mdi_vhcache_client_t *cct; 8006 mdi_vhcache_lookup_token_t tok; 8007 8008 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8009 8010 init_vhcache_lookup_token(&tok, NULL); 8011 8012 if (ct_name == NULL || ct_addr == NULL || 8013 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8014 == NULL || 8015 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8016 rw_exit(&vhcache->vhcache_lock); 8017 return; 8018 } 8019 8020 /* if at least one path is online, configure the rest asynchronously */ 8021 if (nonline_paths(cct) > 0) { 8022 rw_exit(&vhcache->vhcache_lock); 8023 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8024 return; 8025 } 8026 8027 rw_exit(&vhcache->vhcache_lock); 8028 8029 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8030 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8031 rw_enter(&vhcache->vhcache_lock, RW_READER); 8032 8033 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8034 ct_addr, &tok)) 
== NULL) { 8035 rw_exit(&vhcache->vhcache_lock); 8036 goto out; 8037 } 8038 8039 if (nonline_paths(cct) > 0 && 8040 pp->phys_path_next != NULL) { 8041 rw_exit(&vhcache->vhcache_lock); 8042 config_client_paths_async(vhc, ct_name, ct_addr, 8043 pp->phys_path_next, &tok); 8044 pp->phys_path_next = NULL; 8045 goto out; 8046 } 8047 8048 rw_exit(&vhcache->vhcache_lock); 8049 } 8050 } 8051 8052 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8053 out: 8054 free_phclient_path_list(pp_head); 8055 } 8056 8057 static void 8058 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8059 { 8060 mutex_enter(&vhc->vhc_lock); 8061 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8062 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8063 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8064 mutex_exit(&vhc->vhc_lock); 8065 } 8066 8067 static void 8068 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8069 { 8070 mutex_enter(&vhc->vhc_lock); 8071 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8072 cv_broadcast(&vhc->vhc_cv); 8073 mutex_exit(&vhc->vhc_lock); 8074 } 8075 8076 typedef struct mdi_phci_driver_info { 8077 char *phdriver_name; /* name of the phci driver */ 8078 8079 /* set to non zero if the phci driver supports root device */ 8080 int phdriver_root_support; 8081 } mdi_phci_driver_info_t; 8082 8083 /* 8084 * vhci class and root support capability of a phci driver can be 8085 * specified using ddi-vhci-class and ddi-no-root-support properties in the 8086 * phci driver.conf file. The built-in tables below contain this information 8087 * for those phci drivers whose driver.conf files don't yet contain this info. 8088 * 8089 * All phci drivers expect iscsi have root device support. 
8090 */ 8091 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8092 { "fp", 1 }, 8093 { "iscsi", 0 }, 8094 { "ibsrp", 1 } 8095 }; 8096 8097 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8098 8099 static void * 8100 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8101 { 8102 void *new_ptr; 8103 8104 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8105 if (old_ptr) { 8106 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 8107 kmem_free(old_ptr, old_size); 8108 } 8109 return (new_ptr); 8110 } 8111 8112 static void 8113 add_to_phci_list(char ***driver_list, int **root_support_list, 8114 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8115 { 8116 ASSERT(*cur_elements <= *max_elements); 8117 if (*cur_elements == *max_elements) { 8118 *max_elements += 10; 8119 *driver_list = mdi_realloc(*driver_list, 8120 sizeof (char *) * (*cur_elements), 8121 sizeof (char *) * (*max_elements)); 8122 *root_support_list = mdi_realloc(*root_support_list, 8123 sizeof (int) * (*cur_elements), 8124 sizeof (int) * (*max_elements)); 8125 } 8126 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8127 (*root_support_list)[*cur_elements] = root_support; 8128 (*cur_elements)++; 8129 } 8130 8131 static void 8132 get_phci_driver_list(char *vhci_class, char ***driver_list, 8133 int **root_support_list, int *cur_elements, int *max_elements) 8134 { 8135 mdi_phci_driver_info_t *st_driver_list, *p; 8136 int st_ndrivers, root_support, i, j, driver_conf_count; 8137 major_t m; 8138 struct devnames *dnp; 8139 ddi_prop_t *propp; 8140 8141 *driver_list = NULL; 8142 *root_support_list = NULL; 8143 *cur_elements = 0; 8144 *max_elements = 0; 8145 8146 /* add the phci drivers derived from the phci driver.conf files */ 8147 for (m = 0; m < devcnt; m++) { 8148 dnp = &devnamesp[m]; 8149 8150 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8151 LOCK_DEV_OPS(&dnp->dn_lock); 8152 if (dnp->dn_global_prop_ptr != NULL && 8153 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8154 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8155 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8156 strcmp(propp->prop_val, vhci_class) == 0) { 8157 8158 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8159 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8160 &dnp->dn_global_prop_ptr->prop_list) 8161 == NULL) ? 1 : 0; 8162 8163 add_to_phci_list(driver_list, root_support_list, 8164 cur_elements, max_elements, dnp->dn_name, 8165 root_support); 8166 8167 UNLOCK_DEV_OPS(&dnp->dn_lock); 8168 } else 8169 UNLOCK_DEV_OPS(&dnp->dn_lock); 8170 } 8171 } 8172 8173 driver_conf_count = *cur_elements; 8174 8175 /* add the phci drivers specified in the built-in tables */ 8176 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8177 st_driver_list = scsi_phci_driver_list; 8178 st_ndrivers = sizeof (scsi_phci_driver_list) / 8179 sizeof (mdi_phci_driver_info_t); 8180 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8181 st_driver_list = ib_phci_driver_list; 8182 st_ndrivers = sizeof (ib_phci_driver_list) / 8183 sizeof (mdi_phci_driver_info_t); 8184 } else { 8185 st_driver_list = NULL; 8186 st_ndrivers = 0; 8187 } 8188 8189 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8190 /* add this phci driver if not already added before */ 8191 for (j = 0; j < driver_conf_count; j++) { 8192 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8193 break; 8194 } 8195 if (j == driver_conf_count) { 8196 add_to_phci_list(driver_list, root_support_list, 8197 cur_elements, max_elements, p->phdriver_name, 8198 p->phdriver_root_support); 8199 } 8200 } 8201 } 8202 8203 /* 8204 * Attach the phci driver instances associated with the specified vhci class. 8205 * If root is mounted attach all phci driver instances. 8206 * If root is not mounted, attach the instances of only those phci 8207 * drivers that have the root support. 
8208 */ 8209 static void 8210 attach_phci_drivers(char *vhci_class) 8211 { 8212 char **driver_list, **p; 8213 int *root_support_list; 8214 int cur_elements, max_elements, i; 8215 major_t m; 8216 8217 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8218 &cur_elements, &max_elements); 8219 8220 for (i = 0; i < cur_elements; i++) { 8221 if (modrootloaded || root_support_list[i]) { 8222 m = ddi_name_to_major(driver_list[i]); 8223 if (m != (major_t)-1 && ddi_hold_installed_driver(m)) 8224 ddi_rele_driver(m); 8225 } 8226 } 8227 8228 if (driver_list) { 8229 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8230 kmem_free(*p, strlen(*p) + 1); 8231 kmem_free(driver_list, sizeof (char *) * max_elements); 8232 kmem_free(root_support_list, sizeof (int) * max_elements); 8233 } 8234 } 8235 8236 /* 8237 * Build vhci cache: 8238 * 8239 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8240 * the phci driver instances. During this process the cache gets built. 8241 * 8242 * Cache is built fully if the root is mounted. 8243 * If the root is not mounted, phci drivers that do not have root support 8244 * are not attached. As a result the cache is built partially. The entries 8245 * in the cache reflect only those phci drivers that have root support. 
8246 */ 8247 static int 8248 build_vhci_cache(mdi_vhci_t *vh) 8249 { 8250 mdi_vhci_config_t *vhc = vh->vh_config; 8251 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8252 8253 single_threaded_vhconfig_enter(vhc); 8254 8255 rw_enter(&vhcache->vhcache_lock, RW_READER); 8256 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 8257 rw_exit(&vhcache->vhcache_lock); 8258 single_threaded_vhconfig_exit(vhc); 8259 return (0); 8260 } 8261 rw_exit(&vhcache->vhcache_lock); 8262 8263 attach_phci_drivers(vh->vh_class); 8264 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 8265 BUS_CONFIG_ALL, (major_t)-1); 8266 8267 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8268 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 8269 rw_exit(&vhcache->vhcache_lock); 8270 8271 single_threaded_vhconfig_exit(vhc); 8272 vhcache_dirty(vhc); 8273 return (1); 8274 } 8275 8276 /* 8277 * Determine if discovery of paths is needed. 8278 */ 8279 static int 8280 vhcache_do_discovery(mdi_vhci_config_t *vhc) 8281 { 8282 int rv = 1; 8283 8284 mutex_enter(&vhc->vhc_lock); 8285 if (i_ddi_io_initialized() == 0) { 8286 if (vhc->vhc_path_discovery_boot > 0) { 8287 vhc->vhc_path_discovery_boot--; 8288 goto out; 8289 } 8290 } else { 8291 if (vhc->vhc_path_discovery_postboot > 0) { 8292 vhc->vhc_path_discovery_postboot--; 8293 goto out; 8294 } 8295 } 8296 8297 /* 8298 * Do full path discovery at most once per mdi_path_discovery_interval. 8299 * This is to avoid a series of full path discoveries when opening 8300 * stale /dev/[r]dsk links. 8301 */ 8302 if (mdi_path_discovery_interval != -1 && 8303 lbolt64 >= vhc->vhc_path_discovery_cutoff_time) 8304 goto out; 8305 8306 rv = 0; 8307 out: 8308 mutex_exit(&vhc->vhc_lock); 8309 return (rv); 8310 } 8311 8312 /* 8313 * Discover all paths: 8314 * 8315 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 8316 * driver instances. During this process all paths will be discovered. 
8317 */ 8318 static int 8319 vhcache_discover_paths(mdi_vhci_t *vh) 8320 { 8321 mdi_vhci_config_t *vhc = vh->vh_config; 8322 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8323 int rv = 0; 8324 8325 single_threaded_vhconfig_enter(vhc); 8326 8327 if (vhcache_do_discovery(vhc)) { 8328 attach_phci_drivers(vh->vh_class); 8329 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8330 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8331 8332 mutex_enter(&vhc->vhc_lock); 8333 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8334 mdi_path_discovery_interval * TICKS_PER_SECOND; 8335 mutex_exit(&vhc->vhc_lock); 8336 rv = 1; 8337 } 8338 8339 single_threaded_vhconfig_exit(vhc); 8340 return (rv); 8341 } 8342 8343 /* 8344 * Generic vhci bus config implementation: 8345 * 8346 * Parameters 8347 * vdip vhci dip 8348 * flags bus config flags 8349 * op bus config operation 8350 * The remaining parameters are bus config operation specific 8351 * 8352 * for BUS_CONFIG_ONE 8353 * arg pointer to name@addr 8354 * child upon successful return from this function, *child will be 8355 * set to the configured and held devinfo child node of vdip. 8356 * ct_addr pointer to client address (i.e. GUID) 8357 * 8358 * for BUS_CONFIG_DRIVER 8359 * arg major number of the driver 8360 * child and ct_addr parameters are ignored 8361 * 8362 * for BUS_CONFIG_ALL 8363 * arg, child, and ct_addr parameters are ignored 8364 * 8365 * Note that for the rest of the bus config operations, this function simply 8366 * calls the framework provided default bus config routine. 
8367 */ 8368 int 8369 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8370 void *arg, dev_info_t **child, char *ct_addr) 8371 { 8372 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8373 mdi_vhci_config_t *vhc = vh->vh_config; 8374 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8375 int rv = 0; 8376 int params_valid = 0; 8377 char *cp; 8378 8379 /* 8380 * To bus config vhcis we relay operation, possibly using another 8381 * thread, to phcis. The phci driver then interacts with MDI to cause 8382 * vhci child nodes to be enumerated under the vhci node. Adding a 8383 * vhci child requires an ndi_devi_enter of the vhci. Since another 8384 * thread may be adding the child, to avoid deadlock we can't wait 8385 * for the relayed operations to complete if we have already entered 8386 * the vhci node. 8387 */ 8388 if (DEVI_BUSY_OWNED(vdip)) { 8389 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8390 "vhci dip is busy owned %p\n", (void *)vdip)); 8391 goto default_bus_config; 8392 } 8393 8394 rw_enter(&vhcache->vhcache_lock, RW_READER); 8395 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8396 rw_exit(&vhcache->vhcache_lock); 8397 rv = build_vhci_cache(vh); 8398 rw_enter(&vhcache->vhcache_lock, RW_READER); 8399 } 8400 8401 switch (op) { 8402 case BUS_CONFIG_ONE: 8403 if (arg != NULL && ct_addr != NULL) { 8404 /* extract node name */ 8405 cp = (char *)arg; 8406 while (*cp != '\0' && *cp != '@') 8407 cp++; 8408 if (*cp == '@') { 8409 params_valid = 1; 8410 *cp = '\0'; 8411 config_client_paths(vhc, (char *)arg, ct_addr); 8412 /* config_client_paths() releases cache_lock */ 8413 *cp = '@'; 8414 break; 8415 } 8416 } 8417 8418 rw_exit(&vhcache->vhcache_lock); 8419 break; 8420 8421 case BUS_CONFIG_DRIVER: 8422 rw_exit(&vhcache->vhcache_lock); 8423 if (rv == 0) 8424 st_bus_config_all_phcis(vhc, flags, op, 8425 (major_t)(uintptr_t)arg); 8426 break; 8427 8428 case BUS_CONFIG_ALL: 8429 rw_exit(&vhcache->vhcache_lock); 8430 if (rv == 0) 8431 
st_bus_config_all_phcis(vhc, flags, op, -1); 8432 break; 8433 8434 default: 8435 rw_exit(&vhcache->vhcache_lock); 8436 break; 8437 } 8438 8439 8440 default_bus_config: 8441 /* 8442 * All requested child nodes are enumerated under the vhci. 8443 * Now configure them. 8444 */ 8445 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8446 NDI_SUCCESS) { 8447 return (MDI_SUCCESS); 8448 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8449 /* discover all paths and try configuring again */ 8450 if (vhcache_discover_paths(vh) && 8451 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8452 NDI_SUCCESS) 8453 return (MDI_SUCCESS); 8454 } 8455 8456 return (MDI_FAILURE); 8457 } 8458 8459 /* 8460 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8461 */ 8462 static nvlist_t * 8463 read_on_disk_vhci_cache(char *vhci_class) 8464 { 8465 nvlist_t *nvl; 8466 int err; 8467 char *filename; 8468 8469 filename = vhclass2vhcache_filename(vhci_class); 8470 8471 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8472 kmem_free(filename, strlen(filename) + 1); 8473 return (nvl); 8474 } else if (err == EIO) 8475 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8476 else if (err == EINVAL) 8477 cmn_err(CE_WARN, 8478 "%s: data file corrupted, will recreate\n", filename); 8479 8480 kmem_free(filename, strlen(filename) + 1); 8481 return (NULL); 8482 } 8483 8484 /* 8485 * Read on-disk vhci cache into nvlists for all vhci classes. 8486 * Called during booting by i_ddi_read_devices_files(). 8487 */ 8488 void 8489 mdi_read_devices_files(void) 8490 { 8491 int i; 8492 8493 for (i = 0; i < N_VHCI_CLASSES; i++) 8494 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8495 } 8496 8497 /* 8498 * Remove all stale entries from vhci cache. 
8499 */ 8500 static void 8501 clean_vhcache(mdi_vhci_config_t *vhc) 8502 { 8503 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8504 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8505 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8506 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8507 8508 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8509 8510 cct_head = vhcache->vhcache_client_head; 8511 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8512 for (cct = cct_head; cct != NULL; cct = cct_next) { 8513 cct_next = cct->cct_next; 8514 8515 cpi_head = cct->cct_cpi_head; 8516 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8517 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8518 cpi_next = cpi->cpi_next; 8519 if (cpi->cpi_pip != NULL) { 8520 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8521 enqueue_tail_vhcache_pathinfo(cct, cpi); 8522 } else 8523 free_vhcache_pathinfo(cpi); 8524 } 8525 8526 if (cct->cct_cpi_head != NULL) 8527 enqueue_vhcache_client(vhcache, cct); 8528 else { 8529 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8530 (mod_hash_key_t)cct->cct_name_addr); 8531 free_vhcache_client(cct); 8532 } 8533 } 8534 8535 cphci_head = vhcache->vhcache_phci_head; 8536 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8537 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8538 cphci_next = cphci->cphci_next; 8539 if (cphci->cphci_phci != NULL) 8540 enqueue_vhcache_phci(vhcache, cphci); 8541 else 8542 free_vhcache_phci(cphci); 8543 } 8544 8545 vhcache->vhcache_clean_time = lbolt64; 8546 rw_exit(&vhcache->vhcache_lock); 8547 vhcache_dirty(vhc); 8548 } 8549 8550 /* 8551 * Remove all stale entries from vhci cache. 
8552 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8553 */ 8554 void 8555 mdi_clean_vhcache(void) 8556 { 8557 mdi_vhci_t *vh; 8558 8559 mutex_enter(&mdi_mutex); 8560 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8561 vh->vh_refcnt++; 8562 mutex_exit(&mdi_mutex); 8563 clean_vhcache(vh->vh_config); 8564 mutex_enter(&mdi_mutex); 8565 vh->vh_refcnt--; 8566 } 8567 mutex_exit(&mdi_mutex); 8568 } 8569 8570 /* 8571 * mdi_vhci_walk_clients(): 8572 * Walker routine to traverse client dev_info nodes 8573 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8574 * below the client, including nexus devices, which we dont want. 8575 * So we just traverse the immediate siblings, starting from 1st client. 8576 */ 8577 void 8578 mdi_vhci_walk_clients(dev_info_t *vdip, 8579 int (*f)(dev_info_t *, void *), void *arg) 8580 { 8581 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8582 dev_info_t *cdip; 8583 mdi_client_t *ct; 8584 8585 MDI_VHCI_CLIENT_LOCK(vh); 8586 cdip = ddi_get_child(vdip); 8587 while (cdip) { 8588 ct = i_devi_get_client(cdip); 8589 MDI_CLIENT_LOCK(ct); 8590 8591 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 8592 cdip = ddi_get_next_sibling(cdip); 8593 else 8594 cdip = NULL; 8595 8596 MDI_CLIENT_UNLOCK(ct); 8597 } 8598 MDI_VHCI_CLIENT_UNLOCK(vh); 8599 } 8600 8601 /* 8602 * mdi_vhci_walk_phcis(): 8603 * Walker routine to traverse phci dev_info nodes 8604 */ 8605 void 8606 mdi_vhci_walk_phcis(dev_info_t *vdip, 8607 int (*f)(dev_info_t *, void *), void *arg) 8608 { 8609 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8610 mdi_phci_t *ph, *next; 8611 8612 MDI_VHCI_PHCI_LOCK(vh); 8613 ph = vh->vh_phci_head; 8614 while (ph) { 8615 MDI_PHCI_LOCK(ph); 8616 8617 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 8618 next = ph->ph_next; 8619 else 8620 next = NULL; 8621 8622 MDI_PHCI_UNLOCK(ph); 8623 ph = next; 8624 } 8625 MDI_VHCI_PHCI_UNLOCK(vh); 8626 } 8627 8628 8629 /* 8630 * mdi_walk_vhcis(): 8631 * Walker routine to traverse vhci 
dev_info nodes 8632 */ 8633 void 8634 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8635 { 8636 mdi_vhci_t *vh = NULL; 8637 8638 mutex_enter(&mdi_mutex); 8639 /* 8640 * Scan for already registered vhci 8641 */ 8642 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8643 vh->vh_refcnt++; 8644 mutex_exit(&mdi_mutex); 8645 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8646 mutex_enter(&mdi_mutex); 8647 vh->vh_refcnt--; 8648 break; 8649 } else { 8650 mutex_enter(&mdi_mutex); 8651 vh->vh_refcnt--; 8652 } 8653 } 8654 8655 mutex_exit(&mdi_mutex); 8656 } 8657 8658 /* 8659 * i_mdi_log_sysevent(): 8660 * Logs events for pickup by syseventd 8661 */ 8662 static void 8663 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8664 { 8665 char *path_name; 8666 nvlist_t *attr_list; 8667 8668 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8669 KM_SLEEP) != DDI_SUCCESS) { 8670 goto alloc_failed; 8671 } 8672 8673 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8674 (void) ddi_pathname(dip, path_name); 8675 8676 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8677 ddi_driver_name(dip)) != DDI_SUCCESS) { 8678 goto error; 8679 } 8680 8681 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8682 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8683 goto error; 8684 } 8685 8686 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8687 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8688 goto error; 8689 } 8690 8691 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8692 path_name) != DDI_SUCCESS) { 8693 goto error; 8694 } 8695 8696 if (nvlist_add_string(attr_list, DDI_CLASS, 8697 ph_vh_class) != DDI_SUCCESS) { 8698 goto error; 8699 } 8700 8701 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8702 attr_list, NULL, DDI_SLEEP); 8703 8704 error: 8705 kmem_free(path_name, MAXPATHLEN); 8706 nvlist_free(attr_list); 8707 return; 8708 8709 alloc_failed: 8710 MDI_DEBUG(1, (CE_WARN, dip, 8711 "!i_mdi_log_sysevent: Unable to send sysevent")); 8712 } 
8713 8714 char ** 8715 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 8716 { 8717 char **driver_list, **ret_driver_list = NULL; 8718 int *root_support_list; 8719 int cur_elements, max_elements; 8720 8721 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8722 &cur_elements, &max_elements); 8723 8724 8725 if (driver_list) { 8726 kmem_free(root_support_list, sizeof (int) * max_elements); 8727 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 8728 * max_elements, sizeof (char *) * cur_elements); 8729 } 8730 *ndrivers = cur_elements; 8731 8732 return (ret_driver_list); 8733 8734 } 8735 8736 void 8737 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 8738 { 8739 char **p; 8740 int i; 8741 8742 if (driver_list) { 8743 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 8744 kmem_free(*p, strlen(*p) + 1); 8745 kmem_free(driver_list, sizeof (char *) * ndrivers); 8746 } 8747 } 8748