1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impl.h for a more 29 * detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 72 #ifdef DEBUG 73 #include <sys/debug.h> 74 int mdi_debug = 1; 75 int mdi_debug_logonly = 0; 76 #define MDI_DEBUG(level, stmnt) \ 77 if (mdi_debug >= (level)) i_mdi_log stmnt 78 static void i_mdi_log(int, dev_info_t *, const char *fmt, ...); 79 #else /* !DEBUG */ 80 #define MDI_DEBUG(level, stmnt) 81 #endif /* DEBUG */ 82 83 extern pri_t minclsyspri; 84 extern int modrootloaded; 85 86 /* 87 * Global mutex: 88 * Protects vHCI list and structure members. 
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

/* clock ticks per second (drv_usectohz of one million microseconds) */
#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * MDI component property name/value string definitions
 */
const char		*mdi_component_prop = "mpxio-component";
const char		*mdi_component_prop_vhci = "vhci";
const char		*mdi_component_prop_phci = "phci";
const char		*mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char		*mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char		*mdi_load_balance = "load-balance";
const char		*mdi_load_balance_none = "none";
const char		*mdi_load_balance_rr = "round-robin";
const char		*mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/* format string used when a second vHCI driver registers for a class */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";

/*
 * Static function prototypes
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static int		i_mdi_pi_kstat_create(mdi_pathinfo_t *);
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
/*
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
			    int, int);
static mdi_pathinfo_t	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
			    mdi_vhci_t *vh, int flags, int op);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
static boolean_t	stop_vhcache_flush_thread(void *, int);
static void		free_string_array(char **, int);
static void		free_vhcache_phci(mdi_vhcache_phci_t *);
static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void		free_vhcache_client(mdi_vhcache_client_t *);
static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_pi_add(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		vhcache_pi_remove(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		free_phclient_path_list(mdi_phys_path_t *);
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);

/*
 * called once when first vhci registers with mdi
 *
 * NOTE(review): the 'initialized' flag is not itself lock-protected;
 * presumably first-registration is serialized by the caller's
 * ndi_devi_enter of the vHCI parent — confirm before relying on it.
 */
static void
i_mdi_init()
{
	static int initialized = 0;

	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
	/*
	 * Create our taskq resources
	 */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */
}

/*
 * mdi_get_component_type():
 *		Return mpxio component type
 * Return Values:
 *		MDI_COMPONENT_NONE
 *		MDI_COMPONENT_VHCI
 *		MDI_COMPONENT_PHCI
 *		MDI_COMPONENT_CLIENT
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed when clients migrate mdi_component_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	return (DEVI(dip)->devi_mdi_component);
}

/*
 * mdi_vhci_register():
 *		Register a vHCI module with the mpxio framework
 *		mdi_vhci_register() is called by vHCI drivers to register the
 *		'class_driver' vHCI driver and its MDI entrypoints with the
 *		mpxio framework.  The vHCI driver must call this interface as
 *		part of its attach(9e) handler.
 *		Competing threads may try to attach mdi_vhci_register() as
 *		the vHCI drivers are loaded and attached as a result of pHCI
 *		driver instance registration (mdi_phci_register()) with the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	ASSERT(vops->vo_revision == MDI_VHCI_OPS_REV);
	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created.  Check for valid
			 * vHCI ops registration.  We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			/* ops-less entry for this class exists; adopt it */
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/*
		 * Default to round-robin load balancing unless the vHCI's
		 * "load-balance" property selects "none" or "logical-block".
		 */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* append to the global vHCI list (head/tail maintained) */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_vhci_unregister():
 *		Unregister a vHCI module from mpxio framework
 *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *		of a vhci to unregister it from the framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/* tear down the vHCI state and drop the devinfo back reference */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}

/*
 * i_mdi_vhci_class2vhci():
 *		Look for a matching vHCI module given a vHCI class name
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*vh = NULL;

	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			break;
		}
	}
	mutex_exit(&mdi_mutex);
	return (vh);
}

/*
 * i_devi_get_vhci():
 *		Utility function to get the handle to a vHCI component
 * Return Values:
 *		Handle to a vHCI component
 *		NULL
 */
mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	mdi_vhci_t	*vh = NULL;
	if (MDI_VHCI(vdip)) {
		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
	}
	return (vh);
}

/*
 * mdi_phci_register():
 *		Register a pHCI module with mpxio framework
 *		mdi_phci_register() is called by pHCI drivers to register with
 *		the mpxio framework and a specific 'class_driver' vHCI.  The
 *		pHCI driver must call this interface as part of its attach(9e)
 *		handler.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;
	char			*pathname;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(pdip, pathname);

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (CE_CONT, pdip,
			    "?%s (%s%d) multipath capabilities "
			    "disabled via %s.conf.\n", pathname,
			    ddi_driver_name(pdip), ddi_get_instance(pdip),
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			kmem_free(pathname, MAXPATHLEN);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	kmem_free(pathname, MAXPATHLEN);

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* append this pHCI to the vHCI's pHCI list */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *		Unregister a pHCI module from mpxio framework
 *		mdi_phci_unregister() is called by the pHCI drivers from their
 *		detach(9E) handler to unregister their instances from the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_phci_t	*tmp;
	mdi_phci_t	*prev = NULL;

	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!pHCI unregister: Not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/* unlink the pHCI from the vHCI's pHCI list */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *		Utility function to return the phci extensions.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;
	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * Single thread mdi entry into devinfo node for modifying its children.
 * If necessary we perform an ndi_devi_enter of the vHCI before doing
 * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a threads that went down the code path leading to
	 * detach_node(). This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and the enter down the path
	 * to the pHCI. If pHCI is detaching then we piggyback this calls
	 * enter of the vHCI on frameworks vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/* piggyback on framework's vHCI enter */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else {
			delay(1);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);
	/*
	 * Pack both circular recursion values into one int: vHCI value
	 * (or -1 if piggybacked) in the upper 16 bits, pHCI in the lower.
	 */
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
 */
void
mdi_devi_exit(dev_info_t *phci_dip, int circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/* extract two circular recursion values from single int */
	pcircular = (short)(circular & 0xFFFF);
	vcircular = (short)((circular >> 16) & 0xFFFF);

	ndi_devi_exit(phci_dip, pcircular);
	if (vcircular != -1)
		ndi_devi_exit(vdip, vcircular);
}

/*
 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 * with vHCI power management code during path online/offline.  Each
 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 * occur within the scope of an active mdi_devi_enter that establishes the
 * circular value.
 */
void
mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	/* drop only the pHCI half of the packed circular value */
	pcircular = (short)(circular & 0xFFFF);
	ndi_devi_exit(phci_dip, pcircular);
}

void
mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	ndi_devi_enter(phci_dip, &pcircular);

	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
}

/*
 * mdi_devi_get_vdip():
 *		given a pHCI dip return vHCI dip
 */
dev_info_t *
mdi_devi_get_vdip(dev_info_t *pdip)
{
	mdi_phci_t	*ph;

	ph = i_devi_get_phci(pdip);
	if (ph && ph->ph_vhci)
		return (ph->ph_vhci->vh_dip);
	return (NULL);
}

/*
 * mdi_devi_pdip_entered():
 *		Return 1 if we are vHCI and have done an ndi_devi_enter
 *		of a pHCI
 */
int
mdi_devi_pdip_entered(dev_info_t *vdip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;

	vh = i_devi_get_vhci(vdip);
	if (vh == NULL)
		return (0);

	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph) {
		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
			MDI_VHCI_PHCI_UNLOCK(vh);
			return (1);
		}
		ph = ph->ph_next;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	return (0);
}

/*
 * mdi_phci_path2devinfo():
 *		Utility function to search for a valid phci device given
 *		the devfs pathname.
 */
dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	MDI_VHCI_PHCI_LOCK(vh);
	/* walk the vHCI's pHCI list comparing each node's devfs path */
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	if (ph == NULL) {
		pdip = NULL;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *		get number of path information nodes associated with a given
 *		pHCI device.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/* Reverse locking is requested. */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			/*
			 * tryenter failed.  Try to grab again
			 * after a small delay
			 */
			MDI_PI_HOLD(pip);
			MDI_PI_UNLOCK(pip);
			delay(1);
			MDI_PI_LOCK(pip);
			MDI_PI_RELE(pip);
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *		Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *		create client device's devinfo node
 * Return Values:
 *		dev_info
 *		NULL
 * Notes:
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
    char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client dip %p already exists",
		    (void *)cdip);
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* unwind: release any properties and free the allocated node */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *		Find a matching devinfo node for given client node name
 *		and its guid.
 * Return Values:
 *		Handle to a dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char			*data;
	dev_info_t		*cdip = NULL;
	dev_info_t		*ndip = NULL;
	int			circular;

	/* walk the vHCI's children matching on node name and client GUID */
	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *		Remove a client device node
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;

	if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
	    (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
		rv = ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
		if (rv != NDI_SUCCESS) {
			MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_devinfo_remove:"
			    " failed. cdip = %p\n", (void *)cdip));
		}
		/*
		 * Convert to MDI error code
		 */
		switch (rv) {
		case NDI_SUCCESS:
			rv = MDI_SUCCESS;
			break;
		case NDI_BUSY:
			rv = MDI_BUSY;
			break;
		default:
			rv = MDI_FAILURE;
			break;
		}
	}
	return (rv);
}

/*
 * i_devi_get_client()
 *		Utility function to get mpxio component extensions
 */
static mdi_client_t *
i_devi_get_client(dev_info_t *cdip)
{
	mdi_client_t	*ct = NULL;

	if (MDI_CLIENT(cdip)) {
		ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
	}
	return (ct);
}

/*
 * i_mdi_is_child_present():
 *		Search for the presence of client device dev_info node
 */
static int
i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
{
	int		rv = MDI_FAILURE;
	struct dev_info	*dip;
	int		circular;

	ndi_devi_enter(vdip, &circular);
	dip = DEVI(vdip)->devi_child;
	while (dip) {
		if (dip == DEVI(cdip)) {
			rv = MDI_SUCCESS;
			break;
		}
		dip = dip->devi_sibling;
	}
	ndi_devi_exit(vdip, circular);
	return (rv);
}


/*
 * i_mdi_client_lock():
 *		Grab client component lock
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and lock
 *		mechanism depending on the requested parameter option.
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			/*
			 * tryenter failed.
Try to grab again 1197 * after a small delay 1198 */ 1199 MDI_PI_HOLD(pip); 1200 MDI_PI_UNLOCK(pip); 1201 delay(1); 1202 MDI_PI_LOCK(pip); 1203 MDI_PI_RELE(pip); 1204 } 1205 } else { 1206 MDI_CLIENT_LOCK(ct); 1207 } 1208 } 1209 1210 /* 1211 * i_mdi_client_unlock(): 1212 * Unlock a client component 1213 */ 1214 static void 1215 i_mdi_client_unlock(mdi_client_t *ct) 1216 { 1217 MDI_CLIENT_UNLOCK(ct); 1218 } 1219 1220 /* 1221 * i_mdi_client_alloc(): 1222 * Allocate and initialize a client structure. Caller should 1223 * hold the vhci client lock. 1224 * Return Values: 1225 * Handle to a client component 1226 */ 1227 /*ARGSUSED*/ 1228 static mdi_client_t * 1229 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1230 { 1231 mdi_client_t *ct; 1232 1233 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1234 1235 /* 1236 * Allocate and initialize a component structure. 1237 */ 1238 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1239 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1240 ct->ct_hnext = NULL; 1241 ct->ct_hprev = NULL; 1242 ct->ct_dip = NULL; 1243 ct->ct_vhci = vh; 1244 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1245 (void) strcpy(ct->ct_drvname, name); 1246 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1247 (void) strcpy(ct->ct_guid, lguid); 1248 ct->ct_cprivate = NULL; 1249 ct->ct_vprivate = NULL; 1250 ct->ct_flags = 0; 1251 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1252 MDI_CLIENT_LOCK(ct); 1253 MDI_CLIENT_SET_OFFLINE(ct); 1254 MDI_CLIENT_SET_DETACH(ct); 1255 MDI_CLIENT_SET_POWER_UP(ct); 1256 MDI_CLIENT_UNLOCK(ct); 1257 ct->ct_failover_flags = 0; 1258 ct->ct_failover_status = 0; 1259 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1260 ct->ct_unstable = 0; 1261 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1262 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1263 ct->ct_lb = vh->vh_lb; 1264 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1265 ct->ct_lb_args->region_size = 
LOAD_BALANCE_DEFAULT_REGION_SIZE; 1266 ct->ct_path_count = 0; 1267 ct->ct_path_head = NULL; 1268 ct->ct_path_tail = NULL; 1269 ct->ct_path_last = NULL; 1270 1271 /* 1272 * Add this client component to our client hash queue 1273 */ 1274 i_mdi_client_enlist_table(vh, ct); 1275 return (ct); 1276 } 1277 1278 /* 1279 * i_mdi_client_enlist_table(): 1280 * Attach the client device to the client hash table. Caller 1281 * should hold the vhci client lock. 1282 */ 1283 static void 1284 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1285 { 1286 int index; 1287 struct client_hash *head; 1288 1289 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1290 1291 index = i_mdi_get_hash_key(ct->ct_guid); 1292 head = &vh->vh_client_table[index]; 1293 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1294 head->ct_hash_head = ct; 1295 head->ct_hash_count++; 1296 vh->vh_client_count++; 1297 } 1298 1299 /* 1300 * i_mdi_client_delist_table(): 1301 * Attach the client device to the client hash table. 1302 * Caller should hold the vhci client lock. 
 */
static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int index;
	char *guid;
	struct client_hash *head;
	mdi_client_t *next;
	mdi_client_t *last;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	guid = ct->ct_guid;
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* Singly-linked bucket chain: track the predecessor while scanning */
	last = NULL;
	next = (mdi_client_t *)head->ct_hash_head;
	while (next != NULL) {
		if (next == ct) {
			break;
		}
		last = next;
		next = next->ct_hnext;
	}

	/* Unlink only if the entry was actually found on the chain */
	if (next) {
		head->ct_hash_count--;
		if (last == NULL) {
			head->ct_hash_head = ct->ct_hnext;
		} else {
			last->ct_hnext = ct->ct_hnext;
		}
		ct->ct_hnext = NULL;
		vh->vh_client_count--;
	}
}


/*
 * i_mdi_client_free():
 *		Free a client component
 * Return Values:
 *		MDI_SUCCESS (always, currently)
 * Note:
 *		Caller holds the vhci client lock; it is dropped and
 *		reacquired around the devinfo node removal at the end.
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int rv = MDI_SUCCESS;
	int flags = ct->ct_flags;
	dev_info_t *cdip;
	dev_info_t *vdip;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	/* Strip the MDI client decoration from the devinfo node */
	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = NULL;

	/*
	 * Clear out back ref. to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	/*
	 * Drop the vhci client lock across the node removal; ndi_devi_offline
	 * may block, and cdip is no longer reachable through the freed ct.
	 */
	if (cdip != NULL) {
		MDI_VHCI_CLIENT_UNLOCK(vh);
		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
		MDI_VHCI_CLIENT_LOCK(vh);
	}
	return (rv);
}

/*
 * i_mdi_client_find():
 *		Find the client structure corresponding to a given guid
 *		(and, if cname is non-NULL, a matching driver name).
 *		Caller should hold the vhci client lock.
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
	int index;
	struct client_hash *head;
	mdi_client_t *ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	ct = head->ct_hash_head;
	while (ct != NULL) {
		if (strcmp(ct->ct_guid, guid) == 0 &&
		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
			break;
		}
		ct = ct->ct_hnext;
	}
	return (ct);
}

/*
 * i_mdi_client_update_state():
 *		Compute and update client device state
 * Notes:
 *		A client device can be in any of three possible states:
 *
 *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *		than one online/standby path. Can tolerate failures.
 *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *		no alternate paths available as standby. A failure on the online
 *		would result in loss of access to device data.
 * MDI_CLIENT_STATE_FAILED - Client device in failed state with
 * no paths available to access the device.
 */
static void
i_mdi_client_update_state(mdi_client_t *ct)
{
	int state;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	state = i_mdi_client_compute_state(ct, NULL);
	MDI_CLIENT_SET_STATE(ct, state);
}

/*
 * i_mdi_client_compute_state():
 *		Compute client device state
 *
 *		mdi_phci_t *	Pointer to a pHCI structure whose paths are
 *				excluded while computing the new value (may be
 *				NULL to count all paths). Used by
 *				i_mdi_phci_offline() to find the new
 *				client state after DR of a pHCI.
 */
static int
i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
{
	int state;
	int online_count = 0;
	int standby_count = 0;
	mdi_pathinfo_t *pip, *next;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Tally ONLINE and STANDBY paths, skipping any owned by 'ph' */
	pip = ct->ct_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		if (MDI_PI(pip)->pi_phci == ph) {
			MDI_PI_UNLOCK(pip);
			pip = next;
			continue;
		}

		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_ONLINE)
			online_count++;
		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_STANDBY)
			standby_count++;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	/*
	 * Map the counts to a client state: FAILED when nothing is usable,
	 * DEGRADED when a single path loss would cut access, else OPTIMAL.
	 */
	if (online_count == 0) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_FAILED;
			MDI_DEBUG(2, (CE_NOTE, NULL, "!client state: failed"
			    " ct = %p\n", (void *)ct));
		} else if (standby_count == 1) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else if (online_count == 1) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else {
		state = MDI_CLIENT_STATE_OPTIMAL;
	}
	return (state);
}

1505 /* 1506 * i_mdi_client2devinfo(): 1507 * Utility function 1508 */ 1509 dev_info_t * 1510 i_mdi_client2devinfo(mdi_client_t *ct) 1511 { 1512 return (ct->ct_dip); 1513 } 1514 1515 /* 1516 * mdi_client_path2_devinfo(): 1517 * Given the parent devinfo and child devfs pathname, search for 1518 * a valid devfs node handle. 1519 */ 1520 dev_info_t * 1521 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1522 { 1523 dev_info_t *cdip = NULL; 1524 dev_info_t *ndip = NULL; 1525 char *temp_pathname; 1526 int circular; 1527 1528 /* 1529 * Allocate temp buffer 1530 */ 1531 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1532 1533 /* 1534 * Lock parent against changes 1535 */ 1536 ndi_devi_enter(vdip, &circular); 1537 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1538 while ((cdip = ndip) != NULL) { 1539 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1540 1541 *temp_pathname = '\0'; 1542 (void) ddi_pathname(cdip, temp_pathname); 1543 if (strcmp(temp_pathname, pathname) == 0) { 1544 break; 1545 } 1546 } 1547 /* 1548 * Release devinfo lock 1549 */ 1550 ndi_devi_exit(vdip, circular); 1551 1552 /* 1553 * Free the temp buffer 1554 */ 1555 kmem_free(temp_pathname, MAXPATHLEN); 1556 return (cdip); 1557 } 1558 1559 /* 1560 * mdi_client_get_path_count(): 1561 * Utility function to get number of path information nodes 1562 * associated with a given client device. 
1563 */ 1564 int 1565 mdi_client_get_path_count(dev_info_t *cdip) 1566 { 1567 mdi_client_t *ct; 1568 int count = 0; 1569 1570 ct = i_devi_get_client(cdip); 1571 if (ct != NULL) { 1572 count = ct->ct_path_count; 1573 } 1574 return (count); 1575 } 1576 1577 1578 /* 1579 * i_mdi_get_hash_key(): 1580 * Create a hash using strings as keys 1581 * 1582 */ 1583 static int 1584 i_mdi_get_hash_key(char *str) 1585 { 1586 uint32_t g, hash = 0; 1587 char *p; 1588 1589 for (p = str; *p != '\0'; p++) { 1590 g = *p; 1591 hash += g; 1592 } 1593 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1594 } 1595 1596 /* 1597 * mdi_get_lb_policy(): 1598 * Get current load balancing policy for a given client device 1599 */ 1600 client_lb_t 1601 mdi_get_lb_policy(dev_info_t *cdip) 1602 { 1603 client_lb_t lb = LOAD_BALANCE_NONE; 1604 mdi_client_t *ct; 1605 1606 ct = i_devi_get_client(cdip); 1607 if (ct != NULL) { 1608 lb = ct->ct_lb; 1609 } 1610 return (lb); 1611 } 1612 1613 /* 1614 * mdi_set_lb_region_size(): 1615 * Set current region size for the load-balance 1616 */ 1617 int 1618 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1619 { 1620 mdi_client_t *ct; 1621 int rv = MDI_FAILURE; 1622 1623 ct = i_devi_get_client(cdip); 1624 if (ct != NULL && ct->ct_lb_args != NULL) { 1625 ct->ct_lb_args->region_size = region_size; 1626 rv = MDI_SUCCESS; 1627 } 1628 return (rv); 1629 } 1630 1631 /* 1632 * mdi_Set_lb_policy(): 1633 * Set current load balancing policy for a given client device 1634 */ 1635 int 1636 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1637 { 1638 mdi_client_t *ct; 1639 int rv = MDI_FAILURE; 1640 1641 ct = i_devi_get_client(cdip); 1642 if (ct != NULL) { 1643 ct->ct_lb = lb; 1644 rv = MDI_SUCCESS; 1645 } 1646 return (rv); 1647 } 1648 1649 /* 1650 * mdi_failover(): 1651 * failover function called by the vHCI drivers to initiate 1652 * a failover operation. This is typically due to non-availability 1653 * of online paths to route I/O requests. 
Failover can be
 *		triggered through user application also.
 *
 *		The vHCI driver calls mdi_failover() to initiate a failover
 *		operation. mdi_failover() calls back into the vHCI driver's
 *		vo_failover() entry point to perform the actual failover
 *		operation. The reason for requiring the vHCI driver to
 *		initiate failover by calling mdi_failover(), instead of directly
 *		executing vo_failover() itself, is to ensure that the mdi
 *		framework can keep track of the client state properly.
 *		Additionally, mdi_failover() provides as a convenience the
 *		option of performing the failover operation synchronously or
 *		asynchronously
 *
 *		Upon successful completion of the failover operation, the
 *		paths that were previously ONLINE will be in the STANDBY state,
 *		and the newly activated paths will be in the ONLINE state.
 *
 *		The flags modifier determines whether the activation is done
 *		synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		MDI_ACCEPT (async request queued to the taskq)
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int rv;
	mdi_client_t *ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			/* Async callers cannot block; report busy instead */
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* Sync callers wait for the client to stabilize */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again (state may have changed while we waited above).
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(ct->ct_dip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode. Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *		internal failover function. Invokes vHCI drivers failover
 *		callback function and process the failover status
 * Return Values:
 *		None
 *
 * Note: A client device in failover state can not be detached or freed.
 */
static int
i_mdi_failover(void *arg)
{
	int rv = MDI_SUCCESS;
	mdi_client_t *ct = (mdi_client_t *)arg;
	mdi_vhci_t *vh = ct->ct_vhci;

	/* Must enter unlocked; the vHCI callback may block */
	ASSERT(!MDI_CLIENT_LOCKED(ct));

	if (vh->vh_ops->vo_failover != NULL) {
		/*
		 * Call vHCI drivers callback routine
		 */
		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
		    ct->ct_failover_flags);
	}

	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);

	/*
	 * Save the failover return status
	 */
	ct->ct_failover_status = rv;

	/*
	 * As a result of failover, client status would have been changed.
	 * Update the client state and wake up anyone waiting on this client
	 * device.
	 */
	i_mdi_client_update_state(ct);

	cv_broadcast(&ct->ct_failover_cv);
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * Load balancing is logical block.
 * IOs within the range described by region_size
 * would go on the same path. This would improve the
 * performance by cache-hit on some of the RAID devices.
 * Search only for online paths(At some point we
 * may want to balance across target ports).
 * If no paths are found then default to round-robin.
 */
static int
i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
{
	int path_index = -1;
	int online_path_count = 0;
	int online_nonpref_path_count = 0;
	/* NOTE: region_size is used as a shift count on b_blkno below */
	int region_size = ct->ct_lb_args->region_size;
	mdi_pathinfo_t *pip;
	mdi_pathinfo_t *next;
	int preferred, path_cnt;

	/* Pass 1: count ONLINE paths, split by preferred/non-preferred */
	pip = ct->ct_path_head;
	while (pip) {
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
			online_path_count++;
		} else if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
			online_nonpref_path_count++;
		}
		next = (mdi_pathinfo_t *)
		    MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	/* if found any online/preferred then use this type */
	if (online_path_count > 0) {
		path_cnt = online_path_count;
		preferred = 1;
	} else if (online_nonpref_path_count > 0) {
		path_cnt = online_nonpref_path_count;
		preferred = 0;
	} else {
		path_cnt = 0;
	}
	if (path_cnt) {
		/*
		 * Pass 2: map the I/O's region to a path index and walk to
		 * the path_index'th ONLINE path of the chosen preference.
		 */
		path_index = (bp->b_blkno >> region_size) % path_cnt;
		pip = ct->ct_path_head;
		while (pip && path_index != -1) {
			MDI_PI_LOCK(pip);
			if (path_index == 0 &&
			    (MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    MDI_PI(pip)->pi_preferred == preferred) {
				/* Return held; caller does mdi_rele_path() */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				*ret_pip = pip;
				return (MDI_SUCCESS);
			}
			path_index --;
			next = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		if (pip == NULL) {
			MDI_DEBUG(4, (CE_NOTE, NULL,
			    "!lba %llx, no pip !!\n",
			    bp->b_lblkno));
		} else {
			MDI_DEBUG(4, (CE_NOTE, NULL,
			    "!lba %llx, no pip for path_index, "
			    "pip %p\n", bp->b_lblkno, (void *)pip));
		}
	}
	/* Caller falls back to round-robin on MDI_FAILURE */
	return (MDI_FAILURE);
}

/*
 * mdi_select_path():
 *		select a
path to access a client device.
 *
 *		mdi_select_path() function is called by the vHCI drivers to
 *		select a path to route the I/O request to. The caller passes
 *		the block I/O data transfer structure ("buf") as one of the
 *		parameters. The mpxio framework uses the buf structure
 *		contents to maintain per path statistics (total I/O size /
 *		count pending). If more than one online paths are available to
 *		select, the framework automatically selects a suitable path
 *		for routing I/O request. If a failover operation is active for
 *		this client device the call shall be failed with MDI_BUSY error
 *		code.
 *
 *		By default this function returns a suitable path in online
 *		state based on the current load balancing policy. Currently
 *		we support LOAD_BALANCE_NONE (Previously selected online path
 *		will continue to be used till the path is usable) and
 *		LOAD_BALANCE_RR (Online paths will be selected in a round
 *		robin fashion), LOAD_BALANCE_LBA (Online paths will be selected
 *		based on the logical block). The load balancing policy can be
 *		configured through the vHCI drivers configuration file
 *		(driver.conf).
 *
 *		vHCI drivers may override this default behavior by specifying
 *		appropriate flags. If start_pip is specified (non NULL) it is
 *		used as start point to walk and find the next appropriate path.
 *		The following values are currently defined:
 *		MDI_SELECT_ONLINE_PATH (to select an ONLINE path) and/or
 *		MDI_SELECT_STANDBY_PATH (to select an STANDBY path).
 *
 *		The non-standard behavior is used by the scsi_vhci driver,
 *		whenever it has to use a STANDBY/FAULTED path. Eg. during
 *		attach of client devices (to avoid an unnecessary failover
 *		when the STANDBY path comes up first), during failover
 *		(to activate a STANDBY path as ONLINE).
 *
 *		The selected path is returned in a mdi_hold_path() state
 *		(pi_ref_cnt). Caller should release the hold by calling
 *		mdi_rele_path().
 *
 * Return Values:
 *		MDI_SUCCESS	- Completed successfully
 *		MDI_BUSY	- Client device is busy failing over
 *		MDI_NOPATH	- Client device is online, but no valid path are
 *				available to access this client device
 *		MDI_FAILURE	- Invalid client device or state
 *		MDI_DEVI_ONLINING
 *				- Client device (struct dev_info state) is in
 *				onlining state.
 */

/*ARGSUSED*/
int
mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
    mdi_pathinfo_t *start_pip, mdi_pathinfo_t **ret_pip)
{
	mdi_client_t *ct;
	mdi_pathinfo_t *pip;
	mdi_pathinfo_t *next;
	mdi_pathinfo_t *head;
	mdi_pathinfo_t *start;
	client_lb_t lbp;	/* load balancing policy */
	int sb = 1;		/* standard behavior */
	int preferred = 1;	/* preferred path */
	int cond, cont = 1;
	int retry = 0;

	if (flags != 0) {
		/*
		 * disable default behavior
		 */
		sb = 0;
	}

	*ret_pip = NULL;
	ct = i_devi_get_client(cdip);
	if (ct == NULL) {
		/* mdi extensions are NULL, Nothing more to do */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	/* Standard-behavior sanity checks on the client state */
	if (sb) {
		if (MDI_CLIENT_IS_FAILED(ct)) {
			/*
			 * Client is not ready to accept any I/O requests.
			 * Fail this request.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client state offline ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_FAILURE);
		}

		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Check for Failover is in progress. If so tell the
			 * caller that this device is busy.
			 */
			MDI_DEBUG(2, (CE_NOTE, cdip, "!mdi_select_path: "
			    "client failover in progress ct = %p\n",
			    (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		}

		/*
		 * Check to see whether the client device is attached.
		 * If not so, let the vHCI driver manually select a path
		 * (standby) and let the probe/attach process to continue.
		 */
		if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
			MDI_DEBUG(4, (CE_NOTE, cdip, "!Devi is onlining "
			    "ct = %p\n", (void *)ct));
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_DEVI_ONLINING);
		}
	}

	/*
	 * Cache in the client list head. If head of the list is NULL
	 * return MDI_NOPATH
	 */
	head = ct->ct_path_head;
	if (head == NULL) {
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_NOPATH);
	}

	/*
	 * for non default behavior, bypass current
	 * load balancing policy and always use LOAD_BALANCE_RR
	 * except that the start point will be adjusted based
	 * on the provided start_pip
	 */
	lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;

	switch (lbp) {
	case LOAD_BALANCE_NONE:
		/*
		 * Load balancing is None or Alternate path mode
		 * Start looking for a online mdi_pathinfo node starting from
		 * last known selected path
		 */
		preferred = 1;
		pip = (mdi_pathinfo_t *)ct->ct_path_last;
		if (pip == NULL) {
			pip = head;
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if ((MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    preferred == MDI_PI(pip)->pi_preferred) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}

			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;
			/*
			 * Keep looking for a next available online path
			 */
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				next = head;
			}
			MDI_PI_UNLOCK(pip);
			pip = next;
			/*
			 * First full circuit looks at preferred paths only;
			 * second circuit admits non-preferred, then we stop.
			 */
			if (start == pip && preferred) {
				preferred = 0;
			} else if (start == pip && !preferred) {
				cont = 0;
			}
		} while (cont);
		break;

	case LOAD_BALANCE_LBA:
		/*
		 * Make sure we are looking
		 * for an online path. Otherwise, if it is for a STANDBY
		 * path request, it will go through and fetch an ONLINE
		 * path which is not desirable.
		 */
		if ((ct->ct_lb_args != NULL) &&
		    (ct->ct_lb_args->region_size) && bp &&
		    (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
			if (i_mdi_lba_lb(ct, ret_pip, bp)
			    == MDI_SUCCESS) {
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
		}
		/* FALLTHROUGH */
	case LOAD_BALANCE_RR:
		/*
		 * Load balancing is Round Robin. Start looking for a online
		 * mdi_pathinfo node starting from last known selected path
		 * as the start point. If override flags are specified,
		 * process accordingly.
		 * If the search is already in effect(start_pip not null),
		 * then lets just use the same path preference to continue the
		 * traversal.
		 */

		if (start_pip != NULL) {
			preferred = MDI_PI(start_pip)->pi_preferred;
		} else {
			preferred = 1;
		}

		start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
		if (start == NULL) {
			pip = head;
		} else {
			pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
			if (pip == NULL) {
				if (!sb) {
					if (preferred == 0) {
						/*
						 * Looks like we have completed
						 * the traversal as preferred
						 * value is 0. Time to bail out.
						 */
						*ret_pip = NULL;
						MDI_CLIENT_UNLOCK(ct);
						return (MDI_NOPATH);
					} else {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
					}
				}
				pip = head;
			}
		}
		start = pip;
		do {
			MDI_PI_LOCK(pip);
			if (sb) {
				cond = ((MDI_PI(pip)->pi_state ==
				    MDI_PATHINFO_STATE_ONLINE &&
				    MDI_PI(pip)->pi_preferred ==
				    preferred) ? 1 : 0);
			} else {
				/*
				 * Non-standard behavior: the flags choose
				 * which combination of path states qualify.
				 */
				if (flags == MDI_SELECT_ONLINE_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == MDI_SELECT_STANDBY_PATH) {
					cond = ((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags == (MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_STANDBY_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY)) &&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else if (flags ==
				    (MDI_SELECT_STANDBY_PATH |
				    MDI_SELECT_ONLINE_PATH |
				    MDI_SELECT_USER_DISABLE_PATH)) {
					cond = (((MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_ONLINE ||
					    (MDI_PI(pip)->pi_state ==
					    MDI_PATHINFO_STATE_STANDBY) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_ONLINE|
					    MDI_PATHINFO_STATE_USER_DISABLE)) ||
					    (MDI_PI(pip)->pi_state ==
					    (MDI_PATHINFO_STATE_STANDBY |
					    MDI_PATHINFO_STATE_USER_DISABLE)))&&
					    MDI_PI(pip)->pi_preferred ==
					    preferred) ? 1 : 0);
				} else {
					cond = 0;
				}
			}
			/*
			 * No need to explicitly check if the path is disabled.
			 * Since we are checking for state == ONLINE and the
			 * same variable is used for DISABLE/ENABLE information.
			 */
			if (cond) {
				/*
				 * Return the path in hold state. Caller should
				 * release the lock by calling mdi_rele_path()
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				if (sb)
					ct->ct_path_last = pip;
				*ret_pip = pip;
				MDI_CLIENT_UNLOCK(ct);
				return (MDI_SUCCESS);
			}
			/*
			 * Path is busy.
			 */
			if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
			    MDI_PI_IS_TRANSIENT(pip))
				retry = 1;

			/*
			 * Keep looking for a next available online path
			 */
do_again:
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (next == NULL) {
				if (!sb) {
					if (preferred == 1) {
						/*
						 * Looks like we reached the
						 * end of the list. Lets enable
						 * traversal of non preferred
						 * paths.
						 */
						preferred = 0;
						next = head;
					} else {
						/*
						 * We have done both the passes
						 * Preferred as well as for
						 * Non-preferred. Bail out now.
						 */
						cont = 0;
					}
				} else {
					/*
					 * Standard behavior case.
					 */
					next = head;
				}
			}
			MDI_PI_UNLOCK(pip);
			if (cont == 0) {
				break;
			}
			pip = next;

			if (!sb) {
				/*
				 * We need to handle the selection of
				 * non-preferred path in the following
				 * case:
				 *
				 * +------+   +------+   +------+   +-----+
				 * | A : 1| - | B : 1| - | C : 0| - |NULL |
				 * +------+   +------+   +------+   +-----+
				 *
				 * If we start the search with B, we need to
				 * skip beyond B to pick C which is non -
				 * preferred in the second pass. The following
				 * test, if true, will allow us to skip over
				 * the 'start'(B in the example) to select
				 * other non preferred elements.
				 */
				if ((start_pip != NULL) && (start_pip == pip) &&
				    (MDI_PI(start_pip)->pi_preferred
				    != preferred)) {
					/*
					 * try again after going past the start
					 * pip
					 */
					MDI_PI_LOCK(pip);
					goto do_again;
				}
			} else {
				/*
				 * Standard behavior case
				 */
				if (start == pip && preferred) {
					/* look for nonpreferred paths */
					preferred = 0;
				} else if (start == pip && !preferred) {
					/*
					 * Exit condition
					 */
					cont = 0;
				}
			}
		} while (cont);
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	/* BUSY if a transient path was seen (caller may retry), else NOPATH */
	if (retry == 1) {
		return (MDI_BUSY);
	} else {
		return (MDI_NOPATH);
	}
}

/*
 * For a client, return the next available path to any phci
 *
 * Note:
 *		Caller should hold the branch's devinfo node to get a consistent
 *		snap shot of the mdi_pathinfo nodes.
 *
 *		Please note that even the list is stable the mdi_pathinfo
 *		node state and properties are volatile. The caller should lock
 *		and unlock the nodes by calling mdi_pi_lock() and
 *		mdi_pi_unlock() functions to get a stable properties.
 *
 *		If there is a need to use the nodes beyond the hold of the
 *		devinfo node period (For ex. I/O), then mdi_pathinfo node
 *		need to be held against unexpected removal by calling
 *		mdi_hold_path() and should be released by calling
 *		mdi_rele_path() on completion.
 */
mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	/* Not an MDI client node: there is no path list to walk */
	if (!MDI_CLIENT(ct_dip))
		return (NULL);

	/*
	 * Walk through client link
	 */
	ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
	ASSERT(ct != NULL);

	/* NULL pip means "start of list": return the first path */
	if (pip == NULL)
		return ((mdi_pathinfo_t *)ct->ct_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
}

/*
 * For a phci, return the next available path to any client
 * Note: ditto mdi_get_next_phci_path()
 */
mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
{
	mdi_phci_t	*ph;

	/* Not an MDI pHCI node: nothing to iterate */
	if (!MDI_PHCI(ph_dip))
		return (NULL);

	/*
	 * Walk through pHCI link
	 */
	ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
	ASSERT(ph != NULL);

	if (pip == NULL)
		return ((mdi_pathinfo_t *)ph->ph_path_head);

	return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
}

/*
 * mdi_hold_path():
 *		Hold the mdi_pathinfo node against unwanted unexpected free.
 * Return Values:
 *		None
 */
void
mdi_hold_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_HOLD(pip);
		MDI_PI_UNLOCK(pip);
	}
}


/*
 * mdi_rele_path():
 *		Release the mdi_pathinfo node which was selected
 *		through mdi_select_path() mechanism or manually held by
 *		calling mdi_hold_path().
 * Return Values:
 *		None
 */
void
mdi_rele_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_RELE(pip);
		if (MDI_PI(pip)->pi_ref_cnt == 0) {
			/*
			 * Last reference dropped: wake waiters (e.g.
			 * mdi_pi_free()) blocked on pi_ref_cv for the
			 * reference count to reach zero.
			 */
			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
		}
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_lock():
 *		Lock the mdi_pathinfo node.
 * Note:
 *		The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	/* NOTE(review): the ASSERT makes the NULL check below redundant on
	 * DEBUG kernels; on non-DEBUG kernels a NULL pip is silently ignored.
	 */
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *		Unlock the mdi_pathinfo node.
 * Note:
 *		The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *		Search the list of mdi_pathinfo nodes attached to the
 *		pHCI/Client device node whose path address matches "paddr".
 *		Returns a pointer to the mdi_pathinfo node if a matching node is
 *		found.
 * Return Values:
 *		mdi_pathinfo node handle
 *		NULL
 * Notes:
 *		Caller need not hold any locks to call this function.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t	*ph;
	mdi_vhci_t	*vh;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: %s %s",
	    caddr ? caddr : "NULL", paddr ? paddr : "NULL"));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (CE_WARN, pdip,
		    "!mdi_pi_find: invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (CE_WARN, pdip,
			    "!mdi_pi_find: offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found %p",
		    (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: client not "
		    "found for caddr %s", caddr ? caddr : "NULL"));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_find: found:: %p", (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc():
 *		Allocate and initialize a new instance of a mdi_pathinfo node.
 *		The mdi_pathinfo node returned by this function identifies a
 *		unique device path is capable of having properties attached
 *		and passed to mdi_pi_online() to fully attach and online the
 *		path and client device node.
 *		The mdi_pathinfo node returned by this function must be
 *		destroyed using mdi_pi_free() if the path is no longer
 *		operational or if the caller fails to attach a client device
 *		node when calling mdi_pi_online(). The framework will not free
 *		the resources allocated.
 *		This function can be called from both interrupt and kernel
 *		contexts.  DDI_NOSLEEP flag should be used while calling
 *		from interrupt contexts.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip = NULL;
	dev_info_t	*cdip;
	int		rv = MDI_NOMEM;	/* result if we reach 'fail' */
	int		path_allocated = 0;

	MDI_DEBUG(2, (CE_NOTE, pdip, "!mdi_pi_alloc_compatible: %s %s %s",
	    cname ? cname : "NULL", caddr ? caddr : "NULL",
	    paddr ? paddr : "NULL"));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "!mdi_pi_alloc: invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (CE_WARN, pdip,
		    "mdi_pi_alloc: pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* Re-use an existing pathinfo node for this pHCI/paddr, if any */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device.  Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (CE_NOTE, pdip,
	    "!mdi_pi_alloc_compatible: alloc %p", (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	/* Convenience wrapper: no "compatible" property list */
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *		Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *		mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		ct_circular;
	int		ph_circular;
	int		se_flag;
	int		kmem_flag;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	/* New paths start INIT and TRANSIENT until fully set up */
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit the pHCI's disable conditions into the new path */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);
	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	return (pip);
}

/*
 * i_mdi_phci_add_path():
 *		Add a mdi_pathinfo node to pHCI list.
 * Notes:
 *		Caller should per-pHCI mutex
 */
static void
i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Append at the tail of the pHCI's singly-linked path list */
	MDI_PHCI_LOCK(ph);
	if (ph->ph_path_head == NULL) {
		ph->ph_path_head = pip;
	} else {
		MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
	}
	ph->ph_path_tail = pip;
	ph->ph_path_count++;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_add_path():
 *		Add mdi_pathinfo node to client list
 */
static void
i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	/* Append at the tail of the client's singly-linked path list */
	MDI_CLIENT_LOCK(ct);
	if (ct->ct_path_head == NULL) {
		ct->ct_path_head = pip;
	} else {
		MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
	}
	ct->ct_path_tail = pip;
	ct->ct_path_count++;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_free():
 *		Free the mdi_pathinfo node and also client device node if this
 *		is the last path to the device
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_pi_free(mdi_pathinfo_t *pip, int flags)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	int		client_held = 0;

	MDI_PI_LOCK(pip);
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid pHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid vHCI pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid Client device, return failure
		 */
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_free: invalid client pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	/*
	 * Check to see for busy condition.  A mdi_pathinfo can only be freed
	 * if the node state is either offline or init and the reference count
	 * is zero.
	 */
	if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
	    MDI_PI_IS_INITING(pip))) {
		/*
		 * Node is busy
		 */
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_free: pathinfo node is busy pip=%p", (void *)pip));
		MDI_PI_UNLOCK(pip);
		return (MDI_BUSY);
	}

	while (MDI_PI(pip)->pi_ref_cnt != 0) {
		/*
		 * Give a chance for pending I/Os to complete.
		 * Each wait is bounded to 60 seconds; on timeout without
		 * the reference count reaching zero we give up with
		 * MDI_BUSY (mdi_rele_path() broadcasts pi_ref_cv).
		 */
		MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!mdi_pi_free: "
		    "%d cmds still pending on path: %p\n",
		    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
		if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv,
		    &MDI_PI(pip)->pi_mutex,
		    ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) {
			/*
			 * The timeout time reached without ref_cnt being zero
			 * being signaled.
			 */
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "Timeout reached on path %p without the cond\n",
			    (void *)pip));
			MDI_DEBUG(1, (CE_NOTE, ct->ct_dip,
			    "!mdi_pi_free: "
			    "%d cmds still pending on path: %p\n",
			    MDI_PI(pip)->pi_ref_cnt, (void *)pip));
			MDI_PI_UNLOCK(pip);
			return (MDI_BUSY);
		}
	}
	if (MDI_PI(pip)->pi_pm_held) {
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	vhcache_pi_remove(vh->vh_config, MDI_PI(pip));

	MDI_CLIENT_LOCK(ct);

	/* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
	MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);

	/*
	 * Wait till failover is complete before removing this node.
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Drop and re-acquire the client lock so the vHCI client lock can
	 * be taken first, preserving the vHCI -> client lock order.
	 */
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_LOCK(vh);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);

	if (!MDI_PI_IS_INITING(pip)) {
		f = vh->vh_ops->vo_pi_uninit;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
		}
	}
	/*
	 * If vo_pi_uninit() completed successfully.
	 */
	if (rv == MDI_SUCCESS) {
		if (client_held) {
			MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_free "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
		}
		i_mdi_pi_free(ph, pip, ct);
		if (ct->ct_path_count == 0) {
			/*
			 * Client lost its last path.
			 * Clean up the client device
			 */
			MDI_CLIENT_UNLOCK(ct);
			(void) i_mdi_client_free(ct->ct_vhci, ct);
			MDI_VHCI_CLIENT_UNLOCK(vh);
			return (rv);
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_VHCI_CLIENT_UNLOCK(vh);

	if (rv == MDI_FAILURE)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * i_mdi_pi_free():
 *		Free the mdi_pathinfo node
 */
static void
i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
{
	int	ct_circular;
	int	ph_circular;
	int	se_flag;
	int	kmem_flag;

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * remove any per-path kstats
	 */
	i_mdi_pi_kstat_destroy(pip);

	/* See comments in i_mdi_pi_alloc() */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_client_remove_path(ct, pip);
	i_mdi_phci_remove_path(ph, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	/* determine interrupt context */
	se_flag = (servicing_interrupt()) ? SE_NOSLEEP : SE_SLEEP;
	kmem_flag = (se_flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;

	i_ddi_di_cache_invalidate(kmem_flag);

	mutex_destroy(&MDI_PI(pip)->pi_mutex);
	cv_destroy(&MDI_PI(pip)->pi_state_cv);
	cv_destroy(&MDI_PI(pip)->pi_ref_cv);
	if (MDI_PI(pip)->pi_addr) {
		kmem_free(MDI_PI(pip)->pi_addr,
		    strlen(MDI_PI(pip)->pi_addr) + 1);
		MDI_PI(pip)->pi_addr = NULL;
	}

	if (MDI_PI(pip)->pi_prop) {
		(void) nvlist_free(MDI_PI(pip)->pi_prop);
		MDI_PI(pip)->pi_prop = NULL;
	}
	kmem_free(pip, sizeof (struct mdi_pathinfo));
}


/*
 * i_mdi_phci_remove_path():
 *		Remove a mdi_pathinfo node from pHCI list.
 * Notes:
 *		Caller should hold per-pHCI mutex
 */
static void
i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path = NULL;

	ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));

	/* Locate pip in the pHCI's singly-linked list, tracking 'prev' */
	MDI_PHCI_LOCK(ph);
	path = ph->ph_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
	}

	if (path) {
		/* Unlink and fix up head/tail as needed */
		ph->ph_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
		} else {
			ph->ph_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
		}
		if (ph->ph_path_tail == path) {
			ph->ph_path_tail = prev;
		}
	}

	/*
	 * Clear the pHCI link
	 */
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_remove_path():
 *		Remove a mdi_pathinfo node from client path list.
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Locate pip in the client's singly-linked list, tracking 'prev' */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		/* Unlink and fix up head/tail/last-used pointers */
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		online a mdi_pathinfo node
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid phci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid vhci pip=%p", (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (CE_WARN, NULL,
		    "!mdi_pi_state_change: invalid client pip=%p",
		    (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
				    "!vo_pi_init: failed vHCI=0x%p, pip=0x%p",
				    (void *)vh, (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (CE_WARN, ct->ct_dip,
		    "!mdi_pi_state_change: pHCI not ready, pHCI=%p",
		    (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);
		/*
		 * Do not offline if path will become last path and path
		 * is busy for user initiated events.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_DEVI_REMOVE) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, 0);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* Call back the vHCI driver with all locks dropped */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (CE_WARN, ct->ct_dip,
		    "!vo_pi_state_change: failed rv = %x", rv));
	}
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* Transition failed: roll back to the prior state */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						/*
						 * ndi_devi_online failed.
						 * Reset client flags to
						 * offline.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_online: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_OFFLINE(ct);
					}
					if (rv != NDI_SUCCESS) {
						/* Reset the path state */
						MDI_PI_LOCK(pip);
						MDI_PI(pip)->pi_state =
						    MDI_PI_OLD_STATE(pip);
						MDI_PI_UNLOCK(pip);
					}
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_DEVI_REMOVE) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip, 0);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (CE_WARN, cdip,
						    "!ndi_devi_offline: failed "
						    " Error: %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.  The path is
 *		now available to be selected by mdi_select_path() for
 *		transporting I/O requests to client devices.
3481 * Return Values: 3482 * MDI_SUCCESS 3483 * MDI_FAILURE 3484 */ 3485 int 3486 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3487 { 3488 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3489 dev_info_t *cdip; 3490 int client_held = 0; 3491 int rv; 3492 3493 ASSERT(ct != NULL); 3494 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3495 if (rv != MDI_SUCCESS) 3496 return (rv); 3497 3498 MDI_PI_LOCK(pip); 3499 if (MDI_PI(pip)->pi_pm_held == 0) { 3500 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3501 "i_mdi_pm_hold_pip %p\n", (void *)pip)); 3502 i_mdi_pm_hold_pip(pip); 3503 client_held = 1; 3504 } 3505 MDI_PI_UNLOCK(pip); 3506 3507 if (client_held) { 3508 MDI_CLIENT_LOCK(ct); 3509 if (ct->ct_power_cnt == 0) { 3510 rv = i_mdi_power_all_phci(ct); 3511 } 3512 3513 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "mdi_pi_online " 3514 "i_mdi_pm_hold_client %p\n", (void *)ct)); 3515 i_mdi_pm_hold_client(ct, 1); 3516 MDI_CLIENT_UNLOCK(ct); 3517 } 3518 3519 /* 3520 * Create the per-path (pathinfo) IO and error kstats which 3521 * are reported via iostat(1m). 3522 * 3523 * Defer creating the per-path kstats if device is not yet 3524 * attached; the names of the kstats are constructed in part 3525 * using the devices instance number which is assigned during 3526 * process of attaching the client device. 3527 * 3528 * The framework post_attach handler, mdi_post_attach(), is 3529 * is responsible for initializing the client's pathinfo list 3530 * once successfully attached. 
3531 */ 3532 cdip = ct->ct_dip; 3533 ASSERT(cdip); 3534 if (cdip == NULL || !i_ddi_devi_attached(cdip)) 3535 return (rv); 3536 3537 MDI_CLIENT_LOCK(ct); 3538 rv = i_mdi_pi_kstat_create(pip); 3539 MDI_CLIENT_UNLOCK(ct); 3540 return (rv); 3541 } 3542 3543 /* 3544 * mdi_pi_standby(): 3545 * Place the mdi_pathinfo node in standby state 3546 * 3547 * Return Values: 3548 * MDI_SUCCESS 3549 * MDI_FAILURE 3550 */ 3551 int 3552 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3553 { 3554 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3555 } 3556 3557 /* 3558 * mdi_pi_fault(): 3559 * Place the mdi_pathinfo node in fault'ed state 3560 * Return Values: 3561 * MDI_SUCCESS 3562 * MDI_FAILURE 3563 */ 3564 int 3565 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3566 { 3567 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3568 } 3569 3570 /* 3571 * mdi_pi_offline(): 3572 * Offline a mdi_pathinfo node. 3573 * Return Values: 3574 * MDI_SUCCESS 3575 * MDI_FAILURE 3576 */ 3577 int 3578 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3579 { 3580 int ret, client_held = 0; 3581 mdi_client_t *ct; 3582 3583 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3584 3585 if (ret == MDI_SUCCESS) { 3586 MDI_PI_LOCK(pip); 3587 if (MDI_PI(pip)->pi_pm_held) { 3588 client_held = 1; 3589 } 3590 MDI_PI_UNLOCK(pip); 3591 3592 if (client_held) { 3593 ct = MDI_PI(pip)->pi_client; 3594 MDI_CLIENT_LOCK(ct); 3595 MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, 3596 "mdi_pi_offline i_mdi_pm_rele_client\n")); 3597 i_mdi_pm_rele_client(ct, 1); 3598 MDI_CLIENT_UNLOCK(ct); 3599 } 3600 } 3601 3602 return (ret); 3603 } 3604 3605 /* 3606 * i_mdi_pi_offline(): 3607 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3608 */ 3609 static int 3610 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3611 { 3612 dev_info_t *vdip = NULL; 3613 mdi_vhci_t *vh = NULL; 3614 mdi_client_t *ct = NULL; 3615 int (*f)(); 3616 int rv; 3617 3618 MDI_PI_LOCK(pip); 3619 ct = 
MDI_PI(pip)->pi_client; 3620 ASSERT(ct != NULL); 3621 3622 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3623 /* 3624 * Give a chance for pending I/Os to complete. 3625 */ 3626 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3627 "%d cmds still pending on path: %p\n", 3628 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3629 if (cv_timedwait(&MDI_PI(pip)->pi_ref_cv, 3630 &MDI_PI(pip)->pi_mutex, 3631 ddi_get_lbolt() + drv_usectohz(60 * 1000000)) == -1) { 3632 /* 3633 * The timeout time reached without ref_cnt being zero 3634 * being signaled. 3635 */ 3636 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3637 "Timeout reached on path %p without the cond\n", 3638 (void *)pip)); 3639 MDI_DEBUG(1, (CE_NOTE, ct->ct_dip, "!i_mdi_pi_offline: " 3640 "%d cmds still pending on path: %p\n", 3641 MDI_PI(pip)->pi_ref_cnt, (void *)pip)); 3642 } 3643 } 3644 vh = ct->ct_vhci; 3645 vdip = vh->vh_dip; 3646 3647 /* 3648 * Notify vHCI that has registered this event 3649 */ 3650 ASSERT(vh->vh_ops); 3651 f = vh->vh_ops->vo_pi_state_change; 3652 3653 if (f != NULL) { 3654 MDI_PI_UNLOCK(pip); 3655 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3656 flags)) != MDI_SUCCESS) { 3657 MDI_DEBUG(1, (CE_WARN, ct->ct_dip, 3658 "!vo_path_offline failed " 3659 "vdip %p, pip %p", (void *)vdip, (void *)pip)); 3660 } 3661 MDI_PI_LOCK(pip); 3662 } 3663 3664 /* 3665 * Set the mdi_pathinfo node state and clear the transient condition 3666 */ 3667 MDI_PI_SET_OFFLINE(pip); 3668 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3669 MDI_PI_UNLOCK(pip); 3670 3671 MDI_CLIENT_LOCK(ct); 3672 if (rv == MDI_SUCCESS) { 3673 if (ct->ct_unstable == 0) { 3674 dev_info_t *cdip = ct->ct_dip; 3675 3676 /* 3677 * Onlining the mdi_pathinfo node will impact the 3678 * client state Update the client and dev_info node 3679 * state accordingly 3680 */ 3681 i_mdi_client_update_state(ct); 3682 rv = NDI_SUCCESS; 3683 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3684 if (cdip && 3685 (i_ddi_node_state(cdip) >= 3686 
DS_INITIALIZED)) { 3687 MDI_CLIENT_UNLOCK(ct); 3688 rv = ndi_devi_offline(cdip, 0); 3689 MDI_CLIENT_LOCK(ct); 3690 if (rv != NDI_SUCCESS) { 3691 /* 3692 * ndi_devi_offline failed. 3693 * Reset client flags to 3694 * online. 3695 */ 3696 MDI_DEBUG(4, (CE_WARN, cdip, 3697 "!ndi_devi_offline: failed " 3698 " Error: %x", rv)); 3699 MDI_CLIENT_SET_ONLINE(ct); 3700 } 3701 } 3702 } 3703 /* 3704 * Convert to MDI error code 3705 */ 3706 switch (rv) { 3707 case NDI_SUCCESS: 3708 rv = MDI_SUCCESS; 3709 break; 3710 case NDI_BUSY: 3711 rv = MDI_BUSY; 3712 break; 3713 default: 3714 rv = MDI_FAILURE; 3715 break; 3716 } 3717 } 3718 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3719 i_mdi_report_path_state(ct, pip); 3720 } 3721 3722 MDI_CLIENT_UNLOCK(ct); 3723 3724 /* 3725 * Change in the mdi_pathinfo node state will impact the client state 3726 */ 3727 MDI_DEBUG(2, (CE_NOTE, NULL, "!i_mdi_pi_offline ct = %p pip = %p", 3728 (void *)ct, (void *)pip)); 3729 return (rv); 3730 } 3731 3732 3733 /* 3734 * mdi_pi_get_addr(): 3735 * Get the unit address associated with a mdi_pathinfo node 3736 * 3737 * Return Values: 3738 * char * 3739 */ 3740 char * 3741 mdi_pi_get_addr(mdi_pathinfo_t *pip) 3742 { 3743 if (pip == NULL) 3744 return (NULL); 3745 3746 return (MDI_PI(pip)->pi_addr); 3747 } 3748 3749 /* 3750 * mdi_pi_get_client(): 3751 * Get the client devinfo associated with a mdi_pathinfo node 3752 * 3753 * Return Values: 3754 * Handle to client device dev_info node 3755 */ 3756 dev_info_t * 3757 mdi_pi_get_client(mdi_pathinfo_t *pip) 3758 { 3759 dev_info_t *dip = NULL; 3760 if (pip) { 3761 dip = MDI_PI(pip)->pi_client->ct_dip; 3762 } 3763 return (dip); 3764 } 3765 3766 /* 3767 * mdi_pi_get_phci(): 3768 * Get the pHCI devinfo associated with the mdi_pathinfo node 3769 * Return Values: 3770 * Handle to dev_info node 3771 */ 3772 dev_info_t * 3773 mdi_pi_get_phci(mdi_pathinfo_t *pip) 3774 { 3775 dev_info_t *dip = NULL; 3776 if (pip) { 3777 dip = MDI_PI(pip)->pi_phci->ph_dip; 3778 } 3779 return (dip); 
3780 } 3781 3782 /* 3783 * mdi_pi_get_client_private(): 3784 * Get the client private information associated with the 3785 * mdi_pathinfo node 3786 */ 3787 void * 3788 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 3789 { 3790 void *cprivate = NULL; 3791 if (pip) { 3792 cprivate = MDI_PI(pip)->pi_cprivate; 3793 } 3794 return (cprivate); 3795 } 3796 3797 /* 3798 * mdi_pi_set_client_private(): 3799 * Set the client private information in the mdi_pathinfo node 3800 */ 3801 void 3802 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 3803 { 3804 if (pip) { 3805 MDI_PI(pip)->pi_cprivate = priv; 3806 } 3807 } 3808 3809 /* 3810 * mdi_pi_get_phci_private(): 3811 * Get the pHCI private information associated with the 3812 * mdi_pathinfo node 3813 */ 3814 caddr_t 3815 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 3816 { 3817 caddr_t pprivate = NULL; 3818 if (pip) { 3819 pprivate = MDI_PI(pip)->pi_pprivate; 3820 } 3821 return (pprivate); 3822 } 3823 3824 /* 3825 * mdi_pi_set_phci_private(): 3826 * Set the pHCI private information in the mdi_pathinfo node 3827 */ 3828 void 3829 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 3830 { 3831 if (pip) { 3832 MDI_PI(pip)->pi_pprivate = priv; 3833 } 3834 } 3835 3836 /* 3837 * mdi_pi_get_state(): 3838 * Get the mdi_pathinfo node state. Transient states are internal 3839 * and not provided to the users 3840 */ 3841 mdi_pathinfo_state_t 3842 mdi_pi_get_state(mdi_pathinfo_t *pip) 3843 { 3844 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 3845 3846 if (pip) { 3847 if (MDI_PI_IS_TRANSIENT(pip)) { 3848 /* 3849 * mdi_pathinfo is in state transition. Return the 3850 * last good state. 3851 */ 3852 state = MDI_PI_OLD_STATE(pip); 3853 } else { 3854 state = MDI_PI_STATE(pip); 3855 } 3856 } 3857 return (state); 3858 } 3859 3860 /* 3861 * Note that the following function needs to be the new interface for 3862 * mdi_pi_get_state when mpxio gets integrated to ON. 
3863 */ 3864 int 3865 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 3866 uint32_t *ext_state) 3867 { 3868 *state = MDI_PATHINFO_STATE_INIT; 3869 3870 if (pip) { 3871 if (MDI_PI_IS_TRANSIENT(pip)) { 3872 /* 3873 * mdi_pathinfo is in state transition. Return the 3874 * last good state. 3875 */ 3876 *state = MDI_PI_OLD_STATE(pip); 3877 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 3878 } else { 3879 *state = MDI_PI_STATE(pip); 3880 *ext_state = MDI_PI_EXT_STATE(pip); 3881 } 3882 } 3883 return (MDI_SUCCESS); 3884 } 3885 3886 /* 3887 * mdi_pi_get_preferred: 3888 * Get the preferred path flag 3889 */ 3890 int 3891 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 3892 { 3893 if (pip) { 3894 return (MDI_PI(pip)->pi_preferred); 3895 } 3896 return (0); 3897 } 3898 3899 /* 3900 * mdi_pi_set_preferred: 3901 * Set the preferred path flag 3902 */ 3903 void 3904 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 3905 { 3906 if (pip) { 3907 MDI_PI(pip)->pi_preferred = preferred; 3908 } 3909 } 3910 3911 /* 3912 * mdi_pi_set_state(): 3913 * Set the mdi_pathinfo node state 3914 */ 3915 void 3916 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 3917 { 3918 uint32_t ext_state; 3919 3920 if (pip) { 3921 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 3922 MDI_PI(pip)->pi_state = state; 3923 MDI_PI(pip)->pi_state |= ext_state; 3924 } 3925 } 3926 3927 /* 3928 * Property functions: 3929 */ 3930 int 3931 i_map_nvlist_error_to_mdi(int val) 3932 { 3933 int rv; 3934 3935 switch (val) { 3936 case 0: 3937 rv = DDI_PROP_SUCCESS; 3938 break; 3939 case EINVAL: 3940 case ENOTSUP: 3941 rv = DDI_PROP_INVAL_ARG; 3942 break; 3943 case ENOMEM: 3944 rv = DDI_PROP_NO_MEMORY; 3945 break; 3946 default: 3947 rv = DDI_PROP_NOT_FOUND; 3948 break; 3949 } 3950 return (rv); 3951 } 3952 3953 /* 3954 * mdi_pi_get_next_prop(): 3955 * Property walk function. 
The caller should hold mdi_pi_lock() 3956 * and release by calling mdi_pi_unlock() at the end of walk to 3957 * get a consistent value. 3958 */ 3959 nvpair_t * 3960 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 3961 { 3962 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 3963 return (NULL); 3964 } 3965 ASSERT(MDI_PI_LOCKED(pip)); 3966 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 3967 } 3968 3969 /* 3970 * mdi_prop_remove(): 3971 * Remove the named property from the named list. 3972 */ 3973 int 3974 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 3975 { 3976 if (pip == NULL) { 3977 return (DDI_PROP_NOT_FOUND); 3978 } 3979 ASSERT(!MDI_PI_LOCKED(pip)); 3980 MDI_PI_LOCK(pip); 3981 if (MDI_PI(pip)->pi_prop == NULL) { 3982 MDI_PI_UNLOCK(pip); 3983 return (DDI_PROP_NOT_FOUND); 3984 } 3985 if (name) { 3986 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 3987 } else { 3988 char nvp_name[MAXNAMELEN]; 3989 nvpair_t *nvp; 3990 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 3991 while (nvp) { 3992 nvpair_t *next; 3993 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 3994 (void) snprintf(nvp_name, MAXNAMELEN, "%s", 3995 nvpair_name(nvp)); 3996 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 3997 nvp_name); 3998 nvp = next; 3999 } 4000 } 4001 MDI_PI_UNLOCK(pip); 4002 return (DDI_PROP_SUCCESS); 4003 } 4004 4005 /* 4006 * mdi_prop_size(): 4007 * Get buffer size needed to pack the property data. 4008 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4009 * buffer size. 
4010 */ 4011 int 4012 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4013 { 4014 int rv; 4015 size_t bufsize; 4016 4017 *buflenp = 0; 4018 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4019 return (DDI_PROP_NOT_FOUND); 4020 } 4021 ASSERT(MDI_PI_LOCKED(pip)); 4022 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4023 &bufsize, NV_ENCODE_NATIVE); 4024 *buflenp = bufsize; 4025 return (i_map_nvlist_error_to_mdi(rv)); 4026 } 4027 4028 /* 4029 * mdi_prop_pack(): 4030 * pack the property list. The caller should hold the 4031 * mdi_pathinfo_t node to get a consistent data 4032 */ 4033 int 4034 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4035 { 4036 int rv; 4037 size_t bufsize; 4038 4039 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4040 return (DDI_PROP_NOT_FOUND); 4041 } 4042 4043 ASSERT(MDI_PI_LOCKED(pip)); 4044 4045 bufsize = buflen; 4046 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4047 NV_ENCODE_NATIVE, KM_SLEEP); 4048 4049 return (i_map_nvlist_error_to_mdi(rv)); 4050 } 4051 4052 /* 4053 * mdi_prop_update_byte(): 4054 * Create/Update a byte property 4055 */ 4056 int 4057 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4058 { 4059 int rv; 4060 4061 if (pip == NULL) { 4062 return (DDI_PROP_INVAL_ARG); 4063 } 4064 ASSERT(!MDI_PI_LOCKED(pip)); 4065 MDI_PI_LOCK(pip); 4066 if (MDI_PI(pip)->pi_prop == NULL) { 4067 MDI_PI_UNLOCK(pip); 4068 return (DDI_PROP_NOT_FOUND); 4069 } 4070 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4071 MDI_PI_UNLOCK(pip); 4072 return (i_map_nvlist_error_to_mdi(rv)); 4073 } 4074 4075 /* 4076 * mdi_prop_update_byte_array(): 4077 * Create/Update a byte array property 4078 */ 4079 int 4080 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4081 uint_t nelements) 4082 { 4083 int rv; 4084 4085 if (pip == NULL) { 4086 return (DDI_PROP_INVAL_ARG); 4087 } 4088 ASSERT(!MDI_PI_LOCKED(pip)); 4089 MDI_PI_LOCK(pip); 4090 if (MDI_PI(pip)->pi_prop == NULL) { 
4091 MDI_PI_UNLOCK(pip); 4092 return (DDI_PROP_NOT_FOUND); 4093 } 4094 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4095 MDI_PI_UNLOCK(pip); 4096 return (i_map_nvlist_error_to_mdi(rv)); 4097 } 4098 4099 /* 4100 * mdi_prop_update_int(): 4101 * Create/Update a 32 bit integer property 4102 */ 4103 int 4104 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4105 { 4106 int rv; 4107 4108 if (pip == NULL) { 4109 return (DDI_PROP_INVAL_ARG); 4110 } 4111 ASSERT(!MDI_PI_LOCKED(pip)); 4112 MDI_PI_LOCK(pip); 4113 if (MDI_PI(pip)->pi_prop == NULL) { 4114 MDI_PI_UNLOCK(pip); 4115 return (DDI_PROP_NOT_FOUND); 4116 } 4117 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4118 MDI_PI_UNLOCK(pip); 4119 return (i_map_nvlist_error_to_mdi(rv)); 4120 } 4121 4122 /* 4123 * mdi_prop_update_int64(): 4124 * Create/Update a 64 bit integer property 4125 */ 4126 int 4127 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4128 { 4129 int rv; 4130 4131 if (pip == NULL) { 4132 return (DDI_PROP_INVAL_ARG); 4133 } 4134 ASSERT(!MDI_PI_LOCKED(pip)); 4135 MDI_PI_LOCK(pip); 4136 if (MDI_PI(pip)->pi_prop == NULL) { 4137 MDI_PI_UNLOCK(pip); 4138 return (DDI_PROP_NOT_FOUND); 4139 } 4140 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4141 MDI_PI_UNLOCK(pip); 4142 return (i_map_nvlist_error_to_mdi(rv)); 4143 } 4144 4145 /* 4146 * mdi_prop_update_int_array(): 4147 * Create/Update a int array property 4148 */ 4149 int 4150 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4151 uint_t nelements) 4152 { 4153 int rv; 4154 4155 if (pip == NULL) { 4156 return (DDI_PROP_INVAL_ARG); 4157 } 4158 ASSERT(!MDI_PI_LOCKED(pip)); 4159 MDI_PI_LOCK(pip); 4160 if (MDI_PI(pip)->pi_prop == NULL) { 4161 MDI_PI_UNLOCK(pip); 4162 return (DDI_PROP_NOT_FOUND); 4163 } 4164 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4165 nelements); 4166 MDI_PI_UNLOCK(pip); 4167 return (i_map_nvlist_error_to_mdi(rv)); 
4168 } 4169 4170 /* 4171 * mdi_prop_update_string(): 4172 * Create/Update a string property 4173 */ 4174 int 4175 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4176 { 4177 int rv; 4178 4179 if (pip == NULL) { 4180 return (DDI_PROP_INVAL_ARG); 4181 } 4182 ASSERT(!MDI_PI_LOCKED(pip)); 4183 MDI_PI_LOCK(pip); 4184 if (MDI_PI(pip)->pi_prop == NULL) { 4185 MDI_PI_UNLOCK(pip); 4186 return (DDI_PROP_NOT_FOUND); 4187 } 4188 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data); 4189 MDI_PI_UNLOCK(pip); 4190 return (i_map_nvlist_error_to_mdi(rv)); 4191 } 4192 4193 /* 4194 * mdi_prop_update_string_array(): 4195 * Create/Update a string array property 4196 */ 4197 int 4198 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4199 uint_t nelements) 4200 { 4201 int rv; 4202 4203 if (pip == NULL) { 4204 return (DDI_PROP_INVAL_ARG); 4205 } 4206 ASSERT(!MDI_PI_LOCKED(pip)); 4207 MDI_PI_LOCK(pip); 4208 if (MDI_PI(pip)->pi_prop == NULL) { 4209 MDI_PI_UNLOCK(pip); 4210 return (DDI_PROP_NOT_FOUND); 4211 } 4212 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4213 nelements); 4214 MDI_PI_UNLOCK(pip); 4215 return (i_map_nvlist_error_to_mdi(rv)); 4216 } 4217 4218 /* 4219 * mdi_prop_lookup_byte(): 4220 * Look for byte property identified by name. The data returned 4221 * is the actual property and valid as long as mdi_pathinfo_t node 4222 * is alive. 4223 */ 4224 int 4225 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4226 { 4227 int rv; 4228 4229 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4230 return (DDI_PROP_NOT_FOUND); 4231 } 4232 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4233 return (i_map_nvlist_error_to_mdi(rv)); 4234 } 4235 4236 4237 /* 4238 * mdi_prop_lookup_byte_array(): 4239 * Look for byte array property identified by name. The data 4240 * returned is the actual property and valid as long as 4241 * mdi_pathinfo_t node is alive. 
4242 */ 4243 int 4244 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4245 uint_t *nelements) 4246 { 4247 int rv; 4248 4249 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4250 return (DDI_PROP_NOT_FOUND); 4251 } 4252 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4253 nelements); 4254 return (i_map_nvlist_error_to_mdi(rv)); 4255 } 4256 4257 /* 4258 * mdi_prop_lookup_int(): 4259 * Look for int property identified by name. The data returned 4260 * is the actual property and valid as long as mdi_pathinfo_t 4261 * node is alive. 4262 */ 4263 int 4264 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4265 { 4266 int rv; 4267 4268 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4269 return (DDI_PROP_NOT_FOUND); 4270 } 4271 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4272 return (i_map_nvlist_error_to_mdi(rv)); 4273 } 4274 4275 /* 4276 * mdi_prop_lookup_int64(): 4277 * Look for int64 property identified by name. The data returned 4278 * is the actual property and valid as long as mdi_pathinfo_t node 4279 * is alive. 4280 */ 4281 int 4282 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4283 { 4284 int rv; 4285 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4286 return (DDI_PROP_NOT_FOUND); 4287 } 4288 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4289 return (i_map_nvlist_error_to_mdi(rv)); 4290 } 4291 4292 /* 4293 * mdi_prop_lookup_int_array(): 4294 * Look for int array property identified by name. The data 4295 * returned is the actual property and valid as long as 4296 * mdi_pathinfo_t node is alive. 
4297 */ 4298 int 4299 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4300 uint_t *nelements) 4301 { 4302 int rv; 4303 4304 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4305 return (DDI_PROP_NOT_FOUND); 4306 } 4307 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4308 (int32_t **)data, nelements); 4309 return (i_map_nvlist_error_to_mdi(rv)); 4310 } 4311 4312 /* 4313 * mdi_prop_lookup_string(): 4314 * Look for string property identified by name. The data 4315 * returned is the actual property and valid as long as 4316 * mdi_pathinfo_t node is alive. 4317 */ 4318 int 4319 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4320 { 4321 int rv; 4322 4323 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4324 return (DDI_PROP_NOT_FOUND); 4325 } 4326 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4327 return (i_map_nvlist_error_to_mdi(rv)); 4328 } 4329 4330 /* 4331 * mdi_prop_lookup_string_array(): 4332 * Look for string array property identified by name. The data 4333 * returned is the actual property and valid as long as 4334 * mdi_pathinfo_t node is alive. 4335 */ 4336 int 4337 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4338 uint_t *nelements) 4339 { 4340 int rv; 4341 4342 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4343 return (DDI_PROP_NOT_FOUND); 4344 } 4345 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4346 nelements); 4347 return (i_map_nvlist_error_to_mdi(rv)); 4348 } 4349 4350 /* 4351 * mdi_prop_free(): 4352 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4353 * functions return the pointer to actual property data and not a 4354 * copy of it. So the data returned is valid as long as 4355 * mdi_pathinfo_t node is valid. 
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	/*
	 * Nothing to release: lookup functions return pointers into the
	 * pathinfo node's live nvlist, not allocated copies.
	 */
	return (DDI_PROP_SUCCESS);
}

/*
 * i_mdi_report_path_state():
 *	Emit a one-line console/log summary of the client's multipath
 *	state and the given path's state, including the load-balancing
 *	policy in effect.  Caller must hold the client lock; the message
 *	is emitted only if a report was flagged as needed and the client
 *	has an attached dev_info node with a valid instance.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char		*phci_path, *ct_path;
	char		*ct_status;
	char		*status;
	dev_info_t	*dip = ct->ct_dip;
	char		lb_buf[64];

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* skip if not attached/instanced or no report is pending */
	if ((dip == NULL) || (ddi_get_instance(dip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}
	/* map client state to a human-readable word */
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	/* map path state to a human-readable word */
	if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	/* describe the load-balancing policy (LBA policy includes region) */
	if (ct->ct_lb == LOAD_BALANCE_LBA) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s, region-size: %d", mdi_load_balance_lba,
		    ct->ct_lb_args->region_size);
	} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
		(void) snprintf(lb_buf, sizeof (lb_buf),
		    "%s", mdi_load_balance_none);
	} else {
		(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
		    mdi_load_balance_rr);
	}

	if (dip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		phci_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		/* '?' prefix: message goes to the log/boot output only */
		cmn_err(CE_CONT, "?%s (%s%d) multipath status: %s, "
		    "path %s (%s%d) to target address: %s is %s"
		    " Load balancing: %s\n",
		    ddi_pathname(dip, ct_path), ddi_driver_name(dip),
		    ddi_get_instance(dip), ct_status,
		    ddi_pathname(MDI_PI(pip)->pi_phci->ph_dip, phci_path),
		    ddi_driver_name(MDI_PI(pip)->pi_phci->ph_dip),
		    ddi_get_instance(MDI_PI(pip)->pi_phci->ph_dip),
		    MDI_PI(pip)->pi_addr, status, lb_buf);
		kmem_free(phci_path, MAXPATHLEN);
		kmem_free(ct_path, MAXPATHLEN);
		/* report delivered; clear the pending flag */
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef DEBUG
/*
 * i_mdi_log():
 *	Utility function for error message management
 *
 */
/*PRINTFLIKE3*/
static void
i_mdi_log(int level, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[MAXNAMELEN];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	/* build a "driver<instance>: " prefix when a devinfo is supplied */
	if (dip) {
		(void) snprintf(name, MAXNAMELEN, "%s%d: ",
		    ddi_node_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, MAXNAMELEN, fmt, ap);
	va_end(ap);

	/*
	 * Honor the cmn_err(9F)-style destination prefix on the caller's
	 * format: '!' log only, '?' boot only, '^' console only.
	 */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		bp = buf;
		break;
	}
	/* global debug override: force everything to the log */
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
	case CE_WARN:
	case CE_PANIC:
		/* re-apply the chosen destination prefix for cmn_err */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s", name, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s", name, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s", name, bp);
		} else {
			cmn_err(level, "mdi: %s%s", name, bp);
		}
		break;
	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */

/*
 * i_mdi_client_online():
 *	Handle a client device coming online: mark the client ONLINE,
 *	re-bind it to its dev_info node, power up pHCIs if necessary
 *	and take a client power-management hold.
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification. Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (CE_NOTE, ct_dip, "i_mdi_client_online "
	    "i_mdi_pm_hold_client %p\n", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_phci_online():
 *	Handle a pHCI device coming online: mark its state ONLINE.
 */
void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *	Online notification from NDI framework on pHCI/client
 *	device online.
 * Return Values:
 *	NDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	/* a node can be both a pHCI and a client; handle each role */
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *	Offline notification from NDI framework on pHCI/Client device
 *	offline.
4573 * 4574 * Return Values: 4575 * NDI_SUCCESS 4576 * NDI_FAILURE 4577 */ 4578 /*ARGSUSED*/ 4579 int 4580 mdi_devi_offline(dev_info_t *dip, uint_t flags) 4581 { 4582 int rv = NDI_SUCCESS; 4583 4584 if (MDI_CLIENT(dip)) { 4585 rv = i_mdi_client_offline(dip, flags); 4586 if (rv != NDI_SUCCESS) 4587 return (rv); 4588 } 4589 4590 if (MDI_PHCI(dip)) { 4591 rv = i_mdi_phci_offline(dip, flags); 4592 4593 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 4594 /* set client back online */ 4595 i_mdi_client_online(dip); 4596 } 4597 } 4598 4599 return (rv); 4600 } 4601 4602 /*ARGSUSED*/ 4603 static int 4604 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 4605 { 4606 int rv = NDI_SUCCESS; 4607 mdi_phci_t *ph; 4608 mdi_client_t *ct; 4609 mdi_pathinfo_t *pip; 4610 mdi_pathinfo_t *next; 4611 mdi_pathinfo_t *failed_pip = NULL; 4612 dev_info_t *cdip; 4613 4614 /* 4615 * pHCI component offline notification 4616 * Make sure that this pHCI instance is free to be offlined. 4617 * If it is OK to proceed, Offline and remove all the child 4618 * mdi_pathinfo nodes. This process automatically offlines 4619 * corresponding client devices, for which this pHCI provides 4620 * critical services. 4621 */ 4622 ph = i_devi_get_phci(dip); 4623 MDI_DEBUG(2, (CE_NOTE, dip, "!mdi_phci_offline called %p %p\n", 4624 (void *)dip, (void *)ph)); 4625 if (ph == NULL) { 4626 return (rv); 4627 } 4628 4629 MDI_PHCI_LOCK(ph); 4630 4631 if (MDI_PHCI_IS_OFFLINE(ph)) { 4632 MDI_DEBUG(1, (CE_WARN, dip, "!pHCI %p already offlined", 4633 (void *)ph)); 4634 MDI_PHCI_UNLOCK(ph); 4635 return (NDI_SUCCESS); 4636 } 4637 4638 /* 4639 * Check to see if the pHCI can be offlined 4640 */ 4641 if (ph->ph_unstable) { 4642 MDI_DEBUG(1, (CE_WARN, dip, 4643 "!One or more target devices are in transient " 4644 "state. This device can not be removed at " 4645 "this moment. 
Please try again later.")); 4646 MDI_PHCI_UNLOCK(ph); 4647 return (NDI_BUSY); 4648 } 4649 4650 pip = ph->ph_path_head; 4651 while (pip != NULL) { 4652 MDI_PI_LOCK(pip); 4653 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4654 4655 /* 4656 * The mdi_pathinfo state is OK. Check the client state. 4657 * If failover in progress fail the pHCI from offlining 4658 */ 4659 ct = MDI_PI(pip)->pi_client; 4660 i_mdi_client_lock(ct, pip); 4661 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 4662 (ct->ct_unstable)) { 4663 /* 4664 * Failover is in progress, Fail the DR 4665 */ 4666 MDI_DEBUG(1, (CE_WARN, dip, 4667 "!pHCI device (%s%d) is Busy. %s", 4668 ddi_driver_name(dip), ddi_get_instance(dip), 4669 "This device can not be removed at " 4670 "this moment. Please try again later.")); 4671 MDI_PI_UNLOCK(pip); 4672 i_mdi_client_unlock(ct); 4673 MDI_PHCI_UNLOCK(ph); 4674 return (NDI_BUSY); 4675 } 4676 MDI_PI_UNLOCK(pip); 4677 4678 /* 4679 * Check to see of we are removing the last path of this 4680 * client device... 4681 */ 4682 cdip = ct->ct_dip; 4683 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 4684 (i_mdi_client_compute_state(ct, ph) == 4685 MDI_CLIENT_STATE_FAILED)) { 4686 i_mdi_client_unlock(ct); 4687 MDI_PHCI_UNLOCK(ph); 4688 if (ndi_devi_offline(cdip, 0) != NDI_SUCCESS) { 4689 /* 4690 * ndi_devi_offline() failed. 4691 * This pHCI provides the critical path 4692 * to one or more client devices. 4693 * Return busy. 4694 */ 4695 MDI_PHCI_LOCK(ph); 4696 MDI_DEBUG(1, (CE_WARN, dip, 4697 "!pHCI device (%s%d) is Busy. %s", 4698 ddi_driver_name(dip), ddi_get_instance(dip), 4699 "This device can not be removed at " 4700 "this moment. 
Please try again later.")); 4701 failed_pip = pip; 4702 break; 4703 } else { 4704 MDI_PHCI_LOCK(ph); 4705 pip = next; 4706 } 4707 } else { 4708 i_mdi_client_unlock(ct); 4709 pip = next; 4710 } 4711 } 4712 4713 if (failed_pip) { 4714 pip = ph->ph_path_head; 4715 while (pip != failed_pip) { 4716 MDI_PI_LOCK(pip); 4717 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4718 ct = MDI_PI(pip)->pi_client; 4719 i_mdi_client_lock(ct, pip); 4720 cdip = ct->ct_dip; 4721 switch (MDI_CLIENT_STATE(ct)) { 4722 case MDI_CLIENT_STATE_OPTIMAL: 4723 case MDI_CLIENT_STATE_DEGRADED: 4724 if (cdip) { 4725 MDI_PI_UNLOCK(pip); 4726 i_mdi_client_unlock(ct); 4727 MDI_PHCI_UNLOCK(ph); 4728 (void) ndi_devi_online(cdip, 0); 4729 MDI_PHCI_LOCK(ph); 4730 pip = next; 4731 continue; 4732 } 4733 break; 4734 4735 case MDI_CLIENT_STATE_FAILED: 4736 if (cdip) { 4737 MDI_PI_UNLOCK(pip); 4738 i_mdi_client_unlock(ct); 4739 MDI_PHCI_UNLOCK(ph); 4740 (void) ndi_devi_offline(cdip, 0); 4741 MDI_PHCI_LOCK(ph); 4742 pip = next; 4743 continue; 4744 } 4745 break; 4746 } 4747 MDI_PI_UNLOCK(pip); 4748 i_mdi_client_unlock(ct); 4749 pip = next; 4750 } 4751 MDI_PHCI_UNLOCK(ph); 4752 return (NDI_BUSY); 4753 } 4754 4755 /* 4756 * Mark the pHCI as offline 4757 */ 4758 MDI_PHCI_SET_OFFLINE(ph); 4759 4760 /* 4761 * Mark the child mdi_pathinfo nodes as transient 4762 */ 4763 pip = ph->ph_path_head; 4764 while (pip != NULL) { 4765 MDI_PI_LOCK(pip); 4766 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4767 MDI_PI_SET_OFFLINING(pip); 4768 MDI_PI_UNLOCK(pip); 4769 pip = next; 4770 } 4771 MDI_PHCI_UNLOCK(ph); 4772 /* 4773 * Give a chance for any pending commands to execute 4774 */ 4775 delay(1); 4776 MDI_PHCI_LOCK(ph); 4777 pip = ph->ph_path_head; 4778 while (pip != NULL) { 4779 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 4780 (void) i_mdi_pi_offline(pip, flags); 4781 MDI_PI_LOCK(pip); 4782 ct = MDI_PI(pip)->pi_client; 4783 if (!MDI_PI_IS_OFFLINE(pip)) { 4784 MDI_DEBUG(1, (CE_WARN, dip, 4785 "!pHCI device (%s%d) 
is Busy. %s", 4786 ddi_driver_name(dip), ddi_get_instance(dip), 4787 "This device can not be removed at " 4788 "this moment. Please try again later.")); 4789 MDI_PI_UNLOCK(pip); 4790 MDI_PHCI_SET_ONLINE(ph); 4791 MDI_PHCI_UNLOCK(ph); 4792 return (NDI_BUSY); 4793 } 4794 MDI_PI_UNLOCK(pip); 4795 pip = next; 4796 } 4797 MDI_PHCI_UNLOCK(ph); 4798 4799 return (rv); 4800 } 4801 4802 /*ARGSUSED*/ 4803 static int 4804 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 4805 { 4806 int rv = NDI_SUCCESS; 4807 mdi_client_t *ct; 4808 4809 /* 4810 * Client component to go offline. Make sure that we are 4811 * not in failing over state and update client state 4812 * accordingly 4813 */ 4814 ct = i_devi_get_client(dip); 4815 MDI_DEBUG(2, (CE_NOTE, dip, "!i_mdi_client_offline called %p %p\n", 4816 (void *)dip, (void *)ct)); 4817 if (ct != NULL) { 4818 MDI_CLIENT_LOCK(ct); 4819 if (ct->ct_unstable) { 4820 /* 4821 * One or more paths are in transient state, 4822 * Dont allow offline of a client device 4823 */ 4824 MDI_DEBUG(1, (CE_WARN, dip, 4825 "!One or more paths to this device is " 4826 "in transient state. This device can not " 4827 "be removed at this moment. " 4828 "Please try again later.")); 4829 MDI_CLIENT_UNLOCK(ct); 4830 return (NDI_BUSY); 4831 } 4832 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 4833 /* 4834 * Failover is in progress, Dont allow DR of 4835 * a client device 4836 */ 4837 MDI_DEBUG(1, (CE_WARN, dip, 4838 "!Client device (%s%d) is Busy. %s", 4839 ddi_driver_name(dip), ddi_get_instance(dip), 4840 "This device can not be removed at " 4841 "this moment. 
Please try again later.")); 4842 MDI_CLIENT_UNLOCK(ct); 4843 return (NDI_BUSY); 4844 } 4845 MDI_CLIENT_SET_OFFLINE(ct); 4846 4847 /* 4848 * Unbind our relationship with the dev_info node 4849 */ 4850 if (flags & NDI_DEVI_REMOVE) { 4851 ct->ct_dip = NULL; 4852 } 4853 MDI_CLIENT_UNLOCK(ct); 4854 } 4855 return (rv); 4856 } 4857 4858 /* 4859 * mdi_pre_attach(): 4860 * Pre attach() notification handler 4861 */ 4862 /*ARGSUSED*/ 4863 int 4864 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 4865 { 4866 /* don't support old DDI_PM_RESUME */ 4867 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 4868 (cmd == DDI_PM_RESUME)) 4869 return (DDI_FAILURE); 4870 4871 return (DDI_SUCCESS); 4872 } 4873 4874 /* 4875 * mdi_post_attach(): 4876 * Post attach() notification handler 4877 */ 4878 /*ARGSUSED*/ 4879 void 4880 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 4881 { 4882 mdi_phci_t *ph; 4883 mdi_client_t *ct; 4884 mdi_pathinfo_t *pip; 4885 4886 if (MDI_PHCI(dip)) { 4887 ph = i_devi_get_phci(dip); 4888 ASSERT(ph != NULL); 4889 4890 MDI_PHCI_LOCK(ph); 4891 switch (cmd) { 4892 case DDI_ATTACH: 4893 MDI_DEBUG(2, (CE_NOTE, dip, 4894 "!pHCI post_attach: called %p\n", (void *)ph)); 4895 if (error == DDI_SUCCESS) { 4896 MDI_PHCI_SET_ATTACH(ph); 4897 } else { 4898 MDI_DEBUG(1, (CE_NOTE, dip, 4899 "!pHCI post_attach: failed error=%d\n", 4900 error)); 4901 MDI_PHCI_SET_DETACH(ph); 4902 } 4903 break; 4904 4905 case DDI_RESUME: 4906 MDI_DEBUG(2, (CE_NOTE, dip, 4907 "!pHCI post_resume: called %p\n", (void *)ph)); 4908 if (error == DDI_SUCCESS) { 4909 MDI_PHCI_SET_RESUME(ph); 4910 } else { 4911 MDI_DEBUG(1, (CE_NOTE, dip, 4912 "!pHCI post_resume: failed error=%d\n", 4913 error)); 4914 MDI_PHCI_SET_SUSPEND(ph); 4915 } 4916 break; 4917 } 4918 MDI_PHCI_UNLOCK(ph); 4919 } 4920 4921 if (MDI_CLIENT(dip)) { 4922 ct = i_devi_get_client(dip); 4923 ASSERT(ct != NULL); 4924 4925 MDI_CLIENT_LOCK(ct); 4926 switch (cmd) { 4927 case DDI_ATTACH: 4928 MDI_DEBUG(2, (CE_NOTE, 
			    dip,
			    "!Client post_attach: called %p\n", (void *)ct));
			if (error != DDI_SUCCESS) {
				/*
				 * Attach failed: mark detached and drop any
				 * power holds taken on behalf of this client.
				 */
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_attach: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (CE_WARN, dip,
				    "mdi_post_attach i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached.
			 * Create kstats for any pathinfo structures
			 * initially associated with this client.
			 */
			for (pip = ct->ct_path_head; pip != NULL;
			    pip = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link) {
				if (!MDI_PI_IS_OFFLINE(pip)) {
					(void) i_mdi_pi_kstat_create(pip);
					i_mdi_report_path_state(ct, pip);
				}
			}
			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			/*
			 * NOTE(review): this message says "post_attach" but
			 * this is the DDI_RESUME arm; presumably meant
			 * "post_resume" (cf. the pHCI arm above) — confirm.
			 */
			MDI_DEBUG(2, (CE_NOTE, dip,
			    "!Client post_attach: called %p\n", (void *)ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (CE_NOTE, dip,
				    "!Client post_resume: failed error=%d\n",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}

/*
 * mdi_pre_detach():
 *		Pre detach notification handler.
 *		Dispatches to the client and/or pHCI pre-detach handlers;
 *		only the pHCI handler's status is returned (client side is
 *		advisory here).
 */
/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*
 * i_mdi_phci_pre_detach():
 *		Validate and prepare a pHCI for detach or suspend.
 *		DDI_DETACH fails while pathinfo nodes are still attached.
 *		DDI_SUSPEND first suspends all client devices reachable
 *		through this pHCI (clients must be suspended before the
 *		pHCI to avoid a potential stack overflow), and rolls the
 *		already-suspended clients back on failure.
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_detach: called %p\n", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (CE_WARN, dip,
			    "!pHCI pre_detach: "
			    "mdi_pathinfo nodes are still attached "
			    "%p\n", (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended. Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI pre_suspend: called %p\n", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			/* capture the next link before dropping pi_mutex */
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (CE_WARN, dip,
					    "!Suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * i_mdi_client_pre_detach():
 *		Record the pending detach/suspend in the client state so
 *		concurrent path operations see the transition.
 */
/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_detach: called %p\n", (void *)ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client pre_suspend: called %p\n", (void *)ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *		Post detach notification handler.
 */
/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed.
Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*
 * i_mdi_phci_post_detach():
 *		On a failed pHCI detach/suspend, restore the pHCI state
 *		flags set by the pre-detach handler.
 */
/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed. Update our state
	 * too
	 */
	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);
	/*
	 * Detach of pHCI failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI post_detach: called %p\n", (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_ATTACH(ph);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!pHCI post_suspend: called %p\n", (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_RESUME(ph);
		break;
	}
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_client_post_detach():
 *		On client detach completion, settle the power-management
 *		bookkeeping for the client; on failure restore the client's
 *		attach/resume state flags.
 */
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_detach: called %p\n", (void *)ct));
		if (DEVI_IS_ATTACHING(ct->ct_dip)) {
			/* node is re-attaching: drop one hold per path */
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			/* fully detaching: discard all power holds */
			MDI_DEBUG(4, (CE_NOTE, dip, "i_mdi_client_post_detach "
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (CE_NOTE, dip,
		    "!Client post_suspend: called %p\n", (void *)ct));
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * create and install per-path (client - pHCI) statistics
 * I/O stats supported: nread, nwritten, reads, and writes
 * Error stats - hard errors, soft errors, & transport errors
 */
static int
i_mdi_pi_kstat_create(mdi_pathinfo_t *pip)
{

	dev_info_t *client = MDI_PI(pip)->pi_client->ct_dip;
	dev_info_t *ppath = MDI_PI(pip)->pi_phci->ph_dip;
	char ksname[KSTAT_STRLEN];
	mdi_pathinfo_t *cpip;
	const char *err_postfix = ",err";
	kstat_t	*kiosp, *kerrsp;
	struct pi_errs	*nsp;
	struct mdi_pi_kstats *mdi_statp;

	ASSERT(client != NULL && ppath != NULL);

	ASSERT(MDI_CLIENT_LOCKED(MDI_PI(pip)->pi_client));

	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	/*
	 * Look for a sibling path to the same pHCI whose kstats we can
	 * share rather than creating a duplicate set.
	 */
	for (cpip = MDI_PI(pip)->pi_client->ct_path_head; cpip != NULL;
	    cpip = (mdi_pathinfo_t *)(MDI_PI(cpip)->pi_client_link)) {
		/*
		 * NOTE(review): the offline test checks pip, not cpip;
		 * looks like it was meant to skip offline *siblings* —
		 * confirm against other mdi versions before relying on it.
		 */
		if ((cpip == pip) || MDI_PI_IS_OFFLINE(pip))
			continue;
		/*
		 * We have found a different path with same parent
		 * kstats for a given client-pHCI are common
		 */
		if ((MDI_PI(cpip)->pi_phci->ph_dip == ppath) &&
		    (MDI_PI(cpip)->pi_kstats
		    != NULL)) {
			/* share the sibling's kstats via refcount */
			MDI_PI(cpip)->pi_kstats->pi_kstat_ref++;
			MDI_PI(pip)->pi_kstats = MDI_PI(cpip)->pi_kstats;
			return (MDI_SUCCESS);
		}
	}

	/*
	 * stats are named as follows: TGTx.HBAy, e.g. "ssd0.fp0"
	 * clamp length of name against max length of error kstat name
	 */
	if (snprintf(ksname, KSTAT_STRLEN, "%s%d.%s%d",
	    ddi_driver_name(client), ddi_get_instance(client),
	    ddi_driver_name(ppath), ddi_get_instance(ppath)) >
	    (KSTAT_STRLEN - strlen(err_postfix))) {
		return (MDI_FAILURE);
	}
	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, 0)) == NULL) {
		return (MDI_FAILURE);
	}

	/* error kstat shares the base name with an ",err" suffix */
	(void) strcat(ksname, err_postfix);
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);

	if (kerrsp == NULL) {
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}

/*
 * destroy per-path properties
 */
static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
{

	struct mdi_pi_kstats *mdi_statp;

	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
		return;

	MDI_PI(pip)->pi_kstats = NULL;

	/*
	 * the kstat may be shared between multiple pathinfo nodes
	 * decrement this pathinfo's usage, removing the kstats
	 * themselves when the last pathinfo reference is removed.
	 */
	ASSERT(mdi_statp->pi_kstat_ref > 0);
	if (--mdi_statp->pi_kstat_ref != 0)
		return;

	kstat_delete(mdi_statp->pi_kstat_iostats);
	kstat_delete(mdi_statp->pi_kstat_errstats);
	kmem_free(mdi_statp, sizeof (*mdi_statp));
}

/*
 * update I/O paths KSTATS
 */
void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
{
	kstat_t *iostatp;
	size_t xfer_cnt;

	ASSERT(pip != NULL);

	/*
	 * I/O can be driven across a path prior to having path
	 * statistics available, i.e. probe(9e).
	 */
	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
		/* count only the bytes actually transferred */
		xfer_cnt = bp->b_bcount - bp->b_resid;
		if (bp->b_flags & B_READ) {
			KSTAT_IO_PTR(iostatp)->reads++;
			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
		} else {
			KSTAT_IO_PTR(iostatp)->writes++;
			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
		}
	}
}

/*
 * Enable the path(specific client/target/initiator)
 * Enabling a path means that MPxIO may select the enabled path for routing
 * future I/O requests, subject to other path state constraints.
5404 */ 5405 int 5406 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 5407 { 5408 mdi_phci_t *ph; 5409 5410 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5411 if (ph == NULL) { 5412 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5413 " failed. pip: %p ph = NULL\n", (void *)pip)); 5414 return (MDI_FAILURE); 5415 } 5416 5417 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 5418 MDI_ENABLE_OP); 5419 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_enable_path:" 5420 " Returning success pip = %p. ph = %p\n", 5421 (void *)pip, (void *)ph)); 5422 return (MDI_SUCCESS); 5423 5424 } 5425 5426 /* 5427 * Disable the path (specific client/target/initiator) 5428 * Disabling a path means that MPxIO will not select the disabled path for 5429 * routing any new I/O requests. 5430 */ 5431 int 5432 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 5433 { 5434 mdi_phci_t *ph; 5435 5436 ph = i_devi_get_phci(mdi_pi_get_phci(pip)); 5437 if (ph == NULL) { 5438 MDI_DEBUG(1, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5439 " failed. pip: %p ph = NULL\n", (void *)pip)); 5440 return (MDI_FAILURE); 5441 } 5442 5443 (void) i_mdi_enable_disable_path(pip, 5444 ph->ph_vhci, flags, MDI_DISABLE_OP); 5445 MDI_DEBUG(5, (CE_NOTE, NULL, "!mdi_pi_disable_path:" 5446 "Returning success pip = %p. ph = %p", 5447 (void *)pip, (void *)ph)); 5448 return (MDI_SUCCESS); 5449 } 5450 5451 /* 5452 * disable the path to a particular pHCI (pHCI specified in the phci_path 5453 * argument) for a particular client (specified in the client_path argument). 5454 * Disabling a path means that MPxIO will not select the disabled path for 5455 * routing any new I/O requests. 
5456 * NOTE: this will be removed once the NWS files are changed to use the new 5457 * mdi_{enable,disable}_path interfaces 5458 */ 5459 int 5460 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5461 { 5462 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 5463 } 5464 5465 /* 5466 * Enable the path to a particular pHCI (pHCI specified in the phci_path 5467 * argument) for a particular client (specified in the client_path argument). 5468 * Enabling a path means that MPxIO may select the enabled path for routing 5469 * future I/O requests, subject to other path state constraints. 5470 * NOTE: this will be removed once the NWS files are changed to use the new 5471 * mdi_{enable,disable}_path interfaces 5472 */ 5473 5474 int 5475 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 5476 { 5477 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 5478 } 5479 5480 /* 5481 * Common routine for doing enable/disable. 5482 */ 5483 static mdi_pathinfo_t * 5484 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 5485 int op) 5486 { 5487 int sync_flag = 0; 5488 int rv; 5489 mdi_pathinfo_t *next; 5490 int (*f)() = NULL; 5491 5492 f = vh->vh_ops->vo_pi_state_change; 5493 5494 sync_flag = (flags << 8) & 0xf00; 5495 5496 /* 5497 * Do a callback into the mdi consumer to let it 5498 * know that path is about to get enabled/disabled. 
5499 */ 5500 if (f != NULL) { 5501 rv = (*f)(vh->vh_dip, pip, 0, 5502 MDI_PI_EXT_STATE(pip), 5503 MDI_EXT_STATE_CHANGE | sync_flag | 5504 op | MDI_BEFORE_STATE_CHANGE); 5505 if (rv != MDI_SUCCESS) { 5506 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5507 "!vo_pi_state_change: failed rv = %x", rv)); 5508 } 5509 } 5510 MDI_PI_LOCK(pip); 5511 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5512 5513 switch (flags) { 5514 case USER_DISABLE: 5515 if (op == MDI_DISABLE_OP) { 5516 MDI_PI_SET_USER_DISABLE(pip); 5517 } else { 5518 MDI_PI_SET_USER_ENABLE(pip); 5519 } 5520 break; 5521 case DRIVER_DISABLE: 5522 if (op == MDI_DISABLE_OP) { 5523 MDI_PI_SET_DRV_DISABLE(pip); 5524 } else { 5525 MDI_PI_SET_DRV_ENABLE(pip); 5526 } 5527 break; 5528 case DRIVER_DISABLE_TRANSIENT: 5529 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 5530 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 5531 } else { 5532 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 5533 } 5534 break; 5535 } 5536 MDI_PI_UNLOCK(pip); 5537 /* 5538 * Do a callback into the mdi consumer to let it 5539 * know that path is now enabled/disabled. 5540 */ 5541 if (f != NULL) { 5542 rv = (*f)(vh->vh_dip, pip, 0, 5543 MDI_PI_EXT_STATE(pip), 5544 MDI_EXT_STATE_CHANGE | sync_flag | 5545 op | MDI_AFTER_STATE_CHANGE); 5546 if (rv != MDI_SUCCESS) { 5547 MDI_DEBUG(2, (CE_WARN, vh->vh_dip, 5548 "!vo_pi_state_change: failed rv = %x", rv)); 5549 } 5550 } 5551 return (next); 5552 } 5553 5554 /* 5555 * Common routine for doing enable/disable. 
5556 * NOTE: this will be removed once the NWS files are changed to use the new 5557 * mdi_{enable,disable}_path has been putback 5558 */ 5559 int 5560 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 5561 { 5562 5563 mdi_phci_t *ph; 5564 mdi_vhci_t *vh = NULL; 5565 mdi_client_t *ct; 5566 mdi_pathinfo_t *next, *pip; 5567 int found_it; 5568 5569 ph = i_devi_get_phci(pdip); 5570 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5571 "Op = %d pdip = %p cdip = %p\n", op, (void *)pdip, 5572 (void *)cdip)); 5573 if (ph == NULL) { 5574 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable:" 5575 "Op %d failed. ph = NULL\n", op)); 5576 return (MDI_FAILURE); 5577 } 5578 5579 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 5580 MDI_DEBUG(1, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5581 "Op Invalid operation = %d\n", op)); 5582 return (MDI_FAILURE); 5583 } 5584 5585 vh = ph->ph_vhci; 5586 5587 if (cdip == NULL) { 5588 /* 5589 * Need to mark the Phci as enabled/disabled. 5590 */ 5591 MDI_DEBUG(3, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5592 "Op %d for the phci\n", op)); 5593 MDI_PHCI_LOCK(ph); 5594 switch (flags) { 5595 case USER_DISABLE: 5596 if (op == MDI_DISABLE_OP) { 5597 MDI_PHCI_SET_USER_DISABLE(ph); 5598 } else { 5599 MDI_PHCI_SET_USER_ENABLE(ph); 5600 } 5601 break; 5602 case DRIVER_DISABLE: 5603 if (op == MDI_DISABLE_OP) { 5604 MDI_PHCI_SET_DRV_DISABLE(ph); 5605 } else { 5606 MDI_PHCI_SET_DRV_ENABLE(ph); 5607 } 5608 break; 5609 case DRIVER_DISABLE_TRANSIENT: 5610 if (op == MDI_DISABLE_OP) { 5611 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 5612 } else { 5613 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 5614 } 5615 break; 5616 default: 5617 MDI_PHCI_UNLOCK(ph); 5618 MDI_DEBUG(1, (CE_NOTE, NULL, 5619 "!i_mdi_pi_enable_disable:" 5620 " Invalid flag argument= %d\n", flags)); 5621 } 5622 5623 /* 5624 * Phci has been disabled. Now try to enable/disable 5625 * path info's to each client. 
5626 */ 5627 pip = ph->ph_path_head; 5628 while (pip != NULL) { 5629 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 5630 } 5631 MDI_PHCI_UNLOCK(ph); 5632 } else { 5633 5634 /* 5635 * Disable a specific client. 5636 */ 5637 ct = i_devi_get_client(cdip); 5638 if (ct == NULL) { 5639 MDI_DEBUG(1, (CE_NOTE, NULL, 5640 "!i_mdi_pi_enable_disable:" 5641 " failed. ct = NULL operation = %d\n", op)); 5642 return (MDI_FAILURE); 5643 } 5644 5645 MDI_CLIENT_LOCK(ct); 5646 pip = ct->ct_path_head; 5647 found_it = 0; 5648 while (pip != NULL) { 5649 MDI_PI_LOCK(pip); 5650 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 5651 if (MDI_PI(pip)->pi_phci == ph) { 5652 MDI_PI_UNLOCK(pip); 5653 found_it = 1; 5654 break; 5655 } 5656 MDI_PI_UNLOCK(pip); 5657 pip = next; 5658 } 5659 5660 5661 MDI_CLIENT_UNLOCK(ct); 5662 if (found_it == 0) { 5663 MDI_DEBUG(1, (CE_NOTE, NULL, 5664 "!i_mdi_pi_enable_disable:" 5665 " failed. Could not find corresponding pip\n")); 5666 return (MDI_FAILURE); 5667 } 5668 5669 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 5670 } 5671 5672 MDI_DEBUG(5, (CE_NOTE, NULL, "!i_mdi_pi_enable_disable: " 5673 "Op %d Returning success pdip = %p cdip = %p\n", 5674 op, (void *)pdip, (void *)cdip)); 5675 return (MDI_SUCCESS); 5676 } 5677 5678 /* 5679 * Ensure phci powered up 5680 */ 5681 static void 5682 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 5683 { 5684 dev_info_t *ph_dip; 5685 5686 ASSERT(pip != NULL); 5687 ASSERT(MDI_PI_LOCKED(pip)); 5688 5689 if (MDI_PI(pip)->pi_pm_held) { 5690 return; 5691 } 5692 5693 ph_dip = mdi_pi_get_phci(pip); 5694 MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_hold_pip for %s%d %p\n", 5695 ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip)); 5696 if (ph_dip == NULL) { 5697 return; 5698 } 5699 5700 MDI_PI_UNLOCK(pip); 5701 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n", 5702 DEVI(ph_dip)->devi_pm_kidsupcnt)); 5703 5704 pm_hold_power(ph_dip); 5705 5706 MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n", 5707 
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	MDI_PI(pip)->pi_pm_held = 1;
}

/*
 * Allow phci powered down
 */
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
	dev_info_t	*ph_dip = NULL;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	if (MDI_PI(pip)->pi_pm_held == 0) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	ASSERT(ph_dip != NULL);

	/* drop pi_mutex across pm_rele_power(); retaken below */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_pm_rele_pip for %s%d %p\n",
	    ddi_get_name(ph_dip), ddi_get_instance(ph_dip), (void *)pip));

	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt was %d\n",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_rele_power(ph_dip);
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "kidsupcnt is %d\n",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));

	MDI_PI_LOCK(pip);
	MDI_PI(pip)->pi_pm_held = 0;
}

/*
 * i_mdi_pm_hold_client():
 *		Bump the client's power hold count by incr (typically the
 *		client's path count). Caller holds ct_mutex.
 */
static void
i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	ct->ct_power_cnt += incr;
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_hold_client %p "
	    "ct_power_cnt = %d incr = %d\n", (void *)ct,
	    ct->ct_power_cnt, incr));
	ASSERT(ct->ct_power_cnt >= 0);
}

/*
 * i_mdi_rele_all_phci():
 *		Release the power hold on every pHCI reachable through
 *		this client's path list. Caller holds ct_mutex.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		/*
		 * NOTE(review): pi_client_link is read after
		 * mdi_rele_path(); presumably the node cannot be freed
		 * while ct_mutex is held — confirm.
		 */
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * i_mdi_pm_rele_client():
 *		Drop decr power holds; once the count reaches zero,
 *		release all pHCI power holds. Caller holds ct_mutex.
 */
static void
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	if (i_ddi_devi_attached(ct->ct_dip)) {
		ct->ct_power_cnt -= decr;
		MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_rele_client %p "
		    "ct_power_cnt = %d decr = %d\n",
		    (void *)ct, ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/*
 * i_mdi_pm_reset_client():
 *		Forcibly zero the client's power accounting and release
 *		all pHCI holds (used when attach fails or the client is
 *		going away). Caller holds ct_mutex.
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (CE_NOTE, ct->ct_dip, "i_mdi_pm_reset_client %p "
	    "ct_power_cnt = %d\n", (void *)ct, ct->ct_power_cnt));
	ASSERT(MDI_CLIENT_LOCKED(ct));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	ct->ct_powercnt_reset = 1;
}

/*
 * i_mdi_power_one_phci():
 *		Hold and fully power up the pHCI behind one path; on
 *		pm_powerup() failure the hold is released again.
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
	    "pm_powerup for %s%d %p\n", ddi_get_name(ph_dip),
	    ddi_get_instance(ph_dip), (void *)pip));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (CE_NOTE, ph_dip, "i_mdi_power_one_phci "
		    "pm_powerup FAILED for %s%d %p\n",
		    ddi_get_name(ph_dip), ddi_get_instance(ph_dip),
		    (void *)pip));

		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * i_mdi_power_all_phci():
 *		Power up the pHCI for every online/standby path of the
 *		client; succeeds if at least one pHCI powered up.
 *		ct_mutex is dropped around each power-up call.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		pip = (mdi_pathinfo_t
		    *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *		1. Place the phci(s) into powered up state so that
 *		   client can do power management
 *		2. Ensure phci powered up as client power managing
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int			ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t	*bpc;
	mdi_client_t		*ct;
	dev_info_t		*cdip;
	pm_bp_has_changed_t	*bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save cpu cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		/* everything else goes to the generic framework handler */
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip, "mdi_bus_power "
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d\n",
		    PM_NAME(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might have started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, bpc->bpc_dip,
				    "mdi_bus_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip, "mdi_bus_power "
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d\n",
		    PM_NAME(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (CE_NOTE, bphc->bphc_dip,
			    "mdi_bus_power i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * i_mdi_pm_pre_config_one():
 *		Take power holds for one client about to be configured;
 *		powers up all pHCIs if the client holds none yet.
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_config_one ALREADY held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_config_one i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * i_mdi_pm_pre_config():
 *		Pre-config power handling for one child or, when child
 *		is NULL, every child of the vHCI.
 */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	int ret = MDI_SUCCESS;
	dev_info_t *cdip;
	int circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		/* grab the sibling first: the child may change under us */
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
	return (ret);
}

/*
 * i_mdi_pm_pre_unconfig_one():
 *		Take power holds for one client about to be unconfigured.
 *		*held is set when this client ends up holding power so the
 *		caller knows a matching post-unconfig release is needed.
 *		Fails (refuses the unconfig) for an auto-detach of a
 *		powered-down client.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(ct->ct_dip)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_pre_unconfig ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (CE_NOTE, child,
	    "i_mdi_pm_pre_unconfig i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * Pre-unconfig PM hook for the vHCI: apply i_mdi_pm_pre_unconfig_one()
 * either to the single named child or to every child of the vHCI.
 * *held reports whether any hold was taken; when it was, the overall
 * result is forced to MDI_SUCCESS so the matching post-unconfig runs.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	int		ret = MDI_SUCCESS;
	dev_info_t	*cdip;
	int		circ;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);

	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/*
 * Undo the PM hold taken by i_mdi_pm_pre_config_one() for one client.
 * If configuration failed (client failed, powered down, or detached and
 * not attaching) the client PM state is reset; otherwise the hold is
 * released for each currently valid (online or standby) path.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power transition on this client */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre-config hold to undo */
	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config_one NOT configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int		valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_config i_mdi_pm_rele_client\n"));
		/* count paths that are still online or standby */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-config PM hook for the vHCI: apply i_mdi_pm_post_config_one()
 * to the single named child, or to every child of the vHCI.
 */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * Undo the PM hold taken by i_mdi_pm_pre_unconfig_one() for one client.
 * If the detach went wrong (powered down but still attached) or the node
 * is gone and not re-attaching, the client PM state is reset; otherwise
 * the hold is released for each valid (online or standby) path.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power transition on this client */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	/* no matching pre-unconfig hold to undo */
	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig NOT held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(ct->ct_dip)) ||
	    (!i_ddi_devi_attached(ct->ct_dip) &&
	    !DEVI_IS_ATTACHING(ct->ct_dip))) {
		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t	*pip, *next;
		int		valid_path_count = 0;

		MDI_DEBUG(4, (CE_NOTE, child,
		    "i_mdi_pm_post_unconfig i_mdi_pm_rele_client\n"));
		/* count paths that are still online or standby */
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-unconfig PM hook for the vHCI.  `held' is the flag produced by
 * i_mdi_pm_pre_unconfig(); when no hold was taken there is nothing to do.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	int		circ;
	dev_info_t	*cdip;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (CE_NOTE, vdip,
		    "i_mdi_pm_post_unconfig held = %d\n", held));
		return;
	}

	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * MDI power management entry point called by the framework around
 * (un)configure operations and for explicit hold/release requests.
 * `op' selects the phase; `args' is op-specific (held flag pointer for
 * unconfig phases, client dip for HOLD/RELE); `devnm' optionally names a
 * single child to operate on.
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int		circ, ret = MDI_SUCCESS;
	dev_info_t	*client_dip = NULL;
	mdi_client_t	*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (CE_NOTE, vdip, "mdi_power op = %d %s %p\n",
	    op, devnm ? devnm : "NULL", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		ASSERT(args);

		/* for HOLD/RELE, args is the client devinfo node itself */
		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			/*
			 * Release: if the client is mid-attach, release
			 * the per-path holds; otherwise reset its PM
			 * state entirely.
			 */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (CE_NOTE, client_dip,
				    "mdi_power i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return (ret);
}

/*
 * Return MDI_SUCCESS if dip is a vHCI node; optionally report its
 * mdi class name through *mdi_class (pointer into the vhci structure,
 * not a copy).
 */
int
mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
{
	mdi_vhci_t *vhci;

	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Return MDI_SUCCESS if dip is a pHCI node; optionally report the class
 * of the vHCI it is registered with through *mdi_class.
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t *phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Return MDI_SUCCESS if dip is an mdi client node; optionally report the
 * class of its vHCI through *mdi_class.
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t *client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class = client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Get the vhci private data attached to the client node, or NULL if dip
 * is not an mdi client.
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * Set the vhci private data on the client node; silently ignored if dip
 * is not an mdi client.
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}
/*
 * mdi_pi_get_vhci_private():
 *	Get the vhci private information associated with the
 *	mdi_pathinfo node
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *	Set the vhci private information in the mdi_pathinfo node
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *	Get the vhci private information associated with the
 *	mdi_phci node
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *	Set the vhci private information in the mdi_phci node
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function.  The caller frees strlen(result) + 1 bytes.
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/* the -1 below is to account for "%s" in the format string */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t	*vhc;
	mdi_vhci_cache_t	*vhcache;
	int			i;
	nvlist_t		*nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor.
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of some one manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate\n",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* flush the cache before the filesystems go away at shutdown */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t	*vhc = vh->vh_config;
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t	*cphci, *cphci_next;
	mdi_vhcache_client_t	*cct, *cct_next;
	mdi_vhcache_pathinfo_t	*cpi, *cpi_next;

	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	/* free the cached phci list */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	/* free each cached client along with all of its path entries */
	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t	*acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/* wait (polling) for the flush thread and acc threads to exit */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	/* write out any pending cache updates now that the threads are gone */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}

/*
 * Stop vhci cache flush thread
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	/* wait (polling) for the flush thread to exit */
	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay(1);
		mutex_enter(&vhc->vhc_lock);
	}

	/* force-flush any pending update; this runs at shutdown */
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}
6791 6792 /* 6793 * Enqueue the vhcache phci (cphci) at the tail of the list 6794 */ 6795 static void 6796 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 6797 { 6798 cphci->cphci_next = NULL; 6799 if (vhcache->vhcache_phci_head == NULL) 6800 vhcache->vhcache_phci_head = cphci; 6801 else 6802 vhcache->vhcache_phci_tail->cphci_next = cphci; 6803 vhcache->vhcache_phci_tail = cphci; 6804 } 6805 6806 /* 6807 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 6808 */ 6809 static void 6810 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6811 mdi_vhcache_pathinfo_t *cpi) 6812 { 6813 cpi->cpi_next = NULL; 6814 if (cct->cct_cpi_head == NULL) 6815 cct->cct_cpi_head = cpi; 6816 else 6817 cct->cct_cpi_tail->cpi_next = cpi; 6818 cct->cct_cpi_tail = cpi; 6819 } 6820 6821 /* 6822 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 6823 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 6824 * flag set come at the beginning of the list. All cpis which have this 6825 * flag set come at the end of the list. 
6826 */ 6827 static void 6828 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 6829 mdi_vhcache_pathinfo_t *newcpi) 6830 { 6831 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 6832 6833 if (cct->cct_cpi_head == NULL || 6834 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 6835 enqueue_tail_vhcache_pathinfo(cct, newcpi); 6836 else { 6837 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 6838 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 6839 prev_cpi = cpi, cpi = cpi->cpi_next) 6840 ; 6841 6842 if (prev_cpi == NULL) 6843 cct->cct_cpi_head = newcpi; 6844 else 6845 prev_cpi->cpi_next = newcpi; 6846 6847 newcpi->cpi_next = cpi; 6848 6849 if (cpi == NULL) 6850 cct->cct_cpi_tail = newcpi; 6851 } 6852 } 6853 6854 /* 6855 * Enqueue the vhcache client (cct) at the tail of the list 6856 */ 6857 static void 6858 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 6859 mdi_vhcache_client_t *cct) 6860 { 6861 cct->cct_next = NULL; 6862 if (vhcache->vhcache_client_head == NULL) 6863 vhcache->vhcache_client_head = cct; 6864 else 6865 vhcache->vhcache_client_tail->cct_next = cct; 6866 vhcache->vhcache_client_tail = cct; 6867 } 6868 6869 static void 6870 free_string_array(char **str, int nelem) 6871 { 6872 int i; 6873 6874 if (str) { 6875 for (i = 0; i < nelem; i++) { 6876 if (str[i]) 6877 kmem_free(str[i], strlen(str[i]) + 1); 6878 } 6879 kmem_free(str, sizeof (char *) * nelem); 6880 } 6881 } 6882 6883 static void 6884 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 6885 { 6886 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 6887 kmem_free(cphci, sizeof (*cphci)); 6888 } 6889 6890 static void 6891 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 6892 { 6893 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 6894 kmem_free(cpi, sizeof (*cpi)); 6895 } 6896 6897 static void 6898 free_vhcache_client(mdi_vhcache_client_t *cct) 6899 { 6900 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 6901 kmem_free(cct, sizeof (*cct)); 6902 } 6903 6904 
static char * 6905 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 6906 { 6907 char *name_addr; 6908 int len; 6909 6910 len = strlen(ct_name) + strlen(ct_addr) + 2; 6911 name_addr = kmem_alloc(len, KM_SLEEP); 6912 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 6913 6914 if (ret_len) 6915 *ret_len = len; 6916 return (name_addr); 6917 } 6918 6919 /* 6920 * Copy the contents of paddrnvl to vhci cache. 6921 * paddrnvl nvlist contains path information for a vhci client. 6922 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 6923 */ 6924 static void 6925 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 6926 mdi_vhcache_client_t *cct) 6927 { 6928 nvpair_t *nvp = NULL; 6929 mdi_vhcache_pathinfo_t *cpi; 6930 uint_t nelem; 6931 uint32_t *val; 6932 6933 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 6934 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 6935 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 6936 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 6937 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 6938 ASSERT(nelem == 2); 6939 cpi->cpi_cphci = cphci_list[val[0]]; 6940 cpi->cpi_flags = val[1]; 6941 enqueue_tail_vhcache_pathinfo(cct, cpi); 6942 } 6943 } 6944 6945 /* 6946 * Copy the contents of caddrmapnvl to vhci cache. 6947 * caddrmapnvl nvlist contains vhci client address to phci client address 6948 * mappings. See the comment in mainnvl_to_vhcache() for the format of 6949 * this nvlist. 
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
	nvpair_t		*nvp = NULL;
	nvlist_t		*paddrnvl;
	mdi_vhcache_client_t	*cct;

	/* each nvpair is one client: name = "name@addr", value = paddrnvl */
	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_nvlist(nvp, &paddrnvl);
		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
		/* the client must contain at least one path */
		ASSERT(cct->cct_cpi_head != NULL);

		/* make the client findable via list walk and hash lookup */
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
	}
}

/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ...
are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify PHCIs to which the
 * the bus specific address belongs to. These integers are used as an index
 * into to the phcis string array in the main nvlist to get the PHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char			**phcis, **phci_namep;
	uint_t			nphcis;
	mdi_vhcache_phci_t	*cphci, **cphci_list;
	nvlist_t		*caddrmapnvl;
	int32_t			ver;
	int			i;
	size_t			cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* reject a missing or mismatched on-disk cache version */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* no phci list means an empty (but valid) cache */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/*
	 * Build the cached phci list; keep a temporary index->cphci array
	 * so the client map below can resolve its phci-id references.
	 */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnnvl.
 * Returns 0 on success, errno on failure.
7055 */ 7056 static int 7057 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7058 nvlist_t *caddrmapnvl) 7059 { 7060 mdi_vhcache_pathinfo_t *cpi; 7061 nvlist_t *nvl; 7062 int err; 7063 uint32_t val[2]; 7064 7065 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7066 7067 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7068 return (err); 7069 7070 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7071 val[0] = cpi->cpi_cphci->cphci_id; 7072 val[1] = cpi->cpi_flags; 7073 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7074 != 0) 7075 goto out; 7076 } 7077 7078 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7079 out: 7080 nvlist_free(nvl); 7081 return (err); 7082 } 7083 7084 /* 7085 * Build caddrmapnvl using the information in the vhci cache 7086 * and add it to the mainnvl. 7087 * Returns 0 on success, errno on failure. 7088 */ 7089 static int 7090 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7091 { 7092 mdi_vhcache_client_t *cct; 7093 nvlist_t *nvl; 7094 int err; 7095 7096 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7097 7098 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7099 return (err); 7100 7101 for (cct = vhcache->vhcache_client_head; cct != NULL; 7102 cct = cct->cct_next) { 7103 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7104 goto out; 7105 } 7106 7107 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7108 out: 7109 nvlist_free(nvl); 7110 return (err); 7111 } 7112 7113 /* 7114 * Build nvlist using the information in the vhci cache. 7115 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7116 * Returns nvl on success, NULL on failure. 
 */
static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
{
	mdi_vhcache_phci_t	*cphci;
	uint_t			phci_count;
	char			**phcis;
	nvlist_t		*nvl;
	int			err, i;

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
		nvl = NULL;
		goto out;
	}

	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
	    MDI_VHCI_CACHE_VERSION)) != 0)
		goto out;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* empty cache: a version-only nvlist is the valid representation */
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	/*
	 * Number the cached phcis; these ids are what the per-client
	 * paddr nvlists reference (see vhcache_to_paddrnvl()).
	 */
	phci_count = 0;
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next)
		cphci->cphci_id = phci_count++;

	/* build phci pathname list */
	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
	    cphci = cphci->cphci_next, i++)
		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);

	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
	    phci_count);
	/* nvlist_add_string_array copied the strings; free our copies */
	free_string_array(phcis, phci_count);

	if (err == 0 &&
	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	rw_exit(&vhcache->vhcache_lock);
out:
	if (nvl)
		nvlist_free(nvl);
	return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci path.
 * Returns NULL when the path is not in the cache.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
{
	mdi_vhcache_phci_t	*cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/* linear scan of the cached phci list */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (strcmp(cphci->cphci_path, phci_path) == 0)
			return (cphci);
	}

	return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
{
	mdi_vhcache_phci_t	*cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/* linear scan, matching on the live mdi_phci pointer */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (cphci->cphci_phci == ph)
			return (cphci);
	}

	return (NULL);
}

/*
 * Add the specified phci to the vhci cache if not already present.
 * An existing entry (matched by devinfo pathname) is simply re-bound to
 * the live phci; only a genuinely new entry marks the cache dirty.
 */
static void
vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t	*vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t	*cphci;
	char			*pathname;
	int			cache_updated;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(ph->ph_dip, pathname);
	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
	    != NULL) {
		/* already cached; just attach the live phci reference */
		cphci->cphci_phci = ph;
		cache_updated = 0;
	} else {
		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
		cphci->cphci_phci = ph;
		enqueue_vhcache_phci(vhcache, cphci);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * Since a new phci has been added, reset
	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
	 * during next vhcache_discover_paths().
	 */
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_path_discovery_cutoff_time = 0;
	mutex_exit(&vhc->vhc_lock);

	kmem_free(pathname, MAXPATHLEN);
	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified phci from the vhci cache.
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_vhcache_phci_t *cphci;

    rw_enter(&vhcache->vhcache_lock, RW_WRITER);
    if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
        /* do not remove the actual mdi_vhcache_phci structure */
        cphci->cphci_phci = NULL;
    }
    rw_exit(&vhcache->vhcache_lock);
}

/*
 * Initialize lookup token dst from src; an empty token when src is NULL.
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
    if (src == NULL) {
        dst->lt_cct = NULL;
        dst->lt_cct_lookup_time = 0;
    } else {
        dst->lt_cct = src->lt_cct;
        dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
    }
}

/*
 * Look up vhcache client for the specified client.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
    mod_hash_val_t hv;
    char *name_addr;
    int len;

    ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

    /*
     * If no vhcache clean occurred since the last lookup, we can
     * simply return the cct from the last lookup operation.
     * It works because ccts are never freed except during the vhcache
     * cleanup operation.
     */
    if (token != NULL &&
        vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
        return (token->lt_cct);

    name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
    if (mod_hash_find(vhcache->vhcache_client_hash,
        (mod_hash_key_t)name_addr, &hv) == 0) {
        if (token) {
            /* remember the hit so a later lookup can short-circuit */
            token->lt_cct = (mdi_vhcache_client_t *)hv;
            token->lt_cct_lookup_time = lbolt64;
        }
    } else {
        if (token) {
            token->lt_cct = NULL;
            token->lt_cct_lookup_time = 0;
        }
        hv = NULL;
    }
    kmem_free(name_addr, len);
    return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_vhcache_client_t *cct;
    mdi_vhcache_pathinfo_t *cpi;
    mdi_phci_t *ph = pip->pi_phci;
    mdi_client_t *ct = pip->pi_client;
    int cache_updated = 0;

    rw_enter(&vhcache->vhcache_lock, RW_WRITER);

    /* if vhcache client for this pip doesn't already exist, add it */
    if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
        NULL)) == NULL) {
        cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
        cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
            ct->ct_guid, NULL);
        enqueue_vhcache_client(vhcache, cct);
        (void) mod_hash_insert(vhcache->vhcache_client_hash,
            (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
        cache_updated = 1;
    }

    /* look for an existing cached path matching this phci and address */
    for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
        if (cpi->cpi_cphci->cphci_phci == ph &&
            strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
            cpi->cpi_pip = pip;
            if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
                /* the path exists after all; clear the hint and re-sort */
                cpi->cpi_flags &=
                    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
                sort_vhcache_paths(cct);
                cache_updated = 1;
            }
            break;
        }
    }

    /* not cached yet: create and enqueue a new pathinfo entry */
    if (cpi == NULL) {
        cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
        cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
        cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
        ASSERT(cpi->cpi_cphci != NULL);
        cpi->cpi_pip = pip;
        enqueue_vhcache_pathinfo(cct, cpi);
        cache_updated = 1;
    }

    rw_exit(&vhcache->vhcache_lock);

    if (cache_updated)
        vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_client_t *ct = pip->pi_client;
    mdi_vhcache_client_t *cct;
    mdi_vhcache_pathinfo_t *cpi;

    rw_enter(&vhcache->vhcache_lock, RW_WRITER);
    if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
        NULL)) != NULL) {
        for (cpi = cct->cct_cpi_head; cpi != NULL;
            cpi = cpi->cpi_next) {
            if (cpi->cpi_pip == pip) {
                /* only drop the pip reference; keep the cache entry */
                cpi->cpi_pip = NULL;
                break;
            }
        }
    }
    rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
    nvlist_t *nvl;
    int err;
    int rv;

    /*
     * It is possible that the system may shutdown before
     * i_ddi_io_initialized (during stmsboot for example). To allow for
     * flushing the cache in this case do not check for
     * i_ddi_io_initialized when force flag is set.
     */
    if (force_flag == 0 && !i_ddi_io_initialized())
        return (MDI_FAILURE);

    if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
        err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
        nvlist_free(nvl);
    } else
        err = EFAULT;

    rv = MDI_SUCCESS;
    mutex_enter(&vhc->vhc_lock);
    if (err != 0) {
        if (err == EROFS) {
            /* read-only fs: give up flushing until it becomes writable */
            vhc->vhc_flags |= MDI_VHC_READONLY_FS;
            vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
                MDI_VHC_VHCACHE_DIRTY);
        } else {
            /* log the failure once; suppress repeats until it recovers */
            if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
                cmn_err(CE_CONT, "%s: update failed\n",
                    vhc->vhc_vhcache_filename);
                vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
            }
            rv = MDI_FAILURE;
        }
    } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
        cmn_err(CE_CONT,
            "%s: update now ok\n", vhc->vhc_vhcache_filename);
        vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
    }
    mutex_exit(&vhc->vhc_lock);

    return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 */
static void
vhcache_flush_thread(void *arg)
{
    mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
    clock_t idle_time, quit_at_ticks;
    callb_cpr_t cprinfo;

    /* number of seconds to sleep idle before exiting */
    idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

    CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
        "mdi_vhcache_flush");
    mutex_enter(&vhc->vhc_lock);
    for (; ; ) {
        /*
         * While the cache is dirty and no exit was requested, wait
         * until the scheduled flush time and then flush.
         */
        while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
            (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
            if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
                /* vhc_flush_at_ticks is an absolute lbolt deadline */
                CALLB_CPR_SAFE_BEGIN(&cprinfo);
                (void) cv_timedwait(&vhc->vhc_cv,
                    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
                CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
            } else {
                /* clear dirty before flushing; re-mark it on failure */
                vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
                mutex_exit(&vhc->vhc_lock);

                if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
                    vhcache_dirty(vhc);

                mutex_enter(&vhc->vhc_lock);
            }
        }

        quit_at_ticks = ddi_get_lbolt() + idle_time;

        /* idle-wait for more work; give up at the quit deadline */
        while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
            !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
            ddi_get_lbolt() < quit_at_ticks) {
            CALLB_CPR_SAFE_BEGIN(&cprinfo);
            (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                quit_at_ticks);
            CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
        }

        if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
            !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
            goto out;
    }

out:
    /* vhc_lock is held here */
    vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
    /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
    CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
 */
static void
vhcache_dirty(mdi_vhci_config_t *vhc)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    int create_thread;

    rw_enter(&vhcache->vhcache_lock, RW_READER);
    /* do not flush cache until the cache is fully built */
    if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
        rw_exit(&vhcache->vhcache_lock);
        return;
    }
    rw_exit(&vhcache->vhcache_lock);

    mutex_enter(&vhc->vhc_lock);
    /* a flush would fail anyway on a read-only fs; don't bother */
    if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
        mutex_exit(&vhc->vhc_lock);
        return;
    }

    vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
    vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
        mdi_vhcache_flush_delay * TICKS_PER_SECOND;
    if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
        /* flush thread already running; just wake it up */
        cv_broadcast(&vhc->vhc_cv);
        create_thread = 0;
    } else {
        /* claim the thread slot under the lock, create it outside */
        vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
        create_thread = 1;
    }
    mutex_exit(&vhc->vhc_lock);

    if (create_thread)
        (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
            0, &p0, TS_RUN, minclsyspri);
}

/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
    char *phbc_phci_path;                           /* phci device path */
    struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */
    struct mdi_phci_bus_config_s *phbc_next;        /* list linkage */
} mdi_phci_bus_config_t;

/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
    ddi_bus_config_op_t vhbc_op;    /* bus config op */
    major_t vhbc_op_major;          /* bus config op major */
    uint_t vhbc_op_flags;           /* bus config op flags */
    kmutex_t vhbc_lock;             /* protects vhbc_thr_count */
    kcondvar_t vhbc_cv;             /* signalled when thr count hits 0 */
    int vhbc_thr_count;             /* number of worker threads left */
} mdi_vhci_bus_config_t;

/*
 * bus config the specified phci
 */
static void
bus_config_phci(void *arg)
{
    mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
    mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
    dev_info_t *ph_dip;

    /*
     * first configure all path components upto phci and then configure
     * the phci children.
     */
    if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
        != NULL) {
        if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
            vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
            (void) ndi_devi_config_driver(ph_dip,
                vhbc->vhbc_op_flags,
                vhbc->vhbc_op_major);
        } else
            (void) ndi_devi_config(ph_dip,
                vhbc->vhbc_op_flags);

        /* release the hold that e_ddi_hold_devi_by_path() placed */
        ndi_rele_devi(ph_dip);
    }

    /* this thread owns phbc; free it before signalling completion */
    kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
    kmem_free(phbc, sizeof (*phbc));

    /* wake the waiter in bus_config_all_phcis() when we are the last */
    mutex_enter(&vhbc->vhbc_lock);
    vhbc->vhbc_thr_count--;
    if (vhbc->vhbc_thr_count == 0)
        cv_broadcast(&vhbc->vhbc_cv);
    mutex_exit(&vhbc->vhbc_lock);
}

/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
    mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
    mdi_vhci_bus_config_t *vhbc;
    mdi_vhcache_phci_t *cphci;

    rw_enter(&vhcache->vhcache_lock, RW_READER);
    if (vhcache->vhcache_phci_head == NULL) {
        /* nothing to configure */
        rw_exit(&vhcache->vhcache_lock);
        return;
    }

    vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

    /* build one request per cached phci; copy paths while locked */
    for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
        cphci = cphci->cphci_next) {
        phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
        phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
            KM_SLEEP);
        phbc->phbc_vhbusconfig = vhbc;
        phbc->phbc_next = phbc_head;
        phbc_head = phbc;
        vhbc->vhbc_thr_count++;
    }
    rw_exit(&vhcache->vhcache_lock);

    vhbc->vhbc_op = op;
    vhbc->vhbc_op_major = maj;
    vhbc->vhbc_op_flags = NDI_NO_EVENT |
        (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
    mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
    cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

    /* now create threads to initiate bus config on all phcis in parallel */
    for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
        phbc_next = phbc->phbc_next;
        if (mdi_mtc_off)
            /* multi-threaded config disabled: run inline */
            bus_config_phci((void *)phbc);
        else
            (void) thread_create(NULL, 0, bus_config_phci, phbc,
                0, &p0, TS_RUN, minclsyspri);
    }

    mutex_enter(&vhbc->vhbc_lock);
    /* wait until all threads exit */
    while (vhbc->vhbc_thr_count > 0)
        cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
    mutex_exit(&vhbc->vhbc_lock);

    mutex_destroy(&vhbc->vhbc_lock);
    cv_destroy(&vhbc->vhbc_cv);
    kmem_free(vhbc, sizeof (*vhbc));
}

/*
 * Single threaded version of bus_config_all_phcis()
 */
static void
st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

    single_threaded_vhconfig_enter(vhc);
    bus_config_all_phcis(vhcache, flags, op, maj);
    single_threaded_vhconfig_exit(vhc);
}

/*
 * Perform BUS_CONFIG_ONE on the specified child of the phci.
 * The path includes the child component in addition to the phci path.
 * Returns MDI_SUCCESS if the child was configured, MDI_FAILURE otherwise.
 */
static int
bus_config_one_phci_child(char *path)
{
    dev_info_t *ph_dip, *child;
    char *devnm;
    int rv = MDI_FAILURE;

    /*
     * extract the child component of the phci; the path is temporarily
     * truncated in place and restored before returning.
     */
    devnm = strrchr(path, '/');
    *devnm++ = '\0';

    /*
     * first configure all path components upto phci and then
     * configure the phci child.
     */
    if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
        if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
            NDI_SUCCESS) {
            /*
             * release the hold that ndi_devi_config_one() placed
             */
            ndi_rele_devi(child);
            rv = MDI_SUCCESS;
        }

        /* release the hold that e_ddi_hold_devi_by_path() placed */
        ndi_rele_devi(ph_dip);
    }

    /* restore the '/' that was overwritten above */
    devnm--;
    *devnm = '/';
    return (rv);
}

/*
 * Build a list of phci client paths for the specified vhci client.
 * The list includes only those phci client paths which aren't configured yet.
 */
static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
{
    mdi_vhcache_pathinfo_t *cpi;
    mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
    int config_path, len;

    for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
        /*
         * include only those paths that aren't configured.
         */
        config_path = 0;
        if (cpi->cpi_pip == NULL)
            config_path = 1;
        else {
            MDI_PI_LOCK(cpi->cpi_pip);
            if (MDI_PI_IS_INIT(cpi->cpi_pip))
                config_path = 1;
            MDI_PI_UNLOCK(cpi->cpi_pip);
        }

        if (config_path) {
            pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
            /* + 3 accounts for '/', '@' and the trailing NUL */
            len = strlen(cpi->cpi_cphci->cphci_path) +
                strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
            pp->phys_path = kmem_alloc(len, KM_SLEEP);
            (void) snprintf(pp->phys_path, len, "%s/%s@%s",
                cpi->cpi_cphci->cphci_path, ct_name,
                cpi->cpi_addr);
            pp->phys_path_next = NULL;

            /* append to keep the list in cpi order */
            if (pp_head == NULL)
                pp_head = pp;
            else
                pp_tail->phys_path_next = pp;
            pp_tail = pp;
        }
    }

    return (pp_head);
}

/*
 * Free the memory allocated for phci client path list.
 */
static void
free_phclient_path_list(mdi_phys_path_t *pp_head)
{
    mdi_phys_path_t *pp, *pp_next;

    for (pp = pp_head; pp != NULL; pp = pp_next) {
        pp_next = pp->phys_path_next;
        kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
        kmem_free(pp, sizeof (*pp));
    }
}

/*
 * Allocated async client structure and initialize with the specified values.
 * Ownership of pp_head transfers to the returned structure (freed by
 * free_async_client_config()).
 */
static mdi_async_client_config_t *
alloc_async_client_config(char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
    mdi_async_client_config_t *acc;

    acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
    acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
    acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
    acc->acc_phclient_path_list_head = pp_head;
    init_vhcache_lookup_token(&acc->acc_token, tok);
    acc->acc_next = NULL;
    return (acc);
}

/*
 * Free the memory allocated for the async client structure and their members.
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
    if (acc->acc_phclient_path_list_head)
        free_phclient_path_list(acc->acc_phclient_path_list_head);
    kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
    kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
    kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
    mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;

    /*
     * Detach the whole list and re-enqueue every element; the ordering
     * presumably comes from enqueue_vhcache_pathinfo() (defined
     * elsewhere in this file) placing hinted entries at the tail.
     */
    cpi_head = cct->cct_cpi_head;
    cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
    for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
        cpi_next = cpi->cpi_next;
        enqueue_vhcache_pathinfo(cct, cpi);
    }
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_vhcache_client_t *cct;
    mdi_vhcache_pathinfo_t *cpi;

    rw_enter(&vhcache->vhcache_lock, RW_READER);
    if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
        == NULL) {
        rw_exit(&vhcache->vhcache_lock);
        return;
    }

    /*
     * to avoid unnecessary on-disk cache updates, first check if an
     * update is really needed. If no update is needed simply return.
     */
    for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
        /* an entry is wrong if its hint disagrees with cpi_pip */
        if ((cpi->cpi_pip != NULL &&
            (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
            (cpi->cpi_pip == NULL &&
            !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
            break;
        }
    }
    if (cpi == NULL) {
        rw_exit(&vhcache->vhcache_lock);
        return;
    }

    /*
     * Upgrade to a writer lock; if the upgrade fails we must drop and
     * re-acquire, so the client must be looked up again.
     */
    if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
        rw_exit(&vhcache->vhcache_lock);
        rw_enter(&vhcache->vhcache_lock, RW_WRITER);
        if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
            tok)) == NULL) {
            rw_exit(&vhcache->vhcache_lock);
            return;
        }
    }

    /* reconcile every hint flag with the presence of a pip */
    for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
        if (cpi->cpi_pip != NULL)
            cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
        else
            cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
    }
    sort_vhcache_paths(cct);

    rw_exit(&vhcache->vhcache_lock);
    /* hint flags are persisted on disk, so schedule a flush */
    vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
    mdi_phys_path_t *pp;

    for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
        (void) bus_config_one_phci_child(pp->phys_path);
    adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 */
static void
config_client_paths_thread(void *arg)
{
    mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
    mdi_async_client_config_t *acc;
    clock_t quit_at_ticks;
    clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
    callb_cpr_t cprinfo;

    CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
        "mdi_config_client_paths");

    for (; ; ) {
        quit_at_ticks = ddi_get_lbolt() + idle_time;

        mutex_enter(&vhc->vhc_lock);
        /* wait for work, but give up at the idle deadline */
        while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
            vhc->vhc_acc_list_head == NULL &&
            ddi_get_lbolt() < quit_at_ticks) {
            CALLB_CPR_SAFE_BEGIN(&cprinfo);
            (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                quit_at_ticks);
            CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
        }

        if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
            vhc->vhc_acc_list_head == NULL)
            goto out;

        /* dequeue the head entry, then process it without the lock */
        acc = vhc->vhc_acc_list_head;
        vhc->vhc_acc_list_head = acc->acc_next;
        if (vhc->vhc_acc_list_head == NULL)
            vhc->vhc_acc_list_tail = NULL;
        vhc->vhc_acc_count--;
        mutex_exit(&vhc->vhc_lock);

        config_client_paths_sync(vhc, acc->acc_ct_name,
            acc->acc_ct_addr, acc->acc_phclient_path_list_head,
            &acc->acc_token);

        free_async_client_config(acc);
    }

out:
    /* vhc_lock is held here */
    vhc->vhc_acc_thrcount--;
    /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
    CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
    mdi_async_client_config_t *acc, *newacc;
    int create_thread;

    if (pp_head == NULL)
        return;

    if (mdi_mtc_off) {
        /* multi-threaded config disabled: configure synchronously */
        config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
        free_phclient_path_list(pp_head);
        return;
    }

    newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
    ASSERT(newacc);

    mutex_enter(&vhc->vhc_lock);
    /* drop the request if one for the same client is already queued */
    for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
        if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
            strcmp(ct_addr, acc->acc_ct_addr) == 0) {
            free_async_client_config(newacc);
            mutex_exit(&vhc->vhc_lock);
            return;
        }
    }

    /* append to the tail of the work queue */
    if (vhc->vhc_acc_list_head == NULL)
        vhc->vhc_acc_list_head = newacc;
    else
        vhc->vhc_acc_list_tail->acc_next = newacc;
    vhc->vhc_acc_list_tail = newacc;
    vhc->vhc_acc_count++;
    if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
        /* enough worker threads exist; just wake them up */
        cv_broadcast(&vhc->vhc_cv);
        create_thread = 0;
    } else {
        /* claim the slot under the lock, create the thread outside */
        vhc->vhc_acc_thrcount++;
        create_thread = 1;
    }
    mutex_exit(&vhc->vhc_lock);

    if (create_thread)
        (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
            0, &p0, TS_RUN, minclsyspri);
}

/*
 * Return number of online paths for the specified client.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
    mdi_vhcache_pathinfo_t *cpi;
    int online_count = 0;

    for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
        if (cpi->cpi_pip != NULL) {
            MDI_PI_LOCK(cpi->cpi_pip);
            if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
                online_count++;
            MDI_PI_UNLOCK(cpi->cpi_pip);
        }
    }

    return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Entered with vhcache_lock held (see ASSERT below); this function
 * releases vhcache_lock on every return path.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
    mdi_phys_path_t *pp_head, *pp;
    mdi_vhcache_client_t *cct;
    mdi_vhcache_lookup_token_t tok;

    ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

    init_vhcache_lookup_token(&tok, NULL);

    if (ct_name == NULL || ct_addr == NULL ||
        (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
        == NULL ||
        (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
        rw_exit(&vhcache->vhcache_lock);
        return;
    }

    /* if at least one path is online, configure the rest asynchronously */
    if (nonline_paths(cct) > 0) {
        rw_exit(&vhcache->vhcache_lock);
        config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
        return;
    }

    rw_exit(&vhcache->vhcache_lock);

    /*
     * no path online yet: configure synchronously until one comes
     * online, then hand the remainder to the async machinery.
     */
    for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
        if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
            rw_enter(&vhcache->vhcache_lock, RW_READER);

            if ((cct = lookup_vhcache_client(vhcache, ct_name,
                ct_addr, &tok)) == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                goto out;
            }

            if (nonline_paths(cct) > 0 &&
                pp->phys_path_next != NULL) {
                rw_exit(&vhcache->vhcache_lock);
                /* async side takes ownership of the tail */
                config_client_paths_async(vhc, ct_name, ct_addr,
                    pp->phys_path_next, &tok);
                pp->phys_path_next = NULL;
                goto out;
            }

            rw_exit(&vhcache->vhcache_lock);
        }
    }

    adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
    free_phclient_path_list(pp_head);
}

/*
 * Serialize vhci configuration operations: block until no other thread
 * holds MDI_VHC_SINGLE_THREADED, then claim it.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
    mutex_enter(&vhc->vhc_lock);
    while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
        cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
    vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
    mutex_exit(&vhc->vhc_lock);
}

/*
 * Release MDI_VHC_SINGLE_THREADED and wake any waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
    mutex_enter(&vhc->vhc_lock);
    vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
    cv_broadcast(&vhc->vhc_cv);
    mutex_exit(&vhc->vhc_lock);
}

typedef struct mdi_phci_driver_info {
    char *phdriver_name;    /* name of the phci driver */

    /* set to non zero if the phci driver supports root device */
    int phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
8138 */ 8139 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8140 { "fp", 1 }, 8141 { "iscsi", 0 }, 8142 { "ibsrp", 1 } 8143 }; 8144 8145 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 8146 8147 static void * 8148 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 8149 { 8150 void *new_ptr; 8151 8152 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 8153 if (old_ptr) { 8154 bcopy(old_ptr, new_ptr, old_size); 8155 kmem_free(old_ptr, old_size); 8156 } 8157 return (new_ptr); 8158 } 8159 8160 static void 8161 add_to_phci_list(char ***driver_list, int **root_support_list, 8162 int *cur_elements, int *max_elements, char *driver_name, int root_support) 8163 { 8164 ASSERT(*cur_elements <= *max_elements); 8165 if (*cur_elements == *max_elements) { 8166 *max_elements += 10; 8167 *driver_list = mdi_realloc(*driver_list, 8168 sizeof (char *) * (*cur_elements), 8169 sizeof (char *) * (*max_elements)); 8170 *root_support_list = mdi_realloc(*root_support_list, 8171 sizeof (int) * (*cur_elements), 8172 sizeof (int) * (*max_elements)); 8173 } 8174 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 8175 (*root_support_list)[*cur_elements] = root_support; 8176 (*cur_elements)++; 8177 } 8178 8179 static void 8180 get_phci_driver_list(char *vhci_class, char ***driver_list, 8181 int **root_support_list, int *cur_elements, int *max_elements) 8182 { 8183 mdi_phci_driver_info_t *st_driver_list, *p; 8184 int st_ndrivers, root_support, i, j, driver_conf_count; 8185 major_t m; 8186 struct devnames *dnp; 8187 ddi_prop_t *propp; 8188 8189 *driver_list = NULL; 8190 *root_support_list = NULL; 8191 *cur_elements = 0; 8192 *max_elements = 0; 8193 8194 /* add the phci drivers derived from the phci driver.conf files */ 8195 for (m = 0; m < devcnt; m++) { 8196 dnp = &devnamesp[m]; 8197 8198 if (dnp->dn_flags & DN_PHCI_DRIVER) { 8199 LOCK_DEV_OPS(&dnp->dn_lock); 8200 if (dnp->dn_global_prop_ptr != NULL && 8201 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 8202 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 8203 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 8204 strcmp(propp->prop_val, vhci_class) == 0) { 8205 8206 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 8207 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 8208 &dnp->dn_global_prop_ptr->prop_list) 8209 == NULL) ? 1 : 0; 8210 8211 add_to_phci_list(driver_list, root_support_list, 8212 cur_elements, max_elements, dnp->dn_name, 8213 root_support); 8214 8215 UNLOCK_DEV_OPS(&dnp->dn_lock); 8216 } else 8217 UNLOCK_DEV_OPS(&dnp->dn_lock); 8218 } 8219 } 8220 8221 driver_conf_count = *cur_elements; 8222 8223 /* add the phci drivers specified in the built-in tables */ 8224 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 8225 st_driver_list = scsi_phci_driver_list; 8226 st_ndrivers = sizeof (scsi_phci_driver_list) / 8227 sizeof (mdi_phci_driver_info_t); 8228 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 8229 st_driver_list = ib_phci_driver_list; 8230 st_ndrivers = sizeof (ib_phci_driver_list) / 8231 sizeof (mdi_phci_driver_info_t); 8232 } else { 8233 st_driver_list = NULL; 8234 st_ndrivers = 0; 8235 } 8236 8237 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 8238 /* add this phci driver if not already added before */ 8239 for (j = 0; j < driver_conf_count; j++) { 8240 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 8241 break; 8242 } 8243 if (j == driver_conf_count) { 8244 add_to_phci_list(driver_list, root_support_list, 8245 cur_elements, max_elements, p->phdriver_name, 8246 p->phdriver_root_support); 8247 } 8248 } 8249 } 8250 8251 /* 8252 * Attach the phci driver instances associated with the specified vhci class. 8253 * If root is mounted attach all phci driver instances. 8254 * If root is not mounted, attach the instances of only those phci 8255 * drivers that have the root support. 
8256 */ 8257 static void 8258 attach_phci_drivers(char *vhci_class) 8259 { 8260 char **driver_list, **p; 8261 int *root_support_list; 8262 int cur_elements, max_elements, i; 8263 major_t m; 8264 8265 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 8266 &cur_elements, &max_elements); 8267 8268 for (i = 0; i < cur_elements; i++) { 8269 if (modrootloaded || root_support_list[i]) { 8270 m = ddi_name_to_major(driver_list[i]); 8271 if (m != (major_t)-1 && ddi_hold_installed_driver(m)) 8272 ddi_rele_driver(m); 8273 } 8274 } 8275 8276 if (driver_list) { 8277 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 8278 kmem_free(*p, strlen(*p) + 1); 8279 kmem_free(driver_list, sizeof (char *) * max_elements); 8280 kmem_free(root_support_list, sizeof (int) * max_elements); 8281 } 8282 } 8283 8284 /* 8285 * Build vhci cache: 8286 * 8287 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 8288 * the phci driver instances. During this process the cache gets built. 8289 * 8290 * Cache is built fully if the root is mounted. 8291 * If the root is not mounted, phci drivers that do not have root support 8292 * are not attached. As a result the cache is built partially. The entries 8293 * in the cache reflect only those phci drivers that have root support. 
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
    mdi_vhci_config_t *vhc = vh->vh_config;
    mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

    single_threaded_vhconfig_enter(vhc);

    rw_enter(&vhcache->vhcache_lock, RW_READER);
    if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
        /* cache already built; nothing to do */
        rw_exit(&vhcache->vhcache_lock);
        single_threaded_vhconfig_exit(vhc);
        return (0);
    }
    rw_exit(&vhcache->vhcache_lock);

    attach_phci_drivers(vh->vh_class);
    bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
        BUS_CONFIG_ALL, (major_t)-1);

    rw_enter(&vhcache->vhcache_lock, RW_WRITER);
    vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
    rw_exit(&vhcache->vhcache_lock);

    single_threaded_vhconfig_exit(vhc);
    vhcache_dirty(vhc);
    /* return 1 to indicate this call built the cache */
    return (1);
}

/*
 * Determine if discovery of paths is needed.
 * Returns 1 when a full discovery should run, 0 otherwise.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
    int rv = 1;

    mutex_enter(&vhc->vhc_lock);
    if (i_ddi_io_initialized() == 0) {
        /* boot-time budget of discoveries */
        if (vhc->vhc_path_discovery_boot > 0) {
            vhc->vhc_path_discovery_boot--;
            goto out;
        }
    } else {
        /* post-boot budget of discoveries */
        if (vhc->vhc_path_discovery_postboot > 0) {
            vhc->vhc_path_discovery_postboot--;
            goto out;
        }
    }

    /*
     * Do full path discovery at most once per mdi_path_discovery_interval.
     * This is to avoid a series of full path discoveries when opening
     * stale /dev/[r]dsk links.
     */
    if (mdi_path_discovery_interval != -1 &&
        lbolt64 >= vhc->vhc_path_discovery_cutoff_time)
        goto out;

    rv = 0;
out:
    mutex_exit(&vhc->vhc_lock);
    return (rv);
}

/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
8365 */ 8366 static int 8367 vhcache_discover_paths(mdi_vhci_t *vh) 8368 { 8369 mdi_vhci_config_t *vhc = vh->vh_config; 8370 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8371 int rv = 0; 8372 8373 single_threaded_vhconfig_enter(vhc); 8374 8375 if (vhcache_do_discovery(vhc)) { 8376 attach_phci_drivers(vh->vh_class); 8377 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 8378 NDI_NO_EVENT, BUS_CONFIG_ALL, (major_t)-1); 8379 8380 mutex_enter(&vhc->vhc_lock); 8381 vhc->vhc_path_discovery_cutoff_time = lbolt64 + 8382 mdi_path_discovery_interval * TICKS_PER_SECOND; 8383 mutex_exit(&vhc->vhc_lock); 8384 rv = 1; 8385 } 8386 8387 single_threaded_vhconfig_exit(vhc); 8388 return (rv); 8389 } 8390 8391 /* 8392 * Generic vhci bus config implementation: 8393 * 8394 * Parameters 8395 * vdip vhci dip 8396 * flags bus config flags 8397 * op bus config operation 8398 * The remaining parameters are bus config operation specific 8399 * 8400 * for BUS_CONFIG_ONE 8401 * arg pointer to name@addr 8402 * child upon successful return from this function, *child will be 8403 * set to the configured and held devinfo child node of vdip. 8404 * ct_addr pointer to client address (i.e. GUID) 8405 * 8406 * for BUS_CONFIG_DRIVER 8407 * arg major number of the driver 8408 * child and ct_addr parameters are ignored 8409 * 8410 * for BUS_CONFIG_ALL 8411 * arg, child, and ct_addr parameters are ignored 8412 * 8413 * Note that for the rest of the bus config operations, this function simply 8414 * calls the framework provided default bus config routine. 
8415 */ 8416 int 8417 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 8418 void *arg, dev_info_t **child, char *ct_addr) 8419 { 8420 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8421 mdi_vhci_config_t *vhc = vh->vh_config; 8422 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8423 int rv = 0; 8424 int params_valid = 0; 8425 char *cp; 8426 8427 /* 8428 * To bus config vhcis we relay operation, possibly using another 8429 * thread, to phcis. The phci driver then interacts with MDI to cause 8430 * vhci child nodes to be enumerated under the vhci node. Adding a 8431 * vhci child requires an ndi_devi_enter of the vhci. Since another 8432 * thread may be adding the child, to avoid deadlock we can't wait 8433 * for the relayed operations to complete if we have already entered 8434 * the vhci node. 8435 */ 8436 if (DEVI_BUSY_OWNED(vdip)) { 8437 MDI_DEBUG(2, (CE_NOTE, vdip, "!MDI: vhci bus config: " 8438 "vhci dip is busy owned %p\n", (void *)vdip)); 8439 goto default_bus_config; 8440 } 8441 8442 rw_enter(&vhcache->vhcache_lock, RW_READER); 8443 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8444 rw_exit(&vhcache->vhcache_lock); 8445 rv = build_vhci_cache(vh); 8446 rw_enter(&vhcache->vhcache_lock, RW_READER); 8447 } 8448 8449 switch (op) { 8450 case BUS_CONFIG_ONE: 8451 if (arg != NULL && ct_addr != NULL) { 8452 /* extract node name */ 8453 cp = (char *)arg; 8454 while (*cp != '\0' && *cp != '@') 8455 cp++; 8456 if (*cp == '@') { 8457 params_valid = 1; 8458 *cp = '\0'; 8459 config_client_paths(vhc, (char *)arg, ct_addr); 8460 /* config_client_paths() releases cache_lock */ 8461 *cp = '@'; 8462 break; 8463 } 8464 } 8465 8466 rw_exit(&vhcache->vhcache_lock); 8467 break; 8468 8469 case BUS_CONFIG_DRIVER: 8470 rw_exit(&vhcache->vhcache_lock); 8471 if (rv == 0) 8472 st_bus_config_all_phcis(vhc, flags, op, 8473 (major_t)(uintptr_t)arg); 8474 break; 8475 8476 case BUS_CONFIG_ALL: 8477 rw_exit(&vhcache->vhcache_lock); 8478 if (rv == 0) 8479 
st_bus_config_all_phcis(vhc, flags, op, -1); 8480 break; 8481 8482 default: 8483 rw_exit(&vhcache->vhcache_lock); 8484 break; 8485 } 8486 8487 8488 default_bus_config: 8489 /* 8490 * All requested child nodes are enumerated under the vhci. 8491 * Now configure them. 8492 */ 8493 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8494 NDI_SUCCESS) { 8495 return (MDI_SUCCESS); 8496 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 8497 /* discover all paths and try configuring again */ 8498 if (vhcache_discover_paths(vh) && 8499 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 8500 NDI_SUCCESS) 8501 return (MDI_SUCCESS); 8502 } 8503 8504 return (MDI_FAILURE); 8505 } 8506 8507 /* 8508 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 8509 */ 8510 static nvlist_t * 8511 read_on_disk_vhci_cache(char *vhci_class) 8512 { 8513 nvlist_t *nvl; 8514 int err; 8515 char *filename; 8516 8517 filename = vhclass2vhcache_filename(vhci_class); 8518 8519 if ((err = fread_nvlist(filename, &nvl)) == 0) { 8520 kmem_free(filename, strlen(filename) + 1); 8521 return (nvl); 8522 } else if (err == EIO) 8523 cmn_err(CE_WARN, "%s: I/O error, will recreate\n", filename); 8524 else if (err == EINVAL) 8525 cmn_err(CE_WARN, 8526 "%s: data file corrupted, will recreate\n", filename); 8527 8528 kmem_free(filename, strlen(filename) + 1); 8529 return (NULL); 8530 } 8531 8532 /* 8533 * Read on-disk vhci cache into nvlists for all vhci classes. 8534 * Called during booting by i_ddi_read_devices_files(). 8535 */ 8536 void 8537 mdi_read_devices_files(void) 8538 { 8539 int i; 8540 8541 for (i = 0; i < N_VHCI_CLASSES; i++) 8542 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 8543 } 8544 8545 /* 8546 * Remove all stale entries from vhci cache. 
8547 */ 8548 static void 8549 clean_vhcache(mdi_vhci_config_t *vhc) 8550 { 8551 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8552 mdi_vhcache_phci_t *cphci, *cphci_head, *cphci_next; 8553 mdi_vhcache_client_t *cct, *cct_head, *cct_next; 8554 mdi_vhcache_pathinfo_t *cpi, *cpi_head, *cpi_next; 8555 8556 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8557 8558 cct_head = vhcache->vhcache_client_head; 8559 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 8560 for (cct = cct_head; cct != NULL; cct = cct_next) { 8561 cct_next = cct->cct_next; 8562 8563 cpi_head = cct->cct_cpi_head; 8564 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8565 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8566 cpi_next = cpi->cpi_next; 8567 if (cpi->cpi_pip != NULL) { 8568 ASSERT(cpi->cpi_cphci->cphci_phci != NULL); 8569 enqueue_tail_vhcache_pathinfo(cct, cpi); 8570 } else 8571 free_vhcache_pathinfo(cpi); 8572 } 8573 8574 if (cct->cct_cpi_head != NULL) 8575 enqueue_vhcache_client(vhcache, cct); 8576 else { 8577 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 8578 (mod_hash_key_t)cct->cct_name_addr); 8579 free_vhcache_client(cct); 8580 } 8581 } 8582 8583 cphci_head = vhcache->vhcache_phci_head; 8584 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 8585 for (cphci = cphci_head; cphci != NULL; cphci = cphci_next) { 8586 cphci_next = cphci->cphci_next; 8587 if (cphci->cphci_phci != NULL) 8588 enqueue_vhcache_phci(vhcache, cphci); 8589 else 8590 free_vhcache_phci(cphci); 8591 } 8592 8593 vhcache->vhcache_clean_time = lbolt64; 8594 rw_exit(&vhcache->vhcache_lock); 8595 vhcache_dirty(vhc); 8596 } 8597 8598 /* 8599 * Remove all stale entries from vhci cache. 
8600 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 8601 */ 8602 void 8603 mdi_clean_vhcache(void) 8604 { 8605 mdi_vhci_t *vh; 8606 8607 mutex_enter(&mdi_mutex); 8608 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8609 vh->vh_refcnt++; 8610 mutex_exit(&mdi_mutex); 8611 clean_vhcache(vh->vh_config); 8612 mutex_enter(&mdi_mutex); 8613 vh->vh_refcnt--; 8614 } 8615 mutex_exit(&mdi_mutex); 8616 } 8617 8618 /* 8619 * mdi_vhci_walk_clients(): 8620 * Walker routine to traverse client dev_info nodes 8621 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 8622 * below the client, including nexus devices, which we dont want. 8623 * So we just traverse the immediate siblings, starting from 1st client. 8624 */ 8625 void 8626 mdi_vhci_walk_clients(dev_info_t *vdip, 8627 int (*f)(dev_info_t *, void *), void *arg) 8628 { 8629 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8630 dev_info_t *cdip; 8631 mdi_client_t *ct; 8632 8633 MDI_VHCI_CLIENT_LOCK(vh); 8634 cdip = ddi_get_child(vdip); 8635 while (cdip) { 8636 ct = i_devi_get_client(cdip); 8637 MDI_CLIENT_LOCK(ct); 8638 8639 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 8640 cdip = ddi_get_next_sibling(cdip); 8641 else 8642 cdip = NULL; 8643 8644 MDI_CLIENT_UNLOCK(ct); 8645 } 8646 MDI_VHCI_CLIENT_UNLOCK(vh); 8647 } 8648 8649 /* 8650 * mdi_vhci_walk_phcis(): 8651 * Walker routine to traverse phci dev_info nodes 8652 */ 8653 void 8654 mdi_vhci_walk_phcis(dev_info_t *vdip, 8655 int (*f)(dev_info_t *, void *), void *arg) 8656 { 8657 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 8658 mdi_phci_t *ph, *next; 8659 8660 MDI_VHCI_PHCI_LOCK(vh); 8661 ph = vh->vh_phci_head; 8662 while (ph) { 8663 MDI_PHCI_LOCK(ph); 8664 8665 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 8666 next = ph->ph_next; 8667 else 8668 next = NULL; 8669 8670 MDI_PHCI_UNLOCK(ph); 8671 ph = next; 8672 } 8673 MDI_VHCI_PHCI_UNLOCK(vh); 8674 } 8675 8676 8677 /* 8678 * mdi_walk_vhcis(): 8679 * Walker routine to traverse vhci 
dev_info nodes 8680 */ 8681 void 8682 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 8683 { 8684 mdi_vhci_t *vh = NULL; 8685 8686 mutex_enter(&mdi_mutex); 8687 /* 8688 * Scan for already registered vhci 8689 */ 8690 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 8691 vh->vh_refcnt++; 8692 mutex_exit(&mdi_mutex); 8693 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 8694 mutex_enter(&mdi_mutex); 8695 vh->vh_refcnt--; 8696 break; 8697 } else { 8698 mutex_enter(&mdi_mutex); 8699 vh->vh_refcnt--; 8700 } 8701 } 8702 8703 mutex_exit(&mdi_mutex); 8704 } 8705 8706 /* 8707 * i_mdi_log_sysevent(): 8708 * Logs events for pickup by syseventd 8709 */ 8710 static void 8711 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 8712 { 8713 char *path_name; 8714 nvlist_t *attr_list; 8715 8716 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 8717 KM_SLEEP) != DDI_SUCCESS) { 8718 goto alloc_failed; 8719 } 8720 8721 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 8722 (void) ddi_pathname(dip, path_name); 8723 8724 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 8725 ddi_driver_name(dip)) != DDI_SUCCESS) { 8726 goto error; 8727 } 8728 8729 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 8730 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 8731 goto error; 8732 } 8733 8734 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 8735 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 8736 goto error; 8737 } 8738 8739 if (nvlist_add_string(attr_list, DDI_PATHNAME, 8740 path_name) != DDI_SUCCESS) { 8741 goto error; 8742 } 8743 8744 if (nvlist_add_string(attr_list, DDI_CLASS, 8745 ph_vh_class) != DDI_SUCCESS) { 8746 goto error; 8747 } 8748 8749 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 8750 attr_list, NULL, DDI_SLEEP); 8751 8752 error: 8753 kmem_free(path_name, MAXPATHLEN); 8754 nvlist_free(attr_list); 8755 return; 8756 8757 alloc_failed: 8758 MDI_DEBUG(1, (CE_WARN, dip, 8759 "!i_mdi_log_sysevent: Unable to send sysevent")); 8760 } 
8761